{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999675974829725, "eval_steps": 2411, "global_step": 9644, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010368805448807264, "grad_norm": 3.078125, "learning_rate": 2e-05, "loss": 11.9266, "step": 1 }, { "epoch": 0.00010368805448807264, "eval_loss": 11.923038482666016, "eval_runtime": 0.4444, "eval_samples_per_second": 335.305, "eval_steps_per_second": 15.753, "step": 1 }, { "epoch": 0.00020737610897614527, "grad_norm": 3.046875, "learning_rate": 4e-05, "loss": 11.9246, "step": 2 }, { "epoch": 0.0003110641634642179, "grad_norm": 3.171875, "learning_rate": 6e-05, "loss": 11.8897, "step": 3 }, { "epoch": 0.00041475221795229055, "grad_norm": 2.890625, "learning_rate": 8e-05, "loss": 11.7637, "step": 4 }, { "epoch": 0.0005184402724403631, "grad_norm": 2.6875, "learning_rate": 0.0001, "loss": 11.6167, "step": 5 }, { "epoch": 0.0006221283269284358, "grad_norm": 2.5625, "learning_rate": 0.00012, "loss": 11.3739, "step": 6 }, { "epoch": 0.0007258163814165084, "grad_norm": 2.53125, "learning_rate": 0.00014, "loss": 11.1311, "step": 7 }, { "epoch": 0.0008295044359045811, "grad_norm": 2.359375, "learning_rate": 0.00016, "loss": 10.8909, "step": 8 }, { "epoch": 0.0009331924903926537, "grad_norm": 2.21875, "learning_rate": 0.00018, "loss": 10.6657, "step": 9 }, { "epoch": 0.0010368805448807263, "grad_norm": 2.125, "learning_rate": 0.0002, "loss": 10.4763, "step": 10 }, { "epoch": 0.001140568599368799, "grad_norm": 2.078125, "learning_rate": 0.00019999999941005286, "loss": 10.2975, "step": 11 }, { "epoch": 0.0012442566538568716, "grad_norm": 2.015625, "learning_rate": 0.00019999999764021143, "loss": 10.1245, "step": 12 }, { "epoch": 0.0013479447083449443, "grad_norm": 2.0, "learning_rate": 0.00019999999469047573, "loss": 9.9439, "step": 13 }, { "epoch": 0.0014516327628330168, "grad_norm": 2.015625, "learning_rate": 0.0001999999905608458, "loss": 9.7362, "step": 14 }, { "epoch": 0.0015553208173210895, "grad_norm": 1.9609375, "learning_rate": 0.00019999998525132166, "loss": 9.5586, "step": 15 }, { "epoch": 0.0016590088718091622, "grad_norm": 1.875, "learning_rate": 0.00019999997876190344, "loss": 9.4056, "step": 16 }, { "epoch": 0.0017626969262972347, "grad_norm": 1.8515625, "learning_rate": 0.00019999997109259115, "loss": 9.2022, "step": 17 }, { "epoch": 0.0018663849807853074, "grad_norm": 1.7890625, "learning_rate": 0.00019999996224338487, "loss": 9.0599, "step": 18 }, { "epoch": 0.00197007303527338, "grad_norm": 1.734375, "learning_rate": 0.0001999999522142848, "loss": 8.9008, "step": 19 }, { "epoch": 0.0020737610897614525, "grad_norm": 1.6171875, "learning_rate": 0.000199999941005291, "loss": 8.8079, "step": 20 }, { "epoch": 0.0021774491442495252, "grad_norm": 1.5625, "learning_rate": 0.00019999992861640355, "loss": 8.654, "step": 21 }, { "epoch": 0.002281137198737598, "grad_norm": 1.46875, "learning_rate": 0.0001999999150476227, "loss": 8.5387, "step": 22 }, { "epoch": 0.0023848252532256706, "grad_norm": 1.390625, "learning_rate": 0.0001999999002989485, "loss": 8.4015, "step": 23 }, { "epoch": 0.0024885133077137433, "grad_norm": 1.2890625, "learning_rate": 0.00019999988437038123, "loss": 8.3069, "step": 24 }, { "epoch": 0.002592201362201816, "grad_norm": 1.2109375, "learning_rate": 0.00019999986726192102, "loss": 8.1671, "step": 25 }, { "epoch": 0.0026958894166898887, "grad_norm": 1.0546875, "learning_rate": 0.00019999984897356806, "loss": 8.1281, "step": 26 }, { "epoch": 0.002799577471177961, "grad_norm": 0.921875, "learning_rate": 0.0001999998295053226, "loss": 8.0346, "step": 27 }, { "epoch": 0.0029032655256660336, "grad_norm": 0.7890625, "learning_rate": 0.00019999980885718487, "loss": 7.9803, "step": 28 }, { "epoch": 0.0030069535801541063, "grad_norm": 0.67578125, "learning_rate": 0.00019999978702915508, "loss": 7.9035, "step": 29 }, { "epoch": 0.003110641634642179, "grad_norm": 0.58984375, "learning_rate": 0.0001999997640212335, "loss": 7.8359, "step": 30 }, { "epoch": 0.0032143296891302517, "grad_norm": 0.470703125, "learning_rate": 0.00019999973983342043, "loss": 7.8463, "step": 31 }, { "epoch": 0.0033180177436183244, "grad_norm": 0.4140625, "learning_rate": 0.0001999997144657161, "loss": 7.7655, "step": 32 }, { "epoch": 0.003421705798106397, "grad_norm": 0.33984375, "learning_rate": 0.0001999996879181209, "loss": 7.7699, "step": 33 }, { "epoch": 0.0035253938525944693, "grad_norm": 0.296875, "learning_rate": 0.00019999966019063506, "loss": 7.7232, "step": 34 }, { "epoch": 0.003629081907082542, "grad_norm": 0.28515625, "learning_rate": 0.00019999963128325892, "loss": 7.658, "step": 35 }, { "epoch": 0.0037327699615706147, "grad_norm": 0.263671875, "learning_rate": 0.00019999960119599283, "loss": 7.6972, "step": 36 }, { "epoch": 0.0038364580160586874, "grad_norm": 0.232421875, "learning_rate": 0.00019999956992883716, "loss": 7.6441, "step": 37 }, { "epoch": 0.00394014607054676, "grad_norm": 0.296875, "learning_rate": 0.00019999953748179228, "loss": 7.5872, "step": 38 }, { "epoch": 0.004043834125034833, "grad_norm": 0.37890625, "learning_rate": 0.00019999950385485855, "loss": 7.5796, "step": 39 }, { "epoch": 0.004147522179522905, "grad_norm": 0.26171875, "learning_rate": 0.00019999946904803638, "loss": 7.5814, "step": 40 }, { "epoch": 0.004251210234010978, "grad_norm": 0.255859375, "learning_rate": 0.00019999943306132621, "loss": 7.5596, "step": 41 }, { "epoch": 0.0043548982884990504, "grad_norm": 0.275390625, "learning_rate": 0.00019999939589472837, "loss": 7.5181, "step": 42 }, { "epoch": 0.004458586342987124, "grad_norm": 0.2392578125, "learning_rate": 0.00019999935754824342, "loss": 7.4972, "step": 43 }, { "epoch": 0.004562274397475196, "grad_norm": 0.2451171875, "learning_rate": 0.00019999931802187172, "loss": 7.4948, "step": 44 }, { "epoch": 0.004665962451963268, "grad_norm": 0.2578125, "learning_rate": 0.0001999992773156138, "loss": 7.4437, "step": 45 }, { "epoch": 0.004769650506451341, "grad_norm": 0.240234375, "learning_rate": 0.0001999992354294701, "loss": 7.4184, "step": 46 }, { "epoch": 0.0048733385609394135, "grad_norm": 0.2421875, "learning_rate": 0.00019999919236344114, "loss": 7.3949, "step": 47 }, { "epoch": 0.004977026615427487, "grad_norm": 0.2451171875, "learning_rate": 0.00019999914811752738, "loss": 7.3632, "step": 48 }, { "epoch": 0.005080714669915559, "grad_norm": 0.28125, "learning_rate": 0.00019999910269172938, "loss": 7.3576, "step": 49 }, { "epoch": 0.005184402724403632, "grad_norm": 0.232421875, "learning_rate": 0.0001999990560860477, "loss": 7.3231, "step": 50 }, { "epoch": 0.005288090778891704, "grad_norm": 0.263671875, "learning_rate": 0.00019999900830048283, "loss": 7.3035, "step": 51 }, { "epoch": 0.005391778833379777, "grad_norm": 0.306640625, "learning_rate": 0.0001999989593350354, "loss": 7.2858, "step": 52 }, { "epoch": 0.00549546688786785, "grad_norm": 0.265625, "learning_rate": 0.00019999890918970592, "loss": 7.2276, "step": 53 }, { "epoch": 0.005599154942355922, "grad_norm": 0.2314453125, "learning_rate": 0.00019999885786449505, "loss": 7.2519, "step": 54 }, { "epoch": 0.005702842996843995, "grad_norm": 0.302734375, "learning_rate": 0.00019999880535940333, "loss": 7.2018, "step": 55 }, { "epoch": 0.005806531051332067, "grad_norm": 0.2392578125, "learning_rate": 0.00019999875167443142, "loss": 7.1873, "step": 56 }, { "epoch": 0.00591021910582014, "grad_norm": 0.2470703125, "learning_rate": 0.00019999869680957993, "loss": 7.1665, "step": 57 }, { "epoch": 0.006013907160308213, "grad_norm": 0.310546875, "learning_rate": 0.00019999864076484955, "loss": 7.1518, "step": 58 }, { "epoch": 0.006117595214796286, "grad_norm": 0.376953125, "learning_rate": 0.0001999985835402409, "loss": 7.1188, "step": 59 }, { "epoch": 0.006221283269284358, "grad_norm": 0.4375, "learning_rate": 0.00019999852513575466, "loss": 7.0633, "step": 60 }, { "epoch": 0.00632497132377243, "grad_norm": 0.376953125, "learning_rate": 0.00019999846555139152, "loss": 7.0708, "step": 61 }, { "epoch": 0.006428659378260503, "grad_norm": 0.296875, "learning_rate": 0.0001999984047871522, "loss": 7.0265, "step": 62 }, { "epoch": 0.006532347432748576, "grad_norm": 0.40625, "learning_rate": 0.0001999983428430374, "loss": 6.976, "step": 63 }, { "epoch": 0.006636035487236649, "grad_norm": 0.47265625, "learning_rate": 0.00019999827971904787, "loss": 6.9527, "step": 64 }, { "epoch": 0.006739723541724721, "grad_norm": 0.357421875, "learning_rate": 0.00019999821541518437, "loss": 6.9225, "step": 65 }, { "epoch": 0.006843411596212794, "grad_norm": 0.28125, "learning_rate": 0.00019999814993144755, "loss": 6.9846, "step": 66 }, { "epoch": 0.0069470996507008664, "grad_norm": 0.310546875, "learning_rate": 0.00019999808326783835, "loss": 6.9026, "step": 67 }, { "epoch": 0.007050787705188939, "grad_norm": 0.298828125, "learning_rate": 0.00019999801542435743, "loss": 6.8856, "step": 68 }, { "epoch": 0.007154475759677012, "grad_norm": 0.32421875, "learning_rate": 0.00019999794640100562, "loss": 6.8605, "step": 69 }, { "epoch": 0.007258163814165084, "grad_norm": 0.251953125, "learning_rate": 0.00019999787619778375, "loss": 6.8081, "step": 70 }, { "epoch": 0.007361851868653157, "grad_norm": 0.380859375, "learning_rate": 0.00019999780481469266, "loss": 6.8199, "step": 71 }, { "epoch": 0.0074655399231412295, "grad_norm": 0.48828125, "learning_rate": 0.00019999773225173314, "loss": 6.8462, "step": 72 }, { "epoch": 0.007569227977629303, "grad_norm": 0.60546875, "learning_rate": 0.00019999765850890614, "loss": 6.7978, "step": 73 }, { "epoch": 0.007672916032117375, "grad_norm": 0.5625, "learning_rate": 0.0001999975835862124, "loss": 6.7541, "step": 74 }, { "epoch": 0.007776604086605447, "grad_norm": 0.4296875, "learning_rate": 0.00019999750748365294, "loss": 6.7211, "step": 75 }, { "epoch": 0.00788029214109352, "grad_norm": 0.453125, "learning_rate": 0.00019999743020122855, "loss": 6.7321, "step": 76 }, { "epoch": 0.007983980195581592, "grad_norm": 0.48828125, "learning_rate": 0.0001999973517389402, "loss": 6.7423, "step": 77 }, { "epoch": 0.008087668250069666, "grad_norm": 0.515625, "learning_rate": 0.00019999727209678883, "loss": 6.6788, "step": 78 }, { "epoch": 0.008191356304557739, "grad_norm": 0.5625, "learning_rate": 0.0001999971912747753, "loss": 6.689, "step": 79 }, { "epoch": 0.00829504435904581, "grad_norm": 0.416015625, "learning_rate": 0.00019999710927290064, "loss": 6.6457, "step": 80 }, { "epoch": 0.008398732413533883, "grad_norm": 0.453125, "learning_rate": 0.00019999702609116578, "loss": 6.6439, "step": 81 }, { "epoch": 0.008502420468021956, "grad_norm": 0.56640625, "learning_rate": 0.00019999694172957174, "loss": 6.6209, "step": 82 }, { "epoch": 0.008606108522510028, "grad_norm": 0.578125, "learning_rate": 0.00019999685618811948, "loss": 6.5961, "step": 83 }, { "epoch": 0.008709796576998101, "grad_norm": 0.5546875, "learning_rate": 0.00019999676946681, "loss": 6.6656, "step": 84 }, { "epoch": 0.008813484631486174, "grad_norm": 0.412109375, "learning_rate": 0.00019999668156564436, "loss": 6.5868, "step": 85 }, { "epoch": 0.008917172685974247, "grad_norm": 0.443359375, "learning_rate": 0.00019999659248462357, "loss": 6.6077, "step": 86 }, { "epoch": 0.009020860740462319, "grad_norm": 0.38671875, "learning_rate": 0.0001999965022237487, "loss": 6.5545, "step": 87 }, { "epoch": 0.009124548794950392, "grad_norm": 0.318359375, "learning_rate": 0.00019999641078302077, "loss": 6.5389, "step": 88 }, { "epoch": 0.009228236849438465, "grad_norm": 0.36328125, "learning_rate": 0.00019999631816244095, "loss": 6.5265, "step": 89 }, { "epoch": 0.009331924903926536, "grad_norm": 0.5078125, "learning_rate": 0.00019999622436201025, "loss": 6.5339, "step": 90 }, { "epoch": 0.00943561295841461, "grad_norm": 0.578125, "learning_rate": 0.0001999961293817298, "loss": 6.5092, "step": 91 }, { "epoch": 0.009539301012902682, "grad_norm": 0.447265625, "learning_rate": 0.0001999960332216007, "loss": 6.4901, "step": 92 }, { "epoch": 0.009642989067390756, "grad_norm": 0.6015625, "learning_rate": 0.00019999593588162414, "loss": 6.5021, "step": 93 }, { "epoch": 0.009746677121878827, "grad_norm": 0.59765625, "learning_rate": 0.00019999583736180122, "loss": 6.4337, "step": 94 }, { "epoch": 0.0098503651763669, "grad_norm": 0.4296875, "learning_rate": 0.00019999573766213313, "loss": 6.4328, "step": 95 }, { "epoch": 0.009954053230854973, "grad_norm": 0.640625, "learning_rate": 0.00019999563678262106, "loss": 6.4444, "step": 96 }, { "epoch": 0.010057741285343045, "grad_norm": 0.71484375, "learning_rate": 0.00019999553472326614, "loss": 6.4165, "step": 97 }, { "epoch": 0.010161429339831118, "grad_norm": 1.4140625, "learning_rate": 0.0001999954314840696, "loss": 6.4282, "step": 98 }, { "epoch": 0.01026511739431919, "grad_norm": 1.1875, "learning_rate": 0.0001999953270650327, "loss": 6.4431, "step": 99 }, { "epoch": 0.010368805448807264, "grad_norm": 0.8515625, "learning_rate": 0.00019999522146615662, "loss": 6.3568, "step": 100 }, { "epoch": 0.010472493503295335, "grad_norm": 0.69921875, "learning_rate": 0.00019999511468744263, "loss": 6.3853, "step": 101 }, { "epoch": 0.010576181557783408, "grad_norm": 0.94921875, "learning_rate": 0.000199995006728892, "loss": 6.4344, "step": 102 }, { "epoch": 0.010679869612271482, "grad_norm": 0.74609375, "learning_rate": 0.000199994897590506, "loss": 6.3957, "step": 103 }, { "epoch": 0.010783557666759555, "grad_norm": 0.60546875, "learning_rate": 0.00019999478727228588, "loss": 6.3347, "step": 104 }, { "epoch": 0.010887245721247626, "grad_norm": 0.5546875, "learning_rate": 0.00019999467577423296, "loss": 6.3382, "step": 105 }, { "epoch": 0.0109909337757357, "grad_norm": 0.62890625, "learning_rate": 0.0001999945630963486, "loss": 6.3785, "step": 106 }, { "epoch": 0.011094621830223772, "grad_norm": 0.439453125, "learning_rate": 0.00019999444923863405, "loss": 6.3325, "step": 107 }, { "epoch": 0.011198309884711844, "grad_norm": 0.482421875, "learning_rate": 0.00019999433420109073, "loss": 6.3663, "step": 108 }, { "epoch": 0.011301997939199917, "grad_norm": 0.41796875, "learning_rate": 0.00019999421798371997, "loss": 6.2954, "step": 109 }, { "epoch": 0.01140568599368799, "grad_norm": 0.474609375, "learning_rate": 0.00019999410058652313, "loss": 6.2503, "step": 110 }, { "epoch": 0.011509374048176063, "grad_norm": 0.396484375, "learning_rate": 0.00019999398200950158, "loss": 6.2885, "step": 111 }, { "epoch": 0.011613062102664135, "grad_norm": 0.462890625, "learning_rate": 0.00019999386225265676, "loss": 6.3214, "step": 112 }, { "epoch": 0.011716750157152208, "grad_norm": 0.314453125, "learning_rate": 0.00019999374131599007, "loss": 6.2841, "step": 113 }, { "epoch": 0.01182043821164028, "grad_norm": 0.39453125, "learning_rate": 0.00019999361919950293, "loss": 6.29, "step": 114 }, { "epoch": 0.011924126266128352, "grad_norm": 0.376953125, "learning_rate": 0.00019999349590319677, "loss": 6.2106, "step": 115 }, { "epoch": 0.012027814320616425, "grad_norm": 0.4296875, "learning_rate": 0.00019999337142707305, "loss": 6.219, "step": 116 }, { "epoch": 0.012131502375104498, "grad_norm": 0.482421875, "learning_rate": 0.00019999324577113324, "loss": 6.2419, "step": 117 }, { "epoch": 0.012235190429592572, "grad_norm": 0.76953125, "learning_rate": 0.00019999311893537883, "loss": 6.2168, "step": 118 }, { "epoch": 0.012338878484080643, "grad_norm": 1.515625, "learning_rate": 0.00019999299091981134, "loss": 6.2602, "step": 119 }, { "epoch": 0.012442566538568716, "grad_norm": 1.15625, "learning_rate": 0.00019999286172443223, "loss": 6.2084, "step": 120 }, { "epoch": 0.01254625459305679, "grad_norm": 0.890625, "learning_rate": 0.00019999273134924307, "loss": 6.2672, "step": 121 }, { "epoch": 0.01264994264754486, "grad_norm": 1.1796875, "learning_rate": 0.00019999259979424535, "loss": 6.2597, "step": 122 }, { "epoch": 0.012753630702032934, "grad_norm": 0.76171875, "learning_rate": 0.00019999246705944068, "loss": 6.1437, "step": 123 }, { "epoch": 0.012857318756521007, "grad_norm": 0.97265625, "learning_rate": 0.00019999233314483056, "loss": 6.216, "step": 124 }, { "epoch": 0.01296100681100908, "grad_norm": 0.87109375, "learning_rate": 0.00019999219805041663, "loss": 6.1778, "step": 125 }, { "epoch": 0.013064694865497151, "grad_norm": 0.8203125, "learning_rate": 0.00019999206177620047, "loss": 6.1466, "step": 126 }, { "epoch": 0.013168382919985224, "grad_norm": 0.74609375, "learning_rate": 0.00019999192432218363, "loss": 6.1517, "step": 127 }, { "epoch": 0.013272070974473298, "grad_norm": 0.67578125, "learning_rate": 0.00019999178568836783, "loss": 6.1833, "step": 128 }, { "epoch": 0.013375759028961369, "grad_norm": 0.6640625, "learning_rate": 0.00019999164587475464, "loss": 6.177, "step": 129 }, { "epoch": 0.013479447083449442, "grad_norm": 0.57421875, "learning_rate": 0.0001999915048813457, "loss": 6.19, "step": 130 }, { "epoch": 0.013583135137937515, "grad_norm": 0.6875, "learning_rate": 0.0001999913627081427, "loss": 6.1394, "step": 131 }, { "epoch": 0.013686823192425588, "grad_norm": 0.578125, "learning_rate": 0.00019999121935514736, "loss": 6.1704, "step": 132 }, { "epoch": 0.01379051124691366, "grad_norm": 0.57421875, "learning_rate": 0.00019999107482236128, "loss": 6.1321, "step": 133 }, { "epoch": 0.013894199301401733, "grad_norm": 0.55078125, "learning_rate": 0.00019999092910978625, "loss": 6.1726, "step": 134 }, { "epoch": 0.013997887355889806, "grad_norm": 0.5703125, "learning_rate": 0.00019999078221742393, "loss": 6.1438, "step": 135 }, { "epoch": 0.014101575410377877, "grad_norm": 0.5703125, "learning_rate": 0.00019999063414527607, "loss": 6.1395, "step": 136 }, { "epoch": 0.01420526346486595, "grad_norm": 0.6875, "learning_rate": 0.00019999048489334443, "loss": 6.0961, "step": 137 }, { "epoch": 0.014308951519354024, "grad_norm": 0.80859375, "learning_rate": 0.00019999033446163077, "loss": 6.1165, "step": 138 }, { "epoch": 0.014412639573842097, "grad_norm": 1.171875, "learning_rate": 0.00019999018285013685, "loss": 6.1087, "step": 139 }, { "epoch": 0.014516327628330168, "grad_norm": 1.421875, "learning_rate": 0.00019999003005886446, "loss": 6.0861, "step": 140 }, { "epoch": 0.014620015682818241, "grad_norm": 0.47265625, "learning_rate": 0.00019998987608781544, "loss": 6.1238, "step": 141 }, { "epoch": 0.014723703737306314, "grad_norm": 1.734375, "learning_rate": 0.00019998972093699153, "loss": 6.0859, "step": 142 }, { "epoch": 0.014827391791794386, "grad_norm": 0.97265625, "learning_rate": 0.00019998956460639465, "loss": 6.1074, "step": 143 }, { "epoch": 0.014931079846282459, "grad_norm": 0.8046875, "learning_rate": 0.00019998940709602657, "loss": 6.1206, "step": 144 }, { "epoch": 0.015034767900770532, "grad_norm": 1.265625, "learning_rate": 0.00019998924840588917, "loss": 6.0488, "step": 145 }, { "epoch": 0.015138455955258605, "grad_norm": 0.8984375, "learning_rate": 0.00019998908853598434, "loss": 6.058, "step": 146 }, { "epoch": 0.015242144009746677, "grad_norm": 0.65625, "learning_rate": 0.000199988927486314, "loss": 6.0474, "step": 147 }, { "epoch": 0.01534583206423475, "grad_norm": 0.6328125, "learning_rate": 0.00019998876525687998, "loss": 6.0019, "step": 148 }, { "epoch": 0.015449520118722823, "grad_norm": 0.7578125, "learning_rate": 0.0001999886018476842, "loss": 6.047, "step": 149 }, { "epoch": 0.015553208173210894, "grad_norm": 0.80859375, "learning_rate": 0.0001999884372587286, "loss": 6.0433, "step": 150 }, { "epoch": 0.015656896227698967, "grad_norm": 0.66796875, "learning_rate": 0.0001999882714900152, "loss": 6.0162, "step": 151 }, { "epoch": 0.01576058428218704, "grad_norm": 0.50390625, "learning_rate": 0.00019998810454154584, "loss": 6.0574, "step": 152 }, { "epoch": 0.015864272336675114, "grad_norm": 0.69140625, "learning_rate": 0.00019998793641332256, "loss": 6.0328, "step": 153 }, { "epoch": 0.015967960391163185, "grad_norm": 0.515625, "learning_rate": 0.0001999877671053473, "loss": 5.9934, "step": 154 }, { "epoch": 0.01607164844565126, "grad_norm": 0.45703125, "learning_rate": 0.0001999875966176221, "loss": 6.0257, "step": 155 }, { "epoch": 0.01617533650013933, "grad_norm": 0.49609375, "learning_rate": 0.00019998742495014896, "loss": 5.9716, "step": 156 }, { "epoch": 0.016279024554627403, "grad_norm": 0.57421875, "learning_rate": 0.0001999872521029299, "loss": 6.011, "step": 157 }, { "epoch": 0.016382712609115477, "grad_norm": 0.373046875, "learning_rate": 0.0001999870780759669, "loss": 5.986, "step": 158 }, { "epoch": 0.01648640066360355, "grad_norm": 0.4375, "learning_rate": 0.0001999869028692621, "loss": 5.9172, "step": 159 }, { "epoch": 0.01659008871809162, "grad_norm": 0.65625, "learning_rate": 0.00019998672648281757, "loss": 5.9745, "step": 160 }, { "epoch": 0.016693776772579695, "grad_norm": 0.81640625, "learning_rate": 0.0001999865489166353, "loss": 5.9591, "step": 161 }, { "epoch": 0.016797464827067767, "grad_norm": 1.1171875, "learning_rate": 0.00019998637017071752, "loss": 5.982, "step": 162 }, { "epoch": 0.016901152881555838, "grad_norm": 1.578125, "learning_rate": 0.0001999861902450662, "loss": 5.9767, "step": 163 }, { "epoch": 0.017004840936043913, "grad_norm": 0.87890625, "learning_rate": 0.00019998600913968356, "loss": 6.0064, "step": 164 }, { "epoch": 0.017108528990531984, "grad_norm": 0.84375, "learning_rate": 0.00019998582685457165, "loss": 5.9651, "step": 165 }, { "epoch": 0.017212217045020056, "grad_norm": 0.9921875, "learning_rate": 0.00019998564338973273, "loss": 5.9246, "step": 166 }, { "epoch": 0.01731590509950813, "grad_norm": 1.015625, "learning_rate": 0.00019998545874516888, "loss": 5.907, "step": 167 }, { "epoch": 0.017419593153996202, "grad_norm": 1.1015625, "learning_rate": 0.00019998527292088228, "loss": 5.9681, "step": 168 }, { "epoch": 0.017523281208484277, "grad_norm": 0.96875, "learning_rate": 0.00019998508591687522, "loss": 5.962, "step": 169 }, { "epoch": 0.017626969262972348, "grad_norm": 0.9609375, "learning_rate": 0.00019998489773314976, "loss": 5.8911, "step": 170 }, { "epoch": 0.01773065731746042, "grad_norm": 0.8359375, "learning_rate": 0.00019998470836970827, "loss": 5.924, "step": 171 }, { "epoch": 0.017834345371948494, "grad_norm": 0.78125, "learning_rate": 0.00019998451782655282, "loss": 5.9342, "step": 172 }, { "epoch": 0.017938033426436566, "grad_norm": 0.921875, "learning_rate": 0.00019998432610368583, "loss": 5.9439, "step": 173 }, { "epoch": 0.018041721480924637, "grad_norm": 0.7734375, "learning_rate": 0.00019998413320110943, "loss": 5.9079, "step": 174 }, { "epoch": 0.018145409535412712, "grad_norm": 0.62890625, "learning_rate": 0.00019998393911882598, "loss": 5.9049, "step": 175 }, { "epoch": 0.018249097589900783, "grad_norm": 0.59765625, "learning_rate": 0.0001999837438568377, "loss": 5.8602, "step": 176 }, { "epoch": 0.018352785644388855, "grad_norm": 0.51953125, "learning_rate": 0.00019998354741514694, "loss": 5.9309, "step": 177 }, { "epoch": 0.01845647369887693, "grad_norm": 0.5625, "learning_rate": 0.00019998334979375604, "loss": 5.9288, "step": 178 }, { "epoch": 0.018560161753365, "grad_norm": 0.5, "learning_rate": 0.00019998315099266728, "loss": 5.8735, "step": 179 }, { "epoch": 0.018663849807853072, "grad_norm": 0.3671875, "learning_rate": 0.000199982951011883, "loss": 5.8988, "step": 180 }, { "epoch": 0.018767537862341147, "grad_norm": 0.50390625, "learning_rate": 0.0001999827498514056, "loss": 5.8772, "step": 181 }, { "epoch": 0.01887122591682922, "grad_norm": 0.69921875, "learning_rate": 0.00019998254751123746, "loss": 5.8569, "step": 182 }, { "epoch": 0.018974913971317293, "grad_norm": 0.63671875, "learning_rate": 0.00019998234399138092, "loss": 5.8623, "step": 183 }, { "epoch": 0.019078602025805365, "grad_norm": 0.5, "learning_rate": 0.00019998213929183842, "loss": 5.832, "step": 184 }, { "epoch": 0.019182290080293436, "grad_norm": 0.453125, "learning_rate": 0.00019998193341261238, "loss": 5.8703, "step": 185 }, { "epoch": 0.01928597813478151, "grad_norm": 0.52734375, "learning_rate": 0.00019998172635370516, "loss": 5.8666, "step": 186 }, { "epoch": 0.019389666189269582, "grad_norm": 0.3671875, "learning_rate": 0.00019998151811511928, "loss": 5.8473, "step": 187 }, { "epoch": 0.019493354243757654, "grad_norm": 0.498046875, "learning_rate": 0.00019998130869685717, "loss": 5.8832, "step": 188 }, { "epoch": 0.01959704229824573, "grad_norm": 0.60546875, "learning_rate": 0.00019998109809892133, "loss": 5.8562, "step": 189 }, { "epoch": 0.0197007303527338, "grad_norm": 0.474609375, "learning_rate": 0.00019998088632131419, "loss": 5.8587, "step": 190 }, { "epoch": 0.01980441840722187, "grad_norm": 0.53515625, "learning_rate": 0.00019998067336403827, "loss": 5.8556, "step": 191 }, { "epoch": 0.019908106461709946, "grad_norm": 0.76171875, "learning_rate": 0.0001999804592270961, "loss": 5.8503, "step": 192 }, { "epoch": 0.020011794516198018, "grad_norm": 1.0390625, "learning_rate": 0.0001999802439104902, "loss": 5.8315, "step": 193 }, { "epoch": 0.02011548257068609, "grad_norm": 1.671875, "learning_rate": 0.0001999800274142231, "loss": 5.7988, "step": 194 }, { "epoch": 0.020219170625174164, "grad_norm": 0.453125, "learning_rate": 0.00019997980973829736, "loss": 5.7978, "step": 195 }, { "epoch": 0.020322858679662235, "grad_norm": 2.46875, "learning_rate": 0.00019997959088271554, "loss": 5.8234, "step": 196 }, { "epoch": 0.02042654673415031, "grad_norm": 0.9375, "learning_rate": 0.00019997937084748025, "loss": 5.8187, "step": 197 }, { "epoch": 0.02053023478863838, "grad_norm": 3.75, "learning_rate": 0.00019997914963259405, "loss": 5.8793, "step": 198 }, { "epoch": 0.020633922843126453, "grad_norm": 3.390625, "learning_rate": 0.00019997892723805957, "loss": 5.8885, "step": 199 }, { "epoch": 0.020737610897614528, "grad_norm": 1.4765625, "learning_rate": 0.00019997870366387943, "loss": 5.8362, "step": 200 }, { "epoch": 0.0208412989521026, "grad_norm": 3.171875, "learning_rate": 0.00019997847891005627, "loss": 5.8813, "step": 201 }, { "epoch": 0.02094498700659067, "grad_norm": 2.4375, "learning_rate": 0.00019997825297659273, "loss": 5.9057, "step": 202 }, { "epoch": 0.021048675061078746, "grad_norm": 2.03125, "learning_rate": 0.0001999780258634915, "loss": 5.8437, "step": 203 }, { "epoch": 0.021152363115566817, "grad_norm": 1.1171875, "learning_rate": 0.00019997779757075526, "loss": 5.8491, "step": 204 }, { "epoch": 0.02125605117005489, "grad_norm": 1.765625, "learning_rate": 0.0001999775680983867, "loss": 5.8136, "step": 205 }, { "epoch": 0.021359739224542963, "grad_norm": 1.0703125, "learning_rate": 0.00019997733744638846, "loss": 5.8221, "step": 206 }, { "epoch": 0.021463427279031035, "grad_norm": 1.6171875, "learning_rate": 0.00019997710561476335, "loss": 5.8324, "step": 207 }, { "epoch": 0.02156711533351911, "grad_norm": 1.1171875, "learning_rate": 0.0001999768726035141, "loss": 5.8158, "step": 208 }, { "epoch": 0.02167080338800718, "grad_norm": 1.2578125, "learning_rate": 0.00019997663841264337, "loss": 5.8085, "step": 209 }, { "epoch": 0.021774491442495252, "grad_norm": 0.89453125, "learning_rate": 0.00019997640304215402, "loss": 5.807, "step": 210 }, { "epoch": 0.021878179496983327, "grad_norm": 1.0703125, "learning_rate": 0.0001999761664920488, "loss": 5.8282, "step": 211 }, { "epoch": 0.0219818675514714, "grad_norm": 0.7578125, "learning_rate": 0.0001999759287623305, "loss": 5.7731, "step": 212 }, { "epoch": 0.02208555560595947, "grad_norm": 0.8515625, "learning_rate": 0.0001999756898530019, "loss": 5.7734, "step": 213 }, { "epoch": 0.022189243660447545, "grad_norm": 0.6953125, "learning_rate": 0.00019997544976406588, "loss": 5.7714, "step": 214 }, { "epoch": 0.022292931714935616, "grad_norm": 0.7109375, "learning_rate": 0.00019997520849552517, "loss": 5.7947, "step": 215 }, { "epoch": 0.022396619769423688, "grad_norm": 0.6875, "learning_rate": 0.00019997496604738272, "loss": 5.8244, "step": 216 }, { "epoch": 0.022500307823911762, "grad_norm": 0.6796875, "learning_rate": 0.00019997472241964134, "loss": 5.7605, "step": 217 }, { "epoch": 0.022603995878399834, "grad_norm": 0.65234375, "learning_rate": 0.00019997447761230393, "loss": 5.7564, "step": 218 }, { "epoch": 0.022707683932887905, "grad_norm": 0.57421875, "learning_rate": 0.00019997423162537335, "loss": 5.7313, "step": 219 }, { "epoch": 0.02281137198737598, "grad_norm": 0.60546875, "learning_rate": 0.00019997398445885248, "loss": 5.7635, "step": 220 }, { "epoch": 0.02291506004186405, "grad_norm": 0.5234375, "learning_rate": 0.00019997373611274432, "loss": 5.7636, "step": 221 }, { "epoch": 0.023018748096352126, "grad_norm": 0.58203125, "learning_rate": 0.00019997348658705173, "loss": 5.7049, "step": 222 }, { "epoch": 0.023122436150840198, "grad_norm": 0.5390625, "learning_rate": 0.00019997323588177767, "loss": 5.7628, "step": 223 }, { "epoch": 0.02322612420532827, "grad_norm": 0.478515625, "learning_rate": 0.0001999729839969251, "loss": 5.7761, "step": 224 }, { "epoch": 0.023329812259816344, "grad_norm": 0.490234375, "learning_rate": 0.000199972730932497, "loss": 5.772, "step": 225 }, { "epoch": 0.023433500314304415, "grad_norm": 0.45703125, "learning_rate": 0.00019997247668849638, "loss": 5.7357, "step": 226 }, { "epoch": 0.023537188368792487, "grad_norm": 0.54296875, "learning_rate": 0.00019997222126492617, "loss": 5.75, "step": 227 }, { "epoch": 0.02364087642328056, "grad_norm": 0.4453125, "learning_rate": 0.00019997196466178943, "loss": 5.7343, "step": 228 }, { "epoch": 0.023744564477768633, "grad_norm": 0.408203125, "learning_rate": 0.00019997170687908919, "loss": 5.7152, "step": 229 }, { "epoch": 0.023848252532256704, "grad_norm": 0.4296875, "learning_rate": 0.00019997144791682848, "loss": 5.734, "step": 230 }, { "epoch": 0.02395194058674478, "grad_norm": 0.392578125, "learning_rate": 0.00019997118777501037, "loss": 5.7307, "step": 231 }, { "epoch": 0.02405562864123285, "grad_norm": 0.45703125, "learning_rate": 0.0001999709264536379, "loss": 5.7429, "step": 232 }, { "epoch": 0.024159316695720922, "grad_norm": 0.458984375, "learning_rate": 0.00019997066395271418, "loss": 5.7384, "step": 233 }, { "epoch": 0.024263004750208997, "grad_norm": 0.330078125, "learning_rate": 0.00019997040027224232, "loss": 5.7171, "step": 234 }, { "epoch": 0.024366692804697068, "grad_norm": 0.400390625, "learning_rate": 0.00019997013541222538, "loss": 5.7003, "step": 235 }, { "epoch": 0.024470380859185143, "grad_norm": 0.33203125, "learning_rate": 0.00019996986937266653, "loss": 5.6908, "step": 236 }, { "epoch": 0.024574068913673214, "grad_norm": 0.33203125, "learning_rate": 0.0001999696021535689, "loss": 5.6583, "step": 237 }, { "epoch": 0.024677756968161286, "grad_norm": 0.34375, "learning_rate": 0.00019996933375493562, "loss": 5.724, "step": 238 }, { "epoch": 0.02478144502264936, "grad_norm": 0.333984375, "learning_rate": 0.0001999690641767699, "loss": 5.6948, "step": 239 }, { "epoch": 0.024885133077137432, "grad_norm": 0.341796875, "learning_rate": 0.00019996879341907487, "loss": 5.7105, "step": 240 }, { "epoch": 0.024988821131625504, "grad_norm": 0.41015625, "learning_rate": 0.00019996852148185373, "loss": 5.6769, "step": 241 }, { "epoch": 0.02509250918611358, "grad_norm": 0.294921875, "learning_rate": 0.00019996824836510975, "loss": 5.671, "step": 242 }, { "epoch": 0.02519619724060165, "grad_norm": 0.365234375, "learning_rate": 0.0001999679740688461, "loss": 5.6549, "step": 243 }, { "epoch": 0.02529988529508972, "grad_norm": 0.447265625, "learning_rate": 0.000199967698593066, "loss": 5.6671, "step": 244 }, { "epoch": 0.025403573349577796, "grad_norm": 0.392578125, "learning_rate": 0.00019996742193777273, "loss": 5.6164, "step": 245 }, { "epoch": 0.025507261404065867, "grad_norm": 0.5, "learning_rate": 0.00019996714410296958, "loss": 5.647, "step": 246 }, { "epoch": 0.02561094945855394, "grad_norm": 0.498046875, "learning_rate": 0.0001999668650886598, "loss": 5.64, "step": 247 }, { "epoch": 0.025714637513042014, "grad_norm": 0.69140625, "learning_rate": 0.00019996658489484666, "loss": 5.6899, "step": 248 }, { "epoch": 0.025818325567530085, "grad_norm": 0.921875, "learning_rate": 0.00019996630352153353, "loss": 5.6688, "step": 249 }, { "epoch": 0.02592201362201816, "grad_norm": 1.78125, "learning_rate": 0.0001999660209687236, "loss": 5.6829, "step": 250 }, { "epoch": 0.02602570167650623, "grad_norm": 0.6484375, "learning_rate": 0.00019996573723642035, "loss": 5.6252, "step": 251 }, { "epoch": 0.026129389730994303, "grad_norm": 0.6015625, "learning_rate": 0.00019996545232462708, "loss": 5.6089, "step": 252 }, { "epoch": 0.026233077785482378, "grad_norm": 1.4765625, "learning_rate": 0.00019996516623334713, "loss": 5.672, "step": 253 }, { "epoch": 0.02633676583997045, "grad_norm": 1.046875, "learning_rate": 0.00019996487896258388, "loss": 5.6516, "step": 254 }, { "epoch": 0.02644045389445852, "grad_norm": 1.1484375, "learning_rate": 0.0001999645905123407, "loss": 5.608, "step": 255 }, { "epoch": 0.026544141948946595, "grad_norm": 0.87890625, "learning_rate": 0.00019996430088262108, "loss": 5.6102, "step": 256 }, { "epoch": 0.026647830003434667, "grad_norm": 0.54296875, "learning_rate": 0.00019996401007342832, "loss": 5.6053, "step": 257 }, { "epoch": 0.026751518057922738, "grad_norm": 0.65625, "learning_rate": 0.00019996371808476596, "loss": 5.6318, "step": 258 }, { "epoch": 0.026855206112410813, "grad_norm": 0.796875, "learning_rate": 0.00019996342491663733, "loss": 5.612, "step": 259 }, { "epoch": 0.026958894166898884, "grad_norm": 0.6171875, "learning_rate": 0.000199963130569046, "loss": 5.6529, "step": 260 }, { "epoch": 0.02706258222138696, "grad_norm": 0.5390625, "learning_rate": 0.00019996283504199538, "loss": 5.5798, "step": 261 }, { "epoch": 0.02716627027587503, "grad_norm": 0.62109375, "learning_rate": 0.00019996253833548896, "loss": 5.6042, "step": 262 }, { "epoch": 0.027269958330363102, "grad_norm": 0.59375, "learning_rate": 0.00019996224044953028, "loss": 5.6064, "step": 263 }, { "epoch": 0.027373646384851177, "grad_norm": 0.447265625, "learning_rate": 0.0001999619413841228, "loss": 5.6279, "step": 264 }, { "epoch": 0.027477334439339248, "grad_norm": 0.51953125, "learning_rate": 0.00019996164113927008, "loss": 5.595, "step": 265 }, { "epoch": 0.02758102249382732, "grad_norm": 0.46875, "learning_rate": 0.00019996133971497568, "loss": 5.6144, "step": 266 }, { "epoch": 0.027684710548315394, "grad_norm": 0.48828125, "learning_rate": 0.00019996103711124313, "loss": 5.6075, "step": 267 }, { "epoch": 0.027788398602803466, "grad_norm": 0.55078125, "learning_rate": 0.000199960733328076, "loss": 5.5957, "step": 268 }, { "epoch": 0.027892086657291537, "grad_norm": 0.392578125, "learning_rate": 0.00019996042836547786, "loss": 5.5676, "step": 269 }, { "epoch": 0.027995774711779612, "grad_norm": 0.435546875, "learning_rate": 0.00019996012222345236, "loss": 5.606, "step": 270 }, { "epoch": 0.028099462766267683, "grad_norm": 0.5, "learning_rate": 0.00019995981490200304, "loss": 5.5437, "step": 271 }, { "epoch": 0.028203150820755755, "grad_norm": 0.41796875, "learning_rate": 0.0001999595064011336, "loss": 5.5388, "step": 272 }, { "epoch": 0.02830683887524383, "grad_norm": 0.5703125, "learning_rate": 0.00019995919672084763, "loss": 5.5548, "step": 273 }, { "epoch": 0.0284105269297319, "grad_norm": 0.70703125, "learning_rate": 0.0001999588858611488, "loss": 5.5774, "step": 274 }, { "epoch": 0.028514214984219976, "grad_norm": 0.63671875, "learning_rate": 0.00019995857382204083, "loss": 5.5795, "step": 275 }, { "epoch": 0.028617903038708047, "grad_norm": 0.57421875, "learning_rate": 0.00019995826060352728, "loss": 5.5806, "step": 276 }, { "epoch": 0.02872159109319612, "grad_norm": 0.70703125, "learning_rate": 0.00019995794620561195, "loss": 5.5304, "step": 277 }, { "epoch": 0.028825279147684194, "grad_norm": 0.7421875, "learning_rate": 0.0001999576306282985, "loss": 5.594, "step": 278 }, { "epoch": 0.028928967202172265, "grad_norm": 0.79296875, "learning_rate": 0.00019995731387159067, "loss": 5.5603, "step": 279 }, { "epoch": 0.029032655256660336, "grad_norm": 1.0390625, "learning_rate": 0.0001999569959354922, "loss": 5.5512, "step": 280 }, { "epoch": 0.02913634331114841, "grad_norm": 1.3125, "learning_rate": 0.00019995667682000683, "loss": 5.5618, "step": 281 }, { "epoch": 0.029240031365636483, "grad_norm": 0.69921875, "learning_rate": 0.00019995635652513835, "loss": 5.5426, "step": 282 }, { "epoch": 0.029343719420124554, "grad_norm": 0.69921875, "learning_rate": 0.0001999560350508905, "loss": 5.5263, "step": 283 }, { "epoch": 0.02944740747461263, "grad_norm": 1.0859375, "learning_rate": 0.0001999557123972671, "loss": 5.5721, "step": 284 }, { "epoch": 0.0295510955291007, "grad_norm": 1.53125, "learning_rate": 0.00019995538856427196, "loss": 5.5413, "step": 285 }, { "epoch": 0.02965478358358877, "grad_norm": 0.546875, "learning_rate": 0.00019995506355190889, "loss": 5.5277, "step": 286 }, { "epoch": 0.029758471638076846, "grad_norm": 1.34375, "learning_rate": 0.00019995473736018172, "loss": 5.5505, "step": 287 }, { "epoch": 0.029862159692564918, "grad_norm": 1.0078125, "learning_rate": 0.00019995440998909431, "loss": 5.5775, "step": 288 }, { "epoch": 0.029965847747052993, "grad_norm": 1.140625, "learning_rate": 0.00019995408143865052, "loss": 5.5016, "step": 289 }, { "epoch": 0.030069535801541064, "grad_norm": 1.265625, "learning_rate": 0.00019995375170885424, "loss": 5.5683, "step": 290 }, { "epoch": 0.030173223856029135, "grad_norm": 0.671875, "learning_rate": 0.00019995342079970932, "loss": 5.5217, "step": 291 }, { "epoch": 0.03027691191051721, "grad_norm": 0.73828125, "learning_rate": 0.00019995308871121971, "loss": 5.562, "step": 292 }, { "epoch": 0.03038059996500528, "grad_norm": 0.7109375, "learning_rate": 0.00019995275544338928, "loss": 5.5251, "step": 293 }, { "epoch": 0.030484288019493353, "grad_norm": 0.875, "learning_rate": 0.00019995242099622203, "loss": 5.5147, "step": 294 }, { "epoch": 0.030587976073981428, "grad_norm": 1.15625, "learning_rate": 0.00019995208536972183, "loss": 5.4956, "step": 295 }, { "epoch": 0.0306916641284695, "grad_norm": 0.703125, "learning_rate": 0.0001999517485638927, "loss": 5.4974, "step": 296 }, { "epoch": 0.03079535218295757, "grad_norm": 0.77734375, "learning_rate": 0.00019995141057873857, "loss": 5.4782, "step": 297 }, { "epoch": 0.030899040237445646, "grad_norm": 0.9609375, "learning_rate": 0.00019995107141426347, "loss": 5.5044, "step": 298 }, { "epoch": 0.031002728291933717, "grad_norm": 1.1328125, "learning_rate": 0.00019995073107047134, "loss": 5.5123, "step": 299 }, { "epoch": 0.03110641634642179, "grad_norm": 0.82421875, "learning_rate": 0.0001999503895473663, "loss": 5.5219, "step": 300 }, { "epoch": 0.031210104400909863, "grad_norm": 0.953125, "learning_rate": 0.00019995004684495227, "loss": 5.5151, "step": 301 }, { "epoch": 0.031313792455397935, "grad_norm": 1.15625, "learning_rate": 0.00019994970296323335, "loss": 5.5151, "step": 302 }, { "epoch": 0.031417480509886006, "grad_norm": 0.90234375, "learning_rate": 0.00019994935790221358, "loss": 5.5093, "step": 303 }, { "epoch": 0.03152116856437408, "grad_norm": 0.83984375, "learning_rate": 0.00019994901166189708, "loss": 5.5052, "step": 304 }, { "epoch": 0.031624856618862156, "grad_norm": 1.09375, "learning_rate": 0.00019994866424228783, "loss": 5.4662, "step": 305 }, { "epoch": 0.03172854467335023, "grad_norm": 0.8671875, "learning_rate": 0.00019994831564339004, "loss": 5.4841, "step": 306 }, { "epoch": 0.0318322327278383, "grad_norm": 0.79296875, "learning_rate": 0.00019994796586520773, "loss": 5.4731, "step": 307 }, { "epoch": 0.03193592078232637, "grad_norm": 0.85546875, "learning_rate": 0.00019994761490774513, "loss": 5.4515, "step": 308 }, { "epoch": 0.03203960883681444, "grad_norm": 0.66015625, "learning_rate": 0.00019994726277100628, "loss": 5.519, "step": 309 }, { "epoch": 0.03214329689130252, "grad_norm": 0.62109375, "learning_rate": 0.0001999469094549954, "loss": 5.5203, "step": 310 }, { "epoch": 0.03224698494579059, "grad_norm": 0.78515625, "learning_rate": 0.0001999465549597166, "loss": 5.5374, "step": 311 }, { "epoch": 0.03235067300027866, "grad_norm": 0.62890625, "learning_rate": 0.00019994619928517416, "loss": 5.508, "step": 312 }, { "epoch": 0.032454361054766734, "grad_norm": 0.6953125, "learning_rate": 0.00019994584243137218, "loss": 5.4988, "step": 313 }, { "epoch": 0.032558049109254805, "grad_norm": 0.68359375, "learning_rate": 0.00019994548439831487, "loss": 5.4792, "step": 314 }, { "epoch": 0.03266173716374288, "grad_norm": 0.462890625, "learning_rate": 0.00019994512518600654, "loss": 5.4493, "step": 315 }, { "epoch": 0.032765425218230955, "grad_norm": 0.69921875, "learning_rate": 0.0001999447647944514, "loss": 5.4875, "step": 316 }, { "epoch": 0.032869113272719026, "grad_norm": 0.55078125, "learning_rate": 0.00019994440322365363, "loss": 5.5292, "step": 317 }, { "epoch": 0.0329728013272071, "grad_norm": 0.55859375, "learning_rate": 0.00019994404047361756, "loss": 5.4906, "step": 318 }, { "epoch": 0.03307648938169517, "grad_norm": 0.404296875, "learning_rate": 0.00019994367654434746, "loss": 5.461, "step": 319 }, { "epoch": 0.03318017743618324, "grad_norm": 0.470703125, "learning_rate": 0.00019994331143584763, "loss": 5.439, "step": 320 }, { "epoch": 0.03328386549067131, "grad_norm": 0.515625, "learning_rate": 0.00019994294514812238, "loss": 5.451, "step": 321 }, { "epoch": 0.03338755354515939, "grad_norm": 0.5703125, "learning_rate": 0.00019994257768117602, "loss": 5.4436, "step": 322 }, { "epoch": 0.03349124159964746, "grad_norm": 0.609375, "learning_rate": 0.0001999422090350129, "loss": 5.4283, "step": 323 }, { "epoch": 0.03359492965413553, "grad_norm": 0.6875, "learning_rate": 0.0001999418392096373, "loss": 5.4542, "step": 324 }, { "epoch": 0.033698617708623604, "grad_norm": 0.5625, "learning_rate": 0.0001999414682050537, "loss": 5.4371, "step": 325 }, { "epoch": 0.033802305763111676, "grad_norm": 0.431640625, "learning_rate": 0.00019994109602126638, "loss": 5.4293, "step": 326 }, { "epoch": 0.033905993817599754, "grad_norm": 0.59765625, "learning_rate": 0.00019994072265827977, "loss": 5.4883, "step": 327 }, { "epoch": 0.034009681872087826, "grad_norm": 0.66015625, "learning_rate": 0.0001999403481160983, "loss": 5.4215, "step": 328 }, { "epoch": 0.0341133699265759, "grad_norm": 0.80859375, "learning_rate": 0.00019993997239472634, "loss": 5.4048, "step": 329 }, { "epoch": 0.03421705798106397, "grad_norm": 1.0703125, "learning_rate": 0.00019993959549416835, "loss": 5.4454, "step": 330 }, { "epoch": 0.03432074603555204, "grad_norm": 1.4921875, "learning_rate": 0.00019993921741442877, "loss": 5.4441, "step": 331 }, { "epoch": 0.03442443409004011, "grad_norm": 0.56640625, "learning_rate": 0.0001999388381555121, "loss": 5.4147, "step": 332 }, { "epoch": 0.03452812214452819, "grad_norm": 0.92578125, "learning_rate": 0.00019993845771742276, "loss": 5.4387, "step": 333 }, { "epoch": 0.03463181019901626, "grad_norm": 1.921875, "learning_rate": 0.00019993807610016524, "loss": 5.3842, "step": 334 }, { "epoch": 0.03473549825350433, "grad_norm": 0.921875, "learning_rate": 0.00019993769330374408, "loss": 5.4189, "step": 335 }, { "epoch": 0.034839186307992404, "grad_norm": 5.53125, "learning_rate": 0.00019993730932816377, "loss": 5.5256, "step": 336 }, { "epoch": 0.034942874362480475, "grad_norm": 5.0, "learning_rate": 0.00019993692417342884, "loss": 5.5175, "step": 337 }, { "epoch": 0.03504656241696855, "grad_norm": 1.28125, "learning_rate": 0.00019993653783954388, "loss": 5.4119, "step": 338 }, { "epoch": 0.035150250471456625, "grad_norm": 3.4375, "learning_rate": 0.00019993615032651337, "loss": 5.5242, "step": 339 }, { "epoch": 0.035253938525944696, "grad_norm": 3.15625, "learning_rate": 0.00019993576163434193, "loss": 5.53, "step": 340 }, { "epoch": 0.03535762658043277, "grad_norm": 1.5546875, "learning_rate": 0.00019993537176303416, "loss": 5.4046, "step": 341 }, { "epoch": 0.03546131463492084, "grad_norm": 2.59375, "learning_rate": 0.00019993498071259463, "loss": 5.4244, "step": 342 }, { "epoch": 0.03556500268940891, "grad_norm": 2.28125, "learning_rate": 0.00019993458848302796, "loss": 5.4798, "step": 343 }, { "epoch": 0.03566869074389699, "grad_norm": 1.484375, "learning_rate": 0.00019993419507433876, "loss": 5.4492, "step": 344 }, { "epoch": 0.03577237879838506, "grad_norm": 1.2109375, "learning_rate": 0.00019993380048653175, "loss": 5.4628, "step": 345 }, { "epoch": 0.03587606685287313, "grad_norm": 1.34375, "learning_rate": 0.0001999334047196115, "loss": 5.4155, "step": 346 }, { "epoch": 0.0359797549073612, "grad_norm": 1.2578125, "learning_rate": 0.00019993300777358268, "loss": 5.4204, "step": 347 }, { "epoch": 0.036083442961849274, "grad_norm": 0.93359375, "learning_rate": 0.00019993260964845, "loss": 5.448, "step": 348 }, { "epoch": 0.03618713101633735, "grad_norm": 1.046875, "learning_rate": 0.0001999322103442182, "loss": 5.4505, "step": 349 }, { "epoch": 0.036290819070825424, "grad_norm": 1.109375, "learning_rate": 0.00019993180986089192, "loss": 5.4001, "step": 350 }, { "epoch": 0.036394507125313495, "grad_norm": 0.96875, "learning_rate": 0.00019993140819847595, "loss": 5.3998, "step": 351 }, { "epoch": 0.03649819517980157, "grad_norm": 0.97265625, "learning_rate": 0.00019993100535697496, "loss": 5.4476, "step": 352 }, { "epoch": 0.03660188323428964, "grad_norm": 0.99609375, "learning_rate": 0.00019993060133639376, "loss": 5.4657, "step": 353 }, { "epoch": 0.03670557128877771, "grad_norm": 0.99609375, "learning_rate": 0.00019993019613673708, "loss": 5.4182, "step": 354 }, { "epoch": 0.03680925934326579, "grad_norm": 0.7578125, "learning_rate": 0.00019992978975800972, "loss": 5.4388, "step": 355 }, { "epoch": 0.03691294739775386, "grad_norm": 0.76171875, "learning_rate": 0.0001999293822002165, "loss": 5.4333, "step": 356 }, { "epoch": 0.03701663545224193, "grad_norm": 0.75, "learning_rate": 0.00019992897346336218, "loss": 5.4078, "step": 357 }, { "epoch": 0.03712032350673, "grad_norm": 0.81640625, "learning_rate": 0.00019992856354745158, "loss": 5.3843, "step": 358 }, { "epoch": 0.03722401156121807, "grad_norm": 0.6640625, "learning_rate": 0.00019992815245248958, "loss": 5.4116, "step": 359 }, { "epoch": 0.037327699615706145, "grad_norm": 0.62109375, "learning_rate": 0.000199927740178481, "loss": 5.3862, "step": 360 }, { "epoch": 0.03743138767019422, "grad_norm": 0.52734375, "learning_rate": 0.00019992732672543073, "loss": 5.3852, "step": 361 }, { "epoch": 0.037535075724682294, "grad_norm": 0.6640625, "learning_rate": 0.00019992691209334362, "loss": 5.3804, "step": 362 }, { "epoch": 0.037638763779170366, "grad_norm": 0.494140625, "learning_rate": 0.0001999264962822246, "loss": 5.3745, "step": 363 }, { "epoch": 0.03774245183365844, "grad_norm": 0.58203125, "learning_rate": 0.00019992607929207853, "loss": 5.3948, "step": 364 }, { "epoch": 0.03784613988814651, "grad_norm": 0.48828125, "learning_rate": 0.00019992566112291034, "loss": 5.384, "step": 365 }, { "epoch": 0.03794982794263459, "grad_norm": 0.51953125, "learning_rate": 0.000199925241774725, "loss": 5.3606, "step": 366 }, { "epoch": 0.03805351599712266, "grad_norm": 0.5703125, "learning_rate": 0.0001999248212475274, "loss": 5.4083, "step": 367 }, { "epoch": 0.03815720405161073, "grad_norm": 0.439453125, "learning_rate": 0.00019992439954132256, "loss": 5.3636, "step": 368 }, { "epoch": 0.0382608921060988, "grad_norm": 0.55078125, "learning_rate": 0.00019992397665611543, "loss": 5.3986, "step": 369 }, { "epoch": 0.03836458016058687, "grad_norm": 0.46484375, "learning_rate": 0.00019992355259191097, "loss": 5.4029, "step": 370 }, { "epoch": 0.038468268215074944, "grad_norm": 0.51171875, "learning_rate": 0.00019992312734871425, "loss": 5.405, "step": 371 }, { "epoch": 0.03857195626956302, "grad_norm": 0.484375, "learning_rate": 0.00019992270092653022, "loss": 5.3392, "step": 372 }, { "epoch": 0.038675644324051094, "grad_norm": 0.48828125, "learning_rate": 0.00019992227332536397, "loss": 5.3901, "step": 373 }, { "epoch": 0.038779332378539165, "grad_norm": 0.439453125, "learning_rate": 0.00019992184454522053, "loss": 5.3694, "step": 374 }, { "epoch": 0.038883020433027236, "grad_norm": 0.5078125, "learning_rate": 0.0001999214145861049, "loss": 5.359, "step": 375 }, { "epoch": 0.03898670848751531, "grad_norm": 0.4609375, "learning_rate": 0.00019992098344802223, "loss": 5.3545, "step": 376 }, { "epoch": 0.039090396542003386, "grad_norm": 0.41796875, "learning_rate": 0.00019992055113097755, "loss": 5.3735, "step": 377 }, { "epoch": 0.03919408459649146, "grad_norm": 0.46484375, "learning_rate": 0.000199920117634976, "loss": 5.3751, "step": 378 }, { "epoch": 0.03929777265097953, "grad_norm": 0.447265625, "learning_rate": 0.0001999196829600227, "loss": 5.2899, "step": 379 }, { "epoch": 0.0394014607054676, "grad_norm": 0.478515625, "learning_rate": 0.00019991924710612276, "loss": 5.3484, "step": 380 }, { "epoch": 0.03950514875995567, "grad_norm": 0.39453125, "learning_rate": 0.00019991881007328131, "loss": 5.3171, "step": 381 }, { "epoch": 0.03960883681444374, "grad_norm": 0.494140625, "learning_rate": 0.0001999183718615035, "loss": 5.3399, "step": 382 }, { "epoch": 0.03971252486893182, "grad_norm": 0.345703125, "learning_rate": 0.00019991793247079457, "loss": 5.3487, "step": 383 }, { "epoch": 0.03981621292341989, "grad_norm": 0.416015625, "learning_rate": 0.00019991749190115962, "loss": 5.335, "step": 384 }, { "epoch": 0.039919900977907964, "grad_norm": 0.435546875, "learning_rate": 0.0001999170501526039, "loss": 5.333, "step": 385 }, { "epoch": 0.040023589032396036, "grad_norm": 0.439453125, "learning_rate": 0.00019991660722513258, "loss": 5.3132, "step": 386 }, { "epoch": 0.04012727708688411, "grad_norm": 0.365234375, "learning_rate": 0.00019991616311875092, "loss": 5.3834, "step": 387 }, { "epoch": 0.04023096514137218, "grad_norm": 0.443359375, "learning_rate": 0.00019991571783346416, "loss": 5.3623, "step": 388 }, { "epoch": 0.04033465319586026, "grad_norm": 0.50390625, "learning_rate": 0.00019991527136927753, "loss": 5.3597, "step": 389 }, { "epoch": 0.04043834125034833, "grad_norm": 0.439453125, "learning_rate": 0.00019991482372619634, "loss": 5.3423, "step": 390 }, { "epoch": 0.0405420293048364, "grad_norm": 0.421875, "learning_rate": 0.0001999143749042258, "loss": 5.3152, "step": 391 }, { "epoch": 0.04064571735932447, "grad_norm": 0.57421875, "learning_rate": 0.00019991392490337128, "loss": 5.3344, "step": 392 }, { "epoch": 0.04074940541381254, "grad_norm": 0.55859375, "learning_rate": 0.00019991347372363806, "loss": 5.3016, "step": 393 }, { "epoch": 0.04085309346830062, "grad_norm": 0.451171875, "learning_rate": 0.00019991302136503148, "loss": 5.3684, "step": 394 }, { "epoch": 0.04095678152278869, "grad_norm": 0.447265625, "learning_rate": 0.00019991256782755684, "loss": 5.3299, "step": 395 }, { "epoch": 0.04106046957727676, "grad_norm": 0.56640625, "learning_rate": 0.0001999121131112195, "loss": 5.2799, "step": 396 }, { "epoch": 0.041164157631764835, "grad_norm": 0.486328125, "learning_rate": 0.00019991165721602484, "loss": 5.3184, "step": 397 }, { "epoch": 0.041267845686252906, "grad_norm": 0.5546875, "learning_rate": 0.00019991120014197828, "loss": 5.3399, "step": 398 }, { "epoch": 0.04137153374074098, "grad_norm": 0.46875, "learning_rate": 0.00019991074188908513, "loss": 5.2717, "step": 399 }, { "epoch": 0.041475221795229056, "grad_norm": 0.546875, "learning_rate": 0.00019991028245735083, "loss": 5.344, "step": 400 }, { "epoch": 0.04157890984971713, "grad_norm": 0.5703125, "learning_rate": 0.00019990982184678086, "loss": 5.3392, "step": 401 }, { "epoch": 0.0416825979042052, "grad_norm": 0.48046875, "learning_rate": 0.00019990936005738052, "loss": 5.262, "step": 402 }, { "epoch": 0.04178628595869327, "grad_norm": 0.69140625, "learning_rate": 0.0001999088970891554, "loss": 5.3249, "step": 403 }, { "epoch": 0.04188997401318134, "grad_norm": 0.53515625, "learning_rate": 0.00019990843294211087, "loss": 5.3172, "step": 404 }, { "epoch": 0.04199366206766942, "grad_norm": 0.578125, "learning_rate": 0.00019990796761625246, "loss": 5.3205, "step": 405 }, { "epoch": 0.04209735012215749, "grad_norm": 0.5859375, "learning_rate": 0.00019990750111158564, "loss": 5.3259, "step": 406 }, { "epoch": 0.04220103817664556, "grad_norm": 0.5625, "learning_rate": 0.0001999070334281159, "loss": 5.3271, "step": 407 }, { "epoch": 0.042304726231133634, "grad_norm": 0.478515625, "learning_rate": 0.00019990656456584876, "loss": 5.3035, "step": 408 }, { "epoch": 0.042408414285621705, "grad_norm": 0.51171875, "learning_rate": 0.0001999060945247898, "loss": 5.2843, "step": 409 }, { "epoch": 0.04251210234010978, "grad_norm": 0.57421875, "learning_rate": 0.0001999056233049445, "loss": 5.2869, "step": 410 }, { "epoch": 0.042615790394597855, "grad_norm": 0.5703125, "learning_rate": 0.00019990515090631848, "loss": 5.2991, "step": 411 }, { "epoch": 0.042719478449085926, "grad_norm": 0.70703125, "learning_rate": 0.00019990467732891725, "loss": 5.2575, "step": 412 }, { "epoch": 0.042823166503574, "grad_norm": 0.6484375, "learning_rate": 0.00019990420257274643, "loss": 5.3164, "step": 413 }, { "epoch": 0.04292685455806207, "grad_norm": 0.6015625, "learning_rate": 0.00019990372663781166, "loss": 5.2825, "step": 414 }, { "epoch": 0.04303054261255014, "grad_norm": 0.625, "learning_rate": 0.00019990324952411846, "loss": 5.2654, "step": 415 }, { "epoch": 0.04313423066703822, "grad_norm": 0.53515625, "learning_rate": 0.00019990277123167258, "loss": 5.2965, "step": 416 }, { "epoch": 0.04323791872152629, "grad_norm": 0.62109375, "learning_rate": 0.00019990229176047958, "loss": 5.2539, "step": 417 }, { "epoch": 0.04334160677601436, "grad_norm": 0.71484375, "learning_rate": 0.0001999018111105451, "loss": 5.2908, "step": 418 }, { "epoch": 0.04344529483050243, "grad_norm": 0.828125, "learning_rate": 0.0001999013292818749, "loss": 5.2979, "step": 419 }, { "epoch": 0.043548982884990504, "grad_norm": 0.7578125, "learning_rate": 0.0001999008462744746, "loss": 5.2747, "step": 420 }, { "epoch": 0.043652670939478576, "grad_norm": 0.4765625, "learning_rate": 0.00019990036208834992, "loss": 5.3147, "step": 421 }, { "epoch": 0.043756358993966654, "grad_norm": 0.486328125, "learning_rate": 0.00019989987672350656, "loss": 5.2847, "step": 422 }, { "epoch": 0.043860047048454726, "grad_norm": 0.67578125, "learning_rate": 0.00019989939017995024, "loss": 5.2892, "step": 423 }, { "epoch": 0.0439637351029428, "grad_norm": 0.73046875, "learning_rate": 0.00019989890245768673, "loss": 5.2348, "step": 424 }, { "epoch": 0.04406742315743087, "grad_norm": 0.70703125, "learning_rate": 0.00019989841355672178, "loss": 5.2523, "step": 425 }, { "epoch": 0.04417111121191894, "grad_norm": 0.58984375, "learning_rate": 0.00019989792347706114, "loss": 5.2648, "step": 426 }, { "epoch": 0.04427479926640701, "grad_norm": 0.51953125, "learning_rate": 0.00019989743221871057, "loss": 5.2998, "step": 427 }, { "epoch": 0.04437848732089509, "grad_norm": 0.66015625, "learning_rate": 0.00019989693978167595, "loss": 5.1773, "step": 428 }, { "epoch": 0.04448217537538316, "grad_norm": 0.58203125, "learning_rate": 0.00019989644616596298, "loss": 5.2602, "step": 429 }, { "epoch": 0.04458586342987123, "grad_norm": 0.5625, "learning_rate": 0.00019989595137157758, "loss": 5.2278, "step": 430 }, { "epoch": 0.044689551484359304, "grad_norm": 0.6484375, "learning_rate": 0.0001998954553985255, "loss": 5.2847, "step": 431 }, { "epoch": 0.044793239538847375, "grad_norm": 0.8828125, "learning_rate": 0.0001998949582468127, "loss": 5.2977, "step": 432 }, { "epoch": 0.04489692759333545, "grad_norm": 1.0625, "learning_rate": 0.00019989445991644496, "loss": 5.2542, "step": 433 }, { "epoch": 0.045000615647823525, "grad_norm": 1.421875, "learning_rate": 0.00019989396040742818, "loss": 5.2308, "step": 434 }, { "epoch": 0.045104303702311596, "grad_norm": 0.81640625, "learning_rate": 0.00019989345971976828, "loss": 5.2806, "step": 435 }, { "epoch": 0.04520799175679967, "grad_norm": 0.9140625, "learning_rate": 0.00019989295785347112, "loss": 5.2863, "step": 436 }, { "epoch": 0.04531167981128774, "grad_norm": 1.21875, "learning_rate": 0.00019989245480854265, "loss": 5.1997, "step": 437 }, { "epoch": 0.04541536786577581, "grad_norm": 0.88671875, "learning_rate": 0.00019989195058498882, "loss": 5.264, "step": 438 }, { "epoch": 0.04551905592026389, "grad_norm": 0.89453125, "learning_rate": 0.00019989144518281558, "loss": 5.2664, "step": 439 }, { "epoch": 0.04562274397475196, "grad_norm": 0.84375, "learning_rate": 0.00019989093860202885, "loss": 5.2437, "step": 440 }, { "epoch": 0.04572643202924003, "grad_norm": 0.63671875, "learning_rate": 0.00019989043084263464, "loss": 5.2767, "step": 441 }, { "epoch": 0.0458301200837281, "grad_norm": 0.79296875, "learning_rate": 0.00019988992190463894, "loss": 5.2763, "step": 442 }, { "epoch": 0.045933808138216174, "grad_norm": 0.9375, "learning_rate": 0.00019988941178804775, "loss": 5.24, "step": 443 }, { "epoch": 0.04603749619270425, "grad_norm": 1.0625, "learning_rate": 0.00019988890049286705, "loss": 5.2116, "step": 444 }, { "epoch": 0.046141184247192324, "grad_norm": 1.34375, "learning_rate": 0.00019988838801910297, "loss": 5.269, "step": 445 }, { "epoch": 0.046244872301680395, "grad_norm": 0.63671875, "learning_rate": 0.00019988787436676147, "loss": 5.2098, "step": 446 }, { "epoch": 0.04634856035616847, "grad_norm": 0.765625, "learning_rate": 0.00019988735953584862, "loss": 5.2502, "step": 447 }, { "epoch": 0.04645224841065654, "grad_norm": 1.203125, "learning_rate": 0.00019988684352637056, "loss": 5.2842, "step": 448 }, { "epoch": 0.04655593646514461, "grad_norm": 1.015625, "learning_rate": 0.0001998863263383333, "loss": 5.2292, "step": 449 }, { "epoch": 0.04665962451963269, "grad_norm": 1.1953125, "learning_rate": 0.00019988580797174297, "loss": 5.2584, "step": 450 }, { "epoch": 0.04676331257412076, "grad_norm": 0.69140625, "learning_rate": 0.0001998852884266057, "loss": 5.1916, "step": 451 }, { "epoch": 0.04686700062860883, "grad_norm": 0.79296875, "learning_rate": 0.00019988476770292762, "loss": 5.2374, "step": 452 }, { "epoch": 0.0469706886830969, "grad_norm": 1.21875, "learning_rate": 0.00019988424580071485, "loss": 5.2478, "step": 453 }, { "epoch": 0.04707437673758497, "grad_norm": 0.75, "learning_rate": 0.00019988372271997356, "loss": 5.2032, "step": 454 }, { "epoch": 0.04717806479207305, "grad_norm": 0.7265625, "learning_rate": 0.0001998831984607099, "loss": 5.2208, "step": 455 }, { "epoch": 0.04728175284656112, "grad_norm": 0.71484375, "learning_rate": 0.00019988267302293013, "loss": 5.2389, "step": 456 }, { "epoch": 0.047385440901049194, "grad_norm": 0.703125, "learning_rate": 0.00019988214640664036, "loss": 5.2707, "step": 457 }, { "epoch": 0.047489128955537266, "grad_norm": 0.796875, "learning_rate": 0.00019988161861184687, "loss": 5.221, "step": 458 }, { "epoch": 0.04759281701002534, "grad_norm": 0.515625, "learning_rate": 0.00019988108963855586, "loss": 5.1917, "step": 459 }, { "epoch": 0.04769650506451341, "grad_norm": 0.61328125, "learning_rate": 0.00019988055948677355, "loss": 5.2225, "step": 460 }, { "epoch": 0.04780019311900149, "grad_norm": 0.57421875, "learning_rate": 0.00019988002815650622, "loss": 5.1813, "step": 461 }, { "epoch": 0.04790388117348956, "grad_norm": 0.72265625, "learning_rate": 0.00019987949564776014, "loss": 5.2486, "step": 462 }, { "epoch": 0.04800756922797763, "grad_norm": 0.83984375, "learning_rate": 0.0001998789619605416, "loss": 5.2211, "step": 463 }, { "epoch": 0.0481112572824657, "grad_norm": 0.81640625, "learning_rate": 0.00019987842709485686, "loss": 5.2175, "step": 464 }, { "epoch": 0.04821494533695377, "grad_norm": 0.6796875, "learning_rate": 0.0001998778910507123, "loss": 5.2004, "step": 465 }, { "epoch": 0.048318633391441844, "grad_norm": 0.51953125, "learning_rate": 0.00019987735382811416, "loss": 5.2169, "step": 466 }, { "epoch": 0.04842232144592992, "grad_norm": 0.45703125, "learning_rate": 0.0001998768154270688, "loss": 5.165, "step": 467 }, { "epoch": 0.048526009500417994, "grad_norm": 0.53515625, "learning_rate": 0.00019987627584758263, "loss": 5.2305, "step": 468 }, { "epoch": 0.048629697554906065, "grad_norm": 0.439453125, "learning_rate": 0.00019987573508966199, "loss": 5.2377, "step": 469 }, { "epoch": 0.048733385609394136, "grad_norm": 0.5234375, "learning_rate": 0.00019987519315331324, "loss": 5.2023, "step": 470 }, { "epoch": 0.04883707366388221, "grad_norm": 0.49609375, "learning_rate": 0.00019987465003854275, "loss": 5.2201, "step": 471 }, { "epoch": 0.048940761718370286, "grad_norm": 0.59765625, "learning_rate": 0.000199874105745357, "loss": 5.1884, "step": 472 }, { "epoch": 0.04904444977285836, "grad_norm": 0.4609375, "learning_rate": 0.00019987356027376238, "loss": 5.1698, "step": 473 }, { "epoch": 0.04914813782734643, "grad_norm": 0.46875, "learning_rate": 0.0001998730136237653, "loss": 5.2097, "step": 474 }, { "epoch": 0.0492518258818345, "grad_norm": 0.609375, "learning_rate": 0.00019987246579537222, "loss": 5.2154, "step": 475 }, { "epoch": 0.04935551393632257, "grad_norm": 0.6328125, "learning_rate": 0.00019987191678858964, "loss": 5.1997, "step": 476 }, { "epoch": 0.04945920199081064, "grad_norm": 0.62890625, "learning_rate": 0.00019987136660342398, "loss": 5.2161, "step": 477 }, { "epoch": 0.04956289004529872, "grad_norm": 0.6875, "learning_rate": 0.00019987081523988178, "loss": 5.2113, "step": 478 }, { "epoch": 0.04966657809978679, "grad_norm": 0.68359375, "learning_rate": 0.00019987026269796952, "loss": 5.1944, "step": 479 }, { "epoch": 0.049770266154274864, "grad_norm": 0.57421875, "learning_rate": 0.00019986970897769375, "loss": 5.1795, "step": 480 }, { "epoch": 0.049873954208762936, "grad_norm": 0.64453125, "learning_rate": 0.00019986915407906096, "loss": 5.2348, "step": 481 }, { "epoch": 0.04997764226325101, "grad_norm": 0.83203125, "learning_rate": 0.00019986859800207772, "loss": 5.1926, "step": 482 }, { "epoch": 0.050081330317739085, "grad_norm": 0.87890625, "learning_rate": 0.00019986804074675058, "loss": 5.2065, "step": 483 }, { "epoch": 0.05018501837222716, "grad_norm": 0.76953125, "learning_rate": 0.00019986748231308615, "loss": 5.1956, "step": 484 }, { "epoch": 0.05028870642671523, "grad_norm": 0.66796875, "learning_rate": 0.00019986692270109098, "loss": 5.1497, "step": 485 }, { "epoch": 0.0503923944812033, "grad_norm": 0.6796875, "learning_rate": 0.00019986636191077168, "loss": 5.1756, "step": 486 }, { "epoch": 0.05049608253569137, "grad_norm": 0.76953125, "learning_rate": 0.00019986579994213486, "loss": 5.2124, "step": 487 }, { "epoch": 0.05059977059017944, "grad_norm": 0.9375, "learning_rate": 0.00019986523679518722, "loss": 5.1792, "step": 488 }, { "epoch": 0.05070345864466752, "grad_norm": 1.09375, "learning_rate": 0.00019986467246993527, "loss": 5.1996, "step": 489 }, { "epoch": 0.05080714669915559, "grad_norm": 0.99609375, "learning_rate": 0.0001998641069663858, "loss": 5.2116, "step": 490 }, { "epoch": 0.05091083475364366, "grad_norm": 1.1875, "learning_rate": 0.00019986354028454542, "loss": 5.1607, "step": 491 }, { "epoch": 0.051014522808131735, "grad_norm": 0.8828125, "learning_rate": 0.0001998629724244208, "loss": 5.1851, "step": 492 }, { "epoch": 0.051118210862619806, "grad_norm": 0.890625, "learning_rate": 0.00019986240338601869, "loss": 5.196, "step": 493 }, { "epoch": 0.05122189891710788, "grad_norm": 1.1953125, "learning_rate": 0.00019986183316934576, "loss": 5.1985, "step": 494 }, { "epoch": 0.051325586971595956, "grad_norm": 0.96484375, "learning_rate": 0.0001998612617744088, "loss": 5.1647, "step": 495 }, { "epoch": 0.05142927502608403, "grad_norm": 0.97265625, "learning_rate": 0.0001998606892012145, "loss": 5.205, "step": 496 }, { "epoch": 0.0515329630805721, "grad_norm": 1.1171875, "learning_rate": 0.00019986011544976956, "loss": 5.2331, "step": 497 }, { "epoch": 0.05163665113506017, "grad_norm": 0.87890625, "learning_rate": 0.00019985954052008085, "loss": 5.1625, "step": 498 }, { "epoch": 0.05174033918954824, "grad_norm": 0.7578125, "learning_rate": 0.00019985896441215514, "loss": 5.1805, "step": 499 }, { "epoch": 0.05184402724403632, "grad_norm": 0.7421875, "learning_rate": 0.0001998583871259992, "loss": 5.1823, "step": 500 }, { "epoch": 0.05194771529852439, "grad_norm": 0.71484375, "learning_rate": 0.00019985780866161985, "loss": 5.1178, "step": 501 }, { "epoch": 0.05205140335301246, "grad_norm": 0.76953125, "learning_rate": 0.00019985722901902389, "loss": 5.1614, "step": 502 }, { "epoch": 0.052155091407500534, "grad_norm": 0.78515625, "learning_rate": 0.0001998566481982182, "loss": 5.178, "step": 503 }, { "epoch": 0.052258779461988605, "grad_norm": 0.80078125, "learning_rate": 0.0001998560661992096, "loss": 5.1482, "step": 504 }, { "epoch": 0.05236246751647668, "grad_norm": 0.74609375, "learning_rate": 0.00019985548302200497, "loss": 5.1702, "step": 505 }, { "epoch": 0.052466155570964755, "grad_norm": 0.78515625, "learning_rate": 0.0001998548986666112, "loss": 5.165, "step": 506 }, { "epoch": 0.052569843625452826, "grad_norm": 0.7421875, "learning_rate": 0.00019985431313303517, "loss": 5.1466, "step": 507 }, { "epoch": 0.0526735316799409, "grad_norm": 0.5546875, "learning_rate": 0.00019985372642128383, "loss": 5.183, "step": 508 }, { "epoch": 0.05277721973442897, "grad_norm": 0.6328125, "learning_rate": 0.00019985313853136403, "loss": 5.1417, "step": 509 }, { "epoch": 0.05288090778891704, "grad_norm": 0.69140625, "learning_rate": 0.00019985254946328274, "loss": 5.1574, "step": 510 }, { "epoch": 0.05298459584340512, "grad_norm": 0.625, "learning_rate": 0.00019985195921704696, "loss": 5.1722, "step": 511 }, { "epoch": 0.05308828389789319, "grad_norm": 0.58203125, "learning_rate": 0.0001998513677926636, "loss": 5.163, "step": 512 }, { "epoch": 0.05319197195238126, "grad_norm": 0.69140625, "learning_rate": 0.0001998507751901396, "loss": 5.1497, "step": 513 }, { "epoch": 0.05329566000686933, "grad_norm": 0.69140625, "learning_rate": 0.000199850181409482, "loss": 5.1216, "step": 514 }, { "epoch": 0.053399348061357405, "grad_norm": 0.8359375, "learning_rate": 0.00019984958645069786, "loss": 5.1793, "step": 515 }, { "epoch": 0.053503036115845476, "grad_norm": 0.9453125, "learning_rate": 0.0001998489903137941, "loss": 5.1748, "step": 516 }, { "epoch": 0.053606724170333554, "grad_norm": 1.1484375, "learning_rate": 0.0001998483929987778, "loss": 5.1428, "step": 517 }, { "epoch": 0.053710412224821626, "grad_norm": 0.875, "learning_rate": 0.00019984779450565605, "loss": 5.1818, "step": 518 }, { "epoch": 0.0538141002793097, "grad_norm": 0.77734375, "learning_rate": 0.00019984719483443587, "loss": 5.1619, "step": 519 }, { "epoch": 0.05391778833379777, "grad_norm": 0.796875, "learning_rate": 0.0001998465939851243, "loss": 5.1613, "step": 520 }, { "epoch": 0.05402147638828584, "grad_norm": 0.82421875, "learning_rate": 0.00019984599195772845, "loss": 5.1229, "step": 521 }, { "epoch": 0.05412516444277392, "grad_norm": 0.9453125, "learning_rate": 0.00019984538875225547, "loss": 5.1587, "step": 522 }, { "epoch": 0.05422885249726199, "grad_norm": 1.21875, "learning_rate": 0.00019984478436871244, "loss": 5.163, "step": 523 }, { "epoch": 0.05433254055175006, "grad_norm": 0.86328125, "learning_rate": 0.00019984417880710646, "loss": 5.1997, "step": 524 }, { "epoch": 0.05443622860623813, "grad_norm": 0.85546875, "learning_rate": 0.00019984357206744474, "loss": 5.1593, "step": 525 }, { "epoch": 0.054539916660726204, "grad_norm": 0.81640625, "learning_rate": 0.0001998429641497344, "loss": 5.1307, "step": 526 }, { "epoch": 0.054643604715214275, "grad_norm": 0.6015625, "learning_rate": 0.00019984235505398262, "loss": 5.0949, "step": 527 }, { "epoch": 0.05474729276970235, "grad_norm": 0.63671875, "learning_rate": 0.0001998417447801966, "loss": 5.1336, "step": 528 }, { "epoch": 0.054850980824190425, "grad_norm": 0.63671875, "learning_rate": 0.00019984113332838352, "loss": 5.1222, "step": 529 }, { "epoch": 0.054954668878678496, "grad_norm": 0.7578125, "learning_rate": 0.0001998405206985506, "loss": 5.1132, "step": 530 }, { "epoch": 0.05505835693316657, "grad_norm": 0.77734375, "learning_rate": 0.00019983990689070508, "loss": 5.1254, "step": 531 }, { "epoch": 0.05516204498765464, "grad_norm": 0.671875, "learning_rate": 0.00019983929190485423, "loss": 5.1434, "step": 532 }, { "epoch": 0.05526573304214271, "grad_norm": 0.67578125, "learning_rate": 0.0001998386757410052, "loss": 5.0891, "step": 533 }, { "epoch": 0.05536942109663079, "grad_norm": 0.69921875, "learning_rate": 0.0001998380583991654, "loss": 5.144, "step": 534 }, { "epoch": 0.05547310915111886, "grad_norm": 0.71484375, "learning_rate": 0.00019983743987934198, "loss": 5.1256, "step": 535 }, { "epoch": 0.05557679720560693, "grad_norm": 0.546875, "learning_rate": 0.00019983682018154234, "loss": 5.1397, "step": 536 }, { "epoch": 0.055680485260095, "grad_norm": 0.58984375, "learning_rate": 0.00019983619930577374, "loss": 5.1786, "step": 537 }, { "epoch": 0.055784173314583074, "grad_norm": 0.478515625, "learning_rate": 0.00019983557725204352, "loss": 5.1161, "step": 538 }, { "epoch": 0.05588786136907115, "grad_norm": 0.54296875, "learning_rate": 0.00019983495402035902, "loss": 5.1185, "step": 539 }, { "epoch": 0.055991549423559224, "grad_norm": 0.546875, "learning_rate": 0.0001998343296107276, "loss": 5.1027, "step": 540 }, { "epoch": 0.056095237478047295, "grad_norm": 0.50390625, "learning_rate": 0.0001998337040231566, "loss": 5.1329, "step": 541 }, { "epoch": 0.05619892553253537, "grad_norm": 0.49609375, "learning_rate": 0.00019983307725765346, "loss": 5.15, "step": 542 }, { "epoch": 0.05630261358702344, "grad_norm": 0.625, "learning_rate": 0.0001998324493142255, "loss": 5.1271, "step": 543 }, { "epoch": 0.05640630164151151, "grad_norm": 0.703125, "learning_rate": 0.00019983182019288017, "loss": 5.1476, "step": 544 }, { "epoch": 0.05650998969599959, "grad_norm": 0.734375, "learning_rate": 0.0001998311898936249, "loss": 5.0857, "step": 545 }, { "epoch": 0.05661367775048766, "grad_norm": 0.578125, "learning_rate": 0.0001998305584164671, "loss": 5.1089, "step": 546 }, { "epoch": 0.05671736580497573, "grad_norm": 0.482421875, "learning_rate": 0.00019982992576141425, "loss": 5.1258, "step": 547 }, { "epoch": 0.0568210538594638, "grad_norm": 0.59765625, "learning_rate": 0.0001998292919284738, "loss": 5.1405, "step": 548 }, { "epoch": 0.05692474191395187, "grad_norm": 0.59765625, "learning_rate": 0.00019982865691765323, "loss": 5.1548, "step": 549 }, { "epoch": 0.05702842996843995, "grad_norm": 0.71875, "learning_rate": 0.00019982802072896004, "loss": 5.0671, "step": 550 }, { "epoch": 0.05713211802292802, "grad_norm": 0.66796875, "learning_rate": 0.00019982738336240172, "loss": 5.1215, "step": 551 }, { "epoch": 0.057235806077416095, "grad_norm": 0.55859375, "learning_rate": 0.0001998267448179858, "loss": 5.0941, "step": 552 }, { "epoch": 0.057339494131904166, "grad_norm": 0.6015625, "learning_rate": 0.00019982610509571979, "loss": 5.1049, "step": 553 }, { "epoch": 0.05744318218639224, "grad_norm": 0.58203125, "learning_rate": 0.0001998254641956113, "loss": 5.1052, "step": 554 }, { "epoch": 0.05754687024088031, "grad_norm": 0.67578125, "learning_rate": 0.0001998248221176678, "loss": 5.0879, "step": 555 }, { "epoch": 0.05765055829536839, "grad_norm": 0.6796875, "learning_rate": 0.00019982417886189698, "loss": 5.1172, "step": 556 }, { "epoch": 0.05775424634985646, "grad_norm": 0.5859375, "learning_rate": 0.00019982353442830634, "loss": 5.0876, "step": 557 }, { "epoch": 0.05785793440434453, "grad_norm": 0.515625, "learning_rate": 0.00019982288881690349, "loss": 5.1051, "step": 558 }, { "epoch": 0.0579616224588326, "grad_norm": 0.59765625, "learning_rate": 0.00019982224202769611, "loss": 5.0873, "step": 559 }, { "epoch": 0.05806531051332067, "grad_norm": 0.66015625, "learning_rate": 0.00019982159406069176, "loss": 5.0969, "step": 560 }, { "epoch": 0.058168998567808744, "grad_norm": 0.6328125, "learning_rate": 0.00019982094491589813, "loss": 5.1099, "step": 561 }, { "epoch": 0.05827268662229682, "grad_norm": 0.6328125, "learning_rate": 0.00019982029459332287, "loss": 5.0659, "step": 562 }, { "epoch": 0.058376374676784894, "grad_norm": 0.5625, "learning_rate": 0.00019981964309297363, "loss": 5.0939, "step": 563 }, { "epoch": 0.058480062731272965, "grad_norm": 0.5859375, "learning_rate": 0.00019981899041485813, "loss": 5.1099, "step": 564 }, { "epoch": 0.058583750785761037, "grad_norm": 0.6484375, "learning_rate": 0.00019981833655898404, "loss": 5.1289, "step": 565 }, { "epoch": 0.05868743884024911, "grad_norm": 0.56640625, "learning_rate": 0.00019981768152535913, "loss": 5.0745, "step": 566 }, { "epoch": 0.058791126894737186, "grad_norm": 0.59765625, "learning_rate": 0.00019981702531399106, "loss": 5.1127, "step": 567 }, { "epoch": 0.05889481494922526, "grad_norm": 0.671875, "learning_rate": 0.0001998163679248876, "loss": 5.0662, "step": 568 }, { "epoch": 0.05899850300371333, "grad_norm": 0.73046875, "learning_rate": 0.0001998157093580565, "loss": 5.109, "step": 569 }, { "epoch": 0.0591021910582014, "grad_norm": 0.95703125, "learning_rate": 0.00019981504961350558, "loss": 5.0697, "step": 570 }, { "epoch": 0.05920587911268947, "grad_norm": 1.1484375, "learning_rate": 0.00019981438869124256, "loss": 5.0217, "step": 571 }, { "epoch": 0.05930956716717754, "grad_norm": 0.765625, "learning_rate": 0.00019981372659127523, "loss": 5.0847, "step": 572 }, { "epoch": 0.05941325522166562, "grad_norm": 0.5078125, "learning_rate": 0.00019981306331361148, "loss": 5.0614, "step": 573 }, { "epoch": 0.05951694327615369, "grad_norm": 0.74609375, "learning_rate": 0.00019981239885825906, "loss": 5.0795, "step": 574 }, { "epoch": 0.059620631330641764, "grad_norm": 0.7734375, "learning_rate": 0.00019981173322522586, "loss": 5.0716, "step": 575 }, { "epoch": 0.059724319385129836, "grad_norm": 0.62109375, "learning_rate": 0.00019981106641451973, "loss": 5.0803, "step": 576 }, { "epoch": 0.05982800743961791, "grad_norm": 0.59765625, "learning_rate": 0.0001998103984261485, "loss": 5.0795, "step": 577 }, { "epoch": 0.059931695494105985, "grad_norm": 0.62109375, "learning_rate": 0.00019980972926012005, "loss": 5.0748, "step": 578 }, { "epoch": 0.06003538354859406, "grad_norm": 0.6328125, "learning_rate": 0.0001998090589164423, "loss": 5.0885, "step": 579 }, { "epoch": 0.06013907160308213, "grad_norm": 0.671875, "learning_rate": 0.0001998083873951232, "loss": 5.0466, "step": 580 }, { "epoch": 0.0602427596575702, "grad_norm": 0.65625, "learning_rate": 0.00019980771469617058, "loss": 5.0745, "step": 581 }, { "epoch": 0.06034644771205827, "grad_norm": 0.625, "learning_rate": 0.00019980704081959248, "loss": 5.0909, "step": 582 }, { "epoch": 0.06045013576654634, "grad_norm": 0.640625, "learning_rate": 0.00019980636576539678, "loss": 5.017, "step": 583 }, { "epoch": 0.06055382382103442, "grad_norm": 0.63671875, "learning_rate": 0.00019980568953359144, "loss": 5.1101, "step": 584 }, { "epoch": 0.06065751187552249, "grad_norm": 0.859375, "learning_rate": 0.00019980501212418447, "loss": 5.0365, "step": 585 }, { "epoch": 0.06076119993001056, "grad_norm": 1.3515625, "learning_rate": 0.0001998043335371839, "loss": 5.07, "step": 586 }, { "epoch": 0.060864887984498635, "grad_norm": 0.7734375, "learning_rate": 0.00019980365377259763, "loss": 5.0933, "step": 587 }, { "epoch": 0.060968576038986706, "grad_norm": 0.921875, "learning_rate": 0.00019980297283043379, "loss": 5.1316, "step": 588 }, { "epoch": 0.061072264093474785, "grad_norm": 1.3359375, "learning_rate": 0.00019980229071070037, "loss": 5.0325, "step": 589 }, { "epoch": 0.061175952147962856, "grad_norm": 0.9375, "learning_rate": 0.00019980160741340537, "loss": 5.1035, "step": 590 }, { "epoch": 0.06127964020245093, "grad_norm": 1.6328125, "learning_rate": 0.0001998009229385569, "loss": 5.0554, "step": 591 }, { "epoch": 0.061383328256939, "grad_norm": 0.6484375, "learning_rate": 0.00019980023728616305, "loss": 5.0741, "step": 592 }, { "epoch": 0.06148701631142707, "grad_norm": 2.171875, "learning_rate": 0.0001997995504562319, "loss": 5.083, "step": 593 }, { "epoch": 0.06159070436591514, "grad_norm": 1.296875, "learning_rate": 0.00019979886244877158, "loss": 5.0724, "step": 594 }, { "epoch": 0.06169439242040322, "grad_norm": 3.359375, "learning_rate": 0.00019979817326379012, "loss": 5.1534, "step": 595 }, { "epoch": 0.06179808047489129, "grad_norm": 3.109375, "learning_rate": 0.00019979748290129573, "loss": 5.1388, "step": 596 }, { "epoch": 0.06190176852937936, "grad_norm": 1.3984375, "learning_rate": 0.00019979679136129653, "loss": 5.0704, "step": 597 }, { "epoch": 0.062005456583867434, "grad_norm": 2.40625, "learning_rate": 0.00019979609864380067, "loss": 5.1138, "step": 598 }, { "epoch": 0.062109144638355505, "grad_norm": 2.265625, "learning_rate": 0.00019979540474881634, "loss": 5.1004, "step": 599 }, { "epoch": 0.06221283269284358, "grad_norm": 1.2890625, "learning_rate": 0.00019979470967635172, "loss": 5.0521, "step": 600 }, { "epoch": 0.062316520747331655, "grad_norm": 1.796875, "learning_rate": 0.00019979401342641503, "loss": 5.0985, "step": 601 }, { "epoch": 0.062420208801819727, "grad_norm": 1.5, "learning_rate": 0.00019979331599901445, "loss": 5.0998, "step": 602 }, { "epoch": 0.0625238968563078, "grad_norm": 1.078125, "learning_rate": 0.00019979261739415825, "loss": 5.1008, "step": 603 }, { "epoch": 0.06262758491079587, "grad_norm": 2.265625, "learning_rate": 0.00019979191761185466, "loss": 5.088, "step": 604 }, { "epoch": 0.06273127296528394, "grad_norm": 1.5625, "learning_rate": 0.00019979121665211186, "loss": 5.1075, "step": 605 }, { "epoch": 0.06283496101977201, "grad_norm": 2.25, "learning_rate": 0.00019979051451493826, "loss": 5.1131, "step": 606 }, { "epoch": 0.06293864907426008, "grad_norm": 1.375, "learning_rate": 0.00019978981120034203, "loss": 5.0863, "step": 607 }, { "epoch": 0.06304233712874815, "grad_norm": 2.875, "learning_rate": 0.0001997891067083315, "loss": 5.126, "step": 608 }, { "epoch": 0.06314602518323624, "grad_norm": 2.453125, "learning_rate": 0.00019978840103891505, "loss": 5.1418, "step": 609 }, { "epoch": 0.06324971323772431, "grad_norm": 1.484375, "learning_rate": 0.0001997876941921009, "loss": 5.0454, "step": 610 }, { "epoch": 0.06335340129221238, "grad_norm": 1.421875, "learning_rate": 0.00019978698616789745, "loss": 5.0991, "step": 611 }, { "epoch": 0.06345708934670045, "grad_norm": 1.4296875, "learning_rate": 0.00019978627696631306, "loss": 5.0266, "step": 612 }, { "epoch": 0.06356077740118853, "grad_norm": 1.3203125, "learning_rate": 0.00019978556658735606, "loss": 5.096, "step": 613 }, { "epoch": 0.0636644654556766, "grad_norm": 1.140625, "learning_rate": 0.00019978485503103485, "loss": 5.0584, "step": 614 }, { "epoch": 0.06376815351016467, "grad_norm": 1.3125, "learning_rate": 0.0001997841422973578, "loss": 5.092, "step": 615 }, { "epoch": 0.06387184156465274, "grad_norm": 1.1484375, "learning_rate": 0.00019978342838633344, "loss": 5.1038, "step": 616 }, { "epoch": 0.06397552961914081, "grad_norm": 1.1015625, "learning_rate": 0.00019978271329797003, "loss": 5.0346, "step": 617 }, { "epoch": 0.06407921767362888, "grad_norm": 0.9921875, "learning_rate": 0.00019978199703227608, "loss": 5.0628, "step": 618 }, { "epoch": 0.06418290572811695, "grad_norm": 0.96484375, "learning_rate": 0.00019978127958926006, "loss": 5.1023, "step": 619 }, { "epoch": 0.06428659378260504, "grad_norm": 0.89453125, "learning_rate": 0.00019978056096893042, "loss": 5.0591, "step": 620 }, { "epoch": 0.06439028183709311, "grad_norm": 0.98046875, "learning_rate": 0.0001997798411712956, "loss": 5.0737, "step": 621 }, { "epoch": 0.06449396989158118, "grad_norm": 0.84765625, "learning_rate": 0.00019977912019636415, "loss": 5.0839, "step": 622 }, { "epoch": 0.06459765794606925, "grad_norm": 0.7421875, "learning_rate": 0.00019977839804414456, "loss": 5.0516, "step": 623 }, { "epoch": 0.06470134600055732, "grad_norm": 0.76171875, "learning_rate": 0.00019977767471464531, "loss": 5.0687, "step": 624 }, { "epoch": 0.0648050340550454, "grad_norm": 0.671875, "learning_rate": 0.00019977695020787498, "loss": 5.0761, "step": 625 }, { "epoch": 0.06490872210953347, "grad_norm": 0.76171875, "learning_rate": 0.00019977622452384212, "loss": 5.0706, "step": 626 }, { "epoch": 0.06501241016402154, "grad_norm": 0.609375, "learning_rate": 0.00019977549766255528, "loss": 5.0099, "step": 627 }, { "epoch": 0.06511609821850961, "grad_norm": 0.66015625, "learning_rate": 0.00019977476962402304, "loss": 5.0794, "step": 628 }, { "epoch": 0.06521978627299768, "grad_norm": 0.69140625, "learning_rate": 0.00019977404040825395, "loss": 5.0676, "step": 629 }, { "epoch": 0.06532347432748575, "grad_norm": 0.7265625, "learning_rate": 0.0001997733100152567, "loss": 5.0754, "step": 630 }, { "epoch": 0.06542716238197384, "grad_norm": 0.66015625, "learning_rate": 0.0001997725784450398, "loss": 5.0106, "step": 631 }, { "epoch": 0.06553085043646191, "grad_norm": 0.474609375, "learning_rate": 0.000199771845697612, "loss": 5.0163, "step": 632 }, { "epoch": 0.06563453849094998, "grad_norm": 0.64453125, "learning_rate": 0.00019977111177298183, "loss": 5.0573, "step": 633 }, { "epoch": 0.06573822654543805, "grad_norm": 0.53125, "learning_rate": 0.00019977037667115802, "loss": 5.0315, "step": 634 }, { "epoch": 0.06584191459992612, "grad_norm": 0.5078125, "learning_rate": 0.00019976964039214923, "loss": 5.0168, "step": 635 }, { "epoch": 0.0659456026544142, "grad_norm": 0.5078125, "learning_rate": 0.00019976890293596416, "loss": 5.0454, "step": 636 }, { "epoch": 0.06604929070890227, "grad_norm": 0.5546875, "learning_rate": 0.00019976816430261146, "loss": 5.0578, "step": 637 }, { "epoch": 0.06615297876339034, "grad_norm": 0.498046875, "learning_rate": 0.00019976742449209992, "loss": 5.0527, "step": 638 }, { "epoch": 0.06625666681787841, "grad_norm": 0.46484375, "learning_rate": 0.0001997666835044382, "loss": 5.083, "step": 639 }, { "epoch": 0.06636035487236648, "grad_norm": 0.6171875, "learning_rate": 0.00019976594133963512, "loss": 5.0622, "step": 640 }, { "epoch": 0.06646404292685455, "grad_norm": 0.46484375, "learning_rate": 0.00019976519799769931, "loss": 5.0684, "step": 641 }, { "epoch": 0.06656773098134262, "grad_norm": 0.5703125, "learning_rate": 0.00019976445347863968, "loss": 5.0087, "step": 642 }, { "epoch": 0.06667141903583071, "grad_norm": 0.546875, "learning_rate": 0.00019976370778246495, "loss": 5.0554, "step": 643 }, { "epoch": 0.06677510709031878, "grad_norm": 0.44140625, "learning_rate": 0.0001997629609091839, "loss": 5.0249, "step": 644 }, { "epoch": 0.06687879514480685, "grad_norm": 0.470703125, "learning_rate": 0.0001997622128588054, "loss": 5.0804, "step": 645 }, { "epoch": 0.06698248319929492, "grad_norm": 0.47265625, "learning_rate": 0.0001997614636313382, "loss": 5.0207, "step": 646 }, { "epoch": 0.067086171253783, "grad_norm": 0.52734375, "learning_rate": 0.0001997607132267912, "loss": 5.0279, "step": 647 }, { "epoch": 0.06718985930827107, "grad_norm": 0.4453125, "learning_rate": 0.00019975996164517325, "loss": 5.062, "step": 648 }, { "epoch": 0.06729354736275914, "grad_norm": 0.50390625, "learning_rate": 0.00019975920888649318, "loss": 4.9891, "step": 649 }, { "epoch": 0.06739723541724721, "grad_norm": 0.443359375, "learning_rate": 0.00019975845495075992, "loss": 5.0609, "step": 650 }, { "epoch": 0.06750092347173528, "grad_norm": 0.5078125, "learning_rate": 0.0001997576998379823, "loss": 5.0621, "step": 651 }, { "epoch": 0.06760461152622335, "grad_norm": 0.419921875, "learning_rate": 0.0001997569435481693, "loss": 4.9655, "step": 652 }, { "epoch": 0.06770829958071142, "grad_norm": 0.466796875, "learning_rate": 0.00019975618608132983, "loss": 5.0336, "step": 653 }, { "epoch": 0.06781198763519951, "grad_norm": 0.5, "learning_rate": 0.0001997554274374728, "loss": 5.0162, "step": 654 }, { "epoch": 0.06791567568968758, "grad_norm": 0.490234375, "learning_rate": 0.00019975466761660714, "loss": 4.9897, "step": 655 }, { "epoch": 0.06801936374417565, "grad_norm": 0.6484375, "learning_rate": 0.00019975390661874188, "loss": 4.9694, "step": 656 }, { "epoch": 0.06812305179866372, "grad_norm": 0.6640625, "learning_rate": 0.00019975314444388597, "loss": 5.0425, "step": 657 }, { "epoch": 0.0682267398531518, "grad_norm": 0.5234375, "learning_rate": 0.00019975238109204836, "loss": 5.0513, "step": 658 }, { "epoch": 0.06833042790763987, "grad_norm": 0.50390625, "learning_rate": 0.00019975161656323812, "loss": 5.0141, "step": 659 }, { "epoch": 0.06843411596212794, "grad_norm": 0.5625, "learning_rate": 0.00019975085085746427, "loss": 5.0405, "step": 660 }, { "epoch": 0.06853780401661601, "grad_norm": 0.51171875, "learning_rate": 0.00019975008397473578, "loss": 5.0309, "step": 661 }, { "epoch": 0.06864149207110408, "grad_norm": 0.44921875, "learning_rate": 0.00019974931591506176, "loss": 5.0045, "step": 662 }, { "epoch": 0.06874518012559215, "grad_norm": 0.58203125, "learning_rate": 0.00019974854667845126, "loss": 5.0045, "step": 663 }, { "epoch": 0.06884886818008022, "grad_norm": 0.55859375, "learning_rate": 0.00019974777626491334, "loss": 5.0137, "step": 664 }, { "epoch": 0.06895255623456831, "grad_norm": 0.439453125, "learning_rate": 0.0001997470046744571, "loss": 5.0363, "step": 665 }, { "epoch": 0.06905624428905638, "grad_norm": 0.55078125, "learning_rate": 0.00019974623190709164, "loss": 5.052, "step": 666 }, { "epoch": 0.06915993234354445, "grad_norm": 0.6015625, "learning_rate": 0.00019974545796282606, "loss": 5.0489, "step": 667 }, { "epoch": 0.06926362039803252, "grad_norm": 0.51171875, "learning_rate": 0.00019974468284166954, "loss": 5.0173, "step": 668 }, { "epoch": 0.06936730845252059, "grad_norm": 0.5234375, "learning_rate": 0.0001997439065436312, "loss": 4.9908, "step": 669 }, { "epoch": 0.06947099650700866, "grad_norm": 0.68359375, "learning_rate": 0.00019974312906872018, "loss": 5.024, "step": 670 }, { "epoch": 0.06957468456149674, "grad_norm": 0.66796875, "learning_rate": 0.00019974235041694566, "loss": 5.0214, "step": 671 }, { "epoch": 0.06967837261598481, "grad_norm": 0.5625, "learning_rate": 0.00019974157058831685, "loss": 5.0328, "step": 672 }, { "epoch": 0.06978206067047288, "grad_norm": 0.51171875, "learning_rate": 0.00019974078958284294, "loss": 4.9868, "step": 673 }, { "epoch": 0.06988574872496095, "grad_norm": 0.67578125, "learning_rate": 0.00019974000740053316, "loss": 4.9927, "step": 674 }, { "epoch": 0.06998943677944902, "grad_norm": 0.6328125, "learning_rate": 0.0001997392240413967, "loss": 5.0442, "step": 675 }, { "epoch": 0.0700931248339371, "grad_norm": 0.53515625, "learning_rate": 0.0001997384395054428, "loss": 5.0179, "step": 676 }, { "epoch": 0.07019681288842518, "grad_norm": 0.58203125, "learning_rate": 0.00019973765379268082, "loss": 5.0022, "step": 677 }, { "epoch": 0.07030050094291325, "grad_norm": 0.7109375, "learning_rate": 0.00019973686690311987, "loss": 5.0517, "step": 678 }, { "epoch": 0.07040418899740132, "grad_norm": 0.458984375, "learning_rate": 0.00019973607883676936, "loss": 4.9883, "step": 679 }, { "epoch": 0.07050787705188939, "grad_norm": 0.546875, "learning_rate": 0.00019973528959363855, "loss": 4.9936, "step": 680 }, { "epoch": 0.07061156510637746, "grad_norm": 0.62890625, "learning_rate": 0.00019973449917373674, "loss": 5.0083, "step": 681 }, { "epoch": 0.07071525316086553, "grad_norm": 0.53515625, "learning_rate": 0.00019973370757707325, "loss": 5.0072, "step": 682 }, { "epoch": 0.0708189412153536, "grad_norm": 0.546875, "learning_rate": 0.00019973291480365743, "loss": 5.0331, "step": 683 }, { "epoch": 0.07092262926984168, "grad_norm": 0.60546875, "learning_rate": 0.00019973212085349867, "loss": 4.9926, "step": 684 }, { "epoch": 0.07102631732432975, "grad_norm": 0.67578125, "learning_rate": 0.00019973132572660628, "loss": 4.9922, "step": 685 }, { "epoch": 0.07113000537881782, "grad_norm": 0.6015625, "learning_rate": 0.00019973052942298967, "loss": 5.0278, "step": 686 }, { "epoch": 0.0712336934333059, "grad_norm": 0.6171875, "learning_rate": 0.00019972973194265823, "loss": 5.0312, "step": 687 }, { "epoch": 0.07133738148779398, "grad_norm": 0.6796875, "learning_rate": 0.00019972893328562137, "loss": 4.9927, "step": 688 }, { "epoch": 0.07144106954228205, "grad_norm": 0.48046875, "learning_rate": 0.00019972813345188852, "loss": 5.0116, "step": 689 }, { "epoch": 0.07154475759677012, "grad_norm": 0.494140625, "learning_rate": 0.00019972733244146912, "loss": 4.9578, "step": 690 }, { "epoch": 0.07164844565125819, "grad_norm": 0.58984375, "learning_rate": 0.00019972653025437261, "loss": 4.9803, "step": 691 }, { "epoch": 0.07175213370574626, "grad_norm": 0.58984375, "learning_rate": 0.00019972572689060846, "loss": 4.9453, "step": 692 }, { "epoch": 0.07185582176023433, "grad_norm": 0.474609375, "learning_rate": 0.00019972492235018616, "loss": 4.9737, "step": 693 }, { "epoch": 0.0719595098147224, "grad_norm": 0.58203125, "learning_rate": 0.00019972411663311517, "loss": 5.0081, "step": 694 }, { "epoch": 0.07206319786921048, "grad_norm": 0.7421875, "learning_rate": 0.00019972330973940503, "loss": 4.9832, "step": 695 }, { "epoch": 0.07216688592369855, "grad_norm": 0.6875, "learning_rate": 0.00019972250166906523, "loss": 5.0042, "step": 696 }, { "epoch": 0.07227057397818662, "grad_norm": 0.640625, "learning_rate": 0.0001997216924221053, "loss": 5.0134, "step": 697 }, { "epoch": 0.0723742620326747, "grad_norm": 0.6015625, "learning_rate": 0.00019972088199853488, "loss": 4.9701, "step": 698 }, { "epoch": 0.07247795008716278, "grad_norm": 0.58984375, "learning_rate": 0.0001997200703983634, "loss": 5.0043, "step": 699 }, { "epoch": 0.07258163814165085, "grad_norm": 0.5234375, "learning_rate": 0.00019971925762160054, "loss": 5.0103, "step": 700 }, { "epoch": 0.07268532619613892, "grad_norm": 0.55078125, "learning_rate": 0.0001997184436682558, "loss": 4.9916, "step": 701 }, { "epoch": 0.07278901425062699, "grad_norm": 0.59375, "learning_rate": 0.00019971762853833886, "loss": 4.9837, "step": 702 }, { "epoch": 0.07289270230511506, "grad_norm": 0.625, "learning_rate": 0.0001997168122318593, "loss": 4.9554, "step": 703 }, { "epoch": 0.07299639035960313, "grad_norm": 0.609375, "learning_rate": 0.0001997159947488268, "loss": 5.0302, "step": 704 }, { "epoch": 0.0731000784140912, "grad_norm": 0.58984375, "learning_rate": 0.00019971517608925092, "loss": 5.0219, "step": 705 }, { "epoch": 0.07320376646857928, "grad_norm": 0.63671875, "learning_rate": 0.00019971435625314139, "loss": 4.9801, "step": 706 }, { "epoch": 0.07330745452306735, "grad_norm": 0.5859375, "learning_rate": 0.00019971353524050783, "loss": 5.0094, "step": 707 }, { "epoch": 0.07341114257755542, "grad_norm": 0.6796875, "learning_rate": 0.00019971271305135998, "loss": 4.9897, "step": 708 }, { "epoch": 0.07351483063204349, "grad_norm": 0.8828125, "learning_rate": 0.00019971188968570752, "loss": 4.9852, "step": 709 }, { "epoch": 0.07361851868653158, "grad_norm": 1.2265625, "learning_rate": 0.00019971106514356018, "loss": 5.0171, "step": 710 }, { "epoch": 0.07372220674101965, "grad_norm": 1.0703125, "learning_rate": 0.00019971023942492763, "loss": 5.022, "step": 711 }, { "epoch": 0.07382589479550772, "grad_norm": 0.8828125, "learning_rate": 0.00019970941252981964, "loss": 4.9833, "step": 712 }, { "epoch": 0.07392958284999579, "grad_norm": 0.6171875, "learning_rate": 0.00019970858445824603, "loss": 5.025, "step": 713 }, { "epoch": 0.07403327090448386, "grad_norm": 0.59375, "learning_rate": 0.0001997077552102165, "loss": 4.9984, "step": 714 }, { "epoch": 0.07413695895897193, "grad_norm": 0.89453125, "learning_rate": 0.00019970692478574084, "loss": 4.9837, "step": 715 }, { "epoch": 0.07424064701346, "grad_norm": 0.98046875, "learning_rate": 0.00019970609318482887, "loss": 5.0337, "step": 716 }, { "epoch": 0.07434433506794808, "grad_norm": 1.0859375, "learning_rate": 0.0001997052604074904, "loss": 5.0083, "step": 717 }, { "epoch": 0.07444802312243615, "grad_norm": 0.9375, "learning_rate": 0.00019970442645373526, "loss": 4.99, "step": 718 }, { "epoch": 0.07455171117692422, "grad_norm": 1.0078125, "learning_rate": 0.00019970359132357327, "loss": 4.9583, "step": 719 }, { "epoch": 0.07465539923141229, "grad_norm": 1.1796875, "learning_rate": 0.0001997027550170143, "loss": 4.9729, "step": 720 }, { "epoch": 0.07475908728590037, "grad_norm": 0.6875, "learning_rate": 0.0001997019175340682, "loss": 5.0005, "step": 721 }, { "epoch": 0.07486277534038845, "grad_norm": 0.7578125, "learning_rate": 0.00019970107887474486, "loss": 4.9606, "step": 722 }, { "epoch": 0.07496646339487652, "grad_norm": 1.1796875, "learning_rate": 0.0001997002390390542, "loss": 4.9606, "step": 723 }, { "epoch": 0.07507015144936459, "grad_norm": 1.0703125, "learning_rate": 0.00019969939802700606, "loss": 4.9956, "step": 724 }, { "epoch": 0.07517383950385266, "grad_norm": 0.875, "learning_rate": 0.00019969855583861046, "loss": 5.0123, "step": 725 }, { "epoch": 0.07527752755834073, "grad_norm": 0.8671875, "learning_rate": 0.00019969771247387724, "loss": 5.0062, "step": 726 }, { "epoch": 0.0753812156128288, "grad_norm": 1.0625, "learning_rate": 0.00019969686793281643, "loss": 4.9654, "step": 727 }, { "epoch": 0.07548490366731687, "grad_norm": 1.0625, "learning_rate": 0.00019969602221543798, "loss": 5.0011, "step": 728 }, { "epoch": 0.07558859172180495, "grad_norm": 0.765625, "learning_rate": 0.00019969517532175183, "loss": 4.9748, "step": 729 }, { "epoch": 0.07569227977629302, "grad_norm": 0.8828125, "learning_rate": 0.000199694327251768, "loss": 4.9883, "step": 730 }, { "epoch": 0.07579596783078109, "grad_norm": 1.140625, "learning_rate": 0.00019969347800549646, "loss": 4.9907, "step": 731 }, { "epoch": 0.07589965588526917, "grad_norm": 0.8984375, "learning_rate": 0.0001996926275829473, "loss": 4.9906, "step": 732 }, { "epoch": 0.07600334393975725, "grad_norm": 0.6875, "learning_rate": 0.0001996917759841305, "loss": 4.9916, "step": 733 }, { "epoch": 0.07610703199424532, "grad_norm": 0.82421875, "learning_rate": 0.0001996909232090561, "loss": 4.9873, "step": 734 }, { "epoch": 0.07621072004873339, "grad_norm": 0.83984375, "learning_rate": 0.0001996900692577342, "loss": 5.0099, "step": 735 }, { "epoch": 0.07631440810322146, "grad_norm": 0.9140625, "learning_rate": 0.00019968921413017487, "loss": 5.0039, "step": 736 }, { "epoch": 0.07641809615770953, "grad_norm": 0.75, "learning_rate": 0.0001996883578263882, "loss": 4.9653, "step": 737 }, { "epoch": 0.0765217842121976, "grad_norm": 0.734375, "learning_rate": 0.00019968750034638427, "loss": 4.967, "step": 738 }, { "epoch": 0.07662547226668567, "grad_norm": 0.7109375, "learning_rate": 0.0001996866416901732, "loss": 4.9534, "step": 739 }, { "epoch": 0.07672916032117374, "grad_norm": 0.6953125, "learning_rate": 0.00019968578185776515, "loss": 4.9588, "step": 740 }, { "epoch": 0.07683284837566182, "grad_norm": 0.828125, "learning_rate": 0.0001996849208491702, "loss": 4.9746, "step": 741 }, { "epoch": 0.07693653643014989, "grad_norm": 0.859375, "learning_rate": 0.0001996840586643986, "loss": 4.9714, "step": 742 }, { "epoch": 0.07704022448463797, "grad_norm": 0.7265625, "learning_rate": 0.00019968319530346048, "loss": 4.9626, "step": 743 }, { "epoch": 0.07714391253912604, "grad_norm": 0.80078125, "learning_rate": 0.000199682330766366, "loss": 4.9415, "step": 744 }, { "epoch": 0.07724760059361412, "grad_norm": 0.84375, "learning_rate": 0.0001996814650531254, "loss": 4.9414, "step": 745 }, { "epoch": 0.07735128864810219, "grad_norm": 1.15625, "learning_rate": 0.00019968059816374888, "loss": 4.9692, "step": 746 }, { "epoch": 0.07745497670259026, "grad_norm": 0.79296875, "learning_rate": 0.00019967973009824664, "loss": 4.9797, "step": 747 }, { "epoch": 0.07755866475707833, "grad_norm": 0.734375, "learning_rate": 0.000199678860856629, "loss": 4.9844, "step": 748 }, { "epoch": 0.0776623528115664, "grad_norm": 0.90625, "learning_rate": 0.00019967799043890615, "loss": 4.982, "step": 749 }, { "epoch": 0.07776604086605447, "grad_norm": 0.82421875, "learning_rate": 0.00019967711884508839, "loss": 4.9981, "step": 750 }, { "epoch": 0.07786972892054254, "grad_norm": 0.87109375, "learning_rate": 0.00019967624607518595, "loss": 4.9928, "step": 751 }, { "epoch": 0.07797341697503062, "grad_norm": 1.078125, "learning_rate": 0.0001996753721292092, "loss": 4.9345, "step": 752 }, { "epoch": 0.07807710502951869, "grad_norm": 0.765625, "learning_rate": 0.0001996744970071684, "loss": 4.9615, "step": 753 }, { "epoch": 0.07818079308400677, "grad_norm": 0.84375, "learning_rate": 0.0001996736207090739, "loss": 5.0009, "step": 754 }, { "epoch": 0.07828448113849484, "grad_norm": 0.85546875, "learning_rate": 0.00019967274323493605, "loss": 4.9854, "step": 755 }, { "epoch": 0.07838816919298291, "grad_norm": 0.61328125, "learning_rate": 0.0001996718645847652, "loss": 4.9598, "step": 756 }, { "epoch": 0.07849185724747099, "grad_norm": 0.7890625, "learning_rate": 0.0001996709847585717, "loss": 4.9534, "step": 757 }, { "epoch": 0.07859554530195906, "grad_norm": 0.95703125, "learning_rate": 0.0001996701037563659, "loss": 4.9808, "step": 758 }, { "epoch": 0.07869923335644713, "grad_norm": 1.0703125, "learning_rate": 0.00019966922157815825, "loss": 4.9569, "step": 759 }, { "epoch": 0.0788029214109352, "grad_norm": 0.94140625, "learning_rate": 0.00019966833822395916, "loss": 4.9297, "step": 760 }, { "epoch": 0.07890660946542327, "grad_norm": 0.91015625, "learning_rate": 0.000199667453693779, "loss": 4.9103, "step": 761 }, { "epoch": 0.07901029751991134, "grad_norm": 0.7109375, "learning_rate": 0.00019966656798762827, "loss": 4.9669, "step": 762 }, { "epoch": 0.07911398557439941, "grad_norm": 0.60546875, "learning_rate": 0.00019966568110551736, "loss": 4.9627, "step": 763 }, { "epoch": 0.07921767362888749, "grad_norm": 0.60546875, "learning_rate": 0.0001996647930474568, "loss": 4.9544, "step": 764 }, { "epoch": 0.07932136168337557, "grad_norm": 0.7734375, "learning_rate": 0.000199663903813457, "loss": 4.9443, "step": 765 }, { "epoch": 0.07942504973786364, "grad_norm": 0.6796875, "learning_rate": 0.00019966301340352852, "loss": 4.9739, "step": 766 }, { "epoch": 0.07952873779235171, "grad_norm": 0.578125, "learning_rate": 0.00019966212181768178, "loss": 4.9593, "step": 767 }, { "epoch": 0.07963242584683979, "grad_norm": 0.77734375, "learning_rate": 0.0001996612290559274, "loss": 4.9728, "step": 768 }, { "epoch": 0.07973611390132786, "grad_norm": 0.65234375, "learning_rate": 0.00019966033511827584, "loss": 4.968, "step": 769 }, { "epoch": 0.07983980195581593, "grad_norm": 0.58203125, "learning_rate": 0.00019965944000473768, "loss": 4.9428, "step": 770 }, { "epoch": 0.079943490010304, "grad_norm": 0.6796875, "learning_rate": 0.00019965854371532346, "loss": 4.9784, "step": 771 }, { "epoch": 0.08004717806479207, "grad_norm": 0.62890625, "learning_rate": 0.00019965764625004377, "loss": 4.9521, "step": 772 }, { "epoch": 0.08015086611928014, "grad_norm": 0.5234375, "learning_rate": 0.0001996567476089092, "loss": 4.9318, "step": 773 }, { "epoch": 0.08025455417376821, "grad_norm": 0.55859375, "learning_rate": 0.00019965584779193035, "loss": 4.9945, "step": 774 }, { "epoch": 0.08035824222825629, "grad_norm": 0.55859375, "learning_rate": 0.00019965494679911782, "loss": 4.9496, "step": 775 }, { "epoch": 0.08046193028274436, "grad_norm": 0.427734375, "learning_rate": 0.0001996540446304823, "loss": 4.9248, "step": 776 }, { "epoch": 0.08056561833723244, "grad_norm": 0.625, "learning_rate": 0.00019965314128603435, "loss": 4.9327, "step": 777 }, { "epoch": 0.08066930639172051, "grad_norm": 0.5546875, "learning_rate": 0.00019965223676578472, "loss": 4.9228, "step": 778 }, { "epoch": 0.08077299444620858, "grad_norm": 0.546875, "learning_rate": 0.00019965133106974396, "loss": 4.9292, "step": 779 }, { "epoch": 0.08087668250069666, "grad_norm": 0.54296875, "learning_rate": 0.00019965042419792288, "loss": 4.944, "step": 780 }, { "epoch": 0.08098037055518473, "grad_norm": 0.51171875, "learning_rate": 0.00019964951615033215, "loss": 4.9587, "step": 781 }, { "epoch": 0.0810840586096728, "grad_norm": 0.6328125, "learning_rate": 0.0001996486069269824, "loss": 4.9611, "step": 782 }, { "epoch": 0.08118774666416087, "grad_norm": 0.61328125, "learning_rate": 0.00019964769652788448, "loss": 4.9236, "step": 783 }, { "epoch": 0.08129143471864894, "grad_norm": 0.63671875, "learning_rate": 0.00019964678495304906, "loss": 4.9575, "step": 784 }, { "epoch": 0.08139512277313701, "grad_norm": 0.55859375, "learning_rate": 0.00019964587220248686, "loss": 4.9343, "step": 785 }, { "epoch": 0.08149881082762508, "grad_norm": 0.5390625, "learning_rate": 0.00019964495827620875, "loss": 4.9315, "step": 786 }, { "epoch": 0.08160249888211316, "grad_norm": 0.640625, "learning_rate": 0.0001996440431742254, "loss": 4.9526, "step": 787 }, { "epoch": 0.08170618693660124, "grad_norm": 0.56640625, "learning_rate": 0.00019964312689654777, "loss": 4.9687, "step": 788 }, { "epoch": 0.08180987499108931, "grad_norm": 0.5390625, "learning_rate": 0.0001996422094431865, "loss": 4.9541, "step": 789 }, { "epoch": 0.08191356304557738, "grad_norm": 0.515625, "learning_rate": 0.0001996412908141525, "loss": 5.007, "step": 790 }, { "epoch": 0.08201725110006546, "grad_norm": 0.65625, "learning_rate": 0.0001996403710094566, "loss": 4.8851, "step": 791 }, { "epoch": 0.08212093915455353, "grad_norm": 0.6484375, "learning_rate": 0.00019963945002910964, "loss": 4.8957, "step": 792 }, { "epoch": 0.0822246272090416, "grad_norm": 0.6015625, "learning_rate": 0.0001996385278731225, "loss": 4.9079, "step": 793 }, { "epoch": 0.08232831526352967, "grad_norm": 0.546875, "learning_rate": 0.00019963760454150603, "loss": 4.9504, "step": 794 }, { "epoch": 0.08243200331801774, "grad_norm": 0.6640625, "learning_rate": 0.0001996366800342712, "loss": 4.9559, "step": 795 }, { "epoch": 0.08253569137250581, "grad_norm": 0.6328125, "learning_rate": 0.0001996357543514288, "loss": 4.9396, "step": 796 }, { "epoch": 0.08263937942699388, "grad_norm": 0.51171875, "learning_rate": 0.00019963482749298984, "loss": 4.9287, "step": 797 }, { "epoch": 0.08274306748148196, "grad_norm": 0.71484375, "learning_rate": 0.00019963389945896527, "loss": 4.922, "step": 798 }, { "epoch": 0.08284675553597004, "grad_norm": 0.73046875, "learning_rate": 0.00019963297024936595, "loss": 4.9211, "step": 799 }, { "epoch": 0.08295044359045811, "grad_norm": 0.87109375, "learning_rate": 0.00019963203986420296, "loss": 4.9202, "step": 800 }, { "epoch": 0.08305413164494618, "grad_norm": 0.98828125, "learning_rate": 0.00019963110830348714, "loss": 4.9276, "step": 801 }, { "epoch": 0.08315781969943425, "grad_norm": 1.2109375, "learning_rate": 0.00019963017556722963, "loss": 4.8891, "step": 802 }, { "epoch": 0.08326150775392233, "grad_norm": 0.9375, "learning_rate": 0.0001996292416554413, "loss": 4.9741, "step": 803 }, { "epoch": 0.0833651958084104, "grad_norm": 0.78515625, "learning_rate": 0.0001996283065681333, "loss": 4.9151, "step": 804 }, { "epoch": 0.08346888386289847, "grad_norm": 0.6796875, "learning_rate": 0.00019962737030531654, "loss": 4.9354, "step": 805 }, { "epoch": 0.08357257191738654, "grad_norm": 0.80859375, "learning_rate": 0.00019962643286700215, "loss": 4.9499, "step": 806 }, { "epoch": 0.08367625997187461, "grad_norm": 0.79296875, "learning_rate": 0.00019962549425320112, "loss": 4.9544, "step": 807 }, { "epoch": 0.08377994802636268, "grad_norm": 0.72265625, "learning_rate": 0.00019962455446392461, "loss": 4.9092, "step": 808 }, { "epoch": 0.08388363608085075, "grad_norm": 0.74609375, "learning_rate": 0.00019962361349918365, "loss": 4.9512, "step": 809 }, { "epoch": 0.08398732413533884, "grad_norm": 0.6171875, "learning_rate": 0.00019962267135898936, "loss": 4.9327, "step": 810 }, { "epoch": 0.08409101218982691, "grad_norm": 0.7109375, "learning_rate": 0.00019962172804335285, "loss": 4.944, "step": 811 }, { "epoch": 0.08419470024431498, "grad_norm": 0.796875, "learning_rate": 0.00019962078355228525, "loss": 4.8733, "step": 812 }, { "epoch": 0.08429838829880305, "grad_norm": 0.765625, "learning_rate": 0.0001996198378857977, "loss": 4.901, "step": 813 }, { "epoch": 0.08440207635329113, "grad_norm": 0.65625, "learning_rate": 0.00019961889104390138, "loss": 4.9424, "step": 814 }, { "epoch": 0.0845057644077792, "grad_norm": 0.62890625, "learning_rate": 0.00019961794302660746, "loss": 4.9486, "step": 815 }, { "epoch": 0.08460945246226727, "grad_norm": 0.73046875, "learning_rate": 0.00019961699383392708, "loss": 4.9393, "step": 816 }, { "epoch": 0.08471314051675534, "grad_norm": 0.6953125, "learning_rate": 0.0001996160434658715, "loss": 4.9351, "step": 817 }, { "epoch": 0.08481682857124341, "grad_norm": 0.62890625, "learning_rate": 0.0001996150919224519, "loss": 4.9562, "step": 818 }, { "epoch": 0.08492051662573148, "grad_norm": 0.61328125, "learning_rate": 0.00019961413920367948, "loss": 4.9268, "step": 819 }, { "epoch": 0.08502420468021955, "grad_norm": 0.73046875, "learning_rate": 0.00019961318530956556, "loss": 4.9037, "step": 820 }, { "epoch": 0.08512789273470764, "grad_norm": 0.625, "learning_rate": 0.00019961223024012132, "loss": 4.9261, "step": 821 }, { "epoch": 0.08523158078919571, "grad_norm": 0.640625, "learning_rate": 0.0001996112739953581, "loss": 4.8719, "step": 822 }, { "epoch": 0.08533526884368378, "grad_norm": 0.7109375, "learning_rate": 0.00019961031657528708, "loss": 4.8848, "step": 823 }, { "epoch": 0.08543895689817185, "grad_norm": 0.63671875, "learning_rate": 0.00019960935797991967, "loss": 4.9255, "step": 824 }, { "epoch": 0.08554264495265992, "grad_norm": 0.65234375, "learning_rate": 0.0001996083982092671, "loss": 4.8943, "step": 825 }, { "epoch": 0.085646333007148, "grad_norm": 0.65234375, "learning_rate": 0.00019960743726334072, "loss": 4.8836, "step": 826 }, { "epoch": 0.08575002106163607, "grad_norm": 0.62109375, "learning_rate": 0.0001996064751421519, "loss": 4.9091, "step": 827 }, { "epoch": 0.08585370911612414, "grad_norm": 0.5234375, "learning_rate": 0.00019960551184571192, "loss": 4.8901, "step": 828 }, { "epoch": 0.08595739717061221, "grad_norm": 0.52734375, "learning_rate": 0.00019960454737403223, "loss": 4.9074, "step": 829 }, { "epoch": 0.08606108522510028, "grad_norm": 0.59765625, "learning_rate": 0.00019960358172712412, "loss": 4.9147, "step": 830 }, { "epoch": 0.08616477327958835, "grad_norm": 0.470703125, "learning_rate": 0.00019960261490499907, "loss": 4.8972, "step": 831 }, { "epoch": 0.08626846133407644, "grad_norm": 0.5234375, "learning_rate": 0.00019960164690766843, "loss": 4.9044, "step": 832 }, { "epoch": 0.08637214938856451, "grad_norm": 0.6171875, "learning_rate": 0.00019960067773514364, "loss": 4.9212, "step": 833 }, { "epoch": 0.08647583744305258, "grad_norm": 0.5546875, "learning_rate": 0.00019959970738743613, "loss": 4.9064, "step": 834 }, { "epoch": 0.08657952549754065, "grad_norm": 0.5859375, "learning_rate": 0.00019959873586455738, "loss": 4.8741, "step": 835 }, { "epoch": 0.08668321355202872, "grad_norm": 0.7578125, "learning_rate": 0.0001995977631665188, "loss": 4.9145, "step": 836 }, { "epoch": 0.0867869016065168, "grad_norm": 0.80859375, "learning_rate": 0.0001995967892933319, "loss": 4.9278, "step": 837 }, { "epoch": 0.08689058966100487, "grad_norm": 0.74609375, "learning_rate": 0.00019959581424500817, "loss": 4.9188, "step": 838 }, { "epoch": 0.08699427771549294, "grad_norm": 0.6953125, "learning_rate": 0.00019959483802155912, "loss": 4.9492, "step": 839 }, { "epoch": 0.08709796576998101, "grad_norm": 0.74609375, "learning_rate": 0.00019959386062299626, "loss": 4.9078, "step": 840 }, { "epoch": 0.08720165382446908, "grad_norm": 0.89453125, "learning_rate": 0.0001995928820493311, "loss": 4.898, "step": 841 }, { "epoch": 0.08730534187895715, "grad_norm": 1.078125, "learning_rate": 0.00019959190230057518, "loss": 4.9058, "step": 842 }, { "epoch": 0.08740902993344522, "grad_norm": 1.203125, "learning_rate": 0.00019959092137674013, "loss": 4.8931, "step": 843 }, { "epoch": 0.08751271798793331, "grad_norm": 0.828125, "learning_rate": 0.0001995899392778375, "loss": 4.9151, "step": 844 }, { "epoch": 0.08761640604242138, "grad_norm": 0.703125, "learning_rate": 0.0001995889560038788, "loss": 4.9288, "step": 845 }, { "epoch": 0.08772009409690945, "grad_norm": 0.6875, "learning_rate": 0.0001995879715548757, "loss": 4.8856, "step": 846 }, { "epoch": 0.08782378215139752, "grad_norm": 1.046875, "learning_rate": 0.00019958698593083981, "loss": 4.8822, "step": 847 }, { "epoch": 0.0879274702058856, "grad_norm": 1.1640625, "learning_rate": 0.00019958599913178277, "loss": 4.9602, "step": 848 }, { "epoch": 0.08803115826037367, "grad_norm": 0.859375, "learning_rate": 0.00019958501115771622, "loss": 4.9163, "step": 849 }, { "epoch": 0.08813484631486174, "grad_norm": 0.83203125, "learning_rate": 0.00019958402200865178, "loss": 4.918, "step": 850 }, { "epoch": 0.08823853436934981, "grad_norm": 0.77734375, "learning_rate": 0.00019958303168460115, "loss": 4.8722, "step": 851 }, { "epoch": 0.08834222242383788, "grad_norm": 0.81640625, "learning_rate": 0.000199582040185576, "loss": 4.8877, "step": 852 }, { "epoch": 0.08844591047832595, "grad_norm": 0.85546875, "learning_rate": 0.00019958104751158806, "loss": 4.905, "step": 853 }, { "epoch": 0.08854959853281402, "grad_norm": 0.70703125, "learning_rate": 0.00019958005366264901, "loss": 4.8835, "step": 854 }, { "epoch": 0.08865328658730211, "grad_norm": 0.6171875, "learning_rate": 0.0001995790586387706, "loss": 4.8851, "step": 855 }, { "epoch": 0.08875697464179018, "grad_norm": 0.57421875, "learning_rate": 0.00019957806243996453, "loss": 4.8804, "step": 856 }, { "epoch": 0.08886066269627825, "grad_norm": 0.6328125, "learning_rate": 0.0001995770650662426, "loss": 4.9273, "step": 857 }, { "epoch": 0.08896435075076632, "grad_norm": 0.7421875, "learning_rate": 0.00019957606651761656, "loss": 4.872, "step": 858 }, { "epoch": 0.0890680388052544, "grad_norm": 0.78125, "learning_rate": 0.00019957506679409818, "loss": 4.9288, "step": 859 }, { "epoch": 0.08917172685974246, "grad_norm": 0.75390625, "learning_rate": 0.00019957406589569927, "loss": 4.8692, "step": 860 }, { "epoch": 0.08927541491423054, "grad_norm": 0.76171875, "learning_rate": 0.00019957306382243167, "loss": 4.9116, "step": 861 }, { "epoch": 0.08937910296871861, "grad_norm": 0.7578125, "learning_rate": 0.00019957206057430712, "loss": 4.8783, "step": 862 }, { "epoch": 0.08948279102320668, "grad_norm": 0.8671875, "learning_rate": 0.00019957105615133754, "loss": 4.8542, "step": 863 }, { "epoch": 0.08958647907769475, "grad_norm": 0.67578125, "learning_rate": 0.00019957005055353474, "loss": 4.8961, "step": 864 }, { "epoch": 0.08969016713218282, "grad_norm": 0.50390625, "learning_rate": 0.0001995690437809106, "loss": 4.848, "step": 865 }, { "epoch": 0.0897938551866709, "grad_norm": 0.765625, "learning_rate": 0.00019956803583347696, "loss": 4.8486, "step": 866 }, { "epoch": 0.08989754324115898, "grad_norm": 0.80859375, "learning_rate": 0.0001995670267112458, "loss": 4.8902, "step": 867 }, { "epoch": 0.09000123129564705, "grad_norm": 0.65234375, "learning_rate": 0.00019956601641422892, "loss": 4.8824, "step": 868 }, { "epoch": 0.09010491935013512, "grad_norm": 0.6953125, "learning_rate": 0.00019956500494243832, "loss": 4.8438, "step": 869 }, { "epoch": 0.09020860740462319, "grad_norm": 0.59765625, "learning_rate": 0.00019956399229588588, "loss": 4.9038, "step": 870 }, { "epoch": 0.09031229545911126, "grad_norm": 0.65234375, "learning_rate": 0.0001995629784745836, "loss": 4.8895, "step": 871 }, { "epoch": 0.09041598351359934, "grad_norm": 0.6953125, "learning_rate": 0.0001995619634785434, "loss": 4.9058, "step": 872 }, { "epoch": 0.0905196715680874, "grad_norm": 0.59765625, "learning_rate": 0.0001995609473077773, "loss": 4.8509, "step": 873 }, { "epoch": 0.09062335962257548, "grad_norm": 0.625, "learning_rate": 0.00019955992996229728, "loss": 4.8988, "step": 874 }, { "epoch": 0.09072704767706355, "grad_norm": 0.65625, "learning_rate": 0.00019955891144211524, "loss": 4.9242, "step": 875 }, { "epoch": 0.09083073573155162, "grad_norm": 0.57421875, "learning_rate": 0.00019955789174724338, "loss": 4.8703, "step": 876 }, { "epoch": 0.0909344237860397, "grad_norm": 0.7890625, "learning_rate": 0.00019955687087769357, "loss": 4.8834, "step": 877 }, { "epoch": 0.09103811184052778, "grad_norm": 0.92578125, "learning_rate": 0.00019955584883347792, "loss": 4.9105, "step": 878 }, { "epoch": 0.09114179989501585, "grad_norm": 0.859375, "learning_rate": 0.00019955482561460853, "loss": 4.9056, "step": 879 }, { "epoch": 0.09124548794950392, "grad_norm": 0.94921875, "learning_rate": 0.00019955380122109738, "loss": 4.8805, "step": 880 }, { "epoch": 0.09134917600399199, "grad_norm": 0.95703125, "learning_rate": 0.00019955277565295666, "loss": 4.8183, "step": 881 }, { "epoch": 0.09145286405848006, "grad_norm": 0.90234375, "learning_rate": 0.00019955174891019838, "loss": 4.8363, "step": 882 }, { "epoch": 0.09155655211296813, "grad_norm": 1.0390625, "learning_rate": 0.00019955072099283472, "loss": 4.8429, "step": 883 }, { "epoch": 0.0916602401674562, "grad_norm": 1.0703125, "learning_rate": 0.00019954969190087777, "loss": 4.8869, "step": 884 }, { "epoch": 0.09176392822194428, "grad_norm": 0.875, "learning_rate": 0.0001995486616343397, "loss": 4.9302, "step": 885 }, { "epoch": 0.09186761627643235, "grad_norm": 1.015625, "learning_rate": 0.00019954763019323265, "loss": 4.8797, "step": 886 }, { "epoch": 0.09197130433092042, "grad_norm": 1.21875, "learning_rate": 0.00019954659757756877, "loss": 4.8558, "step": 887 }, { "epoch": 0.0920749923854085, "grad_norm": 0.5859375, "learning_rate": 0.00019954556378736028, "loss": 4.9109, "step": 888 }, { "epoch": 0.09217868043989658, "grad_norm": 0.8515625, "learning_rate": 0.00019954452882261933, "loss": 4.8751, "step": 889 }, { "epoch": 0.09228236849438465, "grad_norm": 1.3125, "learning_rate": 0.0001995434926833582, "loss": 4.8356, "step": 890 }, { "epoch": 0.09238605654887272, "grad_norm": 0.66796875, "learning_rate": 0.00019954245536958908, "loss": 4.8736, "step": 891 }, { "epoch": 0.09248974460336079, "grad_norm": 0.96875, "learning_rate": 0.00019954141688132419, "loss": 4.922, "step": 892 }, { "epoch": 0.09259343265784886, "grad_norm": 1.1875, "learning_rate": 0.0001995403772185758, "loss": 4.8539, "step": 893 }, { "epoch": 0.09269712071233693, "grad_norm": 0.80859375, "learning_rate": 0.00019953933638135616, "loss": 4.9001, "step": 894 }, { "epoch": 0.092800808766825, "grad_norm": 1.078125, "learning_rate": 0.00019953829436967759, "loss": 4.8676, "step": 895 }, { "epoch": 0.09290449682131308, "grad_norm": 0.94140625, "learning_rate": 0.00019953725118355235, "loss": 4.8616, "step": 896 }, { "epoch": 0.09300818487580115, "grad_norm": 0.84765625, "learning_rate": 0.00019953620682299278, "loss": 4.8614, "step": 897 }, { "epoch": 0.09311187293028922, "grad_norm": 0.9921875, "learning_rate": 0.0001995351612880112, "loss": 4.8647, "step": 898 }, { "epoch": 0.0932155609847773, "grad_norm": 1.0625, "learning_rate": 0.0001995341145786199, "loss": 4.8408, "step": 899 }, { "epoch": 0.09331924903926538, "grad_norm": 1.09375, "learning_rate": 0.00019953306669483127, "loss": 4.9216, "step": 900 }, { "epoch": 0.09342293709375345, "grad_norm": 0.859375, "learning_rate": 0.00019953201763665766, "loss": 4.8706, "step": 901 }, { "epoch": 0.09352662514824152, "grad_norm": 0.96484375, "learning_rate": 0.00019953096740411144, "loss": 4.8723, "step": 902 }, { "epoch": 0.09363031320272959, "grad_norm": 0.96484375, "learning_rate": 0.00019952991599720503, "loss": 4.8612, "step": 903 }, { "epoch": 0.09373400125721766, "grad_norm": 0.8125, "learning_rate": 0.0001995288634159508, "loss": 4.9012, "step": 904 }, { "epoch": 0.09383768931170573, "grad_norm": 1.125, "learning_rate": 0.00019952780966036123, "loss": 4.8422, "step": 905 }, { "epoch": 0.0939413773661938, "grad_norm": 0.64453125, "learning_rate": 0.00019952675473044868, "loss": 4.8762, "step": 906 }, { "epoch": 0.09404506542068188, "grad_norm": 1.0078125, "learning_rate": 0.00019952569862622562, "loss": 4.8799, "step": 907 }, { "epoch": 0.09414875347516995, "grad_norm": 0.87890625, "learning_rate": 0.00019952464134770454, "loss": 4.8731, "step": 908 }, { "epoch": 0.09425244152965802, "grad_norm": 0.9453125, "learning_rate": 0.0001995235828948979, "loss": 4.8628, "step": 909 }, { "epoch": 0.0943561295841461, "grad_norm": 0.96484375, "learning_rate": 0.00019952252326781815, "loss": 4.8959, "step": 910 }, { "epoch": 0.09445981763863417, "grad_norm": 0.9296875, "learning_rate": 0.00019952146246647785, "loss": 4.8658, "step": 911 }, { "epoch": 0.09456350569312225, "grad_norm": 0.8828125, "learning_rate": 0.0001995204004908895, "loss": 4.8525, "step": 912 }, { "epoch": 0.09466719374761032, "grad_norm": 0.68359375, "learning_rate": 0.0001995193373410656, "loss": 4.8731, "step": 913 }, { "epoch": 0.09477088180209839, "grad_norm": 0.765625, "learning_rate": 0.00019951827301701872, "loss": 4.862, "step": 914 }, { "epoch": 0.09487456985658646, "grad_norm": 0.77734375, "learning_rate": 0.00019951720751876142, "loss": 4.8848, "step": 915 }, { "epoch": 0.09497825791107453, "grad_norm": 0.66796875, "learning_rate": 0.00019951614084630625, "loss": 4.8816, "step": 916 }, { "epoch": 0.0950819459655626, "grad_norm": 0.7890625, "learning_rate": 0.00019951507299966585, "loss": 4.8559, "step": 917 }, { "epoch": 0.09518563402005067, "grad_norm": 0.63671875, "learning_rate": 0.00019951400397885273, "loss": 4.8733, "step": 918 }, { "epoch": 0.09528932207453875, "grad_norm": 0.8359375, "learning_rate": 0.00019951293378387962, "loss": 4.7999, "step": 919 }, { "epoch": 0.09539301012902682, "grad_norm": 0.8203125, "learning_rate": 0.000199511862414759, "loss": 4.829, "step": 920 }, { "epoch": 0.09549669818351489, "grad_norm": 0.62890625, "learning_rate": 0.00019951078987150365, "loss": 4.893, "step": 921 }, { "epoch": 0.09560038623800297, "grad_norm": 0.78515625, "learning_rate": 0.00019950971615412616, "loss": 4.853, "step": 922 }, { "epoch": 0.09570407429249105, "grad_norm": 0.7578125, "learning_rate": 0.00019950864126263917, "loss": 4.8412, "step": 923 }, { "epoch": 0.09580776234697912, "grad_norm": 0.76171875, "learning_rate": 0.00019950756519705544, "loss": 4.8648, "step": 924 }, { "epoch": 0.09591145040146719, "grad_norm": 0.6953125, "learning_rate": 0.0001995064879573876, "loss": 4.8454, "step": 925 }, { "epoch": 0.09601513845595526, "grad_norm": 0.68359375, "learning_rate": 0.0001995054095436484, "loss": 4.8425, "step": 926 }, { "epoch": 0.09611882651044333, "grad_norm": 0.6015625, "learning_rate": 0.00019950432995585054, "loss": 4.8581, "step": 927 }, { "epoch": 0.0962225145649314, "grad_norm": 0.73046875, "learning_rate": 0.00019950324919400676, "loss": 4.8878, "step": 928 }, { "epoch": 0.09632620261941947, "grad_norm": 0.79296875, "learning_rate": 0.00019950216725812982, "loss": 4.7899, "step": 929 }, { "epoch": 0.09642989067390755, "grad_norm": 0.5625, "learning_rate": 0.0001995010841482325, "loss": 4.8928, "step": 930 }, { "epoch": 0.09653357872839562, "grad_norm": 0.6328125, "learning_rate": 0.00019949999986432757, "loss": 4.9347, "step": 931 }, { "epoch": 0.09663726678288369, "grad_norm": 0.67578125, "learning_rate": 0.0001994989144064278, "loss": 4.8777, "step": 932 }, { "epoch": 0.09674095483737177, "grad_norm": 0.6328125, "learning_rate": 0.00019949782777454602, "loss": 4.9026, "step": 933 }, { "epoch": 0.09684464289185984, "grad_norm": 0.71484375, "learning_rate": 0.00019949673996869506, "loss": 4.8867, "step": 934 }, { "epoch": 0.09694833094634792, "grad_norm": 0.59375, "learning_rate": 0.00019949565098888771, "loss": 4.847, "step": 935 }, { "epoch": 0.09705201900083599, "grad_norm": 0.60546875, "learning_rate": 0.00019949456083513686, "loss": 4.8488, "step": 936 }, { "epoch": 0.09715570705532406, "grad_norm": 0.578125, "learning_rate": 0.00019949346950745537, "loss": 4.8131, "step": 937 }, { "epoch": 0.09725939510981213, "grad_norm": 0.51171875, "learning_rate": 0.0001994923770058561, "loss": 4.8802, "step": 938 }, { "epoch": 0.0973630831643002, "grad_norm": 0.5546875, "learning_rate": 0.00019949128333035198, "loss": 4.8309, "step": 939 }, { "epoch": 0.09746677121878827, "grad_norm": 0.6640625, "learning_rate": 0.00019949018848095586, "loss": 4.8319, "step": 940 }, { "epoch": 0.09757045927327634, "grad_norm": 0.6015625, "learning_rate": 0.00019948909245768066, "loss": 4.8677, "step": 941 }, { "epoch": 0.09767414732776442, "grad_norm": 0.625, "learning_rate": 0.00019948799526053938, "loss": 4.8561, "step": 942 }, { "epoch": 0.09777783538225249, "grad_norm": 0.7265625, "learning_rate": 0.00019948689688954489, "loss": 4.8883, "step": 943 }, { "epoch": 0.09788152343674057, "grad_norm": 0.62890625, "learning_rate": 0.00019948579734471017, "loss": 4.826, "step": 944 }, { "epoch": 0.09798521149122864, "grad_norm": 0.5625, "learning_rate": 0.00019948469662604823, "loss": 4.7702, "step": 945 }, { "epoch": 0.09808889954571672, "grad_norm": 0.55859375, "learning_rate": 0.00019948359473357202, "loss": 4.8859, "step": 946 }, { "epoch": 0.09819258760020479, "grad_norm": 0.70703125, "learning_rate": 0.00019948249166729454, "loss": 4.8573, "step": 947 }, { "epoch": 0.09829627565469286, "grad_norm": 0.51171875, "learning_rate": 0.0001994813874272288, "loss": 4.8528, "step": 948 }, { "epoch": 0.09839996370918093, "grad_norm": 0.6171875, "learning_rate": 0.00019948028201338788, "loss": 4.9022, "step": 949 }, { "epoch": 0.098503651763669, "grad_norm": 0.578125, "learning_rate": 0.00019947917542578478, "loss": 4.8382, "step": 950 }, { "epoch": 0.09860733981815707, "grad_norm": 0.640625, "learning_rate": 0.00019947806766443255, "loss": 4.8507, "step": 951 }, { "epoch": 0.09871102787264514, "grad_norm": 0.65234375, "learning_rate": 0.0001994769587293443, "loss": 4.7939, "step": 952 }, { "epoch": 0.09881471592713321, "grad_norm": 0.5546875, "learning_rate": 0.00019947584862053307, "loss": 4.8567, "step": 953 }, { "epoch": 0.09891840398162129, "grad_norm": 0.640625, "learning_rate": 0.00019947473733801196, "loss": 4.8055, "step": 954 }, { "epoch": 0.09902209203610937, "grad_norm": 0.76953125, "learning_rate": 0.00019947362488179413, "loss": 4.8541, "step": 955 }, { "epoch": 0.09912578009059744, "grad_norm": 0.89453125, "learning_rate": 0.00019947251125189264, "loss": 4.8555, "step": 956 }, { "epoch": 0.09922946814508551, "grad_norm": 0.9296875, "learning_rate": 0.0001994713964483207, "loss": 4.8593, "step": 957 }, { "epoch": 0.09933315619957359, "grad_norm": 0.796875, "learning_rate": 0.00019947028047109143, "loss": 4.8419, "step": 958 }, { "epoch": 0.09943684425406166, "grad_norm": 0.81640625, "learning_rate": 0.00019946916332021797, "loss": 4.8051, "step": 959 }, { "epoch": 0.09954053230854973, "grad_norm": 0.98046875, "learning_rate": 0.00019946804499571354, "loss": 4.8403, "step": 960 }, { "epoch": 0.0996442203630378, "grad_norm": 1.15625, "learning_rate": 0.00019946692549759133, "loss": 4.8334, "step": 961 }, { "epoch": 0.09974790841752587, "grad_norm": 0.96484375, "learning_rate": 0.00019946580482586452, "loss": 4.8314, "step": 962 }, { "epoch": 0.09985159647201394, "grad_norm": 1.0546875, "learning_rate": 0.00019946468298054636, "loss": 4.8393, "step": 963 }, { "epoch": 0.09995528452650201, "grad_norm": 1.046875, "learning_rate": 0.00019946355996165006, "loss": 4.8274, "step": 964 }, { "epoch": 0.10005897258099009, "grad_norm": 1.0, "learning_rate": 0.00019946243576918893, "loss": 4.844, "step": 965 }, { "epoch": 0.10016266063547817, "grad_norm": 0.90625, "learning_rate": 0.00019946131040317618, "loss": 4.8438, "step": 966 }, { "epoch": 0.10026634868996624, "grad_norm": 0.76171875, "learning_rate": 0.00019946018386362508, "loss": 4.8389, "step": 967 }, { "epoch": 0.10037003674445431, "grad_norm": 0.7734375, "learning_rate": 0.00019945905615054898, "loss": 4.822, "step": 968 }, { "epoch": 0.10047372479894238, "grad_norm": 0.8203125, "learning_rate": 0.00019945792726396114, "loss": 4.8085, "step": 969 }, { "epoch": 0.10057741285343046, "grad_norm": 0.890625, "learning_rate": 0.00019945679720387486, "loss": 4.8656, "step": 970 }, { "epoch": 0.10068110090791853, "grad_norm": 1.03125, "learning_rate": 0.00019945566597030353, "loss": 4.8728, "step": 971 }, { "epoch": 0.1007847889624066, "grad_norm": 0.9765625, "learning_rate": 0.00019945453356326045, "loss": 4.8481, "step": 972 }, { "epoch": 0.10088847701689467, "grad_norm": 0.80859375, "learning_rate": 0.00019945339998275903, "loss": 4.8336, "step": 973 }, { "epoch": 0.10099216507138274, "grad_norm": 0.71875, "learning_rate": 0.0001994522652288126, "loss": 4.8323, "step": 974 }, { "epoch": 0.10109585312587081, "grad_norm": 0.81640625, "learning_rate": 0.00019945112930143456, "loss": 4.8381, "step": 975 }, { "epoch": 0.10119954118035888, "grad_norm": 0.88671875, "learning_rate": 0.00019944999220063834, "loss": 4.8525, "step": 976 }, { "epoch": 0.10130322923484697, "grad_norm": 0.87109375, "learning_rate": 0.00019944885392643734, "loss": 4.8475, "step": 977 }, { "epoch": 0.10140691728933504, "grad_norm": 0.890625, "learning_rate": 0.00019944771447884496, "loss": 4.8167, "step": 978 }, { "epoch": 0.10151060534382311, "grad_norm": 0.94921875, "learning_rate": 0.00019944657385787467, "loss": 4.8381, "step": 979 }, { "epoch": 0.10161429339831118, "grad_norm": 0.93359375, "learning_rate": 0.00019944543206353995, "loss": 4.8668, "step": 980 }, { "epoch": 0.10171798145279926, "grad_norm": 0.80078125, "learning_rate": 0.00019944428909585423, "loss": 4.8108, "step": 981 }, { "epoch": 0.10182166950728733, "grad_norm": 0.8515625, "learning_rate": 0.00019944314495483104, "loss": 4.8041, "step": 982 }, { "epoch": 0.1019253575617754, "grad_norm": 1.015625, "learning_rate": 0.0001994419996404838, "loss": 4.7916, "step": 983 }, { "epoch": 0.10202904561626347, "grad_norm": 1.0546875, "learning_rate": 0.00019944085315282614, "loss": 4.8384, "step": 984 }, { "epoch": 0.10213273367075154, "grad_norm": 1.09375, "learning_rate": 0.0001994397054918715, "loss": 4.8389, "step": 985 }, { "epoch": 0.10223642172523961, "grad_norm": 0.7734375, "learning_rate": 0.00019943855665763345, "loss": 4.8711, "step": 986 }, { "epoch": 0.10234010977972768, "grad_norm": 0.66796875, "learning_rate": 0.00019943740665012553, "loss": 4.8159, "step": 987 }, { "epoch": 0.10244379783421576, "grad_norm": 1.0234375, "learning_rate": 0.00019943625546936134, "loss": 4.7974, "step": 988 }, { "epoch": 0.10254748588870384, "grad_norm": 0.9609375, "learning_rate": 0.00019943510311535445, "loss": 4.8683, "step": 989 }, { "epoch": 0.10265117394319191, "grad_norm": 0.734375, "learning_rate": 0.00019943394958811842, "loss": 4.8304, "step": 990 }, { "epoch": 0.10275486199767998, "grad_norm": 0.62109375, "learning_rate": 0.00019943279488766693, "loss": 4.8283, "step": 991 }, { "epoch": 0.10285855005216805, "grad_norm": 0.80859375, "learning_rate": 0.00019943163901401355, "loss": 4.8337, "step": 992 }, { "epoch": 0.10296223810665613, "grad_norm": 0.79296875, "learning_rate": 0.0001994304819671719, "loss": 4.8154, "step": 993 }, { "epoch": 0.1030659261611442, "grad_norm": 0.57421875, "learning_rate": 0.0001994293237471557, "loss": 4.8173, "step": 994 }, { "epoch": 0.10316961421563227, "grad_norm": 0.73046875, "learning_rate": 0.0001994281643539786, "loss": 4.8074, "step": 995 }, { "epoch": 0.10327330227012034, "grad_norm": 0.671875, "learning_rate": 0.00019942700378765423, "loss": 4.8224, "step": 996 }, { "epoch": 0.10337699032460841, "grad_norm": 0.58203125, "learning_rate": 0.00019942584204819632, "loss": 4.8471, "step": 997 }, { "epoch": 0.10348067837909648, "grad_norm": 0.64453125, "learning_rate": 0.00019942467913561859, "loss": 4.776, "step": 998 }, { "epoch": 0.10358436643358455, "grad_norm": 0.68359375, "learning_rate": 0.0001994235150499347, "loss": 4.8177, "step": 999 }, { "epoch": 0.10368805448807264, "grad_norm": 0.8515625, "learning_rate": 0.00019942234979115848, "loss": 4.8716, "step": 1000 }, { "epoch": 0.10379174254256071, "grad_norm": 0.70703125, "learning_rate": 0.0001994211833593036, "loss": 4.8509, "step": 1001 }, { "epoch": 0.10389543059704878, "grad_norm": 0.5, "learning_rate": 0.00019942001575438384, "loss": 4.7899, "step": 1002 }, { "epoch": 0.10399911865153685, "grad_norm": 0.65625, "learning_rate": 0.00019941884697641298, "loss": 4.8424, "step": 1003 }, { "epoch": 0.10410280670602493, "grad_norm": 0.63671875, "learning_rate": 0.00019941767702540483, "loss": 4.8242, "step": 1004 }, { "epoch": 0.104206494760513, "grad_norm": 0.69921875, "learning_rate": 0.0001994165059013732, "loss": 4.8327, "step": 1005 }, { "epoch": 0.10431018281500107, "grad_norm": 0.640625, "learning_rate": 0.00019941533360433184, "loss": 4.8073, "step": 1006 }, { "epoch": 0.10441387086948914, "grad_norm": 0.56640625, "learning_rate": 0.00019941416013429468, "loss": 4.82, "step": 1007 }, { "epoch": 0.10451755892397721, "grad_norm": 0.8046875, "learning_rate": 0.0001994129854912755, "loss": 4.8072, "step": 1008 }, { "epoch": 0.10462124697846528, "grad_norm": 0.6640625, "learning_rate": 0.00019941180967528817, "loss": 4.8173, "step": 1009 }, { "epoch": 0.10472493503295335, "grad_norm": 0.609375, "learning_rate": 0.00019941063268634655, "loss": 4.7941, "step": 1010 }, { "epoch": 0.10482862308744144, "grad_norm": 0.859375, "learning_rate": 0.00019940945452446456, "loss": 4.8366, "step": 1011 }, { "epoch": 0.10493231114192951, "grad_norm": 0.7890625, "learning_rate": 0.0001994082751896561, "loss": 4.832, "step": 1012 }, { "epoch": 0.10503599919641758, "grad_norm": 0.68359375, "learning_rate": 0.00019940709468193509, "loss": 4.8127, "step": 1013 }, { "epoch": 0.10513968725090565, "grad_norm": 0.734375, "learning_rate": 0.00019940591300131538, "loss": 4.8416, "step": 1014 }, { "epoch": 0.10524337530539372, "grad_norm": 0.68359375, "learning_rate": 0.00019940473014781103, "loss": 4.8164, "step": 1015 }, { "epoch": 0.1053470633598818, "grad_norm": 0.84765625, "learning_rate": 0.0001994035461214359, "loss": 4.8007, "step": 1016 }, { "epoch": 0.10545075141436987, "grad_norm": 0.8125, "learning_rate": 0.00019940236092220404, "loss": 4.7715, "step": 1017 }, { "epoch": 0.10555443946885794, "grad_norm": 0.77734375, "learning_rate": 0.00019940117455012935, "loss": 4.8128, "step": 1018 }, { "epoch": 0.10565812752334601, "grad_norm": 0.58203125, "learning_rate": 0.00019939998700522587, "loss": 4.8322, "step": 1019 }, { "epoch": 0.10576181557783408, "grad_norm": 0.79296875, "learning_rate": 0.00019939879828750768, "loss": 4.7856, "step": 1020 }, { "epoch": 0.10586550363232215, "grad_norm": 0.6875, "learning_rate": 0.00019939760839698866, "loss": 4.7796, "step": 1021 }, { "epoch": 0.10596919168681024, "grad_norm": 0.65625, "learning_rate": 0.00019939641733368298, "loss": 4.812, "step": 1022 }, { "epoch": 0.10607287974129831, "grad_norm": 0.796875, "learning_rate": 0.00019939522509760462, "loss": 4.8104, "step": 1023 }, { "epoch": 0.10617656779578638, "grad_norm": 0.8125, "learning_rate": 0.0001993940316887677, "loss": 4.8058, "step": 1024 }, { "epoch": 0.10628025585027445, "grad_norm": 0.8359375, "learning_rate": 0.00019939283710718623, "loss": 4.8323, "step": 1025 }, { "epoch": 0.10638394390476252, "grad_norm": 0.92578125, "learning_rate": 0.00019939164135287433, "loss": 4.8172, "step": 1026 }, { "epoch": 0.1064876319592506, "grad_norm": 0.7109375, "learning_rate": 0.00019939044442584617, "loss": 4.8447, "step": 1027 }, { "epoch": 0.10659132001373867, "grad_norm": 0.703125, "learning_rate": 0.00019938924632611582, "loss": 4.8457, "step": 1028 }, { "epoch": 0.10669500806822674, "grad_norm": 0.69921875, "learning_rate": 0.00019938804705369741, "loss": 4.8359, "step": 1029 }, { "epoch": 0.10679869612271481, "grad_norm": 0.67578125, "learning_rate": 0.00019938684660860513, "loss": 4.8462, "step": 1030 }, { "epoch": 0.10690238417720288, "grad_norm": 0.69921875, "learning_rate": 0.00019938564499085305, "loss": 4.853, "step": 1031 }, { "epoch": 0.10700607223169095, "grad_norm": 0.7421875, "learning_rate": 0.00019938444220045547, "loss": 4.7956, "step": 1032 }, { "epoch": 0.10710976028617904, "grad_norm": 0.59765625, "learning_rate": 0.0001993832382374265, "loss": 4.8215, "step": 1033 }, { "epoch": 0.10721344834066711, "grad_norm": 0.77734375, "learning_rate": 0.0001993820331017804, "loss": 4.8004, "step": 1034 }, { "epoch": 0.10731713639515518, "grad_norm": 0.796875, "learning_rate": 0.00019938082679353132, "loss": 4.8516, "step": 1035 }, { "epoch": 0.10742082444964325, "grad_norm": 0.80859375, "learning_rate": 0.00019937961931269357, "loss": 4.7898, "step": 1036 }, { "epoch": 0.10752451250413132, "grad_norm": 0.625, "learning_rate": 0.00019937841065928135, "loss": 4.8164, "step": 1037 }, { "epoch": 0.1076282005586194, "grad_norm": 0.6015625, "learning_rate": 0.00019937720083330893, "loss": 4.8158, "step": 1038 }, { "epoch": 0.10773188861310747, "grad_norm": 0.65234375, "learning_rate": 0.00019937598983479058, "loss": 4.8225, "step": 1039 }, { "epoch": 0.10783557666759554, "grad_norm": 0.703125, "learning_rate": 0.0001993747776637406, "loss": 4.7566, "step": 1040 }, { "epoch": 0.10793926472208361, "grad_norm": 0.62890625, "learning_rate": 0.00019937356432017328, "loss": 4.8108, "step": 1041 }, { "epoch": 0.10804295277657168, "grad_norm": 0.640625, "learning_rate": 0.00019937234980410296, "loss": 4.7864, "step": 1042 }, { "epoch": 0.10814664083105975, "grad_norm": 0.7109375, "learning_rate": 0.00019937113411554395, "loss": 4.8389, "step": 1043 }, { "epoch": 0.10825032888554784, "grad_norm": 0.6953125, "learning_rate": 0.00019936991725451057, "loss": 4.8236, "step": 1044 }, { "epoch": 0.10835401694003591, "grad_norm": 0.69921875, "learning_rate": 0.00019936869922101727, "loss": 4.8259, "step": 1045 }, { "epoch": 0.10845770499452398, "grad_norm": 0.69921875, "learning_rate": 0.0001993674800150783, "loss": 4.7816, "step": 1046 }, { "epoch": 0.10856139304901205, "grad_norm": 0.66796875, "learning_rate": 0.00019936625963670813, "loss": 4.7689, "step": 1047 }, { "epoch": 0.10866508110350012, "grad_norm": 0.7734375, "learning_rate": 0.0001993650380859211, "loss": 4.8243, "step": 1048 }, { "epoch": 0.1087687691579882, "grad_norm": 0.828125, "learning_rate": 0.00019936381536273166, "loss": 4.828, "step": 1049 }, { "epoch": 0.10887245721247626, "grad_norm": 0.87109375, "learning_rate": 0.00019936259146715425, "loss": 4.8372, "step": 1050 }, { "epoch": 0.10897614526696434, "grad_norm": 0.89453125, "learning_rate": 0.0001993613663992033, "loss": 4.7794, "step": 1051 }, { "epoch": 0.10907983332145241, "grad_norm": 0.7890625, "learning_rate": 0.00019936014015889321, "loss": 4.7735, "step": 1052 }, { "epoch": 0.10918352137594048, "grad_norm": 0.78125, "learning_rate": 0.00019935891274623852, "loss": 4.7367, "step": 1053 }, { "epoch": 0.10928720943042855, "grad_norm": 0.76953125, "learning_rate": 0.00019935768416125371, "loss": 4.8041, "step": 1054 }, { "epoch": 0.10939089748491662, "grad_norm": 0.875, "learning_rate": 0.00019935645440395322, "loss": 4.8382, "step": 1055 }, { "epoch": 0.1094945855394047, "grad_norm": 0.93359375, "learning_rate": 0.0001993552234743516, "loss": 4.7804, "step": 1056 }, { "epoch": 0.10959827359389278, "grad_norm": 0.77734375, "learning_rate": 0.00019935399137246338, "loss": 4.8278, "step": 1057 }, { "epoch": 0.10970196164838085, "grad_norm": 0.68359375, "learning_rate": 0.00019935275809830307, "loss": 4.7946, "step": 1058 }, { "epoch": 0.10980564970286892, "grad_norm": 0.7265625, "learning_rate": 0.00019935152365188526, "loss": 4.8324, "step": 1059 }, { "epoch": 0.10990933775735699, "grad_norm": 0.65234375, "learning_rate": 0.00019935028803322444, "loss": 4.7871, "step": 1060 }, { "epoch": 0.11001302581184506, "grad_norm": 0.671875, "learning_rate": 0.00019934905124233528, "loss": 4.7859, "step": 1061 }, { "epoch": 0.11011671386633314, "grad_norm": 0.75, "learning_rate": 0.00019934781327923232, "loss": 4.7975, "step": 1062 }, { "epoch": 0.1102204019208212, "grad_norm": 0.921875, "learning_rate": 0.00019934657414393016, "loss": 4.785, "step": 1063 }, { "epoch": 0.11032408997530928, "grad_norm": 0.890625, "learning_rate": 0.00019934533383644348, "loss": 4.8142, "step": 1064 }, { "epoch": 0.11042777802979735, "grad_norm": 0.99609375, "learning_rate": 0.00019934409235678683, "loss": 4.8027, "step": 1065 }, { "epoch": 0.11053146608428542, "grad_norm": 1.046875, "learning_rate": 0.00019934284970497492, "loss": 4.8281, "step": 1066 }, { "epoch": 0.1106351541387735, "grad_norm": 0.80078125, "learning_rate": 0.00019934160588102242, "loss": 4.8019, "step": 1067 }, { "epoch": 0.11073884219326158, "grad_norm": 0.9609375, "learning_rate": 0.00019934036088494394, "loss": 4.7709, "step": 1068 }, { "epoch": 0.11084253024774965, "grad_norm": 1.0, "learning_rate": 0.00019933911471675423, "loss": 4.7505, "step": 1069 }, { "epoch": 0.11094621830223772, "grad_norm": 0.9375, "learning_rate": 0.00019933786737646797, "loss": 4.8155, "step": 1070 }, { "epoch": 0.11104990635672579, "grad_norm": 1.203125, "learning_rate": 0.00019933661886409988, "loss": 4.8044, "step": 1071 }, { "epoch": 0.11115359441121386, "grad_norm": 0.9140625, "learning_rate": 0.00019933536917966468, "loss": 4.8024, "step": 1072 }, { "epoch": 0.11125728246570193, "grad_norm": 0.94140625, "learning_rate": 0.00019933411832317712, "loss": 4.7923, "step": 1073 }, { "epoch": 0.11136097052019, "grad_norm": 0.8828125, "learning_rate": 0.000199332866294652, "loss": 4.7929, "step": 1074 }, { "epoch": 0.11146465857467808, "grad_norm": 0.9296875, "learning_rate": 0.00019933161309410402, "loss": 4.8082, "step": 1075 }, { "epoch": 0.11156834662916615, "grad_norm": 1.0703125, "learning_rate": 0.00019933035872154802, "loss": 4.7484, "step": 1076 }, { "epoch": 0.11167203468365422, "grad_norm": 1.046875, "learning_rate": 0.00019932910317699878, "loss": 4.7837, "step": 1077 }, { "epoch": 0.1117757227381423, "grad_norm": 0.96484375, "learning_rate": 0.00019932784646047112, "loss": 4.8484, "step": 1078 }, { "epoch": 0.11187941079263038, "grad_norm": 0.89453125, "learning_rate": 0.00019932658857197986, "loss": 4.8276, "step": 1079 }, { "epoch": 0.11198309884711845, "grad_norm": 0.765625, "learning_rate": 0.00019932532951153986, "loss": 4.7676, "step": 1080 }, { "epoch": 0.11208678690160652, "grad_norm": 0.8203125, "learning_rate": 0.00019932406927916595, "loss": 4.8098, "step": 1081 }, { "epoch": 0.11219047495609459, "grad_norm": 0.82421875, "learning_rate": 0.000199322807874873, "loss": 4.8138, "step": 1082 }, { "epoch": 0.11229416301058266, "grad_norm": 0.75390625, "learning_rate": 0.00019932154529867595, "loss": 4.8027, "step": 1083 }, { "epoch": 0.11239785106507073, "grad_norm": 0.828125, "learning_rate": 0.00019932028155058963, "loss": 4.8091, "step": 1084 }, { "epoch": 0.1125015391195588, "grad_norm": 0.95703125, "learning_rate": 0.00019931901663062894, "loss": 4.787, "step": 1085 }, { "epoch": 0.11260522717404688, "grad_norm": 0.8125, "learning_rate": 0.00019931775053880888, "loss": 4.7998, "step": 1086 }, { "epoch": 0.11270891522853495, "grad_norm": 0.86328125, "learning_rate": 0.00019931648327514435, "loss": 4.819, "step": 1087 }, { "epoch": 0.11281260328302302, "grad_norm": 0.9375, "learning_rate": 0.0001993152148396503, "loss": 4.7823, "step": 1088 }, { "epoch": 0.1129162913375111, "grad_norm": 0.77734375, "learning_rate": 0.00019931394523234165, "loss": 4.804, "step": 1089 }, { "epoch": 0.11301997939199918, "grad_norm": 0.65625, "learning_rate": 0.00019931267445323346, "loss": 4.8188, "step": 1090 }, { "epoch": 0.11312366744648725, "grad_norm": 0.85546875, "learning_rate": 0.00019931140250234068, "loss": 4.8397, "step": 1091 }, { "epoch": 0.11322735550097532, "grad_norm": 0.7578125, "learning_rate": 0.00019931012937967834, "loss": 4.7646, "step": 1092 }, { "epoch": 0.11333104355546339, "grad_norm": 0.75390625, "learning_rate": 0.00019930885508526145, "loss": 4.8125, "step": 1093 }, { "epoch": 0.11343473160995146, "grad_norm": 0.81640625, "learning_rate": 0.000199307579619105, "loss": 4.7948, "step": 1094 }, { "epoch": 0.11353841966443953, "grad_norm": 0.66015625, "learning_rate": 0.00019930630298122415, "loss": 4.7597, "step": 1095 }, { "epoch": 0.1136421077189276, "grad_norm": 0.66796875, "learning_rate": 0.00019930502517163386, "loss": 4.7938, "step": 1096 }, { "epoch": 0.11374579577341568, "grad_norm": 0.83984375, "learning_rate": 0.00019930374619034927, "loss": 4.8352, "step": 1097 }, { "epoch": 0.11384948382790375, "grad_norm": 0.81640625, "learning_rate": 0.0001993024660373854, "loss": 4.8116, "step": 1098 }, { "epoch": 0.11395317188239182, "grad_norm": 0.734375, "learning_rate": 0.00019930118471275744, "loss": 4.7617, "step": 1099 }, { "epoch": 0.1140568599368799, "grad_norm": 0.77734375, "learning_rate": 0.00019929990221648043, "loss": 4.7716, "step": 1100 }, { "epoch": 0.11416054799136797, "grad_norm": 0.76171875, "learning_rate": 0.00019929861854856956, "loss": 4.8149, "step": 1101 }, { "epoch": 0.11426423604585605, "grad_norm": 0.73828125, "learning_rate": 0.00019929733370903995, "loss": 4.7617, "step": 1102 }, { "epoch": 0.11436792410034412, "grad_norm": 0.6484375, "learning_rate": 0.00019929604769790675, "loss": 4.7973, "step": 1103 }, { "epoch": 0.11447161215483219, "grad_norm": 0.640625, "learning_rate": 0.00019929476051518518, "loss": 4.8084, "step": 1104 }, { "epoch": 0.11457530020932026, "grad_norm": 0.734375, "learning_rate": 0.00019929347216089037, "loss": 4.8131, "step": 1105 }, { "epoch": 0.11467898826380833, "grad_norm": 0.8203125, "learning_rate": 0.00019929218263503752, "loss": 4.7745, "step": 1106 }, { "epoch": 0.1147826763182964, "grad_norm": 0.90625, "learning_rate": 0.00019929089193764193, "loss": 4.7861, "step": 1107 }, { "epoch": 0.11488636437278447, "grad_norm": 0.9296875, "learning_rate": 0.00019928960006871874, "loss": 4.7646, "step": 1108 }, { "epoch": 0.11499005242727255, "grad_norm": 0.6953125, "learning_rate": 0.00019928830702828322, "loss": 4.8098, "step": 1109 }, { "epoch": 0.11509374048176062, "grad_norm": 0.62109375, "learning_rate": 0.00019928701281635066, "loss": 4.7654, "step": 1110 }, { "epoch": 0.1151974285362487, "grad_norm": 0.84375, "learning_rate": 0.00019928571743293625, "loss": 4.7427, "step": 1111 }, { "epoch": 0.11530111659073677, "grad_norm": 1.046875, "learning_rate": 0.00019928442087805537, "loss": 4.8149, "step": 1112 }, { "epoch": 0.11540480464522485, "grad_norm": 0.75, "learning_rate": 0.00019928312315172324, "loss": 4.8035, "step": 1113 }, { "epoch": 0.11550849269971292, "grad_norm": 0.640625, "learning_rate": 0.0001992818242539552, "loss": 4.7691, "step": 1114 }, { "epoch": 0.11561218075420099, "grad_norm": 0.734375, "learning_rate": 0.0001992805241847666, "loss": 4.7827, "step": 1115 }, { "epoch": 0.11571586880868906, "grad_norm": 0.91796875, "learning_rate": 0.00019927922294417277, "loss": 4.7606, "step": 1116 }, { "epoch": 0.11581955686317713, "grad_norm": 0.921875, "learning_rate": 0.00019927792053218903, "loss": 4.7606, "step": 1117 }, { "epoch": 0.1159232449176652, "grad_norm": 0.828125, "learning_rate": 0.00019927661694883075, "loss": 4.7667, "step": 1118 }, { "epoch": 0.11602693297215327, "grad_norm": 0.84375, "learning_rate": 0.00019927531219411337, "loss": 4.7663, "step": 1119 }, { "epoch": 0.11613062102664135, "grad_norm": 0.65234375, "learning_rate": 0.00019927400626805223, "loss": 4.7812, "step": 1120 }, { "epoch": 0.11623430908112942, "grad_norm": 0.76953125, "learning_rate": 0.00019927269917066273, "loss": 4.7531, "step": 1121 }, { "epoch": 0.11633799713561749, "grad_norm": 0.74609375, "learning_rate": 0.00019927139090196035, "loss": 4.7844, "step": 1122 }, { "epoch": 0.11644168519010557, "grad_norm": 0.69921875, "learning_rate": 0.00019927008146196048, "loss": 4.7869, "step": 1123 }, { "epoch": 0.11654537324459364, "grad_norm": 0.80859375, "learning_rate": 0.0001992687708506786, "loss": 4.78, "step": 1124 }, { "epoch": 0.11664906129908172, "grad_norm": 0.6953125, "learning_rate": 0.00019926745906813012, "loss": 4.7729, "step": 1125 }, { "epoch": 0.11675274935356979, "grad_norm": 0.69140625, "learning_rate": 0.00019926614611433057, "loss": 4.8032, "step": 1126 }, { "epoch": 0.11685643740805786, "grad_norm": 0.66796875, "learning_rate": 0.0001992648319892954, "loss": 4.7693, "step": 1127 }, { "epoch": 0.11696012546254593, "grad_norm": 0.6953125, "learning_rate": 0.00019926351669304017, "loss": 4.7452, "step": 1128 }, { "epoch": 0.117063813517034, "grad_norm": 0.6640625, "learning_rate": 0.00019926220022558036, "loss": 4.7493, "step": 1129 }, { "epoch": 0.11716750157152207, "grad_norm": 0.62890625, "learning_rate": 0.00019926088258693153, "loss": 4.7705, "step": 1130 }, { "epoch": 0.11727118962601014, "grad_norm": 0.703125, "learning_rate": 0.0001992595637771092, "loss": 4.7883, "step": 1131 }, { "epoch": 0.11737487768049822, "grad_norm": 0.71484375, "learning_rate": 0.0001992582437961289, "loss": 4.767, "step": 1132 }, { "epoch": 0.11747856573498629, "grad_norm": 0.67578125, "learning_rate": 0.00019925692264400629, "loss": 4.7713, "step": 1133 }, { "epoch": 0.11758225378947437, "grad_norm": 0.68359375, "learning_rate": 0.0001992556003207569, "loss": 4.7332, "step": 1134 }, { "epoch": 0.11768594184396244, "grad_norm": 0.77734375, "learning_rate": 0.00019925427682639636, "loss": 4.7582, "step": 1135 }, { "epoch": 0.11778962989845052, "grad_norm": 0.72265625, "learning_rate": 0.00019925295216094023, "loss": 4.8117, "step": 1136 }, { "epoch": 0.11789331795293859, "grad_norm": 0.609375, "learning_rate": 0.00019925162632440419, "loss": 4.7745, "step": 1137 }, { "epoch": 0.11799700600742666, "grad_norm": 0.6015625, "learning_rate": 0.0001992502993168039, "loss": 4.7928, "step": 1138 }, { "epoch": 0.11810069406191473, "grad_norm": 0.671875, "learning_rate": 0.00019924897113815496, "loss": 4.7954, "step": 1139 }, { "epoch": 0.1182043821164028, "grad_norm": 0.65234375, "learning_rate": 0.0001992476417884731, "loss": 4.7575, "step": 1140 }, { "epoch": 0.11830807017089087, "grad_norm": 0.62109375, "learning_rate": 0.00019924631126777396, "loss": 4.7496, "step": 1141 }, { "epoch": 0.11841175822537894, "grad_norm": 0.65625, "learning_rate": 0.00019924497957607326, "loss": 4.7866, "step": 1142 }, { "epoch": 0.11851544627986701, "grad_norm": 0.84375, "learning_rate": 0.00019924364671338672, "loss": 4.773, "step": 1143 }, { "epoch": 0.11861913433435509, "grad_norm": 0.6953125, "learning_rate": 0.00019924231267973004, "loss": 4.7703, "step": 1144 }, { "epoch": 0.11872282238884317, "grad_norm": 0.6875, "learning_rate": 0.00019924097747511896, "loss": 4.7405, "step": 1145 }, { "epoch": 0.11882651044333124, "grad_norm": 0.79296875, "learning_rate": 0.00019923964109956925, "loss": 4.7476, "step": 1146 }, { "epoch": 0.11893019849781931, "grad_norm": 0.828125, "learning_rate": 0.00019923830355309669, "loss": 4.7496, "step": 1147 }, { "epoch": 0.11903388655230739, "grad_norm": 0.91015625, "learning_rate": 0.00019923696483571703, "loss": 4.7838, "step": 1148 }, { "epoch": 0.11913757460679546, "grad_norm": 0.8828125, "learning_rate": 0.00019923562494744611, "loss": 4.7803, "step": 1149 }, { "epoch": 0.11924126266128353, "grad_norm": 0.59375, "learning_rate": 0.0001992342838882997, "loss": 4.7226, "step": 1150 }, { "epoch": 0.1193449507157716, "grad_norm": 0.68359375, "learning_rate": 0.00019923294165829364, "loss": 4.7638, "step": 1151 }, { "epoch": 0.11944863877025967, "grad_norm": 0.7890625, "learning_rate": 0.00019923159825744376, "loss": 4.7377, "step": 1152 }, { "epoch": 0.11955232682474774, "grad_norm": 0.71484375, "learning_rate": 0.0001992302536857659, "loss": 4.7789, "step": 1153 }, { "epoch": 0.11965601487923581, "grad_norm": 0.59375, "learning_rate": 0.00019922890794327595, "loss": 4.7689, "step": 1154 }, { "epoch": 0.11975970293372389, "grad_norm": 0.7265625, "learning_rate": 0.00019922756102998975, "loss": 4.7883, "step": 1155 }, { "epoch": 0.11986339098821197, "grad_norm": 0.65234375, "learning_rate": 0.00019922621294592326, "loss": 4.7345, "step": 1156 }, { "epoch": 0.11996707904270004, "grad_norm": 0.72265625, "learning_rate": 0.00019922486369109232, "loss": 4.7087, "step": 1157 }, { "epoch": 0.12007076709718811, "grad_norm": 0.875, "learning_rate": 0.00019922351326551288, "loss": 4.764, "step": 1158 }, { "epoch": 0.12017445515167618, "grad_norm": 0.84765625, "learning_rate": 0.00019922216166920088, "loss": 4.751, "step": 1159 }, { "epoch": 0.12027814320616426, "grad_norm": 0.6875, "learning_rate": 0.00019922080890217222, "loss": 4.7546, "step": 1160 }, { "epoch": 0.12038183126065233, "grad_norm": 0.61328125, "learning_rate": 0.00019921945496444293, "loss": 4.7621, "step": 1161 }, { "epoch": 0.1204855193151404, "grad_norm": 0.70703125, "learning_rate": 0.00019921809985602894, "loss": 4.729, "step": 1162 }, { "epoch": 0.12058920736962847, "grad_norm": 0.71875, "learning_rate": 0.00019921674357694624, "loss": 4.8148, "step": 1163 }, { "epoch": 0.12069289542411654, "grad_norm": 0.60546875, "learning_rate": 0.00019921538612721084, "loss": 4.7172, "step": 1164 }, { "epoch": 0.12079658347860461, "grad_norm": 0.83984375, "learning_rate": 0.0001992140275068388, "loss": 4.7725, "step": 1165 }, { "epoch": 0.12090027153309268, "grad_norm": 0.7578125, "learning_rate": 0.00019921266771584604, "loss": 4.7636, "step": 1166 }, { "epoch": 0.12100395958758077, "grad_norm": 0.609375, "learning_rate": 0.0001992113067542487, "loss": 4.7482, "step": 1167 }, { "epoch": 0.12110764764206884, "grad_norm": 0.72265625, "learning_rate": 0.00019920994462206283, "loss": 4.7445, "step": 1168 }, { "epoch": 0.12121133569655691, "grad_norm": 0.76171875, "learning_rate": 0.0001992085813193045, "loss": 4.8108, "step": 1169 }, { "epoch": 0.12131502375104498, "grad_norm": 0.6875, "learning_rate": 0.00019920721684598975, "loss": 4.7589, "step": 1170 }, { "epoch": 0.12141871180553306, "grad_norm": 0.5703125, "learning_rate": 0.0001992058512021347, "loss": 4.7789, "step": 1171 }, { "epoch": 0.12152239986002113, "grad_norm": 0.6171875, "learning_rate": 0.0001992044843877555, "loss": 4.757, "step": 1172 }, { "epoch": 0.1216260879145092, "grad_norm": 0.7421875, "learning_rate": 0.00019920311640286823, "loss": 4.7454, "step": 1173 }, { "epoch": 0.12172977596899727, "grad_norm": 0.66796875, "learning_rate": 0.00019920174724748902, "loss": 4.7181, "step": 1174 }, { "epoch": 0.12183346402348534, "grad_norm": 0.55859375, "learning_rate": 0.00019920037692163409, "loss": 4.7726, "step": 1175 }, { "epoch": 0.12193715207797341, "grad_norm": 0.609375, "learning_rate": 0.00019919900542531956, "loss": 4.7689, "step": 1176 }, { "epoch": 0.12204084013246148, "grad_norm": 0.66015625, "learning_rate": 0.00019919763275856164, "loss": 4.7677, "step": 1177 }, { "epoch": 0.12214452818694957, "grad_norm": 0.671875, "learning_rate": 0.0001991962589213765, "loss": 4.77, "step": 1178 }, { "epoch": 0.12224821624143764, "grad_norm": 0.55078125, "learning_rate": 0.00019919488391378034, "loss": 4.7638, "step": 1179 }, { "epoch": 0.12235190429592571, "grad_norm": 0.6171875, "learning_rate": 0.0001991935077357894, "loss": 4.7877, "step": 1180 }, { "epoch": 0.12245559235041378, "grad_norm": 0.609375, "learning_rate": 0.00019919213038741996, "loss": 4.7555, "step": 1181 }, { "epoch": 0.12255928040490185, "grad_norm": 0.71484375, "learning_rate": 0.00019919075186868824, "loss": 4.7536, "step": 1182 }, { "epoch": 0.12266296845938993, "grad_norm": 0.703125, "learning_rate": 0.00019918937217961043, "loss": 4.7577, "step": 1183 }, { "epoch": 0.122766656513878, "grad_norm": 0.578125, "learning_rate": 0.0001991879913202029, "loss": 4.7453, "step": 1184 }, { "epoch": 0.12287034456836607, "grad_norm": 0.5625, "learning_rate": 0.00019918660929048196, "loss": 4.8053, "step": 1185 }, { "epoch": 0.12297403262285414, "grad_norm": 0.61328125, "learning_rate": 0.00019918522609046387, "loss": 4.7143, "step": 1186 }, { "epoch": 0.12307772067734221, "grad_norm": 0.609375, "learning_rate": 0.00019918384172016494, "loss": 4.7592, "step": 1187 }, { "epoch": 0.12318140873183028, "grad_norm": 0.6484375, "learning_rate": 0.0001991824561796015, "loss": 4.7616, "step": 1188 }, { "epoch": 0.12328509678631835, "grad_norm": 0.65625, "learning_rate": 0.00019918106946878995, "loss": 4.7261, "step": 1189 }, { "epoch": 0.12338878484080644, "grad_norm": 0.68359375, "learning_rate": 0.00019917968158774657, "loss": 4.7459, "step": 1190 }, { "epoch": 0.12349247289529451, "grad_norm": 0.71484375, "learning_rate": 0.00019917829253648784, "loss": 4.7121, "step": 1191 }, { "epoch": 0.12359616094978258, "grad_norm": 0.66796875, "learning_rate": 0.00019917690231503006, "loss": 4.7695, "step": 1192 }, { "epoch": 0.12369984900427065, "grad_norm": 0.5703125, "learning_rate": 0.00019917551092338967, "loss": 4.768, "step": 1193 }, { "epoch": 0.12380353705875873, "grad_norm": 0.61328125, "learning_rate": 0.00019917411836158308, "loss": 4.7858, "step": 1194 }, { "epoch": 0.1239072251132468, "grad_norm": 0.69140625, "learning_rate": 0.00019917272462962674, "loss": 4.776, "step": 1195 }, { "epoch": 0.12401091316773487, "grad_norm": 0.63671875, "learning_rate": 0.0001991713297275371, "loss": 4.77, "step": 1196 }, { "epoch": 0.12411460122222294, "grad_norm": 0.6640625, "learning_rate": 0.00019916993365533056, "loss": 4.7629, "step": 1197 }, { "epoch": 0.12421828927671101, "grad_norm": 0.76953125, "learning_rate": 0.00019916853641302365, "loss": 4.7412, "step": 1198 }, { "epoch": 0.12432197733119908, "grad_norm": 0.8671875, "learning_rate": 0.0001991671380006328, "loss": 4.7251, "step": 1199 }, { "epoch": 0.12442566538568715, "grad_norm": 0.75, "learning_rate": 0.0001991657384181746, "loss": 4.7247, "step": 1200 }, { "epoch": 0.12452935344017524, "grad_norm": 0.79296875, "learning_rate": 0.00019916433766566547, "loss": 4.744, "step": 1201 }, { "epoch": 0.12463304149466331, "grad_norm": 0.73046875, "learning_rate": 0.000199162935743122, "loss": 4.7852, "step": 1202 }, { "epoch": 0.12473672954915138, "grad_norm": 0.80859375, "learning_rate": 0.0001991615326505607, "loss": 4.76, "step": 1203 }, { "epoch": 0.12484041760363945, "grad_norm": 0.95703125, "learning_rate": 0.00019916012838799813, "loss": 4.7716, "step": 1204 }, { "epoch": 0.12494410565812752, "grad_norm": 1.0546875, "learning_rate": 0.00019915872295545086, "loss": 4.7749, "step": 1205 }, { "epoch": 0.1250477937126156, "grad_norm": 1.0, "learning_rate": 0.00019915731635293548, "loss": 4.7514, "step": 1206 }, { "epoch": 0.12515148176710367, "grad_norm": 1.1484375, "learning_rate": 0.00019915590858046858, "loss": 4.765, "step": 1207 }, { "epoch": 0.12525516982159174, "grad_norm": 0.90625, "learning_rate": 0.00019915449963806676, "loss": 4.7111, "step": 1208 }, { "epoch": 0.1253588578760798, "grad_norm": 1.4140625, "learning_rate": 0.0001991530895257467, "loss": 4.806, "step": 1209 }, { "epoch": 0.12546254593056788, "grad_norm": 0.734375, "learning_rate": 0.00019915167824352493, "loss": 4.7864, "step": 1210 }, { "epoch": 0.12556623398505595, "grad_norm": 1.28125, "learning_rate": 0.0001991502657914182, "loss": 4.7845, "step": 1211 }, { "epoch": 0.12566992203954402, "grad_norm": 1.078125, "learning_rate": 0.00019914885216944312, "loss": 4.7299, "step": 1212 }, { "epoch": 0.1257736100940321, "grad_norm": 1.25, "learning_rate": 0.0001991474373776164, "loss": 4.7712, "step": 1213 }, { "epoch": 0.12587729814852017, "grad_norm": 0.74609375, "learning_rate": 0.00019914602141595476, "loss": 4.7366, "step": 1214 }, { "epoch": 0.12598098620300824, "grad_norm": 1.40625, "learning_rate": 0.0001991446042844748, "loss": 4.7595, "step": 1215 }, { "epoch": 0.1260846742574963, "grad_norm": 0.828125, "learning_rate": 0.00019914318598319338, "loss": 4.7051, "step": 1216 }, { "epoch": 0.12618836231198438, "grad_norm": 1.609375, "learning_rate": 0.0001991417665121271, "loss": 4.7561, "step": 1217 }, { "epoch": 0.12629205036647248, "grad_norm": 1.09375, "learning_rate": 0.0001991403458712928, "loss": 4.7613, "step": 1218 }, { "epoch": 0.12639573842096055, "grad_norm": 2.3125, "learning_rate": 0.00019913892406070723, "loss": 4.7776, "step": 1219 }, { "epoch": 0.12649942647544862, "grad_norm": 2.0625, "learning_rate": 0.00019913750108038715, "loss": 4.7397, "step": 1220 }, { "epoch": 0.1266031145299367, "grad_norm": 1.4140625, "learning_rate": 0.00019913607693034934, "loss": 4.7404, "step": 1221 }, { "epoch": 0.12670680258442477, "grad_norm": 1.4921875, "learning_rate": 0.0001991346516106106, "loss": 4.7672, "step": 1222 }, { "epoch": 0.12681049063891284, "grad_norm": 1.2734375, "learning_rate": 0.00019913322512118774, "loss": 4.7471, "step": 1223 }, { "epoch": 0.1269141786934009, "grad_norm": 1.578125, "learning_rate": 0.00019913179746209765, "loss": 4.8062, "step": 1224 }, { "epoch": 0.12701786674788898, "grad_norm": 1.2890625, "learning_rate": 0.00019913036863335713, "loss": 4.776, "step": 1225 }, { "epoch": 0.12712155480237705, "grad_norm": 1.5390625, "learning_rate": 0.00019912893863498305, "loss": 4.7452, "step": 1226 }, { "epoch": 0.12722524285686512, "grad_norm": 1.3125, "learning_rate": 0.00019912750746699226, "loss": 4.7816, "step": 1227 }, { "epoch": 0.1273289309113532, "grad_norm": 1.234375, "learning_rate": 0.0001991260751294017, "loss": 4.719, "step": 1228 }, { "epoch": 0.12743261896584127, "grad_norm": 1.21875, "learning_rate": 0.00019912464162222818, "loss": 4.7678, "step": 1229 }, { "epoch": 0.12753630702032934, "grad_norm": 1.140625, "learning_rate": 0.0001991232069454887, "loss": 4.7393, "step": 1230 }, { "epoch": 0.1276399950748174, "grad_norm": 0.9453125, "learning_rate": 0.00019912177109920016, "loss": 4.7238, "step": 1231 }, { "epoch": 0.12774368312930548, "grad_norm": 1.1328125, "learning_rate": 0.0001991203340833795, "loss": 4.7208, "step": 1232 }, { "epoch": 0.12784737118379355, "grad_norm": 1.0546875, "learning_rate": 0.00019911889589804366, "loss": 4.7451, "step": 1233 }, { "epoch": 0.12795105923828162, "grad_norm": 0.90234375, "learning_rate": 0.00019911745654320963, "loss": 4.7744, "step": 1234 }, { "epoch": 0.1280547472927697, "grad_norm": 1.1015625, "learning_rate": 0.00019911601601889438, "loss": 4.7203, "step": 1235 }, { "epoch": 0.12815843534725777, "grad_norm": 0.78515625, "learning_rate": 0.0001991145743251149, "loss": 4.7109, "step": 1236 }, { "epoch": 0.12826212340174584, "grad_norm": 0.984375, "learning_rate": 0.00019911313146188823, "loss": 4.7083, "step": 1237 }, { "epoch": 0.1283658114562339, "grad_norm": 1.1875, "learning_rate": 0.00019911168742923138, "loss": 4.711, "step": 1238 }, { "epoch": 0.12846949951072198, "grad_norm": 0.82421875, "learning_rate": 0.00019911024222716138, "loss": 4.754, "step": 1239 }, { "epoch": 0.12857318756521008, "grad_norm": 0.83984375, "learning_rate": 0.0001991087958556953, "loss": 4.7559, "step": 1240 }, { "epoch": 0.12867687561969815, "grad_norm": 0.75, "learning_rate": 0.00019910734831485015, "loss": 4.7452, "step": 1241 }, { "epoch": 0.12878056367418622, "grad_norm": 0.69921875, "learning_rate": 0.00019910589960464304, "loss": 4.7699, "step": 1242 }, { "epoch": 0.1288842517286743, "grad_norm": 0.80078125, "learning_rate": 0.00019910444972509112, "loss": 4.7216, "step": 1243 }, { "epoch": 0.12898793978316236, "grad_norm": 0.67578125, "learning_rate": 0.00019910299867621146, "loss": 4.7461, "step": 1244 }, { "epoch": 0.12909162783765044, "grad_norm": 0.70703125, "learning_rate": 0.00019910154645802112, "loss": 4.7232, "step": 1245 }, { "epoch": 0.1291953158921385, "grad_norm": 0.82421875, "learning_rate": 0.00019910009307053735, "loss": 4.7075, "step": 1246 }, { "epoch": 0.12929900394662658, "grad_norm": 0.71484375, "learning_rate": 0.00019909863851377718, "loss": 4.7002, "step": 1247 }, { "epoch": 0.12940269200111465, "grad_norm": 0.73828125, "learning_rate": 0.00019909718278775785, "loss": 4.734, "step": 1248 }, { "epoch": 0.12950638005560272, "grad_norm": 0.6015625, "learning_rate": 0.0001990957258924965, "loss": 4.731, "step": 1249 }, { "epoch": 0.1296100681100908, "grad_norm": 0.640625, "learning_rate": 0.00019909426782801037, "loss": 4.7217, "step": 1250 }, { "epoch": 0.12971375616457886, "grad_norm": 0.67578125, "learning_rate": 0.00019909280859431658, "loss": 4.723, "step": 1251 }, { "epoch": 0.12981744421906694, "grad_norm": 0.59765625, "learning_rate": 0.00019909134819143243, "loss": 4.7431, "step": 1252 }, { "epoch": 0.129921132273555, "grad_norm": 0.6328125, "learning_rate": 0.00019908988661937512, "loss": 4.7613, "step": 1253 }, { "epoch": 0.13002482032804308, "grad_norm": 0.66796875, "learning_rate": 0.00019908842387816183, "loss": 4.7186, "step": 1254 }, { "epoch": 0.13012850838253115, "grad_norm": 0.578125, "learning_rate": 0.00019908695996780993, "loss": 4.7484, "step": 1255 }, { "epoch": 0.13023219643701922, "grad_norm": 0.56640625, "learning_rate": 0.00019908549488833663, "loss": 4.7378, "step": 1256 }, { "epoch": 0.1303358844915073, "grad_norm": 0.6640625, "learning_rate": 0.00019908402863975925, "loss": 4.7338, "step": 1257 }, { "epoch": 0.13043957254599536, "grad_norm": 0.515625, "learning_rate": 0.00019908256122209506, "loss": 4.7363, "step": 1258 }, { "epoch": 0.13054326060048344, "grad_norm": 0.64453125, "learning_rate": 0.00019908109263536134, "loss": 4.7391, "step": 1259 }, { "epoch": 0.1306469486549715, "grad_norm": 0.6015625, "learning_rate": 0.00019907962287957548, "loss": 4.7361, "step": 1260 }, { "epoch": 0.13075063670945958, "grad_norm": 0.6328125, "learning_rate": 0.00019907815195475484, "loss": 4.7187, "step": 1261 }, { "epoch": 0.13085432476394768, "grad_norm": 0.5390625, "learning_rate": 0.0001990766798609167, "loss": 4.7298, "step": 1262 }, { "epoch": 0.13095801281843575, "grad_norm": 0.61328125, "learning_rate": 0.0001990752065980785, "loss": 4.7505, "step": 1263 }, { "epoch": 0.13106170087292382, "grad_norm": 0.57421875, "learning_rate": 0.00019907373216625755, "loss": 4.6917, "step": 1264 }, { "epoch": 0.1311653889274119, "grad_norm": 0.466796875, "learning_rate": 0.0001990722565654713, "loss": 4.7232, "step": 1265 }, { "epoch": 0.13126907698189996, "grad_norm": 0.578125, "learning_rate": 0.00019907077979573713, "loss": 4.722, "step": 1266 }, { "epoch": 0.13137276503638803, "grad_norm": 0.54296875, "learning_rate": 0.0001990693018570725, "loss": 4.734, "step": 1267 }, { "epoch": 0.1314764530908761, "grad_norm": 0.51953125, "learning_rate": 0.00019906782274949482, "loss": 4.7565, "step": 1268 }, { "epoch": 0.13158014114536418, "grad_norm": 0.58984375, "learning_rate": 0.0001990663424730216, "loss": 4.7149, "step": 1269 }, { "epoch": 0.13168382919985225, "grad_norm": 0.5390625, "learning_rate": 0.0001990648610276702, "loss": 4.7146, "step": 1270 }, { "epoch": 0.13178751725434032, "grad_norm": 0.546875, "learning_rate": 0.00019906337841345818, "loss": 4.7258, "step": 1271 }, { "epoch": 0.1318912053088284, "grad_norm": 0.498046875, "learning_rate": 0.00019906189463040299, "loss": 4.7316, "step": 1272 }, { "epoch": 0.13199489336331646, "grad_norm": 0.5859375, "learning_rate": 0.00019906040967852215, "loss": 4.7284, "step": 1273 }, { "epoch": 0.13209858141780453, "grad_norm": 0.59375, "learning_rate": 0.0001990589235578332, "loss": 4.7609, "step": 1274 }, { "epoch": 0.1322022694722926, "grad_norm": 0.63671875, "learning_rate": 0.00019905743626835368, "loss": 4.7654, "step": 1275 }, { "epoch": 0.13230595752678068, "grad_norm": 0.59765625, "learning_rate": 0.0001990559478101011, "loss": 4.7341, "step": 1276 }, { "epoch": 0.13240964558126875, "grad_norm": 0.6640625, "learning_rate": 0.00019905445818309305, "loss": 4.7512, "step": 1277 }, { "epoch": 0.13251333363575682, "grad_norm": 0.57421875, "learning_rate": 0.00019905296738734709, "loss": 4.6994, "step": 1278 }, { "epoch": 0.1326170216902449, "grad_norm": 0.69921875, "learning_rate": 0.00019905147542288086, "loss": 4.7447, "step": 1279 }, { "epoch": 0.13272070974473296, "grad_norm": 0.8046875, "learning_rate": 0.0001990499822897119, "loss": 4.7525, "step": 1280 }, { "epoch": 0.13282439779922103, "grad_norm": 0.73046875, "learning_rate": 0.00019904848798785781, "loss": 4.7446, "step": 1281 }, { "epoch": 0.1329280858537091, "grad_norm": 0.671875, "learning_rate": 0.00019904699251733628, "loss": 4.7214, "step": 1282 }, { "epoch": 0.13303177390819718, "grad_norm": 0.6484375, "learning_rate": 0.00019904549587816494, "loss": 4.7364, "step": 1283 }, { "epoch": 0.13313546196268525, "grad_norm": 0.7265625, "learning_rate": 0.00019904399807036145, "loss": 4.7707, "step": 1284 }, { "epoch": 0.13323915001717335, "grad_norm": 0.73046875, "learning_rate": 0.00019904249909394347, "loss": 4.7137, "step": 1285 }, { "epoch": 0.13334283807166142, "grad_norm": 0.671875, "learning_rate": 0.0001990409989489287, "loss": 4.7243, "step": 1286 }, { "epoch": 0.1334465261261495, "grad_norm": 0.625, "learning_rate": 0.00019903949763533483, "loss": 4.7646, "step": 1287 }, { "epoch": 0.13355021418063756, "grad_norm": 0.6015625, "learning_rate": 0.00019903799515317956, "loss": 4.6683, "step": 1288 }, { "epoch": 0.13365390223512563, "grad_norm": 0.55859375, "learning_rate": 0.00019903649150248068, "loss": 4.7398, "step": 1289 }, { "epoch": 0.1337575902896137, "grad_norm": 0.62109375, "learning_rate": 0.00019903498668325583, "loss": 4.7181, "step": 1290 }, { "epoch": 0.13386127834410178, "grad_norm": 0.50390625, "learning_rate": 0.00019903348069552285, "loss": 4.7208, "step": 1291 }, { "epoch": 0.13396496639858985, "grad_norm": 0.5703125, "learning_rate": 0.0001990319735392995, "loss": 4.7317, "step": 1292 }, { "epoch": 0.13406865445307792, "grad_norm": 0.67578125, "learning_rate": 0.00019903046521460352, "loss": 4.7149, "step": 1293 }, { "epoch": 0.134172342507566, "grad_norm": 0.5234375, "learning_rate": 0.00019902895572145274, "loss": 4.6968, "step": 1294 }, { "epoch": 0.13427603056205406, "grad_norm": 0.55078125, "learning_rate": 0.00019902744505986494, "loss": 4.7335, "step": 1295 }, { "epoch": 0.13437971861654213, "grad_norm": 0.60546875, "learning_rate": 0.00019902593322985797, "loss": 4.6848, "step": 1296 }, { "epoch": 0.1344834066710302, "grad_norm": 0.57421875, "learning_rate": 0.0001990244202314497, "loss": 4.7019, "step": 1297 }, { "epoch": 0.13458709472551827, "grad_norm": 0.6328125, "learning_rate": 0.0001990229060646579, "loss": 4.7149, "step": 1298 }, { "epoch": 0.13469078278000635, "grad_norm": 0.75390625, "learning_rate": 0.0001990213907295005, "loss": 4.7474, "step": 1299 }, { "epoch": 0.13479447083449442, "grad_norm": 0.6796875, "learning_rate": 0.00019901987422599535, "loss": 4.694, "step": 1300 }, { "epoch": 0.1348981588889825, "grad_norm": 0.6015625, "learning_rate": 0.00019901835655416038, "loss": 4.677, "step": 1301 }, { "epoch": 0.13500184694347056, "grad_norm": 0.63671875, "learning_rate": 0.00019901683771401344, "loss": 4.7183, "step": 1302 }, { "epoch": 0.13510553499795863, "grad_norm": 0.62890625, "learning_rate": 0.0001990153177055725, "loss": 4.7433, "step": 1303 }, { "epoch": 0.1352092230524467, "grad_norm": 0.65234375, "learning_rate": 0.00019901379652885543, "loss": 4.7238, "step": 1304 }, { "epoch": 0.13531291110693477, "grad_norm": 0.69921875, "learning_rate": 0.00019901227418388028, "loss": 4.7238, "step": 1305 }, { "epoch": 0.13541659916142285, "grad_norm": 0.53125, "learning_rate": 0.00019901075067066493, "loss": 4.7488, "step": 1306 }, { "epoch": 0.13552028721591095, "grad_norm": 0.6953125, "learning_rate": 0.00019900922598922738, "loss": 4.7578, "step": 1307 }, { "epoch": 0.13562397527039902, "grad_norm": 0.74609375, "learning_rate": 0.00019900770013958562, "loss": 4.7248, "step": 1308 }, { "epoch": 0.1357276633248871, "grad_norm": 0.6015625, "learning_rate": 0.00019900617312175768, "loss": 4.6949, "step": 1309 }, { "epoch": 0.13583135137937516, "grad_norm": 0.5625, "learning_rate": 0.00019900464493576153, "loss": 4.7147, "step": 1310 }, { "epoch": 0.13593503943386323, "grad_norm": 0.63671875, "learning_rate": 0.00019900311558161522, "loss": 4.7156, "step": 1311 }, { "epoch": 0.1360387274883513, "grad_norm": 0.64453125, "learning_rate": 0.00019900158505933678, "loss": 4.7342, "step": 1312 }, { "epoch": 0.13614241554283937, "grad_norm": 0.55078125, "learning_rate": 0.0001990000533689443, "loss": 4.7456, "step": 1313 }, { "epoch": 0.13624610359732744, "grad_norm": 0.6796875, "learning_rate": 0.00019899852051045587, "loss": 4.7326, "step": 1314 }, { "epoch": 0.13634979165181552, "grad_norm": 0.71484375, "learning_rate": 0.00019899698648388948, "loss": 4.7254, "step": 1315 }, { "epoch": 0.1364534797063036, "grad_norm": 0.5703125, "learning_rate": 0.00019899545128926333, "loss": 4.7298, "step": 1316 }, { "epoch": 0.13655716776079166, "grad_norm": 0.5625, "learning_rate": 0.00019899391492659551, "loss": 4.7243, "step": 1317 }, { "epoch": 0.13666085581527973, "grad_norm": 0.59375, "learning_rate": 0.00019899237739590413, "loss": 4.723, "step": 1318 }, { "epoch": 0.1367645438697678, "grad_norm": 0.54296875, "learning_rate": 0.00019899083869720735, "loss": 4.7191, "step": 1319 }, { "epoch": 0.13686823192425587, "grad_norm": 0.546875, "learning_rate": 0.00019898929883052326, "loss": 4.6991, "step": 1320 }, { "epoch": 0.13697191997874394, "grad_norm": 0.71875, "learning_rate": 0.0001989877577958701, "loss": 4.6526, "step": 1321 }, { "epoch": 0.13707560803323202, "grad_norm": 0.5625, "learning_rate": 0.00019898621559326607, "loss": 4.6876, "step": 1322 }, { "epoch": 0.1371792960877201, "grad_norm": 0.51953125, "learning_rate": 0.00019898467222272928, "loss": 4.6841, "step": 1323 }, { "epoch": 0.13728298414220816, "grad_norm": 0.80078125, "learning_rate": 0.00019898312768427802, "loss": 4.677, "step": 1324 }, { "epoch": 0.13738667219669623, "grad_norm": 0.83203125, "learning_rate": 0.00019898158197793046, "loss": 4.7136, "step": 1325 }, { "epoch": 0.1374903602511843, "grad_norm": 0.6484375, "learning_rate": 0.00019898003510370488, "loss": 4.6611, "step": 1326 }, { "epoch": 0.13759404830567237, "grad_norm": 0.7109375, "learning_rate": 0.0001989784870616195, "loss": 4.6735, "step": 1327 }, { "epoch": 0.13769773636016044, "grad_norm": 0.8515625, "learning_rate": 0.00019897693785169261, "loss": 4.7221, "step": 1328 }, { "epoch": 0.13780142441464854, "grad_norm": 0.79296875, "learning_rate": 0.00019897538747394247, "loss": 4.6936, "step": 1329 }, { "epoch": 0.13790511246913661, "grad_norm": 0.64453125, "learning_rate": 0.00019897383592838738, "loss": 4.7377, "step": 1330 }, { "epoch": 0.1380088005236247, "grad_norm": 0.6796875, "learning_rate": 0.00019897228321504563, "loss": 4.7499, "step": 1331 }, { "epoch": 0.13811248857811276, "grad_norm": 0.9609375, "learning_rate": 0.00019897072933393559, "loss": 4.705, "step": 1332 }, { "epoch": 0.13821617663260083, "grad_norm": 1.0390625, "learning_rate": 0.00019896917428507553, "loss": 4.719, "step": 1333 }, { "epoch": 0.1383198646870889, "grad_norm": 1.0546875, "learning_rate": 0.00019896761806848385, "loss": 4.7109, "step": 1334 }, { "epoch": 0.13842355274157697, "grad_norm": 1.109375, "learning_rate": 0.00019896606068417887, "loss": 4.7314, "step": 1335 }, { "epoch": 0.13852724079606504, "grad_norm": 1.140625, "learning_rate": 0.000198964502132179, "loss": 4.7162, "step": 1336 }, { "epoch": 0.13863092885055311, "grad_norm": 0.77734375, "learning_rate": 0.00019896294241250262, "loss": 4.7474, "step": 1337 }, { "epoch": 0.13873461690504119, "grad_norm": 0.67578125, "learning_rate": 0.00019896138152516812, "loss": 4.6801, "step": 1338 }, { "epoch": 0.13883830495952926, "grad_norm": 0.86328125, "learning_rate": 0.00019895981947019392, "loss": 4.6727, "step": 1339 }, { "epoch": 0.13894199301401733, "grad_norm": 1.09375, "learning_rate": 0.00019895825624759845, "loss": 4.7344, "step": 1340 }, { "epoch": 0.1390456810685054, "grad_norm": 0.9453125, "learning_rate": 0.00019895669185740017, "loss": 4.737, "step": 1341 }, { "epoch": 0.13914936912299347, "grad_norm": 1.015625, "learning_rate": 0.00019895512629961753, "loss": 4.6886, "step": 1342 }, { "epoch": 0.13925305717748154, "grad_norm": 0.88671875, "learning_rate": 0.000198953559574269, "loss": 4.658, "step": 1343 }, { "epoch": 0.13935674523196961, "grad_norm": 0.57421875, "learning_rate": 0.00019895199168137306, "loss": 4.6986, "step": 1344 }, { "epoch": 0.13946043328645769, "grad_norm": 0.78515625, "learning_rate": 0.0001989504226209482, "loss": 4.7303, "step": 1345 }, { "epoch": 0.13956412134094576, "grad_norm": 1.0859375, "learning_rate": 0.00019894885239301298, "loss": 4.6842, "step": 1346 }, { "epoch": 0.13966780939543383, "grad_norm": 0.78125, "learning_rate": 0.0001989472809975859, "loss": 4.6811, "step": 1347 }, { "epoch": 0.1397714974499219, "grad_norm": 0.72265625, "learning_rate": 0.00019894570843468544, "loss": 4.713, "step": 1348 }, { "epoch": 0.13987518550440997, "grad_norm": 0.98828125, "learning_rate": 0.00019894413470433026, "loss": 4.6777, "step": 1349 }, { "epoch": 0.13997887355889804, "grad_norm": 0.875, "learning_rate": 0.00019894255980653887, "loss": 4.7175, "step": 1350 }, { "epoch": 0.14008256161338611, "grad_norm": 0.953125, "learning_rate": 0.00019894098374132987, "loss": 4.7008, "step": 1351 }, { "epoch": 0.1401862496678742, "grad_norm": 1.359375, "learning_rate": 0.0001989394065087218, "loss": 4.76, "step": 1352 }, { "epoch": 0.14028993772236228, "grad_norm": 0.74609375, "learning_rate": 0.00019893782810873338, "loss": 4.7276, "step": 1353 }, { "epoch": 0.14039362577685036, "grad_norm": 1.4296875, "learning_rate": 0.00019893624854138312, "loss": 4.7048, "step": 1354 }, { "epoch": 0.14049731383133843, "grad_norm": 0.8046875, "learning_rate": 0.00019893466780668972, "loss": 4.6713, "step": 1355 }, { "epoch": 0.1406010018858265, "grad_norm": 1.4609375, "learning_rate": 0.00019893308590467185, "loss": 4.7421, "step": 1356 }, { "epoch": 0.14070468994031457, "grad_norm": 0.88671875, "learning_rate": 0.0001989315028353481, "loss": 4.7306, "step": 1357 }, { "epoch": 0.14080837799480264, "grad_norm": 1.7109375, "learning_rate": 0.00019892991859873723, "loss": 4.7135, "step": 1358 }, { "epoch": 0.1409120660492907, "grad_norm": 1.3125, "learning_rate": 0.00019892833319485787, "loss": 4.7376, "step": 1359 }, { "epoch": 0.14101575410377878, "grad_norm": 2.25, "learning_rate": 0.00019892674662372876, "loss": 4.7339, "step": 1360 }, { "epoch": 0.14111944215826686, "grad_norm": 2.078125, "learning_rate": 0.0001989251588853686, "loss": 4.717, "step": 1361 }, { "epoch": 0.14122313021275493, "grad_norm": 1.359375, "learning_rate": 0.00019892356997979613, "loss": 4.7236, "step": 1362 }, { "epoch": 0.141326818267243, "grad_norm": 1.59375, "learning_rate": 0.0001989219799070301, "loss": 4.6997, "step": 1363 }, { "epoch": 0.14143050632173107, "grad_norm": 1.2578125, "learning_rate": 0.00019892038866708932, "loss": 4.6979, "step": 1364 }, { "epoch": 0.14153419437621914, "grad_norm": 2.1875, "learning_rate": 0.00019891879625999245, "loss": 4.6499, "step": 1365 }, { "epoch": 0.1416378824307072, "grad_norm": 1.7890625, "learning_rate": 0.00019891720268575837, "loss": 4.7374, "step": 1366 }, { "epoch": 0.14174157048519528, "grad_norm": 2.109375, "learning_rate": 0.00019891560794440587, "loss": 4.6959, "step": 1367 }, { "epoch": 0.14184525853968336, "grad_norm": 1.3828125, "learning_rate": 0.00019891401203595374, "loss": 4.7039, "step": 1368 }, { "epoch": 0.14194894659417143, "grad_norm": 2.609375, "learning_rate": 0.00019891241496042082, "loss": 4.7391, "step": 1369 }, { "epoch": 0.1420526346486595, "grad_norm": 2.40625, "learning_rate": 0.000198910816717826, "loss": 4.7635, "step": 1370 }, { "epoch": 0.14215632270314757, "grad_norm": 1.6171875, "learning_rate": 0.00019890921730818806, "loss": 4.7121, "step": 1371 }, { "epoch": 0.14226001075763564, "grad_norm": 1.6171875, "learning_rate": 0.00019890761673152591, "loss": 4.7292, "step": 1372 }, { "epoch": 0.1423636988121237, "grad_norm": 1.453125, "learning_rate": 0.00019890601498785844, "loss": 4.71, "step": 1373 }, { "epoch": 0.1424673868666118, "grad_norm": 1.421875, "learning_rate": 0.00019890441207720454, "loss": 4.7202, "step": 1374 }, { "epoch": 0.14257107492109988, "grad_norm": 1.25, "learning_rate": 0.0001989028079995831, "loss": 4.7506, "step": 1375 }, { "epoch": 0.14267476297558795, "grad_norm": 1.5390625, "learning_rate": 0.00019890120275501308, "loss": 4.684, "step": 1376 }, { "epoch": 0.14277845103007603, "grad_norm": 1.0703125, "learning_rate": 0.00019889959634351344, "loss": 4.736, "step": 1377 }, { "epoch": 0.1428821390845641, "grad_norm": 2.40625, "learning_rate": 0.0001988979887651031, "loss": 4.7732, "step": 1378 }, { "epoch": 0.14298582713905217, "grad_norm": 2.03125, "learning_rate": 0.00019889638001980103, "loss": 4.7442, "step": 1379 }, { "epoch": 0.14308951519354024, "grad_norm": 1.7890625, "learning_rate": 0.00019889477010762618, "loss": 4.6948, "step": 1380 }, { "epoch": 0.1431932032480283, "grad_norm": 1.53125, "learning_rate": 0.00019889315902859762, "loss": 4.7068, "step": 1381 }, { "epoch": 0.14329689130251638, "grad_norm": 1.7265625, "learning_rate": 0.0001988915467827343, "loss": 4.7583, "step": 1382 }, { "epoch": 0.14340057935700445, "grad_norm": 1.3359375, "learning_rate": 0.00019888993337005526, "loss": 4.7032, "step": 1383 }, { "epoch": 0.14350426741149253, "grad_norm": 1.5859375, "learning_rate": 0.00019888831879057953, "loss": 4.7053, "step": 1384 }, { "epoch": 0.1436079554659806, "grad_norm": 1.0859375, "learning_rate": 0.00019888670304432619, "loss": 4.6836, "step": 1385 }, { "epoch": 0.14371164352046867, "grad_norm": 1.8828125, "learning_rate": 0.00019888508613131426, "loss": 4.7571, "step": 1386 }, { "epoch": 0.14381533157495674, "grad_norm": 1.3515625, "learning_rate": 0.00019888346805156283, "loss": 4.6962, "step": 1387 }, { "epoch": 0.1439190196294448, "grad_norm": 2.1875, "learning_rate": 0.00019888184880509103, "loss": 4.7161, "step": 1388 }, { "epoch": 0.14402270768393288, "grad_norm": 2.03125, "learning_rate": 0.00019888022839191792, "loss": 4.704, "step": 1389 }, { "epoch": 0.14412639573842095, "grad_norm": 1.5390625, "learning_rate": 0.00019887860681206266, "loss": 4.7299, "step": 1390 }, { "epoch": 0.14423008379290903, "grad_norm": 1.5390625, "learning_rate": 0.00019887698406554431, "loss": 4.7452, "step": 1391 }, { "epoch": 0.1443337718473971, "grad_norm": 1.28125, "learning_rate": 0.00019887536015238212, "loss": 4.7157, "step": 1392 }, { "epoch": 0.14443745990188517, "grad_norm": 1.171875, "learning_rate": 0.00019887373507259518, "loss": 4.6886, "step": 1393 }, { "epoch": 0.14454114795637324, "grad_norm": 1.2734375, "learning_rate": 0.00019887210882620266, "loss": 4.7412, "step": 1394 }, { "epoch": 0.1446448360108613, "grad_norm": 1.09375, "learning_rate": 0.00019887048141322376, "loss": 4.7263, "step": 1395 }, { "epoch": 0.1447485240653494, "grad_norm": 1.109375, "learning_rate": 0.00019886885283367772, "loss": 4.7014, "step": 1396 }, { "epoch": 0.14485221211983748, "grad_norm": 0.77734375, "learning_rate": 0.00019886722308758373, "loss": 4.6781, "step": 1397 }, { "epoch": 0.14495590017432555, "grad_norm": 1.125, "learning_rate": 0.00019886559217496098, "loss": 4.6926, "step": 1398 }, { "epoch": 0.14505958822881362, "grad_norm": 0.84375, "learning_rate": 0.00019886396009582876, "loss": 4.7069, "step": 1399 }, { "epoch": 0.1451632762833017, "grad_norm": 0.78515625, "learning_rate": 0.00019886232685020633, "loss": 4.6784, "step": 1400 }, { "epoch": 0.14526696433778977, "grad_norm": 0.87109375, "learning_rate": 0.00019886069243811293, "loss": 4.6576, "step": 1401 }, { "epoch": 0.14537065239227784, "grad_norm": 0.7421875, "learning_rate": 0.0001988590568595679, "loss": 4.6882, "step": 1402 }, { "epoch": 0.1454743404467659, "grad_norm": 0.703125, "learning_rate": 0.00019885742011459045, "loss": 4.7041, "step": 1403 }, { "epoch": 0.14557802850125398, "grad_norm": 0.70703125, "learning_rate": 0.00019885578220319995, "loss": 4.714, "step": 1404 }, { "epoch": 0.14568171655574205, "grad_norm": 0.6953125, "learning_rate": 0.00019885414312541573, "loss": 4.6943, "step": 1405 }, { "epoch": 0.14578540461023012, "grad_norm": 0.61328125, "learning_rate": 0.00019885250288125713, "loss": 4.6866, "step": 1406 }, { "epoch": 0.1458890926647182, "grad_norm": 0.71875, "learning_rate": 0.00019885086147074344, "loss": 4.6937, "step": 1407 }, { "epoch": 0.14599278071920627, "grad_norm": 0.63671875, "learning_rate": 0.0001988492188938941, "loss": 4.7627, "step": 1408 }, { "epoch": 0.14609646877369434, "grad_norm": 0.640625, "learning_rate": 0.00019884757515072844, "loss": 4.7113, "step": 1409 }, { "epoch": 0.1462001568281824, "grad_norm": 0.640625, "learning_rate": 0.00019884593024126592, "loss": 4.6828, "step": 1410 }, { "epoch": 0.14630384488267048, "grad_norm": 0.56640625, "learning_rate": 0.0001988442841655259, "loss": 4.7033, "step": 1411 }, { "epoch": 0.14640753293715855, "grad_norm": 0.68359375, "learning_rate": 0.00019884263692352777, "loss": 4.7208, "step": 1412 }, { "epoch": 0.14651122099164662, "grad_norm": 0.59765625, "learning_rate": 0.00019884098851529104, "loss": 4.7189, "step": 1413 }, { "epoch": 0.1466149090461347, "grad_norm": 0.65625, "learning_rate": 0.00019883933894083514, "loss": 4.6782, "step": 1414 }, { "epoch": 0.14671859710062277, "grad_norm": 0.67578125, "learning_rate": 0.00019883768820017948, "loss": 4.7375, "step": 1415 }, { "epoch": 0.14682228515511084, "grad_norm": 0.68359375, "learning_rate": 0.0001988360362933436, "loss": 4.7187, "step": 1416 }, { "epoch": 0.1469259732095989, "grad_norm": 0.61328125, "learning_rate": 0.00019883438322034695, "loss": 4.7011, "step": 1417 }, { "epoch": 0.14702966126408698, "grad_norm": 0.54296875, "learning_rate": 0.00019883272898120905, "loss": 4.7445, "step": 1418 }, { "epoch": 0.14713334931857508, "grad_norm": 0.65234375, "learning_rate": 0.00019883107357594943, "loss": 4.724, "step": 1419 }, { "epoch": 0.14723703737306315, "grad_norm": 0.625, "learning_rate": 0.0001988294170045876, "loss": 4.6845, "step": 1420 }, { "epoch": 0.14734072542755122, "grad_norm": 0.6015625, "learning_rate": 0.00019882775926714313, "loss": 4.7319, "step": 1421 }, { "epoch": 0.1474444134820393, "grad_norm": 0.6328125, "learning_rate": 0.00019882610036363557, "loss": 4.71, "step": 1422 }, { "epoch": 0.14754810153652737, "grad_norm": 0.828125, "learning_rate": 0.00019882444029408448, "loss": 4.7078, "step": 1423 }, { "epoch": 0.14765178959101544, "grad_norm": 0.6640625, "learning_rate": 0.00019882277905850946, "loss": 4.6876, "step": 1424 }, { "epoch": 0.1477554776455035, "grad_norm": 0.58984375, "learning_rate": 0.00019882111665693011, "loss": 4.6975, "step": 1425 }, { "epoch": 0.14785916569999158, "grad_norm": 0.77734375, "learning_rate": 0.00019881945308936603, "loss": 4.6452, "step": 1426 }, { "epoch": 0.14796285375447965, "grad_norm": 0.75390625, "learning_rate": 0.00019881778835583686, "loss": 4.7131, "step": 1427 }, { "epoch": 0.14806654180896772, "grad_norm": 0.54296875, "learning_rate": 0.00019881612245636226, "loss": 4.681, "step": 1428 }, { "epoch": 0.1481702298634558, "grad_norm": 0.66015625, "learning_rate": 0.00019881445539096185, "loss": 4.7163, "step": 1429 }, { "epoch": 0.14827391791794386, "grad_norm": 0.73828125, "learning_rate": 0.00019881278715965534, "loss": 4.7312, "step": 1430 }, { "epoch": 0.14837760597243194, "grad_norm": 0.6484375, "learning_rate": 0.00019881111776246234, "loss": 4.7217, "step": 1431 }, { "epoch": 0.14848129402692, "grad_norm": 0.74609375, "learning_rate": 0.00019880944719940263, "loss": 4.7345, "step": 1432 }, { "epoch": 0.14858498208140808, "grad_norm": 0.71484375, "learning_rate": 0.0001988077754704959, "loss": 4.6661, "step": 1433 }, { "epoch": 0.14868867013589615, "grad_norm": 0.68359375, "learning_rate": 0.0001988061025757619, "loss": 4.662, "step": 1434 }, { "epoch": 0.14879235819038422, "grad_norm": 0.94921875, "learning_rate": 0.00019880442851522029, "loss": 4.7057, "step": 1435 }, { "epoch": 0.1488960462448723, "grad_norm": 0.89453125, "learning_rate": 0.00019880275328889083, "loss": 4.6554, "step": 1436 }, { "epoch": 0.14899973429936036, "grad_norm": 0.76953125, "learning_rate": 0.00019880107689679337, "loss": 4.673, "step": 1437 }, { "epoch": 0.14910342235384844, "grad_norm": 0.6796875, "learning_rate": 0.00019879939933894762, "loss": 4.6325, "step": 1438 }, { "epoch": 0.1492071104083365, "grad_norm": 0.8515625, "learning_rate": 0.0001987977206153734, "loss": 4.7093, "step": 1439 }, { "epoch": 0.14931079846282458, "grad_norm": 0.71875, "learning_rate": 0.0001987960407260905, "loss": 4.6711, "step": 1440 }, { "epoch": 0.14941448651731268, "grad_norm": 0.6484375, "learning_rate": 0.00019879435967111876, "loss": 4.6034, "step": 1441 }, { "epoch": 0.14951817457180075, "grad_norm": 0.71875, "learning_rate": 0.000198792677450478, "loss": 4.713, "step": 1442 }, { "epoch": 0.14962186262628882, "grad_norm": 0.84375, "learning_rate": 0.00019879099406418807, "loss": 4.7042, "step": 1443 }, { "epoch": 0.1497255506807769, "grad_norm": 0.5703125, "learning_rate": 0.00019878930951226887, "loss": 4.6731, "step": 1444 }, { "epoch": 0.14982923873526496, "grad_norm": 0.69140625, "learning_rate": 0.00019878762379474022, "loss": 4.6662, "step": 1445 }, { "epoch": 0.14993292678975303, "grad_norm": 0.71484375, "learning_rate": 0.00019878593691162203, "loss": 4.6774, "step": 1446 }, { "epoch": 0.1500366148442411, "grad_norm": 0.78515625, "learning_rate": 0.00019878424886293422, "loss": 4.6983, "step": 1447 }, { "epoch": 0.15014030289872918, "grad_norm": 0.70703125, "learning_rate": 0.00019878255964869666, "loss": 4.6673, "step": 1448 }, { "epoch": 0.15024399095321725, "grad_norm": 0.62890625, "learning_rate": 0.00019878086926892934, "loss": 4.6991, "step": 1449 }, { "epoch": 0.15034767900770532, "grad_norm": 0.671875, "learning_rate": 0.00019877917772365215, "loss": 4.7173, "step": 1450 }, { "epoch": 0.1504513670621934, "grad_norm": 0.796875, "learning_rate": 0.0001987774850128851, "loss": 4.7385, "step": 1451 }, { "epoch": 0.15055505511668146, "grad_norm": 0.68359375, "learning_rate": 0.00019877579113664816, "loss": 4.7129, "step": 1452 }, { "epoch": 0.15065874317116953, "grad_norm": 0.64453125, "learning_rate": 0.00019877409609496126, "loss": 4.6592, "step": 1453 }, { "epoch": 0.1507624312256576, "grad_norm": 0.69140625, "learning_rate": 0.00019877239988784444, "loss": 4.7102, "step": 1454 }, { "epoch": 0.15086611928014568, "grad_norm": 0.640625, "learning_rate": 0.00019877070251531772, "loss": 4.7239, "step": 1455 }, { "epoch": 0.15096980733463375, "grad_norm": 0.66015625, "learning_rate": 0.0001987690039774011, "loss": 4.6529, "step": 1456 }, { "epoch": 0.15107349538912182, "grad_norm": 0.98828125, "learning_rate": 0.00019876730427411467, "loss": 4.7155, "step": 1457 }, { "epoch": 0.1511771834436099, "grad_norm": 1.109375, "learning_rate": 0.00019876560340547844, "loss": 4.7025, "step": 1458 }, { "epoch": 0.15128087149809796, "grad_norm": 0.69921875, "learning_rate": 0.00019876390137151247, "loss": 4.6933, "step": 1459 }, { "epoch": 0.15138455955258603, "grad_norm": 0.67578125, "learning_rate": 0.00019876219817223687, "loss": 4.6941, "step": 1460 }, { "epoch": 0.1514882476070741, "grad_norm": 0.8046875, "learning_rate": 0.00019876049380767173, "loss": 4.6765, "step": 1461 }, { "epoch": 0.15159193566156218, "grad_norm": 0.87109375, "learning_rate": 0.00019875878827783713, "loss": 4.6645, "step": 1462 }, { "epoch": 0.15169562371605028, "grad_norm": 0.93359375, "learning_rate": 0.00019875708158275326, "loss": 4.6656, "step": 1463 }, { "epoch": 0.15179931177053835, "grad_norm": 0.7265625, "learning_rate": 0.00019875537372244023, "loss": 4.6743, "step": 1464 }, { "epoch": 0.15190299982502642, "grad_norm": 0.5625, "learning_rate": 0.00019875366469691814, "loss": 4.7085, "step": 1465 }, { "epoch": 0.1520066878795145, "grad_norm": 0.703125, "learning_rate": 0.0001987519545062072, "loss": 4.7214, "step": 1466 }, { "epoch": 0.15211037593400256, "grad_norm": 0.73828125, "learning_rate": 0.0001987502431503276, "loss": 4.6776, "step": 1467 }, { "epoch": 0.15221406398849063, "grad_norm": 0.6015625, "learning_rate": 0.0001987485306292995, "loss": 4.7105, "step": 1468 }, { "epoch": 0.1523177520429787, "grad_norm": 0.63671875, "learning_rate": 0.0001987468169431431, "loss": 4.685, "step": 1469 }, { "epoch": 0.15242144009746678, "grad_norm": 0.59765625, "learning_rate": 0.0001987451020918787, "loss": 4.6538, "step": 1470 }, { "epoch": 0.15252512815195485, "grad_norm": 0.58984375, "learning_rate": 0.00019874338607552642, "loss": 4.6843, "step": 1471 }, { "epoch": 0.15262881620644292, "grad_norm": 0.62109375, "learning_rate": 0.00019874166889410658, "loss": 4.6666, "step": 1472 }, { "epoch": 0.152732504260931, "grad_norm": 0.546875, "learning_rate": 0.0001987399505476394, "loss": 4.6916, "step": 1473 }, { "epoch": 0.15283619231541906, "grad_norm": 0.6171875, "learning_rate": 0.0001987382310361452, "loss": 4.6385, "step": 1474 }, { "epoch": 0.15293988036990713, "grad_norm": 0.6328125, "learning_rate": 0.00019873651035964425, "loss": 4.6691, "step": 1475 }, { "epoch": 0.1530435684243952, "grad_norm": 0.482421875, "learning_rate": 0.00019873478851815683, "loss": 4.6766, "step": 1476 }, { "epoch": 0.15314725647888328, "grad_norm": 0.5546875, "learning_rate": 0.00019873306551170328, "loss": 4.6835, "step": 1477 }, { "epoch": 0.15325094453337135, "grad_norm": 0.58203125, "learning_rate": 0.00019873134134030393, "loss": 4.6331, "step": 1478 }, { "epoch": 0.15335463258785942, "grad_norm": 0.4609375, "learning_rate": 0.0001987296160039791, "loss": 4.6557, "step": 1479 }, { "epoch": 0.1534583206423475, "grad_norm": 0.5390625, "learning_rate": 0.00019872788950274918, "loss": 4.6338, "step": 1480 }, { "epoch": 0.15356200869683556, "grad_norm": 0.5234375, "learning_rate": 0.00019872616183663451, "loss": 4.6594, "step": 1481 }, { "epoch": 0.15366569675132363, "grad_norm": 0.609375, "learning_rate": 0.00019872443300565548, "loss": 4.6934, "step": 1482 }, { "epoch": 0.1537693848058117, "grad_norm": 0.55859375, "learning_rate": 0.0001987227030098325, "loss": 4.664, "step": 1483 }, { "epoch": 0.15387307286029978, "grad_norm": 0.46875, "learning_rate": 0.000198720971849186, "loss": 4.6917, "step": 1484 }, { "epoch": 0.15397676091478785, "grad_norm": 0.58984375, "learning_rate": 0.00019871923952373635, "loss": 4.6441, "step": 1485 }, { "epoch": 0.15408044896927595, "grad_norm": 0.5078125, "learning_rate": 0.00019871750603350408, "loss": 4.6871, "step": 1486 }, { "epoch": 0.15418413702376402, "grad_norm": 0.57421875, "learning_rate": 0.00019871577137850954, "loss": 4.6767, "step": 1487 }, { "epoch": 0.1542878250782521, "grad_norm": 0.61328125, "learning_rate": 0.00019871403555877327, "loss": 4.6942, "step": 1488 }, { "epoch": 0.15439151313274016, "grad_norm": 0.6015625, "learning_rate": 0.00019871229857431572, "loss": 4.6879, "step": 1489 }, { "epoch": 0.15449520118722823, "grad_norm": 0.57421875, "learning_rate": 0.0001987105604251574, "loss": 4.6799, "step": 1490 }, { "epoch": 0.1545988892417163, "grad_norm": 0.5234375, "learning_rate": 0.0001987088211113188, "loss": 4.6938, "step": 1491 }, { "epoch": 0.15470257729620437, "grad_norm": 0.6171875, "learning_rate": 0.00019870708063282044, "loss": 4.6613, "step": 1492 }, { "epoch": 0.15480626535069245, "grad_norm": 0.61328125, "learning_rate": 0.00019870533898968287, "loss": 4.6938, "step": 1493 }, { "epoch": 0.15490995340518052, "grad_norm": 0.51171875, "learning_rate": 0.00019870359618192663, "loss": 4.6946, "step": 1494 }, { "epoch": 0.1550136414596686, "grad_norm": 0.59375, "learning_rate": 0.0001987018522095723, "loss": 4.6413, "step": 1495 }, { "epoch": 0.15511732951415666, "grad_norm": 0.5625, "learning_rate": 0.00019870010707264045, "loss": 4.6721, "step": 1496 }, { "epoch": 0.15522101756864473, "grad_norm": 0.474609375, "learning_rate": 0.00019869836077115164, "loss": 4.7004, "step": 1497 }, { "epoch": 0.1553247056231328, "grad_norm": 0.52734375, "learning_rate": 0.00019869661330512654, "loss": 4.6585, "step": 1498 }, { "epoch": 0.15542839367762087, "grad_norm": 0.5390625, "learning_rate": 0.0001986948646745857, "loss": 4.6284, "step": 1499 }, { "epoch": 0.15553208173210895, "grad_norm": 0.474609375, "learning_rate": 0.0001986931148795498, "loss": 4.6635, "step": 1500 }, { "epoch": 0.15563576978659702, "grad_norm": 0.5390625, "learning_rate": 0.00019869136392003945, "loss": 4.6898, "step": 1501 }, { "epoch": 0.1557394578410851, "grad_norm": 0.51953125, "learning_rate": 0.00019868961179607536, "loss": 4.7494, "step": 1502 }, { "epoch": 0.15584314589557316, "grad_norm": 0.5, "learning_rate": 0.00019868785850767813, "loss": 4.6805, "step": 1503 }, { "epoch": 0.15594683395006123, "grad_norm": 0.478515625, "learning_rate": 0.0001986861040548685, "loss": 4.6183, "step": 1504 }, { "epoch": 0.1560505220045493, "grad_norm": 0.498046875, "learning_rate": 0.00019868434843766717, "loss": 4.6631, "step": 1505 }, { "epoch": 0.15615421005903737, "grad_norm": 0.48046875, "learning_rate": 0.00019868259165609482, "loss": 4.6455, "step": 1506 }, { "epoch": 0.15625789811352545, "grad_norm": 0.486328125, "learning_rate": 0.00019868083371017223, "loss": 4.6403, "step": 1507 }, { "epoch": 0.15636158616801354, "grad_norm": 0.5234375, "learning_rate": 0.0001986790745999201, "loss": 4.6782, "step": 1508 }, { "epoch": 0.15646527422250162, "grad_norm": 0.54296875, "learning_rate": 0.00019867731432535922, "loss": 4.6598, "step": 1509 }, { "epoch": 0.1565689622769897, "grad_norm": 0.578125, "learning_rate": 0.0001986755528865103, "loss": 4.6547, "step": 1510 }, { "epoch": 0.15667265033147776, "grad_norm": 0.53125, "learning_rate": 0.00019867379028339416, "loss": 4.6414, "step": 1511 }, { "epoch": 0.15677633838596583, "grad_norm": 0.56640625, "learning_rate": 0.0001986720265160316, "loss": 4.6567, "step": 1512 }, { "epoch": 0.1568800264404539, "grad_norm": 0.70703125, "learning_rate": 0.00019867026158444344, "loss": 4.6425, "step": 1513 }, { "epoch": 0.15698371449494197, "grad_norm": 0.66015625, "learning_rate": 0.0001986684954886505, "loss": 4.6946, "step": 1514 }, { "epoch": 0.15708740254943004, "grad_norm": 0.6328125, "learning_rate": 0.0001986667282286736, "loss": 4.693, "step": 1515 }, { "epoch": 0.15719109060391812, "grad_norm": 0.65625, "learning_rate": 0.0001986649598045336, "loss": 4.673, "step": 1516 }, { "epoch": 0.1572947786584062, "grad_norm": 0.640625, "learning_rate": 0.00019866319021625138, "loss": 4.6624, "step": 1517 }, { "epoch": 0.15739846671289426, "grad_norm": 0.75390625, "learning_rate": 0.0001986614194638478, "loss": 4.6387, "step": 1518 }, { "epoch": 0.15750215476738233, "grad_norm": 0.84765625, "learning_rate": 0.00019865964754734377, "loss": 4.6426, "step": 1519 }, { "epoch": 0.1576058428218704, "grad_norm": 0.65625, "learning_rate": 0.00019865787446676016, "loss": 4.6452, "step": 1520 }, { "epoch": 0.15770953087635847, "grad_norm": 0.65234375, "learning_rate": 0.00019865610022211795, "loss": 4.6182, "step": 1521 }, { "epoch": 0.15781321893084654, "grad_norm": 0.8359375, "learning_rate": 0.000198654324813438, "loss": 4.6999, "step": 1522 }, { "epoch": 0.15791690698533462, "grad_norm": 0.890625, "learning_rate": 0.00019865254824074136, "loss": 4.6654, "step": 1523 }, { "epoch": 0.1580205950398227, "grad_norm": 0.87890625, "learning_rate": 0.0001986507705040489, "loss": 4.7078, "step": 1524 }, { "epoch": 0.15812428309431076, "grad_norm": 0.65234375, "learning_rate": 0.0001986489916033816, "loss": 4.7079, "step": 1525 }, { "epoch": 0.15822797114879883, "grad_norm": 0.8125, "learning_rate": 0.0001986472115387605, "loss": 4.6934, "step": 1526 }, { "epoch": 0.1583316592032869, "grad_norm": 1.015625, "learning_rate": 0.00019864543031020658, "loss": 4.6808, "step": 1527 }, { "epoch": 0.15843534725777497, "grad_norm": 0.75390625, "learning_rate": 0.00019864364791774084, "loss": 4.6659, "step": 1528 }, { "epoch": 0.15853903531226304, "grad_norm": 0.6953125, "learning_rate": 0.00019864186436138433, "loss": 4.6941, "step": 1529 }, { "epoch": 0.15864272336675114, "grad_norm": 0.70703125, "learning_rate": 0.0001986400796411581, "loss": 4.682, "step": 1530 }, { "epoch": 0.15874641142123921, "grad_norm": 0.734375, "learning_rate": 0.0001986382937570832, "loss": 4.6594, "step": 1531 }, { "epoch": 0.15885009947572729, "grad_norm": 0.79296875, "learning_rate": 0.0001986365067091807, "loss": 4.681, "step": 1532 }, { "epoch": 0.15895378753021536, "grad_norm": 0.73828125, "learning_rate": 0.00019863471849747167, "loss": 4.7219, "step": 1533 }, { "epoch": 0.15905747558470343, "grad_norm": 0.828125, "learning_rate": 0.0001986329291219772, "loss": 4.6721, "step": 1534 }, { "epoch": 0.1591611636391915, "grad_norm": 0.6171875, "learning_rate": 0.00019863113858271846, "loss": 4.6567, "step": 1535 }, { "epoch": 0.15926485169367957, "grad_norm": 0.63671875, "learning_rate": 0.00019862934687971655, "loss": 4.6987, "step": 1536 }, { "epoch": 0.15936853974816764, "grad_norm": 0.78125, "learning_rate": 0.00019862755401299257, "loss": 4.6722, "step": 1537 }, { "epoch": 0.1594722278026557, "grad_norm": 0.85546875, "learning_rate": 0.00019862575998256773, "loss": 4.6555, "step": 1538 }, { "epoch": 0.15957591585714379, "grad_norm": 0.82421875, "learning_rate": 0.00019862396478846316, "loss": 4.6425, "step": 1539 }, { "epoch": 0.15967960391163186, "grad_norm": 0.796875, "learning_rate": 0.0001986221684307001, "loss": 4.6574, "step": 1540 }, { "epoch": 0.15978329196611993, "grad_norm": 0.765625, "learning_rate": 0.00019862037090929966, "loss": 4.6743, "step": 1541 }, { "epoch": 0.159886980020608, "grad_norm": 0.8046875, "learning_rate": 0.00019861857222428308, "loss": 4.6539, "step": 1542 }, { "epoch": 0.15999066807509607, "grad_norm": 0.80859375, "learning_rate": 0.00019861677237567162, "loss": 4.6581, "step": 1543 }, { "epoch": 0.16009435612958414, "grad_norm": 0.7890625, "learning_rate": 0.00019861497136348648, "loss": 4.6538, "step": 1544 }, { "epoch": 0.1601980441840722, "grad_norm": 0.77734375, "learning_rate": 0.0001986131691877489, "loss": 4.6637, "step": 1545 }, { "epoch": 0.16030173223856028, "grad_norm": 0.703125, "learning_rate": 0.00019861136584848019, "loss": 4.6621, "step": 1546 }, { "epoch": 0.16040542029304836, "grad_norm": 0.7578125, "learning_rate": 0.0001986095613457016, "loss": 4.6788, "step": 1547 }, { "epoch": 0.16050910834753643, "grad_norm": 0.84765625, "learning_rate": 0.0001986077556794344, "loss": 4.6588, "step": 1548 }, { "epoch": 0.1606127964020245, "grad_norm": 0.93359375, "learning_rate": 0.00019860594884969993, "loss": 4.6647, "step": 1549 }, { "epoch": 0.16071648445651257, "grad_norm": 1.0625, "learning_rate": 0.0001986041408565195, "loss": 4.6423, "step": 1550 }, { "epoch": 0.16082017251100064, "grad_norm": 0.890625, "learning_rate": 0.0001986023316999144, "loss": 4.6748, "step": 1551 }, { "epoch": 0.1609238605654887, "grad_norm": 0.6953125, "learning_rate": 0.00019860052137990605, "loss": 4.6763, "step": 1552 }, { "epoch": 0.1610275486199768, "grad_norm": 0.84765625, "learning_rate": 0.00019859870989651576, "loss": 4.6478, "step": 1553 }, { "epoch": 0.16113123667446488, "grad_norm": 0.96875, "learning_rate": 0.00019859689724976488, "loss": 4.6574, "step": 1554 }, { "epoch": 0.16123492472895296, "grad_norm": 0.953125, "learning_rate": 0.0001985950834396749, "loss": 4.6907, "step": 1555 }, { "epoch": 0.16133861278344103, "grad_norm": 0.91796875, "learning_rate": 0.00019859326846626712, "loss": 4.6621, "step": 1556 }, { "epoch": 0.1614423008379291, "grad_norm": 0.83984375, "learning_rate": 0.000198591452329563, "loss": 4.6589, "step": 1557 }, { "epoch": 0.16154598889241717, "grad_norm": 0.69921875, "learning_rate": 0.00019858963502958395, "loss": 4.7071, "step": 1558 }, { "epoch": 0.16164967694690524, "grad_norm": 0.71875, "learning_rate": 0.00019858781656635142, "loss": 4.6513, "step": 1559 }, { "epoch": 0.1617533650013933, "grad_norm": 0.94140625, "learning_rate": 0.00019858599693988688, "loss": 4.6913, "step": 1560 }, { "epoch": 0.16185705305588138, "grad_norm": 1.2265625, "learning_rate": 0.00019858417615021176, "loss": 4.6505, "step": 1561 }, { "epoch": 0.16196074111036945, "grad_norm": 0.765625, "learning_rate": 0.00019858235419734758, "loss": 4.6975, "step": 1562 }, { "epoch": 0.16206442916485753, "grad_norm": 1.0546875, "learning_rate": 0.00019858053108131585, "loss": 4.6967, "step": 1563 }, { "epoch": 0.1621681172193456, "grad_norm": 1.203125, "learning_rate": 0.00019857870680213804, "loss": 4.6619, "step": 1564 }, { "epoch": 0.16227180527383367, "grad_norm": 0.81640625, "learning_rate": 0.0001985768813598357, "loss": 4.6586, "step": 1565 }, { "epoch": 0.16237549332832174, "grad_norm": 0.8984375, "learning_rate": 0.00019857505475443033, "loss": 4.6451, "step": 1566 }, { "epoch": 0.1624791813828098, "grad_norm": 0.9921875, "learning_rate": 0.00019857322698594353, "loss": 4.6634, "step": 1567 }, { "epoch": 0.16258286943729788, "grad_norm": 0.87890625, "learning_rate": 0.00019857139805439688, "loss": 4.6611, "step": 1568 }, { "epoch": 0.16268655749178595, "grad_norm": 0.671875, "learning_rate": 0.0001985695679598119, "loss": 4.6922, "step": 1569 }, { "epoch": 0.16279024554627403, "grad_norm": 0.8359375, "learning_rate": 0.0001985677367022102, "loss": 4.5804, "step": 1570 }, { "epoch": 0.1628939336007621, "grad_norm": 0.9140625, "learning_rate": 0.00019856590428161342, "loss": 4.6266, "step": 1571 }, { "epoch": 0.16299762165525017, "grad_norm": 0.85546875, "learning_rate": 0.00019856407069804316, "loss": 4.6377, "step": 1572 }, { "epoch": 0.16310130970973824, "grad_norm": 0.83203125, "learning_rate": 0.00019856223595152104, "loss": 4.6668, "step": 1573 }, { "epoch": 0.1632049977642263, "grad_norm": 0.77734375, "learning_rate": 0.0001985604000420687, "loss": 4.6548, "step": 1574 }, { "epoch": 0.1633086858187144, "grad_norm": 0.73046875, "learning_rate": 0.00019855856296970784, "loss": 4.7052, "step": 1575 }, { "epoch": 0.16341237387320248, "grad_norm": 0.69140625, "learning_rate": 0.00019855672473446012, "loss": 4.6898, "step": 1576 }, { "epoch": 0.16351606192769055, "grad_norm": 0.796875, "learning_rate": 0.00019855488533634724, "loss": 4.6651, "step": 1577 }, { "epoch": 0.16361974998217862, "grad_norm": 0.79296875, "learning_rate": 0.00019855304477539085, "loss": 4.6913, "step": 1578 }, { "epoch": 0.1637234380366667, "grad_norm": 0.828125, "learning_rate": 0.00019855120305161273, "loss": 4.6939, "step": 1579 }, { "epoch": 0.16382712609115477, "grad_norm": 0.7734375, "learning_rate": 0.0001985493601650346, "loss": 4.677, "step": 1580 }, { "epoch": 0.16393081414564284, "grad_norm": 0.69921875, "learning_rate": 0.0001985475161156782, "loss": 4.6388, "step": 1581 }, { "epoch": 0.1640345022001309, "grad_norm": 0.7421875, "learning_rate": 0.00019854567090356526, "loss": 4.6185, "step": 1582 }, { "epoch": 0.16413819025461898, "grad_norm": 0.90625, "learning_rate": 0.00019854382452871757, "loss": 4.6091, "step": 1583 }, { "epoch": 0.16424187830910705, "grad_norm": 0.86328125, "learning_rate": 0.00019854197699115692, "loss": 4.683, "step": 1584 }, { "epoch": 0.16434556636359512, "grad_norm": 0.6875, "learning_rate": 0.0001985401282909051, "loss": 4.6445, "step": 1585 }, { "epoch": 0.1644492544180832, "grad_norm": 0.7890625, "learning_rate": 0.00019853827842798393, "loss": 4.6586, "step": 1586 }, { "epoch": 0.16455294247257127, "grad_norm": 0.63671875, "learning_rate": 0.00019853642740241522, "loss": 4.6508, "step": 1587 }, { "epoch": 0.16465663052705934, "grad_norm": 0.7265625, "learning_rate": 0.00019853457521422084, "loss": 4.6936, "step": 1588 }, { "epoch": 0.1647603185815474, "grad_norm": 0.6015625, "learning_rate": 0.00019853272186342262, "loss": 4.639, "step": 1589 }, { "epoch": 0.16486400663603548, "grad_norm": 0.59765625, "learning_rate": 0.00019853086735004247, "loss": 4.6583, "step": 1590 }, { "epoch": 0.16496769469052355, "grad_norm": 0.72265625, "learning_rate": 0.0001985290116741022, "loss": 4.6363, "step": 1591 }, { "epoch": 0.16507138274501162, "grad_norm": 0.7421875, "learning_rate": 0.00019852715483562375, "loss": 4.6199, "step": 1592 }, { "epoch": 0.1651750707994997, "grad_norm": 0.7578125, "learning_rate": 0.00019852529683462902, "loss": 4.6458, "step": 1593 }, { "epoch": 0.16527875885398777, "grad_norm": 0.62109375, "learning_rate": 0.00019852343767113993, "loss": 4.6503, "step": 1594 }, { "epoch": 0.16538244690847584, "grad_norm": 0.66015625, "learning_rate": 0.00019852157734517843, "loss": 4.6741, "step": 1595 }, { "epoch": 0.1654861349629639, "grad_norm": 0.6484375, "learning_rate": 0.00019851971585676644, "loss": 4.6378, "step": 1596 }, { "epoch": 0.165589823017452, "grad_norm": 0.7109375, "learning_rate": 0.00019851785320592596, "loss": 4.6136, "step": 1597 }, { "epoch": 0.16569351107194008, "grad_norm": 0.70703125, "learning_rate": 0.00019851598939267894, "loss": 4.6617, "step": 1598 }, { "epoch": 0.16579719912642815, "grad_norm": 0.62890625, "learning_rate": 0.0001985141244170474, "loss": 4.67, "step": 1599 }, { "epoch": 0.16590088718091622, "grad_norm": 0.65234375, "learning_rate": 0.0001985122582790533, "loss": 4.6461, "step": 1600 }, { "epoch": 0.1660045752354043, "grad_norm": 0.69921875, "learning_rate": 0.00019851039097871872, "loss": 4.6634, "step": 1601 }, { "epoch": 0.16610826328989237, "grad_norm": 0.62890625, "learning_rate": 0.00019850852251606562, "loss": 4.6191, "step": 1602 }, { "epoch": 0.16621195134438044, "grad_norm": 0.8125, "learning_rate": 0.0001985066528911161, "loss": 4.6047, "step": 1603 }, { "epoch": 0.1663156393988685, "grad_norm": 0.83203125, "learning_rate": 0.00019850478210389218, "loss": 4.6388, "step": 1604 }, { "epoch": 0.16641932745335658, "grad_norm": 0.7578125, "learning_rate": 0.00019850291015441598, "loss": 4.6735, "step": 1605 }, { "epoch": 0.16652301550784465, "grad_norm": 0.73828125, "learning_rate": 0.00019850103704270957, "loss": 4.6201, "step": 1606 }, { "epoch": 0.16662670356233272, "grad_norm": 0.66015625, "learning_rate": 0.000198499162768795, "loss": 4.6704, "step": 1607 }, { "epoch": 0.1667303916168208, "grad_norm": 0.8203125, "learning_rate": 0.00019849728733269446, "loss": 4.6382, "step": 1608 }, { "epoch": 0.16683407967130887, "grad_norm": 0.97265625, "learning_rate": 0.00019849541073443006, "loss": 4.6594, "step": 1609 }, { "epoch": 0.16693776772579694, "grad_norm": 0.91015625, "learning_rate": 0.00019849353297402388, "loss": 4.6754, "step": 1610 }, { "epoch": 0.167041455780285, "grad_norm": 0.9453125, "learning_rate": 0.00019849165405149818, "loss": 4.6522, "step": 1611 }, { "epoch": 0.16714514383477308, "grad_norm": 1.0703125, "learning_rate": 0.00019848977396687504, "loss": 4.636, "step": 1612 }, { "epoch": 0.16724883188926115, "grad_norm": 0.96484375, "learning_rate": 0.00019848789272017668, "loss": 4.6058, "step": 1613 }, { "epoch": 0.16735251994374922, "grad_norm": 0.78515625, "learning_rate": 0.0001984860103114253, "loss": 4.6336, "step": 1614 }, { "epoch": 0.1674562079982373, "grad_norm": 0.75, "learning_rate": 0.00019848412674064306, "loss": 4.6957, "step": 1615 }, { "epoch": 0.16755989605272537, "grad_norm": 0.640625, "learning_rate": 0.00019848224200785228, "loss": 4.6593, "step": 1616 }, { "epoch": 0.16766358410721344, "grad_norm": 0.81640625, "learning_rate": 0.00019848035611307513, "loss": 4.6497, "step": 1617 }, { "epoch": 0.1677672721617015, "grad_norm": 1.1640625, "learning_rate": 0.00019847846905633385, "loss": 4.598, "step": 1618 }, { "epoch": 0.16787096021618958, "grad_norm": 0.90234375, "learning_rate": 0.00019847658083765076, "loss": 4.6646, "step": 1619 }, { "epoch": 0.16797464827067768, "grad_norm": 0.62890625, "learning_rate": 0.0001984746914570481, "loss": 4.6261, "step": 1620 }, { "epoch": 0.16807833632516575, "grad_norm": 0.7890625, "learning_rate": 0.0001984728009145482, "loss": 4.6637, "step": 1621 }, { "epoch": 0.16818202437965382, "grad_norm": 0.9609375, "learning_rate": 0.0001984709092101733, "loss": 4.6613, "step": 1622 }, { "epoch": 0.1682857124341419, "grad_norm": 0.81640625, "learning_rate": 0.00019846901634394576, "loss": 4.6403, "step": 1623 }, { "epoch": 0.16838940048862996, "grad_norm": 0.73828125, "learning_rate": 0.00019846712231588796, "loss": 4.647, "step": 1624 }, { "epoch": 0.16849308854311804, "grad_norm": 0.69140625, "learning_rate": 0.00019846522712602216, "loss": 4.6536, "step": 1625 }, { "epoch": 0.1685967765976061, "grad_norm": 0.75, "learning_rate": 0.00019846333077437077, "loss": 4.6393, "step": 1626 }, { "epoch": 0.16870046465209418, "grad_norm": 0.80859375, "learning_rate": 0.00019846143326095615, "loss": 4.6717, "step": 1627 }, { "epoch": 0.16880415270658225, "grad_norm": 0.69921875, "learning_rate": 0.0001984595345858007, "loss": 4.6327, "step": 1628 }, { "epoch": 0.16890784076107032, "grad_norm": 0.69921875, "learning_rate": 0.00019845763474892681, "loss": 4.6776, "step": 1629 }, { "epoch": 0.1690115288155584, "grad_norm": 0.66796875, "learning_rate": 0.00019845573375035694, "loss": 4.6468, "step": 1630 }, { "epoch": 0.16911521687004646, "grad_norm": 0.78515625, "learning_rate": 0.00019845383159011347, "loss": 4.6192, "step": 1631 }, { "epoch": 0.16921890492453454, "grad_norm": 0.7421875, "learning_rate": 0.00019845192826821884, "loss": 4.6868, "step": 1632 }, { "epoch": 0.1693225929790226, "grad_norm": 0.74609375, "learning_rate": 0.00019845002378469554, "loss": 4.6603, "step": 1633 }, { "epoch": 0.16942628103351068, "grad_norm": 0.83984375, "learning_rate": 0.000198448118139566, "loss": 4.6291, "step": 1634 }, { "epoch": 0.16952996908799875, "grad_norm": 0.6796875, "learning_rate": 0.00019844621133285276, "loss": 4.5767, "step": 1635 }, { "epoch": 0.16963365714248682, "grad_norm": 0.6875, "learning_rate": 0.0001984443033645783, "loss": 4.571, "step": 1636 }, { "epoch": 0.1697373451969749, "grad_norm": 0.75390625, "learning_rate": 0.00019844239423476507, "loss": 4.6399, "step": 1637 }, { "epoch": 0.16984103325146296, "grad_norm": 0.78515625, "learning_rate": 0.00019844048394343568, "loss": 4.6592, "step": 1638 }, { "epoch": 0.16994472130595104, "grad_norm": 0.6875, "learning_rate": 0.00019843857249061264, "loss": 4.6768, "step": 1639 }, { "epoch": 0.1700484093604391, "grad_norm": 0.7265625, "learning_rate": 0.00019843665987631849, "loss": 4.671, "step": 1640 }, { "epoch": 0.17015209741492718, "grad_norm": 0.71484375, "learning_rate": 0.00019843474610057576, "loss": 4.6523, "step": 1641 }, { "epoch": 0.17025578546941528, "grad_norm": 0.6953125, "learning_rate": 0.00019843283116340713, "loss": 4.6186, "step": 1642 }, { "epoch": 0.17035947352390335, "grad_norm": 0.6875, "learning_rate": 0.00019843091506483514, "loss": 4.6413, "step": 1643 }, { "epoch": 0.17046316157839142, "grad_norm": 0.73046875, "learning_rate": 0.00019842899780488237, "loss": 4.66, "step": 1644 }, { "epoch": 0.1705668496328795, "grad_norm": 0.796875, "learning_rate": 0.0001984270793835715, "loss": 4.6728, "step": 1645 }, { "epoch": 0.17067053768736756, "grad_norm": 0.59375, "learning_rate": 0.00019842515980092514, "loss": 4.6588, "step": 1646 }, { "epoch": 0.17077422574185563, "grad_norm": 0.73046875, "learning_rate": 0.00019842323905696588, "loss": 4.6629, "step": 1647 }, { "epoch": 0.1708779137963437, "grad_norm": 0.76171875, "learning_rate": 0.0001984213171517165, "loss": 4.6732, "step": 1648 }, { "epoch": 0.17098160185083178, "grad_norm": 0.73046875, "learning_rate": 0.00019841939408519958, "loss": 4.6467, "step": 1649 }, { "epoch": 0.17108528990531985, "grad_norm": 0.62109375, "learning_rate": 0.00019841746985743786, "loss": 4.6683, "step": 1650 }, { "epoch": 0.17118897795980792, "grad_norm": 0.625, "learning_rate": 0.00019841554446845404, "loss": 4.6328, "step": 1651 }, { "epoch": 0.171292666014296, "grad_norm": 0.73828125, "learning_rate": 0.0001984136179182708, "loss": 4.6829, "step": 1652 }, { "epoch": 0.17139635406878406, "grad_norm": 0.65234375, "learning_rate": 0.0001984116902069109, "loss": 4.6603, "step": 1653 }, { "epoch": 0.17150004212327213, "grad_norm": 0.6875, "learning_rate": 0.00019840976133439706, "loss": 4.6468, "step": 1654 }, { "epoch": 0.1716037301777602, "grad_norm": 0.77734375, "learning_rate": 0.00019840783130075206, "loss": 4.6419, "step": 1655 }, { "epoch": 0.17170741823224828, "grad_norm": 0.78125, "learning_rate": 0.0001984059001059987, "loss": 4.5958, "step": 1656 }, { "epoch": 0.17181110628673635, "grad_norm": 0.7734375, "learning_rate": 0.00019840396775015976, "loss": 4.6597, "step": 1657 }, { "epoch": 0.17191479434122442, "grad_norm": 0.8046875, "learning_rate": 0.000198402034233258, "loss": 4.6056, "step": 1658 }, { "epoch": 0.1720184823957125, "grad_norm": 0.875, "learning_rate": 0.00019840009955531622, "loss": 4.6602, "step": 1659 }, { "epoch": 0.17212217045020056, "grad_norm": 1.0234375, "learning_rate": 0.0001983981637163573, "loss": 4.6517, "step": 1660 }, { "epoch": 0.17222585850468863, "grad_norm": 0.80078125, "learning_rate": 0.00019839622671640405, "loss": 4.6581, "step": 1661 }, { "epoch": 0.1723295465591767, "grad_norm": 0.9921875, "learning_rate": 0.00019839428855547935, "loss": 4.6584, "step": 1662 }, { "epoch": 0.17243323461366478, "grad_norm": 0.91015625, "learning_rate": 0.00019839234923360604, "loss": 4.6582, "step": 1663 }, { "epoch": 0.17253692266815288, "grad_norm": 0.75390625, "learning_rate": 0.00019839040875080702, "loss": 4.6181, "step": 1664 }, { "epoch": 0.17264061072264095, "grad_norm": 0.76171875, "learning_rate": 0.0001983884671071052, "loss": 4.6752, "step": 1665 }, { "epoch": 0.17274429877712902, "grad_norm": 0.80078125, "learning_rate": 0.00019838652430252346, "loss": 4.6136, "step": 1666 }, { "epoch": 0.1728479868316171, "grad_norm": 0.953125, "learning_rate": 0.0001983845803370847, "loss": 4.6409, "step": 1667 }, { "epoch": 0.17295167488610516, "grad_norm": 0.90234375, "learning_rate": 0.00019838263521081191, "loss": 4.578, "step": 1668 }, { "epoch": 0.17305536294059323, "grad_norm": 0.73046875, "learning_rate": 0.000198380688923728, "loss": 4.6316, "step": 1669 }, { "epoch": 0.1731590509950813, "grad_norm": 0.8359375, "learning_rate": 0.000198378741475856, "loss": 4.6198, "step": 1670 }, { "epoch": 0.17326273904956938, "grad_norm": 0.8671875, "learning_rate": 0.0001983767928672188, "loss": 4.6566, "step": 1671 }, { "epoch": 0.17336642710405745, "grad_norm": 0.62890625, "learning_rate": 0.00019837484309783945, "loss": 4.6642, "step": 1672 }, { "epoch": 0.17347011515854552, "grad_norm": 0.69140625, "learning_rate": 0.00019837289216774093, "loss": 4.6507, "step": 1673 }, { "epoch": 0.1735738032130336, "grad_norm": 0.83203125, "learning_rate": 0.0001983709400769463, "loss": 4.5731, "step": 1674 }, { "epoch": 0.17367749126752166, "grad_norm": 0.765625, "learning_rate": 0.00019836898682547852, "loss": 4.6674, "step": 1675 }, { "epoch": 0.17378117932200973, "grad_norm": 0.69921875, "learning_rate": 0.00019836703241336067, "loss": 4.6437, "step": 1676 }, { "epoch": 0.1738848673764978, "grad_norm": 0.703125, "learning_rate": 0.00019836507684061584, "loss": 4.6252, "step": 1677 }, { "epoch": 0.17398855543098588, "grad_norm": 0.8125, "learning_rate": 0.00019836312010726708, "loss": 4.6167, "step": 1678 }, { "epoch": 0.17409224348547395, "grad_norm": 1.0703125, "learning_rate": 0.00019836116221333747, "loss": 4.6435, "step": 1679 }, { "epoch": 0.17419593153996202, "grad_norm": 0.93359375, "learning_rate": 0.0001983592031588501, "loss": 4.6064, "step": 1680 }, { "epoch": 0.1742996195944501, "grad_norm": 0.70703125, "learning_rate": 0.00019835724294382814, "loss": 4.5754, "step": 1681 }, { "epoch": 0.17440330764893816, "grad_norm": 0.6171875, "learning_rate": 0.00019835528156829466, "loss": 4.5984, "step": 1682 }, { "epoch": 0.17450699570342623, "grad_norm": 0.76171875, "learning_rate": 0.00019835331903227284, "loss": 4.6164, "step": 1683 }, { "epoch": 0.1746106837579143, "grad_norm": 0.84375, "learning_rate": 0.0001983513553357858, "loss": 4.6206, "step": 1684 }, { "epoch": 0.17471437181240237, "grad_norm": 0.68359375, "learning_rate": 0.00019834939047885675, "loss": 4.5944, "step": 1685 }, { "epoch": 0.17481805986689045, "grad_norm": 0.78515625, "learning_rate": 0.0001983474244615088, "loss": 4.6713, "step": 1686 }, { "epoch": 0.17492174792137855, "grad_norm": 0.8359375, "learning_rate": 0.00019834545728376527, "loss": 4.646, "step": 1687 }, { "epoch": 0.17502543597586662, "grad_norm": 0.71875, "learning_rate": 0.00019834348894564924, "loss": 4.6358, "step": 1688 }, { "epoch": 0.1751291240303547, "grad_norm": 0.66015625, "learning_rate": 0.00019834151944718404, "loss": 4.6095, "step": 1689 }, { "epoch": 0.17523281208484276, "grad_norm": 0.7890625, "learning_rate": 0.00019833954878839283, "loss": 4.6115, "step": 1690 }, { "epoch": 0.17533650013933083, "grad_norm": 0.796875, "learning_rate": 0.0001983375769692989, "loss": 4.6866, "step": 1691 }, { "epoch": 0.1754401881938189, "grad_norm": 0.78125, "learning_rate": 0.00019833560398992552, "loss": 4.6569, "step": 1692 }, { "epoch": 0.17554387624830697, "grad_norm": 0.66015625, "learning_rate": 0.00019833362985029594, "loss": 4.6235, "step": 1693 }, { "epoch": 0.17564756430279505, "grad_norm": 0.81640625, "learning_rate": 0.0001983316545504335, "loss": 4.6602, "step": 1694 }, { "epoch": 0.17575125235728312, "grad_norm": 0.875, "learning_rate": 0.00019832967809036144, "loss": 4.6386, "step": 1695 }, { "epoch": 0.1758549404117712, "grad_norm": 0.76171875, "learning_rate": 0.00019832770047010316, "loss": 4.6023, "step": 1696 }, { "epoch": 0.17595862846625926, "grad_norm": 0.67578125, "learning_rate": 0.00019832572168968193, "loss": 4.6724, "step": 1697 }, { "epoch": 0.17606231652074733, "grad_norm": 0.83203125, "learning_rate": 0.00019832374174912111, "loss": 4.5923, "step": 1698 }, { "epoch": 0.1761660045752354, "grad_norm": 0.76953125, "learning_rate": 0.00019832176064844408, "loss": 4.6562, "step": 1699 }, { "epoch": 0.17626969262972347, "grad_norm": 0.6640625, "learning_rate": 0.00019831977838767422, "loss": 4.5973, "step": 1700 }, { "epoch": 0.17637338068421154, "grad_norm": 0.859375, "learning_rate": 0.0001983177949668349, "loss": 4.6508, "step": 1701 }, { "epoch": 0.17647706873869962, "grad_norm": 0.78515625, "learning_rate": 0.0001983158103859495, "loss": 4.643, "step": 1702 }, { "epoch": 0.1765807567931877, "grad_norm": 0.546875, "learning_rate": 0.00019831382464504147, "loss": 4.6174, "step": 1703 }, { "epoch": 0.17668444484767576, "grad_norm": 0.64453125, "learning_rate": 0.00019831183774413424, "loss": 4.6328, "step": 1704 }, { "epoch": 0.17678813290216383, "grad_norm": 0.6171875, "learning_rate": 0.00019830984968325122, "loss": 4.6157, "step": 1705 }, { "epoch": 0.1768918209566519, "grad_norm": 0.5703125, "learning_rate": 0.00019830786046241592, "loss": 4.6173, "step": 1706 }, { "epoch": 0.17699550901113997, "grad_norm": 0.671875, "learning_rate": 0.00019830587008165177, "loss": 4.606, "step": 1707 }, { "epoch": 0.17709919706562804, "grad_norm": 0.65234375, "learning_rate": 0.0001983038785409823, "loss": 4.5933, "step": 1708 }, { "epoch": 0.17720288512011614, "grad_norm": 0.640625, "learning_rate": 0.00019830188584043094, "loss": 4.6165, "step": 1709 }, { "epoch": 0.17730657317460422, "grad_norm": 0.68359375, "learning_rate": 0.00019829989198002124, "loss": 4.6634, "step": 1710 }, { "epoch": 0.1774102612290923, "grad_norm": 0.59375, "learning_rate": 0.00019829789695977672, "loss": 4.5776, "step": 1711 }, { "epoch": 0.17751394928358036, "grad_norm": 0.69140625, "learning_rate": 0.00019829590077972094, "loss": 4.6129, "step": 1712 }, { "epoch": 0.17761763733806843, "grad_norm": 0.8671875, "learning_rate": 0.00019829390343987743, "loss": 4.5699, "step": 1713 }, { "epoch": 0.1777213253925565, "grad_norm": 0.921875, "learning_rate": 0.00019829190494026974, "loss": 4.6348, "step": 1714 }, { "epoch": 0.17782501344704457, "grad_norm": 0.87109375, "learning_rate": 0.00019828990528092147, "loss": 4.635, "step": 1715 }, { "epoch": 0.17792870150153264, "grad_norm": 0.828125, "learning_rate": 0.00019828790446185622, "loss": 4.6516, "step": 1716 }, { "epoch": 0.17803238955602071, "grad_norm": 0.7890625, "learning_rate": 0.0001982859024830976, "loss": 4.6052, "step": 1717 }, { "epoch": 0.1781360776105088, "grad_norm": 0.6953125, "learning_rate": 0.0001982838993446692, "loss": 4.6519, "step": 1718 }, { "epoch": 0.17823976566499686, "grad_norm": 0.63671875, "learning_rate": 0.00019828189504659472, "loss": 4.6294, "step": 1719 }, { "epoch": 0.17834345371948493, "grad_norm": 0.5546875, "learning_rate": 0.00019827988958889776, "loss": 4.6044, "step": 1720 }, { "epoch": 0.178447141773973, "grad_norm": 0.640625, "learning_rate": 0.00019827788297160196, "loss": 4.6074, "step": 1721 }, { "epoch": 0.17855082982846107, "grad_norm": 0.67578125, "learning_rate": 0.00019827587519473107, "loss": 4.6163, "step": 1722 }, { "epoch": 0.17865451788294914, "grad_norm": 0.69140625, "learning_rate": 0.00019827386625830871, "loss": 4.6524, "step": 1723 }, { "epoch": 0.17875820593743721, "grad_norm": 0.66015625, "learning_rate": 0.0001982718561623586, "loss": 4.5932, "step": 1724 }, { "epoch": 0.17886189399192529, "grad_norm": 0.59375, "learning_rate": 0.00019826984490690447, "loss": 4.5631, "step": 1725 }, { "epoch": 0.17896558204641336, "grad_norm": 0.69921875, "learning_rate": 0.00019826783249197004, "loss": 4.6035, "step": 1726 }, { "epoch": 0.17906927010090143, "grad_norm": 0.7421875, "learning_rate": 0.00019826581891757908, "loss": 4.6006, "step": 1727 }, { "epoch": 0.1791729581553895, "grad_norm": 0.71875, "learning_rate": 0.00019826380418375532, "loss": 4.6126, "step": 1728 }, { "epoch": 0.17927664620987757, "grad_norm": 0.6171875, "learning_rate": 0.00019826178829052254, "loss": 4.6404, "step": 1729 }, { "epoch": 0.17938033426436564, "grad_norm": 0.68359375, "learning_rate": 0.0001982597712379045, "loss": 4.6289, "step": 1730 }, { "epoch": 0.17948402231885374, "grad_norm": 0.6328125, "learning_rate": 0.00019825775302592503, "loss": 4.6162, "step": 1731 }, { "epoch": 0.1795877103733418, "grad_norm": 0.71875, "learning_rate": 0.00019825573365460798, "loss": 4.615, "step": 1732 }, { "epoch": 0.17969139842782988, "grad_norm": 0.76171875, "learning_rate": 0.0001982537131239771, "loss": 4.6037, "step": 1733 }, { "epoch": 0.17979508648231796, "grad_norm": 0.63671875, "learning_rate": 0.00019825169143405623, "loss": 4.6038, "step": 1734 }, { "epoch": 0.17989877453680603, "grad_norm": 0.62109375, "learning_rate": 0.00019824966858486933, "loss": 4.6219, "step": 1735 }, { "epoch": 0.1800024625912941, "grad_norm": 0.7734375, "learning_rate": 0.00019824764457644016, "loss": 4.6097, "step": 1736 }, { "epoch": 0.18010615064578217, "grad_norm": 0.74609375, "learning_rate": 0.00019824561940879262, "loss": 4.5909, "step": 1737 }, { "epoch": 0.18020983870027024, "grad_norm": 0.70703125, "learning_rate": 0.00019824359308195068, "loss": 4.5772, "step": 1738 }, { "epoch": 0.1803135267547583, "grad_norm": 0.6328125, "learning_rate": 0.00019824156559593813, "loss": 4.6719, "step": 1739 }, { "epoch": 0.18041721480924638, "grad_norm": 0.65234375, "learning_rate": 0.00019823953695077896, "loss": 4.5728, "step": 1740 }, { "epoch": 0.18052090286373446, "grad_norm": 0.7109375, "learning_rate": 0.0001982375071464971, "loss": 4.6433, "step": 1741 }, { "epoch": 0.18062459091822253, "grad_norm": 0.74609375, "learning_rate": 0.00019823547618311654, "loss": 4.6155, "step": 1742 }, { "epoch": 0.1807282789727106, "grad_norm": 0.7421875, "learning_rate": 0.00019823344406066115, "loss": 4.6704, "step": 1743 }, { "epoch": 0.18083196702719867, "grad_norm": 0.88671875, "learning_rate": 0.00019823141077915496, "loss": 4.6568, "step": 1744 }, { "epoch": 0.18093565508168674, "grad_norm": 1.046875, "learning_rate": 0.00019822937633862198, "loss": 4.6189, "step": 1745 }, { "epoch": 0.1810393431361748, "grad_norm": 0.84765625, "learning_rate": 0.00019822734073908618, "loss": 4.631, "step": 1746 }, { "epoch": 0.18114303119066288, "grad_norm": 0.63671875, "learning_rate": 0.0001982253039805716, "loss": 4.5911, "step": 1747 }, { "epoch": 0.18124671924515096, "grad_norm": 0.9453125, "learning_rate": 0.00019822326606310227, "loss": 4.6091, "step": 1748 }, { "epoch": 0.18135040729963903, "grad_norm": 1.125, "learning_rate": 0.0001982212269867022, "loss": 4.5828, "step": 1749 }, { "epoch": 0.1814540953541271, "grad_norm": 0.8671875, "learning_rate": 0.00019821918675139548, "loss": 4.6346, "step": 1750 }, { "epoch": 0.18155778340861517, "grad_norm": 0.79296875, "learning_rate": 0.0001982171453572062, "loss": 4.6333, "step": 1751 }, { "epoch": 0.18166147146310324, "grad_norm": 0.86328125, "learning_rate": 0.00019821510280415837, "loss": 4.658, "step": 1752 }, { "epoch": 0.1817651595175913, "grad_norm": 0.76953125, "learning_rate": 0.00019821305909227622, "loss": 4.6648, "step": 1753 }, { "epoch": 0.1818688475720794, "grad_norm": 0.87109375, "learning_rate": 0.00019821101422158374, "loss": 4.6353, "step": 1754 }, { "epoch": 0.18197253562656748, "grad_norm": 0.94921875, "learning_rate": 0.00019820896819210514, "loss": 4.5773, "step": 1755 }, { "epoch": 0.18207622368105555, "grad_norm": 1.015625, "learning_rate": 0.0001982069210038645, "loss": 4.5819, "step": 1756 }, { "epoch": 0.18217991173554363, "grad_norm": 1.1484375, "learning_rate": 0.00019820487265688602, "loss": 4.6235, "step": 1757 }, { "epoch": 0.1822835997900317, "grad_norm": 0.79296875, "learning_rate": 0.00019820282315119382, "loss": 4.6436, "step": 1758 }, { "epoch": 0.18238728784451977, "grad_norm": 0.8359375, "learning_rate": 0.0001982007724868121, "loss": 4.6404, "step": 1759 }, { "epoch": 0.18249097589900784, "grad_norm": 0.94921875, "learning_rate": 0.00019819872066376512, "loss": 4.6505, "step": 1760 }, { "epoch": 0.1825946639534959, "grad_norm": 1.0703125, "learning_rate": 0.000198196667682077, "loss": 4.6425, "step": 1761 }, { "epoch": 0.18269835200798398, "grad_norm": 1.140625, "learning_rate": 0.00019819461354177205, "loss": 4.666, "step": 1762 }, { "epoch": 0.18280204006247205, "grad_norm": 0.890625, "learning_rate": 0.0001981925582428744, "loss": 4.5952, "step": 1763 }, { "epoch": 0.18290572811696013, "grad_norm": 0.8046875, "learning_rate": 0.0001981905017854084, "loss": 4.6084, "step": 1764 }, { "epoch": 0.1830094161714482, "grad_norm": 0.828125, "learning_rate": 0.00019818844416939822, "loss": 4.6354, "step": 1765 }, { "epoch": 0.18311310422593627, "grad_norm": 0.94921875, "learning_rate": 0.00019818638539486822, "loss": 4.6241, "step": 1766 }, { "epoch": 0.18321679228042434, "grad_norm": 1.2890625, "learning_rate": 0.00019818432546184266, "loss": 4.6408, "step": 1767 }, { "epoch": 0.1833204803349124, "grad_norm": 0.796875, "learning_rate": 0.00019818226437034583, "loss": 4.5669, "step": 1768 }, { "epoch": 0.18342416838940048, "grad_norm": 0.85546875, "learning_rate": 0.0001981802021204021, "loss": 4.6347, "step": 1769 }, { "epoch": 0.18352785644388855, "grad_norm": 1.0703125, "learning_rate": 0.00019817813871203573, "loss": 4.6193, "step": 1770 }, { "epoch": 0.18363154449837663, "grad_norm": 1.2109375, "learning_rate": 0.00019817607414527108, "loss": 4.6221, "step": 1771 }, { "epoch": 0.1837352325528647, "grad_norm": 0.7734375, "learning_rate": 0.00019817400842013258, "loss": 4.6161, "step": 1772 }, { "epoch": 0.18383892060735277, "grad_norm": 0.890625, "learning_rate": 0.00019817194153664455, "loss": 4.5937, "step": 1773 }, { "epoch": 0.18394260866184084, "grad_norm": 0.7421875, "learning_rate": 0.00019816987349483136, "loss": 4.6, "step": 1774 }, { "epoch": 0.1840462967163289, "grad_norm": 0.9375, "learning_rate": 0.00019816780429471743, "loss": 4.6343, "step": 1775 }, { "epoch": 0.184149984770817, "grad_norm": 0.83203125, "learning_rate": 0.0001981657339363272, "loss": 4.592, "step": 1776 }, { "epoch": 0.18425367282530508, "grad_norm": 0.82421875, "learning_rate": 0.00019816366241968506, "loss": 4.6029, "step": 1777 }, { "epoch": 0.18435736087979315, "grad_norm": 0.703125, "learning_rate": 0.00019816158974481548, "loss": 4.6375, "step": 1778 }, { "epoch": 0.18446104893428122, "grad_norm": 0.66796875, "learning_rate": 0.0001981595159117429, "loss": 4.6048, "step": 1779 }, { "epoch": 0.1845647369887693, "grad_norm": 0.79296875, "learning_rate": 0.0001981574409204918, "loss": 4.5995, "step": 1780 }, { "epoch": 0.18466842504325737, "grad_norm": 0.76171875, "learning_rate": 0.00019815536477108662, "loss": 4.6003, "step": 1781 }, { "epoch": 0.18477211309774544, "grad_norm": 0.6328125, "learning_rate": 0.00019815328746355192, "loss": 4.6188, "step": 1782 }, { "epoch": 0.1848758011522335, "grad_norm": 0.62109375, "learning_rate": 0.00019815120899791216, "loss": 4.6491, "step": 1783 }, { "epoch": 0.18497948920672158, "grad_norm": 0.76953125, "learning_rate": 0.0001981491293741919, "loss": 4.6405, "step": 1784 }, { "epoch": 0.18508317726120965, "grad_norm": 0.6796875, "learning_rate": 0.00019814704859241565, "loss": 4.6342, "step": 1785 }, { "epoch": 0.18518686531569772, "grad_norm": 0.640625, "learning_rate": 0.00019814496665260798, "loss": 4.6543, "step": 1786 }, { "epoch": 0.1852905533701858, "grad_norm": 0.71484375, "learning_rate": 0.00019814288355479346, "loss": 4.6173, "step": 1787 }, { "epoch": 0.18539424142467387, "grad_norm": 0.83984375, "learning_rate": 0.00019814079929899662, "loss": 4.6002, "step": 1788 }, { "epoch": 0.18549792947916194, "grad_norm": 0.58984375, "learning_rate": 0.00019813871388524215, "loss": 4.5986, "step": 1789 }, { "epoch": 0.18560161753365, "grad_norm": 0.6171875, "learning_rate": 0.00019813662731355452, "loss": 4.6107, "step": 1790 }, { "epoch": 0.18570530558813808, "grad_norm": 0.7578125, "learning_rate": 0.00019813453958395847, "loss": 4.6364, "step": 1791 }, { "epoch": 0.18580899364262615, "grad_norm": 0.75, "learning_rate": 0.00019813245069647857, "loss": 4.5867, "step": 1792 }, { "epoch": 0.18591268169711422, "grad_norm": 0.7578125, "learning_rate": 0.0001981303606511395, "loss": 4.5961, "step": 1793 }, { "epoch": 0.1860163697516023, "grad_norm": 0.62109375, "learning_rate": 0.00019812826944796586, "loss": 4.6213, "step": 1794 }, { "epoch": 0.18612005780609037, "grad_norm": 0.94921875, "learning_rate": 0.00019812617708698241, "loss": 4.5809, "step": 1795 }, { "epoch": 0.18622374586057844, "grad_norm": 1.0703125, "learning_rate": 0.00019812408356821378, "loss": 4.6406, "step": 1796 }, { "epoch": 0.1863274339150665, "grad_norm": 0.69140625, "learning_rate": 0.00019812198889168468, "loss": 4.6107, "step": 1797 }, { "epoch": 0.1864311219695546, "grad_norm": 0.6484375, "learning_rate": 0.00019811989305741984, "loss": 4.5858, "step": 1798 }, { "epoch": 0.18653481002404268, "grad_norm": 0.8046875, "learning_rate": 0.00019811779606544397, "loss": 4.6217, "step": 1799 }, { "epoch": 0.18663849807853075, "grad_norm": 0.6796875, "learning_rate": 0.00019811569791578182, "loss": 4.5735, "step": 1800 }, { "epoch": 0.18674218613301882, "grad_norm": 0.5859375, "learning_rate": 0.00019811359860845814, "loss": 4.6024, "step": 1801 }, { "epoch": 0.1868458741875069, "grad_norm": 0.640625, "learning_rate": 0.00019811149814349773, "loss": 4.5761, "step": 1802 }, { "epoch": 0.18694956224199497, "grad_norm": 0.58203125, "learning_rate": 0.00019810939652092533, "loss": 4.5857, "step": 1803 }, { "epoch": 0.18705325029648304, "grad_norm": 0.6015625, "learning_rate": 0.00019810729374076575, "loss": 4.633, "step": 1804 }, { "epoch": 0.1871569383509711, "grad_norm": 0.6953125, "learning_rate": 0.00019810518980304385, "loss": 4.628, "step": 1805 }, { "epoch": 0.18726062640545918, "grad_norm": 0.62109375, "learning_rate": 0.00019810308470778436, "loss": 4.5949, "step": 1806 }, { "epoch": 0.18736431445994725, "grad_norm": 0.71484375, "learning_rate": 0.0001981009784550122, "loss": 4.5659, "step": 1807 }, { "epoch": 0.18746800251443532, "grad_norm": 0.69921875, "learning_rate": 0.0001980988710447522, "loss": 4.5788, "step": 1808 }, { "epoch": 0.1875716905689234, "grad_norm": 0.64453125, "learning_rate": 0.0001980967624770292, "loss": 4.5945, "step": 1809 }, { "epoch": 0.18767537862341147, "grad_norm": 0.73046875, "learning_rate": 0.0001980946527518681, "loss": 4.6004, "step": 1810 }, { "epoch": 0.18777906667789954, "grad_norm": 0.6796875, "learning_rate": 0.0001980925418692938, "loss": 4.6061, "step": 1811 }, { "epoch": 0.1878827547323876, "grad_norm": 0.64453125, "learning_rate": 0.00019809042982933117, "loss": 4.6141, "step": 1812 }, { "epoch": 0.18798644278687568, "grad_norm": 0.609375, "learning_rate": 0.00019808831663200517, "loss": 4.641, "step": 1813 }, { "epoch": 0.18809013084136375, "grad_norm": 0.59765625, "learning_rate": 0.0001980862022773407, "loss": 4.542, "step": 1814 }, { "epoch": 0.18819381889585182, "grad_norm": 0.69140625, "learning_rate": 0.00019808408676536275, "loss": 4.587, "step": 1815 }, { "epoch": 0.1882975069503399, "grad_norm": 0.5859375, "learning_rate": 0.00019808197009609624, "loss": 4.6407, "step": 1816 }, { "epoch": 0.18840119500482796, "grad_norm": 0.58203125, "learning_rate": 0.00019807985226956616, "loss": 4.6117, "step": 1817 }, { "epoch": 0.18850488305931604, "grad_norm": 0.63671875, "learning_rate": 0.0001980777332857975, "loss": 4.6044, "step": 1818 }, { "epoch": 0.1886085711138041, "grad_norm": 0.609375, "learning_rate": 0.00019807561314481525, "loss": 4.5919, "step": 1819 }, { "epoch": 0.1887122591682922, "grad_norm": 0.53515625, "learning_rate": 0.00019807349184664447, "loss": 4.5995, "step": 1820 }, { "epoch": 0.18881594722278028, "grad_norm": 0.71875, "learning_rate": 0.0001980713693913101, "loss": 4.6098, "step": 1821 }, { "epoch": 0.18891963527726835, "grad_norm": 0.67578125, "learning_rate": 0.0001980692457788373, "loss": 4.6336, "step": 1822 }, { "epoch": 0.18902332333175642, "grad_norm": 0.67578125, "learning_rate": 0.00019806712100925103, "loss": 4.5602, "step": 1823 }, { "epoch": 0.1891270113862445, "grad_norm": 0.7109375, "learning_rate": 0.00019806499508257636, "loss": 4.585, "step": 1824 }, { "epoch": 0.18923069944073256, "grad_norm": 0.66796875, "learning_rate": 0.00019806286799883846, "loss": 4.6508, "step": 1825 }, { "epoch": 0.18933438749522064, "grad_norm": 0.58203125, "learning_rate": 0.00019806073975806235, "loss": 4.5661, "step": 1826 }, { "epoch": 0.1894380755497087, "grad_norm": 0.6015625, "learning_rate": 0.00019805861036027318, "loss": 4.6343, "step": 1827 }, { "epoch": 0.18954176360419678, "grad_norm": 0.765625, "learning_rate": 0.00019805647980549606, "loss": 4.6045, "step": 1828 }, { "epoch": 0.18964545165868485, "grad_norm": 0.7578125, "learning_rate": 0.0001980543480937561, "loss": 4.6495, "step": 1829 }, { "epoch": 0.18974913971317292, "grad_norm": 0.6171875, "learning_rate": 0.0001980522152250785, "loss": 4.5932, "step": 1830 }, { "epoch": 0.189852827767661, "grad_norm": 0.5625, "learning_rate": 0.00019805008119948842, "loss": 4.6119, "step": 1831 }, { "epoch": 0.18995651582214906, "grad_norm": 0.859375, "learning_rate": 0.000198047946017011, "loss": 4.6007, "step": 1832 }, { "epoch": 0.19006020387663713, "grad_norm": 0.9609375, "learning_rate": 0.00019804580967767147, "loss": 4.5951, "step": 1833 }, { "epoch": 0.1901638919311252, "grad_norm": 0.9609375, "learning_rate": 0.00019804367218149505, "loss": 4.5742, "step": 1834 }, { "epoch": 0.19026757998561328, "grad_norm": 0.8125, "learning_rate": 0.0001980415335285069, "loss": 4.5971, "step": 1835 }, { "epoch": 0.19037126804010135, "grad_norm": 0.69140625, "learning_rate": 0.0001980393937187323, "loss": 4.6173, "step": 1836 }, { "epoch": 0.19047495609458942, "grad_norm": 0.72265625, "learning_rate": 0.00019803725275219648, "loss": 4.6155, "step": 1837 }, { "epoch": 0.1905786441490775, "grad_norm": 0.80859375, "learning_rate": 0.0001980351106289247, "loss": 4.5651, "step": 1838 }, { "epoch": 0.19068233220356556, "grad_norm": 0.62890625, "learning_rate": 0.00019803296734894227, "loss": 4.6264, "step": 1839 }, { "epoch": 0.19078602025805363, "grad_norm": 0.68359375, "learning_rate": 0.00019803082291227443, "loss": 4.599, "step": 1840 }, { "epoch": 0.1908897083125417, "grad_norm": 0.89453125, "learning_rate": 0.0001980286773189465, "loss": 4.5985, "step": 1841 }, { "epoch": 0.19099339636702978, "grad_norm": 0.84375, "learning_rate": 0.0001980265305689838, "loss": 4.5839, "step": 1842 }, { "epoch": 0.19109708442151788, "grad_norm": 0.7265625, "learning_rate": 0.0001980243826624117, "loss": 4.5921, "step": 1843 }, { "epoch": 0.19120077247600595, "grad_norm": 0.91796875, "learning_rate": 0.00019802223359925545, "loss": 4.6503, "step": 1844 }, { "epoch": 0.19130446053049402, "grad_norm": 1.0546875, "learning_rate": 0.00019802008337954047, "loss": 4.5806, "step": 1845 }, { "epoch": 0.1914081485849821, "grad_norm": 0.97265625, "learning_rate": 0.00019801793200329213, "loss": 4.5883, "step": 1846 }, { "epoch": 0.19151183663947016, "grad_norm": 0.765625, "learning_rate": 0.00019801577947053579, "loss": 4.6491, "step": 1847 }, { "epoch": 0.19161552469395823, "grad_norm": 0.66015625, "learning_rate": 0.00019801362578129685, "loss": 4.6137, "step": 1848 }, { "epoch": 0.1917192127484463, "grad_norm": 0.90234375, "learning_rate": 0.00019801147093560076, "loss": 4.6222, "step": 1849 }, { "epoch": 0.19182290080293438, "grad_norm": 0.921875, "learning_rate": 0.00019800931493347288, "loss": 4.6001, "step": 1850 }, { "epoch": 0.19192658885742245, "grad_norm": 0.79296875, "learning_rate": 0.00019800715777493872, "loss": 4.6157, "step": 1851 }, { "epoch": 0.19203027691191052, "grad_norm": 0.7578125, "learning_rate": 0.00019800499946002366, "loss": 4.581, "step": 1852 }, { "epoch": 0.1921339649663986, "grad_norm": 0.78125, "learning_rate": 0.00019800283998875324, "loss": 4.6079, "step": 1853 }, { "epoch": 0.19223765302088666, "grad_norm": 0.75390625, "learning_rate": 0.00019800067936115288, "loss": 4.577, "step": 1854 }, { "epoch": 0.19234134107537473, "grad_norm": 0.72265625, "learning_rate": 0.00019799851757724808, "loss": 4.575, "step": 1855 }, { "epoch": 0.1924450291298628, "grad_norm": 0.58984375, "learning_rate": 0.00019799635463706438, "loss": 4.6081, "step": 1856 }, { "epoch": 0.19254871718435088, "grad_norm": 0.71484375, "learning_rate": 0.00019799419054062728, "loss": 4.6058, "step": 1857 }, { "epoch": 0.19265240523883895, "grad_norm": 0.515625, "learning_rate": 0.00019799202528796231, "loss": 4.618, "step": 1858 }, { "epoch": 0.19275609329332702, "grad_norm": 0.63671875, "learning_rate": 0.00019798985887909502, "loss": 4.6002, "step": 1859 }, { "epoch": 0.1928597813478151, "grad_norm": 0.61328125, "learning_rate": 0.00019798769131405098, "loss": 4.6159, "step": 1860 }, { "epoch": 0.19296346940230316, "grad_norm": 0.640625, "learning_rate": 0.00019798552259285579, "loss": 4.5643, "step": 1861 }, { "epoch": 0.19306715745679123, "grad_norm": 0.625, "learning_rate": 0.000197983352715535, "loss": 4.6061, "step": 1862 }, { "epoch": 0.1931708455112793, "grad_norm": 0.6640625, "learning_rate": 0.0001979811816821142, "loss": 4.5693, "step": 1863 }, { "epoch": 0.19327453356576738, "grad_norm": 0.61328125, "learning_rate": 0.000197979009492619, "loss": 4.6038, "step": 1864 }, { "epoch": 0.19337822162025547, "grad_norm": 0.8125, "learning_rate": 0.00019797683614707512, "loss": 4.5976, "step": 1865 }, { "epoch": 0.19348190967474355, "grad_norm": 0.7890625, "learning_rate": 0.0001979746616455081, "loss": 4.5936, "step": 1866 }, { "epoch": 0.19358559772923162, "grad_norm": 0.8359375, "learning_rate": 0.00019797248598794364, "loss": 4.5944, "step": 1867 }, { "epoch": 0.1936892857837197, "grad_norm": 0.9296875, "learning_rate": 0.0001979703091744074, "loss": 4.6682, "step": 1868 }, { "epoch": 0.19379297383820776, "grad_norm": 0.92578125, "learning_rate": 0.00019796813120492507, "loss": 4.562, "step": 1869 }, { "epoch": 0.19389666189269583, "grad_norm": 0.8671875, "learning_rate": 0.00019796595207952238, "loss": 4.5784, "step": 1870 }, { "epoch": 0.1940003499471839, "grad_norm": 0.78125, "learning_rate": 0.000197963771798225, "loss": 4.5825, "step": 1871 }, { "epoch": 0.19410403800167197, "grad_norm": 0.66015625, "learning_rate": 0.00019796159036105865, "loss": 4.6164, "step": 1872 }, { "epoch": 0.19420772605616005, "grad_norm": 0.74609375, "learning_rate": 0.00019795940776804906, "loss": 4.5685, "step": 1873 }, { "epoch": 0.19431141411064812, "grad_norm": 0.796875, "learning_rate": 0.00019795722401922205, "loss": 4.6074, "step": 1874 }, { "epoch": 0.1944151021651362, "grad_norm": 0.8046875, "learning_rate": 0.0001979550391146033, "loss": 4.5823, "step": 1875 }, { "epoch": 0.19451879021962426, "grad_norm": 0.73828125, "learning_rate": 0.0001979528530542187, "loss": 4.6129, "step": 1876 }, { "epoch": 0.19462247827411233, "grad_norm": 0.75390625, "learning_rate": 0.00019795066583809393, "loss": 4.5628, "step": 1877 }, { "epoch": 0.1947261663286004, "grad_norm": 0.75390625, "learning_rate": 0.00019794847746625483, "loss": 4.5917, "step": 1878 }, { "epoch": 0.19482985438308847, "grad_norm": 0.64453125, "learning_rate": 0.00019794628793872723, "loss": 4.6206, "step": 1879 }, { "epoch": 0.19493354243757655, "grad_norm": 0.64453125, "learning_rate": 0.00019794409725553699, "loss": 4.5926, "step": 1880 }, { "epoch": 0.19503723049206462, "grad_norm": 0.71484375, "learning_rate": 0.00019794190541670993, "loss": 4.6335, "step": 1881 }, { "epoch": 0.1951409185465527, "grad_norm": 0.8984375, "learning_rate": 0.0001979397124222719, "loss": 4.6138, "step": 1882 }, { "epoch": 0.19524460660104076, "grad_norm": 0.765625, "learning_rate": 0.0001979375182722488, "loss": 4.622, "step": 1883 }, { "epoch": 0.19534829465552883, "grad_norm": 0.796875, "learning_rate": 0.0001979353229666665, "loss": 4.5461, "step": 1884 }, { "epoch": 0.1954519827100169, "grad_norm": 0.78515625, "learning_rate": 0.00019793312650555093, "loss": 4.5757, "step": 1885 }, { "epoch": 0.19555567076450497, "grad_norm": 0.88671875, "learning_rate": 0.00019793092888892799, "loss": 4.6087, "step": 1886 }, { "epoch": 0.19565935881899307, "grad_norm": 0.94921875, "learning_rate": 0.00019792873011682357, "loss": 4.5903, "step": 1887 }, { "epoch": 0.19576304687348114, "grad_norm": 0.8515625, "learning_rate": 0.0001979265301892637, "loss": 4.5752, "step": 1888 }, { "epoch": 0.19586673492796922, "grad_norm": 0.83203125, "learning_rate": 0.00019792432910627425, "loss": 4.5782, "step": 1889 }, { "epoch": 0.1959704229824573, "grad_norm": 0.81640625, "learning_rate": 0.00019792212686788122, "loss": 4.5527, "step": 1890 }, { "epoch": 0.19607411103694536, "grad_norm": 0.98046875, "learning_rate": 0.00019791992347411064, "loss": 4.6206, "step": 1891 }, { "epoch": 0.19617779909143343, "grad_norm": 1.28125, "learning_rate": 0.00019791771892498843, "loss": 4.6059, "step": 1892 }, { "epoch": 0.1962814871459215, "grad_norm": 0.8125, "learning_rate": 0.00019791551322054067, "loss": 4.5715, "step": 1893 }, { "epoch": 0.19638517520040957, "grad_norm": 0.86328125, "learning_rate": 0.00019791330636079332, "loss": 4.5888, "step": 1894 }, { "epoch": 0.19648886325489764, "grad_norm": 0.921875, "learning_rate": 0.0001979110983457725, "loss": 4.5763, "step": 1895 }, { "epoch": 0.19659255130938572, "grad_norm": 0.95703125, "learning_rate": 0.0001979088891755042, "loss": 4.5754, "step": 1896 }, { "epoch": 0.1966962393638738, "grad_norm": 0.88671875, "learning_rate": 0.00019790667885001448, "loss": 4.6025, "step": 1897 }, { "epoch": 0.19679992741836186, "grad_norm": 0.8671875, "learning_rate": 0.00019790446736932946, "loss": 4.6139, "step": 1898 }, { "epoch": 0.19690361547284993, "grad_norm": 0.96875, "learning_rate": 0.0001979022547334752, "loss": 4.5304, "step": 1899 }, { "epoch": 0.197007303527338, "grad_norm": 1.015625, "learning_rate": 0.00019790004094247783, "loss": 4.6188, "step": 1900 }, { "epoch": 0.19711099158182607, "grad_norm": 0.81640625, "learning_rate": 0.00019789782599636348, "loss": 4.6019, "step": 1901 }, { "epoch": 0.19721467963631414, "grad_norm": 0.8359375, "learning_rate": 0.00019789560989515824, "loss": 4.5996, "step": 1902 }, { "epoch": 0.19731836769080222, "grad_norm": 1.1015625, "learning_rate": 0.0001978933926388883, "loss": 4.585, "step": 1903 }, { "epoch": 0.1974220557452903, "grad_norm": 0.8828125, "learning_rate": 0.0001978911742275798, "loss": 4.6249, "step": 1904 }, { "epoch": 0.19752574379977836, "grad_norm": 0.83984375, "learning_rate": 0.0001978889546612589, "loss": 4.5999, "step": 1905 }, { "epoch": 0.19762943185426643, "grad_norm": 0.83984375, "learning_rate": 0.00019788673393995182, "loss": 4.6135, "step": 1906 }, { "epoch": 0.1977331199087545, "grad_norm": 0.77734375, "learning_rate": 0.00019788451206368475, "loss": 4.6007, "step": 1907 }, { "epoch": 0.19783680796324257, "grad_norm": 0.83984375, "learning_rate": 0.00019788228903248393, "loss": 4.6105, "step": 1908 }, { "epoch": 0.19794049601773064, "grad_norm": 0.76171875, "learning_rate": 0.00019788006484637553, "loss": 4.6072, "step": 1909 }, { "epoch": 0.19804418407221874, "grad_norm": 0.76171875, "learning_rate": 0.00019787783950538587, "loss": 4.59, "step": 1910 }, { "epoch": 0.19814787212670681, "grad_norm": 0.78515625, "learning_rate": 0.0001978756130095411, "loss": 4.5976, "step": 1911 }, { "epoch": 0.19825156018119489, "grad_norm": 0.6953125, "learning_rate": 0.0001978733853588676, "loss": 4.5981, "step": 1912 }, { "epoch": 0.19835524823568296, "grad_norm": 0.6875, "learning_rate": 0.00019787115655339163, "loss": 4.6151, "step": 1913 }, { "epoch": 0.19845893629017103, "grad_norm": 0.765625, "learning_rate": 0.00019786892659313945, "loss": 4.6266, "step": 1914 }, { "epoch": 0.1985626243446591, "grad_norm": 0.70703125, "learning_rate": 0.00019786669547813737, "loss": 4.6112, "step": 1915 }, { "epoch": 0.19866631239914717, "grad_norm": 0.72265625, "learning_rate": 0.00019786446320841172, "loss": 4.5899, "step": 1916 }, { "epoch": 0.19877000045363524, "grad_norm": 0.84765625, "learning_rate": 0.00019786222978398889, "loss": 4.546, "step": 1917 }, { "epoch": 0.19887368850812331, "grad_norm": 0.734375, "learning_rate": 0.00019785999520489518, "loss": 4.537, "step": 1918 }, { "epoch": 0.19897737656261139, "grad_norm": 0.68359375, "learning_rate": 0.00019785775947115696, "loss": 4.5717, "step": 1919 }, { "epoch": 0.19908106461709946, "grad_norm": 0.8828125, "learning_rate": 0.00019785552258280064, "loss": 4.5215, "step": 1920 }, { "epoch": 0.19918475267158753, "grad_norm": 1.1171875, "learning_rate": 0.00019785328453985257, "loss": 4.6153, "step": 1921 }, { "epoch": 0.1992884407260756, "grad_norm": 0.6875, "learning_rate": 0.00019785104534233918, "loss": 4.5823, "step": 1922 }, { "epoch": 0.19939212878056367, "grad_norm": 0.73046875, "learning_rate": 0.00019784880499028692, "loss": 4.5774, "step": 1923 }, { "epoch": 0.19949581683505174, "grad_norm": 1.0078125, "learning_rate": 0.00019784656348372214, "loss": 4.6047, "step": 1924 }, { "epoch": 0.1995995048895398, "grad_norm": 0.87890625, "learning_rate": 0.00019784432082267138, "loss": 4.612, "step": 1925 }, { "epoch": 0.19970319294402789, "grad_norm": 0.83984375, "learning_rate": 0.00019784207700716103, "loss": 4.6276, "step": 1926 }, { "epoch": 0.19980688099851596, "grad_norm": 0.73828125, "learning_rate": 0.00019783983203721758, "loss": 4.5811, "step": 1927 }, { "epoch": 0.19991056905300403, "grad_norm": 0.953125, "learning_rate": 0.0001978375859128676, "loss": 4.5809, "step": 1928 }, { "epoch": 0.2000142571074921, "grad_norm": 1.0859375, "learning_rate": 0.0001978353386341375, "loss": 4.6231, "step": 1929 }, { "epoch": 0.20011794516198017, "grad_norm": 0.796875, "learning_rate": 0.00019783309020105375, "loss": 4.5642, "step": 1930 }, { "epoch": 0.20022163321646824, "grad_norm": 0.78515625, "learning_rate": 0.00019783084061364303, "loss": 4.581, "step": 1931 }, { "epoch": 0.20032532127095634, "grad_norm": 1.1875, "learning_rate": 0.00019782858987193178, "loss": 4.5868, "step": 1932 }, { "epoch": 0.2004290093254444, "grad_norm": 0.81640625, "learning_rate": 0.00019782633797594659, "loss": 4.617, "step": 1933 }, { "epoch": 0.20053269737993248, "grad_norm": 0.8046875, "learning_rate": 0.00019782408492571399, "loss": 4.6109, "step": 1934 }, { "epoch": 0.20063638543442056, "grad_norm": 1.0234375, "learning_rate": 0.00019782183072126062, "loss": 4.6252, "step": 1935 }, { "epoch": 0.20074007348890863, "grad_norm": 1.0625, "learning_rate": 0.00019781957536261303, "loss": 4.6283, "step": 1936 }, { "epoch": 0.2008437615433967, "grad_norm": 0.99609375, "learning_rate": 0.00019781731884979786, "loss": 4.606, "step": 1937 }, { "epoch": 0.20094744959788477, "grad_norm": 0.86328125, "learning_rate": 0.00019781506118284173, "loss": 4.6014, "step": 1938 }, { "epoch": 0.20105113765237284, "grad_norm": 0.7421875, "learning_rate": 0.00019781280236177127, "loss": 4.6413, "step": 1939 }, { "epoch": 0.2011548257068609, "grad_norm": 0.7734375, "learning_rate": 0.0001978105423866131, "loss": 4.5813, "step": 1940 }, { "epoch": 0.20125851376134898, "grad_norm": 0.66796875, "learning_rate": 0.00019780828125739398, "loss": 4.5541, "step": 1941 }, { "epoch": 0.20136220181583706, "grad_norm": 0.8515625, "learning_rate": 0.0001978060189741405, "loss": 4.6159, "step": 1942 }, { "epoch": 0.20146588987032513, "grad_norm": 0.97265625, "learning_rate": 0.00019780375553687937, "loss": 4.6638, "step": 1943 }, { "epoch": 0.2015695779248132, "grad_norm": 0.96484375, "learning_rate": 0.00019780149094563733, "loss": 4.6151, "step": 1944 }, { "epoch": 0.20167326597930127, "grad_norm": 0.95703125, "learning_rate": 0.00019779922520044108, "loss": 4.5835, "step": 1945 }, { "epoch": 0.20177695403378934, "grad_norm": 0.96875, "learning_rate": 0.00019779695830131732, "loss": 4.5769, "step": 1946 }, { "epoch": 0.2018806420882774, "grad_norm": 0.96875, "learning_rate": 0.00019779469024829285, "loss": 4.6143, "step": 1947 }, { "epoch": 0.20198433014276548, "grad_norm": 1.1796875, "learning_rate": 0.0001977924210413944, "loss": 4.604, "step": 1948 }, { "epoch": 0.20208801819725355, "grad_norm": 0.81640625, "learning_rate": 0.00019779015068064877, "loss": 4.5636, "step": 1949 }, { "epoch": 0.20219170625174163, "grad_norm": 0.91796875, "learning_rate": 0.00019778787916608273, "loss": 4.5581, "step": 1950 }, { "epoch": 0.2022953943062297, "grad_norm": 1.1171875, "learning_rate": 0.00019778560649772305, "loss": 4.5624, "step": 1951 }, { "epoch": 0.20239908236071777, "grad_norm": 0.77734375, "learning_rate": 0.00019778333267559658, "loss": 4.6108, "step": 1952 }, { "epoch": 0.20250277041520584, "grad_norm": 1.015625, "learning_rate": 0.00019778105769973018, "loss": 4.5904, "step": 1953 }, { "epoch": 0.20260645846969394, "grad_norm": 1.3203125, "learning_rate": 0.00019777878157015063, "loss": 4.5902, "step": 1954 }, { "epoch": 0.202710146524182, "grad_norm": 0.67578125, "learning_rate": 0.00019777650428688483, "loss": 4.6012, "step": 1955 }, { "epoch": 0.20281383457867008, "grad_norm": 1.515625, "learning_rate": 0.00019777422584995965, "loss": 4.544, "step": 1956 }, { "epoch": 0.20291752263315815, "grad_norm": 0.76171875, "learning_rate": 0.00019777194625940193, "loss": 4.6165, "step": 1957 }, { "epoch": 0.20302121068764623, "grad_norm": 1.59375, "learning_rate": 0.00019776966551523858, "loss": 4.5806, "step": 1958 }, { "epoch": 0.2031248987421343, "grad_norm": 0.9140625, "learning_rate": 0.00019776738361749655, "loss": 4.5912, "step": 1959 }, { "epoch": 0.20322858679662237, "grad_norm": 1.9375, "learning_rate": 0.00019776510056620272, "loss": 4.6098, "step": 1960 }, { "epoch": 0.20333227485111044, "grad_norm": 1.453125, "learning_rate": 0.00019776281636138407, "loss": 4.5712, "step": 1961 }, { "epoch": 0.2034359629055985, "grad_norm": 2.65625, "learning_rate": 0.0001977605310030675, "loss": 4.6034, "step": 1962 }, { "epoch": 0.20353965096008658, "grad_norm": 2.484375, "learning_rate": 0.00019775824449128003, "loss": 4.6355, "step": 1963 }, { "epoch": 0.20364333901457465, "grad_norm": 1.40625, "learning_rate": 0.0001977559568260486, "loss": 4.5732, "step": 1964 }, { "epoch": 0.20374702706906272, "grad_norm": 1.90625, "learning_rate": 0.0001977536680074002, "loss": 4.5894, "step": 1965 }, { "epoch": 0.2038507151235508, "grad_norm": 1.59375, "learning_rate": 0.00019775137803536186, "loss": 4.6234, "step": 1966 }, { "epoch": 0.20395440317803887, "grad_norm": 1.859375, "learning_rate": 0.00019774908690996056, "loss": 4.646, "step": 1967 }, { "epoch": 0.20405809123252694, "grad_norm": 1.6875, "learning_rate": 0.0001977467946312234, "loss": 4.518, "step": 1968 }, { "epoch": 0.204161779287015, "grad_norm": 1.5546875, "learning_rate": 0.00019774450119917737, "loss": 4.6341, "step": 1969 }, { "epoch": 0.20426546734150308, "grad_norm": 1.28125, "learning_rate": 0.00019774220661384956, "loss": 4.5852, "step": 1970 }, { "epoch": 0.20436915539599115, "grad_norm": 1.5, "learning_rate": 0.000197739910875267, "loss": 4.5918, "step": 1971 }, { "epoch": 0.20447284345047922, "grad_norm": 1.078125, "learning_rate": 0.00019773761398345682, "loss": 4.5398, "step": 1972 }, { "epoch": 0.2045765315049673, "grad_norm": 1.4375, "learning_rate": 0.00019773531593844613, "loss": 4.5249, "step": 1973 }, { "epoch": 0.20468021955945537, "grad_norm": 1.2421875, "learning_rate": 0.00019773301674026197, "loss": 4.5895, "step": 1974 }, { "epoch": 0.20478390761394344, "grad_norm": 1.234375, "learning_rate": 0.00019773071638893157, "loss": 4.6123, "step": 1975 }, { "epoch": 0.2048875956684315, "grad_norm": 1.2265625, "learning_rate": 0.00019772841488448198, "loss": 4.5979, "step": 1976 }, { "epoch": 0.2049912837229196, "grad_norm": 0.90234375, "learning_rate": 0.00019772611222694045, "loss": 4.5542, "step": 1977 }, { "epoch": 0.20509497177740768, "grad_norm": 1.1015625, "learning_rate": 0.00019772380841633406, "loss": 4.5651, "step": 1978 }, { "epoch": 0.20519865983189575, "grad_norm": 0.83984375, "learning_rate": 0.00019772150345269003, "loss": 4.5454, "step": 1979 }, { "epoch": 0.20530234788638382, "grad_norm": 1.0859375, "learning_rate": 0.00019771919733603557, "loss": 4.6146, "step": 1980 }, { "epoch": 0.2054060359408719, "grad_norm": 0.921875, "learning_rate": 0.00019771689006639785, "loss": 4.5667, "step": 1981 }, { "epoch": 0.20550972399535997, "grad_norm": 0.9765625, "learning_rate": 0.00019771458164380415, "loss": 4.5801, "step": 1982 }, { "epoch": 0.20561341204984804, "grad_norm": 1.234375, "learning_rate": 0.00019771227206828167, "loss": 4.6104, "step": 1983 }, { "epoch": 0.2057171001043361, "grad_norm": 0.84765625, "learning_rate": 0.00019770996133985767, "loss": 4.6062, "step": 1984 }, { "epoch": 0.20582078815882418, "grad_norm": 1.5, "learning_rate": 0.00019770764945855937, "loss": 4.5738, "step": 1985 }, { "epoch": 0.20592447621331225, "grad_norm": 0.98828125, "learning_rate": 0.00019770533642441413, "loss": 4.5966, "step": 1986 }, { "epoch": 0.20602816426780032, "grad_norm": 2.078125, "learning_rate": 0.0001977030222374492, "loss": 4.6273, "step": 1987 }, { "epoch": 0.2061318523222884, "grad_norm": 1.765625, "learning_rate": 0.00019770070689769184, "loss": 4.5446, "step": 1988 }, { "epoch": 0.20623554037677647, "grad_norm": 1.921875, "learning_rate": 0.00019769839040516946, "loss": 4.5785, "step": 1989 }, { "epoch": 0.20633922843126454, "grad_norm": 1.7578125, "learning_rate": 0.00019769607275990934, "loss": 4.5787, "step": 1990 }, { "epoch": 0.2064429164857526, "grad_norm": 1.546875, "learning_rate": 0.00019769375396193881, "loss": 4.5644, "step": 1991 }, { "epoch": 0.20654660454024068, "grad_norm": 1.421875, "learning_rate": 0.00019769143401128525, "loss": 4.5818, "step": 1992 }, { "epoch": 0.20665029259472875, "grad_norm": 1.3203125, "learning_rate": 0.00019768911290797604, "loss": 4.5749, "step": 1993 }, { "epoch": 0.20675398064921682, "grad_norm": 1.2890625, "learning_rate": 0.00019768679065203855, "loss": 4.5796, "step": 1994 }, { "epoch": 0.2068576687037049, "grad_norm": 1.03125, "learning_rate": 0.00019768446724350024, "loss": 4.5841, "step": 1995 }, { "epoch": 0.20696135675819297, "grad_norm": 1.4453125, "learning_rate": 0.00019768214268238842, "loss": 4.569, "step": 1996 }, { "epoch": 0.20706504481268104, "grad_norm": 1.046875, "learning_rate": 0.00019767981696873057, "loss": 4.6043, "step": 1997 }, { "epoch": 0.2071687328671691, "grad_norm": 1.7890625, "learning_rate": 0.00019767749010255416, "loss": 4.6222, "step": 1998 }, { "epoch": 0.2072724209216572, "grad_norm": 1.3984375, "learning_rate": 0.0001976751620838866, "loss": 4.5898, "step": 1999 }, { "epoch": 0.20737610897614528, "grad_norm": 2.09375, "learning_rate": 0.00019767283291275537, "loss": 4.5915, "step": 2000 }, { "epoch": 0.20747979703063335, "grad_norm": 1.640625, "learning_rate": 0.00019767050258918798, "loss": 4.5657, "step": 2001 }, { "epoch": 0.20758348508512142, "grad_norm": 2.0, "learning_rate": 0.00019766817111321186, "loss": 4.6221, "step": 2002 }, { "epoch": 0.2076871731396095, "grad_norm": 1.7265625, "learning_rate": 0.0001976658384848546, "loss": 4.6072, "step": 2003 }, { "epoch": 0.20779086119409756, "grad_norm": 1.796875, "learning_rate": 0.00019766350470414365, "loss": 4.576, "step": 2004 }, { "epoch": 0.20789454924858564, "grad_norm": 1.6328125, "learning_rate": 0.00019766116977110661, "loss": 4.5551, "step": 2005 }, { "epoch": 0.2079982373030737, "grad_norm": 1.6015625, "learning_rate": 0.000197658833685771, "loss": 4.5634, "step": 2006 }, { "epoch": 0.20810192535756178, "grad_norm": 1.4375, "learning_rate": 0.00019765649644816436, "loss": 4.5981, "step": 2007 }, { "epoch": 0.20820561341204985, "grad_norm": 1.6484375, "learning_rate": 0.0001976541580583143, "loss": 4.5633, "step": 2008 }, { "epoch": 0.20830930146653792, "grad_norm": 1.3828125, "learning_rate": 0.0001976518185162484, "loss": 4.6089, "step": 2009 }, { "epoch": 0.208412989521026, "grad_norm": 1.8125, "learning_rate": 0.00019764947782199426, "loss": 4.5937, "step": 2010 }, { "epoch": 0.20851667757551406, "grad_norm": 1.4140625, "learning_rate": 0.00019764713597557952, "loss": 4.5695, "step": 2011 }, { "epoch": 0.20862036563000214, "grad_norm": 2.078125, "learning_rate": 0.00019764479297703177, "loss": 4.6169, "step": 2012 }, { "epoch": 0.2087240536844902, "grad_norm": 1.8984375, "learning_rate": 0.00019764244882637867, "loss": 4.6056, "step": 2013 }, { "epoch": 0.20882774173897828, "grad_norm": 1.40625, "learning_rate": 0.00019764010352364792, "loss": 4.5951, "step": 2014 }, { "epoch": 0.20893142979346635, "grad_norm": 1.3046875, "learning_rate": 0.00019763775706886714, "loss": 4.5894, "step": 2015 }, { "epoch": 0.20903511784795442, "grad_norm": 1.375, "learning_rate": 0.00019763540946206404, "loss": 4.6161, "step": 2016 }, { "epoch": 0.2091388059024425, "grad_norm": 1.0625, "learning_rate": 0.00019763306070326632, "loss": 4.5434, "step": 2017 }, { "epoch": 0.20924249395693056, "grad_norm": 1.7734375, "learning_rate": 0.00019763071079250164, "loss": 4.5784, "step": 2018 }, { "epoch": 0.20934618201141864, "grad_norm": 1.453125, "learning_rate": 0.00019762835972979783, "loss": 4.5505, "step": 2019 }, { "epoch": 0.2094498700659067, "grad_norm": 1.671875, "learning_rate": 0.00019762600751518255, "loss": 4.5706, "step": 2020 }, { "epoch": 0.2095535581203948, "grad_norm": 1.46875, "learning_rate": 0.00019762365414868356, "loss": 4.5687, "step": 2021 }, { "epoch": 0.20965724617488288, "grad_norm": 1.78125, "learning_rate": 0.00019762129963032867, "loss": 4.5933, "step": 2022 }, { "epoch": 0.20976093422937095, "grad_norm": 1.4609375, "learning_rate": 0.0001976189439601456, "loss": 4.6114, "step": 2023 }, { "epoch": 0.20986462228385902, "grad_norm": 1.8203125, "learning_rate": 0.0001976165871381622, "loss": 4.5804, "step": 2024 }, { "epoch": 0.2099683103383471, "grad_norm": 1.5625, "learning_rate": 0.0001976142291644063, "loss": 4.6148, "step": 2025 }, { "epoch": 0.21007199839283516, "grad_norm": 1.8828125, "learning_rate": 0.00019761187003890563, "loss": 4.5602, "step": 2026 }, { "epoch": 0.21017568644732323, "grad_norm": 1.6796875, "learning_rate": 0.0001976095097616881, "loss": 4.6301, "step": 2027 }, { "epoch": 0.2102793745018113, "grad_norm": 1.5703125, "learning_rate": 0.00019760714833278148, "loss": 4.5808, "step": 2028 }, { "epoch": 0.21038306255629938, "grad_norm": 1.4453125, "learning_rate": 0.00019760478575221372, "loss": 4.5701, "step": 2029 }, { "epoch": 0.21048675061078745, "grad_norm": 1.40625, "learning_rate": 0.00019760242202001267, "loss": 4.5741, "step": 2030 }, { "epoch": 0.21059043866527552, "grad_norm": 1.1875, "learning_rate": 0.00019760005713620623, "loss": 4.5976, "step": 2031 }, { "epoch": 0.2106941267197636, "grad_norm": 1.3359375, "learning_rate": 0.00019759769110082223, "loss": 4.6137, "step": 2032 }, { "epoch": 0.21079781477425166, "grad_norm": 1.0859375, "learning_rate": 0.00019759532391388867, "loss": 4.5868, "step": 2033 }, { "epoch": 0.21090150282873973, "grad_norm": 1.8125, "learning_rate": 0.00019759295557543344, "loss": 4.5967, "step": 2034 }, { "epoch": 0.2110051908832278, "grad_norm": 1.421875, "learning_rate": 0.0001975905860854845, "loss": 4.5903, "step": 2035 }, { "epoch": 0.21110887893771588, "grad_norm": 2.015625, "learning_rate": 0.0001975882154440698, "loss": 4.6173, "step": 2036 }, { "epoch": 0.21121256699220395, "grad_norm": 1.8984375, "learning_rate": 0.00019758584365121734, "loss": 4.6118, "step": 2037 }, { "epoch": 0.21131625504669202, "grad_norm": 1.4609375, "learning_rate": 0.00019758347070695503, "loss": 4.5663, "step": 2038 }, { "epoch": 0.2114199431011801, "grad_norm": 1.4375, "learning_rate": 0.00019758109661131092, "loss": 4.5646, "step": 2039 }, { "epoch": 0.21152363115566816, "grad_norm": 1.296875, "learning_rate": 0.00019757872136431305, "loss": 4.6037, "step": 2040 }, { "epoch": 0.21162731921015623, "grad_norm": 1.1640625, "learning_rate": 0.0001975763449659894, "loss": 4.5981, "step": 2041 }, { "epoch": 0.2117310072646443, "grad_norm": 1.09375, "learning_rate": 0.00019757396741636803, "loss": 4.6128, "step": 2042 }, { "epoch": 0.21183469531913238, "grad_norm": 1.1875, "learning_rate": 0.00019757158871547699, "loss": 4.523, "step": 2043 }, { "epoch": 0.21193838337362048, "grad_norm": 0.94921875, "learning_rate": 0.00019756920886334432, "loss": 4.5688, "step": 2044 }, { "epoch": 0.21204207142810855, "grad_norm": 1.3671875, "learning_rate": 0.00019756682785999812, "loss": 4.5693, "step": 2045 }, { "epoch": 0.21214575948259662, "grad_norm": 0.96875, "learning_rate": 0.0001975644457054665, "loss": 4.5807, "step": 2046 }, { "epoch": 0.2122494475370847, "grad_norm": 1.7578125, "learning_rate": 0.00019756206239977751, "loss": 4.5522, "step": 2047 }, { "epoch": 0.21235313559157276, "grad_norm": 1.453125, "learning_rate": 0.00019755967794295938, "loss": 4.5546, "step": 2048 }, { "epoch": 0.21245682364606083, "grad_norm": 1.65625, "learning_rate": 0.00019755729233504013, "loss": 4.573, "step": 2049 }, { "epoch": 0.2125605117005489, "grad_norm": 1.390625, "learning_rate": 0.00019755490557604795, "loss": 4.5695, "step": 2050 }, { "epoch": 0.21266419975503698, "grad_norm": 1.859375, "learning_rate": 0.000197552517666011, "loss": 4.5793, "step": 2051 }, { "epoch": 0.21276788780952505, "grad_norm": 1.4375, "learning_rate": 0.00019755012860495747, "loss": 4.601, "step": 2052 }, { "epoch": 0.21287157586401312, "grad_norm": 2.109375, "learning_rate": 0.00019754773839291556, "loss": 4.6343, "step": 2053 }, { "epoch": 0.2129752639185012, "grad_norm": 1.96875, "learning_rate": 0.00019754534702991344, "loss": 4.6069, "step": 2054 }, { "epoch": 0.21307895197298926, "grad_norm": 1.25, "learning_rate": 0.00019754295451597932, "loss": 4.5174, "step": 2055 }, { "epoch": 0.21318264002747733, "grad_norm": 1.296875, "learning_rate": 0.00019754056085114144, "loss": 4.5313, "step": 2056 }, { "epoch": 0.2132863280819654, "grad_norm": 1.15625, "learning_rate": 0.0001975381660354281, "loss": 4.5743, "step": 2057 }, { "epoch": 0.21339001613645348, "grad_norm": 1.140625, "learning_rate": 0.00019753577006886744, "loss": 4.5637, "step": 2058 }, { "epoch": 0.21349370419094155, "grad_norm": 1.1171875, "learning_rate": 0.0001975333729514878, "loss": 4.5662, "step": 2059 }, { "epoch": 0.21359739224542962, "grad_norm": 0.984375, "learning_rate": 0.00019753097468331747, "loss": 4.5688, "step": 2060 }, { "epoch": 0.2137010802999177, "grad_norm": 1.15625, "learning_rate": 0.00019752857526438472, "loss": 4.5697, "step": 2061 }, { "epoch": 0.21380476835440576, "grad_norm": 0.83203125, "learning_rate": 0.0001975261746947179, "loss": 4.6017, "step": 2062 }, { "epoch": 0.21390845640889383, "grad_norm": 1.203125, "learning_rate": 0.0001975237729743453, "loss": 4.5543, "step": 2063 }, { "epoch": 0.2140121444633819, "grad_norm": 0.859375, "learning_rate": 0.00019752137010329527, "loss": 4.5657, "step": 2064 }, { "epoch": 0.21411583251786998, "grad_norm": 1.4609375, "learning_rate": 0.00019751896608159614, "loss": 4.5717, "step": 2065 }, { "epoch": 0.21421952057235807, "grad_norm": 1.1015625, "learning_rate": 0.0001975165609092763, "loss": 4.5827, "step": 2066 }, { "epoch": 0.21432320862684615, "grad_norm": 1.90625, "learning_rate": 0.00019751415458636414, "loss": 4.5571, "step": 2067 }, { "epoch": 0.21442689668133422, "grad_norm": 1.90625, "learning_rate": 0.000197511747112888, "loss": 4.6079, "step": 2068 }, { "epoch": 0.2145305847358223, "grad_norm": 0.91796875, "learning_rate": 0.00019750933848887634, "loss": 4.5673, "step": 2069 }, { "epoch": 0.21463427279031036, "grad_norm": 1.5390625, "learning_rate": 0.00019750692871435755, "loss": 4.5605, "step": 2070 }, { "epoch": 0.21473796084479843, "grad_norm": 1.1015625, "learning_rate": 0.00019750451778936007, "loss": 4.5711, "step": 2071 }, { "epoch": 0.2148416488992865, "grad_norm": 1.578125, "learning_rate": 0.00019750210571391232, "loss": 4.563, "step": 2072 }, { "epoch": 0.21494533695377457, "grad_norm": 1.5078125, "learning_rate": 0.00019749969248804283, "loss": 4.5555, "step": 2073 }, { "epoch": 0.21504902500826265, "grad_norm": 1.015625, "learning_rate": 0.00019749727811178, "loss": 4.589, "step": 2074 }, { "epoch": 0.21515271306275072, "grad_norm": 1.421875, "learning_rate": 0.00019749486258515238, "loss": 4.5742, "step": 2075 }, { "epoch": 0.2152564011172388, "grad_norm": 1.0859375, "learning_rate": 0.00019749244590818842, "loss": 4.5774, "step": 2076 }, { "epoch": 0.21536008917172686, "grad_norm": 1.7421875, "learning_rate": 0.00019749002808091667, "loss": 4.5611, "step": 2077 }, { "epoch": 0.21546377722621493, "grad_norm": 1.5234375, "learning_rate": 0.00019748760910336558, "loss": 4.551, "step": 2078 }, { "epoch": 0.215567465280703, "grad_norm": 1.53125, "learning_rate": 0.00019748518897556383, "loss": 4.6089, "step": 2079 }, { "epoch": 0.21567115333519107, "grad_norm": 1.1796875, "learning_rate": 0.00019748276769753983, "loss": 4.5747, "step": 2080 }, { "epoch": 0.21577484138967915, "grad_norm": 1.515625, "learning_rate": 0.00019748034526932226, "loss": 4.5606, "step": 2081 }, { "epoch": 0.21587852944416722, "grad_norm": 0.9375, "learning_rate": 0.00019747792169093963, "loss": 4.5935, "step": 2082 }, { "epoch": 0.2159822174986553, "grad_norm": 1.8125, "learning_rate": 0.00019747549696242059, "loss": 4.6101, "step": 2083 }, { "epoch": 0.21608590555314336, "grad_norm": 1.59375, "learning_rate": 0.0001974730710837937, "loss": 4.5796, "step": 2084 }, { "epoch": 0.21618959360763143, "grad_norm": 1.7890625, "learning_rate": 0.00019747064405508763, "loss": 4.5739, "step": 2085 }, { "epoch": 0.2162932816621195, "grad_norm": 1.5390625, "learning_rate": 0.00019746821587633099, "loss": 4.5887, "step": 2086 }, { "epoch": 0.21639696971660757, "grad_norm": 1.8359375, "learning_rate": 0.0001974657865475524, "loss": 4.544, "step": 2087 }, { "epoch": 0.21650065777109567, "grad_norm": 1.6875, "learning_rate": 0.00019746335606878054, "loss": 4.5873, "step": 2088 }, { "epoch": 0.21660434582558374, "grad_norm": 1.625, "learning_rate": 0.00019746092444004412, "loss": 4.6125, "step": 2089 }, { "epoch": 0.21670803388007182, "grad_norm": 1.453125, "learning_rate": 0.00019745849166137185, "loss": 4.5585, "step": 2090 }, { "epoch": 0.2168117219345599, "grad_norm": 1.453125, "learning_rate": 0.00019745605773279236, "loss": 4.5969, "step": 2091 }, { "epoch": 0.21691540998904796, "grad_norm": 1.203125, "learning_rate": 0.00019745362265433442, "loss": 4.5379, "step": 2092 }, { "epoch": 0.21701909804353603, "grad_norm": 1.7578125, "learning_rate": 0.00019745118642602673, "loss": 4.5126, "step": 2093 }, { "epoch": 0.2171227860980241, "grad_norm": 1.484375, "learning_rate": 0.00019744874904789806, "loss": 4.5135, "step": 2094 }, { "epoch": 0.21722647415251217, "grad_norm": 1.8359375, "learning_rate": 0.00019744631051997718, "loss": 4.5868, "step": 2095 }, { "epoch": 0.21733016220700024, "grad_norm": 1.640625, "learning_rate": 0.0001974438708422928, "loss": 4.5675, "step": 2096 }, { "epoch": 0.21743385026148832, "grad_norm": 1.453125, "learning_rate": 0.00019744143001487378, "loss": 4.5875, "step": 2097 }, { "epoch": 0.2175375383159764, "grad_norm": 1.34375, "learning_rate": 0.00019743898803774884, "loss": 4.6141, "step": 2098 }, { "epoch": 0.21764122637046446, "grad_norm": 1.4140625, "learning_rate": 0.0001974365449109469, "loss": 4.5761, "step": 2099 }, { "epoch": 0.21774491442495253, "grad_norm": 1.2734375, "learning_rate": 0.0001974341006344967, "loss": 4.5581, "step": 2100 }, { "epoch": 0.2178486024794406, "grad_norm": 1.1484375, "learning_rate": 0.0001974316552084271, "loss": 4.5847, "step": 2101 }, { "epoch": 0.21795229053392867, "grad_norm": 1.7109375, "learning_rate": 0.00019742920863276693, "loss": 4.5691, "step": 2102 }, { "epoch": 0.21805597858841674, "grad_norm": 1.234375, "learning_rate": 0.00019742676090754512, "loss": 4.5477, "step": 2103 }, { "epoch": 0.21815966664290481, "grad_norm": 2.421875, "learning_rate": 0.0001974243120327905, "loss": 4.5926, "step": 2104 }, { "epoch": 0.2182633546973929, "grad_norm": 2.1875, "learning_rate": 0.000197421862008532, "loss": 4.588, "step": 2105 }, { "epoch": 0.21836704275188096, "grad_norm": 1.5703125, "learning_rate": 0.0001974194108347985, "loss": 4.5484, "step": 2106 }, { "epoch": 0.21847073080636903, "grad_norm": 1.515625, "learning_rate": 0.00019741695851161893, "loss": 4.6028, "step": 2107 }, { "epoch": 0.2185744188608571, "grad_norm": 1.6171875, "learning_rate": 0.00019741450503902222, "loss": 4.6113, "step": 2108 }, { "epoch": 0.21867810691534517, "grad_norm": 1.1171875, "learning_rate": 0.00019741205041703733, "loss": 4.5785, "step": 2109 }, { "epoch": 0.21878179496983324, "grad_norm": 2.265625, "learning_rate": 0.0001974095946456932, "loss": 4.5964, "step": 2110 }, { "epoch": 0.21888548302432134, "grad_norm": 2.09375, "learning_rate": 0.0001974071377250188, "loss": 4.5467, "step": 2111 }, { "epoch": 0.2189891710788094, "grad_norm": 1.34375, "learning_rate": 0.0001974046796550432, "loss": 4.5953, "step": 2112 }, { "epoch": 0.21909285913329749, "grad_norm": 1.3359375, "learning_rate": 0.00019740222043579527, "loss": 4.6259, "step": 2113 }, { "epoch": 0.21919654718778556, "grad_norm": 1.421875, "learning_rate": 0.00019739976006730414, "loss": 4.5235, "step": 2114 }, { "epoch": 0.21930023524227363, "grad_norm": 1.140625, "learning_rate": 0.0001973972985495988, "loss": 4.5858, "step": 2115 }, { "epoch": 0.2194039232967617, "grad_norm": 1.84375, "learning_rate": 0.00019739483588270828, "loss": 4.5866, "step": 2116 }, { "epoch": 0.21950761135124977, "grad_norm": 1.484375, "learning_rate": 0.00019739237206666164, "loss": 4.6022, "step": 2117 }, { "epoch": 0.21961129940573784, "grad_norm": 1.8828125, "learning_rate": 0.00019738990710148796, "loss": 4.6095, "step": 2118 }, { "epoch": 0.2197149874602259, "grad_norm": 1.7265625, "learning_rate": 0.00019738744098721632, "loss": 4.5528, "step": 2119 }, { "epoch": 0.21981867551471398, "grad_norm": 1.6796875, "learning_rate": 0.00019738497372387586, "loss": 4.5543, "step": 2120 }, { "epoch": 0.21992236356920206, "grad_norm": 1.4453125, "learning_rate": 0.0001973825053114956, "loss": 4.5641, "step": 2121 }, { "epoch": 0.22002605162369013, "grad_norm": 1.75, "learning_rate": 0.00019738003575010474, "loss": 4.5842, "step": 2122 }, { "epoch": 0.2201297396781782, "grad_norm": 1.3203125, "learning_rate": 0.0001973775650397324, "loss": 4.5878, "step": 2123 }, { "epoch": 0.22023342773266627, "grad_norm": 2.21875, "learning_rate": 0.0001973750931804077, "loss": 4.5831, "step": 2124 }, { "epoch": 0.22033711578715434, "grad_norm": 1.984375, "learning_rate": 0.00019737262017215982, "loss": 4.5747, "step": 2125 }, { "epoch": 0.2204408038416424, "grad_norm": 1.546875, "learning_rate": 0.000197370146015018, "loss": 4.5351, "step": 2126 }, { "epoch": 0.22054449189613048, "grad_norm": 1.5390625, "learning_rate": 0.00019736767070901133, "loss": 4.5474, "step": 2127 }, { "epoch": 0.22064817995061856, "grad_norm": 1.34375, "learning_rate": 0.00019736519425416908, "loss": 4.6022, "step": 2128 }, { "epoch": 0.22075186800510663, "grad_norm": 1.21875, "learning_rate": 0.00019736271665052047, "loss": 4.5988, "step": 2129 }, { "epoch": 0.2208555560595947, "grad_norm": 1.5390625, "learning_rate": 0.00019736023789809472, "loss": 4.5532, "step": 2130 }, { "epoch": 0.22095924411408277, "grad_norm": 1.296875, "learning_rate": 0.0001973577579969211, "loss": 4.5863, "step": 2131 }, { "epoch": 0.22106293216857084, "grad_norm": 2.0, "learning_rate": 0.0001973552769470288, "loss": 4.5876, "step": 2132 }, { "epoch": 0.22116662022305894, "grad_norm": 1.8125, "learning_rate": 0.00019735279474844718, "loss": 4.6137, "step": 2133 }, { "epoch": 0.221270308277547, "grad_norm": 1.4375, "learning_rate": 0.00019735031140120547, "loss": 4.5873, "step": 2134 }, { "epoch": 0.22137399633203508, "grad_norm": 1.3515625, "learning_rate": 0.00019734782690533298, "loss": 4.605, "step": 2135 }, { "epoch": 0.22147768438652315, "grad_norm": 1.4921875, "learning_rate": 0.00019734534126085904, "loss": 4.603, "step": 2136 }, { "epoch": 0.22158137244101123, "grad_norm": 1.34375, "learning_rate": 0.00019734285446781297, "loss": 4.5563, "step": 2137 }, { "epoch": 0.2216850604954993, "grad_norm": 1.5078125, "learning_rate": 0.00019734036652622412, "loss": 4.5683, "step": 2138 }, { "epoch": 0.22178874854998737, "grad_norm": 1.421875, "learning_rate": 0.00019733787743612185, "loss": 4.5973, "step": 2139 }, { "epoch": 0.22189243660447544, "grad_norm": 1.40625, "learning_rate": 0.00019733538719753552, "loss": 4.6036, "step": 2140 }, { "epoch": 0.2219961246589635, "grad_norm": 1.2109375, "learning_rate": 0.00019733289581049448, "loss": 4.5687, "step": 2141 }, { "epoch": 0.22209981271345158, "grad_norm": 1.421875, "learning_rate": 0.00019733040327502815, "loss": 4.6214, "step": 2142 }, { "epoch": 0.22220350076793965, "grad_norm": 1.203125, "learning_rate": 0.000197327909591166, "loss": 4.5759, "step": 2143 }, { "epoch": 0.22230718882242773, "grad_norm": 1.5078125, "learning_rate": 0.00019732541475893733, "loss": 4.5674, "step": 2144 }, { "epoch": 0.2224108768769158, "grad_norm": 1.3125, "learning_rate": 0.00019732291877837165, "loss": 4.5504, "step": 2145 }, { "epoch": 0.22251456493140387, "grad_norm": 1.4296875, "learning_rate": 0.00019732042164949845, "loss": 4.5623, "step": 2146 }, { "epoch": 0.22261825298589194, "grad_norm": 1.1875, "learning_rate": 0.0001973179233723471, "loss": 4.5639, "step": 2147 }, { "epoch": 0.22272194104038, "grad_norm": 1.3671875, "learning_rate": 0.00019731542394694715, "loss": 4.5888, "step": 2148 }, { "epoch": 0.22282562909486808, "grad_norm": 1.171875, "learning_rate": 0.00019731292337332807, "loss": 4.5787, "step": 2149 }, { "epoch": 0.22292931714935615, "grad_norm": 1.46875, "learning_rate": 0.00019731042165151936, "loss": 4.5718, "step": 2150 }, { "epoch": 0.22303300520384423, "grad_norm": 1.21875, "learning_rate": 0.00019730791878155052, "loss": 4.5511, "step": 2151 }, { "epoch": 0.2231366932583323, "grad_norm": 1.484375, "learning_rate": 0.0001973054147634511, "loss": 4.588, "step": 2152 }, { "epoch": 0.22324038131282037, "grad_norm": 1.4140625, "learning_rate": 0.00019730290959725063, "loss": 4.5352, "step": 2153 }, { "epoch": 0.22334406936730844, "grad_norm": 1.484375, "learning_rate": 0.0001973004032829787, "loss": 4.5747, "step": 2154 }, { "epoch": 0.22344775742179654, "grad_norm": 1.3046875, "learning_rate": 0.00019729789582066486, "loss": 4.5886, "step": 2155 }, { "epoch": 0.2235514454762846, "grad_norm": 1.46875, "learning_rate": 0.0001972953872103387, "loss": 4.5761, "step": 2156 }, { "epoch": 0.22365513353077268, "grad_norm": 1.25, "learning_rate": 0.0001972928774520298, "loss": 4.5831, "step": 2157 }, { "epoch": 0.22375882158526075, "grad_norm": 1.5078125, "learning_rate": 0.0001972903665457678, "loss": 4.5881, "step": 2158 }, { "epoch": 0.22386250963974882, "grad_norm": 1.2421875, "learning_rate": 0.00019728785449158232, "loss": 4.5763, "step": 2159 }, { "epoch": 0.2239661976942369, "grad_norm": 1.546875, "learning_rate": 0.00019728534128950299, "loss": 4.5638, "step": 2160 }, { "epoch": 0.22406988574872497, "grad_norm": 1.1171875, "learning_rate": 0.00019728282693955946, "loss": 4.5464, "step": 2161 }, { "epoch": 0.22417357380321304, "grad_norm": 1.78125, "learning_rate": 0.00019728031144178142, "loss": 4.589, "step": 2162 }, { "epoch": 0.2242772618577011, "grad_norm": 1.4296875, "learning_rate": 0.00019727779479619852, "loss": 4.5857, "step": 2163 }, { "epoch": 0.22438094991218918, "grad_norm": 1.984375, "learning_rate": 0.00019727527700284046, "loss": 4.5437, "step": 2164 }, { "epoch": 0.22448463796667725, "grad_norm": 1.7890625, "learning_rate": 0.00019727275806173696, "loss": 4.573, "step": 2165 }, { "epoch": 0.22458832602116532, "grad_norm": 1.421875, "learning_rate": 0.00019727023797291778, "loss": 4.5802, "step": 2166 }, { "epoch": 0.2246920140756534, "grad_norm": 1.359375, "learning_rate": 0.00019726771673641256, "loss": 4.5805, "step": 2167 }, { "epoch": 0.22479570213014147, "grad_norm": 1.65625, "learning_rate": 0.00019726519435225113, "loss": 4.593, "step": 2168 }, { "epoch": 0.22489939018462954, "grad_norm": 1.4296875, "learning_rate": 0.0001972626708204632, "loss": 4.5521, "step": 2169 }, { "epoch": 0.2250030782391176, "grad_norm": 1.546875, "learning_rate": 0.00019726014614107856, "loss": 4.5509, "step": 2170 }, { "epoch": 0.22510676629360568, "grad_norm": 1.484375, "learning_rate": 0.00019725762031412702, "loss": 4.5501, "step": 2171 }, { "epoch": 0.22521045434809375, "grad_norm": 1.515625, "learning_rate": 0.0001972550933396384, "loss": 4.6034, "step": 2172 }, { "epoch": 0.22531414240258182, "grad_norm": 1.328125, "learning_rate": 0.0001972525652176424, "loss": 4.5989, "step": 2173 }, { "epoch": 0.2254178304570699, "grad_norm": 1.4921875, "learning_rate": 0.000197250035948169, "loss": 4.6182, "step": 2174 }, { "epoch": 0.22552151851155797, "grad_norm": 1.296875, "learning_rate": 0.00019724750553124794, "loss": 4.54, "step": 2175 }, { "epoch": 0.22562520656604604, "grad_norm": 1.5625, "learning_rate": 0.00019724497396690911, "loss": 4.6102, "step": 2176 }, { "epoch": 0.2257288946205341, "grad_norm": 1.3984375, "learning_rate": 0.0001972424412551824, "loss": 4.6087, "step": 2177 }, { "epoch": 0.2258325826750222, "grad_norm": 1.484375, "learning_rate": 0.00019723990739609765, "loss": 4.5766, "step": 2178 }, { "epoch": 0.22593627072951028, "grad_norm": 1.3046875, "learning_rate": 0.0001972373723896848, "loss": 4.5922, "step": 2179 }, { "epoch": 0.22603995878399835, "grad_norm": 1.5625, "learning_rate": 0.0001972348362359737, "loss": 4.6109, "step": 2180 }, { "epoch": 0.22614364683848642, "grad_norm": 1.296875, "learning_rate": 0.00019723229893499436, "loss": 4.5587, "step": 2181 }, { "epoch": 0.2262473348929745, "grad_norm": 1.6796875, "learning_rate": 0.00019722976048677668, "loss": 4.6016, "step": 2182 }, { "epoch": 0.22635102294746257, "grad_norm": 1.46875, "learning_rate": 0.00019722722089135058, "loss": 4.6004, "step": 2183 }, { "epoch": 0.22645471100195064, "grad_norm": 1.40625, "learning_rate": 0.00019722468014874602, "loss": 4.5439, "step": 2184 }, { "epoch": 0.2265583990564387, "grad_norm": 1.328125, "learning_rate": 0.00019722213825899306, "loss": 4.538, "step": 2185 }, { "epoch": 0.22666208711092678, "grad_norm": 1.421875, "learning_rate": 0.0001972195952221216, "loss": 4.5763, "step": 2186 }, { "epoch": 0.22676577516541485, "grad_norm": 1.2421875, "learning_rate": 0.00019721705103816167, "loss": 4.5971, "step": 2187 }, { "epoch": 0.22686946321990292, "grad_norm": 1.3671875, "learning_rate": 0.00019721450570714332, "loss": 4.5941, "step": 2188 }, { "epoch": 0.226973151274391, "grad_norm": 1.2578125, "learning_rate": 0.00019721195922909658, "loss": 4.5869, "step": 2189 }, { "epoch": 0.22707683932887907, "grad_norm": 1.515625, "learning_rate": 0.00019720941160405146, "loss": 4.5875, "step": 2190 }, { "epoch": 0.22718052738336714, "grad_norm": 1.2421875, "learning_rate": 0.00019720686283203803, "loss": 4.5523, "step": 2191 }, { "epoch": 0.2272842154378552, "grad_norm": 1.5078125, "learning_rate": 0.0001972043129130864, "loss": 4.5619, "step": 2192 }, { "epoch": 0.22738790349234328, "grad_norm": 1.2890625, "learning_rate": 0.00019720176184722662, "loss": 4.55, "step": 2193 }, { "epoch": 0.22749159154683135, "grad_norm": 1.9375, "learning_rate": 0.00019719920963448876, "loss": 4.5597, "step": 2194 }, { "epoch": 0.22759527960131942, "grad_norm": 1.609375, "learning_rate": 0.000197196656274903, "loss": 4.5254, "step": 2195 }, { "epoch": 0.2276989676558075, "grad_norm": 1.6875, "learning_rate": 0.00019719410176849943, "loss": 4.5352, "step": 2196 }, { "epoch": 0.22780265571029557, "grad_norm": 1.484375, "learning_rate": 0.0001971915461153082, "loss": 4.5494, "step": 2197 }, { "epoch": 0.22790634376478364, "grad_norm": 1.421875, "learning_rate": 0.00019718898931535948, "loss": 4.5682, "step": 2198 }, { "epoch": 0.2280100318192717, "grad_norm": 1.2734375, "learning_rate": 0.0001971864313686834, "loss": 4.562, "step": 2199 }, { "epoch": 0.2281137198737598, "grad_norm": 1.5, "learning_rate": 0.00019718387227531014, "loss": 4.5403, "step": 2200 }, { "epoch": 0.22821740792824788, "grad_norm": 1.21875, "learning_rate": 0.00019718131203526996, "loss": 4.5693, "step": 2201 }, { "epoch": 0.22832109598273595, "grad_norm": 1.4140625, "learning_rate": 0.00019717875064859298, "loss": 4.5906, "step": 2202 }, { "epoch": 0.22842478403722402, "grad_norm": 1.203125, "learning_rate": 0.0001971761881153095, "loss": 4.535, "step": 2203 }, { "epoch": 0.2285284720917121, "grad_norm": 1.6171875, "learning_rate": 0.0001971736244354497, "loss": 4.5743, "step": 2204 }, { "epoch": 0.22863216014620016, "grad_norm": 1.2421875, "learning_rate": 0.00019717105960904386, "loss": 4.5418, "step": 2205 }, { "epoch": 0.22873584820068824, "grad_norm": 1.7421875, "learning_rate": 0.00019716849363612222, "loss": 4.581, "step": 2206 }, { "epoch": 0.2288395362551763, "grad_norm": 1.375, "learning_rate": 0.00019716592651671506, "loss": 4.5945, "step": 2207 }, { "epoch": 0.22894322430966438, "grad_norm": 1.90625, "learning_rate": 0.00019716335825085269, "loss": 4.5853, "step": 2208 }, { "epoch": 0.22904691236415245, "grad_norm": 1.8359375, "learning_rate": 0.0001971607888385654, "loss": 4.574, "step": 2209 }, { "epoch": 0.22915060041864052, "grad_norm": 1.4140625, "learning_rate": 0.0001971582182798835, "loss": 4.5358, "step": 2210 }, { "epoch": 0.2292542884731286, "grad_norm": 1.34375, "learning_rate": 0.0001971556465748373, "loss": 4.5675, "step": 2211 }, { "epoch": 0.22935797652761666, "grad_norm": 1.546875, "learning_rate": 0.0001971530737234572, "loss": 4.5541, "step": 2212 }, { "epoch": 0.22946166458210474, "grad_norm": 1.328125, "learning_rate": 0.00019715049972577353, "loss": 4.5301, "step": 2213 }, { "epoch": 0.2295653526365928, "grad_norm": 1.5703125, "learning_rate": 0.00019714792458181663, "loss": 4.5643, "step": 2214 }, { "epoch": 0.22966904069108088, "grad_norm": 1.4140625, "learning_rate": 0.00019714534829161693, "loss": 4.5358, "step": 2215 }, { "epoch": 0.22977272874556895, "grad_norm": 1.4609375, "learning_rate": 0.0001971427708552048, "loss": 4.5537, "step": 2216 }, { "epoch": 0.22987641680005702, "grad_norm": 1.3046875, "learning_rate": 0.00019714019227261067, "loss": 4.593, "step": 2217 }, { "epoch": 0.2299801048545451, "grad_norm": 1.59375, "learning_rate": 0.00019713761254386495, "loss": 4.5416, "step": 2218 }, { "epoch": 0.23008379290903316, "grad_norm": 1.375, "learning_rate": 0.00019713503166899807, "loss": 4.5677, "step": 2219 }, { "epoch": 0.23018748096352123, "grad_norm": 1.5390625, "learning_rate": 0.0001971324496480405, "loss": 4.5802, "step": 2220 }, { "epoch": 0.2302911690180093, "grad_norm": 1.4609375, "learning_rate": 0.0001971298664810227, "loss": 4.516, "step": 2221 }, { "epoch": 0.2303948570724974, "grad_norm": 1.3828125, "learning_rate": 0.00019712728216797514, "loss": 4.5601, "step": 2222 }, { "epoch": 0.23049854512698548, "grad_norm": 1.1796875, "learning_rate": 0.0001971246967089283, "loss": 4.5543, "step": 2223 }, { "epoch": 0.23060223318147355, "grad_norm": 1.515625, "learning_rate": 0.00019712211010391274, "loss": 4.5348, "step": 2224 }, { "epoch": 0.23070592123596162, "grad_norm": 1.2734375, "learning_rate": 0.0001971195223529589, "loss": 4.5637, "step": 2225 }, { "epoch": 0.2308096092904497, "grad_norm": 1.90625, "learning_rate": 0.00019711693345609739, "loss": 4.608, "step": 2226 }, { "epoch": 0.23091329734493776, "grad_norm": 1.671875, "learning_rate": 0.0001971143434133587, "loss": 4.5851, "step": 2227 }, { "epoch": 0.23101698539942583, "grad_norm": 1.7109375, "learning_rate": 0.00019711175222477344, "loss": 4.5839, "step": 2228 }, { "epoch": 0.2311206734539139, "grad_norm": 1.546875, "learning_rate": 0.00019710915989037213, "loss": 4.5727, "step": 2229 }, { "epoch": 0.23122436150840198, "grad_norm": 1.390625, "learning_rate": 0.0001971065664101854, "loss": 4.5646, "step": 2230 }, { "epoch": 0.23132804956289005, "grad_norm": 1.3828125, "learning_rate": 0.00019710397178424383, "loss": 4.5607, "step": 2231 }, { "epoch": 0.23143173761737812, "grad_norm": 1.1875, "learning_rate": 0.00019710137601257804, "loss": 4.5645, "step": 2232 }, { "epoch": 0.2315354256718662, "grad_norm": 1.1015625, "learning_rate": 0.00019709877909521864, "loss": 4.5555, "step": 2233 }, { "epoch": 0.23163911372635426, "grad_norm": 1.1171875, "learning_rate": 0.0001970961810321963, "loss": 4.5427, "step": 2234 }, { "epoch": 0.23174280178084233, "grad_norm": 0.9140625, "learning_rate": 0.00019709358182354162, "loss": 4.5674, "step": 2235 }, { "epoch": 0.2318464898353304, "grad_norm": 1.171875, "learning_rate": 0.00019709098146928535, "loss": 4.53, "step": 2236 }, { "epoch": 0.23195017788981848, "grad_norm": 0.8515625, "learning_rate": 0.0001970883799694581, "loss": 4.5445, "step": 2237 }, { "epoch": 0.23205386594430655, "grad_norm": 1.375, "learning_rate": 0.00019708577732409062, "loss": 4.5639, "step": 2238 }, { "epoch": 0.23215755399879462, "grad_norm": 1.0703125, "learning_rate": 0.00019708317353321357, "loss": 4.5738, "step": 2239 }, { "epoch": 0.2322612420532827, "grad_norm": 1.609375, "learning_rate": 0.0001970805685968577, "loss": 4.5423, "step": 2240 }, { "epoch": 0.23236493010777076, "grad_norm": 1.5546875, "learning_rate": 0.00019707796251505375, "loss": 4.5786, "step": 2241 }, { "epoch": 0.23246861816225883, "grad_norm": 1.0078125, "learning_rate": 0.00019707535528783244, "loss": 4.562, "step": 2242 }, { "epoch": 0.2325723062167469, "grad_norm": 1.09375, "learning_rate": 0.00019707274691522456, "loss": 4.547, "step": 2243 }, { "epoch": 0.23267599427123498, "grad_norm": 0.99609375, "learning_rate": 0.0001970701373972609, "loss": 4.5492, "step": 2244 }, { "epoch": 0.23277968232572308, "grad_norm": 0.9296875, "learning_rate": 0.00019706752673397218, "loss": 4.5297, "step": 2245 }, { "epoch": 0.23288337038021115, "grad_norm": 0.90625, "learning_rate": 0.00019706491492538927, "loss": 4.5933, "step": 2246 }, { "epoch": 0.23298705843469922, "grad_norm": 0.8984375, "learning_rate": 0.00019706230197154298, "loss": 4.5682, "step": 2247 }, { "epoch": 0.2330907464891873, "grad_norm": 0.83984375, "learning_rate": 0.00019705968787246412, "loss": 4.5609, "step": 2248 }, { "epoch": 0.23319443454367536, "grad_norm": 0.828125, "learning_rate": 0.00019705707262818354, "loss": 4.5367, "step": 2249 }, { "epoch": 0.23329812259816343, "grad_norm": 0.80078125, "learning_rate": 0.0001970544562387321, "loss": 4.5668, "step": 2250 }, { "epoch": 0.2334018106526515, "grad_norm": 0.78515625, "learning_rate": 0.00019705183870414062, "loss": 4.5505, "step": 2251 }, { "epoch": 0.23350549870713957, "grad_norm": 0.73828125, "learning_rate": 0.00019704922002444008, "loss": 4.5626, "step": 2252 }, { "epoch": 0.23360918676162765, "grad_norm": 0.71875, "learning_rate": 0.00019704660019966133, "loss": 4.5656, "step": 2253 }, { "epoch": 0.23371287481611572, "grad_norm": 0.6953125, "learning_rate": 0.00019704397922983526, "loss": 4.5666, "step": 2254 }, { "epoch": 0.2338165628706038, "grad_norm": 0.6328125, "learning_rate": 0.00019704135711499286, "loss": 4.5267, "step": 2255 }, { "epoch": 0.23392025092509186, "grad_norm": 0.6328125, "learning_rate": 0.00019703873385516497, "loss": 4.5764, "step": 2256 }, { "epoch": 0.23402393897957993, "grad_norm": 0.6328125, "learning_rate": 0.0001970361094503826, "loss": 4.5261, "step": 2257 }, { "epoch": 0.234127627034068, "grad_norm": 0.62109375, "learning_rate": 0.00019703348390067674, "loss": 4.5832, "step": 2258 }, { "epoch": 0.23423131508855607, "grad_norm": 0.6171875, "learning_rate": 0.0001970308572060783, "loss": 4.5381, "step": 2259 }, { "epoch": 0.23433500314304415, "grad_norm": 0.62890625, "learning_rate": 0.00019702822936661836, "loss": 4.5449, "step": 2260 }, { "epoch": 0.23443869119753222, "grad_norm": 0.609375, "learning_rate": 0.00019702560038232782, "loss": 4.5032, "step": 2261 }, { "epoch": 0.2345423792520203, "grad_norm": 0.609375, "learning_rate": 0.0001970229702532378, "loss": 4.5321, "step": 2262 }, { "epoch": 0.23464606730650836, "grad_norm": 0.60546875, "learning_rate": 0.00019702033897937927, "loss": 4.5649, "step": 2263 }, { "epoch": 0.23474975536099643, "grad_norm": 0.5390625, "learning_rate": 0.0001970177065607833, "loss": 4.6028, "step": 2264 }, { "epoch": 0.2348534434154845, "grad_norm": 0.55859375, "learning_rate": 0.00019701507299748095, "loss": 4.539, "step": 2265 }, { "epoch": 0.23495713146997257, "grad_norm": 0.5390625, "learning_rate": 0.00019701243828950329, "loss": 4.5593, "step": 2266 }, { "epoch": 0.23506081952446067, "grad_norm": 0.5703125, "learning_rate": 0.0001970098024368814, "loss": 4.5882, "step": 2267 }, { "epoch": 0.23516450757894874, "grad_norm": 0.55859375, "learning_rate": 0.00019700716543964638, "loss": 4.5798, "step": 2268 }, { "epoch": 0.23526819563343682, "grad_norm": 0.56640625, "learning_rate": 0.00019700452729782934, "loss": 4.5523, "step": 2269 }, { "epoch": 0.2353718836879249, "grad_norm": 0.59375, "learning_rate": 0.0001970018880114614, "loss": 4.5734, "step": 2270 }, { "epoch": 0.23547557174241296, "grad_norm": 0.5546875, "learning_rate": 0.00019699924758057377, "loss": 4.5664, "step": 2271 }, { "epoch": 0.23557925979690103, "grad_norm": 0.5703125, "learning_rate": 0.00019699660600519753, "loss": 4.557, "step": 2272 }, { "epoch": 0.2356829478513891, "grad_norm": 0.58203125, "learning_rate": 0.00019699396328536384, "loss": 4.5388, "step": 2273 }, { "epoch": 0.23578663590587717, "grad_norm": 0.59765625, "learning_rate": 0.00019699131942110397, "loss": 4.5299, "step": 2274 }, { "epoch": 0.23589032396036524, "grad_norm": 0.6015625, "learning_rate": 0.000196988674412449, "loss": 4.5497, "step": 2275 }, { "epoch": 0.23599401201485332, "grad_norm": 0.58203125, "learning_rate": 0.0001969860282594302, "loss": 4.5566, "step": 2276 }, { "epoch": 0.2360977000693414, "grad_norm": 0.578125, "learning_rate": 0.00019698338096207883, "loss": 4.5407, "step": 2277 }, { "epoch": 0.23620138812382946, "grad_norm": 0.5625, "learning_rate": 0.00019698073252042605, "loss": 4.5606, "step": 2278 }, { "epoch": 0.23630507617831753, "grad_norm": 0.58984375, "learning_rate": 0.00019697808293450312, "loss": 4.5585, "step": 2279 }, { "epoch": 0.2364087642328056, "grad_norm": 0.482421875, "learning_rate": 0.00019697543220434133, "loss": 4.5512, "step": 2280 }, { "epoch": 0.23651245228729367, "grad_norm": 0.58203125, "learning_rate": 0.00019697278032997198, "loss": 4.5865, "step": 2281 }, { "epoch": 0.23661614034178174, "grad_norm": 0.51171875, "learning_rate": 0.0001969701273114263, "loss": 4.5634, "step": 2282 }, { "epoch": 0.23671982839626982, "grad_norm": 0.5703125, "learning_rate": 0.0001969674731487356, "loss": 4.5198, "step": 2283 }, { "epoch": 0.2368235164507579, "grad_norm": 0.53515625, "learning_rate": 0.00019696481784193127, "loss": 4.4959, "step": 2284 }, { "epoch": 0.23692720450524596, "grad_norm": 0.5234375, "learning_rate": 0.00019696216139104453, "loss": 4.5441, "step": 2285 }, { "epoch": 0.23703089255973403, "grad_norm": 0.52734375, "learning_rate": 0.00019695950379610682, "loss": 4.5769, "step": 2286 }, { "epoch": 0.2371345806142221, "grad_norm": 0.5, "learning_rate": 0.00019695684505714942, "loss": 4.5464, "step": 2287 }, { "epoch": 0.23723826866871017, "grad_norm": 0.5546875, "learning_rate": 0.00019695418517420377, "loss": 4.545, "step": 2288 }, { "epoch": 0.23734195672319827, "grad_norm": 0.5078125, "learning_rate": 0.0001969515241473012, "loss": 4.5357, "step": 2289 }, { "epoch": 0.23744564477768634, "grad_norm": 0.5, "learning_rate": 0.00019694886197647312, "loss": 4.5784, "step": 2290 }, { "epoch": 0.23754933283217441, "grad_norm": 0.482421875, "learning_rate": 0.00019694619866175098, "loss": 4.5894, "step": 2291 }, { "epoch": 0.23765302088666249, "grad_norm": 0.52734375, "learning_rate": 0.00019694353420316615, "loss": 4.5586, "step": 2292 }, { "epoch": 0.23775670894115056, "grad_norm": 0.53515625, "learning_rate": 0.0001969408686007501, "loss": 4.5119, "step": 2293 }, { "epoch": 0.23786039699563863, "grad_norm": 0.53125, "learning_rate": 0.00019693820185453427, "loss": 4.5694, "step": 2294 }, { "epoch": 0.2379640850501267, "grad_norm": 0.5078125, "learning_rate": 0.00019693553396455012, "loss": 4.5731, "step": 2295 }, { "epoch": 0.23806777310461477, "grad_norm": 0.546875, "learning_rate": 0.0001969328649308291, "loss": 4.5255, "step": 2296 }, { "epoch": 0.23817146115910284, "grad_norm": 0.5859375, "learning_rate": 0.0001969301947534028, "loss": 4.5547, "step": 2297 }, { "epoch": 0.23827514921359091, "grad_norm": 0.53515625, "learning_rate": 0.00019692752343230264, "loss": 4.5426, "step": 2298 }, { "epoch": 0.23837883726807899, "grad_norm": 0.5859375, "learning_rate": 0.00019692485096756016, "loss": 4.551, "step": 2299 }, { "epoch": 0.23848252532256706, "grad_norm": 0.59765625, "learning_rate": 0.0001969221773592069, "loss": 4.5627, "step": 2300 }, { "epoch": 0.23858621337705513, "grad_norm": 0.609375, "learning_rate": 0.00019691950260727437, "loss": 4.5379, "step": 2301 }, { "epoch": 0.2386899014315432, "grad_norm": 0.56640625, "learning_rate": 0.00019691682671179415, "loss": 4.5418, "step": 2302 }, { "epoch": 0.23879358948603127, "grad_norm": 0.5859375, "learning_rate": 0.00019691414967279786, "loss": 4.5495, "step": 2303 }, { "epoch": 0.23889727754051934, "grad_norm": 0.578125, "learning_rate": 0.00019691147149031703, "loss": 4.557, "step": 2304 }, { "epoch": 0.23900096559500741, "grad_norm": 0.61328125, "learning_rate": 0.00019690879216438325, "loss": 4.5889, "step": 2305 }, { "epoch": 0.23910465364949549, "grad_norm": 0.5859375, "learning_rate": 0.0001969061116950282, "loss": 4.539, "step": 2306 }, { "epoch": 0.23920834170398356, "grad_norm": 0.57421875, "learning_rate": 0.00019690343008228343, "loss": 4.5705, "step": 2307 }, { "epoch": 0.23931202975847163, "grad_norm": 0.63671875, "learning_rate": 0.00019690074732618066, "loss": 4.5375, "step": 2308 }, { "epoch": 0.2394157178129597, "grad_norm": 0.5703125, "learning_rate": 0.00019689806342675147, "loss": 4.5483, "step": 2309 }, { "epoch": 0.23951940586744777, "grad_norm": 0.625, "learning_rate": 0.00019689537838402758, "loss": 4.5793, "step": 2310 }, { "epoch": 0.23962309392193584, "grad_norm": 0.58203125, "learning_rate": 0.0001968926921980406, "loss": 4.5669, "step": 2311 }, { "epoch": 0.23972678197642394, "grad_norm": 0.5625, "learning_rate": 0.00019689000486882235, "loss": 4.5548, "step": 2312 }, { "epoch": 0.239830470030912, "grad_norm": 0.5703125, "learning_rate": 0.00019688731639640438, "loss": 4.5066, "step": 2313 }, { "epoch": 0.23993415808540008, "grad_norm": 0.59765625, "learning_rate": 0.00019688462678081852, "loss": 4.5549, "step": 2314 }, { "epoch": 0.24003784613988816, "grad_norm": 0.59765625, "learning_rate": 0.00019688193602209652, "loss": 4.5162, "step": 2315 }, { "epoch": 0.24014153419437623, "grad_norm": 0.54296875, "learning_rate": 0.00019687924412027004, "loss": 4.5942, "step": 2316 }, { "epoch": 0.2402452222488643, "grad_norm": 0.59765625, "learning_rate": 0.00019687655107537087, "loss": 4.5106, "step": 2317 }, { "epoch": 0.24034891030335237, "grad_norm": 0.5390625, "learning_rate": 0.00019687385688743083, "loss": 4.5898, "step": 2318 }, { "epoch": 0.24045259835784044, "grad_norm": 0.50390625, "learning_rate": 0.00019687116155648167, "loss": 4.5338, "step": 2319 }, { "epoch": 0.2405562864123285, "grad_norm": 0.53125, "learning_rate": 0.00019686846508255518, "loss": 4.5561, "step": 2320 }, { "epoch": 0.24065997446681658, "grad_norm": 0.5234375, "learning_rate": 0.00019686576746568321, "loss": 4.5322, "step": 2321 }, { "epoch": 0.24076366252130466, "grad_norm": 0.59375, "learning_rate": 0.0001968630687058976, "loss": 4.556, "step": 2322 }, { "epoch": 0.24086735057579273, "grad_norm": 0.486328125, "learning_rate": 0.00019686036880323012, "loss": 4.5641, "step": 2323 }, { "epoch": 0.2409710386302808, "grad_norm": 0.6171875, "learning_rate": 0.00019685766775771272, "loss": 4.541, "step": 2324 }, { "epoch": 0.24107472668476887, "grad_norm": 0.5, "learning_rate": 0.00019685496556937722, "loss": 4.5167, "step": 2325 }, { "epoch": 0.24117841473925694, "grad_norm": 0.56640625, "learning_rate": 0.0001968522622382555, "loss": 4.5524, "step": 2326 }, { "epoch": 0.241282102793745, "grad_norm": 0.4765625, "learning_rate": 0.00019684955776437947, "loss": 4.5259, "step": 2327 }, { "epoch": 0.24138579084823308, "grad_norm": 0.53125, "learning_rate": 0.000196846852147781, "loss": 4.5491, "step": 2328 }, { "epoch": 0.24148947890272116, "grad_norm": 0.55078125, "learning_rate": 0.00019684414538849207, "loss": 4.5132, "step": 2329 }, { "epoch": 0.24159316695720923, "grad_norm": 0.50390625, "learning_rate": 0.00019684143748654458, "loss": 4.5794, "step": 2330 }, { "epoch": 0.2416968550116973, "grad_norm": 0.671875, "learning_rate": 0.00019683872844197052, "loss": 4.5889, "step": 2331 }, { "epoch": 0.24180054306618537, "grad_norm": 0.578125, "learning_rate": 0.0001968360182548018, "loss": 4.5713, "step": 2332 }, { "epoch": 0.24190423112067344, "grad_norm": 0.5859375, "learning_rate": 0.00019683330692507042, "loss": 4.5571, "step": 2333 }, { "epoch": 0.24200791917516154, "grad_norm": 0.56640625, "learning_rate": 0.00019683059445280837, "loss": 4.5484, "step": 2334 }, { "epoch": 0.2421116072296496, "grad_norm": 0.5859375, "learning_rate": 0.00019682788083804771, "loss": 4.538, "step": 2335 }, { "epoch": 0.24221529528413768, "grad_norm": 0.6015625, "learning_rate": 0.00019682516608082037, "loss": 4.5748, "step": 2336 }, { "epoch": 0.24231898333862575, "grad_norm": 0.52734375, "learning_rate": 0.00019682245018115842, "loss": 4.5281, "step": 2337 }, { "epoch": 0.24242267139311383, "grad_norm": 0.53515625, "learning_rate": 0.0001968197331390939, "loss": 4.5424, "step": 2338 }, { "epoch": 0.2425263594476019, "grad_norm": 0.5234375, "learning_rate": 0.00019681701495465889, "loss": 4.5263, "step": 2339 }, { "epoch": 0.24263004750208997, "grad_norm": 0.5546875, "learning_rate": 0.00019681429562788542, "loss": 4.5765, "step": 2340 }, { "epoch": 0.24273373555657804, "grad_norm": 0.58984375, "learning_rate": 0.00019681157515880564, "loss": 4.4784, "step": 2341 }, { "epoch": 0.2428374236110661, "grad_norm": 0.51953125, "learning_rate": 0.00019680885354745158, "loss": 4.542, "step": 2342 }, { "epoch": 0.24294111166555418, "grad_norm": 0.52734375, "learning_rate": 0.00019680613079385537, "loss": 4.5242, "step": 2343 }, { "epoch": 0.24304479972004225, "grad_norm": 0.52734375, "learning_rate": 0.00019680340689804914, "loss": 4.5195, "step": 2344 }, { "epoch": 0.24314848777453033, "grad_norm": 0.54296875, "learning_rate": 0.00019680068186006506, "loss": 4.5474, "step": 2345 }, { "epoch": 0.2432521758290184, "grad_norm": 0.470703125, "learning_rate": 0.00019679795567993527, "loss": 4.5701, "step": 2346 }, { "epoch": 0.24335586388350647, "grad_norm": 0.52734375, "learning_rate": 0.00019679522835769188, "loss": 4.5311, "step": 2347 }, { "epoch": 0.24345955193799454, "grad_norm": 0.439453125, "learning_rate": 0.00019679249989336715, "loss": 4.5522, "step": 2348 }, { "epoch": 0.2435632399924826, "grad_norm": 0.56640625, "learning_rate": 0.00019678977028699318, "loss": 4.5616, "step": 2349 }, { "epoch": 0.24366692804697068, "grad_norm": 0.45703125, "learning_rate": 0.0001967870395386023, "loss": 4.5, "step": 2350 }, { "epoch": 0.24377061610145875, "grad_norm": 0.55859375, "learning_rate": 0.00019678430764822661, "loss": 4.5487, "step": 2351 }, { "epoch": 0.24387430415594682, "grad_norm": 0.466796875, "learning_rate": 0.00019678157461589844, "loss": 4.522, "step": 2352 }, { "epoch": 0.2439779922104349, "grad_norm": 0.609375, "learning_rate": 0.00019677884044164997, "loss": 4.5293, "step": 2353 }, { "epoch": 0.24408168026492297, "grad_norm": 0.5, "learning_rate": 0.00019677610512551348, "loss": 4.5629, "step": 2354 }, { "epoch": 0.24418536831941104, "grad_norm": 0.53125, "learning_rate": 0.00019677336866752123, "loss": 4.5529, "step": 2355 }, { "epoch": 0.24428905637389914, "grad_norm": 0.482421875, "learning_rate": 0.00019677063106770555, "loss": 4.5488, "step": 2356 }, { "epoch": 0.2443927444283872, "grad_norm": 0.52734375, "learning_rate": 0.00019676789232609868, "loss": 4.5474, "step": 2357 }, { "epoch": 0.24449643248287528, "grad_norm": 0.515625, "learning_rate": 0.000196765152442733, "loss": 4.5178, "step": 2358 }, { "epoch": 0.24460012053736335, "grad_norm": 0.5078125, "learning_rate": 0.0001967624114176408, "loss": 4.5141, "step": 2359 }, { "epoch": 0.24470380859185142, "grad_norm": 0.51953125, "learning_rate": 0.00019675966925085443, "loss": 4.4834, "step": 2360 }, { "epoch": 0.2448074966463395, "grad_norm": 0.470703125, "learning_rate": 0.00019675692594240624, "loss": 4.5927, "step": 2361 }, { "epoch": 0.24491118470082757, "grad_norm": 0.45703125, "learning_rate": 0.0001967541814923286, "loss": 4.5491, "step": 2362 }, { "epoch": 0.24501487275531564, "grad_norm": 0.490234375, "learning_rate": 0.00019675143590065387, "loss": 4.5038, "step": 2363 }, { "epoch": 0.2451185608098037, "grad_norm": 0.44921875, "learning_rate": 0.00019674868916741452, "loss": 4.5872, "step": 2364 }, { "epoch": 0.24522224886429178, "grad_norm": 0.51953125, "learning_rate": 0.00019674594129264286, "loss": 4.5441, "step": 2365 }, { "epoch": 0.24532593691877985, "grad_norm": 0.4921875, "learning_rate": 0.0001967431922763714, "loss": 4.5074, "step": 2366 }, { "epoch": 0.24542962497326792, "grad_norm": 0.546875, "learning_rate": 0.00019674044211863247, "loss": 4.4877, "step": 2367 }, { "epoch": 0.245533313027756, "grad_norm": 0.55078125, "learning_rate": 0.00019673769081945863, "loss": 4.527, "step": 2368 }, { "epoch": 0.24563700108224407, "grad_norm": 0.59375, "learning_rate": 0.00019673493837888228, "loss": 4.5584, "step": 2369 }, { "epoch": 0.24574068913673214, "grad_norm": 0.5703125, "learning_rate": 0.00019673218479693592, "loss": 4.5505, "step": 2370 }, { "epoch": 0.2458443771912202, "grad_norm": 0.5703125, "learning_rate": 0.00019672943007365202, "loss": 4.5845, "step": 2371 }, { "epoch": 0.24594806524570828, "grad_norm": 0.625, "learning_rate": 0.00019672667420906308, "loss": 4.5538, "step": 2372 }, { "epoch": 0.24605175330019635, "grad_norm": 0.5703125, "learning_rate": 0.00019672391720320165, "loss": 4.523, "step": 2373 }, { "epoch": 0.24615544135468442, "grad_norm": 0.62109375, "learning_rate": 0.00019672115905610023, "loss": 4.5312, "step": 2374 }, { "epoch": 0.2462591294091725, "grad_norm": 0.5859375, "learning_rate": 0.00019671839976779138, "loss": 4.5019, "step": 2375 }, { "epoch": 0.24636281746366057, "grad_norm": 0.57421875, "learning_rate": 0.00019671563933830767, "loss": 4.5387, "step": 2376 }, { "epoch": 0.24646650551814864, "grad_norm": 0.62890625, "learning_rate": 0.0001967128777676816, "loss": 4.5441, "step": 2377 }, { "epoch": 0.2465701935726367, "grad_norm": 0.60546875, "learning_rate": 0.00019671011505594581, "loss": 4.5023, "step": 2378 }, { "epoch": 0.2466738816271248, "grad_norm": 0.490234375, "learning_rate": 0.0001967073512031329, "loss": 4.5643, "step": 2379 }, { "epoch": 0.24677756968161288, "grad_norm": 0.5859375, "learning_rate": 0.00019670458620927548, "loss": 4.5538, "step": 2380 }, { "epoch": 0.24688125773610095, "grad_norm": 0.55078125, "learning_rate": 0.00019670182007440614, "loss": 4.583, "step": 2381 }, { "epoch": 0.24698494579058902, "grad_norm": 0.55078125, "learning_rate": 0.0001966990527985576, "loss": 4.5283, "step": 2382 }, { "epoch": 0.2470886338450771, "grad_norm": 0.58984375, "learning_rate": 0.0001966962843817624, "loss": 4.5397, "step": 2383 }, { "epoch": 0.24719232189956516, "grad_norm": 0.515625, "learning_rate": 0.00019669351482405324, "loss": 4.5366, "step": 2384 }, { "epoch": 0.24729600995405324, "grad_norm": 0.71875, "learning_rate": 0.00019669074412546284, "loss": 4.5615, "step": 2385 }, { "epoch": 0.2473996980085413, "grad_norm": 0.5078125, "learning_rate": 0.0001966879722860239, "loss": 4.5026, "step": 2386 }, { "epoch": 0.24750338606302938, "grad_norm": 0.71484375, "learning_rate": 0.00019668519930576904, "loss": 4.5545, "step": 2387 }, { "epoch": 0.24760707411751745, "grad_norm": 0.6484375, "learning_rate": 0.00019668242518473106, "loss": 4.5279, "step": 2388 }, { "epoch": 0.24771076217200552, "grad_norm": 0.56640625, "learning_rate": 0.00019667964992294264, "loss": 4.5575, "step": 2389 }, { "epoch": 0.2478144502264936, "grad_norm": 0.6015625, "learning_rate": 0.00019667687352043655, "loss": 4.5308, "step": 2390 }, { "epoch": 0.24791813828098166, "grad_norm": 0.6015625, "learning_rate": 0.00019667409597724553, "loss": 4.5585, "step": 2391 }, { "epoch": 0.24802182633546974, "grad_norm": 0.5625, "learning_rate": 0.0001966713172934024, "loss": 4.5285, "step": 2392 }, { "epoch": 0.2481255143899578, "grad_norm": 0.59765625, "learning_rate": 0.00019666853746893987, "loss": 4.4994, "step": 2393 }, { "epoch": 0.24822920244444588, "grad_norm": 0.5546875, "learning_rate": 0.00019666575650389084, "loss": 4.491, "step": 2394 }, { "epoch": 0.24833289049893395, "grad_norm": 0.53515625, "learning_rate": 0.000196662974398288, "loss": 4.4838, "step": 2395 }, { "epoch": 0.24843657855342202, "grad_norm": 0.53125, "learning_rate": 0.0001966601911521643, "loss": 4.5466, "step": 2396 }, { "epoch": 0.2485402666079101, "grad_norm": 0.59765625, "learning_rate": 0.00019665740676555246, "loss": 4.5693, "step": 2397 }, { "epoch": 0.24864395466239816, "grad_norm": 0.53515625, "learning_rate": 0.00019665462123848545, "loss": 4.5713, "step": 2398 }, { "epoch": 0.24874764271688624, "grad_norm": 0.59765625, "learning_rate": 0.00019665183457099602, "loss": 4.5254, "step": 2399 }, { "epoch": 0.2488513307713743, "grad_norm": 0.53125, "learning_rate": 0.00019664904676311716, "loss": 4.4666, "step": 2400 }, { "epoch": 0.2489550188258624, "grad_norm": 0.57421875, "learning_rate": 0.00019664625781488167, "loss": 4.4776, "step": 2401 }, { "epoch": 0.24905870688035048, "grad_norm": 0.56640625, "learning_rate": 0.00019664346772632252, "loss": 4.5156, "step": 2402 }, { "epoch": 0.24916239493483855, "grad_norm": 0.56640625, "learning_rate": 0.0001966406764974726, "loss": 4.5436, "step": 2403 }, { "epoch": 0.24926608298932662, "grad_norm": 0.54296875, "learning_rate": 0.00019663788412836483, "loss": 4.4748, "step": 2404 }, { "epoch": 0.2493697710438147, "grad_norm": 0.60546875, "learning_rate": 0.0001966350906190322, "loss": 4.5189, "step": 2405 }, { "epoch": 0.24947345909830276, "grad_norm": 0.6796875, "learning_rate": 0.00019663229596950766, "loss": 4.4902, "step": 2406 }, { "epoch": 0.24957714715279083, "grad_norm": 0.59375, "learning_rate": 0.00019662950017982416, "loss": 4.5363, "step": 2407 }, { "epoch": 0.2496808352072789, "grad_norm": 0.55078125, "learning_rate": 0.00019662670325001468, "loss": 4.5453, "step": 2408 }, { "epoch": 0.24978452326176698, "grad_norm": 0.578125, "learning_rate": 0.00019662390518011228, "loss": 4.5477, "step": 2409 }, { "epoch": 0.24988821131625505, "grad_norm": 0.49609375, "learning_rate": 0.0001966211059701499, "loss": 4.5409, "step": 2410 }, { "epoch": 0.24999189937074312, "grad_norm": 0.61328125, "learning_rate": 0.0001966183056201606, "loss": 4.525, "step": 2411 }, { "epoch": 0.24999189937074312, "eval_loss": 4.54582405090332, "eval_runtime": 0.4393, "eval_samples_per_second": 339.196, "eval_steps_per_second": 15.935, "step": 2411 }, { "epoch": 0.2500955874252312, "grad_norm": 0.578125, "learning_rate": 0.00019661550413017746, "loss": 4.5607, "step": 2412 }, { "epoch": 0.25019927547971926, "grad_norm": 0.55078125, "learning_rate": 0.00019661270150023348, "loss": 4.5456, "step": 2413 }, { "epoch": 0.25030296353420733, "grad_norm": 0.6171875, "learning_rate": 0.00019660989773036174, "loss": 4.5299, "step": 2414 }, { "epoch": 0.2504066515886954, "grad_norm": 0.5234375, "learning_rate": 0.00019660709282059532, "loss": 4.4934, "step": 2415 }, { "epoch": 0.2505103396431835, "grad_norm": 0.65234375, "learning_rate": 0.00019660428677096734, "loss": 4.5475, "step": 2416 }, { "epoch": 0.25061402769767155, "grad_norm": 0.494140625, "learning_rate": 0.0001966014795815109, "loss": 4.5659, "step": 2417 }, { "epoch": 0.2507177157521596, "grad_norm": 0.609375, "learning_rate": 0.0001965986712522591, "loss": 4.5103, "step": 2418 }, { "epoch": 0.2508214038066477, "grad_norm": 0.625, "learning_rate": 0.00019659586178324506, "loss": 4.5531, "step": 2419 }, { "epoch": 0.25092509186113576, "grad_norm": 0.515625, "learning_rate": 0.000196593051174502, "loss": 4.5444, "step": 2420 }, { "epoch": 0.25102877991562383, "grad_norm": 0.57421875, "learning_rate": 0.00019659023942606303, "loss": 4.554, "step": 2421 }, { "epoch": 0.2511324679701119, "grad_norm": 0.53515625, "learning_rate": 0.00019658742653796133, "loss": 4.5388, "step": 2422 }, { "epoch": 0.2512361560246, "grad_norm": 0.6015625, "learning_rate": 0.00019658461251023012, "loss": 4.554, "step": 2423 }, { "epoch": 0.25133984407908805, "grad_norm": 0.62109375, "learning_rate": 0.00019658179734290254, "loss": 4.5397, "step": 2424 }, { "epoch": 0.2514435321335761, "grad_norm": 0.55859375, "learning_rate": 0.00019657898103601185, "loss": 4.5455, "step": 2425 }, { "epoch": 0.2515472201880642, "grad_norm": 0.6953125, "learning_rate": 0.00019657616358959126, "loss": 4.5079, "step": 2426 }, { "epoch": 0.25165090824255226, "grad_norm": 0.62109375, "learning_rate": 0.00019657334500367406, "loss": 4.5894, "step": 2427 }, { "epoch": 0.25175459629704033, "grad_norm": 0.5625, "learning_rate": 0.00019657052527829346, "loss": 4.4915, "step": 2428 }, { "epoch": 0.2518582843515284, "grad_norm": 0.546875, "learning_rate": 0.00019656770441348273, "loss": 4.5516, "step": 2429 }, { "epoch": 0.2519619724060165, "grad_norm": 0.6171875, "learning_rate": 0.00019656488240927516, "loss": 4.5341, "step": 2430 }, { "epoch": 0.25206566046050455, "grad_norm": 0.6015625, "learning_rate": 0.00019656205926570406, "loss": 4.5455, "step": 2431 }, { "epoch": 0.2521693485149926, "grad_norm": 0.5390625, "learning_rate": 0.00019655923498280273, "loss": 4.5265, "step": 2432 }, { "epoch": 0.2522730365694807, "grad_norm": 0.49609375, "learning_rate": 0.0001965564095606045, "loss": 4.5306, "step": 2433 }, { "epoch": 0.25237672462396876, "grad_norm": 0.5546875, "learning_rate": 0.0001965535829991427, "loss": 4.5759, "step": 2434 }, { "epoch": 0.2524804126784569, "grad_norm": 0.5078125, "learning_rate": 0.0001965507552984507, "loss": 4.5264, "step": 2435 }, { "epoch": 0.25258410073294496, "grad_norm": 0.5859375, "learning_rate": 0.0001965479264585618, "loss": 4.5116, "step": 2436 }, { "epoch": 0.25268778878743303, "grad_norm": 0.494140625, "learning_rate": 0.00019654509647950945, "loss": 4.4965, "step": 2437 }, { "epoch": 0.2527914768419211, "grad_norm": 0.5625, "learning_rate": 0.000196542265361327, "loss": 4.5255, "step": 2438 }, { "epoch": 0.2528951648964092, "grad_norm": 0.55078125, "learning_rate": 0.0001965394331040479, "loss": 4.5525, "step": 2439 }, { "epoch": 0.25299885295089725, "grad_norm": 0.546875, "learning_rate": 0.0001965365997077055, "loss": 4.5402, "step": 2440 }, { "epoch": 0.2531025410053853, "grad_norm": 0.51171875, "learning_rate": 0.00019653376517233327, "loss": 4.5721, "step": 2441 }, { "epoch": 0.2532062290598734, "grad_norm": 0.5234375, "learning_rate": 0.00019653092949796467, "loss": 4.538, "step": 2442 }, { "epoch": 0.25330991711436146, "grad_norm": 0.50390625, "learning_rate": 0.00019652809268463315, "loss": 4.5701, "step": 2443 }, { "epoch": 0.25341360516884953, "grad_norm": 0.51953125, "learning_rate": 0.00019652525473237215, "loss": 4.5366, "step": 2444 }, { "epoch": 0.2535172932233376, "grad_norm": 0.486328125, "learning_rate": 0.00019652241564121518, "loss": 4.5249, "step": 2445 }, { "epoch": 0.2536209812778257, "grad_norm": 0.5625, "learning_rate": 0.00019651957541119575, "loss": 4.5277, "step": 2446 }, { "epoch": 0.25372466933231375, "grad_norm": 0.5390625, "learning_rate": 0.00019651673404234732, "loss": 4.5749, "step": 2447 }, { "epoch": 0.2538283573868018, "grad_norm": 0.56640625, "learning_rate": 0.00019651389153470348, "loss": 4.5396, "step": 2448 }, { "epoch": 0.2539320454412899, "grad_norm": 0.5078125, "learning_rate": 0.00019651104788829775, "loss": 4.5754, "step": 2449 }, { "epoch": 0.25403573349577796, "grad_norm": 0.546875, "learning_rate": 0.00019650820310316362, "loss": 4.5094, "step": 2450 }, { "epoch": 0.25413942155026603, "grad_norm": 0.46875, "learning_rate": 0.00019650535717933475, "loss": 4.5006, "step": 2451 }, { "epoch": 0.2542431096047541, "grad_norm": 0.5390625, "learning_rate": 0.0001965025101168447, "loss": 4.5539, "step": 2452 }, { "epoch": 0.2543467976592422, "grad_norm": 0.46875, "learning_rate": 0.00019649966191572697, "loss": 4.4919, "step": 2453 }, { "epoch": 0.25445048571373025, "grad_norm": 0.58203125, "learning_rate": 0.0001964968125760153, "loss": 4.5391, "step": 2454 }, { "epoch": 0.2545541737682183, "grad_norm": 0.58203125, "learning_rate": 0.0001964939620977432, "loss": 4.5305, "step": 2455 }, { "epoch": 0.2546578618227064, "grad_norm": 0.5703125, "learning_rate": 0.00019649111048094435, "loss": 4.5178, "step": 2456 }, { "epoch": 0.25476154987719446, "grad_norm": 0.6171875, "learning_rate": 0.00019648825772565238, "loss": 4.545, "step": 2457 }, { "epoch": 0.25486523793168253, "grad_norm": 0.6015625, "learning_rate": 0.00019648540383190098, "loss": 4.5473, "step": 2458 }, { "epoch": 0.2549689259861706, "grad_norm": 0.58203125, "learning_rate": 0.0001964825487997238, "loss": 4.5182, "step": 2459 }, { "epoch": 0.2550726140406587, "grad_norm": 0.59765625, "learning_rate": 0.00019647969262915455, "loss": 4.517, "step": 2460 }, { "epoch": 0.25517630209514675, "grad_norm": 0.6640625, "learning_rate": 0.00019647683532022692, "loss": 4.5324, "step": 2461 }, { "epoch": 0.2552799901496348, "grad_norm": 0.57421875, "learning_rate": 0.00019647397687297456, "loss": 4.5349, "step": 2462 }, { "epoch": 0.2553836782041229, "grad_norm": 0.66015625, "learning_rate": 0.0001964711172874313, "loss": 4.5047, "step": 2463 }, { "epoch": 0.25548736625861096, "grad_norm": 0.5703125, "learning_rate": 0.0001964682565636308, "loss": 4.5019, "step": 2464 }, { "epoch": 0.25559105431309903, "grad_norm": 0.59375, "learning_rate": 0.00019646539470160684, "loss": 4.5242, "step": 2465 }, { "epoch": 0.2556947423675871, "grad_norm": 0.58203125, "learning_rate": 0.0001964625317013932, "loss": 4.5476, "step": 2466 }, { "epoch": 0.2557984304220752, "grad_norm": 0.5390625, "learning_rate": 0.00019645966756302367, "loss": 4.5218, "step": 2467 }, { "epoch": 0.25590211847656325, "grad_norm": 0.53125, "learning_rate": 0.00019645680228653204, "loss": 4.5236, "step": 2468 }, { "epoch": 0.2560058065310513, "grad_norm": 0.60546875, "learning_rate": 0.00019645393587195204, "loss": 4.5162, "step": 2469 }, { "epoch": 0.2561094945855394, "grad_norm": 0.55078125, "learning_rate": 0.0001964510683193176, "loss": 4.5434, "step": 2470 }, { "epoch": 0.25621318264002746, "grad_norm": 0.578125, "learning_rate": 0.0001964481996286625, "loss": 4.4977, "step": 2471 }, { "epoch": 0.25631687069451553, "grad_norm": 0.57421875, "learning_rate": 0.00019644532980002058, "loss": 4.5071, "step": 2472 }, { "epoch": 0.2564205587490036, "grad_norm": 0.5859375, "learning_rate": 0.00019644245883342572, "loss": 4.4715, "step": 2473 }, { "epoch": 0.2565242468034917, "grad_norm": 0.5703125, "learning_rate": 0.00019643958672891181, "loss": 4.5657, "step": 2474 }, { "epoch": 0.25662793485797974, "grad_norm": 0.58984375, "learning_rate": 0.00019643671348651268, "loss": 4.518, "step": 2475 }, { "epoch": 0.2567316229124678, "grad_norm": 0.5703125, "learning_rate": 0.0001964338391062623, "loss": 4.5594, "step": 2476 }, { "epoch": 0.2568353109669559, "grad_norm": 0.546875, "learning_rate": 0.00019643096358819455, "loss": 4.5379, "step": 2477 }, { "epoch": 0.25693899902144396, "grad_norm": 0.6171875, "learning_rate": 0.00019642808693234333, "loss": 4.4973, "step": 2478 }, { "epoch": 0.2570426870759321, "grad_norm": 0.5625, "learning_rate": 0.00019642520913874264, "loss": 4.5271, "step": 2479 }, { "epoch": 0.25714637513042016, "grad_norm": 0.68359375, "learning_rate": 0.0001964223302074264, "loss": 4.4797, "step": 2480 }, { "epoch": 0.25725006318490823, "grad_norm": 0.5703125, "learning_rate": 0.00019641945013842862, "loss": 4.5094, "step": 2481 }, { "epoch": 0.2573537512393963, "grad_norm": 0.65234375, "learning_rate": 0.0001964165689317832, "loss": 4.5076, "step": 2482 }, { "epoch": 0.25745743929388437, "grad_norm": 0.64453125, "learning_rate": 0.0001964136865875242, "loss": 4.5485, "step": 2483 }, { "epoch": 0.25756112734837244, "grad_norm": 0.58203125, "learning_rate": 0.00019641080310568563, "loss": 4.55, "step": 2484 }, { "epoch": 0.2576648154028605, "grad_norm": 0.61328125, "learning_rate": 0.00019640791848630148, "loss": 4.4889, "step": 2485 }, { "epoch": 0.2577685034573486, "grad_norm": 0.64453125, "learning_rate": 0.0001964050327294058, "loss": 4.5108, "step": 2486 }, { "epoch": 0.25787219151183666, "grad_norm": 0.58984375, "learning_rate": 0.00019640214583503264, "loss": 4.5616, "step": 2487 }, { "epoch": 0.25797587956632473, "grad_norm": 0.609375, "learning_rate": 0.00019639925780321607, "loss": 4.5208, "step": 2488 }, { "epoch": 0.2580795676208128, "grad_norm": 0.640625, "learning_rate": 0.0001963963686339901, "loss": 4.5417, "step": 2489 }, { "epoch": 0.25818325567530087, "grad_norm": 0.56640625, "learning_rate": 0.00019639347832738896, "loss": 4.5336, "step": 2490 }, { "epoch": 0.25828694372978894, "grad_norm": 0.65625, "learning_rate": 0.00019639058688344663, "loss": 4.5381, "step": 2491 }, { "epoch": 0.258390631784277, "grad_norm": 0.63671875, "learning_rate": 0.00019638769430219727, "loss": 4.5333, "step": 2492 }, { "epoch": 0.2584943198387651, "grad_norm": 0.59765625, "learning_rate": 0.00019638480058367498, "loss": 4.5055, "step": 2493 }, { "epoch": 0.25859800789325316, "grad_norm": 0.5546875, "learning_rate": 0.00019638190572791392, "loss": 4.5351, "step": 2494 }, { "epoch": 0.25870169594774123, "grad_norm": 0.6484375, "learning_rate": 0.00019637900973494828, "loss": 4.5316, "step": 2495 }, { "epoch": 0.2588053840022293, "grad_norm": 0.625, "learning_rate": 0.00019637611260481223, "loss": 4.545, "step": 2496 }, { "epoch": 0.25890907205671737, "grad_norm": 0.63671875, "learning_rate": 0.00019637321433753986, "loss": 4.5428, "step": 2497 }, { "epoch": 0.25901276011120544, "grad_norm": 0.69921875, "learning_rate": 0.00019637031493316548, "loss": 4.5547, "step": 2498 }, { "epoch": 0.2591164481656935, "grad_norm": 0.625, "learning_rate": 0.00019636741439172322, "loss": 4.5579, "step": 2499 }, { "epoch": 0.2592201362201816, "grad_norm": 0.58984375, "learning_rate": 0.00019636451271324735, "loss": 4.5162, "step": 2500 }, { "epoch": 0.25932382427466966, "grad_norm": 0.73828125, "learning_rate": 0.00019636160989777208, "loss": 4.5121, "step": 2501 }, { "epoch": 0.25942751232915773, "grad_norm": 0.63671875, "learning_rate": 0.0001963587059453317, "loss": 4.5258, "step": 2502 }, { "epoch": 0.2595312003836458, "grad_norm": 0.63671875, "learning_rate": 0.0001963558008559604, "loss": 4.5086, "step": 2503 }, { "epoch": 0.25963488843813387, "grad_norm": 0.609375, "learning_rate": 0.00019635289462969255, "loss": 4.5275, "step": 2504 }, { "epoch": 0.25973857649262194, "grad_norm": 0.5703125, "learning_rate": 0.00019634998726656238, "loss": 4.5625, "step": 2505 }, { "epoch": 0.25984226454711, "grad_norm": 0.64453125, "learning_rate": 0.0001963470787666042, "loss": 4.5497, "step": 2506 }, { "epoch": 0.2599459526015981, "grad_norm": 0.578125, "learning_rate": 0.00019634416912985234, "loss": 4.4894, "step": 2507 }, { "epoch": 0.26004964065608616, "grad_norm": 0.65234375, "learning_rate": 0.0001963412583563411, "loss": 4.4685, "step": 2508 }, { "epoch": 0.2601533287105742, "grad_norm": 0.5625, "learning_rate": 0.00019633834644610488, "loss": 4.5355, "step": 2509 }, { "epoch": 0.2602570167650623, "grad_norm": 0.6171875, "learning_rate": 0.000196335433399178, "loss": 4.5377, "step": 2510 }, { "epoch": 0.26036070481955037, "grad_norm": 0.53515625, "learning_rate": 0.00019633251921559482, "loss": 4.5568, "step": 2511 }, { "epoch": 0.26046439287403844, "grad_norm": 0.62109375, "learning_rate": 0.00019632960389538975, "loss": 4.5713, "step": 2512 }, { "epoch": 0.2605680809285265, "grad_norm": 0.625, "learning_rate": 0.00019632668743859718, "loss": 4.5268, "step": 2513 }, { "epoch": 0.2606717689830146, "grad_norm": 0.62890625, "learning_rate": 0.00019632376984525155, "loss": 4.5509, "step": 2514 }, { "epoch": 0.26077545703750266, "grad_norm": 0.625, "learning_rate": 0.0001963208511153872, "loss": 4.5079, "step": 2515 }, { "epoch": 0.2608791450919907, "grad_norm": 0.640625, "learning_rate": 0.00019631793124903863, "loss": 4.551, "step": 2516 }, { "epoch": 0.2609828331464788, "grad_norm": 0.58984375, "learning_rate": 0.00019631501024624032, "loss": 4.515, "step": 2517 }, { "epoch": 0.26108652120096687, "grad_norm": 0.61328125, "learning_rate": 0.00019631208810702667, "loss": 4.498, "step": 2518 }, { "epoch": 0.26119020925545494, "grad_norm": 0.6796875, "learning_rate": 0.0001963091648314322, "loss": 4.5372, "step": 2519 }, { "epoch": 0.261293897309943, "grad_norm": 0.61328125, "learning_rate": 0.0001963062404194914, "loss": 4.513, "step": 2520 }, { "epoch": 0.2613975853644311, "grad_norm": 0.69921875, "learning_rate": 0.00019630331487123872, "loss": 4.497, "step": 2521 }, { "epoch": 0.26150127341891916, "grad_norm": 0.65625, "learning_rate": 0.00019630038818670874, "loss": 4.4994, "step": 2522 }, { "epoch": 0.2616049614734072, "grad_norm": 0.703125, "learning_rate": 0.000196297460365936, "loss": 4.4981, "step": 2523 }, { "epoch": 0.26170864952789535, "grad_norm": 0.65625, "learning_rate": 0.000196294531408955, "loss": 4.5548, "step": 2524 }, { "epoch": 0.2618123375823834, "grad_norm": 0.6640625, "learning_rate": 0.00019629160131580032, "loss": 4.5412, "step": 2525 }, { "epoch": 0.2619160256368715, "grad_norm": 0.70703125, "learning_rate": 0.00019628867008650652, "loss": 4.4908, "step": 2526 }, { "epoch": 0.26201971369135957, "grad_norm": 0.6796875, "learning_rate": 0.00019628573772110822, "loss": 4.4642, "step": 2527 }, { "epoch": 0.26212340174584764, "grad_norm": 0.71875, "learning_rate": 0.00019628280421963995, "loss": 4.4949, "step": 2528 }, { "epoch": 0.2622270898003357, "grad_norm": 0.6328125, "learning_rate": 0.0001962798695821364, "loss": 4.4916, "step": 2529 }, { "epoch": 0.2623307778548238, "grad_norm": 0.63671875, "learning_rate": 0.00019627693380863215, "loss": 4.5176, "step": 2530 }, { "epoch": 0.26243446590931185, "grad_norm": 0.69140625, "learning_rate": 0.00019627399689916186, "loss": 4.555, "step": 2531 }, { "epoch": 0.2625381539637999, "grad_norm": 0.578125, "learning_rate": 0.00019627105885376017, "loss": 4.5436, "step": 2532 }, { "epoch": 0.262641842018288, "grad_norm": 0.7734375, "learning_rate": 0.00019626811967246173, "loss": 4.509, "step": 2533 }, { "epoch": 0.26274553007277607, "grad_norm": 0.640625, "learning_rate": 0.00019626517935530125, "loss": 4.5512, "step": 2534 }, { "epoch": 0.26284921812726414, "grad_norm": 0.71875, "learning_rate": 0.0001962622379023134, "loss": 4.5567, "step": 2535 }, { "epoch": 0.2629529061817522, "grad_norm": 0.70703125, "learning_rate": 0.0001962592953135329, "loss": 4.5253, "step": 2536 }, { "epoch": 0.2630565942362403, "grad_norm": 0.68359375, "learning_rate": 0.0001962563515889945, "loss": 4.5262, "step": 2537 }, { "epoch": 0.26316028229072835, "grad_norm": 0.6796875, "learning_rate": 0.00019625340672873285, "loss": 4.5034, "step": 2538 }, { "epoch": 0.2632639703452164, "grad_norm": 0.76953125, "learning_rate": 0.00019625046073278276, "loss": 4.5212, "step": 2539 }, { "epoch": 0.2633676583997045, "grad_norm": 0.58984375, "learning_rate": 0.00019624751360117898, "loss": 4.521, "step": 2540 }, { "epoch": 0.26347134645419257, "grad_norm": 0.8359375, "learning_rate": 0.00019624456533395628, "loss": 4.5292, "step": 2541 }, { "epoch": 0.26357503450868064, "grad_norm": 0.75390625, "learning_rate": 0.00019624161593114945, "loss": 4.5043, "step": 2542 }, { "epoch": 0.2636787225631687, "grad_norm": 0.6640625, "learning_rate": 0.00019623866539279327, "loss": 4.4988, "step": 2543 }, { "epoch": 0.2637824106176568, "grad_norm": 0.75, "learning_rate": 0.00019623571371892257, "loss": 4.5159, "step": 2544 }, { "epoch": 0.26388609867214485, "grad_norm": 0.671875, "learning_rate": 0.00019623276090957218, "loss": 4.5345, "step": 2545 }, { "epoch": 0.2639897867266329, "grad_norm": 0.671875, "learning_rate": 0.00019622980696477692, "loss": 4.5314, "step": 2546 }, { "epoch": 0.264093474781121, "grad_norm": 0.765625, "learning_rate": 0.00019622685188457167, "loss": 4.5287, "step": 2547 }, { "epoch": 0.26419716283560907, "grad_norm": 0.7734375, "learning_rate": 0.0001962238956689913, "loss": 4.4609, "step": 2548 }, { "epoch": 0.26430085089009714, "grad_norm": 0.6484375, "learning_rate": 0.00019622093831807064, "loss": 4.5249, "step": 2549 }, { "epoch": 0.2644045389445852, "grad_norm": 0.890625, "learning_rate": 0.00019621797983184464, "loss": 4.5609, "step": 2550 }, { "epoch": 0.2645082269990733, "grad_norm": 0.83203125, "learning_rate": 0.0001962150202103482, "loss": 4.526, "step": 2551 }, { "epoch": 0.26461191505356135, "grad_norm": 0.58203125, "learning_rate": 0.00019621205945361618, "loss": 4.4839, "step": 2552 }, { "epoch": 0.2647156031080494, "grad_norm": 0.8828125, "learning_rate": 0.00019620909756168356, "loss": 4.487, "step": 2553 }, { "epoch": 0.2648192911625375, "grad_norm": 0.90234375, "learning_rate": 0.0001962061345345853, "loss": 4.5395, "step": 2554 }, { "epoch": 0.26492297921702557, "grad_norm": 0.765625, "learning_rate": 0.00019620317037235638, "loss": 4.5355, "step": 2555 }, { "epoch": 0.26502666727151364, "grad_norm": 0.671875, "learning_rate": 0.00019620020507503174, "loss": 4.4756, "step": 2556 }, { "epoch": 0.2651303553260017, "grad_norm": 0.68359375, "learning_rate": 0.0001961972386426463, "loss": 4.5216, "step": 2557 }, { "epoch": 0.2652340433804898, "grad_norm": 0.7421875, "learning_rate": 0.00019619427107523521, "loss": 4.4505, "step": 2558 }, { "epoch": 0.26533773143497785, "grad_norm": 0.640625, "learning_rate": 0.00019619130237283336, "loss": 4.5255, "step": 2559 }, { "epoch": 0.2654414194894659, "grad_norm": 0.62109375, "learning_rate": 0.00019618833253547583, "loss": 4.52, "step": 2560 }, { "epoch": 0.265545107543954, "grad_norm": 0.6328125, "learning_rate": 0.00019618536156319766, "loss": 4.5316, "step": 2561 }, { "epoch": 0.26564879559844207, "grad_norm": 0.6328125, "learning_rate": 0.00019618238945603387, "loss": 4.5438, "step": 2562 }, { "epoch": 0.26575248365293014, "grad_norm": 0.546875, "learning_rate": 0.00019617941621401957, "loss": 4.4668, "step": 2563 }, { "epoch": 0.2658561717074182, "grad_norm": 0.6015625, "learning_rate": 0.00019617644183718983, "loss": 4.5624, "step": 2564 }, { "epoch": 0.2659598597619063, "grad_norm": 0.62109375, "learning_rate": 0.00019617346632557972, "loss": 4.4884, "step": 2565 }, { "epoch": 0.26606354781639435, "grad_norm": 0.63671875, "learning_rate": 0.00019617048967922438, "loss": 4.5099, "step": 2566 }, { "epoch": 0.2661672358708824, "grad_norm": 0.5703125, "learning_rate": 0.00019616751189815892, "loss": 4.5287, "step": 2567 }, { "epoch": 0.2662709239253705, "grad_norm": 0.6015625, "learning_rate": 0.00019616453298241845, "loss": 4.5201, "step": 2568 }, { "epoch": 0.2663746119798586, "grad_norm": 0.5859375, "learning_rate": 0.00019616155293203816, "loss": 4.5195, "step": 2569 }, { "epoch": 0.2664783000343467, "grad_norm": 0.6171875, "learning_rate": 0.0001961585717470532, "loss": 4.5095, "step": 2570 }, { "epoch": 0.26658198808883476, "grad_norm": 0.6484375, "learning_rate": 0.00019615558942749872, "loss": 4.4663, "step": 2571 }, { "epoch": 0.26668567614332284, "grad_norm": 0.6953125, "learning_rate": 0.00019615260597340996, "loss": 4.5458, "step": 2572 }, { "epoch": 0.2667893641978109, "grad_norm": 0.6171875, "learning_rate": 0.00019614962138482205, "loss": 4.5008, "step": 2573 }, { "epoch": 0.266893052252299, "grad_norm": 0.6953125, "learning_rate": 0.00019614663566177028, "loss": 4.4962, "step": 2574 }, { "epoch": 0.26699674030678705, "grad_norm": 0.703125, "learning_rate": 0.0001961436488042898, "loss": 4.5274, "step": 2575 }, { "epoch": 0.2671004283612751, "grad_norm": 0.66796875, "learning_rate": 0.0001961406608124159, "loss": 4.5169, "step": 2576 }, { "epoch": 0.2672041164157632, "grad_norm": 0.6640625, "learning_rate": 0.00019613767168618384, "loss": 4.498, "step": 2577 }, { "epoch": 0.26730780447025126, "grad_norm": 0.75390625, "learning_rate": 0.00019613468142562888, "loss": 4.5055, "step": 2578 }, { "epoch": 0.26741149252473934, "grad_norm": 0.70703125, "learning_rate": 0.00019613169003078628, "loss": 4.4896, "step": 2579 }, { "epoch": 0.2675151805792274, "grad_norm": 0.671875, "learning_rate": 0.0001961286975016914, "loss": 4.5204, "step": 2580 }, { "epoch": 0.2676188686337155, "grad_norm": 0.63671875, "learning_rate": 0.00019612570383837943, "loss": 4.4656, "step": 2581 }, { "epoch": 0.26772255668820355, "grad_norm": 0.75, "learning_rate": 0.00019612270904088578, "loss": 4.5293, "step": 2582 }, { "epoch": 0.2678262447426916, "grad_norm": 0.6171875, "learning_rate": 0.0001961197131092458, "loss": 4.5334, "step": 2583 }, { "epoch": 0.2679299327971797, "grad_norm": 0.734375, "learning_rate": 0.00019611671604349482, "loss": 4.5137, "step": 2584 }, { "epoch": 0.26803362085166776, "grad_norm": 0.7421875, "learning_rate": 0.00019611371784366815, "loss": 4.5113, "step": 2585 }, { "epoch": 0.26813730890615584, "grad_norm": 0.6796875, "learning_rate": 0.00019611071850980123, "loss": 4.5013, "step": 2586 }, { "epoch": 0.2682409969606439, "grad_norm": 0.67578125, "learning_rate": 0.00019610771804192943, "loss": 4.498, "step": 2587 }, { "epoch": 0.268344685015132, "grad_norm": 0.625, "learning_rate": 0.00019610471644008811, "loss": 4.5094, "step": 2588 }, { "epoch": 0.26844837306962005, "grad_norm": 0.71484375, "learning_rate": 0.00019610171370431276, "loss": 4.5432, "step": 2589 }, { "epoch": 0.2685520611241081, "grad_norm": 0.58984375, "learning_rate": 0.00019609870983463876, "loss": 4.5388, "step": 2590 }, { "epoch": 0.2686557491785962, "grad_norm": 0.65234375, "learning_rate": 0.00019609570483110158, "loss": 4.5287, "step": 2591 }, { "epoch": 0.26875943723308426, "grad_norm": 0.63671875, "learning_rate": 0.00019609269869373663, "loss": 4.4932, "step": 2592 }, { "epoch": 0.26886312528757234, "grad_norm": 0.6640625, "learning_rate": 0.00019608969142257941, "loss": 4.4847, "step": 2593 }, { "epoch": 0.2689668133420604, "grad_norm": 0.66015625, "learning_rate": 0.00019608668301766545, "loss": 4.5315, "step": 2594 }, { "epoch": 0.2690705013965485, "grad_norm": 0.59375, "learning_rate": 0.00019608367347903017, "loss": 4.5097, "step": 2595 }, { "epoch": 0.26917418945103655, "grad_norm": 0.64453125, "learning_rate": 0.0001960806628067091, "loss": 4.501, "step": 2596 }, { "epoch": 0.2692778775055246, "grad_norm": 0.640625, "learning_rate": 0.0001960776510007378, "loss": 4.4971, "step": 2597 }, { "epoch": 0.2693815655600127, "grad_norm": 0.5859375, "learning_rate": 0.0001960746380611517, "loss": 4.5445, "step": 2598 }, { "epoch": 0.26948525361450076, "grad_norm": 0.75, "learning_rate": 0.0001960716239879865, "loss": 4.511, "step": 2599 }, { "epoch": 0.26958894166898884, "grad_norm": 0.703125, "learning_rate": 0.00019606860878127767, "loss": 4.5239, "step": 2600 }, { "epoch": 0.2696926297234769, "grad_norm": 0.66796875, "learning_rate": 0.0001960655924410608, "loss": 4.5162, "step": 2601 }, { "epoch": 0.269796317777965, "grad_norm": 0.62109375, "learning_rate": 0.00019606257496737148, "loss": 4.511, "step": 2602 }, { "epoch": 0.26990000583245305, "grad_norm": 0.66015625, "learning_rate": 0.00019605955636024534, "loss": 4.4955, "step": 2603 }, { "epoch": 0.2700036938869411, "grad_norm": 0.6328125, "learning_rate": 0.00019605653661971796, "loss": 4.5352, "step": 2604 }, { "epoch": 0.2701073819414292, "grad_norm": 0.67578125, "learning_rate": 0.00019605351574582497, "loss": 4.5061, "step": 2605 }, { "epoch": 0.27021106999591726, "grad_norm": 0.58984375, "learning_rate": 0.00019605049373860205, "loss": 4.4833, "step": 2606 }, { "epoch": 0.27031475805040533, "grad_norm": 0.55078125, "learning_rate": 0.0001960474705980848, "loss": 4.5036, "step": 2607 }, { "epoch": 0.2704184461048934, "grad_norm": 0.6953125, "learning_rate": 0.00019604444632430895, "loss": 4.4686, "step": 2608 }, { "epoch": 0.2705221341593815, "grad_norm": 0.546875, "learning_rate": 0.00019604142091731016, "loss": 4.5039, "step": 2609 }, { "epoch": 0.27062582221386955, "grad_norm": 0.6328125, "learning_rate": 0.00019603839437712413, "loss": 4.4556, "step": 2610 }, { "epoch": 0.2707295102683576, "grad_norm": 0.6328125, "learning_rate": 0.00019603536670378656, "loss": 4.4963, "step": 2611 }, { "epoch": 0.2708331983228457, "grad_norm": 0.54296875, "learning_rate": 0.00019603233789733317, "loss": 4.4963, "step": 2612 }, { "epoch": 0.2709368863773338, "grad_norm": 0.64453125, "learning_rate": 0.00019602930795779969, "loss": 4.5181, "step": 2613 }, { "epoch": 0.2710405744318219, "grad_norm": 0.625, "learning_rate": 0.00019602627688522191, "loss": 4.5117, "step": 2614 }, { "epoch": 0.27114426248630996, "grad_norm": 0.61328125, "learning_rate": 0.00019602324467963555, "loss": 4.5349, "step": 2615 }, { "epoch": 0.27124795054079803, "grad_norm": 0.66796875, "learning_rate": 0.0001960202113410764, "loss": 4.532, "step": 2616 }, { "epoch": 0.2713516385952861, "grad_norm": 0.6171875, "learning_rate": 0.00019601717686958025, "loss": 4.4892, "step": 2617 }, { "epoch": 0.2714553266497742, "grad_norm": 0.60546875, "learning_rate": 0.00019601414126518293, "loss": 4.468, "step": 2618 }, { "epoch": 0.27155901470426225, "grad_norm": 0.640625, "learning_rate": 0.00019601110452792023, "loss": 4.4757, "step": 2619 }, { "epoch": 0.2716627027587503, "grad_norm": 0.5625, "learning_rate": 0.00019600806665782795, "loss": 4.5217, "step": 2620 }, { "epoch": 0.2717663908132384, "grad_norm": 0.64453125, "learning_rate": 0.00019600502765494202, "loss": 4.4974, "step": 2621 }, { "epoch": 0.27187007886772646, "grad_norm": 0.5625, "learning_rate": 0.00019600198751929822, "loss": 4.4916, "step": 2622 }, { "epoch": 0.27197376692221453, "grad_norm": 0.59765625, "learning_rate": 0.00019599894625093244, "loss": 4.5144, "step": 2623 }, { "epoch": 0.2720774549767026, "grad_norm": 0.6328125, "learning_rate": 0.00019599590384988057, "loss": 4.5205, "step": 2624 }, { "epoch": 0.2721811430311907, "grad_norm": 0.59765625, "learning_rate": 0.00019599286031617852, "loss": 4.5329, "step": 2625 }, { "epoch": 0.27228483108567875, "grad_norm": 0.703125, "learning_rate": 0.00019598981564986217, "loss": 4.5093, "step": 2626 }, { "epoch": 0.2723885191401668, "grad_norm": 0.66796875, "learning_rate": 0.00019598676985096747, "loss": 4.4862, "step": 2627 }, { "epoch": 0.2724922071946549, "grad_norm": 0.61328125, "learning_rate": 0.00019598372291953036, "loss": 4.5171, "step": 2628 }, { "epoch": 0.27259589524914296, "grad_norm": 0.703125, "learning_rate": 0.00019598067485558675, "loss": 4.5127, "step": 2629 }, { "epoch": 0.27269958330363103, "grad_norm": 0.625, "learning_rate": 0.00019597762565917265, "loss": 4.4947, "step": 2630 }, { "epoch": 0.2728032713581191, "grad_norm": 0.71875, "learning_rate": 0.000195974575330324, "loss": 4.4842, "step": 2631 }, { "epoch": 0.2729069594126072, "grad_norm": 0.73828125, "learning_rate": 0.00019597152386907683, "loss": 4.5063, "step": 2632 }, { "epoch": 0.27301064746709525, "grad_norm": 0.57421875, "learning_rate": 0.00019596847127546717, "loss": 4.4661, "step": 2633 }, { "epoch": 0.2731143355215833, "grad_norm": 0.7109375, "learning_rate": 0.0001959654175495309, "loss": 4.5253, "step": 2634 }, { "epoch": 0.2732180235760714, "grad_norm": 0.8359375, "learning_rate": 0.0001959623626913042, "loss": 4.5194, "step": 2635 }, { "epoch": 0.27332171163055946, "grad_norm": 0.85546875, "learning_rate": 0.00019595930670082305, "loss": 4.5654, "step": 2636 }, { "epoch": 0.27342539968504753, "grad_norm": 0.7265625, "learning_rate": 0.00019595624957812353, "loss": 4.5322, "step": 2637 }, { "epoch": 0.2735290877395356, "grad_norm": 0.61328125, "learning_rate": 0.00019595319132324166, "loss": 4.5138, "step": 2638 }, { "epoch": 0.2736327757940237, "grad_norm": 0.8359375, "learning_rate": 0.00019595013193621357, "loss": 4.4681, "step": 2639 }, { "epoch": 0.27373646384851175, "grad_norm": 0.703125, "learning_rate": 0.00019594707141707535, "loss": 4.5471, "step": 2640 }, { "epoch": 0.2738401519029998, "grad_norm": 0.6953125, "learning_rate": 0.00019594400976586315, "loss": 4.5668, "step": 2641 }, { "epoch": 0.2739438399574879, "grad_norm": 0.72265625, "learning_rate": 0.00019594094698261298, "loss": 4.5293, "step": 2642 }, { "epoch": 0.27404752801197596, "grad_norm": 0.63671875, "learning_rate": 0.0001959378830673611, "loss": 4.5093, "step": 2643 }, { "epoch": 0.27415121606646403, "grad_norm": 0.6796875, "learning_rate": 0.00019593481802014358, "loss": 4.5032, "step": 2644 }, { "epoch": 0.2742549041209521, "grad_norm": 0.5625, "learning_rate": 0.00019593175184099662, "loss": 4.5133, "step": 2645 }, { "epoch": 0.2743585921754402, "grad_norm": 0.66796875, "learning_rate": 0.0001959286845299564, "loss": 4.5058, "step": 2646 }, { "epoch": 0.27446228022992825, "grad_norm": 0.71875, "learning_rate": 0.00019592561608705911, "loss": 4.5501, "step": 2647 }, { "epoch": 0.2745659682844163, "grad_norm": 0.71484375, "learning_rate": 0.00019592254651234093, "loss": 4.5248, "step": 2648 }, { "epoch": 0.2746696563389044, "grad_norm": 0.81640625, "learning_rate": 0.0001959194758058381, "loss": 4.4747, "step": 2649 }, { "epoch": 0.27477334439339246, "grad_norm": 0.77734375, "learning_rate": 0.00019591640396758687, "loss": 4.5561, "step": 2650 }, { "epoch": 0.27487703244788053, "grad_norm": 0.6640625, "learning_rate": 0.00019591333099762344, "loss": 4.5369, "step": 2651 }, { "epoch": 0.2749807205023686, "grad_norm": 0.7265625, "learning_rate": 0.00019591025689598407, "loss": 4.5263, "step": 2652 }, { "epoch": 0.2750844085568567, "grad_norm": 0.72265625, "learning_rate": 0.0001959071816627051, "loss": 4.5292, "step": 2653 }, { "epoch": 0.27518809661134475, "grad_norm": 0.8125, "learning_rate": 0.00019590410529782273, "loss": 4.5482, "step": 2654 }, { "epoch": 0.2752917846658328, "grad_norm": 0.67578125, "learning_rate": 0.00019590102780137333, "loss": 4.5152, "step": 2655 }, { "epoch": 0.2753954727203209, "grad_norm": 0.59375, "learning_rate": 0.00019589794917339312, "loss": 4.5202, "step": 2656 }, { "epoch": 0.27549916077480896, "grad_norm": 0.671875, "learning_rate": 0.0001958948694139185, "loss": 4.5526, "step": 2657 }, { "epoch": 0.2756028488292971, "grad_norm": 0.6015625, "learning_rate": 0.0001958917885229858, "loss": 4.4628, "step": 2658 }, { "epoch": 0.27570653688378516, "grad_norm": 0.6171875, "learning_rate": 0.00019588870650063135, "loss": 4.4477, "step": 2659 }, { "epoch": 0.27581022493827323, "grad_norm": 0.78125, "learning_rate": 0.0001958856233468915, "loss": 4.4959, "step": 2660 }, { "epoch": 0.2759139129927613, "grad_norm": 0.66015625, "learning_rate": 0.00019588253906180266, "loss": 4.4946, "step": 2661 }, { "epoch": 0.2760176010472494, "grad_norm": 0.70703125, "learning_rate": 0.0001958794536454012, "loss": 4.4696, "step": 2662 }, { "epoch": 0.27612128910173744, "grad_norm": 0.8046875, "learning_rate": 0.00019587636709772358, "loss": 4.522, "step": 2663 }, { "epoch": 0.2762249771562255, "grad_norm": 0.671875, "learning_rate": 0.00019587327941880615, "loss": 4.483, "step": 2664 }, { "epoch": 0.2763286652107136, "grad_norm": 0.71484375, "learning_rate": 0.00019587019060868536, "loss": 4.5078, "step": 2665 }, { "epoch": 0.27643235326520166, "grad_norm": 0.9140625, "learning_rate": 0.0001958671006673977, "loss": 4.4836, "step": 2666 }, { "epoch": 0.27653604131968973, "grad_norm": 0.69140625, "learning_rate": 0.00019586400959497954, "loss": 4.4048, "step": 2667 }, { "epoch": 0.2766397293741778, "grad_norm": 0.76171875, "learning_rate": 0.00019586091739146738, "loss": 4.5028, "step": 2668 }, { "epoch": 0.27674341742866587, "grad_norm": 0.77734375, "learning_rate": 0.0001958578240568978, "loss": 4.4601, "step": 2669 }, { "epoch": 0.27684710548315394, "grad_norm": 0.76171875, "learning_rate": 0.00019585472959130715, "loss": 4.4928, "step": 2670 }, { "epoch": 0.276950793537642, "grad_norm": 0.65625, "learning_rate": 0.00019585163399473204, "loss": 4.4804, "step": 2671 }, { "epoch": 0.2770544815921301, "grad_norm": 0.6328125, "learning_rate": 0.00019584853726720898, "loss": 4.4653, "step": 2672 }, { "epoch": 0.27715816964661816, "grad_norm": 0.6875, "learning_rate": 0.0001958454394087745, "loss": 4.5314, "step": 2673 }, { "epoch": 0.27726185770110623, "grad_norm": 0.65625, "learning_rate": 0.0001958423404194651, "loss": 4.4906, "step": 2674 }, { "epoch": 0.2773655457555943, "grad_norm": 0.71484375, "learning_rate": 0.00019583924029931745, "loss": 4.4875, "step": 2675 }, { "epoch": 0.27746923381008237, "grad_norm": 0.6953125, "learning_rate": 0.00019583613904836805, "loss": 4.4854, "step": 2676 }, { "epoch": 0.27757292186457044, "grad_norm": 0.5859375, "learning_rate": 0.0001958330366666535, "loss": 4.5304, "step": 2677 }, { "epoch": 0.2776766099190585, "grad_norm": 0.84375, "learning_rate": 0.00019582993315421044, "loss": 4.5095, "step": 2678 }, { "epoch": 0.2777802979735466, "grad_norm": 0.65234375, "learning_rate": 0.00019582682851107547, "loss": 4.4651, "step": 2679 }, { "epoch": 0.27788398602803466, "grad_norm": 0.67578125, "learning_rate": 0.00019582372273728517, "loss": 4.5261, "step": 2680 }, { "epoch": 0.27798767408252273, "grad_norm": 0.71484375, "learning_rate": 0.00019582061583287626, "loss": 4.5165, "step": 2681 }, { "epoch": 0.2780913621370108, "grad_norm": 0.6796875, "learning_rate": 0.00019581750779788538, "loss": 4.511, "step": 2682 }, { "epoch": 0.27819505019149887, "grad_norm": 0.67578125, "learning_rate": 0.00019581439863234918, "loss": 4.5259, "step": 2683 }, { "epoch": 0.27829873824598694, "grad_norm": 0.74609375, "learning_rate": 0.00019581128833630435, "loss": 4.4464, "step": 2684 }, { "epoch": 0.278402426300475, "grad_norm": 0.79296875, "learning_rate": 0.0001958081769097876, "loss": 4.5156, "step": 2685 }, { "epoch": 0.2785061143549631, "grad_norm": 0.64453125, "learning_rate": 0.00019580506435283566, "loss": 4.4865, "step": 2686 }, { "epoch": 0.27860980240945116, "grad_norm": 0.6484375, "learning_rate": 0.00019580195066548522, "loss": 4.4432, "step": 2687 }, { "epoch": 0.27871349046393923, "grad_norm": 0.65625, "learning_rate": 0.000195798835847773, "loss": 4.476, "step": 2688 }, { "epoch": 0.2788171785184273, "grad_norm": 0.68359375, "learning_rate": 0.00019579571989973582, "loss": 4.5375, "step": 2689 }, { "epoch": 0.27892086657291537, "grad_norm": 0.77734375, "learning_rate": 0.00019579260282141038, "loss": 4.5139, "step": 2690 }, { "epoch": 0.27902455462740344, "grad_norm": 0.7578125, "learning_rate": 0.0001957894846128335, "loss": 4.5164, "step": 2691 }, { "epoch": 0.2791282426818915, "grad_norm": 0.5703125, "learning_rate": 0.00019578636527404193, "loss": 4.4868, "step": 2692 }, { "epoch": 0.2792319307363796, "grad_norm": 0.75390625, "learning_rate": 0.00019578324480507253, "loss": 4.4798, "step": 2693 }, { "epoch": 0.27933561879086766, "grad_norm": 0.70703125, "learning_rate": 0.0001957801232059621, "loss": 4.4708, "step": 2694 }, { "epoch": 0.27943930684535573, "grad_norm": 0.69921875, "learning_rate": 0.00019577700047674742, "loss": 4.5079, "step": 2695 }, { "epoch": 0.2795429948998438, "grad_norm": 0.6953125, "learning_rate": 0.00019577387661746538, "loss": 4.508, "step": 2696 }, { "epoch": 0.27964668295433187, "grad_norm": 0.72265625, "learning_rate": 0.00019577075162815284, "loss": 4.5315, "step": 2697 }, { "epoch": 0.27975037100881994, "grad_norm": 0.66796875, "learning_rate": 0.00019576762550884666, "loss": 4.4953, "step": 2698 }, { "epoch": 0.279854059063308, "grad_norm": 0.796875, "learning_rate": 0.00019576449825958373, "loss": 4.4543, "step": 2699 }, { "epoch": 0.2799577471177961, "grad_norm": 0.77734375, "learning_rate": 0.00019576136988040096, "loss": 4.4965, "step": 2700 }, { "epoch": 0.28006143517228416, "grad_norm": 0.77734375, "learning_rate": 0.00019575824037133525, "loss": 4.4783, "step": 2701 }, { "epoch": 0.28016512322677223, "grad_norm": 0.7578125, "learning_rate": 0.00019575510973242355, "loss": 4.5024, "step": 2702 }, { "epoch": 0.28026881128126035, "grad_norm": 0.65234375, "learning_rate": 0.00019575197796370273, "loss": 4.5101, "step": 2703 }, { "epoch": 0.2803724993357484, "grad_norm": 0.6796875, "learning_rate": 0.0001957488450652098, "loss": 4.469, "step": 2704 }, { "epoch": 0.2804761873902365, "grad_norm": 0.67578125, "learning_rate": 0.00019574571103698172, "loss": 4.533, "step": 2705 }, { "epoch": 0.28057987544472457, "grad_norm": 0.6640625, "learning_rate": 0.00019574257587905543, "loss": 4.4898, "step": 2706 }, { "epoch": 0.28068356349921264, "grad_norm": 0.58203125, "learning_rate": 0.00019573943959146797, "loss": 4.5083, "step": 2707 }, { "epoch": 0.2807872515537007, "grad_norm": 0.57421875, "learning_rate": 0.0001957363021742563, "loss": 4.5129, "step": 2708 }, { "epoch": 0.2808909396081888, "grad_norm": 0.52734375, "learning_rate": 0.0001957331636274575, "loss": 4.4601, "step": 2709 }, { "epoch": 0.28099462766267685, "grad_norm": 0.58203125, "learning_rate": 0.00019573002395110854, "loss": 4.4549, "step": 2710 }, { "epoch": 0.2810983157171649, "grad_norm": 0.6015625, "learning_rate": 0.00019572688314524648, "loss": 4.4636, "step": 2711 }, { "epoch": 0.281202003771653, "grad_norm": 0.58203125, "learning_rate": 0.0001957237412099084, "loss": 4.5096, "step": 2712 }, { "epoch": 0.28130569182614107, "grad_norm": 0.71484375, "learning_rate": 0.00019572059814513136, "loss": 4.4603, "step": 2713 }, { "epoch": 0.28140937988062914, "grad_norm": 0.6796875, "learning_rate": 0.00019571745395095244, "loss": 4.4586, "step": 2714 }, { "epoch": 0.2815130679351172, "grad_norm": 0.67578125, "learning_rate": 0.00019571430862740875, "loss": 4.4631, "step": 2715 }, { "epoch": 0.2816167559896053, "grad_norm": 0.65234375, "learning_rate": 0.00019571116217453736, "loss": 4.528, "step": 2716 }, { "epoch": 0.28172044404409335, "grad_norm": 0.81640625, "learning_rate": 0.00019570801459237543, "loss": 4.5021, "step": 2717 }, { "epoch": 0.2818241320985814, "grad_norm": 0.71875, "learning_rate": 0.00019570486588096014, "loss": 4.4842, "step": 2718 }, { "epoch": 0.2819278201530695, "grad_norm": 0.5703125, "learning_rate": 0.00019570171604032857, "loss": 4.4669, "step": 2719 }, { "epoch": 0.28203150820755757, "grad_norm": 0.7109375, "learning_rate": 0.00019569856507051792, "loss": 4.5322, "step": 2720 }, { "epoch": 0.28213519626204564, "grad_norm": 0.6953125, "learning_rate": 0.00019569541297156535, "loss": 4.4856, "step": 2721 }, { "epoch": 0.2822388843165337, "grad_norm": 0.71875, "learning_rate": 0.00019569225974350806, "loss": 4.4897, "step": 2722 }, { "epoch": 0.2823425723710218, "grad_norm": 0.77734375, "learning_rate": 0.00019568910538638327, "loss": 4.5146, "step": 2723 }, { "epoch": 0.28244626042550985, "grad_norm": 0.66796875, "learning_rate": 0.00019568594990022816, "loss": 4.5252, "step": 2724 }, { "epoch": 0.2825499484799979, "grad_norm": 0.75, "learning_rate": 0.00019568279328508, "loss": 4.5108, "step": 2725 }, { "epoch": 0.282653636534486, "grad_norm": 0.72265625, "learning_rate": 0.000195679635540976, "loss": 4.4586, "step": 2726 }, { "epoch": 0.28275732458897407, "grad_norm": 0.69140625, "learning_rate": 0.00019567647666795347, "loss": 4.4596, "step": 2727 }, { "epoch": 0.28286101264346214, "grad_norm": 0.734375, "learning_rate": 0.00019567331666604963, "loss": 4.506, "step": 2728 }, { "epoch": 0.2829647006979502, "grad_norm": 0.95703125, "learning_rate": 0.00019567015553530182, "loss": 4.4883, "step": 2729 }, { "epoch": 0.2830683887524383, "grad_norm": 1.015625, "learning_rate": 0.0001956669932757473, "loss": 4.4837, "step": 2730 }, { "epoch": 0.28317207680692635, "grad_norm": 0.8828125, "learning_rate": 0.00019566382988742332, "loss": 4.4863, "step": 2731 }, { "epoch": 0.2832757648614144, "grad_norm": 0.640625, "learning_rate": 0.00019566066537036734, "loss": 4.5071, "step": 2732 }, { "epoch": 0.2833794529159025, "grad_norm": 0.73046875, "learning_rate": 0.00019565749972461657, "loss": 4.5131, "step": 2733 }, { "epoch": 0.28348314097039057, "grad_norm": 0.90625, "learning_rate": 0.00019565433295020844, "loss": 4.4927, "step": 2734 }, { "epoch": 0.28358682902487864, "grad_norm": 0.82421875, "learning_rate": 0.0001956511650471803, "loss": 4.4971, "step": 2735 }, { "epoch": 0.2836905170793667, "grad_norm": 0.72265625, "learning_rate": 0.00019564799601556955, "loss": 4.4516, "step": 2736 }, { "epoch": 0.2837942051338548, "grad_norm": 0.62890625, "learning_rate": 0.00019564482585541351, "loss": 4.4599, "step": 2737 }, { "epoch": 0.28389789318834285, "grad_norm": 0.6875, "learning_rate": 0.0001956416545667496, "loss": 4.4894, "step": 2738 }, { "epoch": 0.2840015812428309, "grad_norm": 0.69921875, "learning_rate": 0.0001956384821496153, "loss": 4.5148, "step": 2739 }, { "epoch": 0.284105269297319, "grad_norm": 0.5625, "learning_rate": 0.00019563530860404802, "loss": 4.5061, "step": 2740 }, { "epoch": 0.28420895735180707, "grad_norm": 0.6796875, "learning_rate": 0.00019563213393008515, "loss": 4.4999, "step": 2741 }, { "epoch": 0.28431264540629514, "grad_norm": 0.72265625, "learning_rate": 0.0001956289581277642, "loss": 4.5199, "step": 2742 }, { "epoch": 0.2844163334607832, "grad_norm": 0.69921875, "learning_rate": 0.00019562578119712264, "loss": 4.5523, "step": 2743 }, { "epoch": 0.2845200215152713, "grad_norm": 0.68359375, "learning_rate": 0.00019562260313819795, "loss": 4.4767, "step": 2744 }, { "epoch": 0.28462370956975935, "grad_norm": 0.77734375, "learning_rate": 0.00019561942395102762, "loss": 4.5022, "step": 2745 }, { "epoch": 0.2847273976242474, "grad_norm": 0.859375, "learning_rate": 0.00019561624363564914, "loss": 4.4705, "step": 2746 }, { "epoch": 0.28483108567873555, "grad_norm": 0.9609375, "learning_rate": 0.00019561306219210005, "loss": 4.5097, "step": 2747 }, { "epoch": 0.2849347737332236, "grad_norm": 0.88671875, "learning_rate": 0.00019560987962041792, "loss": 4.4899, "step": 2748 }, { "epoch": 0.2850384617877117, "grad_norm": 0.62109375, "learning_rate": 0.00019560669592064026, "loss": 4.4619, "step": 2749 }, { "epoch": 0.28514214984219977, "grad_norm": 0.7734375, "learning_rate": 0.00019560351109280467, "loss": 4.4725, "step": 2750 }, { "epoch": 0.28524583789668784, "grad_norm": 0.75, "learning_rate": 0.00019560032513694865, "loss": 4.4604, "step": 2751 }, { "epoch": 0.2853495259511759, "grad_norm": 0.6875, "learning_rate": 0.0001955971380531099, "loss": 4.5025, "step": 2752 }, { "epoch": 0.285453214005664, "grad_norm": 0.76171875, "learning_rate": 0.00019559394984132596, "loss": 4.5058, "step": 2753 }, { "epoch": 0.28555690206015205, "grad_norm": 0.64453125, "learning_rate": 0.00019559076050163445, "loss": 4.4853, "step": 2754 }, { "epoch": 0.2856605901146401, "grad_norm": 0.71484375, "learning_rate": 0.00019558757003407303, "loss": 4.4615, "step": 2755 }, { "epoch": 0.2857642781691282, "grad_norm": 0.75, "learning_rate": 0.00019558437843867932, "loss": 4.4716, "step": 2756 }, { "epoch": 0.28586796622361627, "grad_norm": 0.71484375, "learning_rate": 0.00019558118571549096, "loss": 4.4484, "step": 2757 }, { "epoch": 0.28597165427810434, "grad_norm": 0.66796875, "learning_rate": 0.00019557799186454566, "loss": 4.5193, "step": 2758 }, { "epoch": 0.2860753423325924, "grad_norm": 0.81640625, "learning_rate": 0.00019557479688588108, "loss": 4.4887, "step": 2759 }, { "epoch": 0.2861790303870805, "grad_norm": 0.68359375, "learning_rate": 0.00019557160077953491, "loss": 4.4622, "step": 2760 }, { "epoch": 0.28628271844156855, "grad_norm": 0.7109375, "learning_rate": 0.0001955684035455449, "loss": 4.4998, "step": 2761 }, { "epoch": 0.2863864064960566, "grad_norm": 0.71484375, "learning_rate": 0.00019556520518394875, "loss": 4.5327, "step": 2762 }, { "epoch": 0.2864900945505447, "grad_norm": 0.8046875, "learning_rate": 0.00019556200569478417, "loss": 4.4883, "step": 2763 }, { "epoch": 0.28659378260503277, "grad_norm": 0.7421875, "learning_rate": 0.00019555880507808894, "loss": 4.5244, "step": 2764 }, { "epoch": 0.28669747065952084, "grad_norm": 0.7265625, "learning_rate": 0.00019555560333390083, "loss": 4.4726, "step": 2765 }, { "epoch": 0.2868011587140089, "grad_norm": 0.89453125, "learning_rate": 0.00019555240046225763, "loss": 4.457, "step": 2766 }, { "epoch": 0.286904846768497, "grad_norm": 1.1015625, "learning_rate": 0.00019554919646319708, "loss": 4.4616, "step": 2767 }, { "epoch": 0.28700853482298505, "grad_norm": 0.9453125, "learning_rate": 0.000195545991336757, "loss": 4.4876, "step": 2768 }, { "epoch": 0.2871122228774731, "grad_norm": 0.890625, "learning_rate": 0.00019554278508297524, "loss": 4.4903, "step": 2769 }, { "epoch": 0.2872159109319612, "grad_norm": 0.72265625, "learning_rate": 0.0001955395777018896, "loss": 4.5038, "step": 2770 }, { "epoch": 0.28731959898644926, "grad_norm": 0.95703125, "learning_rate": 0.00019553636919353794, "loss": 4.4924, "step": 2771 }, { "epoch": 0.28742328704093734, "grad_norm": 1.0703125, "learning_rate": 0.0001955331595579581, "loss": 4.4793, "step": 2772 }, { "epoch": 0.2875269750954254, "grad_norm": 0.73046875, "learning_rate": 0.00019552994879518798, "loss": 4.524, "step": 2773 }, { "epoch": 0.2876306631499135, "grad_norm": 0.78125, "learning_rate": 0.00019552673690526544, "loss": 4.5274, "step": 2774 }, { "epoch": 0.28773435120440155, "grad_norm": 0.83984375, "learning_rate": 0.00019552352388822834, "loss": 4.4656, "step": 2775 }, { "epoch": 0.2878380392588896, "grad_norm": 0.84375, "learning_rate": 0.0001955203097441147, "loss": 4.5221, "step": 2776 }, { "epoch": 0.2879417273133777, "grad_norm": 0.69140625, "learning_rate": 0.0001955170944729623, "loss": 4.4798, "step": 2777 }, { "epoch": 0.28804541536786576, "grad_norm": 0.7421875, "learning_rate": 0.00019551387807480918, "loss": 4.425, "step": 2778 }, { "epoch": 0.28814910342235384, "grad_norm": 0.8203125, "learning_rate": 0.0001955106605496933, "loss": 4.4931, "step": 2779 }, { "epoch": 0.2882527914768419, "grad_norm": 0.80078125, "learning_rate": 0.00019550744189765254, "loss": 4.5125, "step": 2780 }, { "epoch": 0.28835647953133, "grad_norm": 0.8515625, "learning_rate": 0.00019550422211872493, "loss": 4.4673, "step": 2781 }, { "epoch": 0.28846016758581805, "grad_norm": 0.7265625, "learning_rate": 0.00019550100121294844, "loss": 4.4965, "step": 2782 }, { "epoch": 0.2885638556403061, "grad_norm": 0.8359375, "learning_rate": 0.00019549777918036112, "loss": 4.5221, "step": 2783 }, { "epoch": 0.2886675436947942, "grad_norm": 0.85546875, "learning_rate": 0.00019549455602100094, "loss": 4.5056, "step": 2784 }, { "epoch": 0.28877123174928226, "grad_norm": 0.84375, "learning_rate": 0.00019549133173490593, "loss": 4.4957, "step": 2785 }, { "epoch": 0.28887491980377034, "grad_norm": 0.67578125, "learning_rate": 0.00019548810632211413, "loss": 4.5048, "step": 2786 }, { "epoch": 0.2889786078582584, "grad_norm": 0.796875, "learning_rate": 0.00019548487978266363, "loss": 4.462, "step": 2787 }, { "epoch": 0.2890822959127465, "grad_norm": 0.73046875, "learning_rate": 0.00019548165211659248, "loss": 4.4877, "step": 2788 }, { "epoch": 0.28918598396723455, "grad_norm": 0.75390625, "learning_rate": 0.00019547842332393874, "loss": 4.513, "step": 2789 }, { "epoch": 0.2892896720217226, "grad_norm": 0.8359375, "learning_rate": 0.00019547519340474057, "loss": 4.4637, "step": 2790 }, { "epoch": 0.2893933600762107, "grad_norm": 0.79296875, "learning_rate": 0.00019547196235903603, "loss": 4.4883, "step": 2791 }, { "epoch": 0.2894970481306988, "grad_norm": 0.73046875, "learning_rate": 0.00019546873018686322, "loss": 4.4691, "step": 2792 }, { "epoch": 0.2896007361851869, "grad_norm": 0.71875, "learning_rate": 0.00019546549688826038, "loss": 4.4894, "step": 2793 }, { "epoch": 0.28970442423967496, "grad_norm": 0.67578125, "learning_rate": 0.0001954622624632655, "loss": 4.5193, "step": 2794 }, { "epoch": 0.28980811229416303, "grad_norm": 0.90625, "learning_rate": 0.00019545902691191688, "loss": 4.5056, "step": 2795 }, { "epoch": 0.2899118003486511, "grad_norm": 0.8515625, "learning_rate": 0.00019545579023425263, "loss": 4.4285, "step": 2796 }, { "epoch": 0.2900154884031392, "grad_norm": 0.65234375, "learning_rate": 0.00019545255243031098, "loss": 4.4977, "step": 2797 }, { "epoch": 0.29011917645762725, "grad_norm": 0.93359375, "learning_rate": 0.0001954493135001301, "loss": 4.442, "step": 2798 }, { "epoch": 0.2902228645121153, "grad_norm": 0.93359375, "learning_rate": 0.0001954460734437482, "loss": 4.4989, "step": 2799 }, { "epoch": 0.2903265525666034, "grad_norm": 1.078125, "learning_rate": 0.00019544283226120352, "loss": 4.5369, "step": 2800 }, { "epoch": 0.29043024062109146, "grad_norm": 0.80859375, "learning_rate": 0.00019543958995253433, "loss": 4.4906, "step": 2801 }, { "epoch": 0.29053392867557953, "grad_norm": 0.76953125, "learning_rate": 0.00019543634651777886, "loss": 4.489, "step": 2802 }, { "epoch": 0.2906376167300676, "grad_norm": 1.0625, "learning_rate": 0.00019543310195697537, "loss": 4.472, "step": 2803 }, { "epoch": 0.2907413047845557, "grad_norm": 0.8828125, "learning_rate": 0.00019542985627016214, "loss": 4.464, "step": 2804 }, { "epoch": 0.29084499283904375, "grad_norm": 0.74609375, "learning_rate": 0.00019542660945737753, "loss": 4.4578, "step": 2805 }, { "epoch": 0.2909486808935318, "grad_norm": 0.77734375, "learning_rate": 0.00019542336151865975, "loss": 4.5206, "step": 2806 }, { "epoch": 0.2910523689480199, "grad_norm": 0.8515625, "learning_rate": 0.00019542011245404716, "loss": 4.4985, "step": 2807 }, { "epoch": 0.29115605700250796, "grad_norm": 0.8984375, "learning_rate": 0.00019541686226357813, "loss": 4.4695, "step": 2808 }, { "epoch": 0.29125974505699603, "grad_norm": 0.76171875, "learning_rate": 0.00019541361094729102, "loss": 4.4816, "step": 2809 }, { "epoch": 0.2913634331114841, "grad_norm": 0.7578125, "learning_rate": 0.00019541035850522412, "loss": 4.4805, "step": 2810 }, { "epoch": 0.2914671211659722, "grad_norm": 1.046875, "learning_rate": 0.00019540710493741586, "loss": 4.5198, "step": 2811 }, { "epoch": 0.29157080922046025, "grad_norm": 0.9921875, "learning_rate": 0.00019540385024390458, "loss": 4.4809, "step": 2812 }, { "epoch": 0.2916744972749483, "grad_norm": 0.87890625, "learning_rate": 0.00019540059442472874, "loss": 4.4642, "step": 2813 }, { "epoch": 0.2917781853294364, "grad_norm": 0.98828125, "learning_rate": 0.00019539733747992672, "loss": 4.5016, "step": 2814 }, { "epoch": 0.29188187338392446, "grad_norm": 0.99609375, "learning_rate": 0.00019539407940953696, "loss": 4.4307, "step": 2815 }, { "epoch": 0.29198556143841253, "grad_norm": 0.80078125, "learning_rate": 0.0001953908202135979, "loss": 4.4877, "step": 2816 }, { "epoch": 0.2920892494929006, "grad_norm": 0.72265625, "learning_rate": 0.000195387559892148, "loss": 4.4839, "step": 2817 }, { "epoch": 0.2921929375473887, "grad_norm": 0.7265625, "learning_rate": 0.0001953842984452257, "loss": 4.4767, "step": 2818 }, { "epoch": 0.29229662560187675, "grad_norm": 0.71484375, "learning_rate": 0.0001953810358728695, "loss": 4.4613, "step": 2819 }, { "epoch": 0.2924003136563648, "grad_norm": 0.6640625, "learning_rate": 0.00019537777217511795, "loss": 4.4937, "step": 2820 }, { "epoch": 0.2925040017108529, "grad_norm": 0.6640625, "learning_rate": 0.00019537450735200947, "loss": 4.4661, "step": 2821 }, { "epoch": 0.29260768976534096, "grad_norm": 0.85546875, "learning_rate": 0.0001953712414035826, "loss": 4.5173, "step": 2822 }, { "epoch": 0.29271137781982903, "grad_norm": 0.74609375, "learning_rate": 0.00019536797432987592, "loss": 4.5015, "step": 2823 }, { "epoch": 0.2928150658743171, "grad_norm": 0.63671875, "learning_rate": 0.00019536470613092796, "loss": 4.4875, "step": 2824 }, { "epoch": 0.2929187539288052, "grad_norm": 0.6328125, "learning_rate": 0.00019536143680677726, "loss": 4.464, "step": 2825 }, { "epoch": 0.29302244198329325, "grad_norm": 0.71875, "learning_rate": 0.00019535816635746241, "loss": 4.4916, "step": 2826 }, { "epoch": 0.2931261300377813, "grad_norm": 0.7109375, "learning_rate": 0.00019535489478302197, "loss": 4.491, "step": 2827 }, { "epoch": 0.2932298180922694, "grad_norm": 0.72265625, "learning_rate": 0.0001953516220834946, "loss": 4.5309, "step": 2828 }, { "epoch": 0.29333350614675746, "grad_norm": 0.70703125, "learning_rate": 0.00019534834825891886, "loss": 4.4935, "step": 2829 }, { "epoch": 0.29343719420124553, "grad_norm": 0.80859375, "learning_rate": 0.00019534507330933344, "loss": 4.447, "step": 2830 }, { "epoch": 0.2935408822557336, "grad_norm": 0.73046875, "learning_rate": 0.0001953417972347769, "loss": 4.4797, "step": 2831 }, { "epoch": 0.2936445703102217, "grad_norm": 0.66015625, "learning_rate": 0.00019533852003528792, "loss": 4.4689, "step": 2832 }, { "epoch": 0.29374825836470975, "grad_norm": 0.71484375, "learning_rate": 0.0001953352417109052, "loss": 4.5026, "step": 2833 }, { "epoch": 0.2938519464191978, "grad_norm": 0.6875, "learning_rate": 0.00019533196226166738, "loss": 4.4699, "step": 2834 }, { "epoch": 0.2939556344736859, "grad_norm": 0.69921875, "learning_rate": 0.00019532868168761322, "loss": 4.5083, "step": 2835 }, { "epoch": 0.29405932252817396, "grad_norm": 0.69921875, "learning_rate": 0.00019532539998878137, "loss": 4.4928, "step": 2836 }, { "epoch": 0.2941630105826621, "grad_norm": 0.69140625, "learning_rate": 0.00019532211716521053, "loss": 4.492, "step": 2837 }, { "epoch": 0.29426669863715016, "grad_norm": 0.6796875, "learning_rate": 0.0001953188332169395, "loss": 4.4951, "step": 2838 }, { "epoch": 0.29437038669163823, "grad_norm": 0.68359375, "learning_rate": 0.000195315548144007, "loss": 4.4724, "step": 2839 }, { "epoch": 0.2944740747461263, "grad_norm": 0.67578125, "learning_rate": 0.00019531226194645176, "loss": 4.4568, "step": 2840 }, { "epoch": 0.2945777628006144, "grad_norm": 0.67578125, "learning_rate": 0.00019530897462431256, "loss": 4.4589, "step": 2841 }, { "epoch": 0.29468145085510244, "grad_norm": 0.6875, "learning_rate": 0.00019530568617762825, "loss": 4.4916, "step": 2842 }, { "epoch": 0.2947851389095905, "grad_norm": 0.6875, "learning_rate": 0.00019530239660643756, "loss": 4.4933, "step": 2843 }, { "epoch": 0.2948888269640786, "grad_norm": 0.64453125, "learning_rate": 0.00019529910591077932, "loss": 4.4446, "step": 2844 }, { "epoch": 0.29499251501856666, "grad_norm": 0.64453125, "learning_rate": 0.0001952958140906924, "loss": 4.4681, "step": 2845 }, { "epoch": 0.29509620307305473, "grad_norm": 0.82421875, "learning_rate": 0.00019529252114621558, "loss": 4.4875, "step": 2846 }, { "epoch": 0.2951998911275428, "grad_norm": 0.76953125, "learning_rate": 0.00019528922707738776, "loss": 4.4859, "step": 2847 }, { "epoch": 0.2953035791820309, "grad_norm": 0.62109375, "learning_rate": 0.00019528593188424779, "loss": 4.4818, "step": 2848 }, { "epoch": 0.29540726723651894, "grad_norm": 0.69140625, "learning_rate": 0.0001952826355668345, "loss": 4.4778, "step": 2849 }, { "epoch": 0.295510955291007, "grad_norm": 0.703125, "learning_rate": 0.00019527933812518686, "loss": 4.4353, "step": 2850 }, { "epoch": 0.2956146433454951, "grad_norm": 0.62109375, "learning_rate": 0.00019527603955934376, "loss": 4.5151, "step": 2851 }, { "epoch": 0.29571833139998316, "grad_norm": 0.73828125, "learning_rate": 0.0001952727398693441, "loss": 4.508, "step": 2852 }, { "epoch": 0.29582201945447123, "grad_norm": 0.73828125, "learning_rate": 0.00019526943905522678, "loss": 4.5373, "step": 2853 }, { "epoch": 0.2959257075089593, "grad_norm": 0.55078125, "learning_rate": 0.00019526613711703082, "loss": 4.5151, "step": 2854 }, { "epoch": 0.2960293955634474, "grad_norm": 0.73046875, "learning_rate": 0.00019526283405479512, "loss": 4.49, "step": 2855 }, { "epoch": 0.29613308361793544, "grad_norm": 0.68359375, "learning_rate": 0.00019525952986855868, "loss": 4.4924, "step": 2856 }, { "epoch": 0.2962367716724235, "grad_norm": 0.609375, "learning_rate": 0.0001952562245583605, "loss": 4.4681, "step": 2857 }, { "epoch": 0.2963404597269116, "grad_norm": 0.68359375, "learning_rate": 0.00019525291812423954, "loss": 4.4637, "step": 2858 }, { "epoch": 0.29644414778139966, "grad_norm": 0.671875, "learning_rate": 0.00019524961056623482, "loss": 4.4826, "step": 2859 }, { "epoch": 0.29654783583588773, "grad_norm": 0.62109375, "learning_rate": 0.0001952463018843854, "loss": 4.4995, "step": 2860 }, { "epoch": 0.2966515238903758, "grad_norm": 0.76171875, "learning_rate": 0.0001952429920787303, "loss": 4.4589, "step": 2861 }, { "epoch": 0.2967552119448639, "grad_norm": 0.80078125, "learning_rate": 0.00019523968114930858, "loss": 4.49, "step": 2862 }, { "epoch": 0.29685889999935194, "grad_norm": 0.88671875, "learning_rate": 0.00019523636909615926, "loss": 4.4875, "step": 2863 }, { "epoch": 0.29696258805384, "grad_norm": 0.7890625, "learning_rate": 0.00019523305591932148, "loss": 4.48, "step": 2864 }, { "epoch": 0.2970662761083281, "grad_norm": 0.625, "learning_rate": 0.00019522974161883432, "loss": 4.5249, "step": 2865 }, { "epoch": 0.29716996416281616, "grad_norm": 0.78125, "learning_rate": 0.00019522642619473684, "loss": 4.4829, "step": 2866 }, { "epoch": 0.29727365221730423, "grad_norm": 0.76171875, "learning_rate": 0.0001952231096470682, "loss": 4.4608, "step": 2867 }, { "epoch": 0.2973773402717923, "grad_norm": 0.8828125, "learning_rate": 0.0001952197919758675, "loss": 4.4793, "step": 2868 }, { "epoch": 0.2974810283262804, "grad_norm": 0.7421875, "learning_rate": 0.00019521647318117394, "loss": 4.4322, "step": 2869 }, { "epoch": 0.29758471638076844, "grad_norm": 0.64453125, "learning_rate": 0.00019521315326302664, "loss": 4.4672, "step": 2870 }, { "epoch": 0.2976884044352565, "grad_norm": 0.8203125, "learning_rate": 0.00019520983222146476, "loss": 4.4932, "step": 2871 }, { "epoch": 0.2977920924897446, "grad_norm": 0.7265625, "learning_rate": 0.0001952065100565275, "loss": 4.4657, "step": 2872 }, { "epoch": 0.29789578054423266, "grad_norm": 0.6484375, "learning_rate": 0.0001952031867682541, "loss": 4.4955, "step": 2873 }, { "epoch": 0.29799946859872073, "grad_norm": 0.94921875, "learning_rate": 0.0001951998623566837, "loss": 4.487, "step": 2874 }, { "epoch": 0.2981031566532088, "grad_norm": 1.078125, "learning_rate": 0.00019519653682185552, "loss": 4.4835, "step": 2875 }, { "epoch": 0.29820684470769687, "grad_norm": 0.81640625, "learning_rate": 0.00019519321016380888, "loss": 4.4324, "step": 2876 }, { "epoch": 0.29831053276218494, "grad_norm": 0.74609375, "learning_rate": 0.00019518988238258298, "loss": 4.5012, "step": 2877 }, { "epoch": 0.298414220816673, "grad_norm": 0.8515625, "learning_rate": 0.00019518655347821709, "loss": 4.4762, "step": 2878 }, { "epoch": 0.2985179088711611, "grad_norm": 0.96875, "learning_rate": 0.00019518322345075047, "loss": 4.4632, "step": 2879 }, { "epoch": 0.29862159692564916, "grad_norm": 0.8671875, "learning_rate": 0.00019517989230022242, "loss": 4.437, "step": 2880 }, { "epoch": 0.2987252849801373, "grad_norm": 0.828125, "learning_rate": 0.00019517656002667226, "loss": 4.4664, "step": 2881 }, { "epoch": 0.29882897303462536, "grad_norm": 0.765625, "learning_rate": 0.00019517322663013928, "loss": 4.496, "step": 2882 }, { "epoch": 0.2989326610891134, "grad_norm": 0.6953125, "learning_rate": 0.00019516989211066285, "loss": 4.4569, "step": 2883 }, { "epoch": 0.2990363491436015, "grad_norm": 1.046875, "learning_rate": 0.0001951665564682823, "loss": 4.4752, "step": 2884 }, { "epoch": 0.29914003719808957, "grad_norm": 0.828125, "learning_rate": 0.00019516321970303695, "loss": 4.5071, "step": 2885 }, { "epoch": 0.29924372525257764, "grad_norm": 0.73046875, "learning_rate": 0.00019515988181496624, "loss": 4.4836, "step": 2886 }, { "epoch": 0.2993474133070657, "grad_norm": 0.92578125, "learning_rate": 0.00019515654280410945, "loss": 4.4671, "step": 2887 }, { "epoch": 0.2994511013615538, "grad_norm": 1.1484375, "learning_rate": 0.00019515320267050613, "loss": 4.4976, "step": 2888 }, { "epoch": 0.29955478941604186, "grad_norm": 0.6640625, "learning_rate": 0.00019514986141419552, "loss": 4.4499, "step": 2889 }, { "epoch": 0.2996584774705299, "grad_norm": 0.8515625, "learning_rate": 0.00019514651903521717, "loss": 4.5257, "step": 2890 }, { "epoch": 0.299762165525018, "grad_norm": 0.9921875, "learning_rate": 0.00019514317553361043, "loss": 4.4424, "step": 2891 }, { "epoch": 0.29986585357950607, "grad_norm": 0.84375, "learning_rate": 0.00019513983090941483, "loss": 4.4534, "step": 2892 }, { "epoch": 0.29996954163399414, "grad_norm": 0.79296875, "learning_rate": 0.00019513648516266975, "loss": 4.4862, "step": 2893 }, { "epoch": 0.3000732296884822, "grad_norm": 1.0625, "learning_rate": 0.00019513313829341473, "loss": 4.5139, "step": 2894 }, { "epoch": 0.3001769177429703, "grad_norm": 0.63671875, "learning_rate": 0.00019512979030168927, "loss": 4.4577, "step": 2895 }, { "epoch": 0.30028060579745836, "grad_norm": 1.046875, "learning_rate": 0.00019512644118753277, "loss": 4.4544, "step": 2896 }, { "epoch": 0.3003842938519464, "grad_norm": 0.94140625, "learning_rate": 0.0001951230909509849, "loss": 4.4591, "step": 2897 }, { "epoch": 0.3004879819064345, "grad_norm": 0.85546875, "learning_rate": 0.00019511973959208506, "loss": 4.5211, "step": 2898 }, { "epoch": 0.30059166996092257, "grad_norm": 1.0625, "learning_rate": 0.00019511638711087282, "loss": 4.4828, "step": 2899 }, { "epoch": 0.30069535801541064, "grad_norm": 1.1875, "learning_rate": 0.0001951130335073878, "loss": 4.4544, "step": 2900 }, { "epoch": 0.3007990460698987, "grad_norm": 0.7734375, "learning_rate": 0.00019510967878166947, "loss": 4.4558, "step": 2901 }, { "epoch": 0.3009027341243868, "grad_norm": 1.1484375, "learning_rate": 0.00019510632293375752, "loss": 4.4773, "step": 2902 }, { "epoch": 0.30100642217887486, "grad_norm": 0.9296875, "learning_rate": 0.00019510296596369147, "loss": 4.4952, "step": 2903 }, { "epoch": 0.3011101102333629, "grad_norm": 1.0625, "learning_rate": 0.00019509960787151095, "loss": 4.5134, "step": 2904 }, { "epoch": 0.301213798287851, "grad_norm": 0.84375, "learning_rate": 0.00019509624865725558, "loss": 4.4672, "step": 2905 }, { "epoch": 0.30131748634233907, "grad_norm": 1.0703125, "learning_rate": 0.00019509288832096497, "loss": 4.5035, "step": 2906 }, { "epoch": 0.30142117439682714, "grad_norm": 0.83203125, "learning_rate": 0.00019508952686267885, "loss": 4.42, "step": 2907 }, { "epoch": 0.3015248624513152, "grad_norm": 0.90234375, "learning_rate": 0.00019508616428243677, "loss": 4.465, "step": 2908 }, { "epoch": 0.3016285505058033, "grad_norm": 0.76953125, "learning_rate": 0.0001950828005802785, "loss": 4.4657, "step": 2909 }, { "epoch": 0.30173223856029135, "grad_norm": 0.65234375, "learning_rate": 0.0001950794357562437, "loss": 4.484, "step": 2910 }, { "epoch": 0.3018359266147794, "grad_norm": 0.81640625, "learning_rate": 0.00019507606981037203, "loss": 4.4433, "step": 2911 }, { "epoch": 0.3019396146692675, "grad_norm": 0.83984375, "learning_rate": 0.00019507270274270324, "loss": 4.4762, "step": 2912 }, { "epoch": 0.30204330272375557, "grad_norm": 0.7109375, "learning_rate": 0.00019506933455327706, "loss": 4.5061, "step": 2913 }, { "epoch": 0.30214699077824364, "grad_norm": 0.76953125, "learning_rate": 0.00019506596524213325, "loss": 4.4683, "step": 2914 }, { "epoch": 0.3022506788327317, "grad_norm": 0.73828125, "learning_rate": 0.0001950625948093115, "loss": 4.4668, "step": 2915 }, { "epoch": 0.3023543668872198, "grad_norm": 0.71875, "learning_rate": 0.00019505922325485165, "loss": 4.4326, "step": 2916 }, { "epoch": 0.30245805494170785, "grad_norm": 0.64453125, "learning_rate": 0.00019505585057879343, "loss": 4.4932, "step": 2917 }, { "epoch": 0.3025617429961959, "grad_norm": 0.7890625, "learning_rate": 0.00019505247678117663, "loss": 4.4537, "step": 2918 }, { "epoch": 0.302665431050684, "grad_norm": 0.6875, "learning_rate": 0.0001950491018620411, "loss": 4.4999, "step": 2919 }, { "epoch": 0.30276911910517207, "grad_norm": 0.82421875, "learning_rate": 0.00019504572582142667, "loss": 4.4697, "step": 2920 }, { "epoch": 0.30287280715966014, "grad_norm": 0.60546875, "learning_rate": 0.0001950423486593731, "loss": 4.4829, "step": 2921 }, { "epoch": 0.3029764952141482, "grad_norm": 0.7890625, "learning_rate": 0.0001950389703759203, "loss": 4.4401, "step": 2922 }, { "epoch": 0.3030801832686363, "grad_norm": 0.640625, "learning_rate": 0.00019503559097110807, "loss": 4.4518, "step": 2923 }, { "epoch": 0.30318387132312435, "grad_norm": 0.74609375, "learning_rate": 0.00019503221044497637, "loss": 4.4695, "step": 2924 }, { "epoch": 0.3032875593776124, "grad_norm": 0.7578125, "learning_rate": 0.00019502882879756503, "loss": 4.4743, "step": 2925 }, { "epoch": 0.30339124743210055, "grad_norm": 0.6953125, "learning_rate": 0.00019502544602891395, "loss": 4.5007, "step": 2926 }, { "epoch": 0.3034949354865886, "grad_norm": 0.7265625, "learning_rate": 0.00019502206213906306, "loss": 4.4697, "step": 2927 }, { "epoch": 0.3035986235410767, "grad_norm": 0.6484375, "learning_rate": 0.0001950186771280523, "loss": 4.4931, "step": 2928 }, { "epoch": 0.30370231159556477, "grad_norm": 0.75, "learning_rate": 0.00019501529099592155, "loss": 4.4786, "step": 2929 }, { "epoch": 0.30380599965005284, "grad_norm": 0.82421875, "learning_rate": 0.0001950119037427108, "loss": 4.4402, "step": 2930 }, { "epoch": 0.3039096877045409, "grad_norm": 0.6640625, "learning_rate": 0.00019500851536846008, "loss": 4.4227, "step": 2931 }, { "epoch": 0.304013375759029, "grad_norm": 0.609375, "learning_rate": 0.00019500512587320926, "loss": 4.4456, "step": 2932 }, { "epoch": 0.30411706381351705, "grad_norm": 0.640625, "learning_rate": 0.0001950017352569984, "loss": 4.4523, "step": 2933 }, { "epoch": 0.3042207518680051, "grad_norm": 0.66796875, "learning_rate": 0.00019499834351986746, "loss": 4.453, "step": 2934 }, { "epoch": 0.3043244399224932, "grad_norm": 0.67578125, "learning_rate": 0.00019499495066185646, "loss": 4.4963, "step": 2935 }, { "epoch": 0.30442812797698127, "grad_norm": 0.7109375, "learning_rate": 0.0001949915566830055, "loss": 4.4715, "step": 2936 }, { "epoch": 0.30453181603146934, "grad_norm": 0.703125, "learning_rate": 0.00019498816158335458, "loss": 4.4423, "step": 2937 }, { "epoch": 0.3046355040859574, "grad_norm": 0.640625, "learning_rate": 0.00019498476536294375, "loss": 4.4536, "step": 2938 }, { "epoch": 0.3047391921404455, "grad_norm": 0.65625, "learning_rate": 0.0001949813680218131, "loss": 4.4166, "step": 2939 }, { "epoch": 0.30484288019493355, "grad_norm": 0.6875, "learning_rate": 0.0001949779695600027, "loss": 4.4335, "step": 2940 }, { "epoch": 0.3049465682494216, "grad_norm": 0.6484375, "learning_rate": 0.00019497456997755264, "loss": 4.4783, "step": 2941 }, { "epoch": 0.3050502563039097, "grad_norm": 0.73046875, "learning_rate": 0.00019497116927450305, "loss": 4.474, "step": 2942 }, { "epoch": 0.30515394435839777, "grad_norm": 0.62890625, "learning_rate": 0.00019496776745089406, "loss": 4.4656, "step": 2943 }, { "epoch": 0.30525763241288584, "grad_norm": 0.69140625, "learning_rate": 0.0001949643645067658, "loss": 4.4925, "step": 2944 }, { "epoch": 0.3053613204673739, "grad_norm": 0.7265625, "learning_rate": 0.00019496096044215847, "loss": 4.4422, "step": 2945 }, { "epoch": 0.305465008521862, "grad_norm": 0.64453125, "learning_rate": 0.00019495755525711212, "loss": 4.4618, "step": 2946 }, { "epoch": 0.30556869657635005, "grad_norm": 0.8125, "learning_rate": 0.000194954148951667, "loss": 4.4711, "step": 2947 }, { "epoch": 0.3056723846308381, "grad_norm": 0.578125, "learning_rate": 0.0001949507415258633, "loss": 4.45, "step": 2948 }, { "epoch": 0.3057760726853262, "grad_norm": 0.7265625, "learning_rate": 0.00019494733297974125, "loss": 4.428, "step": 2949 }, { "epoch": 0.30587976073981427, "grad_norm": 0.71484375, "learning_rate": 0.000194943923313341, "loss": 4.451, "step": 2950 }, { "epoch": 0.30598344879430234, "grad_norm": 0.6328125, "learning_rate": 0.0001949405125267028, "loss": 4.4983, "step": 2951 }, { "epoch": 0.3060871368487904, "grad_norm": 0.8984375, "learning_rate": 0.00019493710061986694, "loss": 4.4957, "step": 2952 }, { "epoch": 0.3061908249032785, "grad_norm": 0.90625, "learning_rate": 0.00019493368759287361, "loss": 4.4796, "step": 2953 }, { "epoch": 0.30629451295776655, "grad_norm": 0.76171875, "learning_rate": 0.00019493027344576316, "loss": 4.4595, "step": 2954 }, { "epoch": 0.3063982010122546, "grad_norm": 0.8359375, "learning_rate": 0.0001949268581785758, "loss": 4.4666, "step": 2955 }, { "epoch": 0.3065018890667427, "grad_norm": 0.84765625, "learning_rate": 0.00019492344179135188, "loss": 4.498, "step": 2956 }, { "epoch": 0.30660557712123077, "grad_norm": 1.0078125, "learning_rate": 0.00019492002428413168, "loss": 4.4927, "step": 2957 }, { "epoch": 0.30670926517571884, "grad_norm": 1.0078125, "learning_rate": 0.0001949166056569555, "loss": 4.4527, "step": 2958 }, { "epoch": 0.3068129532302069, "grad_norm": 0.921875, "learning_rate": 0.0001949131859098637, "loss": 4.4533, "step": 2959 }, { "epoch": 0.306916641284695, "grad_norm": 0.984375, "learning_rate": 0.00019490976504289668, "loss": 4.4634, "step": 2960 }, { "epoch": 0.30702032933918305, "grad_norm": 0.8984375, "learning_rate": 0.00019490634305609471, "loss": 4.5015, "step": 2961 }, { "epoch": 0.3071240173936711, "grad_norm": 0.9140625, "learning_rate": 0.00019490291994949828, "loss": 4.4517, "step": 2962 }, { "epoch": 0.3072277054481592, "grad_norm": 0.8828125, "learning_rate": 0.00019489949572314765, "loss": 4.4849, "step": 2963 }, { "epoch": 0.30733139350264727, "grad_norm": 0.6796875, "learning_rate": 0.0001948960703770833, "loss": 4.4918, "step": 2964 }, { "epoch": 0.30743508155713534, "grad_norm": 0.9609375, "learning_rate": 0.00019489264391134563, "loss": 4.4788, "step": 2965 }, { "epoch": 0.3075387696116234, "grad_norm": 0.84375, "learning_rate": 0.00019488921632597505, "loss": 4.5025, "step": 2966 }, { "epoch": 0.3076424576661115, "grad_norm": 0.83984375, "learning_rate": 0.00019488578762101203, "loss": 4.467, "step": 2967 }, { "epoch": 0.30774614572059955, "grad_norm": 0.8125, "learning_rate": 0.00019488235779649703, "loss": 4.4541, "step": 2968 }, { "epoch": 0.3078498337750876, "grad_norm": 0.78125, "learning_rate": 0.0001948789268524705, "loss": 4.4652, "step": 2969 }, { "epoch": 0.3079535218295757, "grad_norm": 0.7890625, "learning_rate": 0.0001948754947889729, "loss": 4.4786, "step": 2970 }, { "epoch": 0.3080572098840638, "grad_norm": 0.859375, "learning_rate": 0.0001948720616060448, "loss": 4.4873, "step": 2971 }, { "epoch": 0.3081608979385519, "grad_norm": 0.75, "learning_rate": 0.0001948686273037266, "loss": 4.4651, "step": 2972 }, { "epoch": 0.30826458599303996, "grad_norm": 0.78515625, "learning_rate": 0.00019486519188205892, "loss": 4.5188, "step": 2973 }, { "epoch": 0.30836827404752803, "grad_norm": 0.86328125, "learning_rate": 0.00019486175534108225, "loss": 4.5097, "step": 2974 }, { "epoch": 0.3084719621020161, "grad_norm": 0.8046875, "learning_rate": 0.00019485831768083713, "loss": 4.4577, "step": 2975 }, { "epoch": 0.3085756501565042, "grad_norm": 0.73828125, "learning_rate": 0.00019485487890136412, "loss": 4.4674, "step": 2976 }, { "epoch": 0.30867933821099225, "grad_norm": 0.875, "learning_rate": 0.00019485143900270383, "loss": 4.4575, "step": 2977 }, { "epoch": 0.3087830262654803, "grad_norm": 0.88671875, "learning_rate": 0.00019484799798489676, "loss": 4.5353, "step": 2978 }, { "epoch": 0.3088867143199684, "grad_norm": 0.97265625, "learning_rate": 0.00019484455584798361, "loss": 4.4304, "step": 2979 }, { "epoch": 0.30899040237445646, "grad_norm": 0.95703125, "learning_rate": 0.000194841112592005, "loss": 4.4927, "step": 2980 }, { "epoch": 0.30909409042894453, "grad_norm": 0.87109375, "learning_rate": 0.00019483766821700146, "loss": 4.483, "step": 2981 }, { "epoch": 0.3091977784834326, "grad_norm": 0.66015625, "learning_rate": 0.0001948342227230137, "loss": 4.4858, "step": 2982 }, { "epoch": 0.3093014665379207, "grad_norm": 0.9453125, "learning_rate": 0.00019483077611008235, "loss": 4.4839, "step": 2983 }, { "epoch": 0.30940515459240875, "grad_norm": 0.9609375, "learning_rate": 0.0001948273283782481, "loss": 4.4471, "step": 2984 }, { "epoch": 0.3095088426468968, "grad_norm": 0.625, "learning_rate": 0.0001948238795275516, "loss": 4.4844, "step": 2985 }, { "epoch": 0.3096125307013849, "grad_norm": 0.78125, "learning_rate": 0.00019482042955803355, "loss": 4.4448, "step": 2986 }, { "epoch": 0.30971621875587296, "grad_norm": 0.890625, "learning_rate": 0.00019481697846973465, "loss": 4.4542, "step": 2987 }, { "epoch": 0.30981990681036103, "grad_norm": 0.73828125, "learning_rate": 0.00019481352626269565, "loss": 4.4617, "step": 2988 }, { "epoch": 0.3099235948648491, "grad_norm": 0.875, "learning_rate": 0.00019481007293695727, "loss": 4.458, "step": 2989 }, { "epoch": 0.3100272829193372, "grad_norm": 0.8203125, "learning_rate": 0.00019480661849256023, "loss": 4.4468, "step": 2990 }, { "epoch": 0.31013097097382525, "grad_norm": 0.89453125, "learning_rate": 0.0001948031629295453, "loss": 4.4601, "step": 2991 }, { "epoch": 0.3102346590283133, "grad_norm": 0.83984375, "learning_rate": 0.00019479970624795327, "loss": 4.4764, "step": 2992 }, { "epoch": 0.3103383470828014, "grad_norm": 0.69921875, "learning_rate": 0.0001947962484478249, "loss": 4.4696, "step": 2993 }, { "epoch": 0.31044203513728946, "grad_norm": 0.87109375, "learning_rate": 0.00019479278952920102, "loss": 4.477, "step": 2994 }, { "epoch": 0.31054572319177753, "grad_norm": 0.94140625, "learning_rate": 0.00019478932949212245, "loss": 4.4776, "step": 2995 }, { "epoch": 0.3106494112462656, "grad_norm": 0.68359375, "learning_rate": 0.00019478586833662995, "loss": 4.4574, "step": 2996 }, { "epoch": 0.3107530993007537, "grad_norm": 0.75390625, "learning_rate": 0.00019478240606276442, "loss": 4.4864, "step": 2997 }, { "epoch": 0.31085678735524175, "grad_norm": 0.91796875, "learning_rate": 0.00019477894267056666, "loss": 4.368, "step": 2998 }, { "epoch": 0.3109604754097298, "grad_norm": 0.84765625, "learning_rate": 0.00019477547816007756, "loss": 4.4526, "step": 2999 }, { "epoch": 0.3110641634642179, "grad_norm": 0.7421875, "learning_rate": 0.000194772012531338, "loss": 4.4717, "step": 3000 }, { "epoch": 0.31116785151870596, "grad_norm": 0.77734375, "learning_rate": 0.0001947685457843889, "loss": 4.4854, "step": 3001 }, { "epoch": 0.31127153957319403, "grad_norm": 0.76171875, "learning_rate": 0.00019476507791927112, "loss": 4.4169, "step": 3002 }, { "epoch": 0.3113752276276821, "grad_norm": 0.89453125, "learning_rate": 0.0001947616089360256, "loss": 4.4557, "step": 3003 }, { "epoch": 0.3114789156821702, "grad_norm": 0.86328125, "learning_rate": 0.00019475813883469326, "loss": 4.4646, "step": 3004 }, { "epoch": 0.31158260373665825, "grad_norm": 0.8515625, "learning_rate": 0.00019475466761531505, "loss": 4.4866, "step": 3005 }, { "epoch": 0.3116862917911463, "grad_norm": 0.87890625, "learning_rate": 0.0001947511952779319, "loss": 4.4938, "step": 3006 }, { "epoch": 0.3117899798456344, "grad_norm": 0.81640625, "learning_rate": 0.0001947477218225848, "loss": 4.4514, "step": 3007 }, { "epoch": 0.31189366790012246, "grad_norm": 1.1015625, "learning_rate": 0.00019474424724931475, "loss": 4.4562, "step": 3008 }, { "epoch": 0.31199735595461053, "grad_norm": 0.9609375, "learning_rate": 0.00019474077155816276, "loss": 4.4845, "step": 3009 }, { "epoch": 0.3121010440090986, "grad_norm": 0.80859375, "learning_rate": 0.00019473729474916976, "loss": 4.4958, "step": 3010 }, { "epoch": 0.3122047320635867, "grad_norm": 0.80859375, "learning_rate": 0.00019473381682237685, "loss": 4.5092, "step": 3011 }, { "epoch": 0.31230842011807475, "grad_norm": 0.7421875, "learning_rate": 0.00019473033777782503, "loss": 4.4751, "step": 3012 }, { "epoch": 0.3124121081725628, "grad_norm": 0.88671875, "learning_rate": 0.00019472685761555536, "loss": 4.4743, "step": 3013 }, { "epoch": 0.3125157962270509, "grad_norm": 0.9375, "learning_rate": 0.0001947233763356089, "loss": 4.491, "step": 3014 }, { "epoch": 0.312619484281539, "grad_norm": 0.82421875, "learning_rate": 0.00019471989393802673, "loss": 4.4783, "step": 3015 }, { "epoch": 0.3127231723360271, "grad_norm": 0.6484375, "learning_rate": 0.00019471641042284992, "loss": 4.4875, "step": 3016 }, { "epoch": 0.31282686039051516, "grad_norm": 0.765625, "learning_rate": 0.0001947129257901196, "loss": 4.4885, "step": 3017 }, { "epoch": 0.31293054844500323, "grad_norm": 0.74609375, "learning_rate": 0.00019470944003987687, "loss": 4.4643, "step": 3018 }, { "epoch": 0.3130342364994913, "grad_norm": 0.70703125, "learning_rate": 0.00019470595317216288, "loss": 4.4748, "step": 3019 }, { "epoch": 0.3131379245539794, "grad_norm": 0.78125, "learning_rate": 0.0001947024651870187, "loss": 4.4483, "step": 3020 }, { "epoch": 0.31324161260846745, "grad_norm": 0.671875, "learning_rate": 0.0001946989760844856, "loss": 4.4574, "step": 3021 }, { "epoch": 0.3133453006629555, "grad_norm": 0.59375, "learning_rate": 0.00019469548586460464, "loss": 4.4674, "step": 3022 }, { "epoch": 0.3134489887174436, "grad_norm": 0.7578125, "learning_rate": 0.00019469199452741705, "loss": 4.5057, "step": 3023 }, { "epoch": 0.31355267677193166, "grad_norm": 0.71875, "learning_rate": 0.00019468850207296403, "loss": 4.4566, "step": 3024 }, { "epoch": 0.31365636482641973, "grad_norm": 0.7734375, "learning_rate": 0.0001946850085012868, "loss": 4.4284, "step": 3025 }, { "epoch": 0.3137600528809078, "grad_norm": 0.8203125, "learning_rate": 0.00019468151381242649, "loss": 4.4086, "step": 3026 }, { "epoch": 0.3138637409353959, "grad_norm": 0.70703125, "learning_rate": 0.00019467801800642444, "loss": 4.4613, "step": 3027 }, { "epoch": 0.31396742898988395, "grad_norm": 0.8203125, "learning_rate": 0.00019467452108332185, "loss": 4.438, "step": 3028 }, { "epoch": 0.314071117044372, "grad_norm": 0.75, "learning_rate": 0.00019467102304316, "loss": 4.4498, "step": 3029 }, { "epoch": 0.3141748050988601, "grad_norm": 0.7421875, "learning_rate": 0.00019466752388598013, "loss": 4.5043, "step": 3030 }, { "epoch": 0.31427849315334816, "grad_norm": 0.81640625, "learning_rate": 0.00019466402361182356, "loss": 4.4541, "step": 3031 }, { "epoch": 0.31438218120783623, "grad_norm": 0.796875, "learning_rate": 0.00019466052222073157, "loss": 4.4769, "step": 3032 }, { "epoch": 0.3144858692623243, "grad_norm": 0.7734375, "learning_rate": 0.00019465701971274548, "loss": 4.489, "step": 3033 }, { "epoch": 0.3145895573168124, "grad_norm": 1.0390625, "learning_rate": 0.0001946535160879066, "loss": 4.4899, "step": 3034 }, { "epoch": 0.31469324537130045, "grad_norm": 0.8125, "learning_rate": 0.0001946500113462563, "loss": 4.4684, "step": 3035 }, { "epoch": 0.3147969334257885, "grad_norm": 0.84375, "learning_rate": 0.00019464650548783592, "loss": 4.4623, "step": 3036 }, { "epoch": 0.3149006214802766, "grad_norm": 0.8203125, "learning_rate": 0.0001946429985126868, "loss": 4.4772, "step": 3037 }, { "epoch": 0.31500430953476466, "grad_norm": 0.81640625, "learning_rate": 0.00019463949042085036, "loss": 4.4399, "step": 3038 }, { "epoch": 0.31510799758925273, "grad_norm": 0.90625, "learning_rate": 0.00019463598121236797, "loss": 4.3842, "step": 3039 }, { "epoch": 0.3152116856437408, "grad_norm": 0.76953125, "learning_rate": 0.00019463247088728102, "loss": 4.462, "step": 3040 }, { "epoch": 0.3153153736982289, "grad_norm": 0.8125, "learning_rate": 0.00019462895944563098, "loss": 4.4954, "step": 3041 }, { "epoch": 0.31541906175271694, "grad_norm": 0.734375, "learning_rate": 0.0001946254468874592, "loss": 4.4741, "step": 3042 }, { "epoch": 0.315522749807205, "grad_norm": 0.78515625, "learning_rate": 0.0001946219332128072, "loss": 4.4557, "step": 3043 }, { "epoch": 0.3156264378616931, "grad_norm": 0.78515625, "learning_rate": 0.0001946184184217164, "loss": 4.4779, "step": 3044 }, { "epoch": 0.31573012591618116, "grad_norm": 0.77734375, "learning_rate": 0.00019461490251422827, "loss": 4.4799, "step": 3045 }, { "epoch": 0.31583381397066923, "grad_norm": 0.6875, "learning_rate": 0.0001946113854903843, "loss": 4.476, "step": 3046 }, { "epoch": 0.3159375020251573, "grad_norm": 0.7265625, "learning_rate": 0.000194607867350226, "loss": 4.4456, "step": 3047 }, { "epoch": 0.3160411900796454, "grad_norm": 0.80859375, "learning_rate": 0.00019460434809379486, "loss": 4.4778, "step": 3048 }, { "epoch": 0.31614487813413344, "grad_norm": 0.8046875, "learning_rate": 0.00019460082772113245, "loss": 4.4231, "step": 3049 }, { "epoch": 0.3162485661886215, "grad_norm": 0.78515625, "learning_rate": 0.00019459730623228022, "loss": 4.4713, "step": 3050 }, { "epoch": 0.3163522542431096, "grad_norm": 0.90234375, "learning_rate": 0.0001945937836272798, "loss": 4.4625, "step": 3051 }, { "epoch": 0.31645594229759766, "grad_norm": 0.91796875, "learning_rate": 0.00019459025990617272, "loss": 4.4777, "step": 3052 }, { "epoch": 0.31655963035208573, "grad_norm": 0.875, "learning_rate": 0.00019458673506900052, "loss": 4.4678, "step": 3053 }, { "epoch": 0.3166633184065738, "grad_norm": 0.953125, "learning_rate": 0.0001945832091158049, "loss": 4.4341, "step": 3054 }, { "epoch": 0.3167670064610619, "grad_norm": 0.98046875, "learning_rate": 0.00019457968204662733, "loss": 4.4406, "step": 3055 }, { "epoch": 0.31687069451554994, "grad_norm": 0.890625, "learning_rate": 0.00019457615386150954, "loss": 4.4484, "step": 3056 }, { "epoch": 0.316974382570038, "grad_norm": 0.87109375, "learning_rate": 0.00019457262456049307, "loss": 4.4671, "step": 3057 }, { "epoch": 0.3170780706245261, "grad_norm": 1.046875, "learning_rate": 0.00019456909414361962, "loss": 4.4402, "step": 3058 }, { "epoch": 0.31718175867901416, "grad_norm": 0.63671875, "learning_rate": 0.0001945655626109308, "loss": 4.4295, "step": 3059 }, { "epoch": 0.3172854467335023, "grad_norm": 0.8125, "learning_rate": 0.0001945620299624683, "loss": 4.414, "step": 3060 }, { "epoch": 0.31738913478799036, "grad_norm": 0.87890625, "learning_rate": 0.00019455849619827382, "loss": 4.4506, "step": 3061 }, { "epoch": 0.31749282284247843, "grad_norm": 0.76171875, "learning_rate": 0.000194554961318389, "loss": 4.4642, "step": 3062 }, { "epoch": 0.3175965108969665, "grad_norm": 0.87890625, "learning_rate": 0.00019455142532285563, "loss": 4.4586, "step": 3063 }, { "epoch": 0.31770019895145457, "grad_norm": 0.76953125, "learning_rate": 0.00019454788821171538, "loss": 4.4649, "step": 3064 }, { "epoch": 0.31780388700594264, "grad_norm": 0.77734375, "learning_rate": 0.00019454434998501, "loss": 4.4466, "step": 3065 }, { "epoch": 0.3179075750604307, "grad_norm": 0.88671875, "learning_rate": 0.0001945408106427812, "loss": 4.4542, "step": 3066 }, { "epoch": 0.3180112631149188, "grad_norm": 0.75390625, "learning_rate": 0.00019453727018507077, "loss": 4.5246, "step": 3067 }, { "epoch": 0.31811495116940686, "grad_norm": 0.7421875, "learning_rate": 0.0001945337286119205, "loss": 4.5043, "step": 3068 }, { "epoch": 0.31821863922389493, "grad_norm": 0.73828125, "learning_rate": 0.00019453018592337213, "loss": 4.4265, "step": 3069 }, { "epoch": 0.318322327278383, "grad_norm": 0.828125, "learning_rate": 0.00019452664211946753, "loss": 4.5128, "step": 3070 }, { "epoch": 0.31842601533287107, "grad_norm": 0.83203125, "learning_rate": 0.00019452309720024844, "loss": 4.4348, "step": 3071 }, { "epoch": 0.31852970338735914, "grad_norm": 0.796875, "learning_rate": 0.00019451955116575674, "loss": 4.4831, "step": 3072 }, { "epoch": 0.3186333914418472, "grad_norm": 0.66015625, "learning_rate": 0.00019451600401603422, "loss": 4.4866, "step": 3073 }, { "epoch": 0.3187370794963353, "grad_norm": 0.7421875, "learning_rate": 0.00019451245575112278, "loss": 4.3868, "step": 3074 }, { "epoch": 0.31884076755082336, "grad_norm": 0.70703125, "learning_rate": 0.00019450890637106428, "loss": 4.4373, "step": 3075 }, { "epoch": 0.3189444556053114, "grad_norm": 0.76171875, "learning_rate": 0.00019450535587590056, "loss": 4.4538, "step": 3076 }, { "epoch": 0.3190481436597995, "grad_norm": 0.6953125, "learning_rate": 0.00019450180426567354, "loss": 4.4422, "step": 3077 }, { "epoch": 0.31915183171428757, "grad_norm": 0.7578125, "learning_rate": 0.00019449825154042513, "loss": 4.4561, "step": 3078 }, { "epoch": 0.31925551976877564, "grad_norm": 0.7109375, "learning_rate": 0.00019449469770019723, "loss": 4.5059, "step": 3079 }, { "epoch": 0.3193592078232637, "grad_norm": 0.6640625, "learning_rate": 0.00019449114274503178, "loss": 4.4227, "step": 3080 }, { "epoch": 0.3194628958777518, "grad_norm": 0.6796875, "learning_rate": 0.00019448758667497075, "loss": 4.4908, "step": 3081 }, { "epoch": 0.31956658393223986, "grad_norm": 0.82421875, "learning_rate": 0.00019448402949005607, "loss": 4.4663, "step": 3082 }, { "epoch": 0.3196702719867279, "grad_norm": 0.67578125, "learning_rate": 0.0001944804711903297, "loss": 4.4608, "step": 3083 }, { "epoch": 0.319773960041216, "grad_norm": 0.6796875, "learning_rate": 0.00019447691177583364, "loss": 4.4182, "step": 3084 }, { "epoch": 0.31987764809570407, "grad_norm": 0.6875, "learning_rate": 0.00019447335124660992, "loss": 4.4701, "step": 3085 }, { "epoch": 0.31998133615019214, "grad_norm": 0.703125, "learning_rate": 0.00019446978960270048, "loss": 4.4572, "step": 3086 }, { "epoch": 0.3200850242046802, "grad_norm": 0.72265625, "learning_rate": 0.00019446622684414738, "loss": 4.4828, "step": 3087 }, { "epoch": 0.3201887122591683, "grad_norm": 0.88671875, "learning_rate": 0.0001944626629709927, "loss": 4.4391, "step": 3088 }, { "epoch": 0.32029240031365636, "grad_norm": 0.84765625, "learning_rate": 0.0001944590979832784, "loss": 4.4336, "step": 3089 }, { "epoch": 0.3203960883681444, "grad_norm": 0.68359375, "learning_rate": 0.00019445553188104665, "loss": 4.4397, "step": 3090 }, { "epoch": 0.3204997764226325, "grad_norm": 0.80859375, "learning_rate": 0.0001944519646643394, "loss": 4.4129, "step": 3091 }, { "epoch": 0.32060346447712057, "grad_norm": 0.83203125, "learning_rate": 0.00019444839633319885, "loss": 4.4109, "step": 3092 }, { "epoch": 0.32070715253160864, "grad_norm": 0.90625, "learning_rate": 0.00019444482688766703, "loss": 4.4595, "step": 3093 }, { "epoch": 0.3208108405860967, "grad_norm": 0.87890625, "learning_rate": 0.00019444125632778612, "loss": 4.4086, "step": 3094 }, { "epoch": 0.3209145286405848, "grad_norm": 0.77734375, "learning_rate": 0.0001944376846535982, "loss": 4.4644, "step": 3095 }, { "epoch": 0.32101821669507286, "grad_norm": 0.80859375, "learning_rate": 0.00019443411186514543, "loss": 4.4432, "step": 3096 }, { "epoch": 0.3211219047495609, "grad_norm": 1.0234375, "learning_rate": 0.00019443053796246992, "loss": 4.4119, "step": 3097 }, { "epoch": 0.321225592804049, "grad_norm": 1.0625, "learning_rate": 0.00019442696294561394, "loss": 4.4644, "step": 3098 }, { "epoch": 0.32132928085853707, "grad_norm": 1.046875, "learning_rate": 0.00019442338681461958, "loss": 4.4355, "step": 3099 }, { "epoch": 0.32143296891302514, "grad_norm": 0.85546875, "learning_rate": 0.00019441980956952905, "loss": 4.4222, "step": 3100 }, { "epoch": 0.3215366569675132, "grad_norm": 0.81640625, "learning_rate": 0.00019441623121038462, "loss": 4.4843, "step": 3101 }, { "epoch": 0.3216403450220013, "grad_norm": 1.1796875, "learning_rate": 0.00019441265173722843, "loss": 4.4442, "step": 3102 }, { "epoch": 0.32174403307648936, "grad_norm": 0.89453125, "learning_rate": 0.00019440907115010275, "loss": 4.503, "step": 3103 }, { "epoch": 0.3218477211309774, "grad_norm": 1.0625, "learning_rate": 0.00019440548944904985, "loss": 4.475, "step": 3104 }, { "epoch": 0.32195140918546555, "grad_norm": 1.0859375, "learning_rate": 0.00019440190663411194, "loss": 4.4357, "step": 3105 }, { "epoch": 0.3220550972399536, "grad_norm": 0.953125, "learning_rate": 0.00019439832270533132, "loss": 4.4239, "step": 3106 }, { "epoch": 0.3221587852944417, "grad_norm": 1.1015625, "learning_rate": 0.00019439473766275027, "loss": 4.4629, "step": 3107 }, { "epoch": 0.32226247334892977, "grad_norm": 0.921875, "learning_rate": 0.0001943911515064111, "loss": 4.4815, "step": 3108 }, { "epoch": 0.32236616140341784, "grad_norm": 1.2421875, "learning_rate": 0.00019438756423635615, "loss": 4.4407, "step": 3109 }, { "epoch": 0.3224698494579059, "grad_norm": 0.84765625, "learning_rate": 0.00019438397585262767, "loss": 4.467, "step": 3110 }, { "epoch": 0.322573537512394, "grad_norm": 1.0625, "learning_rate": 0.00019438038635526806, "loss": 4.4749, "step": 3111 }, { "epoch": 0.32267722556688205, "grad_norm": 0.8671875, "learning_rate": 0.00019437679574431965, "loss": 4.4781, "step": 3112 }, { "epoch": 0.3227809136213701, "grad_norm": 0.94921875, "learning_rate": 0.00019437320401982481, "loss": 4.4409, "step": 3113 }, { "epoch": 0.3228846016758582, "grad_norm": 1.15625, "learning_rate": 0.00019436961118182592, "loss": 4.4956, "step": 3114 }, { "epoch": 0.32298828973034627, "grad_norm": 0.7265625, "learning_rate": 0.0001943660172303654, "loss": 4.4362, "step": 3115 }, { "epoch": 0.32309197778483434, "grad_norm": 1.1484375, "learning_rate": 0.0001943624221654856, "loss": 4.4844, "step": 3116 }, { "epoch": 0.3231956658393224, "grad_norm": 0.92578125, "learning_rate": 0.00019435882598722897, "loss": 4.4239, "step": 3117 }, { "epoch": 0.3232993538938105, "grad_norm": 0.87109375, "learning_rate": 0.00019435522869563793, "loss": 4.4157, "step": 3118 }, { "epoch": 0.32340304194829855, "grad_norm": 0.9765625, "learning_rate": 0.00019435163029075491, "loss": 4.4409, "step": 3119 }, { "epoch": 0.3235067300027866, "grad_norm": 0.83984375, "learning_rate": 0.00019434803077262244, "loss": 4.4128, "step": 3120 }, { "epoch": 0.3236104180572747, "grad_norm": 0.83984375, "learning_rate": 0.00019434443014128288, "loss": 4.4665, "step": 3121 }, { "epoch": 0.32371410611176277, "grad_norm": 0.98828125, "learning_rate": 0.00019434082839677879, "loss": 4.4656, "step": 3122 }, { "epoch": 0.32381779416625084, "grad_norm": 1.234375, "learning_rate": 0.00019433722553915267, "loss": 4.4474, "step": 3123 }, { "epoch": 0.3239214822207389, "grad_norm": 0.8125, "learning_rate": 0.00019433362156844698, "loss": 4.4509, "step": 3124 }, { "epoch": 0.324025170275227, "grad_norm": 1.1015625, "learning_rate": 0.00019433001648470427, "loss": 4.415, "step": 3125 }, { "epoch": 0.32412885832971505, "grad_norm": 1.1796875, "learning_rate": 0.0001943264102879671, "loss": 4.4765, "step": 3126 }, { "epoch": 0.3242325463842031, "grad_norm": 0.86328125, "learning_rate": 0.00019432280297827797, "loss": 4.4392, "step": 3127 }, { "epoch": 0.3243362344386912, "grad_norm": 1.1015625, "learning_rate": 0.0001943191945556795, "loss": 4.4432, "step": 3128 }, { "epoch": 0.32443992249317927, "grad_norm": 0.7734375, "learning_rate": 0.0001943155850202142, "loss": 4.4573, "step": 3129 }, { "epoch": 0.32454361054766734, "grad_norm": 0.9765625, "learning_rate": 0.00019431197437192471, "loss": 4.444, "step": 3130 }, { "epoch": 0.3246472986021554, "grad_norm": 0.99609375, "learning_rate": 0.00019430836261085364, "loss": 4.4296, "step": 3131 }, { "epoch": 0.3247509866566435, "grad_norm": 0.83203125, "learning_rate": 0.00019430474973704354, "loss": 4.458, "step": 3132 }, { "epoch": 0.32485467471113155, "grad_norm": 0.6796875, "learning_rate": 0.00019430113575053708, "loss": 4.4591, "step": 3133 }, { "epoch": 0.3249583627656196, "grad_norm": 0.78515625, "learning_rate": 0.0001942975206513769, "loss": 4.4305, "step": 3134 }, { "epoch": 0.3250620508201077, "grad_norm": 0.7421875, "learning_rate": 0.00019429390443960568, "loss": 4.4696, "step": 3135 }, { "epoch": 0.32516573887459577, "grad_norm": 0.69921875, "learning_rate": 0.00019429028711526604, "loss": 4.4581, "step": 3136 }, { "epoch": 0.32526942692908384, "grad_norm": 0.73828125, "learning_rate": 0.00019428666867840066, "loss": 4.4581, "step": 3137 }, { "epoch": 0.3253731149835719, "grad_norm": 0.75390625, "learning_rate": 0.0001942830491290523, "loss": 4.4528, "step": 3138 }, { "epoch": 0.32547680303806, "grad_norm": 0.734375, "learning_rate": 0.0001942794284672636, "loss": 4.4092, "step": 3139 }, { "epoch": 0.32558049109254805, "grad_norm": 0.65625, "learning_rate": 0.0001942758066930773, "loss": 4.4827, "step": 3140 }, { "epoch": 0.3256841791470361, "grad_norm": 0.6875, "learning_rate": 0.00019427218380653613, "loss": 4.5186, "step": 3141 }, { "epoch": 0.3257878672015242, "grad_norm": 0.63671875, "learning_rate": 0.00019426855980768287, "loss": 4.4642, "step": 3142 }, { "epoch": 0.32589155525601227, "grad_norm": 0.6796875, "learning_rate": 0.0001942649346965602, "loss": 4.4665, "step": 3143 }, { "epoch": 0.32599524331050034, "grad_norm": 0.65234375, "learning_rate": 0.00019426130847321097, "loss": 4.4525, "step": 3144 }, { "epoch": 0.3260989313649884, "grad_norm": 0.6953125, "learning_rate": 0.00019425768113767795, "loss": 4.4475, "step": 3145 }, { "epoch": 0.3262026194194765, "grad_norm": 0.72265625, "learning_rate": 0.0001942540526900039, "loss": 4.4347, "step": 3146 }, { "epoch": 0.32630630747396455, "grad_norm": 0.6171875, "learning_rate": 0.0001942504231302317, "loss": 4.4461, "step": 3147 }, { "epoch": 0.3264099955284526, "grad_norm": 0.671875, "learning_rate": 0.0001942467924584041, "loss": 4.4618, "step": 3148 }, { "epoch": 0.32651368358294075, "grad_norm": 0.75390625, "learning_rate": 0.00019424316067456396, "loss": 4.4542, "step": 3149 }, { "epoch": 0.3266173716374288, "grad_norm": 0.6796875, "learning_rate": 0.00019423952777875418, "loss": 4.3964, "step": 3150 }, { "epoch": 0.3267210596919169, "grad_norm": 0.5703125, "learning_rate": 0.00019423589377101758, "loss": 4.4189, "step": 3151 }, { "epoch": 0.32682474774640496, "grad_norm": 0.75, "learning_rate": 0.00019423225865139703, "loss": 4.503, "step": 3152 }, { "epoch": 0.32692843580089304, "grad_norm": 0.76953125, "learning_rate": 0.00019422862241993545, "loss": 4.4763, "step": 3153 }, { "epoch": 0.3270321238553811, "grad_norm": 0.734375, "learning_rate": 0.00019422498507667572, "loss": 4.4683, "step": 3154 }, { "epoch": 0.3271358119098692, "grad_norm": 0.70703125, "learning_rate": 0.00019422134662166077, "loss": 4.4992, "step": 3155 }, { "epoch": 0.32723949996435725, "grad_norm": 0.7109375, "learning_rate": 0.00019421770705493354, "loss": 4.4608, "step": 3156 }, { "epoch": 0.3273431880188453, "grad_norm": 0.75, "learning_rate": 0.00019421406637653692, "loss": 4.482, "step": 3157 }, { "epoch": 0.3274468760733334, "grad_norm": 0.73828125, "learning_rate": 0.00019421042458651395, "loss": 4.4546, "step": 3158 }, { "epoch": 0.32755056412782146, "grad_norm": 0.74609375, "learning_rate": 0.00019420678168490755, "loss": 4.4352, "step": 3159 }, { "epoch": 0.32765425218230954, "grad_norm": 0.796875, "learning_rate": 0.00019420313767176065, "loss": 4.4403, "step": 3160 }, { "epoch": 0.3277579402367976, "grad_norm": 0.7265625, "learning_rate": 0.00019419949254711636, "loss": 4.4726, "step": 3161 }, { "epoch": 0.3278616282912857, "grad_norm": 0.71875, "learning_rate": 0.0001941958463110176, "loss": 4.4651, "step": 3162 }, { "epoch": 0.32796531634577375, "grad_norm": 0.91796875, "learning_rate": 0.00019419219896350747, "loss": 4.515, "step": 3163 }, { "epoch": 0.3280690044002618, "grad_norm": 1.0, "learning_rate": 0.0001941885505046289, "loss": 4.4578, "step": 3164 }, { "epoch": 0.3281726924547499, "grad_norm": 0.74609375, "learning_rate": 0.00019418490093442504, "loss": 4.4244, "step": 3165 }, { "epoch": 0.32827638050923796, "grad_norm": 0.81640625, "learning_rate": 0.00019418125025293887, "loss": 4.4859, "step": 3166 }, { "epoch": 0.32838006856372604, "grad_norm": 1.015625, "learning_rate": 0.00019417759846021354, "loss": 4.4438, "step": 3167 }, { "epoch": 0.3284837566182141, "grad_norm": 0.984375, "learning_rate": 0.00019417394555629208, "loss": 4.4728, "step": 3168 }, { "epoch": 0.3285874446727022, "grad_norm": 0.87890625, "learning_rate": 0.00019417029154121757, "loss": 4.4602, "step": 3169 }, { "epoch": 0.32869113272719025, "grad_norm": 0.83984375, "learning_rate": 0.00019416663641503323, "loss": 4.423, "step": 3170 }, { "epoch": 0.3287948207816783, "grad_norm": 0.87890625, "learning_rate": 0.00019416298017778207, "loss": 4.4466, "step": 3171 }, { "epoch": 0.3288985088361664, "grad_norm": 0.98828125, "learning_rate": 0.00019415932282950728, "loss": 4.4669, "step": 3172 }, { "epoch": 0.32900219689065446, "grad_norm": 1.0390625, "learning_rate": 0.00019415566437025206, "loss": 4.4427, "step": 3173 }, { "epoch": 0.32910588494514253, "grad_norm": 0.79296875, "learning_rate": 0.00019415200480005947, "loss": 4.4346, "step": 3174 }, { "epoch": 0.3292095729996306, "grad_norm": 0.99609375, "learning_rate": 0.0001941483441189728, "loss": 4.4512, "step": 3175 }, { "epoch": 0.3293132610541187, "grad_norm": 1.140625, "learning_rate": 0.00019414468232703517, "loss": 4.4743, "step": 3176 }, { "epoch": 0.32941694910860675, "grad_norm": 0.7265625, "learning_rate": 0.00019414101942428978, "loss": 4.4539, "step": 3177 }, { "epoch": 0.3295206371630948, "grad_norm": 1.0234375, "learning_rate": 0.00019413735541077987, "loss": 4.4291, "step": 3178 }, { "epoch": 0.3296243252175829, "grad_norm": 0.79296875, "learning_rate": 0.00019413369028654868, "loss": 4.4172, "step": 3179 }, { "epoch": 0.32972801327207096, "grad_norm": 0.859375, "learning_rate": 0.00019413002405163944, "loss": 4.4088, "step": 3180 }, { "epoch": 0.32983170132655903, "grad_norm": 0.97265625, "learning_rate": 0.00019412635670609544, "loss": 4.4669, "step": 3181 }, { "epoch": 0.3299353893810471, "grad_norm": 0.85546875, "learning_rate": 0.00019412268824995992, "loss": 4.4327, "step": 3182 }, { "epoch": 0.3300390774355352, "grad_norm": 0.73828125, "learning_rate": 0.00019411901868327617, "loss": 4.4497, "step": 3183 }, { "epoch": 0.33014276549002325, "grad_norm": 0.90625, "learning_rate": 0.0001941153480060875, "loss": 4.4268, "step": 3184 }, { "epoch": 0.3302464535445113, "grad_norm": 0.765625, "learning_rate": 0.0001941116762184372, "loss": 4.4297, "step": 3185 }, { "epoch": 0.3303501415989994, "grad_norm": 0.73828125, "learning_rate": 0.0001941080033203686, "loss": 4.4364, "step": 3186 }, { "epoch": 0.33045382965348746, "grad_norm": 0.87109375, "learning_rate": 0.00019410432931192504, "loss": 4.4927, "step": 3187 }, { "epoch": 0.33055751770797553, "grad_norm": 0.87109375, "learning_rate": 0.00019410065419314985, "loss": 4.4977, "step": 3188 }, { "epoch": 0.3306612057624636, "grad_norm": 0.8828125, "learning_rate": 0.00019409697796408641, "loss": 4.4403, "step": 3189 }, { "epoch": 0.3307648938169517, "grad_norm": 1.09375, "learning_rate": 0.0001940933006247781, "loss": 4.467, "step": 3190 }, { "epoch": 0.33086858187143975, "grad_norm": 0.9375, "learning_rate": 0.00019408962217526833, "loss": 4.4594, "step": 3191 }, { "epoch": 0.3309722699259278, "grad_norm": 0.94921875, "learning_rate": 0.00019408594261560044, "loss": 4.4412, "step": 3192 }, { "epoch": 0.3310759579804159, "grad_norm": 0.93359375, "learning_rate": 0.0001940822619458179, "loss": 4.4959, "step": 3193 }, { "epoch": 0.331179646034904, "grad_norm": 0.8125, "learning_rate": 0.0001940785801659641, "loss": 4.4427, "step": 3194 }, { "epoch": 0.3312833340893921, "grad_norm": 0.9921875, "learning_rate": 0.0001940748972760825, "loss": 4.4779, "step": 3195 }, { "epoch": 0.33138702214388016, "grad_norm": 1.15625, "learning_rate": 0.00019407121327621657, "loss": 4.4233, "step": 3196 }, { "epoch": 0.33149071019836823, "grad_norm": 0.640625, "learning_rate": 0.00019406752816640977, "loss": 4.443, "step": 3197 }, { "epoch": 0.3315943982528563, "grad_norm": 1.1640625, "learning_rate": 0.00019406384194670554, "loss": 4.4023, "step": 3198 }, { "epoch": 0.3316980863073444, "grad_norm": 1.0078125, "learning_rate": 0.00019406015461714745, "loss": 4.4119, "step": 3199 }, { "epoch": 0.33180177436183245, "grad_norm": 0.90625, "learning_rate": 0.00019405646617777893, "loss": 4.4525, "step": 3200 }, { "epoch": 0.3319054624163205, "grad_norm": 0.796875, "learning_rate": 0.0001940527766286435, "loss": 4.4334, "step": 3201 }, { "epoch": 0.3320091504708086, "grad_norm": 0.98828125, "learning_rate": 0.00019404908596978477, "loss": 4.4435, "step": 3202 }, { "epoch": 0.33211283852529666, "grad_norm": 0.87890625, "learning_rate": 0.00019404539420124622, "loss": 4.4833, "step": 3203 }, { "epoch": 0.33221652657978473, "grad_norm": 0.91796875, "learning_rate": 0.00019404170132307144, "loss": 4.4724, "step": 3204 }, { "epoch": 0.3323202146342728, "grad_norm": 0.6640625, "learning_rate": 0.000194038007335304, "loss": 4.4751, "step": 3205 }, { "epoch": 0.3324239026887609, "grad_norm": 0.83203125, "learning_rate": 0.00019403431223798747, "loss": 4.4767, "step": 3206 }, { "epoch": 0.33252759074324895, "grad_norm": 0.84375, "learning_rate": 0.00019403061603116543, "loss": 4.4561, "step": 3207 }, { "epoch": 0.332631278797737, "grad_norm": 0.7265625, "learning_rate": 0.00019402691871488154, "loss": 4.4188, "step": 3208 }, { "epoch": 0.3327349668522251, "grad_norm": 0.80859375, "learning_rate": 0.0001940232202891794, "loss": 4.4851, "step": 3209 }, { "epoch": 0.33283865490671316, "grad_norm": 0.7265625, "learning_rate": 0.00019401952075410263, "loss": 4.4548, "step": 3210 }, { "epoch": 0.33294234296120123, "grad_norm": 0.68359375, "learning_rate": 0.00019401582010969494, "loss": 4.405, "step": 3211 }, { "epoch": 0.3330460310156893, "grad_norm": 0.703125, "learning_rate": 0.00019401211835599989, "loss": 4.439, "step": 3212 }, { "epoch": 0.3331497190701774, "grad_norm": 0.84375, "learning_rate": 0.00019400841549306125, "loss": 4.4627, "step": 3213 }, { "epoch": 0.33325340712466545, "grad_norm": 0.8046875, "learning_rate": 0.00019400471152092267, "loss": 4.4395, "step": 3214 }, { "epoch": 0.3333570951791535, "grad_norm": 0.74609375, "learning_rate": 0.00019400100643962787, "loss": 4.4621, "step": 3215 }, { "epoch": 0.3334607832336416, "grad_norm": 0.88671875, "learning_rate": 0.00019399730024922057, "loss": 4.4411, "step": 3216 }, { "epoch": 0.33356447128812966, "grad_norm": 0.96875, "learning_rate": 0.00019399359294974445, "loss": 4.4311, "step": 3217 }, { "epoch": 0.33366815934261773, "grad_norm": 1.0546875, "learning_rate": 0.00019398988454124333, "loss": 4.4267, "step": 3218 }, { "epoch": 0.3337718473971058, "grad_norm": 0.8359375, "learning_rate": 0.00019398617502376092, "loss": 4.4588, "step": 3219 }, { "epoch": 0.3338755354515939, "grad_norm": 0.796875, "learning_rate": 0.00019398246439734096, "loss": 4.4264, "step": 3220 }, { "epoch": 0.33397922350608195, "grad_norm": 0.94921875, "learning_rate": 0.00019397875266202727, "loss": 4.4731, "step": 3221 }, { "epoch": 0.33408291156057, "grad_norm": 1.109375, "learning_rate": 0.00019397503981786365, "loss": 4.4846, "step": 3222 }, { "epoch": 0.3341865996150581, "grad_norm": 0.69921875, "learning_rate": 0.0001939713258648939, "loss": 4.3899, "step": 3223 }, { "epoch": 0.33429028766954616, "grad_norm": 0.88671875, "learning_rate": 0.00019396761080316184, "loss": 4.4881, "step": 3224 }, { "epoch": 0.33439397572403423, "grad_norm": 1.2421875, "learning_rate": 0.00019396389463271127, "loss": 4.4176, "step": 3225 }, { "epoch": 0.3344976637785223, "grad_norm": 0.73046875, "learning_rate": 0.0001939601773535861, "loss": 4.409, "step": 3226 }, { "epoch": 0.3346013518330104, "grad_norm": 0.91796875, "learning_rate": 0.00019395645896583017, "loss": 4.4441, "step": 3227 }, { "epoch": 0.33470503988749845, "grad_norm": 1.3125, "learning_rate": 0.0001939527394694873, "loss": 4.4747, "step": 3228 }, { "epoch": 0.3348087279419865, "grad_norm": 0.84375, "learning_rate": 0.00019394901886460143, "loss": 4.4516, "step": 3229 }, { "epoch": 0.3349124159964746, "grad_norm": 1.015625, "learning_rate": 0.00019394529715121645, "loss": 4.4232, "step": 3230 }, { "epoch": 0.33501610405096266, "grad_norm": 1.21875, "learning_rate": 0.00019394157432937629, "loss": 4.4171, "step": 3231 }, { "epoch": 0.33511979210545073, "grad_norm": 0.8125, "learning_rate": 0.00019393785039912483, "loss": 4.4399, "step": 3232 }, { "epoch": 0.3352234801599388, "grad_norm": 1.359375, "learning_rate": 0.00019393412536050604, "loss": 4.4708, "step": 3233 }, { "epoch": 0.3353271682144269, "grad_norm": 0.80078125, "learning_rate": 0.00019393039921356385, "loss": 4.4745, "step": 3234 }, { "epoch": 0.33543085626891495, "grad_norm": 1.3125, "learning_rate": 0.0001939266719583422, "loss": 4.4292, "step": 3235 }, { "epoch": 0.335534544323403, "grad_norm": 0.7265625, "learning_rate": 0.0001939229435948852, "loss": 4.4479, "step": 3236 }, { "epoch": 0.3356382323778911, "grad_norm": 1.421875, "learning_rate": 0.0001939192141232367, "loss": 4.4192, "step": 3237 }, { "epoch": 0.33574192043237916, "grad_norm": 0.88671875, "learning_rate": 0.00019391548354344074, "loss": 4.4338, "step": 3238 }, { "epoch": 0.3358456084868673, "grad_norm": 1.6796875, "learning_rate": 0.00019391175185554135, "loss": 4.4788, "step": 3239 }, { "epoch": 0.33594929654135536, "grad_norm": 1.0703125, "learning_rate": 0.00019390801905958257, "loss": 4.4474, "step": 3240 }, { "epoch": 0.33605298459584343, "grad_norm": 2.5, "learning_rate": 0.00019390428515560841, "loss": 4.4906, "step": 3241 }, { "epoch": 0.3361566726503315, "grad_norm": 2.203125, "learning_rate": 0.00019390055014366296, "loss": 4.4659, "step": 3242 }, { "epoch": 0.33626036070481957, "grad_norm": 1.53125, "learning_rate": 0.00019389681402379028, "loss": 4.4293, "step": 3243 }, { "epoch": 0.33636404875930764, "grad_norm": 1.640625, "learning_rate": 0.00019389307679603442, "loss": 4.4542, "step": 3244 }, { "epoch": 0.3364677368137957, "grad_norm": 1.375, "learning_rate": 0.00019388933846043954, "loss": 4.4636, "step": 3245 }, { "epoch": 0.3365714248682838, "grad_norm": 1.3671875, "learning_rate": 0.0001938855990170497, "loss": 4.4272, "step": 3246 }, { "epoch": 0.33667511292277186, "grad_norm": 1.1796875, "learning_rate": 0.00019388185846590903, "loss": 4.4256, "step": 3247 }, { "epoch": 0.33677880097725993, "grad_norm": 1.546875, "learning_rate": 0.00019387811680706167, "loss": 4.4195, "step": 3248 }, { "epoch": 0.336882489031748, "grad_norm": 1.1953125, "learning_rate": 0.00019387437404055175, "loss": 4.4388, "step": 3249 }, { "epoch": 0.33698617708623607, "grad_norm": 2.046875, "learning_rate": 0.00019387063016642345, "loss": 4.4516, "step": 3250 }, { "epoch": 0.33708986514072414, "grad_norm": 1.734375, "learning_rate": 0.00019386688518472096, "loss": 4.4562, "step": 3251 }, { "epoch": 0.3371935531952122, "grad_norm": 1.9453125, "learning_rate": 0.00019386313909548842, "loss": 4.4315, "step": 3252 }, { "epoch": 0.3372972412497003, "grad_norm": 1.625, "learning_rate": 0.00019385939189877008, "loss": 4.4577, "step": 3253 }, { "epoch": 0.33740092930418836, "grad_norm": 2.328125, "learning_rate": 0.00019385564359461013, "loss": 4.4885, "step": 3254 }, { "epoch": 0.33750461735867643, "grad_norm": 1.8828125, "learning_rate": 0.00019385189418305275, "loss": 4.4272, "step": 3255 }, { "epoch": 0.3376083054131645, "grad_norm": 2.3125, "learning_rate": 0.00019384814366414231, "loss": 4.4444, "step": 3256 }, { "epoch": 0.33771199346765257, "grad_norm": 2.28125, "learning_rate": 0.00019384439203792291, "loss": 4.4779, "step": 3257 }, { "epoch": 0.33781568152214064, "grad_norm": 1.1796875, "learning_rate": 0.00019384063930443887, "loss": 4.4524, "step": 3258 }, { "epoch": 0.3379193695766287, "grad_norm": 1.3359375, "learning_rate": 0.00019383688546373453, "loss": 4.4602, "step": 3259 }, { "epoch": 0.3380230576311168, "grad_norm": 1.2890625, "learning_rate": 0.0001938331305158541, "loss": 4.4119, "step": 3260 }, { "epoch": 0.33812674568560486, "grad_norm": 1.1796875, "learning_rate": 0.00019382937446084194, "loss": 4.4518, "step": 3261 }, { "epoch": 0.33823043374009293, "grad_norm": 1.25, "learning_rate": 0.00019382561729874232, "loss": 4.4152, "step": 3262 }, { "epoch": 0.338334121794581, "grad_norm": 1.0078125, "learning_rate": 0.00019382185902959962, "loss": 4.4262, "step": 3263 }, { "epoch": 0.33843780984906907, "grad_norm": 1.453125, "learning_rate": 0.00019381809965345813, "loss": 4.4535, "step": 3264 }, { "epoch": 0.33854149790355714, "grad_norm": 1.046875, "learning_rate": 0.00019381433917036223, "loss": 4.4597, "step": 3265 }, { "epoch": 0.3386451859580452, "grad_norm": 2.453125, "learning_rate": 0.0001938105775803563, "loss": 4.4852, "step": 3266 }, { "epoch": 0.3387488740125333, "grad_norm": 2.234375, "learning_rate": 0.00019380681488348473, "loss": 4.4376, "step": 3267 }, { "epoch": 0.33885256206702136, "grad_norm": 1.6875, "learning_rate": 0.00019380305107979191, "loss": 4.4622, "step": 3268 }, { "epoch": 0.33895625012150943, "grad_norm": 1.6484375, "learning_rate": 0.0001937992861693222, "loss": 4.4553, "step": 3269 }, { "epoch": 0.3390599381759975, "grad_norm": 1.640625, "learning_rate": 0.0001937955201521201, "loss": 4.4485, "step": 3270 }, { "epoch": 0.33916362623048557, "grad_norm": 1.4609375, "learning_rate": 0.00019379175302823, "loss": 4.4232, "step": 3271 }, { "epoch": 0.33926731428497364, "grad_norm": 1.7890625, "learning_rate": 0.00019378798479769636, "loss": 4.4852, "step": 3272 }, { "epoch": 0.3393710023394617, "grad_norm": 1.609375, "learning_rate": 0.00019378421546056363, "loss": 4.4148, "step": 3273 }, { "epoch": 0.3394746903939498, "grad_norm": 1.828125, "learning_rate": 0.0001937804450168763, "loss": 4.4718, "step": 3274 }, { "epoch": 0.33957837844843786, "grad_norm": 1.5546875, "learning_rate": 0.00019377667346667885, "loss": 4.4646, "step": 3275 }, { "epoch": 0.33968206650292593, "grad_norm": 1.9453125, "learning_rate": 0.00019377290081001576, "loss": 4.4659, "step": 3276 }, { "epoch": 0.339785754557414, "grad_norm": 1.7265625, "learning_rate": 0.0001937691270469316, "loss": 4.4167, "step": 3277 }, { "epoch": 0.33988944261190207, "grad_norm": 1.71875, "learning_rate": 0.0001937653521774708, "loss": 4.4329, "step": 3278 }, { "epoch": 0.33999313066639014, "grad_norm": 1.4921875, "learning_rate": 0.000193761576201678, "loss": 4.4596, "step": 3279 }, { "epoch": 0.3400968187208782, "grad_norm": 1.8984375, "learning_rate": 0.0001937577991195977, "loss": 4.4565, "step": 3280 }, { "epoch": 0.3402005067753663, "grad_norm": 1.703125, "learning_rate": 0.0001937540209312745, "loss": 4.4684, "step": 3281 }, { "epoch": 0.34030419482985436, "grad_norm": 2.0, "learning_rate": 0.00019375024163675292, "loss": 4.4353, "step": 3282 }, { "epoch": 0.3404078828843425, "grad_norm": 1.8828125, "learning_rate": 0.00019374646123607764, "loss": 4.444, "step": 3283 }, { "epoch": 0.34051157093883055, "grad_norm": 1.65625, "learning_rate": 0.00019374267972929317, "loss": 4.4671, "step": 3284 }, { "epoch": 0.3406152589933186, "grad_norm": 1.5234375, "learning_rate": 0.00019373889711644417, "loss": 4.4257, "step": 3285 }, { "epoch": 0.3407189470478067, "grad_norm": 1.7265625, "learning_rate": 0.0001937351133975753, "loss": 4.4502, "step": 3286 }, { "epoch": 0.34082263510229477, "grad_norm": 1.4921875, "learning_rate": 0.00019373132857273114, "loss": 4.4666, "step": 3287 }, { "epoch": 0.34092632315678284, "grad_norm": 2.046875, "learning_rate": 0.0001937275426419564, "loss": 4.4717, "step": 3288 }, { "epoch": 0.3410300112112709, "grad_norm": 1.859375, "learning_rate": 0.00019372375560529567, "loss": 4.4035, "step": 3289 }, { "epoch": 0.341133699265759, "grad_norm": 1.890625, "learning_rate": 0.00019371996746279376, "loss": 4.4463, "step": 3290 }, { "epoch": 0.34123738732024705, "grad_norm": 1.71875, "learning_rate": 0.0001937161782144953, "loss": 4.4601, "step": 3291 }, { "epoch": 0.3413410753747351, "grad_norm": 1.890625, "learning_rate": 0.00019371238786044498, "loss": 4.4237, "step": 3292 }, { "epoch": 0.3414447634292232, "grad_norm": 1.7578125, "learning_rate": 0.00019370859640068755, "loss": 4.4393, "step": 3293 }, { "epoch": 0.34154845148371127, "grad_norm": 1.65625, "learning_rate": 0.00019370480383526774, "loss": 4.461, "step": 3294 }, { "epoch": 0.34165213953819934, "grad_norm": 1.4921875, "learning_rate": 0.00019370101016423028, "loss": 4.4328, "step": 3295 }, { "epoch": 0.3417558275926874, "grad_norm": 1.8046875, "learning_rate": 0.00019369721538761996, "loss": 4.4228, "step": 3296 }, { "epoch": 0.3418595156471755, "grad_norm": 1.6015625, "learning_rate": 0.00019369341950548153, "loss": 4.4922, "step": 3297 }, { "epoch": 0.34196320370166355, "grad_norm": 1.78125, "learning_rate": 0.0001936896225178598, "loss": 4.443, "step": 3298 }, { "epoch": 0.3420668917561516, "grad_norm": 1.625, "learning_rate": 0.00019368582442479953, "loss": 4.4383, "step": 3299 }, { "epoch": 0.3421705798106397, "grad_norm": 1.90625, "learning_rate": 0.0001936820252263456, "loss": 4.4737, "step": 3300 }, { "epoch": 0.34227426786512777, "grad_norm": 1.828125, "learning_rate": 0.0001936782249225428, "loss": 4.4784, "step": 3301 }, { "epoch": 0.34237795591961584, "grad_norm": 1.71875, "learning_rate": 0.00019367442351343593, "loss": 4.4926, "step": 3302 }, { "epoch": 0.3424816439741039, "grad_norm": 1.5625, "learning_rate": 0.0001936706209990699, "loss": 4.4373, "step": 3303 }, { "epoch": 0.342585332028592, "grad_norm": 1.6328125, "learning_rate": 0.00019366681737948956, "loss": 4.4738, "step": 3304 }, { "epoch": 0.34268902008308005, "grad_norm": 1.453125, "learning_rate": 0.00019366301265473978, "loss": 4.4515, "step": 3305 }, { "epoch": 0.3427927081375681, "grad_norm": 1.84375, "learning_rate": 0.00019365920682486547, "loss": 4.4219, "step": 3306 }, { "epoch": 0.3428963961920562, "grad_norm": 1.6015625, "learning_rate": 0.0001936553998899115, "loss": 4.4172, "step": 3307 }, { "epoch": 0.34300008424654427, "grad_norm": 2.09375, "learning_rate": 0.00019365159184992284, "loss": 4.4943, "step": 3308 }, { "epoch": 0.34310377230103234, "grad_norm": 1.890625, "learning_rate": 0.00019364778270494437, "loss": 4.4547, "step": 3309 }, { "epoch": 0.3432074603555204, "grad_norm": 1.7109375, "learning_rate": 0.00019364397245502107, "loss": 4.4844, "step": 3310 }, { "epoch": 0.3433111484100085, "grad_norm": 1.515625, "learning_rate": 0.00019364016110019785, "loss": 4.4454, "step": 3311 }, { "epoch": 0.34341483646449655, "grad_norm": 1.796875, "learning_rate": 0.0001936363486405197, "loss": 4.4257, "step": 3312 }, { "epoch": 0.3435185245189846, "grad_norm": 1.53125, "learning_rate": 0.00019363253507603164, "loss": 4.4496, "step": 3313 }, { "epoch": 0.3436222125734727, "grad_norm": 2.125, "learning_rate": 0.00019362872040677866, "loss": 4.4632, "step": 3314 }, { "epoch": 0.34372590062796077, "grad_norm": 1.9921875, "learning_rate": 0.00019362490463280572, "loss": 4.4439, "step": 3315 }, { "epoch": 0.34382958868244884, "grad_norm": 1.5078125, "learning_rate": 0.0001936210877541579, "loss": 4.5136, "step": 3316 }, { "epoch": 0.3439332767369369, "grad_norm": 1.4921875, "learning_rate": 0.00019361726977088018, "loss": 4.435, "step": 3317 }, { "epoch": 0.344036964791425, "grad_norm": 1.5625, "learning_rate": 0.00019361345068301763, "loss": 4.4213, "step": 3318 }, { "epoch": 0.34414065284591305, "grad_norm": 1.3359375, "learning_rate": 0.00019360963049061533, "loss": 4.4375, "step": 3319 }, { "epoch": 0.3442443409004011, "grad_norm": 1.796875, "learning_rate": 0.00019360580919371834, "loss": 4.449, "step": 3320 }, { "epoch": 0.3443480289548892, "grad_norm": 1.6015625, "learning_rate": 0.00019360198679237172, "loss": 4.4631, "step": 3321 }, { "epoch": 0.34445171700937727, "grad_norm": 1.625, "learning_rate": 0.00019359816328662065, "loss": 4.4259, "step": 3322 }, { "epoch": 0.34455540506386534, "grad_norm": 1.5234375, "learning_rate": 0.00019359433867651018, "loss": 4.4103, "step": 3323 }, { "epoch": 0.3446590931183534, "grad_norm": 1.640625, "learning_rate": 0.0001935905129620854, "loss": 4.4288, "step": 3324 }, { "epoch": 0.3447627811728415, "grad_norm": 1.4765625, "learning_rate": 0.00019358668614339152, "loss": 4.47, "step": 3325 }, { "epoch": 0.34486646922732955, "grad_norm": 1.8125, "learning_rate": 0.0001935828582204737, "loss": 4.4248, "step": 3326 }, { "epoch": 0.3449701572818176, "grad_norm": 1.6640625, "learning_rate": 0.00019357902919337706, "loss": 4.3995, "step": 3327 }, { "epoch": 0.34507384533630575, "grad_norm": 1.5234375, "learning_rate": 0.00019357519906214676, "loss": 4.4162, "step": 3328 }, { "epoch": 0.3451775333907938, "grad_norm": 1.375, "learning_rate": 0.00019357136782682804, "loss": 4.4012, "step": 3329 }, { "epoch": 0.3452812214452819, "grad_norm": 1.6953125, "learning_rate": 0.00019356753548746612, "loss": 4.4357, "step": 3330 }, { "epoch": 0.34538490949976997, "grad_norm": 1.546875, "learning_rate": 0.00019356370204410615, "loss": 4.441, "step": 3331 }, { "epoch": 0.34548859755425804, "grad_norm": 1.6953125, "learning_rate": 0.00019355986749679342, "loss": 4.4196, "step": 3332 }, { "epoch": 0.3455922856087461, "grad_norm": 1.5078125, "learning_rate": 0.00019355603184557314, "loss": 4.4693, "step": 3333 }, { "epoch": 0.3456959736632342, "grad_norm": 1.671875, "learning_rate": 0.00019355219509049058, "loss": 4.481, "step": 3334 }, { "epoch": 0.34579966171772225, "grad_norm": 1.546875, "learning_rate": 0.000193548357231591, "loss": 4.4777, "step": 3335 }, { "epoch": 0.3459033497722103, "grad_norm": 1.7890625, "learning_rate": 0.00019354451826891967, "loss": 4.4111, "step": 3336 }, { "epoch": 0.3460070378266984, "grad_norm": 1.6171875, "learning_rate": 0.00019354067820252194, "loss": 4.4399, "step": 3337 }, { "epoch": 0.34611072588118647, "grad_norm": 1.671875, "learning_rate": 0.00019353683703244307, "loss": 4.4452, "step": 3338 }, { "epoch": 0.34621441393567454, "grad_norm": 1.609375, "learning_rate": 0.0001935329947587284, "loss": 4.4372, "step": 3339 }, { "epoch": 0.3463181019901626, "grad_norm": 1.6953125, "learning_rate": 0.00019352915138142325, "loss": 4.4171, "step": 3340 }, { "epoch": 0.3464217900446507, "grad_norm": 1.5, "learning_rate": 0.000193525306900573, "loss": 4.4561, "step": 3341 }, { "epoch": 0.34652547809913875, "grad_norm": 1.5546875, "learning_rate": 0.00019352146131622298, "loss": 4.4123, "step": 3342 }, { "epoch": 0.3466291661536268, "grad_norm": 1.4375, "learning_rate": 0.00019351761462841857, "loss": 4.4771, "step": 3343 }, { "epoch": 0.3467328542081149, "grad_norm": 1.609375, "learning_rate": 0.00019351376683720515, "loss": 4.4315, "step": 3344 }, { "epoch": 0.34683654226260296, "grad_norm": 1.4609375, "learning_rate": 0.00019350991794262813, "loss": 4.4284, "step": 3345 }, { "epoch": 0.34694023031709104, "grad_norm": 1.703125, "learning_rate": 0.00019350606794473293, "loss": 4.4584, "step": 3346 }, { "epoch": 0.3470439183715791, "grad_norm": 1.5625, "learning_rate": 0.000193502216843565, "loss": 4.3793, "step": 3347 }, { "epoch": 0.3471476064260672, "grad_norm": 1.53125, "learning_rate": 0.0001934983646391697, "loss": 4.4539, "step": 3348 }, { "epoch": 0.34725129448055525, "grad_norm": 1.4140625, "learning_rate": 0.00019349451133159255, "loss": 4.458, "step": 3349 }, { "epoch": 0.3473549825350433, "grad_norm": 1.4375, "learning_rate": 0.000193490656920879, "loss": 4.4281, "step": 3350 }, { "epoch": 0.3474586705895314, "grad_norm": 1.328125, "learning_rate": 0.0001934868014070745, "loss": 4.4403, "step": 3351 }, { "epoch": 0.34756235864401946, "grad_norm": 1.5, "learning_rate": 0.00019348294479022457, "loss": 4.4345, "step": 3352 }, { "epoch": 0.34766604669850754, "grad_norm": 1.3828125, "learning_rate": 0.0001934790870703747, "loss": 4.4486, "step": 3353 }, { "epoch": 0.3477697347529956, "grad_norm": 1.59375, "learning_rate": 0.00019347522824757042, "loss": 4.4779, "step": 3354 }, { "epoch": 0.3478734228074837, "grad_norm": 1.4375, "learning_rate": 0.00019347136832185727, "loss": 4.4556, "step": 3355 }, { "epoch": 0.34797711086197175, "grad_norm": 1.5234375, "learning_rate": 0.00019346750729328077, "loss": 4.4484, "step": 3356 }, { "epoch": 0.3480807989164598, "grad_norm": 1.3125, "learning_rate": 0.00019346364516188648, "loss": 4.4024, "step": 3357 }, { "epoch": 0.3481844869709479, "grad_norm": 1.859375, "learning_rate": 0.00019345978192772, "loss": 4.4108, "step": 3358 }, { "epoch": 0.34828817502543596, "grad_norm": 1.7109375, "learning_rate": 0.00019345591759082684, "loss": 4.4214, "step": 3359 }, { "epoch": 0.34839186307992404, "grad_norm": 1.7109375, "learning_rate": 0.00019345205215125265, "loss": 4.4519, "step": 3360 }, { "epoch": 0.3484955511344121, "grad_norm": 1.5546875, "learning_rate": 0.00019344818560904306, "loss": 4.4294, "step": 3361 }, { "epoch": 0.3485992391889002, "grad_norm": 1.625, "learning_rate": 0.00019344431796424364, "loss": 4.4023, "step": 3362 }, { "epoch": 0.34870292724338825, "grad_norm": 1.4453125, "learning_rate": 0.0001934404492169, "loss": 4.4062, "step": 3363 }, { "epoch": 0.3488066152978763, "grad_norm": 1.7734375, "learning_rate": 0.0001934365793670579, "loss": 4.4425, "step": 3364 }, { "epoch": 0.3489103033523644, "grad_norm": 1.6484375, "learning_rate": 0.0001934327084147629, "loss": 4.4259, "step": 3365 }, { "epoch": 0.34901399140685246, "grad_norm": 1.453125, "learning_rate": 0.0001934288363600607, "loss": 4.4137, "step": 3366 }, { "epoch": 0.34911767946134054, "grad_norm": 1.40625, "learning_rate": 0.000193424963202997, "loss": 4.4226, "step": 3367 }, { "epoch": 0.3492213675158286, "grad_norm": 1.3984375, "learning_rate": 0.0001934210889436175, "loss": 4.4478, "step": 3368 }, { "epoch": 0.3493250555703167, "grad_norm": 1.2265625, "learning_rate": 0.00019341721358196785, "loss": 4.4312, "step": 3369 }, { "epoch": 0.34942874362480475, "grad_norm": 1.6875, "learning_rate": 0.00019341333711809386, "loss": 4.4476, "step": 3370 }, { "epoch": 0.3495324316792928, "grad_norm": 1.3828125, "learning_rate": 0.00019340945955204121, "loss": 4.4335, "step": 3371 }, { "epoch": 0.3496361197337809, "grad_norm": 2.0, "learning_rate": 0.0001934055808838557, "loss": 4.4462, "step": 3372 }, { "epoch": 0.349739807788269, "grad_norm": 1.9375, "learning_rate": 0.0001934017011135831, "loss": 4.4577, "step": 3373 }, { "epoch": 0.3498434958427571, "grad_norm": 1.28125, "learning_rate": 0.00019339782024126908, "loss": 4.3599, "step": 3374 }, { "epoch": 0.34994718389724516, "grad_norm": 1.359375, "learning_rate": 0.00019339393826695958, "loss": 4.4257, "step": 3375 }, { "epoch": 0.35005087195173323, "grad_norm": 1.4296875, "learning_rate": 0.00019339005519070028, "loss": 4.443, "step": 3376 }, { "epoch": 0.3501545600062213, "grad_norm": 1.078125, "learning_rate": 0.0001933861710125371, "loss": 4.4858, "step": 3377 }, { "epoch": 0.3502582480607094, "grad_norm": 1.9921875, "learning_rate": 0.00019338228573251575, "loss": 4.4624, "step": 3378 }, { "epoch": 0.35036193611519745, "grad_norm": 1.8671875, "learning_rate": 0.00019337839935068218, "loss": 4.4479, "step": 3379 }, { "epoch": 0.3504656241696855, "grad_norm": 1.453125, "learning_rate": 0.00019337451186708218, "loss": 4.4735, "step": 3380 }, { "epoch": 0.3505693122241736, "grad_norm": 1.359375, "learning_rate": 0.00019337062328176165, "loss": 4.4579, "step": 3381 }, { "epoch": 0.35067300027866166, "grad_norm": 1.609375, "learning_rate": 0.00019336673359476647, "loss": 4.4533, "step": 3382 }, { "epoch": 0.35077668833314973, "grad_norm": 1.3671875, "learning_rate": 0.0001933628428061425, "loss": 4.4427, "step": 3383 }, { "epoch": 0.3508803763876378, "grad_norm": 1.96875, "learning_rate": 0.00019335895091593573, "loss": 4.4462, "step": 3384 }, { "epoch": 0.3509840644421259, "grad_norm": 1.7734375, "learning_rate": 0.00019335505792419198, "loss": 4.4379, "step": 3385 }, { "epoch": 0.35108775249661395, "grad_norm": 1.515625, "learning_rate": 0.00019335116383095724, "loss": 4.4643, "step": 3386 }, { "epoch": 0.351191440551102, "grad_norm": 1.484375, "learning_rate": 0.00019334726863627744, "loss": 4.4121, "step": 3387 }, { "epoch": 0.3512951286055901, "grad_norm": 1.5234375, "learning_rate": 0.00019334337234019856, "loss": 4.4721, "step": 3388 }, { "epoch": 0.35139881666007816, "grad_norm": 1.3046875, "learning_rate": 0.0001933394749427665, "loss": 4.4425, "step": 3389 }, { "epoch": 0.35150250471456623, "grad_norm": 1.796875, "learning_rate": 0.00019333557644402735, "loss": 4.4795, "step": 3390 }, { "epoch": 0.3516061927690543, "grad_norm": 1.59375, "learning_rate": 0.00019333167684402704, "loss": 4.4531, "step": 3391 }, { "epoch": 0.3517098808235424, "grad_norm": 1.9140625, "learning_rate": 0.00019332777614281162, "loss": 4.4808, "step": 3392 }, { "epoch": 0.35181356887803045, "grad_norm": 1.828125, "learning_rate": 0.00019332387434042706, "loss": 4.4205, "step": 3393 }, { "epoch": 0.3519172569325185, "grad_norm": 1.640625, "learning_rate": 0.00019331997143691947, "loss": 4.429, "step": 3394 }, { "epoch": 0.3520209449870066, "grad_norm": 1.46875, "learning_rate": 0.00019331606743233483, "loss": 4.4505, "step": 3395 }, { "epoch": 0.35212463304149466, "grad_norm": 1.546875, "learning_rate": 0.00019331216232671924, "loss": 4.4677, "step": 3396 }, { "epoch": 0.35222832109598273, "grad_norm": 1.4296875, "learning_rate": 0.00019330825612011877, "loss": 4.428, "step": 3397 }, { "epoch": 0.3523320091504708, "grad_norm": 1.59375, "learning_rate": 0.0001933043488125795, "loss": 4.4564, "step": 3398 }, { "epoch": 0.3524356972049589, "grad_norm": 1.3125, "learning_rate": 0.00019330044040414754, "loss": 4.4115, "step": 3399 }, { "epoch": 0.35253938525944695, "grad_norm": 1.4921875, "learning_rate": 0.00019329653089486903, "loss": 4.4736, "step": 3400 }, { "epoch": 0.352643073313935, "grad_norm": 1.2265625, "learning_rate": 0.00019329262028479005, "loss": 4.4048, "step": 3401 }, { "epoch": 0.3527467613684231, "grad_norm": 1.7265625, "learning_rate": 0.0001932887085739568, "loss": 4.4026, "step": 3402 }, { "epoch": 0.35285044942291116, "grad_norm": 1.4609375, "learning_rate": 0.00019328479576241535, "loss": 4.4926, "step": 3403 }, { "epoch": 0.35295413747739923, "grad_norm": 1.6484375, "learning_rate": 0.00019328088185021196, "loss": 4.4422, "step": 3404 }, { "epoch": 0.3530578255318873, "grad_norm": 1.53125, "learning_rate": 0.00019327696683739274, "loss": 4.4698, "step": 3405 }, { "epoch": 0.3531615135863754, "grad_norm": 1.4375, "learning_rate": 0.00019327305072400393, "loss": 4.4035, "step": 3406 }, { "epoch": 0.35326520164086345, "grad_norm": 1.28125, "learning_rate": 0.00019326913351009172, "loss": 4.4461, "step": 3407 }, { "epoch": 0.3533688896953515, "grad_norm": 1.515625, "learning_rate": 0.0001932652151957023, "loss": 4.4329, "step": 3408 }, { "epoch": 0.3534725777498396, "grad_norm": 1.2578125, "learning_rate": 0.00019326129578088193, "loss": 4.4534, "step": 3409 }, { "epoch": 0.35357626580432766, "grad_norm": 1.8671875, "learning_rate": 0.00019325737526567683, "loss": 4.457, "step": 3410 }, { "epoch": 0.35367995385881573, "grad_norm": 1.6796875, "learning_rate": 0.00019325345365013333, "loss": 4.4506, "step": 3411 }, { "epoch": 0.3537836419133038, "grad_norm": 1.59375, "learning_rate": 0.0001932495309342976, "loss": 4.4096, "step": 3412 }, { "epoch": 0.3538873299677919, "grad_norm": 1.46875, "learning_rate": 0.000193245607118216, "loss": 4.4292, "step": 3413 }, { "epoch": 0.35399101802227995, "grad_norm": 1.65625, "learning_rate": 0.0001932416822019348, "loss": 4.4447, "step": 3414 }, { "epoch": 0.354094706076768, "grad_norm": 1.5234375, "learning_rate": 0.00019323775618550033, "loss": 4.4365, "step": 3415 }, { "epoch": 0.3541983941312561, "grad_norm": 1.6328125, "learning_rate": 0.00019323382906895887, "loss": 4.4433, "step": 3416 }, { "epoch": 0.3543020821857442, "grad_norm": 1.5078125, "learning_rate": 0.00019322990085235677, "loss": 4.3989, "step": 3417 }, { "epoch": 0.3544057702402323, "grad_norm": 1.453125, "learning_rate": 0.00019322597153574041, "loss": 4.4656, "step": 3418 }, { "epoch": 0.35450945829472036, "grad_norm": 1.3515625, "learning_rate": 0.00019322204111915612, "loss": 4.4606, "step": 3419 }, { "epoch": 0.35461314634920843, "grad_norm": 1.6640625, "learning_rate": 0.00019321810960265027, "loss": 4.3985, "step": 3420 }, { "epoch": 0.3547168344036965, "grad_norm": 1.3984375, "learning_rate": 0.0001932141769862693, "loss": 4.4343, "step": 3421 }, { "epoch": 0.3548205224581846, "grad_norm": 1.765625, "learning_rate": 0.00019321024327005953, "loss": 4.4955, "step": 3422 }, { "epoch": 0.35492421051267264, "grad_norm": 1.6640625, "learning_rate": 0.00019320630845406743, "loss": 4.4026, "step": 3423 }, { "epoch": 0.3550278985671607, "grad_norm": 1.4453125, "learning_rate": 0.00019320237253833944, "loss": 4.4636, "step": 3424 }, { "epoch": 0.3551315866216488, "grad_norm": 1.359375, "learning_rate": 0.00019319843552292193, "loss": 4.4602, "step": 3425 }, { "epoch": 0.35523527467613686, "grad_norm": 1.484375, "learning_rate": 0.00019319449740786145, "loss": 4.4292, "step": 3426 }, { "epoch": 0.35533896273062493, "grad_norm": 1.3515625, "learning_rate": 0.00019319055819320437, "loss": 4.4169, "step": 3427 }, { "epoch": 0.355442650785113, "grad_norm": 1.546875, "learning_rate": 0.00019318661787899724, "loss": 4.4037, "step": 3428 }, { "epoch": 0.3555463388396011, "grad_norm": 1.390625, "learning_rate": 0.0001931826764652865, "loss": 4.4441, "step": 3429 }, { "epoch": 0.35565002689408914, "grad_norm": 1.5859375, "learning_rate": 0.00019317873395211868, "loss": 4.406, "step": 3430 }, { "epoch": 0.3557537149485772, "grad_norm": 1.4375, "learning_rate": 0.0001931747903395403, "loss": 4.4601, "step": 3431 }, { "epoch": 0.3558574030030653, "grad_norm": 1.6328125, "learning_rate": 0.00019317084562759786, "loss": 4.4696, "step": 3432 }, { "epoch": 0.35596109105755336, "grad_norm": 1.46875, "learning_rate": 0.00019316689981633796, "loss": 4.4326, "step": 3433 }, { "epoch": 0.35606477911204143, "grad_norm": 1.5625, "learning_rate": 0.00019316295290580708, "loss": 4.443, "step": 3434 }, { "epoch": 0.3561684671665295, "grad_norm": 1.4375, "learning_rate": 0.00019315900489605186, "loss": 4.4226, "step": 3435 }, { "epoch": 0.3562721552210176, "grad_norm": 1.4609375, "learning_rate": 0.00019315505578711888, "loss": 4.4494, "step": 3436 }, { "epoch": 0.35637584327550564, "grad_norm": 1.390625, "learning_rate": 0.0001931511055790547, "loss": 4.4322, "step": 3437 }, { "epoch": 0.3564795313299937, "grad_norm": 1.5859375, "learning_rate": 0.00019314715427190592, "loss": 4.3908, "step": 3438 }, { "epoch": 0.3565832193844818, "grad_norm": 1.4296875, "learning_rate": 0.00019314320186571919, "loss": 4.4296, "step": 3439 }, { "epoch": 0.35668690743896986, "grad_norm": 1.5625, "learning_rate": 0.0001931392483605411, "loss": 4.4108, "step": 3440 }, { "epoch": 0.35679059549345793, "grad_norm": 1.46875, "learning_rate": 0.0001931352937564184, "loss": 4.4312, "step": 3441 }, { "epoch": 0.356894283547946, "grad_norm": 1.53125, "learning_rate": 0.00019313133805339762, "loss": 4.4593, "step": 3442 }, { "epoch": 0.35699797160243407, "grad_norm": 1.3203125, "learning_rate": 0.00019312738125152554, "loss": 4.3779, "step": 3443 }, { "epoch": 0.35710165965692214, "grad_norm": 1.6171875, "learning_rate": 0.00019312342335084878, "loss": 4.3964, "step": 3444 }, { "epoch": 0.3572053477114102, "grad_norm": 1.4765625, "learning_rate": 0.00019311946435141407, "loss": 4.4928, "step": 3445 }, { "epoch": 0.3573090357658983, "grad_norm": 1.4296875, "learning_rate": 0.0001931155042532681, "loss": 4.4779, "step": 3446 }, { "epoch": 0.35741272382038636, "grad_norm": 1.3671875, "learning_rate": 0.00019311154305645762, "loss": 4.4363, "step": 3447 }, { "epoch": 0.35751641187487443, "grad_norm": 1.3515625, "learning_rate": 0.00019310758076102933, "loss": 4.4306, "step": 3448 }, { "epoch": 0.3576200999293625, "grad_norm": 1.2734375, "learning_rate": 0.00019310361736703003, "loss": 4.4224, "step": 3449 }, { "epoch": 0.35772378798385057, "grad_norm": 1.375, "learning_rate": 0.00019309965287450645, "loss": 4.4059, "step": 3450 }, { "epoch": 0.35782747603833864, "grad_norm": 1.2734375, "learning_rate": 0.00019309568728350537, "loss": 4.506, "step": 3451 }, { "epoch": 0.3579311640928267, "grad_norm": 1.46875, "learning_rate": 0.0001930917205940736, "loss": 4.4283, "step": 3452 }, { "epoch": 0.3580348521473148, "grad_norm": 1.3125, "learning_rate": 0.00019308775280625794, "loss": 4.4682, "step": 3453 }, { "epoch": 0.35813854020180286, "grad_norm": 1.3515625, "learning_rate": 0.0001930837839201052, "loss": 4.4591, "step": 3454 }, { "epoch": 0.35824222825629093, "grad_norm": 1.2421875, "learning_rate": 0.00019307981393566217, "loss": 4.4646, "step": 3455 }, { "epoch": 0.358345916310779, "grad_norm": 1.3515625, "learning_rate": 0.00019307584285297572, "loss": 4.3935, "step": 3456 }, { "epoch": 0.35844960436526707, "grad_norm": 1.1875, "learning_rate": 0.00019307187067209273, "loss": 4.469, "step": 3457 }, { "epoch": 0.35855329241975514, "grad_norm": 1.5078125, "learning_rate": 0.00019306789739306006, "loss": 4.4591, "step": 3458 }, { "epoch": 0.3586569804742432, "grad_norm": 1.3515625, "learning_rate": 0.00019306392301592456, "loss": 4.4508, "step": 3459 }, { "epoch": 0.3587606685287313, "grad_norm": 1.390625, "learning_rate": 0.00019305994754073314, "loss": 4.3816, "step": 3460 }, { "epoch": 0.35886435658321936, "grad_norm": 1.2421875, "learning_rate": 0.00019305597096753272, "loss": 4.4473, "step": 3461 }, { "epoch": 0.3589680446377075, "grad_norm": 1.4453125, "learning_rate": 0.00019305199329637017, "loss": 4.4264, "step": 3462 }, { "epoch": 0.35907173269219556, "grad_norm": 1.3046875, "learning_rate": 0.0001930480145272925, "loss": 4.4503, "step": 3463 }, { "epoch": 0.3591754207466836, "grad_norm": 1.484375, "learning_rate": 0.0001930440346603466, "loss": 4.4796, "step": 3464 }, { "epoch": 0.3592791088011717, "grad_norm": 1.2734375, "learning_rate": 0.00019304005369557946, "loss": 4.4133, "step": 3465 }, { "epoch": 0.35938279685565977, "grad_norm": 1.5703125, "learning_rate": 0.00019303607163303804, "loss": 4.4165, "step": 3466 }, { "epoch": 0.35948648491014784, "grad_norm": 1.40625, "learning_rate": 0.00019303208847276928, "loss": 4.4471, "step": 3467 }, { "epoch": 0.3595901729646359, "grad_norm": 1.5703125, "learning_rate": 0.00019302810421482021, "loss": 4.3831, "step": 3468 }, { "epoch": 0.359693861019124, "grad_norm": 1.4375, "learning_rate": 0.00019302411885923788, "loss": 4.4317, "step": 3469 }, { "epoch": 0.35979754907361206, "grad_norm": 1.7890625, "learning_rate": 0.00019302013240606928, "loss": 4.4411, "step": 3470 }, { "epoch": 0.3599012371281001, "grad_norm": 1.6015625, "learning_rate": 0.00019301614485536144, "loss": 4.4209, "step": 3471 }, { "epoch": 0.3600049251825882, "grad_norm": 1.5, "learning_rate": 0.0001930121562071614, "loss": 4.4295, "step": 3472 }, { "epoch": 0.36010861323707627, "grad_norm": 1.3984375, "learning_rate": 0.00019300816646151622, "loss": 4.4055, "step": 3473 }, { "epoch": 0.36021230129156434, "grad_norm": 1.5703125, "learning_rate": 0.000193004175618473, "loss": 4.4217, "step": 3474 }, { "epoch": 0.3603159893460524, "grad_norm": 1.3984375, "learning_rate": 0.00019300018367807885, "loss": 4.4355, "step": 3475 }, { "epoch": 0.3604196774005405, "grad_norm": 1.625, "learning_rate": 0.0001929961906403808, "loss": 4.425, "step": 3476 }, { "epoch": 0.36052336545502855, "grad_norm": 1.4765625, "learning_rate": 0.00019299219650542604, "loss": 4.4662, "step": 3477 }, { "epoch": 0.3606270535095166, "grad_norm": 1.5, "learning_rate": 0.0001929882012732616, "loss": 4.4452, "step": 3478 }, { "epoch": 0.3607307415640047, "grad_norm": 1.453125, "learning_rate": 0.00019298420494393473, "loss": 4.4603, "step": 3479 }, { "epoch": 0.36083442961849277, "grad_norm": 1.1875, "learning_rate": 0.00019298020751749251, "loss": 4.39, "step": 3480 }, { "epoch": 0.36093811767298084, "grad_norm": 1.15625, "learning_rate": 0.00019297620899398212, "loss": 4.4747, "step": 3481 }, { "epoch": 0.3610418057274689, "grad_norm": 1.3671875, "learning_rate": 0.00019297220937345078, "loss": 4.4274, "step": 3482 }, { "epoch": 0.361145493781957, "grad_norm": 1.1640625, "learning_rate": 0.00019296820865594562, "loss": 4.4059, "step": 3483 }, { "epoch": 0.36124918183644505, "grad_norm": 1.5859375, "learning_rate": 0.00019296420684151384, "loss": 4.3839, "step": 3484 }, { "epoch": 0.3613528698909331, "grad_norm": 1.4609375, "learning_rate": 0.00019296020393020272, "loss": 4.4561, "step": 3485 }, { "epoch": 0.3614565579454212, "grad_norm": 1.40625, "learning_rate": 0.00019295619992205944, "loss": 4.45, "step": 3486 }, { "epoch": 0.36156024599990927, "grad_norm": 1.34375, "learning_rate": 0.0001929521948171313, "loss": 4.4206, "step": 3487 }, { "epoch": 0.36166393405439734, "grad_norm": 1.4453125, "learning_rate": 0.00019294818861546547, "loss": 4.4488, "step": 3488 }, { "epoch": 0.3617676221088854, "grad_norm": 1.296875, "learning_rate": 0.00019294418131710926, "loss": 4.4317, "step": 3489 }, { "epoch": 0.3618713101633735, "grad_norm": 1.4296875, "learning_rate": 0.00019294017292211, "loss": 4.4465, "step": 3490 }, { "epoch": 0.36197499821786155, "grad_norm": 1.3359375, "learning_rate": 0.00019293616343051491, "loss": 4.4293, "step": 3491 }, { "epoch": 0.3620786862723496, "grad_norm": 1.203125, "learning_rate": 0.00019293215284237134, "loss": 4.4282, "step": 3492 }, { "epoch": 0.3621823743268377, "grad_norm": 1.078125, "learning_rate": 0.0001929281411577266, "loss": 4.4317, "step": 3493 }, { "epoch": 0.36228606238132577, "grad_norm": 1.46875, "learning_rate": 0.000192924128376628, "loss": 4.4273, "step": 3494 }, { "epoch": 0.36238975043581384, "grad_norm": 1.1953125, "learning_rate": 0.00019292011449912295, "loss": 4.422, "step": 3495 }, { "epoch": 0.3624934384903019, "grad_norm": 1.6640625, "learning_rate": 0.00019291609952525876, "loss": 4.441, "step": 3496 }, { "epoch": 0.36259712654479, "grad_norm": 1.4765625, "learning_rate": 0.00019291208345508277, "loss": 4.4409, "step": 3497 }, { "epoch": 0.36270081459927805, "grad_norm": 1.40625, "learning_rate": 0.00019290806628864245, "loss": 4.4563, "step": 3498 }, { "epoch": 0.3628045026537661, "grad_norm": 1.328125, "learning_rate": 0.00019290404802598516, "loss": 4.4164, "step": 3499 }, { "epoch": 0.3629081907082542, "grad_norm": 1.4609375, "learning_rate": 0.00019290002866715827, "loss": 4.4176, "step": 3500 }, { "epoch": 0.36301187876274227, "grad_norm": 1.3046875, "learning_rate": 0.00019289600821220928, "loss": 4.455, "step": 3501 }, { "epoch": 0.36311556681723034, "grad_norm": 1.484375, "learning_rate": 0.00019289198666118558, "loss": 4.4429, "step": 3502 }, { "epoch": 0.3632192548717184, "grad_norm": 1.390625, "learning_rate": 0.00019288796401413462, "loss": 4.4002, "step": 3503 }, { "epoch": 0.3633229429262065, "grad_norm": 1.3125, "learning_rate": 0.00019288394027110387, "loss": 4.413, "step": 3504 }, { "epoch": 0.36342663098069455, "grad_norm": 1.1796875, "learning_rate": 0.00019287991543214084, "loss": 4.4612, "step": 3505 }, { "epoch": 0.3635303190351826, "grad_norm": 1.4453125, "learning_rate": 0.00019287588949729295, "loss": 4.4533, "step": 3506 }, { "epoch": 0.36363400708967075, "grad_norm": 1.2734375, "learning_rate": 0.00019287186246660774, "loss": 4.4742, "step": 3507 }, { "epoch": 0.3637376951441588, "grad_norm": 1.5546875, "learning_rate": 0.0001928678343401327, "loss": 4.4463, "step": 3508 }, { "epoch": 0.3638413831986469, "grad_norm": 1.3046875, "learning_rate": 0.0001928638051179154, "loss": 4.41, "step": 3509 }, { "epoch": 0.36394507125313497, "grad_norm": 1.5546875, "learning_rate": 0.0001928597748000034, "loss": 4.4391, "step": 3510 }, { "epoch": 0.36404875930762304, "grad_norm": 1.328125, "learning_rate": 0.0001928557433864442, "loss": 4.4267, "step": 3511 }, { "epoch": 0.3641524473621111, "grad_norm": 1.6484375, "learning_rate": 0.00019285171087728536, "loss": 4.4649, "step": 3512 }, { "epoch": 0.3642561354165992, "grad_norm": 1.4921875, "learning_rate": 0.00019284767727257445, "loss": 4.4497, "step": 3513 }, { "epoch": 0.36435982347108725, "grad_norm": 1.4375, "learning_rate": 0.00019284364257235917, "loss": 4.479, "step": 3514 }, { "epoch": 0.3644635115255753, "grad_norm": 1.328125, "learning_rate": 0.000192839606776687, "loss": 4.4417, "step": 3515 }, { "epoch": 0.3645671995800634, "grad_norm": 1.2890625, "learning_rate": 0.00019283556988560562, "loss": 4.3996, "step": 3516 }, { "epoch": 0.36467088763455147, "grad_norm": 1.203125, "learning_rate": 0.0001928315318991626, "loss": 4.4304, "step": 3517 }, { "epoch": 0.36477457568903954, "grad_norm": 1.546875, "learning_rate": 0.0001928274928174057, "loss": 4.4209, "step": 3518 }, { "epoch": 0.3648782637435276, "grad_norm": 1.4609375, "learning_rate": 0.00019282345264038246, "loss": 4.4319, "step": 3519 }, { "epoch": 0.3649819517980157, "grad_norm": 1.328125, "learning_rate": 0.0001928194113681406, "loss": 4.4293, "step": 3520 }, { "epoch": 0.36508563985250375, "grad_norm": 1.15625, "learning_rate": 0.00019281536900072783, "loss": 4.4231, "step": 3521 }, { "epoch": 0.3651893279069918, "grad_norm": 1.4609375, "learning_rate": 0.00019281132553819182, "loss": 4.435, "step": 3522 }, { "epoch": 0.3652930159614799, "grad_norm": 1.3125, "learning_rate": 0.00019280728098058022, "loss": 4.444, "step": 3523 }, { "epoch": 0.36539670401596797, "grad_norm": 1.75, "learning_rate": 0.00019280323532794085, "loss": 4.4257, "step": 3524 }, { "epoch": 0.36550039207045604, "grad_norm": 1.59375, "learning_rate": 0.0001927991885803214, "loss": 4.39, "step": 3525 }, { "epoch": 0.3656040801249441, "grad_norm": 1.3984375, "learning_rate": 0.00019279514073776958, "loss": 4.4168, "step": 3526 }, { "epoch": 0.3657077681794322, "grad_norm": 1.3203125, "learning_rate": 0.00019279109180033322, "loss": 4.4334, "step": 3527 }, { "epoch": 0.36581145623392025, "grad_norm": 1.390625, "learning_rate": 0.00019278704176806008, "loss": 4.4292, "step": 3528 }, { "epoch": 0.3659151442884083, "grad_norm": 1.2578125, "learning_rate": 0.0001927829906409979, "loss": 4.4186, "step": 3529 }, { "epoch": 0.3660188323428964, "grad_norm": 1.6484375, "learning_rate": 0.00019277893841919451, "loss": 4.4232, "step": 3530 }, { "epoch": 0.36612252039738447, "grad_norm": 1.4140625, "learning_rate": 0.00019277488510269773, "loss": 4.4418, "step": 3531 }, { "epoch": 0.36622620845187254, "grad_norm": 1.4453125, "learning_rate": 0.00019277083069155535, "loss": 4.4193, "step": 3532 }, { "epoch": 0.3663298965063606, "grad_norm": 1.3359375, "learning_rate": 0.00019276677518581523, "loss": 4.4612, "step": 3533 }, { "epoch": 0.3664335845608487, "grad_norm": 1.5390625, "learning_rate": 0.00019276271858552523, "loss": 4.4345, "step": 3534 }, { "epoch": 0.36653727261533675, "grad_norm": 1.421875, "learning_rate": 0.0001927586608907332, "loss": 4.4532, "step": 3535 }, { "epoch": 0.3666409606698248, "grad_norm": 1.5390625, "learning_rate": 0.00019275460210148705, "loss": 4.456, "step": 3536 }, { "epoch": 0.3667446487243129, "grad_norm": 1.4296875, "learning_rate": 0.00019275054221783462, "loss": 4.4461, "step": 3537 }, { "epoch": 0.36684833677880097, "grad_norm": 1.453125, "learning_rate": 0.00019274648123982383, "loss": 4.393, "step": 3538 }, { "epoch": 0.36695202483328904, "grad_norm": 1.3203125, "learning_rate": 0.0001927424191675026, "loss": 4.4317, "step": 3539 }, { "epoch": 0.3670557128877771, "grad_norm": 1.4765625, "learning_rate": 0.00019273835600091887, "loss": 4.4409, "step": 3540 }, { "epoch": 0.3671594009422652, "grad_norm": 1.3515625, "learning_rate": 0.00019273429174012057, "loss": 4.4772, "step": 3541 }, { "epoch": 0.36726308899675325, "grad_norm": 1.421875, "learning_rate": 0.00019273022638515564, "loss": 4.4135, "step": 3542 }, { "epoch": 0.3673667770512413, "grad_norm": 1.234375, "learning_rate": 0.00019272615993607206, "loss": 4.5041, "step": 3543 }, { "epoch": 0.3674704651057294, "grad_norm": 1.765625, "learning_rate": 0.0001927220923929178, "loss": 4.449, "step": 3544 }, { "epoch": 0.36757415316021746, "grad_norm": 1.5, "learning_rate": 0.00019271802375574087, "loss": 4.4044, "step": 3545 }, { "epoch": 0.36767784121470554, "grad_norm": 1.75, "learning_rate": 0.00019271395402458926, "loss": 4.5054, "step": 3546 }, { "epoch": 0.3677815292691936, "grad_norm": 1.6796875, "learning_rate": 0.00019270988319951103, "loss": 4.4233, "step": 3547 }, { "epoch": 0.3678852173236817, "grad_norm": 1.3046875, "learning_rate": 0.00019270581128055412, "loss": 4.466, "step": 3548 }, { "epoch": 0.36798890537816975, "grad_norm": 1.234375, "learning_rate": 0.00019270173826776667, "loss": 4.4506, "step": 3549 }, { "epoch": 0.3680925934326578, "grad_norm": 1.3984375, "learning_rate": 0.0001926976641611967, "loss": 4.4769, "step": 3550 }, { "epoch": 0.36819628148714595, "grad_norm": 1.140625, "learning_rate": 0.00019269358896089226, "loss": 4.4227, "step": 3551 }, { "epoch": 0.368299969541634, "grad_norm": 1.7890625, "learning_rate": 0.00019268951266690146, "loss": 4.4437, "step": 3552 }, { "epoch": 0.3684036575961221, "grad_norm": 1.625, "learning_rate": 0.00019268543527927237, "loss": 4.4569, "step": 3553 }, { "epoch": 0.36850734565061016, "grad_norm": 1.5078125, "learning_rate": 0.00019268135679805312, "loss": 4.4919, "step": 3554 }, { "epoch": 0.36861103370509823, "grad_norm": 1.4609375, "learning_rate": 0.0001926772772232919, "loss": 4.4385, "step": 3555 }, { "epoch": 0.3687147217595863, "grad_norm": 1.296875, "learning_rate": 0.00019267319655503665, "loss": 4.4652, "step": 3556 }, { "epoch": 0.3688184098140744, "grad_norm": 1.2109375, "learning_rate": 0.00019266911479333572, "loss": 4.3996, "step": 3557 }, { "epoch": 0.36892209786856245, "grad_norm": 1.515625, "learning_rate": 0.00019266503193823717, "loss": 4.4644, "step": 3558 }, { "epoch": 0.3690257859230505, "grad_norm": 1.3046875, "learning_rate": 0.00019266094798978922, "loss": 4.4592, "step": 3559 }, { "epoch": 0.3691294739775386, "grad_norm": 1.6015625, "learning_rate": 0.00019265686294804, "loss": 4.444, "step": 3560 }, { "epoch": 0.36923316203202666, "grad_norm": 1.46875, "learning_rate": 0.00019265277681303775, "loss": 4.4513, "step": 3561 }, { "epoch": 0.36933685008651473, "grad_norm": 1.4296875, "learning_rate": 0.00019264868958483066, "loss": 4.435, "step": 3562 }, { "epoch": 0.3694405381410028, "grad_norm": 1.2578125, "learning_rate": 0.00019264460126346697, "loss": 4.4655, "step": 3563 }, { "epoch": 0.3695442261954909, "grad_norm": 1.4453125, "learning_rate": 0.00019264051184899494, "loss": 4.4762, "step": 3564 }, { "epoch": 0.36964791424997895, "grad_norm": 1.3125, "learning_rate": 0.00019263642134146277, "loss": 4.4287, "step": 3565 }, { "epoch": 0.369751602304467, "grad_norm": 1.5546875, "learning_rate": 0.00019263232974091877, "loss": 4.4286, "step": 3566 }, { "epoch": 0.3698552903589551, "grad_norm": 1.421875, "learning_rate": 0.00019262823704741119, "loss": 4.4731, "step": 3567 }, { "epoch": 0.36995897841344316, "grad_norm": 1.34375, "learning_rate": 0.00019262414326098832, "loss": 4.4016, "step": 3568 }, { "epoch": 0.37006266646793123, "grad_norm": 1.1640625, "learning_rate": 0.00019262004838169845, "loss": 4.3934, "step": 3569 }, { "epoch": 0.3701663545224193, "grad_norm": 1.4453125, "learning_rate": 0.00019261595240958993, "loss": 4.4368, "step": 3570 }, { "epoch": 0.3702700425769074, "grad_norm": 1.2421875, "learning_rate": 0.00019261185534471108, "loss": 4.4612, "step": 3571 }, { "epoch": 0.37037373063139545, "grad_norm": 1.515625, "learning_rate": 0.0001926077571871102, "loss": 4.4292, "step": 3572 }, { "epoch": 0.3704774186858835, "grad_norm": 1.3515625, "learning_rate": 0.00019260365793683572, "loss": 4.421, "step": 3573 }, { "epoch": 0.3705811067403716, "grad_norm": 1.359375, "learning_rate": 0.00019259955759393593, "loss": 4.4411, "step": 3574 }, { "epoch": 0.37068479479485966, "grad_norm": 1.265625, "learning_rate": 0.00019259545615845925, "loss": 4.4327, "step": 3575 }, { "epoch": 0.37078848284934773, "grad_norm": 1.4609375, "learning_rate": 0.00019259135363045406, "loss": 4.4329, "step": 3576 }, { "epoch": 0.3708921709038358, "grad_norm": 1.3046875, "learning_rate": 0.00019258725000996881, "loss": 4.4206, "step": 3577 }, { "epoch": 0.3709958589583239, "grad_norm": 1.3828125, "learning_rate": 0.0001925831452970518, "loss": 4.4502, "step": 3578 }, { "epoch": 0.37109954701281195, "grad_norm": 1.3515625, "learning_rate": 0.0001925790394917516, "loss": 4.4349, "step": 3579 }, { "epoch": 0.3712032350673, "grad_norm": 1.203125, "learning_rate": 0.00019257493259411659, "loss": 4.4454, "step": 3580 }, { "epoch": 0.3713069231217881, "grad_norm": 1.0859375, "learning_rate": 0.0001925708246041952, "loss": 4.4163, "step": 3581 }, { "epoch": 0.37141061117627616, "grad_norm": 1.359375, "learning_rate": 0.00019256671552203596, "loss": 4.4385, "step": 3582 }, { "epoch": 0.37151429923076423, "grad_norm": 1.0703125, "learning_rate": 0.00019256260534768733, "loss": 4.4556, "step": 3583 }, { "epoch": 0.3716179872852523, "grad_norm": 1.671875, "learning_rate": 0.00019255849408119778, "loss": 4.4005, "step": 3584 }, { "epoch": 0.3717216753397404, "grad_norm": 1.4453125, "learning_rate": 0.00019255438172261586, "loss": 4.4442, "step": 3585 }, { "epoch": 0.37182536339422845, "grad_norm": 1.40625, "learning_rate": 0.00019255026827199006, "loss": 4.4232, "step": 3586 }, { "epoch": 0.3719290514487165, "grad_norm": 1.34375, "learning_rate": 0.0001925461537293689, "loss": 4.4578, "step": 3587 }, { "epoch": 0.3720327395032046, "grad_norm": 1.3671875, "learning_rate": 0.00019254203809480097, "loss": 4.4158, "step": 3588 }, { "epoch": 0.37213642755769266, "grad_norm": 1.171875, "learning_rate": 0.00019253792136833482, "loss": 4.4179, "step": 3589 }, { "epoch": 0.37224011561218073, "grad_norm": 1.328125, "learning_rate": 0.000192533803550019, "loss": 4.4066, "step": 3590 }, { "epoch": 0.3723438036666688, "grad_norm": 1.234375, "learning_rate": 0.0001925296846399021, "loss": 4.4247, "step": 3591 }, { "epoch": 0.3724474917211569, "grad_norm": 1.6640625, "learning_rate": 0.00019252556463803279, "loss": 4.4401, "step": 3592 }, { "epoch": 0.37255117977564495, "grad_norm": 1.5390625, "learning_rate": 0.00019252144354445957, "loss": 4.4367, "step": 3593 }, { "epoch": 0.372654867830133, "grad_norm": 1.2734375, "learning_rate": 0.0001925173213592311, "loss": 4.3991, "step": 3594 }, { "epoch": 0.3727585558846211, "grad_norm": 1.1953125, "learning_rate": 0.00019251319808239609, "loss": 4.4147, "step": 3595 }, { "epoch": 0.3728622439391092, "grad_norm": 1.2109375, "learning_rate": 0.00019250907371400308, "loss": 4.4117, "step": 3596 }, { "epoch": 0.3729659319935973, "grad_norm": 1.046875, "learning_rate": 0.0001925049482541008, "loss": 4.4487, "step": 3597 }, { "epoch": 0.37306962004808536, "grad_norm": 1.5234375, "learning_rate": 0.00019250082170273793, "loss": 4.4308, "step": 3598 }, { "epoch": 0.37317330810257343, "grad_norm": 1.3046875, "learning_rate": 0.0001924966940599631, "loss": 4.4148, "step": 3599 }, { "epoch": 0.3732769961570615, "grad_norm": 1.609375, "learning_rate": 0.0001924925653258251, "loss": 4.4425, "step": 3600 }, { "epoch": 0.3733806842115496, "grad_norm": 1.5078125, "learning_rate": 0.0001924884355003726, "loss": 4.4362, "step": 3601 }, { "epoch": 0.37348437226603765, "grad_norm": 1.140625, "learning_rate": 0.0001924843045836543, "loss": 4.4168, "step": 3602 }, { "epoch": 0.3735880603205257, "grad_norm": 1.09375, "learning_rate": 0.000192480172575719, "loss": 4.4619, "step": 3603 }, { "epoch": 0.3736917483750138, "grad_norm": 1.25, "learning_rate": 0.00019247603947661535, "loss": 4.424, "step": 3604 }, { "epoch": 0.37379543642950186, "grad_norm": 1.1015625, "learning_rate": 0.00019247190528639223, "loss": 4.4545, "step": 3605 }, { "epoch": 0.37389912448398993, "grad_norm": 1.6171875, "learning_rate": 0.00019246777000509838, "loss": 4.4457, "step": 3606 }, { "epoch": 0.374002812538478, "grad_norm": 1.4765625, "learning_rate": 0.00019246363363278259, "loss": 4.4527, "step": 3607 }, { "epoch": 0.3741065005929661, "grad_norm": 1.2421875, "learning_rate": 0.00019245949616949367, "loss": 4.4568, "step": 3608 }, { "epoch": 0.37421018864745415, "grad_norm": 1.1171875, "learning_rate": 0.00019245535761528037, "loss": 4.4389, "step": 3609 }, { "epoch": 0.3743138767019422, "grad_norm": 1.3125, "learning_rate": 0.00019245121797019165, "loss": 4.411, "step": 3610 }, { "epoch": 0.3744175647564303, "grad_norm": 1.0859375, "learning_rate": 0.00019244707723427623, "loss": 4.3858, "step": 3611 }, { "epoch": 0.37452125281091836, "grad_norm": 1.515625, "learning_rate": 0.00019244293540758304, "loss": 4.4078, "step": 3612 }, { "epoch": 0.37462494086540643, "grad_norm": 1.2890625, "learning_rate": 0.00019243879249016094, "loss": 4.4748, "step": 3613 }, { "epoch": 0.3747286289198945, "grad_norm": 1.5234375, "learning_rate": 0.0001924346484820588, "loss": 4.439, "step": 3614 }, { "epoch": 0.3748323169743826, "grad_norm": 1.4140625, "learning_rate": 0.0001924305033833255, "loss": 4.3731, "step": 3615 }, { "epoch": 0.37493600502887064, "grad_norm": 1.3359375, "learning_rate": 0.00019242635719400996, "loss": 4.4164, "step": 3616 }, { "epoch": 0.3750396930833587, "grad_norm": 1.25, "learning_rate": 0.00019242220991416112, "loss": 4.4429, "step": 3617 }, { "epoch": 0.3751433811378468, "grad_norm": 1.359375, "learning_rate": 0.00019241806154382792, "loss": 4.3936, "step": 3618 }, { "epoch": 0.37524706919233486, "grad_norm": 1.234375, "learning_rate": 0.00019241391208305926, "loss": 4.4168, "step": 3619 }, { "epoch": 0.37535075724682293, "grad_norm": 1.359375, "learning_rate": 0.0001924097615319041, "loss": 4.4512, "step": 3620 }, { "epoch": 0.375454445301311, "grad_norm": 1.1640625, "learning_rate": 0.00019240560989041146, "loss": 4.4483, "step": 3621 }, { "epoch": 0.3755581333557991, "grad_norm": 1.640625, "learning_rate": 0.0001924014571586303, "loss": 4.4152, "step": 3622 }, { "epoch": 0.37566182141028714, "grad_norm": 1.4375, "learning_rate": 0.0001923973033366096, "loss": 4.4342, "step": 3623 }, { "epoch": 0.3757655094647752, "grad_norm": 1.453125, "learning_rate": 0.0001923931484243984, "loss": 4.3984, "step": 3624 }, { "epoch": 0.3758691975192633, "grad_norm": 1.390625, "learning_rate": 0.0001923889924220457, "loss": 4.433, "step": 3625 }, { "epoch": 0.37597288557375136, "grad_norm": 1.328125, "learning_rate": 0.00019238483532960058, "loss": 4.4543, "step": 3626 }, { "epoch": 0.37607657362823943, "grad_norm": 1.25, "learning_rate": 0.00019238067714711203, "loss": 4.4161, "step": 3627 }, { "epoch": 0.3761802616827275, "grad_norm": 1.5546875, "learning_rate": 0.00019237651787462916, "loss": 4.4185, "step": 3628 }, { "epoch": 0.3762839497372156, "grad_norm": 1.4375, "learning_rate": 0.00019237235751220103, "loss": 4.4455, "step": 3629 }, { "epoch": 0.37638763779170364, "grad_norm": 1.3046875, "learning_rate": 0.0001923681960598767, "loss": 4.409, "step": 3630 }, { "epoch": 0.3764913258461917, "grad_norm": 1.2265625, "learning_rate": 0.0001923640335177053, "loss": 4.4611, "step": 3631 }, { "epoch": 0.3765950139006798, "grad_norm": 1.296875, "learning_rate": 0.00019235986988573595, "loss": 4.4378, "step": 3632 }, { "epoch": 0.37669870195516786, "grad_norm": 1.171875, "learning_rate": 0.00019235570516401776, "loss": 4.4345, "step": 3633 }, { "epoch": 0.37680239000965593, "grad_norm": 1.53125, "learning_rate": 0.00019235153935259986, "loss": 4.458, "step": 3634 }, { "epoch": 0.376906078064144, "grad_norm": 1.328125, "learning_rate": 0.00019234737245153142, "loss": 4.4146, "step": 3635 }, { "epoch": 0.3770097661186321, "grad_norm": 1.5546875, "learning_rate": 0.0001923432044608616, "loss": 4.4413, "step": 3636 }, { "epoch": 0.37711345417312014, "grad_norm": 1.3828125, "learning_rate": 0.00019233903538063958, "loss": 4.4489, "step": 3637 }, { "epoch": 0.3772171422276082, "grad_norm": 1.5390625, "learning_rate": 0.00019233486521091458, "loss": 4.4059, "step": 3638 }, { "epoch": 0.3773208302820963, "grad_norm": 1.3984375, "learning_rate": 0.00019233069395173573, "loss": 4.4205, "step": 3639 }, { "epoch": 0.3774245183365844, "grad_norm": 1.5625, "learning_rate": 0.0001923265216031523, "loss": 4.4273, "step": 3640 }, { "epoch": 0.3775282063910725, "grad_norm": 1.4375, "learning_rate": 0.00019232234816521352, "loss": 4.4103, "step": 3641 }, { "epoch": 0.37763189444556056, "grad_norm": 1.625, "learning_rate": 0.0001923181736379686, "loss": 4.44, "step": 3642 }, { "epoch": 0.37773558250004863, "grad_norm": 1.5, "learning_rate": 0.00019231399802146685, "loss": 4.4276, "step": 3643 }, { "epoch": 0.3778392705545367, "grad_norm": 1.453125, "learning_rate": 0.00019230982131575747, "loss": 4.4077, "step": 3644 }, { "epoch": 0.37794295860902477, "grad_norm": 1.3984375, "learning_rate": 0.00019230564352088977, "loss": 4.4196, "step": 3645 }, { "epoch": 0.37804664666351284, "grad_norm": 1.3203125, "learning_rate": 0.00019230146463691307, "loss": 4.4645, "step": 3646 }, { "epoch": 0.3781503347180009, "grad_norm": 1.1875, "learning_rate": 0.00019229728466387664, "loss": 4.4093, "step": 3647 }, { "epoch": 0.378254022772489, "grad_norm": 1.34375, "learning_rate": 0.00019229310360182984, "loss": 4.4172, "step": 3648 }, { "epoch": 0.37835771082697706, "grad_norm": 1.21875, "learning_rate": 0.00019228892145082193, "loss": 4.4356, "step": 3649 }, { "epoch": 0.3784613988814651, "grad_norm": 1.5625, "learning_rate": 0.00019228473821090235, "loss": 4.4533, "step": 3650 }, { "epoch": 0.3785650869359532, "grad_norm": 1.390625, "learning_rate": 0.0001922805538821204, "loss": 4.4694, "step": 3651 }, { "epoch": 0.37866877499044127, "grad_norm": 1.3046875, "learning_rate": 0.00019227636846452542, "loss": 4.4129, "step": 3652 }, { "epoch": 0.37877246304492934, "grad_norm": 1.2421875, "learning_rate": 0.00019227218195816685, "loss": 4.436, "step": 3653 }, { "epoch": 0.3788761510994174, "grad_norm": 1.296875, "learning_rate": 0.0001922679943630941, "loss": 4.4356, "step": 3654 }, { "epoch": 0.3789798391539055, "grad_norm": 1.1875, "learning_rate": 0.0001922638056793565, "loss": 4.4148, "step": 3655 }, { "epoch": 0.37908352720839356, "grad_norm": 1.6171875, "learning_rate": 0.00019225961590700353, "loss": 4.4184, "step": 3656 }, { "epoch": 0.3791872152628816, "grad_norm": 1.3828125, "learning_rate": 0.00019225542504608465, "loss": 4.4467, "step": 3657 }, { "epoch": 0.3792909033173697, "grad_norm": 1.5, "learning_rate": 0.00019225123309664924, "loss": 4.4668, "step": 3658 }, { "epoch": 0.37939459137185777, "grad_norm": 1.359375, "learning_rate": 0.00019224704005874684, "loss": 4.4578, "step": 3659 }, { "epoch": 0.37949827942634584, "grad_norm": 1.3046875, "learning_rate": 0.0001922428459324268, "loss": 4.4322, "step": 3660 }, { "epoch": 0.3796019674808339, "grad_norm": 1.1875, "learning_rate": 0.00019223865071773874, "loss": 4.4463, "step": 3661 }, { "epoch": 0.379705655535322, "grad_norm": 1.3984375, "learning_rate": 0.00019223445441473208, "loss": 4.425, "step": 3662 }, { "epoch": 0.37980934358981006, "grad_norm": 1.2734375, "learning_rate": 0.00019223025702345639, "loss": 4.4147, "step": 3663 }, { "epoch": 0.3799130316442981, "grad_norm": 1.296875, "learning_rate": 0.00019222605854396111, "loss": 4.4286, "step": 3664 }, { "epoch": 0.3800167196987862, "grad_norm": 1.203125, "learning_rate": 0.00019222185897629584, "loss": 4.4276, "step": 3665 }, { "epoch": 0.38012040775327427, "grad_norm": 1.3203125, "learning_rate": 0.00019221765832051012, "loss": 4.4228, "step": 3666 }, { "epoch": 0.38022409580776234, "grad_norm": 1.1796875, "learning_rate": 0.00019221345657665352, "loss": 4.4041, "step": 3667 }, { "epoch": 0.3803277838622504, "grad_norm": 1.359375, "learning_rate": 0.0001922092537447756, "loss": 4.447, "step": 3668 }, { "epoch": 0.3804314719167385, "grad_norm": 1.234375, "learning_rate": 0.00019220504982492596, "loss": 4.4779, "step": 3669 }, { "epoch": 0.38053515997122656, "grad_norm": 1.2578125, "learning_rate": 0.00019220084481715418, "loss": 4.4111, "step": 3670 }, { "epoch": 0.3806388480257146, "grad_norm": 1.171875, "learning_rate": 0.0001921966387215099, "loss": 4.4375, "step": 3671 }, { "epoch": 0.3807425360802027, "grad_norm": 1.2109375, "learning_rate": 0.00019219243153804273, "loss": 4.4184, "step": 3672 }, { "epoch": 0.38084622413469077, "grad_norm": 1.078125, "learning_rate": 0.00019218822326680235, "loss": 4.4274, "step": 3673 }, { "epoch": 0.38094991218917884, "grad_norm": 1.3984375, "learning_rate": 0.00019218401390783834, "loss": 4.4228, "step": 3674 }, { "epoch": 0.3810536002436669, "grad_norm": 1.2734375, "learning_rate": 0.00019217980346120044, "loss": 4.4196, "step": 3675 }, { "epoch": 0.381157288298155, "grad_norm": 1.265625, "learning_rate": 0.00019217559192693831, "loss": 4.4682, "step": 3676 }, { "epoch": 0.38126097635264306, "grad_norm": 1.1640625, "learning_rate": 0.0001921713793051016, "loss": 4.4386, "step": 3677 }, { "epoch": 0.3813646644071311, "grad_norm": 1.3203125, "learning_rate": 0.00019216716559574003, "loss": 4.3986, "step": 3678 }, { "epoch": 0.3814683524616192, "grad_norm": 1.140625, "learning_rate": 0.00019216295079890338, "loss": 4.4526, "step": 3679 }, { "epoch": 0.38157204051610727, "grad_norm": 1.4453125, "learning_rate": 0.0001921587349146413, "loss": 4.4512, "step": 3680 }, { "epoch": 0.38167572857059534, "grad_norm": 1.2734375, "learning_rate": 0.00019215451794300355, "loss": 4.456, "step": 3681 }, { "epoch": 0.3817794166250834, "grad_norm": 1.46875, "learning_rate": 0.00019215029988403992, "loss": 4.4242, "step": 3682 }, { "epoch": 0.3818831046795715, "grad_norm": 1.34375, "learning_rate": 0.00019214608073780015, "loss": 4.4795, "step": 3683 }, { "epoch": 0.38198679273405955, "grad_norm": 1.46875, "learning_rate": 0.00019214186050433405, "loss": 4.427, "step": 3684 }, { "epoch": 0.3820904807885477, "grad_norm": 1.390625, "learning_rate": 0.00019213763918369137, "loss": 4.4174, "step": 3685 }, { "epoch": 0.38219416884303575, "grad_norm": 1.2421875, "learning_rate": 0.00019213341677592197, "loss": 4.4468, "step": 3686 }, { "epoch": 0.3822978568975238, "grad_norm": 1.1484375, "learning_rate": 0.00019212919328107564, "loss": 4.4515, "step": 3687 }, { "epoch": 0.3824015449520119, "grad_norm": 1.359375, "learning_rate": 0.00019212496869920218, "loss": 4.4227, "step": 3688 }, { "epoch": 0.38250523300649997, "grad_norm": 1.25, "learning_rate": 0.0001921207430303515, "loss": 4.457, "step": 3689 }, { "epoch": 0.38260892106098804, "grad_norm": 1.5078125, "learning_rate": 0.0001921165162745734, "loss": 4.4166, "step": 3690 }, { "epoch": 0.3827126091154761, "grad_norm": 1.359375, "learning_rate": 0.0001921122884319178, "loss": 4.3881, "step": 3691 }, { "epoch": 0.3828162971699642, "grad_norm": 1.1953125, "learning_rate": 0.00019210805950243455, "loss": 4.3417, "step": 3692 }, { "epoch": 0.38291998522445225, "grad_norm": 1.125, "learning_rate": 0.00019210382948617357, "loss": 4.4258, "step": 3693 }, { "epoch": 0.3830236732789403, "grad_norm": 1.3515625, "learning_rate": 0.00019209959838318476, "loss": 4.4543, "step": 3694 }, { "epoch": 0.3831273613334284, "grad_norm": 1.203125, "learning_rate": 0.00019209536619351804, "loss": 4.4249, "step": 3695 }, { "epoch": 0.38323104938791647, "grad_norm": 1.5078125, "learning_rate": 0.00019209113291722334, "loss": 4.4234, "step": 3696 }, { "epoch": 0.38333473744240454, "grad_norm": 1.4140625, "learning_rate": 0.0001920868985543506, "loss": 4.4391, "step": 3697 }, { "epoch": 0.3834384254968926, "grad_norm": 1.15625, "learning_rate": 0.00019208266310494985, "loss": 4.3681, "step": 3698 }, { "epoch": 0.3835421135513807, "grad_norm": 1.1953125, "learning_rate": 0.00019207842656907099, "loss": 4.4259, "step": 3699 }, { "epoch": 0.38364580160586875, "grad_norm": 1.203125, "learning_rate": 0.000192074188946764, "loss": 4.4409, "step": 3700 }, { "epoch": 0.3837494896603568, "grad_norm": 1.09375, "learning_rate": 0.00019206995023807893, "loss": 4.4505, "step": 3701 }, { "epoch": 0.3838531777148449, "grad_norm": 1.4375, "learning_rate": 0.00019206571044306578, "loss": 4.4179, "step": 3702 }, { "epoch": 0.38395686576933297, "grad_norm": 1.28125, "learning_rate": 0.00019206146956177454, "loss": 4.4102, "step": 3703 }, { "epoch": 0.38406055382382104, "grad_norm": 1.3359375, "learning_rate": 0.00019205722759425527, "loss": 4.4051, "step": 3704 }, { "epoch": 0.3841642418783091, "grad_norm": 1.1953125, "learning_rate": 0.00019205298454055806, "loss": 4.4391, "step": 3705 }, { "epoch": 0.3842679299327972, "grad_norm": 1.28125, "learning_rate": 0.0001920487404007329, "loss": 4.4612, "step": 3706 }, { "epoch": 0.38437161798728525, "grad_norm": 1.125, "learning_rate": 0.00019204449517482993, "loss": 4.4197, "step": 3707 }, { "epoch": 0.3844753060417733, "grad_norm": 1.515625, "learning_rate": 0.00019204024886289919, "loss": 4.4594, "step": 3708 }, { "epoch": 0.3845789940962614, "grad_norm": 1.375, "learning_rate": 0.0001920360014649908, "loss": 4.4012, "step": 3709 }, { "epoch": 0.38468268215074947, "grad_norm": 1.4453125, "learning_rate": 0.00019203175298115492, "loss": 4.4191, "step": 3710 }, { "epoch": 0.38478637020523754, "grad_norm": 1.34375, "learning_rate": 0.0001920275034114416, "loss": 4.3976, "step": 3711 }, { "epoch": 0.3848900582597256, "grad_norm": 1.3203125, "learning_rate": 0.00019202325275590102, "loss": 4.3799, "step": 3712 }, { "epoch": 0.3849937463142137, "grad_norm": 1.203125, "learning_rate": 0.00019201900101458333, "loss": 4.4567, "step": 3713 }, { "epoch": 0.38509743436870175, "grad_norm": 1.2265625, "learning_rate": 0.0001920147481875387, "loss": 4.3867, "step": 3714 }, { "epoch": 0.3852011224231898, "grad_norm": 1.125, "learning_rate": 0.0001920104942748173, "loss": 4.4149, "step": 3715 }, { "epoch": 0.3853048104776779, "grad_norm": 1.3984375, "learning_rate": 0.00019200623927646934, "loss": 4.4151, "step": 3716 }, { "epoch": 0.38540849853216597, "grad_norm": 1.328125, "learning_rate": 0.000192001983192545, "loss": 4.4309, "step": 3717 }, { "epoch": 0.38551218658665404, "grad_norm": 1.1171875, "learning_rate": 0.00019199772602309452, "loss": 4.37, "step": 3718 }, { "epoch": 0.3856158746411421, "grad_norm": 1.1015625, "learning_rate": 0.00019199346776816812, "loss": 4.4452, "step": 3719 }, { "epoch": 0.3857195626956302, "grad_norm": 1.28125, "learning_rate": 0.00019198920842781604, "loss": 4.4276, "step": 3720 }, { "epoch": 0.38582325075011825, "grad_norm": 1.1484375, "learning_rate": 0.00019198494800208853, "loss": 4.4077, "step": 3721 }, { "epoch": 0.3859269388046063, "grad_norm": 1.3203125, "learning_rate": 0.00019198068649103585, "loss": 4.3955, "step": 3722 }, { "epoch": 0.3860306268590944, "grad_norm": 1.203125, "learning_rate": 0.00019197642389470832, "loss": 4.4649, "step": 3723 }, { "epoch": 0.38613431491358247, "grad_norm": 1.375, "learning_rate": 0.00019197216021315622, "loss": 4.4707, "step": 3724 }, { "epoch": 0.38623800296807054, "grad_norm": 1.2890625, "learning_rate": 0.00019196789544642981, "loss": 4.4139, "step": 3725 }, { "epoch": 0.3863416910225586, "grad_norm": 1.2109375, "learning_rate": 0.00019196362959457948, "loss": 4.4305, "step": 3726 }, { "epoch": 0.3864453790770467, "grad_norm": 1.1171875, "learning_rate": 0.00019195936265765552, "loss": 4.4144, "step": 3727 }, { "epoch": 0.38654906713153475, "grad_norm": 1.359375, "learning_rate": 0.00019195509463570831, "loss": 4.3813, "step": 3728 }, { "epoch": 0.3866527551860228, "grad_norm": 1.21875, "learning_rate": 0.00019195082552878815, "loss": 4.4248, "step": 3729 }, { "epoch": 0.38675644324051095, "grad_norm": 1.46875, "learning_rate": 0.00019194655533694545, "loss": 4.4362, "step": 3730 }, { "epoch": 0.386860131294999, "grad_norm": 1.4140625, "learning_rate": 0.0001919422840602306, "loss": 4.4018, "step": 3731 }, { "epoch": 0.3869638193494871, "grad_norm": 1.25, "learning_rate": 0.000191938011698694, "loss": 4.4026, "step": 3732 }, { "epoch": 0.38706750740397516, "grad_norm": 1.21875, "learning_rate": 0.00019193373825238602, "loss": 4.46, "step": 3733 }, { "epoch": 0.38717119545846324, "grad_norm": 1.2734375, "learning_rate": 0.00019192946372135713, "loss": 4.3934, "step": 3734 }, { "epoch": 0.3872748835129513, "grad_norm": 1.1796875, "learning_rate": 0.00019192518810565772, "loss": 4.4224, "step": 3735 }, { "epoch": 0.3873785715674394, "grad_norm": 1.3828125, "learning_rate": 0.00019192091140533824, "loss": 4.4006, "step": 3736 }, { "epoch": 0.38748225962192745, "grad_norm": 1.2890625, "learning_rate": 0.0001919166336204492, "loss": 4.4281, "step": 3737 }, { "epoch": 0.3875859476764155, "grad_norm": 1.265625, "learning_rate": 0.00019191235475104104, "loss": 4.4234, "step": 3738 }, { "epoch": 0.3876896357309036, "grad_norm": 1.2421875, "learning_rate": 0.00019190807479716425, "loss": 4.4203, "step": 3739 }, { "epoch": 0.38779332378539166, "grad_norm": 1.1015625, "learning_rate": 0.00019190379375886934, "loss": 4.4245, "step": 3740 }, { "epoch": 0.38789701183987974, "grad_norm": 1.0625, "learning_rate": 0.00019189951163620678, "loss": 4.4599, "step": 3741 }, { "epoch": 0.3880006998943678, "grad_norm": 1.1015625, "learning_rate": 0.00019189522842922714, "loss": 4.3865, "step": 3742 }, { "epoch": 0.3881043879488559, "grad_norm": 0.9609375, "learning_rate": 0.00019189094413798094, "loss": 4.4373, "step": 3743 }, { "epoch": 0.38820807600334395, "grad_norm": 1.2578125, "learning_rate": 0.00019188665876251874, "loss": 4.4681, "step": 3744 }, { "epoch": 0.388311764057832, "grad_norm": 1.0, "learning_rate": 0.0001918823723028911, "loss": 4.4097, "step": 3745 }, { "epoch": 0.3884154521123201, "grad_norm": 1.59375, "learning_rate": 0.00019187808475914855, "loss": 4.4054, "step": 3746 }, { "epoch": 0.38851914016680816, "grad_norm": 1.328125, "learning_rate": 0.00019187379613134174, "loss": 4.394, "step": 3747 }, { "epoch": 0.38862282822129623, "grad_norm": 1.328125, "learning_rate": 0.00019186950641952128, "loss": 4.4425, "step": 3748 }, { "epoch": 0.3887265162757843, "grad_norm": 1.2109375, "learning_rate": 0.00019186521562373774, "loss": 4.4016, "step": 3749 }, { "epoch": 0.3888302043302724, "grad_norm": 1.3125, "learning_rate": 0.00019186092374404176, "loss": 4.3771, "step": 3750 }, { "epoch": 0.38893389238476045, "grad_norm": 1.1171875, "learning_rate": 0.00019185663078048396, "loss": 4.4122, "step": 3751 }, { "epoch": 0.3890375804392485, "grad_norm": 1.375, "learning_rate": 0.00019185233673311506, "loss": 4.4324, "step": 3752 }, { "epoch": 0.3891412684937366, "grad_norm": 1.1640625, "learning_rate": 0.0001918480416019856, "loss": 4.4282, "step": 3753 }, { "epoch": 0.38924495654822466, "grad_norm": 1.453125, "learning_rate": 0.0001918437453871464, "loss": 4.4434, "step": 3754 }, { "epoch": 0.38934864460271273, "grad_norm": 1.3671875, "learning_rate": 0.00019183944808864807, "loss": 4.4197, "step": 3755 }, { "epoch": 0.3894523326572008, "grad_norm": 1.265625, "learning_rate": 0.00019183514970654136, "loss": 4.3956, "step": 3756 }, { "epoch": 0.3895560207116889, "grad_norm": 1.203125, "learning_rate": 0.0001918308502408769, "loss": 4.4278, "step": 3757 }, { "epoch": 0.38965970876617695, "grad_norm": 1.265625, "learning_rate": 0.00019182654969170551, "loss": 4.4023, "step": 3758 }, { "epoch": 0.389763396820665, "grad_norm": 1.0859375, "learning_rate": 0.0001918222480590779, "loss": 4.4272, "step": 3759 }, { "epoch": 0.3898670848751531, "grad_norm": 1.6953125, "learning_rate": 0.00019181794534304484, "loss": 4.4145, "step": 3760 }, { "epoch": 0.38997077292964116, "grad_norm": 1.5546875, "learning_rate": 0.00019181364154365705, "loss": 4.4161, "step": 3761 }, { "epoch": 0.39007446098412923, "grad_norm": 1.40625, "learning_rate": 0.00019180933666096536, "loss": 4.4161, "step": 3762 }, { "epoch": 0.3901781490386173, "grad_norm": 1.328125, "learning_rate": 0.00019180503069502052, "loss": 4.4283, "step": 3763 }, { "epoch": 0.3902818370931054, "grad_norm": 1.421875, "learning_rate": 0.0001918007236458734, "loss": 4.4056, "step": 3764 }, { "epoch": 0.39038552514759345, "grad_norm": 1.25, "learning_rate": 0.00019179641551357474, "loss": 4.4236, "step": 3765 }, { "epoch": 0.3904892132020815, "grad_norm": 1.59375, "learning_rate": 0.00019179210629817543, "loss": 4.3897, "step": 3766 }, { "epoch": 0.3905929012565696, "grad_norm": 1.4609375, "learning_rate": 0.0001917877959997263, "loss": 4.3884, "step": 3767 }, { "epoch": 0.39069658931105766, "grad_norm": 1.4453125, "learning_rate": 0.00019178348461827818, "loss": 4.4123, "step": 3768 }, { "epoch": 0.39080027736554573, "grad_norm": 1.2890625, "learning_rate": 0.00019177917215388196, "loss": 4.4079, "step": 3769 }, { "epoch": 0.3909039654200338, "grad_norm": 1.640625, "learning_rate": 0.00019177485860658852, "loss": 4.4478, "step": 3770 }, { "epoch": 0.3910076534745219, "grad_norm": 1.46875, "learning_rate": 0.00019177054397644878, "loss": 4.4614, "step": 3771 }, { "epoch": 0.39111134152900995, "grad_norm": 1.6171875, "learning_rate": 0.00019176622826351362, "loss": 4.416, "step": 3772 }, { "epoch": 0.391215029583498, "grad_norm": 1.546875, "learning_rate": 0.00019176191146783394, "loss": 4.4209, "step": 3773 }, { "epoch": 0.39131871763798615, "grad_norm": 1.15625, "learning_rate": 0.00019175759358946076, "loss": 4.4549, "step": 3774 }, { "epoch": 0.3914224056924742, "grad_norm": 1.1015625, "learning_rate": 0.00019175327462844495, "loss": 4.411, "step": 3775 }, { "epoch": 0.3915260937469623, "grad_norm": 1.15625, "learning_rate": 0.00019174895458483744, "loss": 4.4674, "step": 3776 }, { "epoch": 0.39162978180145036, "grad_norm": 0.96875, "learning_rate": 0.0001917446334586893, "loss": 4.4098, "step": 3777 }, { "epoch": 0.39173346985593843, "grad_norm": 1.4140625, "learning_rate": 0.00019174031125005145, "loss": 4.4235, "step": 3778 }, { "epoch": 0.3918371579104265, "grad_norm": 1.2421875, "learning_rate": 0.0001917359879589749, "loss": 4.4081, "step": 3779 }, { "epoch": 0.3919408459649146, "grad_norm": 1.5390625, "learning_rate": 0.00019173166358551065, "loss": 4.4185, "step": 3780 }, { "epoch": 0.39204453401940265, "grad_norm": 1.390625, "learning_rate": 0.00019172733812970974, "loss": 4.4354, "step": 3781 }, { "epoch": 0.3921482220738907, "grad_norm": 1.21875, "learning_rate": 0.00019172301159162318, "loss": 4.4443, "step": 3782 }, { "epoch": 0.3922519101283788, "grad_norm": 1.0859375, "learning_rate": 0.00019171868397130205, "loss": 4.4231, "step": 3783 }, { "epoch": 0.39235559818286686, "grad_norm": 1.2734375, "learning_rate": 0.00019171435526879743, "loss": 4.452, "step": 3784 }, { "epoch": 0.39245928623735493, "grad_norm": 1.15625, "learning_rate": 0.00019171002548416035, "loss": 4.3952, "step": 3785 }, { "epoch": 0.392562974291843, "grad_norm": 1.2421875, "learning_rate": 0.0001917056946174419, "loss": 4.37, "step": 3786 }, { "epoch": 0.3926666623463311, "grad_norm": 1.09375, "learning_rate": 0.00019170136266869319, "loss": 4.4024, "step": 3787 }, { "epoch": 0.39277035040081915, "grad_norm": 1.2578125, "learning_rate": 0.00019169702963796534, "loss": 4.3948, "step": 3788 }, { "epoch": 0.3928740384553072, "grad_norm": 1.171875, "learning_rate": 0.00019169269552530947, "loss": 4.3902, "step": 3789 }, { "epoch": 0.3929777265097953, "grad_norm": 1.25, "learning_rate": 0.00019168836033077672, "loss": 4.4408, "step": 3790 }, { "epoch": 0.39308141456428336, "grad_norm": 1.1328125, "learning_rate": 0.0001916840240544182, "loss": 4.4168, "step": 3791 }, { "epoch": 0.39318510261877143, "grad_norm": 1.296875, "learning_rate": 0.00019167968669628517, "loss": 4.3858, "step": 3792 }, { "epoch": 0.3932887906732595, "grad_norm": 1.140625, "learning_rate": 0.0001916753482564287, "loss": 4.4098, "step": 3793 }, { "epoch": 0.3933924787277476, "grad_norm": 1.4453125, "learning_rate": 0.00019167100873490004, "loss": 4.4368, "step": 3794 }, { "epoch": 0.39349616678223565, "grad_norm": 1.34375, "learning_rate": 0.00019166666813175037, "loss": 4.4083, "step": 3795 }, { "epoch": 0.3935998548367237, "grad_norm": 1.234375, "learning_rate": 0.00019166232644703092, "loss": 4.3992, "step": 3796 }, { "epoch": 0.3937035428912118, "grad_norm": 1.1953125, "learning_rate": 0.00019165798368079293, "loss": 4.4001, "step": 3797 }, { "epoch": 0.39380723094569986, "grad_norm": 1.2578125, "learning_rate": 0.0001916536398330876, "loss": 4.3935, "step": 3798 }, { "epoch": 0.39391091900018793, "grad_norm": 1.1328125, "learning_rate": 0.0001916492949039662, "loss": 4.4262, "step": 3799 }, { "epoch": 0.394014607054676, "grad_norm": 1.4609375, "learning_rate": 0.00019164494889348, "loss": 4.4222, "step": 3800 }, { "epoch": 0.3941182951091641, "grad_norm": 1.28125, "learning_rate": 0.00019164060180168027, "loss": 4.4313, "step": 3801 }, { "epoch": 0.39422198316365215, "grad_norm": 1.390625, "learning_rate": 0.0001916362536286183, "loss": 4.3749, "step": 3802 }, { "epoch": 0.3943256712181402, "grad_norm": 1.203125, "learning_rate": 0.00019163190437434543, "loss": 4.4329, "step": 3803 }, { "epoch": 0.3944293592726283, "grad_norm": 1.453125, "learning_rate": 0.00019162755403891293, "loss": 4.4685, "step": 3804 }, { "epoch": 0.39453304732711636, "grad_norm": 1.3125, "learning_rate": 0.00019162320262237215, "loss": 4.4298, "step": 3805 }, { "epoch": 0.39463673538160443, "grad_norm": 1.296875, "learning_rate": 0.00019161885012477443, "loss": 4.4126, "step": 3806 }, { "epoch": 0.3947404234360925, "grad_norm": 1.265625, "learning_rate": 0.00019161449654617112, "loss": 4.4101, "step": 3807 }, { "epoch": 0.3948441114905806, "grad_norm": 1.3046875, "learning_rate": 0.0001916101418866136, "loss": 4.3926, "step": 3808 }, { "epoch": 0.39494779954506865, "grad_norm": 1.2421875, "learning_rate": 0.00019160578614615321, "loss": 4.4267, "step": 3809 }, { "epoch": 0.3950514875995567, "grad_norm": 1.2265625, "learning_rate": 0.00019160142932484142, "loss": 4.4472, "step": 3810 }, { "epoch": 0.3951551756540448, "grad_norm": 1.1484375, "learning_rate": 0.00019159707142272952, "loss": 4.4031, "step": 3811 }, { "epoch": 0.39525886370853286, "grad_norm": 1.3125, "learning_rate": 0.0001915927124398691, "loss": 4.4008, "step": 3812 }, { "epoch": 0.39536255176302093, "grad_norm": 1.203125, "learning_rate": 0.00019158835237631142, "loss": 4.3912, "step": 3813 }, { "epoch": 0.395466239817509, "grad_norm": 1.1796875, "learning_rate": 0.00019158399123210803, "loss": 4.3948, "step": 3814 }, { "epoch": 0.3955699278719971, "grad_norm": 1.1171875, "learning_rate": 0.0001915796290073103, "loss": 4.3967, "step": 3815 }, { "epoch": 0.39567361592648514, "grad_norm": 1.3515625, "learning_rate": 0.00019157526570196982, "loss": 4.4215, "step": 3816 }, { "epoch": 0.3957773039809732, "grad_norm": 1.265625, "learning_rate": 0.00019157090131613797, "loss": 4.4453, "step": 3817 }, { "epoch": 0.3958809920354613, "grad_norm": 1.265625, "learning_rate": 0.00019156653584986627, "loss": 4.4207, "step": 3818 }, { "epoch": 0.3959846800899494, "grad_norm": 1.140625, "learning_rate": 0.00019156216930320624, "loss": 4.372, "step": 3819 }, { "epoch": 0.3960883681444375, "grad_norm": 1.2421875, "learning_rate": 0.0001915578016762094, "loss": 4.4139, "step": 3820 }, { "epoch": 0.39619205619892556, "grad_norm": 1.15625, "learning_rate": 0.0001915534329689273, "loss": 4.4365, "step": 3821 }, { "epoch": 0.39629574425341363, "grad_norm": 1.46875, "learning_rate": 0.00019154906318141147, "loss": 4.3953, "step": 3822 }, { "epoch": 0.3963994323079017, "grad_norm": 1.3671875, "learning_rate": 0.00019154469231371345, "loss": 4.4415, "step": 3823 }, { "epoch": 0.39650312036238977, "grad_norm": 1.2734375, "learning_rate": 0.0001915403203658848, "loss": 4.4293, "step": 3824 }, { "epoch": 0.39660680841687784, "grad_norm": 1.203125, "learning_rate": 0.00019153594733797716, "loss": 4.4074, "step": 3825 }, { "epoch": 0.3967104964713659, "grad_norm": 1.25, "learning_rate": 0.00019153157323004212, "loss": 4.4193, "step": 3826 }, { "epoch": 0.396814184525854, "grad_norm": 1.140625, "learning_rate": 0.00019152719804213124, "loss": 4.4267, "step": 3827 }, { "epoch": 0.39691787258034206, "grad_norm": 1.421875, "learning_rate": 0.0001915228217742962, "loss": 4.4383, "step": 3828 }, { "epoch": 0.39702156063483013, "grad_norm": 1.28125, "learning_rate": 0.00019151844442658858, "loss": 4.3961, "step": 3829 }, { "epoch": 0.3971252486893182, "grad_norm": 1.2578125, "learning_rate": 0.00019151406599906005, "loss": 4.4261, "step": 3830 }, { "epoch": 0.39722893674380627, "grad_norm": 1.1796875, "learning_rate": 0.00019150968649176228, "loss": 4.4638, "step": 3831 }, { "epoch": 0.39733262479829434, "grad_norm": 1.1484375, "learning_rate": 0.00019150530590474694, "loss": 4.4539, "step": 3832 }, { "epoch": 0.3974363128527824, "grad_norm": 1.046875, "learning_rate": 0.00019150092423806574, "loss": 4.4545, "step": 3833 }, { "epoch": 0.3975400009072705, "grad_norm": 1.1484375, "learning_rate": 0.00019149654149177035, "loss": 4.4372, "step": 3834 }, { "epoch": 0.39764368896175856, "grad_norm": 0.97265625, "learning_rate": 0.00019149215766591247, "loss": 4.401, "step": 3835 }, { "epoch": 0.39774737701624663, "grad_norm": 1.3203125, "learning_rate": 0.00019148777276054385, "loss": 4.4252, "step": 3836 }, { "epoch": 0.3978510650707347, "grad_norm": 1.140625, "learning_rate": 0.0001914833867757162, "loss": 4.417, "step": 3837 }, { "epoch": 0.39795475312522277, "grad_norm": 1.390625, "learning_rate": 0.0001914789997114813, "loss": 4.428, "step": 3838 }, { "epoch": 0.39805844117971084, "grad_norm": 1.1953125, "learning_rate": 0.00019147461156789088, "loss": 4.401, "step": 3839 }, { "epoch": 0.3981621292341989, "grad_norm": 1.375, "learning_rate": 0.00019147022234499675, "loss": 4.4456, "step": 3840 }, { "epoch": 0.398265817288687, "grad_norm": 1.1796875, "learning_rate": 0.00019146583204285073, "loss": 4.425, "step": 3841 }, { "epoch": 0.39836950534317506, "grad_norm": 1.359375, "learning_rate": 0.00019146144066150452, "loss": 4.456, "step": 3842 }, { "epoch": 0.39847319339766313, "grad_norm": 1.2421875, "learning_rate": 0.00019145704820101, "loss": 4.3935, "step": 3843 }, { "epoch": 0.3985768814521512, "grad_norm": 1.1953125, "learning_rate": 0.00019145265466141898, "loss": 4.4166, "step": 3844 }, { "epoch": 0.39868056950663927, "grad_norm": 1.1171875, "learning_rate": 0.00019144826004278333, "loss": 4.4599, "step": 3845 }, { "epoch": 0.39878425756112734, "grad_norm": 1.3359375, "learning_rate": 0.00019144386434515487, "loss": 4.4153, "step": 3846 }, { "epoch": 0.3988879456156154, "grad_norm": 1.1328125, "learning_rate": 0.00019143946756858547, "loss": 4.3922, "step": 3847 }, { "epoch": 0.3989916336701035, "grad_norm": 1.46875, "learning_rate": 0.00019143506971312698, "loss": 4.4292, "step": 3848 }, { "epoch": 0.39909532172459156, "grad_norm": 1.3125, "learning_rate": 0.00019143067077883138, "loss": 4.4366, "step": 3849 }, { "epoch": 0.3991990097790796, "grad_norm": 1.2421875, "learning_rate": 0.00019142627076575047, "loss": 4.3678, "step": 3850 }, { "epoch": 0.3993026978335677, "grad_norm": 1.125, "learning_rate": 0.0001914218696739362, "loss": 4.3722, "step": 3851 }, { "epoch": 0.39940638588805577, "grad_norm": 1.296875, "learning_rate": 0.0001914174675034405, "loss": 4.4099, "step": 3852 }, { "epoch": 0.39951007394254384, "grad_norm": 1.109375, "learning_rate": 0.00019141306425431533, "loss": 4.4257, "step": 3853 }, { "epoch": 0.3996137619970319, "grad_norm": 1.71875, "learning_rate": 0.00019140865992661263, "loss": 4.4804, "step": 3854 }, { "epoch": 0.39971745005152, "grad_norm": 1.5625, "learning_rate": 0.00019140425452038437, "loss": 4.4406, "step": 3855 }, { "epoch": 0.39982113810600806, "grad_norm": 1.3125, "learning_rate": 0.0001913998480356825, "loss": 4.4184, "step": 3856 }, { "epoch": 0.3999248261604961, "grad_norm": 1.2890625, "learning_rate": 0.00019139544047255908, "loss": 4.3869, "step": 3857 }, { "epoch": 0.4000285142149842, "grad_norm": 1.1953125, "learning_rate": 0.00019139103183106606, "loss": 4.3996, "step": 3858 }, { "epoch": 0.40013220226947227, "grad_norm": 1.1484375, "learning_rate": 0.00019138662211125546, "loss": 4.3957, "step": 3859 }, { "epoch": 0.40023589032396034, "grad_norm": 1.28125, "learning_rate": 0.0001913822113131793, "loss": 4.4421, "step": 3860 }, { "epoch": 0.4003395783784484, "grad_norm": 1.1484375, "learning_rate": 0.00019137779943688966, "loss": 4.4283, "step": 3861 }, { "epoch": 0.4004432664329365, "grad_norm": 1.3359375, "learning_rate": 0.00019137338648243855, "loss": 4.4069, "step": 3862 }, { "epoch": 0.40054695448742456, "grad_norm": 1.2265625, "learning_rate": 0.00019136897244987814, "loss": 4.4076, "step": 3863 }, { "epoch": 0.4006506425419127, "grad_norm": 1.140625, "learning_rate": 0.00019136455733926035, "loss": 4.4231, "step": 3864 }, { "epoch": 0.40075433059640075, "grad_norm": 1.0, "learning_rate": 0.0001913601411506374, "loss": 4.4225, "step": 3865 }, { "epoch": 0.4008580186508888, "grad_norm": 1.15625, "learning_rate": 0.00019135572388406135, "loss": 4.4317, "step": 3866 }, { "epoch": 0.4009617067053769, "grad_norm": 1.015625, "learning_rate": 0.00019135130553958433, "loss": 4.4126, "step": 3867 }, { "epoch": 0.40106539475986497, "grad_norm": 1.3203125, "learning_rate": 0.0001913468861172585, "loss": 4.4147, "step": 3868 }, { "epoch": 0.40116908281435304, "grad_norm": 1.21875, "learning_rate": 0.0001913424656171359, "loss": 4.407, "step": 3869 }, { "epoch": 0.4012727708688411, "grad_norm": 1.3671875, "learning_rate": 0.00019133804403926883, "loss": 4.4141, "step": 3870 }, { "epoch": 0.4013764589233292, "grad_norm": 1.234375, "learning_rate": 0.00019133362138370935, "loss": 4.3927, "step": 3871 }, { "epoch": 0.40148014697781725, "grad_norm": 1.328125, "learning_rate": 0.00019132919765050969, "loss": 4.3875, "step": 3872 }, { "epoch": 0.4015838350323053, "grad_norm": 1.2109375, "learning_rate": 0.000191324772839722, "loss": 4.4231, "step": 3873 }, { "epoch": 0.4016875230867934, "grad_norm": 1.328125, "learning_rate": 0.00019132034695139858, "loss": 4.3975, "step": 3874 }, { "epoch": 0.40179121114128147, "grad_norm": 1.234375, "learning_rate": 0.00019131591998559157, "loss": 4.3899, "step": 3875 }, { "epoch": 0.40189489919576954, "grad_norm": 1.2109375, "learning_rate": 0.00019131149194235323, "loss": 4.3924, "step": 3876 }, { "epoch": 0.4019985872502576, "grad_norm": 1.1640625, "learning_rate": 0.0001913070628217358, "loss": 4.4521, "step": 3877 }, { "epoch": 0.4021022753047457, "grad_norm": 1.25, "learning_rate": 0.0001913026326237916, "loss": 4.4667, "step": 3878 }, { "epoch": 0.40220596335923375, "grad_norm": 1.1484375, "learning_rate": 0.00019129820134857278, "loss": 4.4182, "step": 3879 }, { "epoch": 0.4023096514137218, "grad_norm": 1.2421875, "learning_rate": 0.0001912937689961317, "loss": 4.4307, "step": 3880 }, { "epoch": 0.4024133394682099, "grad_norm": 1.1875, "learning_rate": 0.00019128933556652067, "loss": 4.4365, "step": 3881 }, { "epoch": 0.40251702752269797, "grad_norm": 1.3046875, "learning_rate": 0.00019128490105979195, "loss": 4.4601, "step": 3882 }, { "epoch": 0.40262071557718604, "grad_norm": 1.203125, "learning_rate": 0.00019128046547599791, "loss": 4.4408, "step": 3883 }, { "epoch": 0.4027244036316741, "grad_norm": 1.3671875, "learning_rate": 0.00019127602881519086, "loss": 4.4029, "step": 3884 }, { "epoch": 0.4028280916861622, "grad_norm": 1.265625, "learning_rate": 0.00019127159107742316, "loss": 4.4356, "step": 3885 }, { "epoch": 0.40293177974065025, "grad_norm": 1.2421875, "learning_rate": 0.00019126715226274715, "loss": 4.4414, "step": 3886 }, { "epoch": 0.4030354677951383, "grad_norm": 1.15625, "learning_rate": 0.00019126271237121523, "loss": 4.4243, "step": 3887 }, { "epoch": 0.4031391558496264, "grad_norm": 1.3125, "learning_rate": 0.00019125827140287975, "loss": 4.3972, "step": 3888 }, { "epoch": 0.40324284390411447, "grad_norm": 1.203125, "learning_rate": 0.00019125382935779314, "loss": 4.3953, "step": 3889 }, { "epoch": 0.40334653195860254, "grad_norm": 1.4140625, "learning_rate": 0.0001912493862360078, "loss": 4.4277, "step": 3890 }, { "epoch": 0.4034502200130906, "grad_norm": 1.2578125, "learning_rate": 0.0001912449420375762, "loss": 4.4313, "step": 3891 }, { "epoch": 0.4035539080675787, "grad_norm": 1.5, "learning_rate": 0.00019124049676255068, "loss": 4.4294, "step": 3892 }, { "epoch": 0.40365759612206675, "grad_norm": 1.3203125, "learning_rate": 0.00019123605041098377, "loss": 4.4509, "step": 3893 }, { "epoch": 0.4037612841765548, "grad_norm": 1.328125, "learning_rate": 0.00019123160298292792, "loss": 4.4061, "step": 3894 }, { "epoch": 0.4038649722310429, "grad_norm": 1.2421875, "learning_rate": 0.00019122715447843555, "loss": 4.4158, "step": 3895 }, { "epoch": 0.40396866028553097, "grad_norm": 1.2890625, "learning_rate": 0.00019122270489755922, "loss": 4.3951, "step": 3896 }, { "epoch": 0.40407234834001904, "grad_norm": 1.1484375, "learning_rate": 0.00019121825424035137, "loss": 4.3816, "step": 3897 }, { "epoch": 0.4041760363945071, "grad_norm": 1.3125, "learning_rate": 0.0001912138025068646, "loss": 4.4206, "step": 3898 }, { "epoch": 0.4042797244489952, "grad_norm": 1.28125, "learning_rate": 0.00019120934969715133, "loss": 4.4204, "step": 3899 }, { "epoch": 0.40438341250348325, "grad_norm": 1.265625, "learning_rate": 0.00019120489581126414, "loss": 4.4165, "step": 3900 }, { "epoch": 0.4044871005579713, "grad_norm": 1.203125, "learning_rate": 0.0001912004408492556, "loss": 4.4463, "step": 3901 }, { "epoch": 0.4045907886124594, "grad_norm": 1.1328125, "learning_rate": 0.00019119598481117826, "loss": 4.4039, "step": 3902 }, { "epoch": 0.40469447666694747, "grad_norm": 1.125, "learning_rate": 0.00019119152769708474, "loss": 4.4281, "step": 3903 }, { "epoch": 0.40479816472143554, "grad_norm": 1.28125, "learning_rate": 0.00019118706950702752, "loss": 4.4457, "step": 3904 }, { "epoch": 0.4049018527759236, "grad_norm": 1.140625, "learning_rate": 0.0001911826102410593, "loss": 4.4312, "step": 3905 }, { "epoch": 0.4050055408304117, "grad_norm": 1.3125, "learning_rate": 0.00019117814989923267, "loss": 4.4275, "step": 3906 }, { "epoch": 0.40510922888489975, "grad_norm": 1.2109375, "learning_rate": 0.00019117368848160023, "loss": 4.4088, "step": 3907 }, { "epoch": 0.4052129169393879, "grad_norm": 1.1796875, "learning_rate": 0.00019116922598821464, "loss": 4.4433, "step": 3908 }, { "epoch": 0.40531660499387595, "grad_norm": 1.0546875, "learning_rate": 0.00019116476241912858, "loss": 4.4212, "step": 3909 }, { "epoch": 0.405420293048364, "grad_norm": 1.1796875, "learning_rate": 0.00019116029777439467, "loss": 4.3693, "step": 3910 }, { "epoch": 0.4055239811028521, "grad_norm": 1.0234375, "learning_rate": 0.0001911558320540656, "loss": 4.416, "step": 3911 }, { "epoch": 0.40562766915734016, "grad_norm": 1.265625, "learning_rate": 0.00019115136525819408, "loss": 4.4272, "step": 3912 }, { "epoch": 0.40573135721182824, "grad_norm": 1.125, "learning_rate": 0.00019114689738683281, "loss": 4.3669, "step": 3913 }, { "epoch": 0.4058350452663163, "grad_norm": 1.4296875, "learning_rate": 0.0001911424284400345, "loss": 4.4228, "step": 3914 }, { "epoch": 0.4059387333208044, "grad_norm": 1.265625, "learning_rate": 0.00019113795841785183, "loss": 4.415, "step": 3915 }, { "epoch": 0.40604242137529245, "grad_norm": 1.3515625, "learning_rate": 0.00019113348732033764, "loss": 4.4196, "step": 3916 }, { "epoch": 0.4061461094297805, "grad_norm": 1.3203125, "learning_rate": 0.0001911290151475446, "loss": 4.3969, "step": 3917 }, { "epoch": 0.4062497974842686, "grad_norm": 1.2890625, "learning_rate": 0.0001911245418995255, "loss": 4.3875, "step": 3918 }, { "epoch": 0.40635348553875666, "grad_norm": 1.1484375, "learning_rate": 0.00019112006757633317, "loss": 4.3719, "step": 3919 }, { "epoch": 0.40645717359324474, "grad_norm": 1.3515625, "learning_rate": 0.00019111559217802033, "loss": 4.3987, "step": 3920 }, { "epoch": 0.4065608616477328, "grad_norm": 1.234375, "learning_rate": 0.00019111111570463982, "loss": 4.434, "step": 3921 }, { "epoch": 0.4066645497022209, "grad_norm": 1.5, "learning_rate": 0.00019110663815624448, "loss": 4.4007, "step": 3922 }, { "epoch": 0.40676823775670895, "grad_norm": 1.375, "learning_rate": 0.00019110215953288708, "loss": 4.3783, "step": 3923 }, { "epoch": 0.406871925811197, "grad_norm": 1.3359375, "learning_rate": 0.0001910976798346205, "loss": 4.4016, "step": 3924 }, { "epoch": 0.4069756138656851, "grad_norm": 1.1875, "learning_rate": 0.0001910931990614976, "loss": 4.4789, "step": 3925 }, { "epoch": 0.40707930192017316, "grad_norm": 1.3828125, "learning_rate": 0.00019108871721357126, "loss": 4.419, "step": 3926 }, { "epoch": 0.40718298997466124, "grad_norm": 1.234375, "learning_rate": 0.00019108423429089433, "loss": 4.389, "step": 3927 }, { "epoch": 0.4072866780291493, "grad_norm": 1.296875, "learning_rate": 0.0001910797502935197, "loss": 4.4432, "step": 3928 }, { "epoch": 0.4073903660836374, "grad_norm": 1.234375, "learning_rate": 0.0001910752652215003, "loss": 4.4265, "step": 3929 }, { "epoch": 0.40749405413812545, "grad_norm": 1.203125, "learning_rate": 0.00019107077907488905, "loss": 4.3687, "step": 3930 }, { "epoch": 0.4075977421926135, "grad_norm": 1.1328125, "learning_rate": 0.00019106629185373887, "loss": 4.4174, "step": 3931 }, { "epoch": 0.4077014302471016, "grad_norm": 1.1640625, "learning_rate": 0.0001910618035581027, "loss": 4.4237, "step": 3932 }, { "epoch": 0.40780511830158966, "grad_norm": 1.0390625, "learning_rate": 0.0001910573141880335, "loss": 4.4584, "step": 3933 }, { "epoch": 0.40790880635607774, "grad_norm": 1.1875, "learning_rate": 0.00019105282374358424, "loss": 4.4041, "step": 3934 }, { "epoch": 0.4080124944105658, "grad_norm": 1.0, "learning_rate": 0.00019104833222480792, "loss": 4.4426, "step": 3935 }, { "epoch": 0.4081161824650539, "grad_norm": 1.5546875, "learning_rate": 0.00019104383963175755, "loss": 4.4512, "step": 3936 }, { "epoch": 0.40821987051954195, "grad_norm": 1.390625, "learning_rate": 0.00019103934596448606, "loss": 4.4471, "step": 3937 }, { "epoch": 0.40832355857403, "grad_norm": 1.2265625, "learning_rate": 0.00019103485122304653, "loss": 4.432, "step": 3938 }, { "epoch": 0.4084272466285181, "grad_norm": 1.15625, "learning_rate": 0.000191030355407492, "loss": 4.3946, "step": 3939 }, { "epoch": 0.40853093468300616, "grad_norm": 1.1953125, "learning_rate": 0.0001910258585178755, "loss": 4.4321, "step": 3940 }, { "epoch": 0.40863462273749424, "grad_norm": 0.99609375, "learning_rate": 0.00019102136055425007, "loss": 4.4022, "step": 3941 }, { "epoch": 0.4087383107919823, "grad_norm": 1.359375, "learning_rate": 0.0001910168615166688, "loss": 4.4023, "step": 3942 }, { "epoch": 0.4088419988464704, "grad_norm": 1.234375, "learning_rate": 0.0001910123614051848, "loss": 4.4107, "step": 3943 }, { "epoch": 0.40894568690095845, "grad_norm": 1.34375, "learning_rate": 0.00019100786021985112, "loss": 4.3875, "step": 3944 }, { "epoch": 0.4090493749554465, "grad_norm": 1.265625, "learning_rate": 0.00019100335796072088, "loss": 4.4059, "step": 3945 }, { "epoch": 0.4091530630099346, "grad_norm": 1.2265625, "learning_rate": 0.00019099885462784723, "loss": 4.4487, "step": 3946 }, { "epoch": 0.40925675106442266, "grad_norm": 1.046875, "learning_rate": 0.00019099435022128328, "loss": 4.3937, "step": 3947 }, { "epoch": 0.40936043911891073, "grad_norm": 1.234375, "learning_rate": 0.0001909898447410822, "loss": 4.4254, "step": 3948 }, { "epoch": 0.4094641271733988, "grad_norm": 1.1171875, "learning_rate": 0.0001909853381872971, "loss": 4.4152, "step": 3949 }, { "epoch": 0.4095678152278869, "grad_norm": 1.3984375, "learning_rate": 0.0001909808305599812, "loss": 4.4128, "step": 3950 }, { "epoch": 0.40967150328237495, "grad_norm": 1.296875, "learning_rate": 0.00019097632185918767, "loss": 4.3842, "step": 3951 }, { "epoch": 0.409775191336863, "grad_norm": 1.15625, "learning_rate": 0.0001909718120849697, "loss": 4.4393, "step": 3952 }, { "epoch": 0.40987887939135115, "grad_norm": 1.1015625, "learning_rate": 0.00019096730123738053, "loss": 4.4265, "step": 3953 }, { "epoch": 0.4099825674458392, "grad_norm": 1.203125, "learning_rate": 0.00019096278931647333, "loss": 4.4435, "step": 3954 }, { "epoch": 0.4100862555003273, "grad_norm": 1.0390625, "learning_rate": 0.0001909582763223014, "loss": 4.3978, "step": 3955 }, { "epoch": 0.41018994355481536, "grad_norm": 1.2265625, "learning_rate": 0.00019095376225491793, "loss": 4.4641, "step": 3956 }, { "epoch": 0.41029363160930343, "grad_norm": 1.1328125, "learning_rate": 0.00019094924711437623, "loss": 4.4192, "step": 3957 }, { "epoch": 0.4103973196637915, "grad_norm": 1.2265625, "learning_rate": 0.00019094473090072955, "loss": 4.4066, "step": 3958 }, { "epoch": 0.4105010077182796, "grad_norm": 1.15625, "learning_rate": 0.00019094021361403115, "loss": 4.4015, "step": 3959 }, { "epoch": 0.41060469577276765, "grad_norm": 1.203125, "learning_rate": 0.00019093569525433437, "loss": 4.4408, "step": 3960 }, { "epoch": 0.4107083838272557, "grad_norm": 1.0546875, "learning_rate": 0.00019093117582169252, "loss": 4.4277, "step": 3961 }, { "epoch": 0.4108120718817438, "grad_norm": 1.25, "learning_rate": 0.00019092665531615893, "loss": 4.3769, "step": 3962 }, { "epoch": 0.41091575993623186, "grad_norm": 1.1953125, "learning_rate": 0.0001909221337377869, "loss": 4.4123, "step": 3963 }, { "epoch": 0.41101944799071993, "grad_norm": 1.2109375, "learning_rate": 0.00019091761108662979, "loss": 4.4338, "step": 3964 }, { "epoch": 0.411123136045208, "grad_norm": 1.1484375, "learning_rate": 0.00019091308736274098, "loss": 4.4592, "step": 3965 }, { "epoch": 0.4112268240996961, "grad_norm": 1.09375, "learning_rate": 0.00019090856256617384, "loss": 4.449, "step": 3966 }, { "epoch": 0.41133051215418415, "grad_norm": 1.0234375, "learning_rate": 0.00019090403669698178, "loss": 4.4338, "step": 3967 }, { "epoch": 0.4114342002086722, "grad_norm": 1.3125, "learning_rate": 0.00019089950975521814, "loss": 4.4226, "step": 3968 }, { "epoch": 0.4115378882631603, "grad_norm": 1.125, "learning_rate": 0.0001908949817409364, "loss": 4.3943, "step": 3969 }, { "epoch": 0.41164157631764836, "grad_norm": 1.4375, "learning_rate": 0.0001908904526541899, "loss": 4.3946, "step": 3970 }, { "epoch": 0.41174526437213643, "grad_norm": 1.28125, "learning_rate": 0.0001908859224950322, "loss": 4.4524, "step": 3971 }, { "epoch": 0.4118489524266245, "grad_norm": 1.40625, "learning_rate": 0.00019088139126351665, "loss": 4.4208, "step": 3972 }, { "epoch": 0.4119526404811126, "grad_norm": 1.25, "learning_rate": 0.00019087685895969675, "loss": 4.3971, "step": 3973 }, { "epoch": 0.41205632853560065, "grad_norm": 1.5546875, "learning_rate": 0.000190872325583626, "loss": 4.389, "step": 3974 }, { "epoch": 0.4121600165900887, "grad_norm": 1.2421875, "learning_rate": 0.00019086779113535785, "loss": 4.3749, "step": 3975 }, { "epoch": 0.4122637046445768, "grad_norm": 1.8203125, "learning_rate": 0.00019086325561494581, "loss": 4.4153, "step": 3976 }, { "epoch": 0.41236739269906486, "grad_norm": 1.640625, "learning_rate": 0.00019085871902244344, "loss": 4.4229, "step": 3977 }, { "epoch": 0.41247108075355293, "grad_norm": 1.1328125, "learning_rate": 0.00019085418135790416, "loss": 4.4027, "step": 3978 }, { "epoch": 0.412574768808041, "grad_norm": 1.171875, "learning_rate": 0.00019084964262138163, "loss": 4.387, "step": 3979 }, { "epoch": 0.4126784568625291, "grad_norm": 1.03125, "learning_rate": 0.00019084510281292932, "loss": 4.4023, "step": 3980 }, { "epoch": 0.41278214491701715, "grad_norm": 1.1171875, "learning_rate": 0.00019084056193260086, "loss": 4.4215, "step": 3981 }, { "epoch": 0.4128858329715052, "grad_norm": 1.0625, "learning_rate": 0.00019083601998044975, "loss": 4.4307, "step": 3982 }, { "epoch": 0.4129895210259933, "grad_norm": 0.96484375, "learning_rate": 0.00019083147695652964, "loss": 4.4602, "step": 3983 }, { "epoch": 0.41309320908048136, "grad_norm": 1.0390625, "learning_rate": 0.0001908269328608941, "loss": 4.4425, "step": 3984 }, { "epoch": 0.41319689713496943, "grad_norm": 0.8671875, "learning_rate": 0.00019082238769359676, "loss": 4.3991, "step": 3985 }, { "epoch": 0.4133005851894575, "grad_norm": 0.94140625, "learning_rate": 0.00019081784145469127, "loss": 4.3821, "step": 3986 }, { "epoch": 0.4134042732439456, "grad_norm": 0.8515625, "learning_rate": 0.00019081329414423123, "loss": 4.4002, "step": 3987 }, { "epoch": 0.41350796129843365, "grad_norm": 0.8203125, "learning_rate": 0.00019080874576227034, "loss": 4.3869, "step": 3988 }, { "epoch": 0.4136116493529217, "grad_norm": 0.82421875, "learning_rate": 0.00019080419630886222, "loss": 4.4355, "step": 3989 }, { "epoch": 0.4137153374074098, "grad_norm": 0.75390625, "learning_rate": 0.00019079964578406057, "loss": 4.4083, "step": 3990 }, { "epoch": 0.41381902546189786, "grad_norm": 0.76953125, "learning_rate": 0.00019079509418791908, "loss": 4.4459, "step": 3991 }, { "epoch": 0.41392271351638593, "grad_norm": 0.703125, "learning_rate": 0.00019079054152049146, "loss": 4.4067, "step": 3992 }, { "epoch": 0.414026401570874, "grad_norm": 0.671875, "learning_rate": 0.00019078598778183138, "loss": 4.4076, "step": 3993 }, { "epoch": 0.4141300896253621, "grad_norm": 0.69140625, "learning_rate": 0.00019078143297199268, "loss": 4.4011, "step": 3994 }, { "epoch": 0.41423377767985015, "grad_norm": 0.65234375, "learning_rate": 0.000190776877091029, "loss": 4.3641, "step": 3995 }, { "epoch": 0.4143374657343382, "grad_norm": 0.67578125, "learning_rate": 0.00019077232013899409, "loss": 4.3824, "step": 3996 }, { "epoch": 0.4144411537888263, "grad_norm": 0.60546875, "learning_rate": 0.00019076776211594178, "loss": 4.3664, "step": 3997 }, { "epoch": 0.4145448418433144, "grad_norm": 0.6484375, "learning_rate": 0.00019076320302192584, "loss": 4.442, "step": 3998 }, { "epoch": 0.4146485298978025, "grad_norm": 0.58203125, "learning_rate": 0.00019075864285700002, "loss": 4.4118, "step": 3999 }, { "epoch": 0.41475221795229056, "grad_norm": 0.59765625, "learning_rate": 0.00019075408162121815, "loss": 4.4445, "step": 4000 }, { "epoch": 0.41485590600677863, "grad_norm": 0.59375, "learning_rate": 0.00019074951931463406, "loss": 4.375, "step": 4001 }, { "epoch": 0.4149595940612667, "grad_norm": 0.66015625, "learning_rate": 0.00019074495593730158, "loss": 4.4621, "step": 4002 }, { "epoch": 0.4150632821157548, "grad_norm": 0.5859375, "learning_rate": 0.00019074039148927452, "loss": 4.354, "step": 4003 }, { "epoch": 0.41516697017024284, "grad_norm": 0.66015625, "learning_rate": 0.00019073582597060678, "loss": 4.3905, "step": 4004 }, { "epoch": 0.4152706582247309, "grad_norm": 0.59765625, "learning_rate": 0.00019073125938135217, "loss": 4.4419, "step": 4005 }, { "epoch": 0.415374346279219, "grad_norm": 0.5859375, "learning_rate": 0.0001907266917215646, "loss": 4.4289, "step": 4006 }, { "epoch": 0.41547803433370706, "grad_norm": 0.609375, "learning_rate": 0.00019072212299129802, "loss": 4.4418, "step": 4007 }, { "epoch": 0.41558172238819513, "grad_norm": 0.578125, "learning_rate": 0.00019071755319060628, "loss": 4.3701, "step": 4008 }, { "epoch": 0.4156854104426832, "grad_norm": 0.578125, "learning_rate": 0.00019071298231954325, "loss": 4.3819, "step": 4009 }, { "epoch": 0.41578909849717127, "grad_norm": 0.60546875, "learning_rate": 0.00019070841037816297, "loss": 4.4376, "step": 4010 }, { "epoch": 0.41589278655165934, "grad_norm": 0.5625, "learning_rate": 0.0001907038373665193, "loss": 4.4225, "step": 4011 }, { "epoch": 0.4159964746061474, "grad_norm": 0.6171875, "learning_rate": 0.00019069926328466624, "loss": 4.394, "step": 4012 }, { "epoch": 0.4161001626606355, "grad_norm": 0.52734375, "learning_rate": 0.00019069468813265775, "loss": 4.4233, "step": 4013 }, { "epoch": 0.41620385071512356, "grad_norm": 0.59765625, "learning_rate": 0.00019069011191054784, "loss": 4.3972, "step": 4014 }, { "epoch": 0.41630753876961163, "grad_norm": 0.53125, "learning_rate": 0.00019068553461839042, "loss": 4.4305, "step": 4015 }, { "epoch": 0.4164112268240997, "grad_norm": 0.60546875, "learning_rate": 0.0001906809562562396, "loss": 4.41, "step": 4016 }, { "epoch": 0.41651491487858777, "grad_norm": 0.6171875, "learning_rate": 0.0001906763768241493, "loss": 4.4322, "step": 4017 }, { "epoch": 0.41661860293307584, "grad_norm": 0.55859375, "learning_rate": 0.00019067179632217363, "loss": 4.4129, "step": 4018 }, { "epoch": 0.4167222909875639, "grad_norm": 0.53125, "learning_rate": 0.00019066721475036657, "loss": 4.4214, "step": 4019 }, { "epoch": 0.416825979042052, "grad_norm": 0.54296875, "learning_rate": 0.00019066263210878225, "loss": 4.4172, "step": 4020 }, { "epoch": 0.41692966709654006, "grad_norm": 0.515625, "learning_rate": 0.0001906580483974747, "loss": 4.3925, "step": 4021 }, { "epoch": 0.41703335515102813, "grad_norm": 0.5703125, "learning_rate": 0.000190653463616498, "loss": 4.4388, "step": 4022 }, { "epoch": 0.4171370432055162, "grad_norm": 0.51953125, "learning_rate": 0.0001906488777659063, "loss": 4.3655, "step": 4023 }, { "epoch": 0.41724073126000427, "grad_norm": 0.5546875, "learning_rate": 0.0001906442908457536, "loss": 4.3968, "step": 4024 }, { "epoch": 0.41734441931449234, "grad_norm": 0.5, "learning_rate": 0.00019063970285609412, "loss": 4.3332, "step": 4025 }, { "epoch": 0.4174481073689804, "grad_norm": 0.6015625, "learning_rate": 0.00019063511379698196, "loss": 4.4032, "step": 4026 }, { "epoch": 0.4175517954234685, "grad_norm": 0.5390625, "learning_rate": 0.00019063052366847125, "loss": 4.4094, "step": 4027 }, { "epoch": 0.41765548347795656, "grad_norm": 0.5546875, "learning_rate": 0.00019062593247061614, "loss": 4.4239, "step": 4028 }, { "epoch": 0.41775917153244463, "grad_norm": 0.515625, "learning_rate": 0.00019062134020347084, "loss": 4.4453, "step": 4029 }, { "epoch": 0.4178628595869327, "grad_norm": 0.5390625, "learning_rate": 0.00019061674686708953, "loss": 4.3898, "step": 4030 }, { "epoch": 0.41796654764142077, "grad_norm": 0.5, "learning_rate": 0.00019061215246152637, "loss": 4.3921, "step": 4031 }, { "epoch": 0.41807023569590884, "grad_norm": 0.5078125, "learning_rate": 0.00019060755698683561, "loss": 4.4481, "step": 4032 }, { "epoch": 0.4181739237503969, "grad_norm": 0.51171875, "learning_rate": 0.00019060296044307146, "loss": 4.4255, "step": 4033 }, { "epoch": 0.418277611804885, "grad_norm": 0.4765625, "learning_rate": 0.00019059836283028813, "loss": 4.4163, "step": 4034 }, { "epoch": 0.41838129985937306, "grad_norm": 0.5234375, "learning_rate": 0.00019059376414853988, "loss": 4.4293, "step": 4035 }, { "epoch": 0.41848498791386113, "grad_norm": 0.51953125, "learning_rate": 0.00019058916439788098, "loss": 4.431, "step": 4036 }, { "epoch": 0.4185886759683492, "grad_norm": 0.50390625, "learning_rate": 0.00019058456357836571, "loss": 4.3368, "step": 4037 }, { "epoch": 0.41869236402283727, "grad_norm": 0.5234375, "learning_rate": 0.00019057996169004833, "loss": 4.4211, "step": 4038 }, { "epoch": 0.41879605207732534, "grad_norm": 0.57421875, "learning_rate": 0.0001905753587329832, "loss": 4.4138, "step": 4039 }, { "epoch": 0.4188997401318134, "grad_norm": 0.53515625, "learning_rate": 0.0001905707547072245, "loss": 4.373, "step": 4040 }, { "epoch": 0.4190034281863015, "grad_norm": 0.54296875, "learning_rate": 0.00019056614961282666, "loss": 4.3839, "step": 4041 }, { "epoch": 0.4191071162407896, "grad_norm": 0.486328125, "learning_rate": 0.000190561543449844, "loss": 4.4151, "step": 4042 }, { "epoch": 0.4192108042952777, "grad_norm": 0.546875, "learning_rate": 0.00019055693621833087, "loss": 4.3428, "step": 4043 }, { "epoch": 0.41931449234976576, "grad_norm": 0.53125, "learning_rate": 0.00019055232791834157, "loss": 4.3879, "step": 4044 }, { "epoch": 0.4194181804042538, "grad_norm": 0.57421875, "learning_rate": 0.00019054771854993055, "loss": 4.3823, "step": 4045 }, { "epoch": 0.4195218684587419, "grad_norm": 0.5859375, "learning_rate": 0.00019054310811315213, "loss": 4.411, "step": 4046 }, { "epoch": 0.41962555651322997, "grad_norm": 0.51171875, "learning_rate": 0.00019053849660806077, "loss": 4.4158, "step": 4047 }, { "epoch": 0.41972924456771804, "grad_norm": 0.52734375, "learning_rate": 0.00019053388403471086, "loss": 4.4367, "step": 4048 }, { "epoch": 0.4198329326222061, "grad_norm": 0.5546875, "learning_rate": 0.0001905292703931568, "loss": 4.4272, "step": 4049 }, { "epoch": 0.4199366206766942, "grad_norm": 0.4765625, "learning_rate": 0.00019052465568345306, "loss": 4.3869, "step": 4050 }, { "epoch": 0.42004030873118225, "grad_norm": 0.54296875, "learning_rate": 0.00019052003990565407, "loss": 4.3906, "step": 4051 }, { "epoch": 0.4201439967856703, "grad_norm": 0.50390625, "learning_rate": 0.0001905154230598143, "loss": 4.4166, "step": 4052 }, { "epoch": 0.4202476848401584, "grad_norm": 0.5390625, "learning_rate": 0.0001905108051459882, "loss": 4.423, "step": 4053 }, { "epoch": 0.42035137289464647, "grad_norm": 0.51171875, "learning_rate": 0.00019050618616423025, "loss": 4.4247, "step": 4054 }, { "epoch": 0.42045506094913454, "grad_norm": 0.55859375, "learning_rate": 0.00019050156611459502, "loss": 4.4092, "step": 4055 }, { "epoch": 0.4205587490036226, "grad_norm": 0.53125, "learning_rate": 0.00019049694499713695, "loss": 4.4116, "step": 4056 }, { "epoch": 0.4206624370581107, "grad_norm": 0.55078125, "learning_rate": 0.0001904923228119106, "loss": 4.4161, "step": 4057 }, { "epoch": 0.42076612511259875, "grad_norm": 0.49609375, "learning_rate": 0.00019048769955897047, "loss": 4.4182, "step": 4058 }, { "epoch": 0.4208698131670868, "grad_norm": 0.57421875, "learning_rate": 0.00019048307523837114, "loss": 4.3953, "step": 4059 }, { "epoch": 0.4209735012215749, "grad_norm": 0.5546875, "learning_rate": 0.00019047844985016718, "loss": 4.455, "step": 4060 }, { "epoch": 0.42107718927606297, "grad_norm": 0.57421875, "learning_rate": 0.00019047382339441315, "loss": 4.4332, "step": 4061 }, { "epoch": 0.42118087733055104, "grad_norm": 0.5234375, "learning_rate": 0.00019046919587116366, "loss": 4.456, "step": 4062 }, { "epoch": 0.4212845653850391, "grad_norm": 0.54296875, "learning_rate": 0.00019046456728047327, "loss": 4.421, "step": 4063 }, { "epoch": 0.4213882534395272, "grad_norm": 0.5390625, "learning_rate": 0.0001904599376223966, "loss": 4.393, "step": 4064 }, { "epoch": 0.42149194149401525, "grad_norm": 0.58203125, "learning_rate": 0.00019045530689698828, "loss": 4.391, "step": 4065 }, { "epoch": 0.4215956295485033, "grad_norm": 0.55859375, "learning_rate": 0.00019045067510430297, "loss": 4.3836, "step": 4066 }, { "epoch": 0.4216993176029914, "grad_norm": 0.56640625, "learning_rate": 0.0001904460422443953, "loss": 4.4405, "step": 4067 }, { "epoch": 0.42180300565747947, "grad_norm": 0.546875, "learning_rate": 0.00019044140831731997, "loss": 4.418, "step": 4068 }, { "epoch": 0.42190669371196754, "grad_norm": 0.5625, "learning_rate": 0.0001904367733231316, "loss": 4.4034, "step": 4069 }, { "epoch": 0.4220103817664556, "grad_norm": 0.5390625, "learning_rate": 0.00019043213726188486, "loss": 4.3926, "step": 4070 }, { "epoch": 0.4221140698209437, "grad_norm": 0.55859375, "learning_rate": 0.00019042750013363452, "loss": 4.4189, "step": 4071 }, { "epoch": 0.42221775787543175, "grad_norm": 0.51171875, "learning_rate": 0.00019042286193843527, "loss": 4.4032, "step": 4072 }, { "epoch": 0.4223214459299198, "grad_norm": 0.56640625, "learning_rate": 0.00019041822267634184, "loss": 4.4009, "step": 4073 }, { "epoch": 0.4224251339844079, "grad_norm": 0.55078125, "learning_rate": 0.00019041358234740896, "loss": 4.4393, "step": 4074 }, { "epoch": 0.42252882203889597, "grad_norm": 0.6015625, "learning_rate": 0.00019040894095169138, "loss": 4.3988, "step": 4075 }, { "epoch": 0.42263251009338404, "grad_norm": 0.60546875, "learning_rate": 0.00019040429848924385, "loss": 4.4107, "step": 4076 }, { "epoch": 0.4227361981478721, "grad_norm": 0.60546875, "learning_rate": 0.00019039965496012116, "loss": 4.4161, "step": 4077 }, { "epoch": 0.4228398862023602, "grad_norm": 0.58203125, "learning_rate": 0.00019039501036437808, "loss": 4.3999, "step": 4078 }, { "epoch": 0.42294357425684825, "grad_norm": 0.6015625, "learning_rate": 0.00019039036470206946, "loss": 4.4145, "step": 4079 }, { "epoch": 0.4230472623113363, "grad_norm": 0.58984375, "learning_rate": 0.00019038571797325005, "loss": 4.4175, "step": 4080 }, { "epoch": 0.4231509503658244, "grad_norm": 0.6328125, "learning_rate": 0.00019038107017797472, "loss": 4.3827, "step": 4081 }, { "epoch": 0.42325463842031247, "grad_norm": 0.64453125, "learning_rate": 0.0001903764213162983, "loss": 4.3996, "step": 4082 }, { "epoch": 0.42335832647480054, "grad_norm": 0.640625, "learning_rate": 0.00019037177138827568, "loss": 4.4007, "step": 4083 }, { "epoch": 0.4234620145292886, "grad_norm": 0.66015625, "learning_rate": 0.00019036712039396166, "loss": 4.4087, "step": 4084 }, { "epoch": 0.4235657025837767, "grad_norm": 0.63671875, "learning_rate": 0.00019036246833341113, "loss": 4.4105, "step": 4085 }, { "epoch": 0.42366939063826475, "grad_norm": 0.6328125, "learning_rate": 0.00019035781520667898, "loss": 4.4509, "step": 4086 }, { "epoch": 0.4237730786927529, "grad_norm": 0.625, "learning_rate": 0.00019035316101382015, "loss": 4.3775, "step": 4087 }, { "epoch": 0.42387676674724095, "grad_norm": 0.69921875, "learning_rate": 0.0001903485057548895, "loss": 4.4293, "step": 4088 }, { "epoch": 0.423980454801729, "grad_norm": 0.5625, "learning_rate": 0.000190343849429942, "loss": 4.4232, "step": 4089 }, { "epoch": 0.4240841428562171, "grad_norm": 0.6328125, "learning_rate": 0.00019033919203903258, "loss": 4.3965, "step": 4090 }, { "epoch": 0.42418783091070517, "grad_norm": 0.609375, "learning_rate": 0.0001903345335822162, "loss": 4.4255, "step": 4091 }, { "epoch": 0.42429151896519324, "grad_norm": 0.640625, "learning_rate": 0.0001903298740595478, "loss": 4.4579, "step": 4092 }, { "epoch": 0.4243952070196813, "grad_norm": 0.58984375, "learning_rate": 0.00019032521347108237, "loss": 4.4192, "step": 4093 }, { "epoch": 0.4244988950741694, "grad_norm": 0.6875, "learning_rate": 0.00019032055181687486, "loss": 4.4378, "step": 4094 }, { "epoch": 0.42460258312865745, "grad_norm": 0.58984375, "learning_rate": 0.00019031588909698037, "loss": 4.4109, "step": 4095 }, { "epoch": 0.4247062711831455, "grad_norm": 0.60546875, "learning_rate": 0.00019031122531145382, "loss": 4.3871, "step": 4096 }, { "epoch": 0.4248099592376336, "grad_norm": 0.609375, "learning_rate": 0.00019030656046035028, "loss": 4.3683, "step": 4097 }, { "epoch": 0.42491364729212167, "grad_norm": 0.5625, "learning_rate": 0.0001903018945437248, "loss": 4.3856, "step": 4098 }, { "epoch": 0.42501733534660974, "grad_norm": 0.5859375, "learning_rate": 0.00019029722756163243, "loss": 4.3411, "step": 4099 }, { "epoch": 0.4251210234010978, "grad_norm": 0.5703125, "learning_rate": 0.0001902925595141282, "loss": 4.3784, "step": 4100 }, { "epoch": 0.4252247114555859, "grad_norm": 0.62109375, "learning_rate": 0.00019028789040126718, "loss": 4.4135, "step": 4101 }, { "epoch": 0.42532839951007395, "grad_norm": 0.57421875, "learning_rate": 0.00019028322022310453, "loss": 4.443, "step": 4102 }, { "epoch": 0.425432087564562, "grad_norm": 0.6171875, "learning_rate": 0.0001902785489796953, "loss": 4.3813, "step": 4103 }, { "epoch": 0.4255357756190501, "grad_norm": 0.5859375, "learning_rate": 0.00019027387667109463, "loss": 4.4411, "step": 4104 }, { "epoch": 0.42563946367353817, "grad_norm": 0.59375, "learning_rate": 0.00019026920329735764, "loss": 4.4123, "step": 4105 }, { "epoch": 0.42574315172802624, "grad_norm": 0.6015625, "learning_rate": 0.00019026452885853947, "loss": 4.4107, "step": 4106 }, { "epoch": 0.4258468397825143, "grad_norm": 0.62109375, "learning_rate": 0.00019025985335469528, "loss": 4.4329, "step": 4107 }, { "epoch": 0.4259505278370024, "grad_norm": 0.55859375, "learning_rate": 0.00019025517678588022, "loss": 4.4004, "step": 4108 }, { "epoch": 0.42605421589149045, "grad_norm": 0.57421875, "learning_rate": 0.00019025049915214948, "loss": 4.356, "step": 4109 }, { "epoch": 0.4261579039459785, "grad_norm": 0.53515625, "learning_rate": 0.00019024582045355825, "loss": 4.3517, "step": 4110 }, { "epoch": 0.4262615920004666, "grad_norm": 0.6171875, "learning_rate": 0.00019024114069016173, "loss": 4.4328, "step": 4111 }, { "epoch": 0.42636528005495467, "grad_norm": 0.59765625, "learning_rate": 0.00019023645986201513, "loss": 4.3555, "step": 4112 }, { "epoch": 0.42646896810944274, "grad_norm": 0.62109375, "learning_rate": 0.0001902317779691737, "loss": 4.4117, "step": 4113 }, { "epoch": 0.4265726561639308, "grad_norm": 0.5390625, "learning_rate": 0.00019022709501169267, "loss": 4.4447, "step": 4114 }, { "epoch": 0.4266763442184189, "grad_norm": 0.6015625, "learning_rate": 0.00019022241098962727, "loss": 4.4215, "step": 4115 }, { "epoch": 0.42678003227290695, "grad_norm": 0.53125, "learning_rate": 0.0001902177259030328, "loss": 4.4491, "step": 4116 }, { "epoch": 0.426883720327395, "grad_norm": 0.66015625, "learning_rate": 0.00019021303975196453, "loss": 4.4125, "step": 4117 }, { "epoch": 0.4269874083818831, "grad_norm": 0.52734375, "learning_rate": 0.00019020835253647777, "loss": 4.433, "step": 4118 }, { "epoch": 0.42709109643637116, "grad_norm": 0.67578125, "learning_rate": 0.0001902036642566278, "loss": 4.3897, "step": 4119 }, { "epoch": 0.42719478449085924, "grad_norm": 0.5703125, "learning_rate": 0.0001901989749124699, "loss": 4.4053, "step": 4120 }, { "epoch": 0.4272984725453473, "grad_norm": 0.60546875, "learning_rate": 0.0001901942845040595, "loss": 4.4107, "step": 4121 }, { "epoch": 0.4274021605998354, "grad_norm": 0.5546875, "learning_rate": 0.00019018959303145186, "loss": 4.4075, "step": 4122 }, { "epoch": 0.42750584865432345, "grad_norm": 0.609375, "learning_rate": 0.00019018490049470236, "loss": 4.3824, "step": 4123 }, { "epoch": 0.4276095367088115, "grad_norm": 0.62109375, "learning_rate": 0.00019018020689386632, "loss": 4.4052, "step": 4124 }, { "epoch": 0.4277132247632996, "grad_norm": 0.578125, "learning_rate": 0.0001901755122289992, "loss": 4.3915, "step": 4125 }, { "epoch": 0.42781691281778766, "grad_norm": 0.6640625, "learning_rate": 0.0001901708165001564, "loss": 4.4056, "step": 4126 }, { "epoch": 0.42792060087227574, "grad_norm": 0.55859375, "learning_rate": 0.0001901661197073932, "loss": 4.424, "step": 4127 }, { "epoch": 0.4280242889267638, "grad_norm": 0.625, "learning_rate": 0.00019016142185076512, "loss": 4.3931, "step": 4128 }, { "epoch": 0.4281279769812519, "grad_norm": 0.53515625, "learning_rate": 0.0001901567229303276, "loss": 4.4321, "step": 4129 }, { "epoch": 0.42823166503573995, "grad_norm": 0.6328125, "learning_rate": 0.00019015202294613603, "loss": 4.4057, "step": 4130 }, { "epoch": 0.428335353090228, "grad_norm": 0.56640625, "learning_rate": 0.00019014732189824587, "loss": 4.3677, "step": 4131 }, { "epoch": 0.42843904114471615, "grad_norm": 0.6171875, "learning_rate": 0.00019014261978671262, "loss": 4.4088, "step": 4132 }, { "epoch": 0.4285427291992042, "grad_norm": 0.5625, "learning_rate": 0.00019013791661159172, "loss": 4.4014, "step": 4133 }, { "epoch": 0.4286464172536923, "grad_norm": 0.5859375, "learning_rate": 0.0001901332123729387, "loss": 4.4197, "step": 4134 }, { "epoch": 0.42875010530818036, "grad_norm": 0.5546875, "learning_rate": 0.00019012850707080905, "loss": 4.4187, "step": 4135 }, { "epoch": 0.42885379336266843, "grad_norm": 0.54296875, "learning_rate": 0.00019012380070525828, "loss": 4.4087, "step": 4136 }, { "epoch": 0.4289574814171565, "grad_norm": 0.546875, "learning_rate": 0.00019011909327634193, "loss": 4.4242, "step": 4137 }, { "epoch": 0.4290611694716446, "grad_norm": 0.55078125, "learning_rate": 0.00019011438478411553, "loss": 4.4295, "step": 4138 }, { "epoch": 0.42916485752613265, "grad_norm": 0.6171875, "learning_rate": 0.00019010967522863466, "loss": 4.3558, "step": 4139 }, { "epoch": 0.4292685455806207, "grad_norm": 0.52734375, "learning_rate": 0.00019010496460995487, "loss": 4.3369, "step": 4140 }, { "epoch": 0.4293722336351088, "grad_norm": 0.5703125, "learning_rate": 0.00019010025292813174, "loss": 4.3923, "step": 4141 }, { "epoch": 0.42947592168959686, "grad_norm": 0.515625, "learning_rate": 0.00019009554018322087, "loss": 4.4044, "step": 4142 }, { "epoch": 0.42957960974408493, "grad_norm": 0.58203125, "learning_rate": 0.00019009082637527785, "loss": 4.3502, "step": 4143 }, { "epoch": 0.429683297798573, "grad_norm": 0.61328125, "learning_rate": 0.0001900861115043583, "loss": 4.3683, "step": 4144 }, { "epoch": 0.4297869858530611, "grad_norm": 0.55859375, "learning_rate": 0.0001900813955705179, "loss": 4.3883, "step": 4145 }, { "epoch": 0.42989067390754915, "grad_norm": 0.62890625, "learning_rate": 0.00019007667857381223, "loss": 4.4058, "step": 4146 }, { "epoch": 0.4299943619620372, "grad_norm": 0.57421875, "learning_rate": 0.00019007196051429694, "loss": 4.4143, "step": 4147 }, { "epoch": 0.4300980500165253, "grad_norm": 0.6171875, "learning_rate": 0.00019006724139202774, "loss": 4.4171, "step": 4148 }, { "epoch": 0.43020173807101336, "grad_norm": 0.57421875, "learning_rate": 0.00019006252120706032, "loss": 4.4089, "step": 4149 }, { "epoch": 0.43030542612550143, "grad_norm": 0.56640625, "learning_rate": 0.00019005779995945034, "loss": 4.4398, "step": 4150 }, { "epoch": 0.4304091141799895, "grad_norm": 0.5703125, "learning_rate": 0.00019005307764925351, "loss": 4.382, "step": 4151 }, { "epoch": 0.4305128022344776, "grad_norm": 0.60546875, "learning_rate": 0.00019004835427652555, "loss": 4.3662, "step": 4152 }, { "epoch": 0.43061649028896565, "grad_norm": 0.5703125, "learning_rate": 0.0001900436298413222, "loss": 4.3927, "step": 4153 }, { "epoch": 0.4307201783434537, "grad_norm": 0.64453125, "learning_rate": 0.0001900389043436992, "loss": 4.4008, "step": 4154 }, { "epoch": 0.4308238663979418, "grad_norm": 0.55078125, "learning_rate": 0.00019003417778371232, "loss": 4.4127, "step": 4155 }, { "epoch": 0.43092755445242986, "grad_norm": 0.60546875, "learning_rate": 0.00019002945016141726, "loss": 4.4235, "step": 4156 }, { "epoch": 0.43103124250691793, "grad_norm": 0.62109375, "learning_rate": 0.00019002472147686993, "loss": 4.3804, "step": 4157 }, { "epoch": 0.431134930561406, "grad_norm": 0.66015625, "learning_rate": 0.00019001999173012596, "loss": 4.371, "step": 4158 }, { "epoch": 0.4312386186158941, "grad_norm": 0.60546875, "learning_rate": 0.00019001526092124132, "loss": 4.3823, "step": 4159 }, { "epoch": 0.43134230667038215, "grad_norm": 0.66015625, "learning_rate": 0.0001900105290502717, "loss": 4.4178, "step": 4160 }, { "epoch": 0.4314459947248702, "grad_norm": 0.578125, "learning_rate": 0.00019000579611727302, "loss": 4.3976, "step": 4161 }, { "epoch": 0.4315496827793583, "grad_norm": 0.59375, "learning_rate": 0.00019000106212230106, "loss": 4.3859, "step": 4162 }, { "epoch": 0.43165337083384636, "grad_norm": 0.60546875, "learning_rate": 0.0001899963270654117, "loss": 4.4141, "step": 4163 }, { "epoch": 0.43175705888833443, "grad_norm": 0.6640625, "learning_rate": 0.00018999159094666083, "loss": 4.4009, "step": 4164 }, { "epoch": 0.4318607469428225, "grad_norm": 0.6015625, "learning_rate": 0.0001899868537661043, "loss": 4.3679, "step": 4165 }, { "epoch": 0.4319644349973106, "grad_norm": 0.64453125, "learning_rate": 0.00018998211552379802, "loss": 4.4297, "step": 4166 }, { "epoch": 0.43206812305179865, "grad_norm": 0.65625, "learning_rate": 0.0001899773762197979, "loss": 4.4041, "step": 4167 }, { "epoch": 0.4321718111062867, "grad_norm": 0.6484375, "learning_rate": 0.00018997263585415986, "loss": 4.3974, "step": 4168 }, { "epoch": 0.4322754991607748, "grad_norm": 0.6875, "learning_rate": 0.00018996789442693977, "loss": 4.4237, "step": 4169 }, { "epoch": 0.43237918721526286, "grad_norm": 0.6484375, "learning_rate": 0.00018996315193819369, "loss": 4.3515, "step": 4170 }, { "epoch": 0.43248287526975093, "grad_norm": 0.64453125, "learning_rate": 0.00018995840838797746, "loss": 4.3885, "step": 4171 }, { "epoch": 0.432586563324239, "grad_norm": 0.68359375, "learning_rate": 0.00018995366377634712, "loss": 4.3844, "step": 4172 }, { "epoch": 0.4326902513787271, "grad_norm": 0.6328125, "learning_rate": 0.00018994891810335864, "loss": 4.4171, "step": 4173 }, { "epoch": 0.43279393943321515, "grad_norm": 0.69140625, "learning_rate": 0.00018994417136906802, "loss": 4.4068, "step": 4174 }, { "epoch": 0.4328976274877032, "grad_norm": 0.59765625, "learning_rate": 0.00018993942357353123, "loss": 4.3834, "step": 4175 }, { "epoch": 0.43300131554219135, "grad_norm": 0.6328125, "learning_rate": 0.0001899346747168043, "loss": 4.3841, "step": 4176 }, { "epoch": 0.4331050035966794, "grad_norm": 0.6640625, "learning_rate": 0.00018992992479894332, "loss": 4.3845, "step": 4177 }, { "epoch": 0.4332086916511675, "grad_norm": 0.578125, "learning_rate": 0.00018992517382000426, "loss": 4.4253, "step": 4178 }, { "epoch": 0.43331237970565556, "grad_norm": 0.7421875, "learning_rate": 0.00018992042178004315, "loss": 4.3755, "step": 4179 }, { "epoch": 0.43341606776014363, "grad_norm": 0.55078125, "learning_rate": 0.00018991566867911617, "loss": 4.4059, "step": 4180 }, { "epoch": 0.4335197558146317, "grad_norm": 0.66796875, "learning_rate": 0.00018991091451727937, "loss": 4.4073, "step": 4181 }, { "epoch": 0.4336234438691198, "grad_norm": 0.56640625, "learning_rate": 0.0001899061592945888, "loss": 4.3329, "step": 4182 }, { "epoch": 0.43372713192360784, "grad_norm": 0.63671875, "learning_rate": 0.00018990140301110056, "loss": 4.3915, "step": 4183 }, { "epoch": 0.4338308199780959, "grad_norm": 0.5625, "learning_rate": 0.0001898966456668708, "loss": 4.4172, "step": 4184 }, { "epoch": 0.433934508032584, "grad_norm": 0.62890625, "learning_rate": 0.00018989188726195567, "loss": 4.4165, "step": 4185 }, { "epoch": 0.43403819608707206, "grad_norm": 0.55078125, "learning_rate": 0.00018988712779641128, "loss": 4.402, "step": 4186 }, { "epoch": 0.43414188414156013, "grad_norm": 0.63671875, "learning_rate": 0.0001898823672702938, "loss": 4.4, "step": 4187 }, { "epoch": 0.4342455721960482, "grad_norm": 0.57421875, "learning_rate": 0.00018987760568365937, "loss": 4.4032, "step": 4188 }, { "epoch": 0.4343492602505363, "grad_norm": 0.625, "learning_rate": 0.0001898728430365642, "loss": 4.4163, "step": 4189 }, { "epoch": 0.43445294830502434, "grad_norm": 0.6015625, "learning_rate": 0.0001898680793290645, "loss": 4.3809, "step": 4190 }, { "epoch": 0.4345566363595124, "grad_norm": 0.65625, "learning_rate": 0.00018986331456121648, "loss": 4.3944, "step": 4191 }, { "epoch": 0.4346603244140005, "grad_norm": 0.609375, "learning_rate": 0.00018985854873307628, "loss": 4.3181, "step": 4192 }, { "epoch": 0.43476401246848856, "grad_norm": 0.6640625, "learning_rate": 0.00018985378184470023, "loss": 4.4519, "step": 4193 }, { "epoch": 0.43486770052297663, "grad_norm": 0.5859375, "learning_rate": 0.00018984901389614453, "loss": 4.3376, "step": 4194 }, { "epoch": 0.4349713885774647, "grad_norm": 0.6484375, "learning_rate": 0.00018984424488746544, "loss": 4.3662, "step": 4195 }, { "epoch": 0.4350750766319528, "grad_norm": 0.58203125, "learning_rate": 0.00018983947481871922, "loss": 4.4071, "step": 4196 }, { "epoch": 0.43517876468644084, "grad_norm": 0.64453125, "learning_rate": 0.0001898347036899622, "loss": 4.4058, "step": 4197 }, { "epoch": 0.4352824527409289, "grad_norm": 0.62109375, "learning_rate": 0.00018982993150125057, "loss": 4.4075, "step": 4198 }, { "epoch": 0.435386140795417, "grad_norm": 0.6328125, "learning_rate": 0.00018982515825264072, "loss": 4.4165, "step": 4199 }, { "epoch": 0.43548982884990506, "grad_norm": 0.57421875, "learning_rate": 0.000189820383944189, "loss": 4.4017, "step": 4200 }, { "epoch": 0.43559351690439313, "grad_norm": 0.6640625, "learning_rate": 0.00018981560857595167, "loss": 4.4161, "step": 4201 }, { "epoch": 0.4356972049588812, "grad_norm": 0.53125, "learning_rate": 0.00018981083214798507, "loss": 4.4522, "step": 4202 }, { "epoch": 0.4358008930133693, "grad_norm": 0.73046875, "learning_rate": 0.0001898060546603456, "loss": 4.4195, "step": 4203 }, { "epoch": 0.43590458106785734, "grad_norm": 0.546875, "learning_rate": 0.0001898012761130896, "loss": 4.4096, "step": 4204 }, { "epoch": 0.4360082691223454, "grad_norm": 0.69921875, "learning_rate": 0.0001897964965062735, "loss": 4.3819, "step": 4205 }, { "epoch": 0.4361119571768335, "grad_norm": 0.5546875, "learning_rate": 0.00018979171583995366, "loss": 4.3974, "step": 4206 }, { "epoch": 0.43621564523132156, "grad_norm": 0.69921875, "learning_rate": 0.00018978693411418648, "loss": 4.4042, "step": 4207 }, { "epoch": 0.43631933328580963, "grad_norm": 0.515625, "learning_rate": 0.00018978215132902838, "loss": 4.3937, "step": 4208 }, { "epoch": 0.4364230213402977, "grad_norm": 0.70703125, "learning_rate": 0.00018977736748453577, "loss": 4.3937, "step": 4209 }, { "epoch": 0.4365267093947858, "grad_norm": 0.53125, "learning_rate": 0.00018977258258076515, "loss": 4.4096, "step": 4210 }, { "epoch": 0.43663039744927384, "grad_norm": 0.66796875, "learning_rate": 0.00018976779661777297, "loss": 4.3821, "step": 4211 }, { "epoch": 0.4367340855037619, "grad_norm": 0.60546875, "learning_rate": 0.00018976300959561564, "loss": 4.3688, "step": 4212 }, { "epoch": 0.43683777355825, "grad_norm": 0.703125, "learning_rate": 0.00018975822151434972, "loss": 4.3522, "step": 4213 }, { "epoch": 0.43694146161273806, "grad_norm": 0.64453125, "learning_rate": 0.00018975343237403162, "loss": 4.3861, "step": 4214 }, { "epoch": 0.43704514966722613, "grad_norm": 0.671875, "learning_rate": 0.00018974864217471792, "loss": 4.3924, "step": 4215 }, { "epoch": 0.4371488377217142, "grad_norm": 0.6015625, "learning_rate": 0.0001897438509164651, "loss": 4.3727, "step": 4216 }, { "epoch": 0.43725252577620227, "grad_norm": 0.68359375, "learning_rate": 0.00018973905859932973, "loss": 4.4095, "step": 4217 }, { "epoch": 0.43735621383069034, "grad_norm": 0.6796875, "learning_rate": 0.00018973426522336827, "loss": 4.4024, "step": 4218 }, { "epoch": 0.4374599018851784, "grad_norm": 0.62890625, "learning_rate": 0.00018972947078863738, "loss": 4.399, "step": 4219 }, { "epoch": 0.4375635899396665, "grad_norm": 0.63671875, "learning_rate": 0.00018972467529519357, "loss": 4.3364, "step": 4220 }, { "epoch": 0.4376672779941546, "grad_norm": 0.625, "learning_rate": 0.00018971987874309343, "loss": 4.4305, "step": 4221 }, { "epoch": 0.4377709660486427, "grad_norm": 0.60546875, "learning_rate": 0.00018971508113239358, "loss": 4.3965, "step": 4222 }, { "epoch": 0.43787465410313076, "grad_norm": 0.66015625, "learning_rate": 0.00018971028246315058, "loss": 4.435, "step": 4223 }, { "epoch": 0.4379783421576188, "grad_norm": 0.58203125, "learning_rate": 0.00018970548273542108, "loss": 4.3942, "step": 4224 }, { "epoch": 0.4380820302121069, "grad_norm": 0.68359375, "learning_rate": 0.0001897006819492617, "loss": 4.3815, "step": 4225 }, { "epoch": 0.43818571826659497, "grad_norm": 0.6328125, "learning_rate": 0.00018969588010472915, "loss": 4.4046, "step": 4226 }, { "epoch": 0.43828940632108304, "grad_norm": 0.640625, "learning_rate": 0.00018969107720187995, "loss": 4.4266, "step": 4227 }, { "epoch": 0.4383930943755711, "grad_norm": 0.671875, "learning_rate": 0.00018968627324077088, "loss": 4.3653, "step": 4228 }, { "epoch": 0.4384967824300592, "grad_norm": 0.65625, "learning_rate": 0.00018968146822145857, "loss": 4.4342, "step": 4229 }, { "epoch": 0.43860047048454726, "grad_norm": 0.64453125, "learning_rate": 0.00018967666214399976, "loss": 4.4118, "step": 4230 }, { "epoch": 0.4387041585390353, "grad_norm": 0.64453125, "learning_rate": 0.00018967185500845112, "loss": 4.3927, "step": 4231 }, { "epoch": 0.4388078465935234, "grad_norm": 0.65625, "learning_rate": 0.00018966704681486936, "loss": 4.3668, "step": 4232 }, { "epoch": 0.43891153464801147, "grad_norm": 0.6171875, "learning_rate": 0.00018966223756331125, "loss": 4.3601, "step": 4233 }, { "epoch": 0.43901522270249954, "grad_norm": 0.63671875, "learning_rate": 0.0001896574272538335, "loss": 4.4051, "step": 4234 }, { "epoch": 0.4391189107569876, "grad_norm": 0.67578125, "learning_rate": 0.0001896526158864929, "loss": 4.3614, "step": 4235 }, { "epoch": 0.4392225988114757, "grad_norm": 0.5625, "learning_rate": 0.00018964780346134618, "loss": 4.414, "step": 4236 }, { "epoch": 0.43932628686596376, "grad_norm": 0.6171875, "learning_rate": 0.00018964298997845017, "loss": 4.4161, "step": 4237 }, { "epoch": 0.4394299749204518, "grad_norm": 0.6171875, "learning_rate": 0.0001896381754378616, "loss": 4.4016, "step": 4238 }, { "epoch": 0.4395336629749399, "grad_norm": 0.546875, "learning_rate": 0.0001896333598396373, "loss": 4.3883, "step": 4239 }, { "epoch": 0.43963735102942797, "grad_norm": 0.640625, "learning_rate": 0.00018962854318383413, "loss": 4.4013, "step": 4240 }, { "epoch": 0.43974103908391604, "grad_norm": 0.58984375, "learning_rate": 0.00018962372547050887, "loss": 4.4009, "step": 4241 }, { "epoch": 0.4398447271384041, "grad_norm": 0.60546875, "learning_rate": 0.0001896189066997184, "loss": 4.4045, "step": 4242 }, { "epoch": 0.4399484151928922, "grad_norm": 0.63671875, "learning_rate": 0.00018961408687151959, "loss": 4.3849, "step": 4243 }, { "epoch": 0.44005210324738026, "grad_norm": 0.546875, "learning_rate": 0.00018960926598596926, "loss": 4.3956, "step": 4244 }, { "epoch": 0.4401557913018683, "grad_norm": 0.578125, "learning_rate": 0.0001896044440431243, "loss": 4.4034, "step": 4245 }, { "epoch": 0.4402594793563564, "grad_norm": 0.5546875, "learning_rate": 0.00018959962104304163, "loss": 4.3885, "step": 4246 }, { "epoch": 0.44036316741084447, "grad_norm": 0.54296875, "learning_rate": 0.00018959479698577814, "loss": 4.3715, "step": 4247 }, { "epoch": 0.44046685546533254, "grad_norm": 0.6328125, "learning_rate": 0.00018958997187139073, "loss": 4.4171, "step": 4248 }, { "epoch": 0.4405705435198206, "grad_norm": 0.6171875, "learning_rate": 0.00018958514569993636, "loss": 4.3923, "step": 4249 }, { "epoch": 0.4406742315743087, "grad_norm": 0.5859375, "learning_rate": 0.000189580318471472, "loss": 4.3919, "step": 4250 }, { "epoch": 0.44077791962879675, "grad_norm": 0.58984375, "learning_rate": 0.00018957549018605455, "loss": 4.3884, "step": 4251 }, { "epoch": 0.4408816076832848, "grad_norm": 0.640625, "learning_rate": 0.000189570660843741, "loss": 4.3758, "step": 4252 }, { "epoch": 0.4409852957377729, "grad_norm": 0.5703125, "learning_rate": 0.00018956583044458833, "loss": 4.3725, "step": 4253 }, { "epoch": 0.44108898379226097, "grad_norm": 0.61328125, "learning_rate": 0.00018956099898865354, "loss": 4.4041, "step": 4254 }, { "epoch": 0.44119267184674904, "grad_norm": 0.63671875, "learning_rate": 0.00018955616647599365, "loss": 4.4445, "step": 4255 }, { "epoch": 0.4412963599012371, "grad_norm": 0.5859375, "learning_rate": 0.00018955133290666563, "loss": 4.376, "step": 4256 }, { "epoch": 0.4414000479557252, "grad_norm": 0.640625, "learning_rate": 0.00018954649828072654, "loss": 4.4271, "step": 4257 }, { "epoch": 0.44150373601021325, "grad_norm": 0.58984375, "learning_rate": 0.00018954166259823346, "loss": 4.3593, "step": 4258 }, { "epoch": 0.4416074240647013, "grad_norm": 0.75, "learning_rate": 0.00018953682585924337, "loss": 4.4018, "step": 4259 }, { "epoch": 0.4417111121191894, "grad_norm": 0.6171875, "learning_rate": 0.0001895319880638134, "loss": 4.4092, "step": 4260 }, { "epoch": 0.44181480017367747, "grad_norm": 0.59375, "learning_rate": 0.00018952714921200063, "loss": 4.3735, "step": 4261 }, { "epoch": 0.44191848822816554, "grad_norm": 0.640625, "learning_rate": 0.0001895223093038621, "loss": 4.4423, "step": 4262 }, { "epoch": 0.4420221762826536, "grad_norm": 0.58984375, "learning_rate": 0.00018951746833945497, "loss": 4.3934, "step": 4263 }, { "epoch": 0.4421258643371417, "grad_norm": 0.64453125, "learning_rate": 0.0001895126263188363, "loss": 4.3609, "step": 4264 }, { "epoch": 0.44222955239162975, "grad_norm": 0.55078125, "learning_rate": 0.0001895077832420633, "loss": 4.3961, "step": 4265 }, { "epoch": 0.4423332404461179, "grad_norm": 0.62890625, "learning_rate": 0.00018950293910919305, "loss": 4.4182, "step": 4266 }, { "epoch": 0.44243692850060595, "grad_norm": 0.53125, "learning_rate": 0.00018949809392028276, "loss": 4.3712, "step": 4267 }, { "epoch": 0.442540616555094, "grad_norm": 0.56640625, "learning_rate": 0.00018949324767538955, "loss": 4.3777, "step": 4268 }, { "epoch": 0.4426443046095821, "grad_norm": 0.55859375, "learning_rate": 0.00018948840037457057, "loss": 4.3713, "step": 4269 }, { "epoch": 0.44274799266407017, "grad_norm": 0.59765625, "learning_rate": 0.00018948355201788312, "loss": 4.4353, "step": 4270 }, { "epoch": 0.44285168071855824, "grad_norm": 0.609375, "learning_rate": 0.0001894787026053843, "loss": 4.4071, "step": 4271 }, { "epoch": 0.4429553687730463, "grad_norm": 0.640625, "learning_rate": 0.0001894738521371314, "loss": 4.3544, "step": 4272 }, { "epoch": 0.4430590568275344, "grad_norm": 0.61328125, "learning_rate": 0.00018946900061318162, "loss": 4.3539, "step": 4273 }, { "epoch": 0.44316274488202245, "grad_norm": 0.66015625, "learning_rate": 0.00018946414803359222, "loss": 4.4077, "step": 4274 }, { "epoch": 0.4432664329365105, "grad_norm": 0.6796875, "learning_rate": 0.0001894592943984204, "loss": 4.3844, "step": 4275 }, { "epoch": 0.4433701209909986, "grad_norm": 0.6484375, "learning_rate": 0.0001894544397077235, "loss": 4.3638, "step": 4276 }, { "epoch": 0.44347380904548667, "grad_norm": 0.828125, "learning_rate": 0.00018944958396155878, "loss": 4.3844, "step": 4277 }, { "epoch": 0.44357749709997474, "grad_norm": 0.6484375, "learning_rate": 0.0001894447271599835, "loss": 4.4215, "step": 4278 }, { "epoch": 0.4436811851544628, "grad_norm": 0.65625, "learning_rate": 0.00018943986930305498, "loss": 4.38, "step": 4279 }, { "epoch": 0.4437848732089509, "grad_norm": 0.6640625, "learning_rate": 0.00018943501039083056, "loss": 4.391, "step": 4280 }, { "epoch": 0.44388856126343895, "grad_norm": 0.60546875, "learning_rate": 0.00018943015042336754, "loss": 4.4133, "step": 4281 }, { "epoch": 0.443992249317927, "grad_norm": 0.63671875, "learning_rate": 0.00018942528940072329, "loss": 4.3592, "step": 4282 }, { "epoch": 0.4440959373724151, "grad_norm": 0.640625, "learning_rate": 0.00018942042732295514, "loss": 4.3841, "step": 4283 }, { "epoch": 0.44419962542690317, "grad_norm": 0.70703125, "learning_rate": 0.00018941556419012047, "loss": 4.362, "step": 4284 }, { "epoch": 0.44430331348139124, "grad_norm": 0.58203125, "learning_rate": 0.00018941070000227667, "loss": 4.3791, "step": 4285 }, { "epoch": 0.4444070015358793, "grad_norm": 0.71484375, "learning_rate": 0.0001894058347594811, "loss": 4.3727, "step": 4286 }, { "epoch": 0.4445106895903674, "grad_norm": 0.64453125, "learning_rate": 0.00018940096846179123, "loss": 4.3808, "step": 4287 }, { "epoch": 0.44461437764485545, "grad_norm": 0.69140625, "learning_rate": 0.00018939610110926437, "loss": 4.4176, "step": 4288 }, { "epoch": 0.4447180656993435, "grad_norm": 0.640625, "learning_rate": 0.00018939123270195806, "loss": 4.3873, "step": 4289 }, { "epoch": 0.4448217537538316, "grad_norm": 0.6015625, "learning_rate": 0.0001893863632399297, "loss": 4.4036, "step": 4290 }, { "epoch": 0.44492544180831967, "grad_norm": 0.69140625, "learning_rate": 0.00018938149272323672, "loss": 4.4184, "step": 4291 }, { "epoch": 0.44502912986280774, "grad_norm": 0.64453125, "learning_rate": 0.0001893766211519366, "loss": 4.3895, "step": 4292 }, { "epoch": 0.4451328179172958, "grad_norm": 0.65625, "learning_rate": 0.00018937174852608682, "loss": 4.4001, "step": 4293 }, { "epoch": 0.4452365059717839, "grad_norm": 0.703125, "learning_rate": 0.0001893668748457449, "loss": 4.4087, "step": 4294 }, { "epoch": 0.44534019402627195, "grad_norm": 0.6328125, "learning_rate": 0.0001893620001109683, "loss": 4.3992, "step": 4295 }, { "epoch": 0.44544388208076, "grad_norm": 0.6953125, "learning_rate": 0.00018935712432181459, "loss": 4.3913, "step": 4296 }, { "epoch": 0.4455475701352481, "grad_norm": 0.671875, "learning_rate": 0.00018935224747834123, "loss": 4.4231, "step": 4297 }, { "epoch": 0.44565125818973617, "grad_norm": 0.62890625, "learning_rate": 0.0001893473695806058, "loss": 4.4124, "step": 4298 }, { "epoch": 0.44575494624422424, "grad_norm": 0.67578125, "learning_rate": 0.0001893424906286659, "loss": 4.4123, "step": 4299 }, { "epoch": 0.4458586342987123, "grad_norm": 0.59765625, "learning_rate": 0.000189337610622579, "loss": 4.4489, "step": 4300 }, { "epoch": 0.4459623223532004, "grad_norm": 0.73046875, "learning_rate": 0.00018933272956240277, "loss": 4.3554, "step": 4301 }, { "epoch": 0.44606601040768845, "grad_norm": 0.6796875, "learning_rate": 0.00018932784744819472, "loss": 4.4026, "step": 4302 }, { "epoch": 0.4461696984621765, "grad_norm": 0.65625, "learning_rate": 0.00018932296428001252, "loss": 4.3834, "step": 4303 }, { "epoch": 0.4462733865166646, "grad_norm": 0.6640625, "learning_rate": 0.00018931808005791373, "loss": 4.4165, "step": 4304 }, { "epoch": 0.44637707457115267, "grad_norm": 0.61328125, "learning_rate": 0.00018931319478195606, "loss": 4.3488, "step": 4305 }, { "epoch": 0.44648076262564074, "grad_norm": 0.63671875, "learning_rate": 0.00018930830845219706, "loss": 4.4097, "step": 4306 }, { "epoch": 0.4465844506801288, "grad_norm": 0.69140625, "learning_rate": 0.00018930342106869444, "loss": 4.3845, "step": 4307 }, { "epoch": 0.4466881387346169, "grad_norm": 0.59375, "learning_rate": 0.00018929853263150584, "loss": 4.3781, "step": 4308 }, { "epoch": 0.44679182678910495, "grad_norm": 0.71875, "learning_rate": 0.00018929364314068897, "loss": 4.4109, "step": 4309 }, { "epoch": 0.4468955148435931, "grad_norm": 0.640625, "learning_rate": 0.00018928875259630146, "loss": 4.3925, "step": 4310 }, { "epoch": 0.44699920289808115, "grad_norm": 0.65234375, "learning_rate": 0.00018928386099840107, "loss": 4.3674, "step": 4311 }, { "epoch": 0.4471028909525692, "grad_norm": 0.6484375, "learning_rate": 0.00018927896834704548, "loss": 4.4255, "step": 4312 }, { "epoch": 0.4472065790070573, "grad_norm": 0.734375, "learning_rate": 0.00018927407464229247, "loss": 4.3883, "step": 4313 }, { "epoch": 0.44731026706154536, "grad_norm": 0.72265625, "learning_rate": 0.00018926917988419973, "loss": 4.4199, "step": 4314 }, { "epoch": 0.44741395511603343, "grad_norm": 0.70703125, "learning_rate": 0.00018926428407282502, "loss": 4.4236, "step": 4315 }, { "epoch": 0.4475176431705215, "grad_norm": 0.78125, "learning_rate": 0.00018925938720822612, "loss": 4.4018, "step": 4316 }, { "epoch": 0.4476213312250096, "grad_norm": 0.66796875, "learning_rate": 0.00018925448929046082, "loss": 4.3846, "step": 4317 }, { "epoch": 0.44772501927949765, "grad_norm": 0.765625, "learning_rate": 0.00018924959031958686, "loss": 4.3673, "step": 4318 }, { "epoch": 0.4478287073339857, "grad_norm": 0.69140625, "learning_rate": 0.0001892446902956621, "loss": 4.4087, "step": 4319 }, { "epoch": 0.4479323953884738, "grad_norm": 0.63671875, "learning_rate": 0.0001892397892187443, "loss": 4.4178, "step": 4320 }, { "epoch": 0.44803608344296186, "grad_norm": 0.6953125, "learning_rate": 0.00018923488708889137, "loss": 4.3939, "step": 4321 }, { "epoch": 0.44813977149744993, "grad_norm": 0.66796875, "learning_rate": 0.00018922998390616104, "loss": 4.4221, "step": 4322 }, { "epoch": 0.448243459551938, "grad_norm": 0.63671875, "learning_rate": 0.00018922507967061126, "loss": 4.3855, "step": 4323 }, { "epoch": 0.4483471476064261, "grad_norm": 0.65625, "learning_rate": 0.00018922017438229984, "loss": 4.3899, "step": 4324 }, { "epoch": 0.44845083566091415, "grad_norm": 0.74609375, "learning_rate": 0.00018921526804128468, "loss": 4.4192, "step": 4325 }, { "epoch": 0.4485545237154022, "grad_norm": 0.6328125, "learning_rate": 0.00018921036064762365, "loss": 4.4315, "step": 4326 }, { "epoch": 0.4486582117698903, "grad_norm": 0.640625, "learning_rate": 0.00018920545220137467, "loss": 4.4587, "step": 4327 }, { "epoch": 0.44876189982437836, "grad_norm": 0.66796875, "learning_rate": 0.00018920054270259566, "loss": 4.3906, "step": 4328 }, { "epoch": 0.44886558787886643, "grad_norm": 0.67578125, "learning_rate": 0.00018919563215134453, "loss": 4.3824, "step": 4329 }, { "epoch": 0.4489692759333545, "grad_norm": 0.625, "learning_rate": 0.0001891907205476792, "loss": 4.4012, "step": 4330 }, { "epoch": 0.4490729639878426, "grad_norm": 0.76171875, "learning_rate": 0.00018918580789165765, "loss": 4.4116, "step": 4331 }, { "epoch": 0.44917665204233065, "grad_norm": 0.61328125, "learning_rate": 0.00018918089418333786, "loss": 4.4315, "step": 4332 }, { "epoch": 0.4492803400968187, "grad_norm": 0.66796875, "learning_rate": 0.00018917597942277777, "loss": 4.4087, "step": 4333 }, { "epoch": 0.4493840281513068, "grad_norm": 0.6328125, "learning_rate": 0.0001891710636100354, "loss": 4.3834, "step": 4334 }, { "epoch": 0.44948771620579486, "grad_norm": 0.6796875, "learning_rate": 0.00018916614674516875, "loss": 4.4191, "step": 4335 }, { "epoch": 0.44959140426028293, "grad_norm": 0.73046875, "learning_rate": 0.00018916122882823582, "loss": 4.4078, "step": 4336 }, { "epoch": 0.449695092314771, "grad_norm": 0.70703125, "learning_rate": 0.0001891563098592946, "loss": 4.3853, "step": 4337 }, { "epoch": 0.4497987803692591, "grad_norm": 0.66796875, "learning_rate": 0.00018915138983840318, "loss": 4.366, "step": 4338 }, { "epoch": 0.44990246842374715, "grad_norm": 0.67578125, "learning_rate": 0.00018914646876561962, "loss": 4.3723, "step": 4339 }, { "epoch": 0.4500061564782352, "grad_norm": 0.67578125, "learning_rate": 0.00018914154664100195, "loss": 4.4083, "step": 4340 }, { "epoch": 0.4501098445327233, "grad_norm": 0.69140625, "learning_rate": 0.00018913662346460824, "loss": 4.3813, "step": 4341 }, { "epoch": 0.45021353258721136, "grad_norm": 0.6484375, "learning_rate": 0.00018913169923649663, "loss": 4.3809, "step": 4342 }, { "epoch": 0.45031722064169943, "grad_norm": 0.72265625, "learning_rate": 0.00018912677395672514, "loss": 4.3945, "step": 4343 }, { "epoch": 0.4504209086961875, "grad_norm": 0.6171875, "learning_rate": 0.00018912184762535195, "loss": 4.3716, "step": 4344 }, { "epoch": 0.4505245967506756, "grad_norm": 0.69140625, "learning_rate": 0.00018911692024243517, "loss": 4.3868, "step": 4345 }, { "epoch": 0.45062828480516365, "grad_norm": 0.6328125, "learning_rate": 0.00018911199180803294, "loss": 4.3969, "step": 4346 }, { "epoch": 0.4507319728596517, "grad_norm": 0.70703125, "learning_rate": 0.00018910706232220338, "loss": 4.4198, "step": 4347 }, { "epoch": 0.4508356609141398, "grad_norm": 0.61328125, "learning_rate": 0.0001891021317850047, "loss": 4.4407, "step": 4348 }, { "epoch": 0.45093934896862786, "grad_norm": 0.6875, "learning_rate": 0.000189097200196495, "loss": 4.3974, "step": 4349 }, { "epoch": 0.45104303702311593, "grad_norm": 0.6484375, "learning_rate": 0.00018909226755673257, "loss": 4.3658, "step": 4350 }, { "epoch": 0.451146725077604, "grad_norm": 0.65234375, "learning_rate": 0.00018908733386577552, "loss": 4.3892, "step": 4351 }, { "epoch": 0.4512504131320921, "grad_norm": 0.72265625, "learning_rate": 0.00018908239912368213, "loss": 4.4091, "step": 4352 }, { "epoch": 0.45135410118658015, "grad_norm": 0.546875, "learning_rate": 0.0001890774633305106, "loss": 4.3467, "step": 4353 }, { "epoch": 0.4514577892410682, "grad_norm": 0.703125, "learning_rate": 0.00018907252648631912, "loss": 4.3949, "step": 4354 }, { "epoch": 0.45156147729555635, "grad_norm": 0.58203125, "learning_rate": 0.00018906758859116602, "loss": 4.371, "step": 4355 }, { "epoch": 0.4516651653500444, "grad_norm": 0.74609375, "learning_rate": 0.0001890626496451095, "loss": 4.4002, "step": 4356 }, { "epoch": 0.4517688534045325, "grad_norm": 0.640625, "learning_rate": 0.0001890577096482079, "loss": 4.3438, "step": 4357 }, { "epoch": 0.45187254145902056, "grad_norm": 0.7578125, "learning_rate": 0.0001890527686005194, "loss": 4.4444, "step": 4358 }, { "epoch": 0.45197622951350863, "grad_norm": 0.625, "learning_rate": 0.00018904782650210243, "loss": 4.4304, "step": 4359 }, { "epoch": 0.4520799175679967, "grad_norm": 0.703125, "learning_rate": 0.00018904288335301516, "loss": 4.3794, "step": 4360 }, { "epoch": 0.4521836056224848, "grad_norm": 0.765625, "learning_rate": 0.00018903793915331604, "loss": 4.4026, "step": 4361 }, { "epoch": 0.45228729367697285, "grad_norm": 0.65625, "learning_rate": 0.00018903299390306334, "loss": 4.4174, "step": 4362 }, { "epoch": 0.4523909817314609, "grad_norm": 0.703125, "learning_rate": 0.00018902804760231545, "loss": 4.4262, "step": 4363 }, { "epoch": 0.452494669785949, "grad_norm": 0.640625, "learning_rate": 0.00018902310025113066, "loss": 4.3765, "step": 4364 }, { "epoch": 0.45259835784043706, "grad_norm": 0.64453125, "learning_rate": 0.00018901815184956742, "loss": 4.3958, "step": 4365 }, { "epoch": 0.45270204589492513, "grad_norm": 0.64453125, "learning_rate": 0.00018901320239768406, "loss": 4.3625, "step": 4366 }, { "epoch": 0.4528057339494132, "grad_norm": 0.6484375, "learning_rate": 0.00018900825189553904, "loss": 4.3915, "step": 4367 }, { "epoch": 0.4529094220039013, "grad_norm": 0.62890625, "learning_rate": 0.00018900330034319073, "loss": 4.3817, "step": 4368 }, { "epoch": 0.45301311005838935, "grad_norm": 0.6171875, "learning_rate": 0.00018899834774069752, "loss": 4.4392, "step": 4369 }, { "epoch": 0.4531167981128774, "grad_norm": 0.62890625, "learning_rate": 0.0001889933940881179, "loss": 4.3706, "step": 4370 }, { "epoch": 0.4532204861673655, "grad_norm": 0.5859375, "learning_rate": 0.00018898843938551028, "loss": 4.4122, "step": 4371 }, { "epoch": 0.45332417422185356, "grad_norm": 0.55078125, "learning_rate": 0.00018898348363293317, "loss": 4.3939, "step": 4372 }, { "epoch": 0.45342786227634163, "grad_norm": 0.5703125, "learning_rate": 0.00018897852683044498, "loss": 4.3972, "step": 4373 }, { "epoch": 0.4535315503308297, "grad_norm": 0.5625, "learning_rate": 0.00018897356897810426, "loss": 4.4002, "step": 4374 }, { "epoch": 0.4536352383853178, "grad_norm": 0.5390625, "learning_rate": 0.00018896861007596948, "loss": 4.3926, "step": 4375 }, { "epoch": 0.45373892643980585, "grad_norm": 0.55078125, "learning_rate": 0.00018896365012409914, "loss": 4.4, "step": 4376 }, { "epoch": 0.4538426144942939, "grad_norm": 0.61328125, "learning_rate": 0.00018895868912255175, "loss": 4.3701, "step": 4377 }, { "epoch": 0.453946302548782, "grad_norm": 0.5859375, "learning_rate": 0.00018895372707138585, "loss": 4.3929, "step": 4378 }, { "epoch": 0.45404999060327006, "grad_norm": 0.59765625, "learning_rate": 0.00018894876397066002, "loss": 4.4144, "step": 4379 }, { "epoch": 0.45415367865775813, "grad_norm": 0.578125, "learning_rate": 0.0001889437998204328, "loss": 4.4537, "step": 4380 }, { "epoch": 0.4542573667122462, "grad_norm": 0.69921875, "learning_rate": 0.00018893883462076273, "loss": 4.3803, "step": 4381 }, { "epoch": 0.4543610547667343, "grad_norm": 0.66796875, "learning_rate": 0.00018893386837170849, "loss": 4.3838, "step": 4382 }, { "epoch": 0.45446474282122235, "grad_norm": 0.62890625, "learning_rate": 0.00018892890107332857, "loss": 4.382, "step": 4383 }, { "epoch": 0.4545684308757104, "grad_norm": 0.6796875, "learning_rate": 0.0001889239327256816, "loss": 4.3865, "step": 4384 }, { "epoch": 0.4546721189301985, "grad_norm": 0.59375, "learning_rate": 0.00018891896332882624, "loss": 4.3959, "step": 4385 }, { "epoch": 0.45477580698468656, "grad_norm": 0.66796875, "learning_rate": 0.0001889139928828211, "loss": 4.3629, "step": 4386 }, { "epoch": 0.45487949503917463, "grad_norm": 0.66015625, "learning_rate": 0.00018890902138772483, "loss": 4.3834, "step": 4387 }, { "epoch": 0.4549831830936627, "grad_norm": 0.64453125, "learning_rate": 0.00018890404884359613, "loss": 4.3842, "step": 4388 }, { "epoch": 0.4550868711481508, "grad_norm": 0.77734375, "learning_rate": 0.00018889907525049355, "loss": 4.4223, "step": 4389 }, { "epoch": 0.45519055920263884, "grad_norm": 0.68359375, "learning_rate": 0.0001888941006084759, "loss": 4.3973, "step": 4390 }, { "epoch": 0.4552942472571269, "grad_norm": 0.734375, "learning_rate": 0.00018888912491760182, "loss": 4.3702, "step": 4391 }, { "epoch": 0.455397935311615, "grad_norm": 0.62890625, "learning_rate": 0.00018888414817793003, "loss": 4.413, "step": 4392 }, { "epoch": 0.45550162336610306, "grad_norm": 0.734375, "learning_rate": 0.00018887917038951926, "loss": 4.3807, "step": 4393 }, { "epoch": 0.45560531142059113, "grad_norm": 0.62109375, "learning_rate": 0.0001888741915524282, "loss": 4.4024, "step": 4394 }, { "epoch": 0.4557089994750792, "grad_norm": 0.6875, "learning_rate": 0.00018886921166671563, "loss": 4.3569, "step": 4395 }, { "epoch": 0.4558126875295673, "grad_norm": 0.65234375, "learning_rate": 0.00018886423073244033, "loss": 4.3702, "step": 4396 }, { "epoch": 0.45591637558405534, "grad_norm": 0.6796875, "learning_rate": 0.00018885924874966102, "loss": 4.3712, "step": 4397 }, { "epoch": 0.4560200636385434, "grad_norm": 0.71484375, "learning_rate": 0.0001888542657184365, "loss": 4.4314, "step": 4398 }, { "epoch": 0.4561237516930315, "grad_norm": 0.63671875, "learning_rate": 0.00018884928163882556, "loss": 4.3574, "step": 4399 }, { "epoch": 0.4562274397475196, "grad_norm": 0.73046875, "learning_rate": 0.00018884429651088702, "loss": 4.3811, "step": 4400 }, { "epoch": 0.4563311278020077, "grad_norm": 0.66015625, "learning_rate": 0.0001888393103346797, "loss": 4.3547, "step": 4401 }, { "epoch": 0.45643481585649576, "grad_norm": 0.82421875, "learning_rate": 0.00018883432311026242, "loss": 4.4052, "step": 4402 }, { "epoch": 0.45653850391098383, "grad_norm": 0.62890625, "learning_rate": 0.00018882933483769403, "loss": 4.3587, "step": 4403 }, { "epoch": 0.4566421919654719, "grad_norm": 0.79296875, "learning_rate": 0.0001888243455170334, "loss": 4.3803, "step": 4404 }, { "epoch": 0.45674588001995997, "grad_norm": 0.7421875, "learning_rate": 0.00018881935514833935, "loss": 4.3837, "step": 4405 }, { "epoch": 0.45684956807444804, "grad_norm": 0.75, "learning_rate": 0.00018881436373167083, "loss": 4.3864, "step": 4406 }, { "epoch": 0.4569532561289361, "grad_norm": 0.7109375, "learning_rate": 0.0001888093712670867, "loss": 4.3537, "step": 4407 }, { "epoch": 0.4570569441834242, "grad_norm": 0.734375, "learning_rate": 0.00018880437775464583, "loss": 4.3655, "step": 4408 }, { "epoch": 0.45716063223791226, "grad_norm": 0.82421875, "learning_rate": 0.0001887993831944072, "loss": 4.4028, "step": 4409 }, { "epoch": 0.45726432029240033, "grad_norm": 0.609375, "learning_rate": 0.00018879438758642972, "loss": 4.3589, "step": 4410 }, { "epoch": 0.4573680083468884, "grad_norm": 0.69140625, "learning_rate": 0.00018878939093077232, "loss": 4.3918, "step": 4411 }, { "epoch": 0.45747169640137647, "grad_norm": 0.671875, "learning_rate": 0.00018878439322749392, "loss": 4.3798, "step": 4412 }, { "epoch": 0.45757538445586454, "grad_norm": 0.69140625, "learning_rate": 0.00018877939447665358, "loss": 4.3469, "step": 4413 }, { "epoch": 0.4576790725103526, "grad_norm": 0.63671875, "learning_rate": 0.00018877439467831022, "loss": 4.3823, "step": 4414 }, { "epoch": 0.4577827605648407, "grad_norm": 0.7421875, "learning_rate": 0.00018876939383252284, "loss": 4.3694, "step": 4415 }, { "epoch": 0.45788644861932876, "grad_norm": 0.6484375, "learning_rate": 0.00018876439193935042, "loss": 4.3747, "step": 4416 }, { "epoch": 0.45799013667381683, "grad_norm": 0.703125, "learning_rate": 0.00018875938899885202, "loss": 4.4072, "step": 4417 }, { "epoch": 0.4580938247283049, "grad_norm": 0.66796875, "learning_rate": 0.00018875438501108667, "loss": 4.395, "step": 4418 }, { "epoch": 0.45819751278279297, "grad_norm": 0.64453125, "learning_rate": 0.00018874937997611336, "loss": 4.3889, "step": 4419 }, { "epoch": 0.45830120083728104, "grad_norm": 0.7265625, "learning_rate": 0.00018874437389399123, "loss": 4.372, "step": 4420 }, { "epoch": 0.4584048888917691, "grad_norm": 0.671875, "learning_rate": 0.00018873936676477927, "loss": 4.3597, "step": 4421 }, { "epoch": 0.4585085769462572, "grad_norm": 0.75390625, "learning_rate": 0.00018873435858853655, "loss": 4.3904, "step": 4422 }, { "epoch": 0.45861226500074526, "grad_norm": 0.671875, "learning_rate": 0.0001887293493653222, "loss": 4.3842, "step": 4423 }, { "epoch": 0.4587159530552333, "grad_norm": 0.625, "learning_rate": 0.00018872433909519537, "loss": 4.3949, "step": 4424 }, { "epoch": 0.4588196411097214, "grad_norm": 0.62109375, "learning_rate": 0.00018871932777821509, "loss": 4.4121, "step": 4425 }, { "epoch": 0.45892332916420947, "grad_norm": 0.64453125, "learning_rate": 0.00018871431541444053, "loss": 4.3992, "step": 4426 }, { "epoch": 0.45902701721869754, "grad_norm": 0.6484375, "learning_rate": 0.00018870930200393083, "loss": 4.3586, "step": 4427 }, { "epoch": 0.4591307052731856, "grad_norm": 0.66796875, "learning_rate": 0.0001887042875467451, "loss": 4.3472, "step": 4428 }, { "epoch": 0.4592343933276737, "grad_norm": 0.6015625, "learning_rate": 0.00018869927204294258, "loss": 4.3902, "step": 4429 }, { "epoch": 0.45933808138216176, "grad_norm": 0.5546875, "learning_rate": 0.0001886942554925824, "loss": 4.4539, "step": 4430 }, { "epoch": 0.4594417694366498, "grad_norm": 0.625, "learning_rate": 0.00018868923789572376, "loss": 4.3847, "step": 4431 }, { "epoch": 0.4595454574911379, "grad_norm": 0.57421875, "learning_rate": 0.00018868421925242586, "loss": 4.3835, "step": 4432 }, { "epoch": 0.45964914554562597, "grad_norm": 0.640625, "learning_rate": 0.0001886791995627479, "loss": 4.3715, "step": 4433 }, { "epoch": 0.45975283360011404, "grad_norm": 0.58203125, "learning_rate": 0.00018867417882674915, "loss": 4.3309, "step": 4434 }, { "epoch": 0.4598565216546021, "grad_norm": 0.62109375, "learning_rate": 0.0001886691570444888, "loss": 4.3963, "step": 4435 }, { "epoch": 0.4599602097090902, "grad_norm": 0.59375, "learning_rate": 0.00018866413421602613, "loss": 4.4098, "step": 4436 }, { "epoch": 0.46006389776357826, "grad_norm": 0.6015625, "learning_rate": 0.00018865911034142042, "loss": 4.3952, "step": 4437 }, { "epoch": 0.4601675858180663, "grad_norm": 0.58203125, "learning_rate": 0.00018865408542073089, "loss": 4.4235, "step": 4438 }, { "epoch": 0.4602712738725544, "grad_norm": 0.640625, "learning_rate": 0.00018864905945401687, "loss": 4.3633, "step": 4439 }, { "epoch": 0.46037496192704247, "grad_norm": 0.58984375, "learning_rate": 0.00018864403244133767, "loss": 4.395, "step": 4440 }, { "epoch": 0.46047864998153054, "grad_norm": 0.65625, "learning_rate": 0.00018863900438275256, "loss": 4.3403, "step": 4441 }, { "epoch": 0.4605823380360186, "grad_norm": 0.6328125, "learning_rate": 0.00018863397527832095, "loss": 4.3693, "step": 4442 }, { "epoch": 0.4606860260905067, "grad_norm": 0.6328125, "learning_rate": 0.00018862894512810207, "loss": 4.4441, "step": 4443 }, { "epoch": 0.4607897141449948, "grad_norm": 0.68359375, "learning_rate": 0.00018862391393215534, "loss": 4.3215, "step": 4444 }, { "epoch": 0.4608934021994829, "grad_norm": 0.5859375, "learning_rate": 0.00018861888169054012, "loss": 4.3838, "step": 4445 }, { "epoch": 0.46099709025397095, "grad_norm": 0.68359375, "learning_rate": 0.00018861384840331575, "loss": 4.3675, "step": 4446 }, { "epoch": 0.461100778308459, "grad_norm": 0.60546875, "learning_rate": 0.00018860881407054163, "loss": 4.3918, "step": 4447 }, { "epoch": 0.4612044663629471, "grad_norm": 0.6328125, "learning_rate": 0.0001886037786922772, "loss": 4.3753, "step": 4448 }, { "epoch": 0.46130815441743517, "grad_norm": 0.63671875, "learning_rate": 0.0001885987422685818, "loss": 4.4139, "step": 4449 }, { "epoch": 0.46141184247192324, "grad_norm": 0.65234375, "learning_rate": 0.00018859370479951492, "loss": 4.4179, "step": 4450 }, { "epoch": 0.4615155305264113, "grad_norm": 0.625, "learning_rate": 0.00018858866628513598, "loss": 4.32, "step": 4451 }, { "epoch": 0.4616192185808994, "grad_norm": 0.546875, "learning_rate": 0.0001885836267255044, "loss": 4.4127, "step": 4452 }, { "epoch": 0.46172290663538745, "grad_norm": 0.67578125, "learning_rate": 0.00018857858612067967, "loss": 4.3886, "step": 4453 }, { "epoch": 0.4618265946898755, "grad_norm": 0.57421875, "learning_rate": 0.00018857354447072123, "loss": 4.3949, "step": 4454 }, { "epoch": 0.4619302827443636, "grad_norm": 0.6171875, "learning_rate": 0.00018856850177568864, "loss": 4.3957, "step": 4455 }, { "epoch": 0.46203397079885167, "grad_norm": 0.66015625, "learning_rate": 0.00018856345803564133, "loss": 4.3744, "step": 4456 }, { "epoch": 0.46213765885333974, "grad_norm": 0.62109375, "learning_rate": 0.00018855841325063883, "loss": 4.3692, "step": 4457 }, { "epoch": 0.4622413469078278, "grad_norm": 0.59375, "learning_rate": 0.00018855336742074066, "loss": 4.3896, "step": 4458 }, { "epoch": 0.4623450349623159, "grad_norm": 0.61328125, "learning_rate": 0.00018854832054600635, "loss": 4.3986, "step": 4459 }, { "epoch": 0.46244872301680395, "grad_norm": 0.6015625, "learning_rate": 0.00018854327262649546, "loss": 4.377, "step": 4460 }, { "epoch": 0.462552411071292, "grad_norm": 0.6484375, "learning_rate": 0.00018853822366226756, "loss": 4.3758, "step": 4461 }, { "epoch": 0.4626560991257801, "grad_norm": 0.56640625, "learning_rate": 0.00018853317365338218, "loss": 4.4131, "step": 4462 }, { "epoch": 0.46275978718026817, "grad_norm": 0.625, "learning_rate": 0.000188528122599899, "loss": 4.3954, "step": 4463 }, { "epoch": 0.46286347523475624, "grad_norm": 0.60546875, "learning_rate": 0.00018852307050187749, "loss": 4.3832, "step": 4464 }, { "epoch": 0.4629671632892443, "grad_norm": 0.6640625, "learning_rate": 0.00018851801735937732, "loss": 4.3479, "step": 4465 }, { "epoch": 0.4630708513437324, "grad_norm": 0.66796875, "learning_rate": 0.00018851296317245816, "loss": 4.3886, "step": 4466 }, { "epoch": 0.46317453939822045, "grad_norm": 0.58203125, "learning_rate": 0.00018850790794117957, "loss": 4.3542, "step": 4467 }, { "epoch": 0.4632782274527085, "grad_norm": 0.6875, "learning_rate": 0.0001885028516656012, "loss": 4.3936, "step": 4468 }, { "epoch": 0.4633819155071966, "grad_norm": 0.66015625, "learning_rate": 0.00018849779434578276, "loss": 4.4084, "step": 4469 }, { "epoch": 0.46348560356168467, "grad_norm": 0.65234375, "learning_rate": 0.0001884927359817839, "loss": 4.4057, "step": 4470 }, { "epoch": 0.46358929161617274, "grad_norm": 0.6484375, "learning_rate": 0.0001884876765736643, "loss": 4.3766, "step": 4471 }, { "epoch": 0.4636929796706608, "grad_norm": 0.69140625, "learning_rate": 0.0001884826161214836, "loss": 4.3469, "step": 4472 }, { "epoch": 0.4637966677251489, "grad_norm": 0.69140625, "learning_rate": 0.00018847755462530162, "loss": 4.4156, "step": 4473 }, { "epoch": 0.46390035577963695, "grad_norm": 0.765625, "learning_rate": 0.000188472492085178, "loss": 4.4327, "step": 4474 }, { "epoch": 0.464004043834125, "grad_norm": 0.6640625, "learning_rate": 0.00018846742850117248, "loss": 4.4199, "step": 4475 }, { "epoch": 0.4641077318886131, "grad_norm": 0.63671875, "learning_rate": 0.0001884623638733448, "loss": 4.3494, "step": 4476 }, { "epoch": 0.46421141994310117, "grad_norm": 0.68359375, "learning_rate": 0.00018845729820175477, "loss": 4.3995, "step": 4477 }, { "epoch": 0.46431510799758924, "grad_norm": 0.67578125, "learning_rate": 0.0001884522314864621, "loss": 4.329, "step": 4478 }, { "epoch": 0.4644187960520773, "grad_norm": 0.74609375, "learning_rate": 0.00018844716372752663, "loss": 4.4106, "step": 4479 }, { "epoch": 0.4645224841065654, "grad_norm": 0.70703125, "learning_rate": 0.00018844209492500813, "loss": 4.4032, "step": 4480 }, { "epoch": 0.46462617216105345, "grad_norm": 0.69921875, "learning_rate": 0.00018843702507896634, "loss": 4.4141, "step": 4481 }, { "epoch": 0.4647298602155415, "grad_norm": 0.6484375, "learning_rate": 0.00018843195418946117, "loss": 4.4069, "step": 4482 }, { "epoch": 0.4648335482700296, "grad_norm": 0.69140625, "learning_rate": 0.00018842688225655243, "loss": 4.3916, "step": 4483 }, { "epoch": 0.46493723632451767, "grad_norm": 0.70703125, "learning_rate": 0.00018842180928029992, "loss": 4.3536, "step": 4484 }, { "epoch": 0.46504092437900574, "grad_norm": 0.71875, "learning_rate": 0.00018841673526076355, "loss": 4.3959, "step": 4485 }, { "epoch": 0.4651446124334938, "grad_norm": 0.6484375, "learning_rate": 0.00018841166019800315, "loss": 4.4139, "step": 4486 }, { "epoch": 0.4652483004879819, "grad_norm": 0.70703125, "learning_rate": 0.00018840658409207862, "loss": 4.31, "step": 4487 }, { "epoch": 0.46535198854246995, "grad_norm": 0.640625, "learning_rate": 0.00018840150694304986, "loss": 4.3708, "step": 4488 }, { "epoch": 0.4654556765969581, "grad_norm": 0.671875, "learning_rate": 0.00018839642875097674, "loss": 4.3896, "step": 4489 }, { "epoch": 0.46555936465144615, "grad_norm": 0.6875, "learning_rate": 0.0001883913495159192, "loss": 4.374, "step": 4490 }, { "epoch": 0.4656630527059342, "grad_norm": 0.5859375, "learning_rate": 0.0001883862692379372, "loss": 4.3423, "step": 4491 }, { "epoch": 0.4657667407604223, "grad_norm": 0.63671875, "learning_rate": 0.00018838118791709063, "loss": 4.4045, "step": 4492 }, { "epoch": 0.46587042881491036, "grad_norm": 0.65625, "learning_rate": 0.00018837610555343947, "loss": 4.3357, "step": 4493 }, { "epoch": 0.46597411686939844, "grad_norm": 0.6875, "learning_rate": 0.00018837102214704367, "loss": 4.3608, "step": 4494 }, { "epoch": 0.4660778049238865, "grad_norm": 0.62890625, "learning_rate": 0.0001883659376979632, "loss": 4.3956, "step": 4495 }, { "epoch": 0.4661814929783746, "grad_norm": 0.6875, "learning_rate": 0.00018836085220625814, "loss": 4.4046, "step": 4496 }, { "epoch": 0.46628518103286265, "grad_norm": 0.58984375, "learning_rate": 0.00018835576567198837, "loss": 4.3886, "step": 4497 }, { "epoch": 0.4663888690873507, "grad_norm": 0.7421875, "learning_rate": 0.000188350678095214, "loss": 4.363, "step": 4498 }, { "epoch": 0.4664925571418388, "grad_norm": 0.59765625, "learning_rate": 0.00018834558947599498, "loss": 4.321, "step": 4499 }, { "epoch": 0.46659624519632686, "grad_norm": 0.68359375, "learning_rate": 0.0001883404998143914, "loss": 4.3768, "step": 4500 }, { "epoch": 0.46669993325081494, "grad_norm": 0.73046875, "learning_rate": 0.0001883354091104633, "loss": 4.369, "step": 4501 }, { "epoch": 0.466803621305303, "grad_norm": 0.6171875, "learning_rate": 0.00018833031736427075, "loss": 4.3661, "step": 4502 }, { "epoch": 0.4669073093597911, "grad_norm": 0.73046875, "learning_rate": 0.00018832522457587386, "loss": 4.3625, "step": 4503 }, { "epoch": 0.46701099741427915, "grad_norm": 0.6640625, "learning_rate": 0.00018832013074533265, "loss": 4.3675, "step": 4504 }, { "epoch": 0.4671146854687672, "grad_norm": 0.6796875, "learning_rate": 0.00018831503587270727, "loss": 4.3922, "step": 4505 }, { "epoch": 0.4672183735232553, "grad_norm": 0.67578125, "learning_rate": 0.00018830993995805782, "loss": 4.385, "step": 4506 }, { "epoch": 0.46732206157774336, "grad_norm": 0.66796875, "learning_rate": 0.0001883048430014444, "loss": 4.3942, "step": 4507 }, { "epoch": 0.46742574963223144, "grad_norm": 0.6875, "learning_rate": 0.00018829974500292717, "loss": 4.3906, "step": 4508 }, { "epoch": 0.4675294376867195, "grad_norm": 0.76171875, "learning_rate": 0.00018829464596256632, "loss": 4.3429, "step": 4509 }, { "epoch": 0.4676331257412076, "grad_norm": 0.66015625, "learning_rate": 0.00018828954588042196, "loss": 4.4135, "step": 4510 }, { "epoch": 0.46773681379569565, "grad_norm": 0.82421875, "learning_rate": 0.0001882844447565543, "loss": 4.3824, "step": 4511 }, { "epoch": 0.4678405018501837, "grad_norm": 0.61328125, "learning_rate": 0.00018827934259102352, "loss": 4.3978, "step": 4512 }, { "epoch": 0.4679441899046718, "grad_norm": 0.78515625, "learning_rate": 0.00018827423938388977, "loss": 4.3683, "step": 4513 }, { "epoch": 0.46804787795915986, "grad_norm": 0.69921875, "learning_rate": 0.00018826913513521335, "loss": 4.4184, "step": 4514 }, { "epoch": 0.46815156601364794, "grad_norm": 0.62109375, "learning_rate": 0.0001882640298450544, "loss": 4.3855, "step": 4515 }, { "epoch": 0.468255254068136, "grad_norm": 0.64453125, "learning_rate": 0.0001882589235134732, "loss": 4.3802, "step": 4516 }, { "epoch": 0.4683589421226241, "grad_norm": 0.62109375, "learning_rate": 0.00018825381614053004, "loss": 4.3976, "step": 4517 }, { "epoch": 0.46846263017711215, "grad_norm": 0.5859375, "learning_rate": 0.00018824870772628512, "loss": 4.3788, "step": 4518 }, { "epoch": 0.4685663182316002, "grad_norm": 0.62109375, "learning_rate": 0.00018824359827079873, "loss": 4.391, "step": 4519 }, { "epoch": 0.4686700062860883, "grad_norm": 0.56640625, "learning_rate": 0.00018823848777413114, "loss": 4.3887, "step": 4520 }, { "epoch": 0.46877369434057636, "grad_norm": 0.62890625, "learning_rate": 0.00018823337623634267, "loss": 4.407, "step": 4521 }, { "epoch": 0.46887738239506443, "grad_norm": 0.58203125, "learning_rate": 0.00018822826365749365, "loss": 4.4155, "step": 4522 }, { "epoch": 0.4689810704495525, "grad_norm": 0.640625, "learning_rate": 0.00018822315003764434, "loss": 4.4231, "step": 4523 }, { "epoch": 0.4690847585040406, "grad_norm": 0.58984375, "learning_rate": 0.00018821803537685515, "loss": 4.3997, "step": 4524 }, { "epoch": 0.46918844655852865, "grad_norm": 0.7421875, "learning_rate": 0.00018821291967518637, "loss": 4.3025, "step": 4525 }, { "epoch": 0.4692921346130167, "grad_norm": 0.640625, "learning_rate": 0.0001882078029326984, "loss": 4.4274, "step": 4526 }, { "epoch": 0.4693958226675048, "grad_norm": 0.66796875, "learning_rate": 0.0001882026851494516, "loss": 4.3967, "step": 4527 }, { "epoch": 0.46949951072199286, "grad_norm": 0.74609375, "learning_rate": 0.00018819756632550635, "loss": 4.3833, "step": 4528 }, { "epoch": 0.46960319877648093, "grad_norm": 0.6640625, "learning_rate": 0.00018819244646092303, "loss": 4.3692, "step": 4529 }, { "epoch": 0.469706886830969, "grad_norm": 0.6875, "learning_rate": 0.00018818732555576207, "loss": 4.3743, "step": 4530 }, { "epoch": 0.4698105748854571, "grad_norm": 0.72265625, "learning_rate": 0.0001881822036100839, "loss": 4.3747, "step": 4531 }, { "epoch": 0.46991426293994515, "grad_norm": 0.59765625, "learning_rate": 0.0001881770806239489, "loss": 4.3656, "step": 4532 }, { "epoch": 0.4700179509944332, "grad_norm": 0.66796875, "learning_rate": 0.0001881719565974176, "loss": 4.3915, "step": 4533 }, { "epoch": 0.47012163904892135, "grad_norm": 0.60546875, "learning_rate": 0.0001881668315305504, "loss": 4.3953, "step": 4534 }, { "epoch": 0.4702253271034094, "grad_norm": 0.6953125, "learning_rate": 0.0001881617054234078, "loss": 4.3876, "step": 4535 }, { "epoch": 0.4703290151578975, "grad_norm": 0.609375, "learning_rate": 0.00018815657827605023, "loss": 4.3467, "step": 4536 }, { "epoch": 0.47043270321238556, "grad_norm": 0.6640625, "learning_rate": 0.00018815145008853823, "loss": 4.3883, "step": 4537 }, { "epoch": 0.47053639126687363, "grad_norm": 0.6328125, "learning_rate": 0.00018814632086093234, "loss": 4.3748, "step": 4538 }, { "epoch": 0.4706400793213617, "grad_norm": 0.640625, "learning_rate": 0.000188141190593293, "loss": 4.3926, "step": 4539 }, { "epoch": 0.4707437673758498, "grad_norm": 0.66796875, "learning_rate": 0.0001881360592856808, "loss": 4.3796, "step": 4540 }, { "epoch": 0.47084745543033785, "grad_norm": 0.6171875, "learning_rate": 0.00018813092693815623, "loss": 4.357, "step": 4541 }, { "epoch": 0.4709511434848259, "grad_norm": 0.68359375, "learning_rate": 0.00018812579355077992, "loss": 4.3667, "step": 4542 }, { "epoch": 0.471054831539314, "grad_norm": 0.66015625, "learning_rate": 0.0001881206591236124, "loss": 4.3696, "step": 4543 }, { "epoch": 0.47115851959380206, "grad_norm": 0.6953125, "learning_rate": 0.00018811552365671422, "loss": 4.4268, "step": 4544 }, { "epoch": 0.47126220764829013, "grad_norm": 0.59375, "learning_rate": 0.000188110387150146, "loss": 4.372, "step": 4545 }, { "epoch": 0.4713658957027782, "grad_norm": 0.6328125, "learning_rate": 0.00018810524960396837, "loss": 4.3459, "step": 4546 }, { "epoch": 0.4714695837572663, "grad_norm": 0.6171875, "learning_rate": 0.00018810011101824193, "loss": 4.3908, "step": 4547 }, { "epoch": 0.47157327181175435, "grad_norm": 0.640625, "learning_rate": 0.0001880949713930273, "loss": 4.4018, "step": 4548 }, { "epoch": 0.4716769598662424, "grad_norm": 0.609375, "learning_rate": 0.0001880898307283851, "loss": 4.3963, "step": 4549 }, { "epoch": 0.4717806479207305, "grad_norm": 0.65625, "learning_rate": 0.00018808468902437606, "loss": 4.4004, "step": 4550 }, { "epoch": 0.47188433597521856, "grad_norm": 0.58984375, "learning_rate": 0.00018807954628106076, "loss": 4.3346, "step": 4551 }, { "epoch": 0.47198802402970663, "grad_norm": 0.70703125, "learning_rate": 0.00018807440249849996, "loss": 4.3979, "step": 4552 }, { "epoch": 0.4720917120841947, "grad_norm": 0.5859375, "learning_rate": 0.00018806925767675425, "loss": 4.3794, "step": 4553 }, { "epoch": 0.4721954001386828, "grad_norm": 0.69921875, "learning_rate": 0.00018806411181588443, "loss": 4.3476, "step": 4554 }, { "epoch": 0.47229908819317085, "grad_norm": 0.671875, "learning_rate": 0.00018805896491595116, "loss": 4.3423, "step": 4555 }, { "epoch": 0.4724027762476589, "grad_norm": 0.64453125, "learning_rate": 0.00018805381697701524, "loss": 4.3548, "step": 4556 }, { "epoch": 0.472506464302147, "grad_norm": 0.69140625, "learning_rate": 0.0001880486679991373, "loss": 4.3531, "step": 4557 }, { "epoch": 0.47261015235663506, "grad_norm": 0.58203125, "learning_rate": 0.00018804351798237817, "loss": 4.3784, "step": 4558 }, { "epoch": 0.47271384041112313, "grad_norm": 0.6640625, "learning_rate": 0.00018803836692679856, "loss": 4.3495, "step": 4559 }, { "epoch": 0.4728175284656112, "grad_norm": 0.63671875, "learning_rate": 0.00018803321483245932, "loss": 4.3597, "step": 4560 }, { "epoch": 0.4729212165200993, "grad_norm": 0.62890625, "learning_rate": 0.00018802806169942123, "loss": 4.3687, "step": 4561 }, { "epoch": 0.47302490457458735, "grad_norm": 0.64453125, "learning_rate": 0.000188022907527745, "loss": 4.3541, "step": 4562 }, { "epoch": 0.4731285926290754, "grad_norm": 0.6015625, "learning_rate": 0.00018801775231749152, "loss": 4.412, "step": 4563 }, { "epoch": 0.4732322806835635, "grad_norm": 0.71875, "learning_rate": 0.0001880125960687216, "loss": 4.359, "step": 4564 }, { "epoch": 0.47333596873805156, "grad_norm": 0.60546875, "learning_rate": 0.00018800743878149613, "loss": 4.3814, "step": 4565 }, { "epoch": 0.47343965679253963, "grad_norm": 0.703125, "learning_rate": 0.00018800228045587586, "loss": 4.3545, "step": 4566 }, { "epoch": 0.4735433448470277, "grad_norm": 0.58984375, "learning_rate": 0.0001879971210919217, "loss": 4.377, "step": 4567 }, { "epoch": 0.4736470329015158, "grad_norm": 0.69140625, "learning_rate": 0.00018799196068969453, "loss": 4.3649, "step": 4568 }, { "epoch": 0.47375072095600385, "grad_norm": 0.640625, "learning_rate": 0.00018798679924925525, "loss": 4.3682, "step": 4569 }, { "epoch": 0.4738544090104919, "grad_norm": 0.62890625, "learning_rate": 0.00018798163677066475, "loss": 4.3693, "step": 4570 }, { "epoch": 0.47395809706498, "grad_norm": 0.66015625, "learning_rate": 0.00018797647325398392, "loss": 4.3592, "step": 4571 }, { "epoch": 0.47406178511946806, "grad_norm": 0.625, "learning_rate": 0.0001879713086992737, "loss": 4.3882, "step": 4572 }, { "epoch": 0.47416547317395613, "grad_norm": 0.703125, "learning_rate": 0.00018796614310659506, "loss": 4.3977, "step": 4573 }, { "epoch": 0.4742691612284442, "grad_norm": 0.6953125, "learning_rate": 0.00018796097647600887, "loss": 4.3923, "step": 4574 }, { "epoch": 0.4743728492829323, "grad_norm": 0.70703125, "learning_rate": 0.00018795580880757618, "loss": 4.3809, "step": 4575 }, { "epoch": 0.47447653733742035, "grad_norm": 0.79296875, "learning_rate": 0.00018795064010135787, "loss": 4.403, "step": 4576 }, { "epoch": 0.4745802253919084, "grad_norm": 0.67578125, "learning_rate": 0.000187945470357415, "loss": 4.3187, "step": 4577 }, { "epoch": 0.47468391344639654, "grad_norm": 0.68359375, "learning_rate": 0.00018794029957580857, "loss": 4.3486, "step": 4578 }, { "epoch": 0.4747876015008846, "grad_norm": 0.77734375, "learning_rate": 0.0001879351277565995, "loss": 4.3955, "step": 4579 }, { "epoch": 0.4748912895553727, "grad_norm": 0.59765625, "learning_rate": 0.00018792995489984893, "loss": 4.3646, "step": 4580 }, { "epoch": 0.47499497760986076, "grad_norm": 0.80859375, "learning_rate": 0.0001879247810056178, "loss": 4.3669, "step": 4581 }, { "epoch": 0.47509866566434883, "grad_norm": 0.68359375, "learning_rate": 0.00018791960607396723, "loss": 4.3681, "step": 4582 }, { "epoch": 0.4752023537188369, "grad_norm": 0.60546875, "learning_rate": 0.0001879144301049582, "loss": 4.3619, "step": 4583 }, { "epoch": 0.47530604177332497, "grad_norm": 0.62109375, "learning_rate": 0.0001879092530986519, "loss": 4.3535, "step": 4584 }, { "epoch": 0.47540972982781304, "grad_norm": 0.6015625, "learning_rate": 0.00018790407505510929, "loss": 4.33, "step": 4585 }, { "epoch": 0.4755134178823011, "grad_norm": 0.6015625, "learning_rate": 0.00018789889597439148, "loss": 4.4277, "step": 4586 }, { "epoch": 0.4756171059367892, "grad_norm": 0.65625, "learning_rate": 0.00018789371585655964, "loss": 4.3586, "step": 4587 }, { "epoch": 0.47572079399127726, "grad_norm": 0.73046875, "learning_rate": 0.00018788853470167488, "loss": 4.3724, "step": 4588 }, { "epoch": 0.47582448204576533, "grad_norm": 0.6796875, "learning_rate": 0.00018788335250979828, "loss": 4.3605, "step": 4589 }, { "epoch": 0.4759281701002534, "grad_norm": 0.671875, "learning_rate": 0.00018787816928099102, "loss": 4.3503, "step": 4590 }, { "epoch": 0.47603185815474147, "grad_norm": 0.671875, "learning_rate": 0.00018787298501531428, "loss": 4.4126, "step": 4591 }, { "epoch": 0.47613554620922954, "grad_norm": 0.76953125, "learning_rate": 0.00018786779971282917, "loss": 4.3549, "step": 4592 }, { "epoch": 0.4762392342637176, "grad_norm": 0.60546875, "learning_rate": 0.0001878626133735969, "loss": 4.3635, "step": 4593 }, { "epoch": 0.4763429223182057, "grad_norm": 0.80078125, "learning_rate": 0.00018785742599767872, "loss": 4.3495, "step": 4594 }, { "epoch": 0.47644661037269376, "grad_norm": 0.6953125, "learning_rate": 0.00018785223758513575, "loss": 4.3772, "step": 4595 }, { "epoch": 0.47655029842718183, "grad_norm": 0.6875, "learning_rate": 0.00018784704813602925, "loss": 4.3902, "step": 4596 }, { "epoch": 0.4766539864816699, "grad_norm": 0.73046875, "learning_rate": 0.00018784185765042042, "loss": 4.3868, "step": 4597 }, { "epoch": 0.47675767453615797, "grad_norm": 0.6796875, "learning_rate": 0.00018783666612837054, "loss": 4.3831, "step": 4598 }, { "epoch": 0.47686136259064604, "grad_norm": 0.6953125, "learning_rate": 0.00018783147356994084, "loss": 4.3242, "step": 4599 }, { "epoch": 0.4769650506451341, "grad_norm": 0.66796875, "learning_rate": 0.0001878262799751926, "loss": 4.3894, "step": 4600 }, { "epoch": 0.4770687386996222, "grad_norm": 0.8125, "learning_rate": 0.00018782108534418708, "loss": 4.3856, "step": 4601 }, { "epoch": 0.47717242675411026, "grad_norm": 0.66796875, "learning_rate": 0.00018781588967698557, "loss": 4.3688, "step": 4602 }, { "epoch": 0.47727611480859833, "grad_norm": 0.73828125, "learning_rate": 0.00018781069297364944, "loss": 4.3715, "step": 4603 }, { "epoch": 0.4773798028630864, "grad_norm": 0.65234375, "learning_rate": 0.0001878054952342399, "loss": 4.3521, "step": 4604 }, { "epoch": 0.47748349091757447, "grad_norm": 0.67578125, "learning_rate": 0.00018780029645881836, "loss": 4.3793, "step": 4605 }, { "epoch": 0.47758717897206254, "grad_norm": 0.71484375, "learning_rate": 0.0001877950966474461, "loss": 4.3731, "step": 4606 }, { "epoch": 0.4776908670265506, "grad_norm": 0.66015625, "learning_rate": 0.00018778989580018455, "loss": 4.3969, "step": 4607 }, { "epoch": 0.4777945550810387, "grad_norm": 0.75390625, "learning_rate": 0.000187784693917095, "loss": 4.3838, "step": 4608 }, { "epoch": 0.47789824313552676, "grad_norm": 0.70703125, "learning_rate": 0.00018777949099823887, "loss": 4.3661, "step": 4609 }, { "epoch": 0.47800193119001483, "grad_norm": 0.734375, "learning_rate": 0.00018777428704367752, "loss": 4.3812, "step": 4610 }, { "epoch": 0.4781056192445029, "grad_norm": 0.70703125, "learning_rate": 0.00018776908205347237, "loss": 4.3726, "step": 4611 }, { "epoch": 0.47820930729899097, "grad_norm": 0.84765625, "learning_rate": 0.00018776387602768483, "loss": 4.3957, "step": 4612 }, { "epoch": 0.47831299535347904, "grad_norm": 0.640625, "learning_rate": 0.0001877586689663763, "loss": 4.3695, "step": 4613 }, { "epoch": 0.4784166834079671, "grad_norm": 0.69921875, "learning_rate": 0.00018775346086960827, "loss": 4.3326, "step": 4614 }, { "epoch": 0.4785203714624552, "grad_norm": 0.68359375, "learning_rate": 0.00018774825173744212, "loss": 4.394, "step": 4615 }, { "epoch": 0.47862405951694326, "grad_norm": 0.64453125, "learning_rate": 0.0001877430415699394, "loss": 4.3301, "step": 4616 }, { "epoch": 0.47872774757143133, "grad_norm": 0.62109375, "learning_rate": 0.00018773783036716153, "loss": 4.3479, "step": 4617 }, { "epoch": 0.4788314356259194, "grad_norm": 0.67578125, "learning_rate": 0.00018773261812916997, "loss": 4.3828, "step": 4618 }, { "epoch": 0.47893512368040747, "grad_norm": 0.625, "learning_rate": 0.00018772740485602628, "loss": 4.3921, "step": 4619 }, { "epoch": 0.47903881173489554, "grad_norm": 0.671875, "learning_rate": 0.00018772219054779193, "loss": 4.3895, "step": 4620 }, { "epoch": 0.4791424997893836, "grad_norm": 0.6484375, "learning_rate": 0.0001877169752045285, "loss": 4.3958, "step": 4621 }, { "epoch": 0.4792461878438717, "grad_norm": 0.671875, "learning_rate": 0.00018771175882629744, "loss": 4.3306, "step": 4622 }, { "epoch": 0.4793498758983598, "grad_norm": 0.68359375, "learning_rate": 0.00018770654141316037, "loss": 4.3752, "step": 4623 }, { "epoch": 0.4794535639528479, "grad_norm": 0.62890625, "learning_rate": 0.0001877013229651788, "loss": 4.3535, "step": 4624 }, { "epoch": 0.47955725200733595, "grad_norm": 0.6953125, "learning_rate": 0.00018769610348241434, "loss": 4.3461, "step": 4625 }, { "epoch": 0.479660940061824, "grad_norm": 0.62890625, "learning_rate": 0.00018769088296492854, "loss": 4.3541, "step": 4626 }, { "epoch": 0.4797646281163121, "grad_norm": 0.6640625, "learning_rate": 0.000187685661412783, "loss": 4.3587, "step": 4627 }, { "epoch": 0.47986831617080017, "grad_norm": 0.73828125, "learning_rate": 0.00018768043882603935, "loss": 4.3785, "step": 4628 }, { "epoch": 0.47997200422528824, "grad_norm": 0.6875, "learning_rate": 0.00018767521520475925, "loss": 4.3602, "step": 4629 }, { "epoch": 0.4800756922797763, "grad_norm": 0.80859375, "learning_rate": 0.00018766999054900424, "loss": 4.403, "step": 4630 }, { "epoch": 0.4801793803342644, "grad_norm": 0.77734375, "learning_rate": 0.00018766476485883603, "loss": 4.3446, "step": 4631 }, { "epoch": 0.48028306838875245, "grad_norm": 0.6953125, "learning_rate": 0.00018765953813431628, "loss": 4.3514, "step": 4632 }, { "epoch": 0.4803867564432405, "grad_norm": 0.80078125, "learning_rate": 0.00018765431037550662, "loss": 4.3735, "step": 4633 }, { "epoch": 0.4804904444977286, "grad_norm": 0.7265625, "learning_rate": 0.00018764908158246875, "loss": 4.3809, "step": 4634 }, { "epoch": 0.48059413255221667, "grad_norm": 0.76953125, "learning_rate": 0.00018764385175526436, "loss": 4.3671, "step": 4635 }, { "epoch": 0.48069782060670474, "grad_norm": 0.67578125, "learning_rate": 0.00018763862089395515, "loss": 4.3401, "step": 4636 }, { "epoch": 0.4808015086611928, "grad_norm": 0.71875, "learning_rate": 0.00018763338899860287, "loss": 4.3297, "step": 4637 }, { "epoch": 0.4809051967156809, "grad_norm": 0.73828125, "learning_rate": 0.00018762815606926926, "loss": 4.3903, "step": 4638 }, { "epoch": 0.48100888477016895, "grad_norm": 0.7578125, "learning_rate": 0.000187622922106016, "loss": 4.3983, "step": 4639 }, { "epoch": 0.481112572824657, "grad_norm": 0.66796875, "learning_rate": 0.0001876176871089049, "loss": 4.3711, "step": 4640 }, { "epoch": 0.4812162608791451, "grad_norm": 0.78515625, "learning_rate": 0.00018761245107799769, "loss": 4.3311, "step": 4641 }, { "epoch": 0.48131994893363317, "grad_norm": 0.6328125, "learning_rate": 0.0001876072140133562, "loss": 4.3567, "step": 4642 }, { "epoch": 0.48142363698812124, "grad_norm": 0.75390625, "learning_rate": 0.00018760197591504213, "loss": 4.3462, "step": 4643 }, { "epoch": 0.4815273250426093, "grad_norm": 0.75, "learning_rate": 0.0001875967367831174, "loss": 4.3802, "step": 4644 }, { "epoch": 0.4816310130970974, "grad_norm": 0.69140625, "learning_rate": 0.00018759149661764374, "loss": 4.3746, "step": 4645 }, { "epoch": 0.48173470115158545, "grad_norm": 0.7578125, "learning_rate": 0.00018758625541868303, "loss": 4.3654, "step": 4646 }, { "epoch": 0.4818383892060735, "grad_norm": 0.67578125, "learning_rate": 0.00018758101318629706, "loss": 4.3534, "step": 4647 }, { "epoch": 0.4819420772605616, "grad_norm": 0.70703125, "learning_rate": 0.00018757576992054772, "loss": 4.388, "step": 4648 }, { "epoch": 0.48204576531504967, "grad_norm": 0.796875, "learning_rate": 0.0001875705256214969, "loss": 4.3819, "step": 4649 }, { "epoch": 0.48214945336953774, "grad_norm": 0.69921875, "learning_rate": 0.00018756528028920642, "loss": 4.3552, "step": 4650 }, { "epoch": 0.4822531414240258, "grad_norm": 0.640625, "learning_rate": 0.00018756003392373817, "loss": 4.394, "step": 4651 }, { "epoch": 0.4823568294785139, "grad_norm": 0.7421875, "learning_rate": 0.00018755478652515407, "loss": 4.3123, "step": 4652 }, { "epoch": 0.48246051753300195, "grad_norm": 0.6015625, "learning_rate": 0.00018754953809351608, "loss": 4.3751, "step": 4653 }, { "epoch": 0.48256420558749, "grad_norm": 0.70703125, "learning_rate": 0.00018754428862888606, "loss": 4.3575, "step": 4654 }, { "epoch": 0.4826678936419781, "grad_norm": 0.65625, "learning_rate": 0.000187539038131326, "loss": 4.3291, "step": 4655 }, { "epoch": 0.48277158169646617, "grad_norm": 0.64453125, "learning_rate": 0.0001875337866008978, "loss": 4.3593, "step": 4656 }, { "epoch": 0.48287526975095424, "grad_norm": 0.69140625, "learning_rate": 0.00018752853403766344, "loss": 4.3647, "step": 4657 }, { "epoch": 0.4829789578054423, "grad_norm": 0.63671875, "learning_rate": 0.00018752328044168492, "loss": 4.3949, "step": 4658 }, { "epoch": 0.4830826458599304, "grad_norm": 0.66796875, "learning_rate": 0.0001875180258130242, "loss": 4.3514, "step": 4659 }, { "epoch": 0.48318633391441845, "grad_norm": 0.68359375, "learning_rate": 0.00018751277015174327, "loss": 4.3909, "step": 4660 }, { "epoch": 0.4832900219689065, "grad_norm": 0.6796875, "learning_rate": 0.00018750751345790416, "loss": 4.3897, "step": 4661 }, { "epoch": 0.4833937100233946, "grad_norm": 0.6796875, "learning_rate": 0.00018750225573156893, "loss": 4.3775, "step": 4662 }, { "epoch": 0.48349739807788267, "grad_norm": 0.62109375, "learning_rate": 0.00018749699697279953, "loss": 4.3591, "step": 4663 }, { "epoch": 0.48360108613237074, "grad_norm": 0.69921875, "learning_rate": 0.00018749173718165805, "loss": 4.3801, "step": 4664 }, { "epoch": 0.4837047741868588, "grad_norm": 0.625, "learning_rate": 0.00018748647635820657, "loss": 4.3897, "step": 4665 }, { "epoch": 0.4838084622413469, "grad_norm": 0.6640625, "learning_rate": 0.00018748121450250715, "loss": 4.3256, "step": 4666 }, { "epoch": 0.48391215029583495, "grad_norm": 0.63671875, "learning_rate": 0.0001874759516146219, "loss": 4.3901, "step": 4667 }, { "epoch": 0.4840158383503231, "grad_norm": 0.59375, "learning_rate": 0.00018747068769461284, "loss": 4.3546, "step": 4668 }, { "epoch": 0.48411952640481115, "grad_norm": 0.65234375, "learning_rate": 0.00018746542274254214, "loss": 4.3735, "step": 4669 }, { "epoch": 0.4842232144592992, "grad_norm": 0.609375, "learning_rate": 0.0001874601567584719, "loss": 4.3351, "step": 4670 }, { "epoch": 0.4843269025137873, "grad_norm": 0.6484375, "learning_rate": 0.00018745488974246431, "loss": 4.3622, "step": 4671 }, { "epoch": 0.48443059056827537, "grad_norm": 0.62109375, "learning_rate": 0.0001874496216945814, "loss": 4.3652, "step": 4672 }, { "epoch": 0.48453427862276344, "grad_norm": 0.5703125, "learning_rate": 0.00018744435261488541, "loss": 4.3184, "step": 4673 }, { "epoch": 0.4846379666772515, "grad_norm": 0.6953125, "learning_rate": 0.00018743908250343848, "loss": 4.3506, "step": 4674 }, { "epoch": 0.4847416547317396, "grad_norm": 0.5703125, "learning_rate": 0.00018743381136030284, "loss": 4.372, "step": 4675 }, { "epoch": 0.48484534278622765, "grad_norm": 0.59765625, "learning_rate": 0.00018742853918554065, "loss": 4.3609, "step": 4676 }, { "epoch": 0.4849490308407157, "grad_norm": 0.55859375, "learning_rate": 0.00018742326597921406, "loss": 4.3369, "step": 4677 }, { "epoch": 0.4850527188952038, "grad_norm": 0.6484375, "learning_rate": 0.00018741799174138538, "loss": 4.425, "step": 4678 }, { "epoch": 0.48515640694969187, "grad_norm": 0.69140625, "learning_rate": 0.0001874127164721168, "loss": 4.3741, "step": 4679 }, { "epoch": 0.48526009500417994, "grad_norm": 0.59765625, "learning_rate": 0.00018740744017147056, "loss": 4.3473, "step": 4680 }, { "epoch": 0.485363783058668, "grad_norm": 0.71484375, "learning_rate": 0.00018740216283950895, "loss": 4.3419, "step": 4681 }, { "epoch": 0.4854674711131561, "grad_norm": 0.58203125, "learning_rate": 0.00018739688447629415, "loss": 4.3533, "step": 4682 }, { "epoch": 0.48557115916764415, "grad_norm": 0.73828125, "learning_rate": 0.00018739160508188853, "loss": 4.3423, "step": 4683 }, { "epoch": 0.4856748472221322, "grad_norm": 0.65625, "learning_rate": 0.00018738632465635434, "loss": 4.2913, "step": 4684 }, { "epoch": 0.4857785352766203, "grad_norm": 0.71484375, "learning_rate": 0.0001873810431997539, "loss": 4.3524, "step": 4685 }, { "epoch": 0.48588222333110836, "grad_norm": 0.796875, "learning_rate": 0.00018737576071214948, "loss": 4.377, "step": 4686 }, { "epoch": 0.48598591138559644, "grad_norm": 0.76171875, "learning_rate": 0.00018737047719360347, "loss": 4.3887, "step": 4687 }, { "epoch": 0.4860895994400845, "grad_norm": 0.75, "learning_rate": 0.00018736519264417822, "loss": 4.4016, "step": 4688 }, { "epoch": 0.4861932874945726, "grad_norm": 0.8359375, "learning_rate": 0.00018735990706393599, "loss": 4.3848, "step": 4689 }, { "epoch": 0.48629697554906065, "grad_norm": 0.75390625, "learning_rate": 0.00018735462045293923, "loss": 4.3491, "step": 4690 }, { "epoch": 0.4864006636035487, "grad_norm": 0.7421875, "learning_rate": 0.00018734933281125028, "loss": 4.3585, "step": 4691 }, { "epoch": 0.4865043516580368, "grad_norm": 0.64453125, "learning_rate": 0.0001873440441389315, "loss": 4.3719, "step": 4692 }, { "epoch": 0.48660803971252486, "grad_norm": 0.7734375, "learning_rate": 0.00018733875443604538, "loss": 4.3889, "step": 4693 }, { "epoch": 0.48671172776701294, "grad_norm": 0.66796875, "learning_rate": 0.00018733346370265427, "loss": 4.382, "step": 4694 }, { "epoch": 0.486815415821501, "grad_norm": 0.67578125, "learning_rate": 0.00018732817193882058, "loss": 4.3575, "step": 4695 }, { "epoch": 0.4869191038759891, "grad_norm": 0.73046875, "learning_rate": 0.00018732287914460677, "loss": 4.3879, "step": 4696 }, { "epoch": 0.48702279193047715, "grad_norm": 0.6796875, "learning_rate": 0.0001873175853200753, "loss": 4.3682, "step": 4697 }, { "epoch": 0.4871264799849652, "grad_norm": 0.69140625, "learning_rate": 0.00018731229046528865, "loss": 4.3852, "step": 4698 }, { "epoch": 0.4872301680394533, "grad_norm": 0.72265625, "learning_rate": 0.0001873069945803092, "loss": 4.3816, "step": 4699 }, { "epoch": 0.48733385609394136, "grad_norm": 0.63671875, "learning_rate": 0.00018730169766519955, "loss": 4.3754, "step": 4700 }, { "epoch": 0.48743754414842944, "grad_norm": 0.76171875, "learning_rate": 0.00018729639972002215, "loss": 4.3726, "step": 4701 }, { "epoch": 0.4875412322029175, "grad_norm": 0.67578125, "learning_rate": 0.00018729110074483952, "loss": 4.3491, "step": 4702 }, { "epoch": 0.4876449202574056, "grad_norm": 0.7421875, "learning_rate": 0.00018728580073971413, "loss": 4.4031, "step": 4703 }, { "epoch": 0.48774860831189365, "grad_norm": 0.62890625, "learning_rate": 0.00018728049970470858, "loss": 4.3821, "step": 4704 }, { "epoch": 0.4878522963663817, "grad_norm": 0.6796875, "learning_rate": 0.0001872751976398854, "loss": 4.3892, "step": 4705 }, { "epoch": 0.4879559844208698, "grad_norm": 0.69140625, "learning_rate": 0.00018726989454530713, "loss": 4.3552, "step": 4706 }, { "epoch": 0.48805967247535786, "grad_norm": 0.625, "learning_rate": 0.00018726459042103634, "loss": 4.3492, "step": 4707 }, { "epoch": 0.48816336052984594, "grad_norm": 0.73046875, "learning_rate": 0.00018725928526713566, "loss": 4.3485, "step": 4708 }, { "epoch": 0.488267048584334, "grad_norm": 0.58984375, "learning_rate": 0.00018725397908366762, "loss": 4.3352, "step": 4709 }, { "epoch": 0.4883707366388221, "grad_norm": 0.71484375, "learning_rate": 0.00018724867187069487, "loss": 4.3679, "step": 4710 }, { "epoch": 0.48847442469331015, "grad_norm": 0.62890625, "learning_rate": 0.00018724336362828002, "loss": 4.3396, "step": 4711 }, { "epoch": 0.4885781127477983, "grad_norm": 0.64453125, "learning_rate": 0.0001872380543564857, "loss": 4.3776, "step": 4712 }, { "epoch": 0.48868180080228635, "grad_norm": 0.87109375, "learning_rate": 0.00018723274405537454, "loss": 4.3836, "step": 4713 }, { "epoch": 0.4887854888567744, "grad_norm": 0.66796875, "learning_rate": 0.00018722743272500921, "loss": 4.352, "step": 4714 }, { "epoch": 0.4888891769112625, "grad_norm": 0.70703125, "learning_rate": 0.0001872221203654524, "loss": 4.3951, "step": 4715 }, { "epoch": 0.48899286496575056, "grad_norm": 0.6953125, "learning_rate": 0.00018721680697676675, "loss": 4.4221, "step": 4716 }, { "epoch": 0.48909655302023863, "grad_norm": 0.6171875, "learning_rate": 0.00018721149255901499, "loss": 4.3905, "step": 4717 }, { "epoch": 0.4892002410747267, "grad_norm": 0.7109375, "learning_rate": 0.0001872061771122598, "loss": 4.3255, "step": 4718 }, { "epoch": 0.4893039291292148, "grad_norm": 0.6484375, "learning_rate": 0.00018720086063656388, "loss": 4.371, "step": 4719 }, { "epoch": 0.48940761718370285, "grad_norm": 0.62109375, "learning_rate": 0.00018719554313198996, "loss": 4.3986, "step": 4720 }, { "epoch": 0.4895113052381909, "grad_norm": 0.64453125, "learning_rate": 0.00018719022459860084, "loss": 4.4115, "step": 4721 }, { "epoch": 0.489614993292679, "grad_norm": 0.640625, "learning_rate": 0.00018718490503645923, "loss": 4.3245, "step": 4722 }, { "epoch": 0.48971868134716706, "grad_norm": 0.63671875, "learning_rate": 0.0001871795844456279, "loss": 4.4211, "step": 4723 }, { "epoch": 0.48982236940165513, "grad_norm": 0.671875, "learning_rate": 0.0001871742628261696, "loss": 4.407, "step": 4724 }, { "epoch": 0.4899260574561432, "grad_norm": 0.609375, "learning_rate": 0.00018716894017814718, "loss": 4.3849, "step": 4725 }, { "epoch": 0.4900297455106313, "grad_norm": 0.6796875, "learning_rate": 0.00018716361650162336, "loss": 4.3758, "step": 4726 }, { "epoch": 0.49013343356511935, "grad_norm": 0.703125, "learning_rate": 0.00018715829179666104, "loss": 4.3135, "step": 4727 }, { "epoch": 0.4902371216196074, "grad_norm": 0.6640625, "learning_rate": 0.00018715296606332303, "loss": 4.427, "step": 4728 }, { "epoch": 0.4903408096740955, "grad_norm": 0.7578125, "learning_rate": 0.0001871476393016721, "loss": 4.3769, "step": 4729 }, { "epoch": 0.49044449772858356, "grad_norm": 0.63671875, "learning_rate": 0.00018714231151177116, "loss": 4.3326, "step": 4730 }, { "epoch": 0.49054818578307163, "grad_norm": 0.7734375, "learning_rate": 0.00018713698269368306, "loss": 4.3383, "step": 4731 }, { "epoch": 0.4906518738375597, "grad_norm": 0.71875, "learning_rate": 0.00018713165284747069, "loss": 4.39, "step": 4732 }, { "epoch": 0.4907555618920478, "grad_norm": 0.7265625, "learning_rate": 0.00018712632197319688, "loss": 4.38, "step": 4733 }, { "epoch": 0.49085924994653585, "grad_norm": 0.72265625, "learning_rate": 0.0001871209900709246, "loss": 4.359, "step": 4734 }, { "epoch": 0.4909629380010239, "grad_norm": 0.81640625, "learning_rate": 0.0001871156571407167, "loss": 4.3808, "step": 4735 }, { "epoch": 0.491066626055512, "grad_norm": 0.765625, "learning_rate": 0.00018711032318263616, "loss": 4.3714, "step": 4736 }, { "epoch": 0.49117031411000006, "grad_norm": 0.80078125, "learning_rate": 0.00018710498819674584, "loss": 4.3578, "step": 4737 }, { "epoch": 0.49127400216448813, "grad_norm": 0.7734375, "learning_rate": 0.0001870996521831088, "loss": 4.3482, "step": 4738 }, { "epoch": 0.4913776902189762, "grad_norm": 0.78515625, "learning_rate": 0.0001870943151417879, "loss": 4.4213, "step": 4739 }, { "epoch": 0.4914813782734643, "grad_norm": 0.77734375, "learning_rate": 0.00018708897707284613, "loss": 4.3695, "step": 4740 }, { "epoch": 0.49158506632795235, "grad_norm": 0.75, "learning_rate": 0.0001870836379763465, "loss": 4.3712, "step": 4741 }, { "epoch": 0.4916887543824404, "grad_norm": 0.73828125, "learning_rate": 0.00018707829785235197, "loss": 4.3614, "step": 4742 }, { "epoch": 0.4917924424369285, "grad_norm": 0.80078125, "learning_rate": 0.0001870729567009256, "loss": 4.3272, "step": 4743 }, { "epoch": 0.49189613049141656, "grad_norm": 0.75, "learning_rate": 0.00018706761452213038, "loss": 4.3603, "step": 4744 }, { "epoch": 0.49199981854590463, "grad_norm": 0.74609375, "learning_rate": 0.00018706227131602934, "loss": 4.384, "step": 4745 }, { "epoch": 0.4921035066003927, "grad_norm": 0.76171875, "learning_rate": 0.00018705692708268553, "loss": 4.3522, "step": 4746 }, { "epoch": 0.4922071946548808, "grad_norm": 0.7421875, "learning_rate": 0.000187051581822162, "loss": 4.3933, "step": 4747 }, { "epoch": 0.49231088270936885, "grad_norm": 0.81640625, "learning_rate": 0.0001870462355345218, "loss": 4.3596, "step": 4748 }, { "epoch": 0.4924145707638569, "grad_norm": 0.8125, "learning_rate": 0.00018704088821982806, "loss": 4.3722, "step": 4749 }, { "epoch": 0.492518258818345, "grad_norm": 0.87890625, "learning_rate": 0.00018703553987814382, "loss": 4.388, "step": 4750 }, { "epoch": 0.49262194687283306, "grad_norm": 0.80859375, "learning_rate": 0.00018703019050953223, "loss": 4.3509, "step": 4751 }, { "epoch": 0.49272563492732113, "grad_norm": 0.84375, "learning_rate": 0.0001870248401140564, "loss": 4.3761, "step": 4752 }, { "epoch": 0.4928293229818092, "grad_norm": 0.84765625, "learning_rate": 0.00018701948869177942, "loss": 4.3907, "step": 4753 }, { "epoch": 0.4929330110362973, "grad_norm": 0.86328125, "learning_rate": 0.00018701413624276446, "loss": 4.3387, "step": 4754 }, { "epoch": 0.49303669909078535, "grad_norm": 0.83984375, "learning_rate": 0.0001870087827670747, "loss": 4.3632, "step": 4755 }, { "epoch": 0.4931403871452734, "grad_norm": 0.72265625, "learning_rate": 0.00018700342826477324, "loss": 4.3249, "step": 4756 }, { "epoch": 0.49324407519976154, "grad_norm": 0.84765625, "learning_rate": 0.0001869980727359233, "loss": 4.3865, "step": 4757 }, { "epoch": 0.4933477632542496, "grad_norm": 0.6796875, "learning_rate": 0.0001869927161805881, "loss": 4.3384, "step": 4758 }, { "epoch": 0.4934514513087377, "grad_norm": 0.86328125, "learning_rate": 0.00018698735859883076, "loss": 4.3589, "step": 4759 }, { "epoch": 0.49355513936322576, "grad_norm": 0.8046875, "learning_rate": 0.00018698199999071455, "loss": 4.388, "step": 4760 }, { "epoch": 0.49365882741771383, "grad_norm": 0.81640625, "learning_rate": 0.0001869766403563027, "loss": 4.3556, "step": 4761 }, { "epoch": 0.4937625154722019, "grad_norm": 0.8828125, "learning_rate": 0.0001869712796956584, "loss": 4.3647, "step": 4762 }, { "epoch": 0.49386620352669, "grad_norm": 0.8125, "learning_rate": 0.00018696591800884495, "loss": 4.3689, "step": 4763 }, { "epoch": 0.49396989158117804, "grad_norm": 0.80859375, "learning_rate": 0.00018696055529592557, "loss": 4.3974, "step": 4764 }, { "epoch": 0.4940735796356661, "grad_norm": 0.75390625, "learning_rate": 0.0001869551915569636, "loss": 4.359, "step": 4765 }, { "epoch": 0.4941772676901542, "grad_norm": 0.75, "learning_rate": 0.00018694982679202227, "loss": 4.3794, "step": 4766 }, { "epoch": 0.49428095574464226, "grad_norm": 0.74609375, "learning_rate": 0.0001869444610011649, "loss": 4.3763, "step": 4767 }, { "epoch": 0.49438464379913033, "grad_norm": 0.79296875, "learning_rate": 0.00018693909418445478, "loss": 4.3788, "step": 4768 }, { "epoch": 0.4944883318536184, "grad_norm": 0.62890625, "learning_rate": 0.00018693372634195527, "loss": 4.3416, "step": 4769 }, { "epoch": 0.4945920199081065, "grad_norm": 0.78125, "learning_rate": 0.00018692835747372965, "loss": 4.3486, "step": 4770 }, { "epoch": 0.49469570796259454, "grad_norm": 0.640625, "learning_rate": 0.00018692298757984132, "loss": 4.3836, "step": 4771 }, { "epoch": 0.4947993960170826, "grad_norm": 0.89453125, "learning_rate": 0.0001869176166603536, "loss": 4.3463, "step": 4772 }, { "epoch": 0.4949030840715707, "grad_norm": 0.66796875, "learning_rate": 0.00018691224471532988, "loss": 4.3993, "step": 4773 }, { "epoch": 0.49500677212605876, "grad_norm": 0.78125, "learning_rate": 0.0001869068717448336, "loss": 4.35, "step": 4774 }, { "epoch": 0.49511046018054683, "grad_norm": 0.77734375, "learning_rate": 0.00018690149774892802, "loss": 4.3844, "step": 4775 }, { "epoch": 0.4952141482350349, "grad_norm": 0.75, "learning_rate": 0.00018689612272767666, "loss": 4.3631, "step": 4776 }, { "epoch": 0.495317836289523, "grad_norm": 0.67578125, "learning_rate": 0.00018689074668114294, "loss": 4.3669, "step": 4777 }, { "epoch": 0.49542152434401104, "grad_norm": 0.80078125, "learning_rate": 0.00018688536960939018, "loss": 4.3282, "step": 4778 }, { "epoch": 0.4955252123984991, "grad_norm": 0.61328125, "learning_rate": 0.00018687999151248197, "loss": 4.3554, "step": 4779 }, { "epoch": 0.4956289004529872, "grad_norm": 0.7890625, "learning_rate": 0.00018687461239048165, "loss": 4.3386, "step": 4780 }, { "epoch": 0.49573258850747526, "grad_norm": 0.69921875, "learning_rate": 0.00018686923224345273, "loss": 4.3446, "step": 4781 }, { "epoch": 0.49583627656196333, "grad_norm": 0.6796875, "learning_rate": 0.00018686385107145875, "loss": 4.3566, "step": 4782 }, { "epoch": 0.4959399646164514, "grad_norm": 0.640625, "learning_rate": 0.0001868584688745631, "loss": 4.3457, "step": 4783 }, { "epoch": 0.49604365267093947, "grad_norm": 0.640625, "learning_rate": 0.00018685308565282935, "loss": 4.3423, "step": 4784 }, { "epoch": 0.49614734072542754, "grad_norm": 0.78515625, "learning_rate": 0.00018684770140632099, "loss": 4.3752, "step": 4785 }, { "epoch": 0.4962510287799156, "grad_norm": 0.78515625, "learning_rate": 0.00018684231613510152, "loss": 4.4074, "step": 4786 }, { "epoch": 0.4963547168344037, "grad_norm": 0.7109375, "learning_rate": 0.00018683692983923456, "loss": 4.3825, "step": 4787 }, { "epoch": 0.49645840488889176, "grad_norm": 0.71875, "learning_rate": 0.0001868315425187836, "loss": 4.3474, "step": 4788 }, { "epoch": 0.49656209294337983, "grad_norm": 0.734375, "learning_rate": 0.00018682615417381224, "loss": 4.3531, "step": 4789 }, { "epoch": 0.4966657809978679, "grad_norm": 0.796875, "learning_rate": 0.00018682076480438405, "loss": 4.3531, "step": 4790 }, { "epoch": 0.49676946905235597, "grad_norm": 0.6796875, "learning_rate": 0.0001868153744105626, "loss": 4.3875, "step": 4791 }, { "epoch": 0.49687315710684404, "grad_norm": 0.75, "learning_rate": 0.00018680998299241147, "loss": 4.3805, "step": 4792 }, { "epoch": 0.4969768451613321, "grad_norm": 0.74609375, "learning_rate": 0.00018680459054999432, "loss": 4.3138, "step": 4793 }, { "epoch": 0.4970805332158202, "grad_norm": 0.68359375, "learning_rate": 0.00018679919708337476, "loss": 4.3827, "step": 4794 }, { "epoch": 0.49718422127030826, "grad_norm": 0.765625, "learning_rate": 0.00018679380259261646, "loss": 4.3639, "step": 4795 }, { "epoch": 0.49728790932479633, "grad_norm": 0.7890625, "learning_rate": 0.00018678840707778298, "loss": 4.391, "step": 4796 }, { "epoch": 0.4973915973792844, "grad_norm": 0.66796875, "learning_rate": 0.00018678301053893804, "loss": 4.3497, "step": 4797 }, { "epoch": 0.49749528543377247, "grad_norm": 0.83203125, "learning_rate": 0.00018677761297614532, "loss": 4.4161, "step": 4798 }, { "epoch": 0.49759897348826054, "grad_norm": 0.671875, "learning_rate": 0.00018677221438946853, "loss": 4.3588, "step": 4799 }, { "epoch": 0.4977026615427486, "grad_norm": 0.79296875, "learning_rate": 0.0001867668147789713, "loss": 4.3636, "step": 4800 }, { "epoch": 0.4978063495972367, "grad_norm": 0.69921875, "learning_rate": 0.00018676141414471738, "loss": 4.3342, "step": 4801 }, { "epoch": 0.4979100376517248, "grad_norm": 0.8515625, "learning_rate": 0.0001867560124867705, "loss": 4.3903, "step": 4802 }, { "epoch": 0.4980137257062129, "grad_norm": 0.67578125, "learning_rate": 0.0001867506098051943, "loss": 4.3743, "step": 4803 }, { "epoch": 0.49811741376070096, "grad_norm": 0.84765625, "learning_rate": 0.0001867452061000527, "loss": 4.3445, "step": 4804 }, { "epoch": 0.498221101815189, "grad_norm": 0.6953125, "learning_rate": 0.0001867398013714093, "loss": 4.3474, "step": 4805 }, { "epoch": 0.4983247898696771, "grad_norm": 0.84765625, "learning_rate": 0.00018673439561932797, "loss": 4.3979, "step": 4806 }, { "epoch": 0.49842847792416517, "grad_norm": 0.6328125, "learning_rate": 0.00018672898884387244, "loss": 4.3501, "step": 4807 }, { "epoch": 0.49853216597865324, "grad_norm": 0.94140625, "learning_rate": 0.0001867235810451065, "loss": 4.4264, "step": 4808 }, { "epoch": 0.4986358540331413, "grad_norm": 0.71875, "learning_rate": 0.000186718172223094, "loss": 4.4047, "step": 4809 }, { "epoch": 0.4987395420876294, "grad_norm": 0.734375, "learning_rate": 0.00018671276237789872, "loss": 4.3913, "step": 4810 }, { "epoch": 0.49884323014211746, "grad_norm": 0.85546875, "learning_rate": 0.00018670735150958453, "loss": 4.3599, "step": 4811 }, { "epoch": 0.4989469181966055, "grad_norm": 0.84765625, "learning_rate": 0.0001867019396182152, "loss": 4.3785, "step": 4812 }, { "epoch": 0.4990506062510936, "grad_norm": 0.859375, "learning_rate": 0.00018669652670385463, "loss": 4.3586, "step": 4813 }, { "epoch": 0.49915429430558167, "grad_norm": 0.77734375, "learning_rate": 0.00018669111276656673, "loss": 4.3503, "step": 4814 }, { "epoch": 0.49925798236006974, "grad_norm": 0.97265625, "learning_rate": 0.00018668569780641532, "loss": 4.3362, "step": 4815 }, { "epoch": 0.4993616704145578, "grad_norm": 0.7578125, "learning_rate": 0.0001866802818234643, "loss": 4.3846, "step": 4816 }, { "epoch": 0.4994653584690459, "grad_norm": 0.88671875, "learning_rate": 0.0001866748648177776, "loss": 4.3617, "step": 4817 }, { "epoch": 0.49956904652353396, "grad_norm": 0.81640625, "learning_rate": 0.00018666944678941909, "loss": 4.3697, "step": 4818 }, { "epoch": 0.499672734578022, "grad_norm": 0.8125, "learning_rate": 0.00018666402773845272, "loss": 4.3483, "step": 4819 }, { "epoch": 0.4997764226325101, "grad_norm": 0.78515625, "learning_rate": 0.00018665860766494245, "loss": 4.399, "step": 4820 }, { "epoch": 0.49988011068699817, "grad_norm": 0.92578125, "learning_rate": 0.00018665318656895219, "loss": 4.4077, "step": 4821 }, { "epoch": 0.49998379874148624, "grad_norm": 0.984375, "learning_rate": 0.00018664776445054596, "loss": 4.4045, "step": 4822 }, { "epoch": 0.49998379874148624, "eval_loss": 4.3779425621032715, "eval_runtime": 0.4398, "eval_samples_per_second": 338.822, "eval_steps_per_second": 15.918, "step": 4822 }, { "epoch": 0.5000874867959743, "grad_norm": 0.73828125, "learning_rate": 0.00018664234130978766, "loss": 4.3838, "step": 4823 }, { "epoch": 0.5001911748504624, "grad_norm": 1.0, "learning_rate": 0.00018663691714674132, "loss": 4.3242, "step": 4824 }, { "epoch": 0.5002948629049505, "grad_norm": 0.82421875, "learning_rate": 0.00018663149196147095, "loss": 4.3391, "step": 4825 }, { "epoch": 0.5003985509594385, "grad_norm": 0.984375, "learning_rate": 0.00018662606575404054, "loss": 4.3059, "step": 4826 }, { "epoch": 0.5005022390139267, "grad_norm": 0.9765625, "learning_rate": 0.00018662063852451413, "loss": 4.3825, "step": 4827 }, { "epoch": 0.5006059270684147, "grad_norm": 0.77734375, "learning_rate": 0.00018661521027295573, "loss": 4.3923, "step": 4828 }, { "epoch": 0.5007096151229028, "grad_norm": 0.9921875, "learning_rate": 0.00018660978099942945, "loss": 4.3616, "step": 4829 }, { "epoch": 0.5008133031773908, "grad_norm": 0.71875, "learning_rate": 0.00018660435070399925, "loss": 4.3542, "step": 4830 }, { "epoch": 0.5009169912318789, "grad_norm": 0.94140625, "learning_rate": 0.0001865989193867293, "loss": 4.4224, "step": 4831 }, { "epoch": 0.501020679286367, "grad_norm": 0.73046875, "learning_rate": 0.00018659348704768363, "loss": 4.3345, "step": 4832 }, { "epoch": 0.5011243673408551, "grad_norm": 0.8515625, "learning_rate": 0.00018658805368692636, "loss": 4.3521, "step": 4833 }, { "epoch": 0.5012280553953431, "grad_norm": 0.7421875, "learning_rate": 0.00018658261930452153, "loss": 4.4144, "step": 4834 }, { "epoch": 0.5013317434498312, "grad_norm": 0.79296875, "learning_rate": 0.00018657718390053336, "loss": 4.378, "step": 4835 }, { "epoch": 0.5014354315043192, "grad_norm": 0.703125, "learning_rate": 0.00018657174747502593, "loss": 4.318, "step": 4836 }, { "epoch": 0.5015391195588074, "grad_norm": 0.69921875, "learning_rate": 0.00018656631002806338, "loss": 4.3585, "step": 4837 }, { "epoch": 0.5016428076132954, "grad_norm": 0.7734375, "learning_rate": 0.0001865608715597099, "loss": 4.3919, "step": 4838 }, { "epoch": 0.5017464956677835, "grad_norm": 0.69921875, "learning_rate": 0.00018655543207002958, "loss": 4.3366, "step": 4839 }, { "epoch": 0.5018501837222715, "grad_norm": 0.69140625, "learning_rate": 0.0001865499915590867, "loss": 4.3427, "step": 4840 }, { "epoch": 0.5019538717767597, "grad_norm": 0.6796875, "learning_rate": 0.0001865445500269454, "loss": 4.3433, "step": 4841 }, { "epoch": 0.5020575598312477, "grad_norm": 0.69140625, "learning_rate": 0.00018653910747366988, "loss": 4.393, "step": 4842 }, { "epoch": 0.5021612478857358, "grad_norm": 0.66015625, "learning_rate": 0.0001865336638993244, "loss": 4.3742, "step": 4843 }, { "epoch": 0.5022649359402238, "grad_norm": 0.64453125, "learning_rate": 0.0001865282193039731, "loss": 4.3722, "step": 4844 }, { "epoch": 0.5023686239947119, "grad_norm": 0.65234375, "learning_rate": 0.00018652277368768033, "loss": 4.3399, "step": 4845 }, { "epoch": 0.5024723120492, "grad_norm": 0.6171875, "learning_rate": 0.00018651732705051024, "loss": 4.3586, "step": 4846 }, { "epoch": 0.5025760001036881, "grad_norm": 0.62890625, "learning_rate": 0.00018651187939252716, "loss": 4.37, "step": 4847 }, { "epoch": 0.5026796881581761, "grad_norm": 0.6171875, "learning_rate": 0.00018650643071379538, "loss": 4.3741, "step": 4848 }, { "epoch": 0.5027833762126642, "grad_norm": 0.6328125, "learning_rate": 0.00018650098101437914, "loss": 4.3927, "step": 4849 }, { "epoch": 0.5028870642671522, "grad_norm": 0.640625, "learning_rate": 0.00018649553029434274, "loss": 4.3703, "step": 4850 }, { "epoch": 0.5029907523216404, "grad_norm": 0.68359375, "learning_rate": 0.00018649007855375054, "loss": 4.3834, "step": 4851 }, { "epoch": 0.5030944403761284, "grad_norm": 0.76171875, "learning_rate": 0.00018648462579266684, "loss": 4.3607, "step": 4852 }, { "epoch": 0.5031981284306165, "grad_norm": 0.6484375, "learning_rate": 0.00018647917201115597, "loss": 4.3626, "step": 4853 }, { "epoch": 0.5033018164851045, "grad_norm": 0.69921875, "learning_rate": 0.00018647371720928227, "loss": 4.3501, "step": 4854 }, { "epoch": 0.5034055045395927, "grad_norm": 0.8203125, "learning_rate": 0.0001864682613871101, "loss": 4.2801, "step": 4855 }, { "epoch": 0.5035091925940807, "grad_norm": 0.671875, "learning_rate": 0.0001864628045447039, "loss": 4.3684, "step": 4856 }, { "epoch": 0.5036128806485688, "grad_norm": 0.81640625, "learning_rate": 0.00018645734668212795, "loss": 4.3729, "step": 4857 }, { "epoch": 0.5037165687030568, "grad_norm": 0.73046875, "learning_rate": 0.00018645188779944672, "loss": 4.3496, "step": 4858 }, { "epoch": 0.5038202567575449, "grad_norm": 0.91796875, "learning_rate": 0.00018644642789672456, "loss": 4.3412, "step": 4859 }, { "epoch": 0.503923944812033, "grad_norm": 0.703125, "learning_rate": 0.00018644096697402598, "loss": 4.3277, "step": 4860 }, { "epoch": 0.5040276328665211, "grad_norm": 1.109375, "learning_rate": 0.00018643550503141533, "loss": 4.412, "step": 4861 }, { "epoch": 0.5041313209210091, "grad_norm": 0.71484375, "learning_rate": 0.0001864300420689571, "loss": 4.3538, "step": 4862 }, { "epoch": 0.5042350089754972, "grad_norm": 0.8359375, "learning_rate": 0.00018642457808671573, "loss": 4.3333, "step": 4863 }, { "epoch": 0.5043386970299852, "grad_norm": 0.9765625, "learning_rate": 0.00018641911308475573, "loss": 4.3947, "step": 4864 }, { "epoch": 0.5044423850844734, "grad_norm": 0.7265625, "learning_rate": 0.00018641364706314148, "loss": 4.3542, "step": 4865 }, { "epoch": 0.5045460731389614, "grad_norm": 0.91015625, "learning_rate": 0.0001864081800219376, "loss": 4.3132, "step": 4866 }, { "epoch": 0.5046497611934495, "grad_norm": 0.80859375, "learning_rate": 0.00018640271196120848, "loss": 4.3881, "step": 4867 }, { "epoch": 0.5047534492479375, "grad_norm": 0.69140625, "learning_rate": 0.0001863972428810187, "loss": 4.3479, "step": 4868 }, { "epoch": 0.5048571373024257, "grad_norm": 0.91796875, "learning_rate": 0.0001863917727814328, "loss": 4.3538, "step": 4869 }, { "epoch": 0.5049608253569138, "grad_norm": 0.66015625, "learning_rate": 0.0001863863016625153, "loss": 4.4002, "step": 4870 }, { "epoch": 0.5050645134114018, "grad_norm": 0.9609375, "learning_rate": 0.00018638082952433072, "loss": 4.3441, "step": 4871 }, { "epoch": 0.5051682014658899, "grad_norm": 0.68359375, "learning_rate": 0.0001863753563669437, "loss": 4.3473, "step": 4872 }, { "epoch": 0.5052718895203779, "grad_norm": 1.0234375, "learning_rate": 0.00018636988219041878, "loss": 4.3742, "step": 4873 }, { "epoch": 0.5053755775748661, "grad_norm": 0.91796875, "learning_rate": 0.00018636440699482053, "loss": 4.3611, "step": 4874 }, { "epoch": 0.5054792656293541, "grad_norm": 0.796875, "learning_rate": 0.00018635893078021356, "loss": 4.3787, "step": 4875 }, { "epoch": 0.5055829536838422, "grad_norm": 0.90625, "learning_rate": 0.00018635345354666252, "loss": 4.3462, "step": 4876 }, { "epoch": 0.5056866417383302, "grad_norm": 0.7734375, "learning_rate": 0.000186347975294232, "loss": 4.3807, "step": 4877 }, { "epoch": 0.5057903297928183, "grad_norm": 0.8515625, "learning_rate": 0.0001863424960229866, "loss": 4.3645, "step": 4878 }, { "epoch": 0.5058940178473064, "grad_norm": 0.85546875, "learning_rate": 0.00018633701573299107, "loss": 4.3672, "step": 4879 }, { "epoch": 0.5059977059017945, "grad_norm": 0.765625, "learning_rate": 0.00018633153442430998, "loss": 4.3876, "step": 4880 }, { "epoch": 0.5061013939562825, "grad_norm": 0.67578125, "learning_rate": 0.00018632605209700808, "loss": 4.3838, "step": 4881 }, { "epoch": 0.5062050820107706, "grad_norm": 0.8359375, "learning_rate": 0.00018632056875115, "loss": 4.3444, "step": 4882 }, { "epoch": 0.5063087700652587, "grad_norm": 0.6875, "learning_rate": 0.00018631508438680047, "loss": 4.3192, "step": 4883 }, { "epoch": 0.5064124581197468, "grad_norm": 0.77734375, "learning_rate": 0.00018630959900402412, "loss": 4.3604, "step": 4884 }, { "epoch": 0.5065161461742348, "grad_norm": 0.77734375, "learning_rate": 0.0001863041126028858, "loss": 4.3894, "step": 4885 }, { "epoch": 0.5066198342287229, "grad_norm": 0.67578125, "learning_rate": 0.00018629862518345017, "loss": 4.3822, "step": 4886 }, { "epoch": 0.5067235222832109, "grad_norm": 0.77734375, "learning_rate": 0.00018629313674578196, "loss": 4.3275, "step": 4887 }, { "epoch": 0.5068272103376991, "grad_norm": 0.6640625, "learning_rate": 0.00018628764728994594, "loss": 4.3346, "step": 4888 }, { "epoch": 0.5069308983921871, "grad_norm": 0.6796875, "learning_rate": 0.00018628215681600692, "loss": 4.3965, "step": 4889 }, { "epoch": 0.5070345864466752, "grad_norm": 0.74609375, "learning_rate": 0.00018627666532402962, "loss": 4.3549, "step": 4890 }, { "epoch": 0.5071382745011632, "grad_norm": 0.68359375, "learning_rate": 0.00018627117281407892, "loss": 4.3535, "step": 4891 }, { "epoch": 0.5072419625556513, "grad_norm": 0.6875, "learning_rate": 0.00018626567928621955, "loss": 4.3435, "step": 4892 }, { "epoch": 0.5073456506101394, "grad_norm": 0.7109375, "learning_rate": 0.00018626018474051634, "loss": 4.295, "step": 4893 }, { "epoch": 0.5074493386646275, "grad_norm": 0.7578125, "learning_rate": 0.00018625468917703414, "loss": 4.3831, "step": 4894 }, { "epoch": 0.5075530267191155, "grad_norm": 0.71875, "learning_rate": 0.0001862491925958378, "loss": 4.3633, "step": 4895 }, { "epoch": 0.5076567147736036, "grad_norm": 0.79296875, "learning_rate": 0.0001862436949969921, "loss": 4.3539, "step": 4896 }, { "epoch": 0.5077604028280917, "grad_norm": 0.7734375, "learning_rate": 0.00018623819638056204, "loss": 4.3631, "step": 4897 }, { "epoch": 0.5078640908825798, "grad_norm": 0.75, "learning_rate": 0.00018623269674661238, "loss": 4.3494, "step": 4898 }, { "epoch": 0.5079677789370678, "grad_norm": 0.9140625, "learning_rate": 0.00018622719609520804, "loss": 4.3768, "step": 4899 }, { "epoch": 0.5080714669915559, "grad_norm": 0.8203125, "learning_rate": 0.00018622169442641395, "loss": 4.3804, "step": 4900 }, { "epoch": 0.5081751550460439, "grad_norm": 0.7109375, "learning_rate": 0.000186216191740295, "loss": 4.377, "step": 4901 }, { "epoch": 0.5082788431005321, "grad_norm": 0.7421875, "learning_rate": 0.00018621068803691612, "loss": 4.3685, "step": 4902 }, { "epoch": 0.5083825311550201, "grad_norm": 0.6640625, "learning_rate": 0.00018620518331634225, "loss": 4.3558, "step": 4903 }, { "epoch": 0.5084862192095082, "grad_norm": 0.73828125, "learning_rate": 0.00018619967757863836, "loss": 4.3696, "step": 4904 }, { "epoch": 0.5085899072639962, "grad_norm": 0.625, "learning_rate": 0.00018619417082386936, "loss": 4.3299, "step": 4905 }, { "epoch": 0.5086935953184843, "grad_norm": 0.67578125, "learning_rate": 0.00018618866305210029, "loss": 4.3453, "step": 4906 }, { "epoch": 0.5087972833729724, "grad_norm": 0.63671875, "learning_rate": 0.00018618315426339606, "loss": 4.373, "step": 4907 }, { "epoch": 0.5089009714274605, "grad_norm": 0.7109375, "learning_rate": 0.00018617764445782173, "loss": 4.374, "step": 4908 }, { "epoch": 0.5090046594819485, "grad_norm": 0.640625, "learning_rate": 0.00018617213363544228, "loss": 4.3881, "step": 4909 }, { "epoch": 0.5091083475364366, "grad_norm": 0.6796875, "learning_rate": 0.00018616662179632277, "loss": 4.3841, "step": 4910 }, { "epoch": 0.5092120355909246, "grad_norm": 0.640625, "learning_rate": 0.00018616110894052818, "loss": 4.3558, "step": 4911 }, { "epoch": 0.5093157236454128, "grad_norm": 0.71484375, "learning_rate": 0.0001861555950681236, "loss": 4.2965, "step": 4912 }, { "epoch": 0.5094194116999009, "grad_norm": 0.71875, "learning_rate": 0.00018615008017917406, "loss": 4.3539, "step": 4913 }, { "epoch": 0.5095230997543889, "grad_norm": 0.640625, "learning_rate": 0.00018614456427374463, "loss": 4.3157, "step": 4914 }, { "epoch": 0.509626787808877, "grad_norm": 0.72265625, "learning_rate": 0.00018613904735190038, "loss": 4.3303, "step": 4915 }, { "epoch": 0.5097304758633651, "grad_norm": 0.69140625, "learning_rate": 0.00018613352941370645, "loss": 4.3523, "step": 4916 }, { "epoch": 0.5098341639178532, "grad_norm": 0.890625, "learning_rate": 0.00018612801045922792, "loss": 4.3532, "step": 4917 }, { "epoch": 0.5099378519723412, "grad_norm": 0.82421875, "learning_rate": 0.0001861224904885299, "loss": 4.3548, "step": 4918 }, { "epoch": 0.5100415400268293, "grad_norm": 0.8828125, "learning_rate": 0.00018611696950167754, "loss": 4.3741, "step": 4919 }, { "epoch": 0.5101452280813173, "grad_norm": 0.87109375, "learning_rate": 0.00018611144749873594, "loss": 4.3321, "step": 4920 }, { "epoch": 0.5102489161358055, "grad_norm": 0.85546875, "learning_rate": 0.00018610592447977035, "loss": 4.387, "step": 4921 }, { "epoch": 0.5103526041902935, "grad_norm": 0.91796875, "learning_rate": 0.0001861004004448458, "loss": 4.364, "step": 4922 }, { "epoch": 0.5104562922447816, "grad_norm": 0.69921875, "learning_rate": 0.00018609487539402757, "loss": 4.3479, "step": 4923 }, { "epoch": 0.5105599802992696, "grad_norm": 0.9765625, "learning_rate": 0.0001860893493273808, "loss": 4.3506, "step": 4924 }, { "epoch": 0.5106636683537578, "grad_norm": 0.953125, "learning_rate": 0.0001860838222449707, "loss": 4.3584, "step": 4925 }, { "epoch": 0.5107673564082458, "grad_norm": 0.76953125, "learning_rate": 0.00018607829414686252, "loss": 4.3778, "step": 4926 }, { "epoch": 0.5108710444627339, "grad_norm": 0.94140625, "learning_rate": 0.00018607276503312145, "loss": 4.3566, "step": 4927 }, { "epoch": 0.5109747325172219, "grad_norm": 0.94140625, "learning_rate": 0.00018606723490381273, "loss": 4.3141, "step": 4928 }, { "epoch": 0.51107842057171, "grad_norm": 0.6796875, "learning_rate": 0.0001860617037590016, "loss": 4.3806, "step": 4929 }, { "epoch": 0.5111821086261981, "grad_norm": 0.984375, "learning_rate": 0.00018605617159875336, "loss": 4.3295, "step": 4930 }, { "epoch": 0.5112857966806862, "grad_norm": 0.96875, "learning_rate": 0.00018605063842313326, "loss": 4.3544, "step": 4931 }, { "epoch": 0.5113894847351742, "grad_norm": 0.90625, "learning_rate": 0.00018604510423220658, "loss": 4.3995, "step": 4932 }, { "epoch": 0.5114931727896623, "grad_norm": 0.78125, "learning_rate": 0.00018603956902603862, "loss": 4.353, "step": 4933 }, { "epoch": 0.5115968608441503, "grad_norm": 0.859375, "learning_rate": 0.00018603403280469469, "loss": 4.3788, "step": 4934 }, { "epoch": 0.5117005488986385, "grad_norm": 0.80859375, "learning_rate": 0.00018602849556824015, "loss": 4.322, "step": 4935 }, { "epoch": 0.5118042369531265, "grad_norm": 0.7421875, "learning_rate": 0.00018602295731674025, "loss": 4.3518, "step": 4936 }, { "epoch": 0.5119079250076146, "grad_norm": 0.94921875, "learning_rate": 0.00018601741805026042, "loss": 4.3241, "step": 4937 }, { "epoch": 0.5120116130621026, "grad_norm": 0.765625, "learning_rate": 0.00018601187776886595, "loss": 4.3811, "step": 4938 }, { "epoch": 0.5121153011165908, "grad_norm": 0.7734375, "learning_rate": 0.00018600633647262226, "loss": 4.347, "step": 4939 }, { "epoch": 0.5122189891710788, "grad_norm": 0.86328125, "learning_rate": 0.00018600079416159472, "loss": 4.3929, "step": 4940 }, { "epoch": 0.5123226772255669, "grad_norm": 0.74609375, "learning_rate": 0.0001859952508358487, "loss": 4.3574, "step": 4941 }, { "epoch": 0.5124263652800549, "grad_norm": 0.8046875, "learning_rate": 0.0001859897064954496, "loss": 4.3482, "step": 4942 }, { "epoch": 0.512530053334543, "grad_norm": 0.94921875, "learning_rate": 0.0001859841611404629, "loss": 4.3844, "step": 4943 }, { "epoch": 0.5126337413890311, "grad_norm": 0.83203125, "learning_rate": 0.00018597861477095398, "loss": 4.3551, "step": 4944 }, { "epoch": 0.5127374294435192, "grad_norm": 0.76953125, "learning_rate": 0.00018597306738698827, "loss": 4.3954, "step": 4945 }, { "epoch": 0.5128411174980072, "grad_norm": 1.09375, "learning_rate": 0.00018596751898863128, "loss": 4.3693, "step": 4946 }, { "epoch": 0.5129448055524953, "grad_norm": 0.73046875, "learning_rate": 0.00018596196957594837, "loss": 4.4051, "step": 4947 }, { "epoch": 0.5130484936069833, "grad_norm": 0.85546875, "learning_rate": 0.00018595641914900514, "loss": 4.3488, "step": 4948 }, { "epoch": 0.5131521816614715, "grad_norm": 0.93359375, "learning_rate": 0.000185950867707867, "loss": 4.3103, "step": 4949 }, { "epoch": 0.5132558697159595, "grad_norm": 0.94140625, "learning_rate": 0.0001859453152525995, "loss": 4.3381, "step": 4950 }, { "epoch": 0.5133595577704476, "grad_norm": 0.80078125, "learning_rate": 0.0001859397617832681, "loss": 4.3591, "step": 4951 }, { "epoch": 0.5134632458249356, "grad_norm": 0.8359375, "learning_rate": 0.00018593420729993838, "loss": 4.3626, "step": 4952 }, { "epoch": 0.5135669338794238, "grad_norm": 0.83203125, "learning_rate": 0.00018592865180267582, "loss": 4.3803, "step": 4953 }, { "epoch": 0.5136706219339118, "grad_norm": 0.828125, "learning_rate": 0.000185923095291546, "loss": 4.3524, "step": 4954 }, { "epoch": 0.5137743099883999, "grad_norm": 0.79296875, "learning_rate": 0.0001859175377666145, "loss": 4.3688, "step": 4955 }, { "epoch": 0.5138779980428879, "grad_norm": 0.765625, "learning_rate": 0.0001859119792279469, "loss": 4.346, "step": 4956 }, { "epoch": 0.513981686097376, "grad_norm": 0.8359375, "learning_rate": 0.00018590641967560872, "loss": 4.4135, "step": 4957 }, { "epoch": 0.5140853741518642, "grad_norm": 0.8359375, "learning_rate": 0.0001859008591096656, "loss": 4.3789, "step": 4958 }, { "epoch": 0.5141890622063522, "grad_norm": 0.90625, "learning_rate": 0.00018589529753018313, "loss": 4.3745, "step": 4959 }, { "epoch": 0.5142927502608403, "grad_norm": 0.87890625, "learning_rate": 0.00018588973493722697, "loss": 4.3575, "step": 4960 }, { "epoch": 0.5143964383153283, "grad_norm": 0.88671875, "learning_rate": 0.0001858841713308627, "loss": 4.3291, "step": 4961 }, { "epoch": 0.5145001263698165, "grad_norm": 0.87109375, "learning_rate": 0.00018587860671115603, "loss": 4.3675, "step": 4962 }, { "epoch": 0.5146038144243045, "grad_norm": 0.796875, "learning_rate": 0.00018587304107817255, "loss": 4.3248, "step": 4963 }, { "epoch": 0.5147075024787926, "grad_norm": 0.734375, "learning_rate": 0.00018586747443197796, "loss": 4.3748, "step": 4964 }, { "epoch": 0.5148111905332806, "grad_norm": 0.89453125, "learning_rate": 0.00018586190677263793, "loss": 4.3217, "step": 4965 }, { "epoch": 0.5149148785877687, "grad_norm": 0.66015625, "learning_rate": 0.00018585633810021818, "loss": 4.3518, "step": 4966 }, { "epoch": 0.5150185666422568, "grad_norm": 0.86328125, "learning_rate": 0.00018585076841478438, "loss": 4.3605, "step": 4967 }, { "epoch": 0.5151222546967449, "grad_norm": 0.8203125, "learning_rate": 0.00018584519771640227, "loss": 4.3512, "step": 4968 }, { "epoch": 0.5152259427512329, "grad_norm": 0.84765625, "learning_rate": 0.00018583962600513755, "loss": 4.3504, "step": 4969 }, { "epoch": 0.515329630805721, "grad_norm": 0.80859375, "learning_rate": 0.000185834053281056, "loss": 4.3637, "step": 4970 }, { "epoch": 0.515433318860209, "grad_norm": 0.8359375, "learning_rate": 0.00018582847954422337, "loss": 4.4037, "step": 4971 }, { "epoch": 0.5155370069146972, "grad_norm": 0.8203125, "learning_rate": 0.00018582290479470537, "loss": 4.3579, "step": 4972 }, { "epoch": 0.5156406949691852, "grad_norm": 0.80078125, "learning_rate": 0.0001858173290325678, "loss": 4.3613, "step": 4973 }, { "epoch": 0.5157443830236733, "grad_norm": 0.8828125, "learning_rate": 0.00018581175225787652, "loss": 4.3607, "step": 4974 }, { "epoch": 0.5158480710781613, "grad_norm": 0.71484375, "learning_rate": 0.00018580617447069722, "loss": 4.3722, "step": 4975 }, { "epoch": 0.5159517591326495, "grad_norm": 0.78515625, "learning_rate": 0.00018580059567109577, "loss": 4.4164, "step": 4976 }, { "epoch": 0.5160554471871375, "grad_norm": 0.70703125, "learning_rate": 0.00018579501585913802, "loss": 4.3352, "step": 4977 }, { "epoch": 0.5161591352416256, "grad_norm": 0.7265625, "learning_rate": 0.00018578943503488974, "loss": 4.3437, "step": 4978 }, { "epoch": 0.5162628232961136, "grad_norm": 0.8203125, "learning_rate": 0.0001857838531984168, "loss": 4.3684, "step": 4979 }, { "epoch": 0.5163665113506017, "grad_norm": 0.7578125, "learning_rate": 0.0001857782703497851, "loss": 4.3773, "step": 4980 }, { "epoch": 0.5164701994050898, "grad_norm": 0.875, "learning_rate": 0.00018577268648906046, "loss": 4.3865, "step": 4981 }, { "epoch": 0.5165738874595779, "grad_norm": 0.81640625, "learning_rate": 0.0001857671016163088, "loss": 4.3972, "step": 4982 }, { "epoch": 0.5166775755140659, "grad_norm": 0.7578125, "learning_rate": 0.000185761515731596, "loss": 4.3545, "step": 4983 }, { "epoch": 0.516781263568554, "grad_norm": 0.7734375, "learning_rate": 0.00018575592883498794, "loss": 4.3675, "step": 4984 }, { "epoch": 0.516884951623042, "grad_norm": 0.73828125, "learning_rate": 0.0001857503409265506, "loss": 4.3399, "step": 4985 }, { "epoch": 0.5169886396775302, "grad_norm": 0.69921875, "learning_rate": 0.00018574475200634985, "loss": 4.3809, "step": 4986 }, { "epoch": 0.5170923277320182, "grad_norm": 0.78125, "learning_rate": 0.0001857391620744517, "loss": 4.3294, "step": 4987 }, { "epoch": 0.5171960157865063, "grad_norm": 0.70703125, "learning_rate": 0.00018573357113092203, "loss": 4.3773, "step": 4988 }, { "epoch": 0.5172997038409943, "grad_norm": 0.73046875, "learning_rate": 0.0001857279791758269, "loss": 4.3379, "step": 4989 }, { "epoch": 0.5174033918954825, "grad_norm": 0.69140625, "learning_rate": 0.00018572238620923217, "loss": 4.3642, "step": 4990 }, { "epoch": 0.5175070799499705, "grad_norm": 0.73046875, "learning_rate": 0.00018571679223120392, "loss": 4.3593, "step": 4991 }, { "epoch": 0.5176107680044586, "grad_norm": 0.8125, "learning_rate": 0.00018571119724180814, "loss": 4.3882, "step": 4992 }, { "epoch": 0.5177144560589466, "grad_norm": 0.765625, "learning_rate": 0.00018570560124111084, "loss": 4.3503, "step": 4993 }, { "epoch": 0.5178181441134347, "grad_norm": 0.75, "learning_rate": 0.000185700004229178, "loss": 4.3396, "step": 4994 }, { "epoch": 0.5179218321679228, "grad_norm": 0.73828125, "learning_rate": 0.00018569440620607572, "loss": 4.3554, "step": 4995 }, { "epoch": 0.5180255202224109, "grad_norm": 0.76171875, "learning_rate": 0.00018568880717187005, "loss": 4.3116, "step": 4996 }, { "epoch": 0.5181292082768989, "grad_norm": 0.703125, "learning_rate": 0.000185683207126627, "loss": 4.3576, "step": 4997 }, { "epoch": 0.518232896331387, "grad_norm": 0.71875, "learning_rate": 0.0001856776060704127, "loss": 4.375, "step": 4998 }, { "epoch": 0.518336584385875, "grad_norm": 0.7265625, "learning_rate": 0.00018567200400329317, "loss": 4.3738, "step": 4999 }, { "epoch": 0.5184402724403632, "grad_norm": 0.76953125, "learning_rate": 0.0001856664009253346, "loss": 4.3946, "step": 5000 }, { "epoch": 0.5185439604948512, "grad_norm": 0.76953125, "learning_rate": 0.00018566079683660306, "loss": 4.3456, "step": 5001 }, { "epoch": 0.5186476485493393, "grad_norm": 0.7109375, "learning_rate": 0.00018565519173716465, "loss": 4.3619, "step": 5002 }, { "epoch": 0.5187513366038274, "grad_norm": 0.71875, "learning_rate": 0.0001856495856270855, "loss": 4.3406, "step": 5003 }, { "epoch": 0.5188550246583155, "grad_norm": 0.73046875, "learning_rate": 0.00018564397850643182, "loss": 4.37, "step": 5004 }, { "epoch": 0.5189587127128036, "grad_norm": 0.7421875, "learning_rate": 0.00018563837037526967, "loss": 4.367, "step": 5005 }, { "epoch": 0.5190624007672916, "grad_norm": 0.70703125, "learning_rate": 0.0001856327612336653, "loss": 4.3462, "step": 5006 }, { "epoch": 0.5191660888217797, "grad_norm": 0.79296875, "learning_rate": 0.00018562715108168485, "loss": 4.3543, "step": 5007 }, { "epoch": 0.5192697768762677, "grad_norm": 0.74609375, "learning_rate": 0.00018562153991939453, "loss": 4.3434, "step": 5008 }, { "epoch": 0.5193734649307559, "grad_norm": 0.83203125, "learning_rate": 0.00018561592774686057, "loss": 4.3605, "step": 5009 }, { "epoch": 0.5194771529852439, "grad_norm": 0.86328125, "learning_rate": 0.00018561031456414915, "loss": 4.3769, "step": 5010 }, { "epoch": 0.519580841039732, "grad_norm": 0.80078125, "learning_rate": 0.00018560470037132652, "loss": 4.3688, "step": 5011 }, { "epoch": 0.51968452909422, "grad_norm": 0.71875, "learning_rate": 0.0001855990851684589, "loss": 4.3712, "step": 5012 }, { "epoch": 0.5197882171487082, "grad_norm": 0.828125, "learning_rate": 0.00018559346895561253, "loss": 4.3923, "step": 5013 }, { "epoch": 0.5198919052031962, "grad_norm": 0.6875, "learning_rate": 0.00018558785173285376, "loss": 4.3656, "step": 5014 }, { "epoch": 0.5199955932576843, "grad_norm": 0.70703125, "learning_rate": 0.00018558223350024876, "loss": 4.3326, "step": 5015 }, { "epoch": 0.5200992813121723, "grad_norm": 0.7109375, "learning_rate": 0.0001855766142578639, "loss": 4.3647, "step": 5016 }, { "epoch": 0.5202029693666604, "grad_norm": 0.71875, "learning_rate": 0.00018557099400576545, "loss": 4.392, "step": 5017 }, { "epoch": 0.5203066574211485, "grad_norm": 0.84765625, "learning_rate": 0.0001855653727440197, "loss": 4.3832, "step": 5018 }, { "epoch": 0.5204103454756366, "grad_norm": 0.8125, "learning_rate": 0.000185559750472693, "loss": 4.3736, "step": 5019 }, { "epoch": 0.5205140335301246, "grad_norm": 0.6953125, "learning_rate": 0.00018555412719185172, "loss": 4.2913, "step": 5020 }, { "epoch": 0.5206177215846127, "grad_norm": 0.97265625, "learning_rate": 0.00018554850290156214, "loss": 4.3555, "step": 5021 }, { "epoch": 0.5207214096391007, "grad_norm": 0.92578125, "learning_rate": 0.00018554287760189067, "loss": 4.3643, "step": 5022 }, { "epoch": 0.5208250976935889, "grad_norm": 0.70703125, "learning_rate": 0.00018553725129290366, "loss": 4.37, "step": 5023 }, { "epoch": 0.5209287857480769, "grad_norm": 0.85546875, "learning_rate": 0.0001855316239746675, "loss": 4.3919, "step": 5024 }, { "epoch": 0.521032473802565, "grad_norm": 0.8359375, "learning_rate": 0.0001855259956472486, "loss": 4.3377, "step": 5025 }, { "epoch": 0.521136161857053, "grad_norm": 0.69921875, "learning_rate": 0.00018552036631071334, "loss": 4.3495, "step": 5026 }, { "epoch": 0.5212398499115412, "grad_norm": 0.83984375, "learning_rate": 0.00018551473596512818, "loss": 4.3484, "step": 5027 }, { "epoch": 0.5213435379660292, "grad_norm": 0.86328125, "learning_rate": 0.00018550910461055952, "loss": 4.3541, "step": 5028 }, { "epoch": 0.5214472260205173, "grad_norm": 0.68359375, "learning_rate": 0.00018550347224707378, "loss": 4.3259, "step": 5029 }, { "epoch": 0.5215509140750053, "grad_norm": 0.7890625, "learning_rate": 0.00018549783887473749, "loss": 4.3419, "step": 5030 }, { "epoch": 0.5216546021294934, "grad_norm": 0.8125, "learning_rate": 0.00018549220449361707, "loss": 4.3365, "step": 5031 }, { "epoch": 0.5217582901839815, "grad_norm": 0.59765625, "learning_rate": 0.000185486569103779, "loss": 4.3261, "step": 5032 }, { "epoch": 0.5218619782384696, "grad_norm": 0.8046875, "learning_rate": 0.00018548093270528976, "loss": 4.3539, "step": 5033 }, { "epoch": 0.5219656662929576, "grad_norm": 0.7265625, "learning_rate": 0.0001854752952982159, "loss": 4.3635, "step": 5034 }, { "epoch": 0.5220693543474457, "grad_norm": 0.734375, "learning_rate": 0.00018546965688262388, "loss": 4.3569, "step": 5035 }, { "epoch": 0.5221730424019337, "grad_norm": 0.71484375, "learning_rate": 0.00018546401745858027, "loss": 4.3369, "step": 5036 }, { "epoch": 0.5222767304564219, "grad_norm": 0.70703125, "learning_rate": 0.0001854583770261516, "loss": 4.3832, "step": 5037 }, { "epoch": 0.5223804185109099, "grad_norm": 0.73046875, "learning_rate": 0.0001854527355854044, "loss": 4.3824, "step": 5038 }, { "epoch": 0.522484106565398, "grad_norm": 0.7421875, "learning_rate": 0.00018544709313640526, "loss": 4.4, "step": 5039 }, { "epoch": 0.522587794619886, "grad_norm": 0.73828125, "learning_rate": 0.00018544144967922073, "loss": 4.3677, "step": 5040 }, { "epoch": 0.5226914826743742, "grad_norm": 0.921875, "learning_rate": 0.0001854358052139174, "loss": 4.327, "step": 5041 }, { "epoch": 0.5227951707288622, "grad_norm": 0.76953125, "learning_rate": 0.0001854301597405619, "loss": 4.4058, "step": 5042 }, { "epoch": 0.5228988587833503, "grad_norm": 0.7890625, "learning_rate": 0.0001854245132592208, "loss": 4.3692, "step": 5043 }, { "epoch": 0.5230025468378383, "grad_norm": 0.79296875, "learning_rate": 0.00018541886576996076, "loss": 4.4042, "step": 5044 }, { "epoch": 0.5231062348923264, "grad_norm": 0.73828125, "learning_rate": 0.0001854132172728484, "loss": 4.3645, "step": 5045 }, { "epoch": 0.5232099229468145, "grad_norm": 0.859375, "learning_rate": 0.00018540756776795034, "loss": 4.3312, "step": 5046 }, { "epoch": 0.5233136110013026, "grad_norm": 0.76953125, "learning_rate": 0.00018540191725533326, "loss": 4.3549, "step": 5047 }, { "epoch": 0.5234172990557907, "grad_norm": 0.72265625, "learning_rate": 0.00018539626573506386, "loss": 4.3363, "step": 5048 }, { "epoch": 0.5235209871102787, "grad_norm": 0.83984375, "learning_rate": 0.00018539061320720874, "loss": 4.3412, "step": 5049 }, { "epoch": 0.5236246751647669, "grad_norm": 0.71875, "learning_rate": 0.0001853849596718347, "loss": 4.3746, "step": 5050 }, { "epoch": 0.5237283632192549, "grad_norm": 0.73046875, "learning_rate": 0.00018537930512900835, "loss": 4.343, "step": 5051 }, { "epoch": 0.523832051273743, "grad_norm": 0.8359375, "learning_rate": 0.00018537364957879648, "loss": 4.3576, "step": 5052 }, { "epoch": 0.523935739328231, "grad_norm": 0.6171875, "learning_rate": 0.00018536799302126578, "loss": 4.3926, "step": 5053 }, { "epoch": 0.5240394273827191, "grad_norm": 0.85546875, "learning_rate": 0.00018536233545648302, "loss": 4.3628, "step": 5054 }, { "epoch": 0.5241431154372072, "grad_norm": 0.671875, "learning_rate": 0.00018535667688451487, "loss": 4.3296, "step": 5055 }, { "epoch": 0.5242468034916953, "grad_norm": 0.765625, "learning_rate": 0.00018535101730542822, "loss": 4.3907, "step": 5056 }, { "epoch": 0.5243504915461833, "grad_norm": 0.7421875, "learning_rate": 0.00018534535671928978, "loss": 4.3648, "step": 5057 }, { "epoch": 0.5244541796006714, "grad_norm": 0.7578125, "learning_rate": 0.00018533969512616634, "loss": 4.3272, "step": 5058 }, { "epoch": 0.5245578676551594, "grad_norm": 0.7578125, "learning_rate": 0.00018533403252612467, "loss": 4.3601, "step": 5059 }, { "epoch": 0.5246615557096476, "grad_norm": 0.703125, "learning_rate": 0.00018532836891923166, "loss": 4.3732, "step": 5060 }, { "epoch": 0.5247652437641356, "grad_norm": 0.71484375, "learning_rate": 0.00018532270430555406, "loss": 4.372, "step": 5061 }, { "epoch": 0.5248689318186237, "grad_norm": 0.98828125, "learning_rate": 0.00018531703868515878, "loss": 4.3324, "step": 5062 }, { "epoch": 0.5249726198731117, "grad_norm": 0.9375, "learning_rate": 0.00018531137205811258, "loss": 4.3788, "step": 5063 }, { "epoch": 0.5250763079275999, "grad_norm": 0.83984375, "learning_rate": 0.0001853057044244824, "loss": 4.3657, "step": 5064 }, { "epoch": 0.5251799959820879, "grad_norm": 0.94921875, "learning_rate": 0.00018530003578433508, "loss": 4.331, "step": 5065 }, { "epoch": 0.525283684036576, "grad_norm": 0.9609375, "learning_rate": 0.0001852943661377375, "loss": 4.4065, "step": 5066 }, { "epoch": 0.525387372091064, "grad_norm": 0.93359375, "learning_rate": 0.00018528869548475654, "loss": 4.3171, "step": 5067 }, { "epoch": 0.5254910601455521, "grad_norm": 0.75390625, "learning_rate": 0.0001852830238254591, "loss": 4.35, "step": 5068 }, { "epoch": 0.5255947482000402, "grad_norm": 0.94921875, "learning_rate": 0.0001852773511599122, "loss": 4.3805, "step": 5069 }, { "epoch": 0.5256984362545283, "grad_norm": 0.90234375, "learning_rate": 0.00018527167748818267, "loss": 4.3788, "step": 5070 }, { "epoch": 0.5258021243090163, "grad_norm": 0.71484375, "learning_rate": 0.00018526600281033748, "loss": 4.3321, "step": 5071 }, { "epoch": 0.5259058123635044, "grad_norm": 0.86328125, "learning_rate": 0.00018526032712644358, "loss": 4.4017, "step": 5072 }, { "epoch": 0.5260095004179924, "grad_norm": 0.69921875, "learning_rate": 0.00018525465043656795, "loss": 4.335, "step": 5073 }, { "epoch": 0.5261131884724806, "grad_norm": 0.85546875, "learning_rate": 0.00018524897274077757, "loss": 4.3608, "step": 5074 }, { "epoch": 0.5262168765269686, "grad_norm": 0.7421875, "learning_rate": 0.0001852432940391394, "loss": 4.2927, "step": 5075 }, { "epoch": 0.5263205645814567, "grad_norm": 0.67578125, "learning_rate": 0.0001852376143317205, "loss": 4.366, "step": 5076 }, { "epoch": 0.5264242526359447, "grad_norm": 0.859375, "learning_rate": 0.00018523193361858785, "loss": 4.3713, "step": 5077 }, { "epoch": 0.5265279406904328, "grad_norm": 0.78515625, "learning_rate": 0.00018522625189980844, "loss": 4.3462, "step": 5078 }, { "epoch": 0.5266316287449209, "grad_norm": 0.76171875, "learning_rate": 0.00018522056917544939, "loss": 4.3247, "step": 5079 }, { "epoch": 0.526735316799409, "grad_norm": 0.8359375, "learning_rate": 0.00018521488544557765, "loss": 4.3743, "step": 5080 }, { "epoch": 0.526839004853897, "grad_norm": 0.8125, "learning_rate": 0.00018520920071026038, "loss": 4.4079, "step": 5081 }, { "epoch": 0.5269426929083851, "grad_norm": 0.75, "learning_rate": 0.0001852035149695646, "loss": 4.3398, "step": 5082 }, { "epoch": 0.5270463809628732, "grad_norm": 0.78125, "learning_rate": 0.00018519782822355743, "loss": 4.3662, "step": 5083 }, { "epoch": 0.5271500690173613, "grad_norm": 0.890625, "learning_rate": 0.00018519214047230592, "loss": 4.3139, "step": 5084 }, { "epoch": 0.5272537570718493, "grad_norm": 0.63671875, "learning_rate": 0.00018518645171587719, "loss": 4.3445, "step": 5085 }, { "epoch": 0.5273574451263374, "grad_norm": 0.890625, "learning_rate": 0.0001851807619543384, "loss": 4.3736, "step": 5086 }, { "epoch": 0.5274611331808254, "grad_norm": 0.79296875, "learning_rate": 0.00018517507118775666, "loss": 4.3472, "step": 5087 }, { "epoch": 0.5275648212353136, "grad_norm": 0.82421875, "learning_rate": 0.0001851693794161991, "loss": 4.3841, "step": 5088 }, { "epoch": 0.5276685092898016, "grad_norm": 0.77734375, "learning_rate": 0.0001851636866397329, "loss": 4.3624, "step": 5089 }, { "epoch": 0.5277721973442897, "grad_norm": 0.875, "learning_rate": 0.00018515799285842522, "loss": 4.3591, "step": 5090 }, { "epoch": 0.5278758853987777, "grad_norm": 0.953125, "learning_rate": 0.00018515229807234323, "loss": 4.3731, "step": 5091 }, { "epoch": 0.5279795734532658, "grad_norm": 0.7890625, "learning_rate": 0.00018514660228155413, "loss": 4.3403, "step": 5092 }, { "epoch": 0.528083261507754, "grad_norm": 0.8515625, "learning_rate": 0.00018514090548612516, "loss": 4.3769, "step": 5093 }, { "epoch": 0.528186949562242, "grad_norm": 0.91796875, "learning_rate": 0.00018513520768612346, "loss": 4.315, "step": 5094 }, { "epoch": 0.5282906376167301, "grad_norm": 0.859375, "learning_rate": 0.00018512950888161634, "loss": 4.3687, "step": 5095 }, { "epoch": 0.5283943256712181, "grad_norm": 0.96484375, "learning_rate": 0.00018512380907267098, "loss": 4.3596, "step": 5096 }, { "epoch": 0.5284980137257063, "grad_norm": 0.9921875, "learning_rate": 0.00018511810825935467, "loss": 4.3683, "step": 5097 }, { "epoch": 0.5286017017801943, "grad_norm": 0.75, "learning_rate": 0.00018511240644173462, "loss": 4.3502, "step": 5098 }, { "epoch": 0.5287053898346824, "grad_norm": 0.9921875, "learning_rate": 0.00018510670361987817, "loss": 4.3894, "step": 5099 }, { "epoch": 0.5288090778891704, "grad_norm": 0.8046875, "learning_rate": 0.00018510099979385255, "loss": 4.4088, "step": 5100 }, { "epoch": 0.5289127659436585, "grad_norm": 1.0859375, "learning_rate": 0.00018509529496372513, "loss": 4.3636, "step": 5101 }, { "epoch": 0.5290164539981466, "grad_norm": 0.8359375, "learning_rate": 0.00018508958912956316, "loss": 4.3658, "step": 5102 }, { "epoch": 0.5291201420526347, "grad_norm": 0.86328125, "learning_rate": 0.00018508388229143398, "loss": 4.3711, "step": 5103 }, { "epoch": 0.5292238301071227, "grad_norm": 1.0078125, "learning_rate": 0.0001850781744494049, "loss": 4.3279, "step": 5104 }, { "epoch": 0.5293275181616108, "grad_norm": 0.66796875, "learning_rate": 0.00018507246560354334, "loss": 4.3678, "step": 5105 }, { "epoch": 0.5294312062160988, "grad_norm": 0.96484375, "learning_rate": 0.00018506675575391662, "loss": 4.3717, "step": 5106 }, { "epoch": 0.529534894270587, "grad_norm": 0.734375, "learning_rate": 0.00018506104490059207, "loss": 4.3604, "step": 5107 }, { "epoch": 0.529638582325075, "grad_norm": 0.90234375, "learning_rate": 0.0001850553330436371, "loss": 4.3514, "step": 5108 }, { "epoch": 0.5297422703795631, "grad_norm": 0.9296875, "learning_rate": 0.00018504962018311912, "loss": 4.3815, "step": 5109 }, { "epoch": 0.5298459584340511, "grad_norm": 1.046875, "learning_rate": 0.0001850439063191055, "loss": 4.3414, "step": 5110 }, { "epoch": 0.5299496464885393, "grad_norm": 0.7578125, "learning_rate": 0.0001850381914516637, "loss": 4.3528, "step": 5111 }, { "epoch": 0.5300533345430273, "grad_norm": 0.87890625, "learning_rate": 0.00018503247558086114, "loss": 4.3303, "step": 5112 }, { "epoch": 0.5301570225975154, "grad_norm": 0.9375, "learning_rate": 0.00018502675870676522, "loss": 4.3694, "step": 5113 }, { "epoch": 0.5302607106520034, "grad_norm": 0.83984375, "learning_rate": 0.00018502104082944345, "loss": 4.339, "step": 5114 }, { "epoch": 0.5303643987064915, "grad_norm": 0.734375, "learning_rate": 0.00018501532194896327, "loss": 4.3463, "step": 5115 }, { "epoch": 0.5304680867609796, "grad_norm": 0.8515625, "learning_rate": 0.00018500960206539215, "loss": 4.331, "step": 5116 }, { "epoch": 0.5305717748154677, "grad_norm": 0.84765625, "learning_rate": 0.00018500388117879756, "loss": 4.3701, "step": 5117 }, { "epoch": 0.5306754628699557, "grad_norm": 0.765625, "learning_rate": 0.0001849981592892471, "loss": 4.3665, "step": 5118 }, { "epoch": 0.5307791509244438, "grad_norm": 0.82421875, "learning_rate": 0.00018499243639680813, "loss": 4.3249, "step": 5119 }, { "epoch": 0.5308828389789318, "grad_norm": 0.86328125, "learning_rate": 0.00018498671250154826, "loss": 4.3709, "step": 5120 }, { "epoch": 0.53098652703342, "grad_norm": 0.6484375, "learning_rate": 0.00018498098760353507, "loss": 4.3713, "step": 5121 }, { "epoch": 0.531090215087908, "grad_norm": 0.7890625, "learning_rate": 0.000184975261702836, "loss": 4.3346, "step": 5122 }, { "epoch": 0.5311939031423961, "grad_norm": 0.70703125, "learning_rate": 0.00018496953479951869, "loss": 4.3565, "step": 5123 }, { "epoch": 0.5312975911968841, "grad_norm": 0.734375, "learning_rate": 0.0001849638068936507, "loss": 4.3247, "step": 5124 }, { "epoch": 0.5314012792513723, "grad_norm": 0.8046875, "learning_rate": 0.00018495807798529957, "loss": 4.3497, "step": 5125 }, { "epoch": 0.5315049673058603, "grad_norm": 0.6796875, "learning_rate": 0.00018495234807453293, "loss": 4.4022, "step": 5126 }, { "epoch": 0.5316086553603484, "grad_norm": 0.78515625, "learning_rate": 0.0001849466171614184, "loss": 4.3508, "step": 5127 }, { "epoch": 0.5317123434148364, "grad_norm": 0.66796875, "learning_rate": 0.00018494088524602354, "loss": 4.3337, "step": 5128 }, { "epoch": 0.5318160314693245, "grad_norm": 0.68359375, "learning_rate": 0.00018493515232841605, "loss": 4.37, "step": 5129 }, { "epoch": 0.5319197195238126, "grad_norm": 0.6171875, "learning_rate": 0.00018492941840866353, "loss": 4.3041, "step": 5130 }, { "epoch": 0.5320234075783007, "grad_norm": 0.7109375, "learning_rate": 0.00018492368348683368, "loss": 4.3602, "step": 5131 }, { "epoch": 0.5321270956327887, "grad_norm": 0.66015625, "learning_rate": 0.0001849179475629941, "loss": 4.349, "step": 5132 }, { "epoch": 0.5322307836872768, "grad_norm": 0.7578125, "learning_rate": 0.0001849122106372125, "loss": 4.3643, "step": 5133 }, { "epoch": 0.5323344717417648, "grad_norm": 0.765625, "learning_rate": 0.0001849064727095566, "loss": 4.3504, "step": 5134 }, { "epoch": 0.532438159796253, "grad_norm": 0.65234375, "learning_rate": 0.00018490073378009405, "loss": 4.4057, "step": 5135 }, { "epoch": 0.532541847850741, "grad_norm": 0.7734375, "learning_rate": 0.0001848949938488926, "loss": 4.3717, "step": 5136 }, { "epoch": 0.5326455359052291, "grad_norm": 0.875, "learning_rate": 0.00018488925291601995, "loss": 4.3567, "step": 5137 }, { "epoch": 0.5327492239597172, "grad_norm": 0.74609375, "learning_rate": 0.00018488351098154383, "loss": 4.3428, "step": 5138 }, { "epoch": 0.5328529120142053, "grad_norm": 0.76953125, "learning_rate": 0.00018487776804553203, "loss": 4.3578, "step": 5139 }, { "epoch": 0.5329566000686934, "grad_norm": 0.73046875, "learning_rate": 0.0001848720241080523, "loss": 4.3299, "step": 5140 }, { "epoch": 0.5330602881231814, "grad_norm": 0.76171875, "learning_rate": 0.00018486627916917235, "loss": 4.334, "step": 5141 }, { "epoch": 0.5331639761776695, "grad_norm": 0.74609375, "learning_rate": 0.00018486053322896005, "loss": 4.3096, "step": 5142 }, { "epoch": 0.5332676642321575, "grad_norm": 0.72265625, "learning_rate": 0.00018485478628748315, "loss": 4.3155, "step": 5143 }, { "epoch": 0.5333713522866457, "grad_norm": 0.78515625, "learning_rate": 0.00018484903834480946, "loss": 4.3713, "step": 5144 }, { "epoch": 0.5334750403411337, "grad_norm": 0.7734375, "learning_rate": 0.0001848432894010068, "loss": 4.3835, "step": 5145 }, { "epoch": 0.5335787283956218, "grad_norm": 0.75, "learning_rate": 0.00018483753945614304, "loss": 4.415, "step": 5146 }, { "epoch": 0.5336824164501098, "grad_norm": 0.765625, "learning_rate": 0.00018483178851028597, "loss": 4.3042, "step": 5147 }, { "epoch": 0.533786104504598, "grad_norm": 0.73046875, "learning_rate": 0.00018482603656350347, "loss": 4.3036, "step": 5148 }, { "epoch": 0.533889792559086, "grad_norm": 0.6328125, "learning_rate": 0.00018482028361586342, "loss": 4.3833, "step": 5149 }, { "epoch": 0.5339934806135741, "grad_norm": 0.7890625, "learning_rate": 0.00018481452966743363, "loss": 4.338, "step": 5150 }, { "epoch": 0.5340971686680621, "grad_norm": 0.6875, "learning_rate": 0.0001848087747182821, "loss": 4.3212, "step": 5151 }, { "epoch": 0.5342008567225502, "grad_norm": 0.78125, "learning_rate": 0.00018480301876847667, "loss": 4.3137, "step": 5152 }, { "epoch": 0.5343045447770383, "grad_norm": 0.76171875, "learning_rate": 0.00018479726181808522, "loss": 4.354, "step": 5153 }, { "epoch": 0.5344082328315264, "grad_norm": 0.765625, "learning_rate": 0.00018479150386717575, "loss": 4.3564, "step": 5154 }, { "epoch": 0.5345119208860144, "grad_norm": 0.8125, "learning_rate": 0.00018478574491581618, "loss": 4.3444, "step": 5155 }, { "epoch": 0.5346156089405025, "grad_norm": 0.83203125, "learning_rate": 0.00018477998496407437, "loss": 4.381, "step": 5156 }, { "epoch": 0.5347192969949905, "grad_norm": 0.859375, "learning_rate": 0.00018477422401201842, "loss": 4.3283, "step": 5157 }, { "epoch": 0.5348229850494787, "grad_norm": 0.7734375, "learning_rate": 0.00018476846205971623, "loss": 4.3069, "step": 5158 }, { "epoch": 0.5349266731039667, "grad_norm": 0.81640625, "learning_rate": 0.00018476269910723577, "loss": 4.3874, "step": 5159 }, { "epoch": 0.5350303611584548, "grad_norm": 0.7890625, "learning_rate": 0.00018475693515464508, "loss": 4.3665, "step": 5160 }, { "epoch": 0.5351340492129428, "grad_norm": 0.75390625, "learning_rate": 0.0001847511702020121, "loss": 4.3524, "step": 5161 }, { "epoch": 0.535237737267431, "grad_norm": 0.890625, "learning_rate": 0.00018474540424940497, "loss": 4.4009, "step": 5162 }, { "epoch": 0.535341425321919, "grad_norm": 0.66796875, "learning_rate": 0.0001847396372968916, "loss": 4.3042, "step": 5163 }, { "epoch": 0.5354451133764071, "grad_norm": 0.78515625, "learning_rate": 0.0001847338693445401, "loss": 4.3491, "step": 5164 }, { "epoch": 0.5355488014308951, "grad_norm": 0.72265625, "learning_rate": 0.00018472810039241847, "loss": 4.3576, "step": 5165 }, { "epoch": 0.5356524894853832, "grad_norm": 0.81640625, "learning_rate": 0.00018472233044059485, "loss": 4.3091, "step": 5166 }, { "epoch": 0.5357561775398713, "grad_norm": 0.8359375, "learning_rate": 0.0001847165594891373, "loss": 4.368, "step": 5167 }, { "epoch": 0.5358598655943594, "grad_norm": 0.7890625, "learning_rate": 0.00018471078753811392, "loss": 4.3373, "step": 5168 }, { "epoch": 0.5359635536488474, "grad_norm": 0.8125, "learning_rate": 0.00018470501458759273, "loss": 4.3089, "step": 5169 }, { "epoch": 0.5360672417033355, "grad_norm": 0.8671875, "learning_rate": 0.00018469924063764192, "loss": 4.3848, "step": 5170 }, { "epoch": 0.5361709297578235, "grad_norm": 0.71484375, "learning_rate": 0.0001846934656883296, "loss": 4.3728, "step": 5171 }, { "epoch": 0.5362746178123117, "grad_norm": 0.81640625, "learning_rate": 0.00018468768973972392, "loss": 4.3487, "step": 5172 }, { "epoch": 0.5363783058667997, "grad_norm": 0.70703125, "learning_rate": 0.00018468191279189303, "loss": 4.3687, "step": 5173 }, { "epoch": 0.5364819939212878, "grad_norm": 0.79296875, "learning_rate": 0.00018467613484490506, "loss": 4.3389, "step": 5174 }, { "epoch": 0.5365856819757758, "grad_norm": 0.8515625, "learning_rate": 0.0001846703558988282, "loss": 4.4002, "step": 5175 }, { "epoch": 0.536689370030264, "grad_norm": 0.75390625, "learning_rate": 0.0001846645759537307, "loss": 4.3487, "step": 5176 }, { "epoch": 0.536793058084752, "grad_norm": 0.7578125, "learning_rate": 0.00018465879500968065, "loss": 4.3517, "step": 5177 }, { "epoch": 0.5368967461392401, "grad_norm": 0.7734375, "learning_rate": 0.00018465301306674632, "loss": 4.3955, "step": 5178 }, { "epoch": 0.5370004341937281, "grad_norm": 0.7734375, "learning_rate": 0.0001846472301249959, "loss": 4.3555, "step": 5179 }, { "epoch": 0.5371041222482162, "grad_norm": 0.6953125, "learning_rate": 0.00018464144618449764, "loss": 4.2937, "step": 5180 }, { "epoch": 0.5372078103027044, "grad_norm": 0.67578125, "learning_rate": 0.0001846356612453198, "loss": 4.3372, "step": 5181 }, { "epoch": 0.5373114983571924, "grad_norm": 0.7734375, "learning_rate": 0.00018462987530753062, "loss": 4.3874, "step": 5182 }, { "epoch": 0.5374151864116805, "grad_norm": 0.7421875, "learning_rate": 0.00018462408837119837, "loss": 4.3692, "step": 5183 }, { "epoch": 0.5375188744661685, "grad_norm": 0.859375, "learning_rate": 0.00018461830043639131, "loss": 4.3388, "step": 5184 }, { "epoch": 0.5376225625206567, "grad_norm": 0.7265625, "learning_rate": 0.00018461251150317777, "loss": 4.3781, "step": 5185 }, { "epoch": 0.5377262505751447, "grad_norm": 0.84375, "learning_rate": 0.00018460672157162602, "loss": 4.3331, "step": 5186 }, { "epoch": 0.5378299386296328, "grad_norm": 0.76171875, "learning_rate": 0.0001846009306418044, "loss": 4.3527, "step": 5187 }, { "epoch": 0.5379336266841208, "grad_norm": 0.828125, "learning_rate": 0.00018459513871378126, "loss": 4.3519, "step": 5188 }, { "epoch": 0.5380373147386089, "grad_norm": 0.8359375, "learning_rate": 0.00018458934578762483, "loss": 4.3211, "step": 5189 }, { "epoch": 0.538141002793097, "grad_norm": 0.68359375, "learning_rate": 0.0001845835518634036, "loss": 4.3603, "step": 5190 }, { "epoch": 0.5382446908475851, "grad_norm": 0.84375, "learning_rate": 0.00018457775694118582, "loss": 4.368, "step": 5191 }, { "epoch": 0.5383483789020731, "grad_norm": 0.72265625, "learning_rate": 0.00018457196102103992, "loss": 4.366, "step": 5192 }, { "epoch": 0.5384520669565612, "grad_norm": 0.90234375, "learning_rate": 0.0001845661641030343, "loss": 4.3691, "step": 5193 }, { "epoch": 0.5385557550110492, "grad_norm": 0.78515625, "learning_rate": 0.0001845603661872373, "loss": 4.348, "step": 5194 }, { "epoch": 0.5386594430655374, "grad_norm": 0.7265625, "learning_rate": 0.00018455456727371742, "loss": 4.3388, "step": 5195 }, { "epoch": 0.5387631311200254, "grad_norm": 0.8203125, "learning_rate": 0.00018454876736254296, "loss": 4.3334, "step": 5196 }, { "epoch": 0.5388668191745135, "grad_norm": 0.71875, "learning_rate": 0.00018454296645378245, "loss": 4.3293, "step": 5197 }, { "epoch": 0.5389705072290015, "grad_norm": 0.73046875, "learning_rate": 0.00018453716454750428, "loss": 4.3296, "step": 5198 }, { "epoch": 0.5390741952834897, "grad_norm": 0.9375, "learning_rate": 0.00018453136164377695, "loss": 4.3375, "step": 5199 }, { "epoch": 0.5391778833379777, "grad_norm": 0.71875, "learning_rate": 0.00018452555774266892, "loss": 4.3723, "step": 5200 }, { "epoch": 0.5392815713924658, "grad_norm": 0.91796875, "learning_rate": 0.00018451975284424862, "loss": 4.3693, "step": 5201 }, { "epoch": 0.5393852594469538, "grad_norm": 0.7265625, "learning_rate": 0.0001845139469485846, "loss": 4.298, "step": 5202 }, { "epoch": 0.5394889475014419, "grad_norm": 0.91796875, "learning_rate": 0.00018450814005574532, "loss": 4.3173, "step": 5203 }, { "epoch": 0.53959263555593, "grad_norm": 0.84375, "learning_rate": 0.00018450233216579936, "loss": 4.288, "step": 5204 }, { "epoch": 0.5396963236104181, "grad_norm": 0.8046875, "learning_rate": 0.00018449652327881514, "loss": 4.3271, "step": 5205 }, { "epoch": 0.5398000116649061, "grad_norm": 0.859375, "learning_rate": 0.0001844907133948613, "loss": 4.417, "step": 5206 }, { "epoch": 0.5399036997193942, "grad_norm": 0.81640625, "learning_rate": 0.00018448490251400635, "loss": 4.3407, "step": 5207 }, { "epoch": 0.5400073877738822, "grad_norm": 0.75, "learning_rate": 0.00018447909063631888, "loss": 4.3654, "step": 5208 }, { "epoch": 0.5401110758283704, "grad_norm": 0.79296875, "learning_rate": 0.0001844732777618674, "loss": 4.3053, "step": 5209 }, { "epoch": 0.5402147638828584, "grad_norm": 0.66015625, "learning_rate": 0.00018446746389072055, "loss": 4.3725, "step": 5210 }, { "epoch": 0.5403184519373465, "grad_norm": 0.8828125, "learning_rate": 0.0001844616490229469, "loss": 4.3676, "step": 5211 }, { "epoch": 0.5404221399918345, "grad_norm": 0.765625, "learning_rate": 0.00018445583315861508, "loss": 4.3485, "step": 5212 }, { "epoch": 0.5405258280463227, "grad_norm": 0.84765625, "learning_rate": 0.0001844500162977937, "loss": 4.345, "step": 5213 }, { "epoch": 0.5406295161008107, "grad_norm": 0.82421875, "learning_rate": 0.0001844441984405514, "loss": 4.3473, "step": 5214 }, { "epoch": 0.5407332041552988, "grad_norm": 0.82421875, "learning_rate": 0.00018443837958695682, "loss": 4.3451, "step": 5215 }, { "epoch": 0.5408368922097868, "grad_norm": 0.84765625, "learning_rate": 0.00018443255973707863, "loss": 4.3028, "step": 5216 }, { "epoch": 0.5409405802642749, "grad_norm": 0.78515625, "learning_rate": 0.00018442673889098546, "loss": 4.3392, "step": 5217 }, { "epoch": 0.541044268318763, "grad_norm": 0.9140625, "learning_rate": 0.000184420917048746, "loss": 4.3519, "step": 5218 }, { "epoch": 0.5411479563732511, "grad_norm": 0.79296875, "learning_rate": 0.00018441509421042898, "loss": 4.3588, "step": 5219 }, { "epoch": 0.5412516444277391, "grad_norm": 0.7421875, "learning_rate": 0.00018440927037610306, "loss": 4.3698, "step": 5220 }, { "epoch": 0.5413553324822272, "grad_norm": 0.90625, "learning_rate": 0.000184403445545837, "loss": 4.3532, "step": 5221 }, { "epoch": 0.5414590205367152, "grad_norm": 0.6875, "learning_rate": 0.00018439761971969948, "loss": 4.3166, "step": 5222 }, { "epoch": 0.5415627085912034, "grad_norm": 0.84765625, "learning_rate": 0.0001843917928977593, "loss": 4.3754, "step": 5223 }, { "epoch": 0.5416663966456914, "grad_norm": 0.91015625, "learning_rate": 0.00018438596508008512, "loss": 4.3262, "step": 5224 }, { "epoch": 0.5417700847001795, "grad_norm": 0.69921875, "learning_rate": 0.00018438013626674576, "loss": 4.378, "step": 5225 }, { "epoch": 0.5418737727546676, "grad_norm": 0.9140625, "learning_rate": 0.00018437430645781, "loss": 4.3435, "step": 5226 }, { "epoch": 0.5419774608091557, "grad_norm": 0.90234375, "learning_rate": 0.00018436847565334662, "loss": 4.368, "step": 5227 }, { "epoch": 0.5420811488636438, "grad_norm": 0.86328125, "learning_rate": 0.00018436264385342441, "loss": 4.3494, "step": 5228 }, { "epoch": 0.5421848369181318, "grad_norm": 0.75, "learning_rate": 0.00018435681105811216, "loss": 4.3826, "step": 5229 }, { "epoch": 0.5422885249726199, "grad_norm": 1.140625, "learning_rate": 0.0001843509772674787, "loss": 4.3497, "step": 5230 }, { "epoch": 0.5423922130271079, "grad_norm": 0.74609375, "learning_rate": 0.0001843451424815929, "loss": 4.3584, "step": 5231 }, { "epoch": 0.5424959010815961, "grad_norm": 0.8203125, "learning_rate": 0.00018433930670052354, "loss": 4.3105, "step": 5232 }, { "epoch": 0.5425995891360841, "grad_norm": 0.84375, "learning_rate": 0.00018433346992433955, "loss": 4.3521, "step": 5233 }, { "epoch": 0.5427032771905722, "grad_norm": 0.80859375, "learning_rate": 0.00018432763215310972, "loss": 4.3291, "step": 5234 }, { "epoch": 0.5428069652450602, "grad_norm": 0.796875, "learning_rate": 0.000184321793386903, "loss": 4.3466, "step": 5235 }, { "epoch": 0.5429106532995484, "grad_norm": 0.89453125, "learning_rate": 0.00018431595362578825, "loss": 4.337, "step": 5236 }, { "epoch": 0.5430143413540364, "grad_norm": 0.703125, "learning_rate": 0.00018431011286983436, "loss": 4.3597, "step": 5237 }, { "epoch": 0.5431180294085245, "grad_norm": 0.90625, "learning_rate": 0.00018430427111911025, "loss": 4.3259, "step": 5238 }, { "epoch": 0.5432217174630125, "grad_norm": 0.80859375, "learning_rate": 0.00018429842837368486, "loss": 4.357, "step": 5239 }, { "epoch": 0.5433254055175006, "grad_norm": 0.796875, "learning_rate": 0.00018429258463362714, "loss": 4.4306, "step": 5240 }, { "epoch": 0.5434290935719887, "grad_norm": 0.85546875, "learning_rate": 0.00018428673989900603, "loss": 4.3846, "step": 5241 }, { "epoch": 0.5435327816264768, "grad_norm": 0.84765625, "learning_rate": 0.00018428089416989044, "loss": 4.3188, "step": 5242 }, { "epoch": 0.5436364696809648, "grad_norm": 0.6875, "learning_rate": 0.00018427504744634941, "loss": 4.3628, "step": 5243 }, { "epoch": 0.5437401577354529, "grad_norm": 0.79296875, "learning_rate": 0.0001842691997284519, "loss": 4.338, "step": 5244 }, { "epoch": 0.5438438457899409, "grad_norm": 0.7578125, "learning_rate": 0.00018426335101626694, "loss": 4.3442, "step": 5245 }, { "epoch": 0.5439475338444291, "grad_norm": 0.84765625, "learning_rate": 0.00018425750130986347, "loss": 4.3254, "step": 5246 }, { "epoch": 0.5440512218989171, "grad_norm": 0.76953125, "learning_rate": 0.00018425165060931056, "loss": 4.3504, "step": 5247 }, { "epoch": 0.5441549099534052, "grad_norm": 0.7421875, "learning_rate": 0.0001842457989146772, "loss": 4.3441, "step": 5248 }, { "epoch": 0.5442585980078932, "grad_norm": 0.765625, "learning_rate": 0.0001842399462260325, "loss": 4.3201, "step": 5249 }, { "epoch": 0.5443622860623814, "grad_norm": 0.6640625, "learning_rate": 0.0001842340925434455, "loss": 4.311, "step": 5250 }, { "epoch": 0.5444659741168694, "grad_norm": 0.77734375, "learning_rate": 0.0001842282378669852, "loss": 4.4093, "step": 5251 }, { "epoch": 0.5445696621713575, "grad_norm": 0.65625, "learning_rate": 0.00018422238219672078, "loss": 4.372, "step": 5252 }, { "epoch": 0.5446733502258455, "grad_norm": 0.73046875, "learning_rate": 0.00018421652553272125, "loss": 4.3411, "step": 5253 }, { "epoch": 0.5447770382803336, "grad_norm": 0.73046875, "learning_rate": 0.00018421066787505574, "loss": 4.3217, "step": 5254 }, { "epoch": 0.5448807263348217, "grad_norm": 0.75, "learning_rate": 0.00018420480922379335, "loss": 4.352, "step": 5255 }, { "epoch": 0.5449844143893098, "grad_norm": 0.734375, "learning_rate": 0.0001841989495790032, "loss": 4.3746, "step": 5256 }, { "epoch": 0.5450881024437978, "grad_norm": 0.75, "learning_rate": 0.0001841930889407545, "loss": 4.3087, "step": 5257 }, { "epoch": 0.5451917904982859, "grad_norm": 0.88671875, "learning_rate": 0.00018418722730911632, "loss": 4.3734, "step": 5258 }, { "epoch": 0.5452954785527739, "grad_norm": 0.73046875, "learning_rate": 0.00018418136468415785, "loss": 4.3285, "step": 5259 }, { "epoch": 0.5453991666072621, "grad_norm": 0.7578125, "learning_rate": 0.00018417550106594827, "loss": 4.3549, "step": 5260 }, { "epoch": 0.5455028546617501, "grad_norm": 0.75, "learning_rate": 0.00018416963645455673, "loss": 4.3472, "step": 5261 }, { "epoch": 0.5456065427162382, "grad_norm": 0.7890625, "learning_rate": 0.00018416377085005248, "loss": 4.3737, "step": 5262 }, { "epoch": 0.5457102307707262, "grad_norm": 0.79296875, "learning_rate": 0.00018415790425250467, "loss": 4.327, "step": 5263 }, { "epoch": 0.5458139188252144, "grad_norm": 0.796875, "learning_rate": 0.00018415203666198257, "loss": 4.3347, "step": 5264 }, { "epoch": 0.5459176068797024, "grad_norm": 0.73828125, "learning_rate": 0.00018414616807855538, "loss": 4.2823, "step": 5265 }, { "epoch": 0.5460212949341905, "grad_norm": 0.79296875, "learning_rate": 0.00018414029850229236, "loss": 4.3409, "step": 5266 }, { "epoch": 0.5461249829886785, "grad_norm": 0.76171875, "learning_rate": 0.00018413442793326277, "loss": 4.3065, "step": 5267 }, { "epoch": 0.5462286710431666, "grad_norm": 0.81640625, "learning_rate": 0.00018412855637153582, "loss": 4.3402, "step": 5268 }, { "epoch": 0.5463323590976547, "grad_norm": 0.81640625, "learning_rate": 0.00018412268381718088, "loss": 4.3167, "step": 5269 }, { "epoch": 0.5464360471521428, "grad_norm": 0.78515625, "learning_rate": 0.00018411681027026715, "loss": 4.3548, "step": 5270 }, { "epoch": 0.5465397352066309, "grad_norm": 0.71484375, "learning_rate": 0.000184110935730864, "loss": 4.34, "step": 5271 }, { "epoch": 0.5466434232611189, "grad_norm": 0.74609375, "learning_rate": 0.00018410506019904074, "loss": 4.3171, "step": 5272 }, { "epoch": 0.546747111315607, "grad_norm": 0.7265625, "learning_rate": 0.00018409918367486662, "loss": 4.3388, "step": 5273 }, { "epoch": 0.5468507993700951, "grad_norm": 0.9453125, "learning_rate": 0.00018409330615841107, "loss": 4.3309, "step": 5274 }, { "epoch": 0.5469544874245832, "grad_norm": 0.84375, "learning_rate": 0.00018408742764974338, "loss": 4.3119, "step": 5275 }, { "epoch": 0.5470581754790712, "grad_norm": 0.87890625, "learning_rate": 0.00018408154814893296, "loss": 4.3519, "step": 5276 }, { "epoch": 0.5471618635335593, "grad_norm": 0.79296875, "learning_rate": 0.00018407566765604911, "loss": 4.3591, "step": 5277 }, { "epoch": 0.5472655515880473, "grad_norm": 1.0078125, "learning_rate": 0.00018406978617116124, "loss": 4.3208, "step": 5278 }, { "epoch": 0.5473692396425355, "grad_norm": 0.80859375, "learning_rate": 0.00018406390369433878, "loss": 4.3438, "step": 5279 }, { "epoch": 0.5474729276970235, "grad_norm": 0.74609375, "learning_rate": 0.00018405802022565114, "loss": 4.3529, "step": 5280 }, { "epoch": 0.5475766157515116, "grad_norm": 0.984375, "learning_rate": 0.00018405213576516772, "loss": 4.3476, "step": 5281 }, { "epoch": 0.5476803038059996, "grad_norm": 0.90234375, "learning_rate": 0.0001840462503129579, "loss": 4.329, "step": 5282 }, { "epoch": 0.5477839918604878, "grad_norm": 0.8203125, "learning_rate": 0.00018404036386909117, "loss": 4.3186, "step": 5283 }, { "epoch": 0.5478876799149758, "grad_norm": 0.95703125, "learning_rate": 0.00018403447643363703, "loss": 4.3454, "step": 5284 }, { "epoch": 0.5479913679694639, "grad_norm": 0.82421875, "learning_rate": 0.00018402858800666485, "loss": 4.3464, "step": 5285 }, { "epoch": 0.5480950560239519, "grad_norm": 0.75390625, "learning_rate": 0.00018402269858824416, "loss": 4.3919, "step": 5286 }, { "epoch": 0.54819874407844, "grad_norm": 1.0078125, "learning_rate": 0.00018401680817844445, "loss": 4.3347, "step": 5287 }, { "epoch": 0.5483024321329281, "grad_norm": 0.87109375, "learning_rate": 0.00018401091677733522, "loss": 4.3367, "step": 5288 }, { "epoch": 0.5484061201874162, "grad_norm": 0.6796875, "learning_rate": 0.00018400502438498597, "loss": 4.3219, "step": 5289 }, { "epoch": 0.5485098082419042, "grad_norm": 1.0390625, "learning_rate": 0.00018399913100146625, "loss": 4.3454, "step": 5290 }, { "epoch": 0.5486134962963923, "grad_norm": 0.84375, "learning_rate": 0.00018399323662684553, "loss": 4.3655, "step": 5291 }, { "epoch": 0.5487171843508803, "grad_norm": 0.75, "learning_rate": 0.00018398734126119344, "loss": 4.385, "step": 5292 }, { "epoch": 0.5488208724053685, "grad_norm": 0.7421875, "learning_rate": 0.00018398144490457946, "loss": 4.3444, "step": 5293 }, { "epoch": 0.5489245604598565, "grad_norm": 0.7734375, "learning_rate": 0.00018397554755707324, "loss": 4.3266, "step": 5294 }, { "epoch": 0.5490282485143446, "grad_norm": 0.56640625, "learning_rate": 0.00018396964921874433, "loss": 4.3489, "step": 5295 }, { "epoch": 0.5491319365688326, "grad_norm": 0.7421875, "learning_rate": 0.0001839637498896623, "loss": 4.2763, "step": 5296 }, { "epoch": 0.5492356246233208, "grad_norm": 0.6171875, "learning_rate": 0.00018395784956989677, "loss": 4.3416, "step": 5297 }, { "epoch": 0.5493393126778088, "grad_norm": 0.68359375, "learning_rate": 0.0001839519482595174, "loss": 4.3483, "step": 5298 }, { "epoch": 0.5494430007322969, "grad_norm": 0.65625, "learning_rate": 0.00018394604595859372, "loss": 4.3213, "step": 5299 }, { "epoch": 0.5495466887867849, "grad_norm": 0.671875, "learning_rate": 0.00018394014266719547, "loss": 4.2955, "step": 5300 }, { "epoch": 0.549650376841273, "grad_norm": 0.703125, "learning_rate": 0.00018393423838539225, "loss": 4.3245, "step": 5301 }, { "epoch": 0.5497540648957611, "grad_norm": 0.71484375, "learning_rate": 0.00018392833311325376, "loss": 4.3408, "step": 5302 }, { "epoch": 0.5498577529502492, "grad_norm": 0.73828125, "learning_rate": 0.00018392242685084963, "loss": 4.3355, "step": 5303 }, { "epoch": 0.5499614410047372, "grad_norm": 0.6875, "learning_rate": 0.0001839165195982496, "loss": 4.3566, "step": 5304 }, { "epoch": 0.5500651290592253, "grad_norm": 0.69140625, "learning_rate": 0.00018391061135552335, "loss": 4.3398, "step": 5305 }, { "epoch": 0.5501688171137133, "grad_norm": 0.6953125, "learning_rate": 0.00018390470212274057, "loss": 4.3165, "step": 5306 }, { "epoch": 0.5502725051682015, "grad_norm": 0.71484375, "learning_rate": 0.00018389879189997097, "loss": 4.353, "step": 5307 }, { "epoch": 0.5503761932226895, "grad_norm": 0.69140625, "learning_rate": 0.0001838928806872843, "loss": 4.3768, "step": 5308 }, { "epoch": 0.5504798812771776, "grad_norm": 0.7421875, "learning_rate": 0.00018388696848475034, "loss": 4.3385, "step": 5309 }, { "epoch": 0.5505835693316656, "grad_norm": 0.69921875, "learning_rate": 0.00018388105529243886, "loss": 4.3456, "step": 5310 }, { "epoch": 0.5506872573861538, "grad_norm": 0.80078125, "learning_rate": 0.00018387514111041956, "loss": 4.3464, "step": 5311 }, { "epoch": 0.5507909454406418, "grad_norm": 0.64453125, "learning_rate": 0.00018386922593876224, "loss": 4.3536, "step": 5312 }, { "epoch": 0.5508946334951299, "grad_norm": 0.8515625, "learning_rate": 0.00018386330977753675, "loss": 4.3569, "step": 5313 }, { "epoch": 0.5509983215496179, "grad_norm": 0.7578125, "learning_rate": 0.00018385739262681284, "loss": 4.3348, "step": 5314 }, { "epoch": 0.551102009604106, "grad_norm": 0.7578125, "learning_rate": 0.00018385147448666032, "loss": 4.3592, "step": 5315 }, { "epoch": 0.5512056976585942, "grad_norm": 0.7421875, "learning_rate": 0.00018384555535714902, "loss": 4.3617, "step": 5316 }, { "epoch": 0.5513093857130822, "grad_norm": 0.796875, "learning_rate": 0.0001838396352383488, "loss": 4.3339, "step": 5317 }, { "epoch": 0.5514130737675703, "grad_norm": 0.87890625, "learning_rate": 0.00018383371413032955, "loss": 4.3743, "step": 5318 }, { "epoch": 0.5515167618220583, "grad_norm": 0.7578125, "learning_rate": 0.00018382779203316108, "loss": 4.3506, "step": 5319 }, { "epoch": 0.5516204498765465, "grad_norm": 0.87890625, "learning_rate": 0.00018382186894691326, "loss": 4.3465, "step": 5320 }, { "epoch": 0.5517241379310345, "grad_norm": 0.84765625, "learning_rate": 0.00018381594487165597, "loss": 4.3386, "step": 5321 }, { "epoch": 0.5518278259855226, "grad_norm": 0.890625, "learning_rate": 0.00018381001980745916, "loss": 4.3716, "step": 5322 }, { "epoch": 0.5519315140400106, "grad_norm": 0.89453125, "learning_rate": 0.00018380409375439268, "loss": 4.3494, "step": 5323 }, { "epoch": 0.5520352020944987, "grad_norm": 0.9140625, "learning_rate": 0.0001837981667125265, "loss": 4.3477, "step": 5324 }, { "epoch": 0.5521388901489868, "grad_norm": 0.7890625, "learning_rate": 0.0001837922386819305, "loss": 4.3209, "step": 5325 }, { "epoch": 0.5522425782034749, "grad_norm": 0.94140625, "learning_rate": 0.0001837863096626747, "loss": 4.3661, "step": 5326 }, { "epoch": 0.5523462662579629, "grad_norm": 0.828125, "learning_rate": 0.00018378037965482898, "loss": 4.3566, "step": 5327 }, { "epoch": 0.552449954312451, "grad_norm": 0.68359375, "learning_rate": 0.00018377444865846337, "loss": 4.3516, "step": 5328 }, { "epoch": 0.552553642366939, "grad_norm": 0.9296875, "learning_rate": 0.00018376851667364777, "loss": 4.3606, "step": 5329 }, { "epoch": 0.5526573304214272, "grad_norm": 0.94921875, "learning_rate": 0.00018376258370045228, "loss": 4.3479, "step": 5330 }, { "epoch": 0.5527610184759152, "grad_norm": 0.66796875, "learning_rate": 0.0001837566497389468, "loss": 4.3693, "step": 5331 }, { "epoch": 0.5528647065304033, "grad_norm": 0.9296875, "learning_rate": 0.0001837507147892014, "loss": 4.3271, "step": 5332 }, { "epoch": 0.5529683945848913, "grad_norm": 0.8359375, "learning_rate": 0.0001837447788512861, "loss": 4.3483, "step": 5333 }, { "epoch": 0.5530720826393795, "grad_norm": 0.76953125, "learning_rate": 0.00018373884192527092, "loss": 4.3756, "step": 5334 }, { "epoch": 0.5531757706938675, "grad_norm": 0.8984375, "learning_rate": 0.00018373290401122594, "loss": 4.3084, "step": 5335 }, { "epoch": 0.5532794587483556, "grad_norm": 0.8515625, "learning_rate": 0.0001837269651092212, "loss": 4.3144, "step": 5336 }, { "epoch": 0.5533831468028436, "grad_norm": 0.8359375, "learning_rate": 0.00018372102521932676, "loss": 4.3555, "step": 5337 }, { "epoch": 0.5534868348573317, "grad_norm": 0.96875, "learning_rate": 0.0001837150843416127, "loss": 4.3477, "step": 5338 }, { "epoch": 0.5535905229118198, "grad_norm": 0.9296875, "learning_rate": 0.00018370914247614918, "loss": 4.3296, "step": 5339 }, { "epoch": 0.5536942109663079, "grad_norm": 1.0078125, "learning_rate": 0.0001837031996230062, "loss": 4.3393, "step": 5340 }, { "epoch": 0.5537978990207959, "grad_norm": 0.94140625, "learning_rate": 0.00018369725578225402, "loss": 4.3316, "step": 5341 }, { "epoch": 0.553901587075284, "grad_norm": 1.0703125, "learning_rate": 0.00018369131095396263, "loss": 4.3757, "step": 5342 }, { "epoch": 0.554005275129772, "grad_norm": 0.9296875, "learning_rate": 0.00018368536513820228, "loss": 4.3632, "step": 5343 }, { "epoch": 0.5541089631842602, "grad_norm": 1.015625, "learning_rate": 0.00018367941833504308, "loss": 4.354, "step": 5344 }, { "epoch": 0.5542126512387482, "grad_norm": 0.80859375, "learning_rate": 0.00018367347054455517, "loss": 4.3277, "step": 5345 }, { "epoch": 0.5543163392932363, "grad_norm": 1.3046875, "learning_rate": 0.00018366752176680878, "loss": 4.3754, "step": 5346 }, { "epoch": 0.5544200273477243, "grad_norm": 0.828125, "learning_rate": 0.00018366157200187404, "loss": 4.3197, "step": 5347 }, { "epoch": 0.5545237154022125, "grad_norm": 1.359375, "learning_rate": 0.00018365562124982122, "loss": 4.3296, "step": 5348 }, { "epoch": 0.5546274034567005, "grad_norm": 0.90625, "learning_rate": 0.00018364966951072047, "loss": 4.3743, "step": 5349 }, { "epoch": 0.5547310915111886, "grad_norm": 1.7890625, "learning_rate": 0.00018364371678464205, "loss": 4.3345, "step": 5350 }, { "epoch": 0.5548347795656766, "grad_norm": 1.4765625, "learning_rate": 0.00018363776307165619, "loss": 4.3296, "step": 5351 }, { "epoch": 0.5549384676201647, "grad_norm": 1.859375, "learning_rate": 0.00018363180837183314, "loss": 4.3787, "step": 5352 }, { "epoch": 0.5550421556746528, "grad_norm": 1.71875, "learning_rate": 0.00018362585268524314, "loss": 4.2736, "step": 5353 }, { "epoch": 0.5551458437291409, "grad_norm": 1.4921875, "learning_rate": 0.0001836198960119565, "loss": 4.3756, "step": 5354 }, { "epoch": 0.5552495317836289, "grad_norm": 1.34375, "learning_rate": 0.00018361393835204342, "loss": 4.3685, "step": 5355 }, { "epoch": 0.555353219838117, "grad_norm": 1.71875, "learning_rate": 0.0001836079797055743, "loss": 4.3604, "step": 5356 }, { "epoch": 0.555456907892605, "grad_norm": 1.3359375, "learning_rate": 0.0001836020200726194, "loss": 4.3392, "step": 5357 }, { "epoch": 0.5555605959470932, "grad_norm": 2.3125, "learning_rate": 0.00018359605945324903, "loss": 4.3684, "step": 5358 }, { "epoch": 0.5556642840015812, "grad_norm": 2.1875, "learning_rate": 0.0001835900978475335, "loss": 4.3533, "step": 5359 }, { "epoch": 0.5557679720560693, "grad_norm": 1.15625, "learning_rate": 0.0001835841352555432, "loss": 4.3828, "step": 5360 }, { "epoch": 0.5558716601105574, "grad_norm": 1.3984375, "learning_rate": 0.00018357817167734844, "loss": 4.3377, "step": 5361 }, { "epoch": 0.5559753481650455, "grad_norm": 1.1015625, "learning_rate": 0.00018357220711301962, "loss": 4.3375, "step": 5362 }, { "epoch": 0.5560790362195336, "grad_norm": 1.203125, "learning_rate": 0.00018356624156262708, "loss": 4.3466, "step": 5363 }, { "epoch": 0.5561827242740216, "grad_norm": 0.96484375, "learning_rate": 0.0001835602750262412, "loss": 4.3279, "step": 5364 }, { "epoch": 0.5562864123285097, "grad_norm": 1.3203125, "learning_rate": 0.00018355430750393244, "loss": 4.3026, "step": 5365 }, { "epoch": 0.5563901003829977, "grad_norm": 0.92578125, "learning_rate": 0.00018354833899577116, "loss": 4.3357, "step": 5366 }, { "epoch": 0.5564937884374859, "grad_norm": 1.4609375, "learning_rate": 0.00018354236950182782, "loss": 4.3593, "step": 5367 }, { "epoch": 0.5565974764919739, "grad_norm": 1.09375, "learning_rate": 0.0001835363990221728, "loss": 4.3321, "step": 5368 }, { "epoch": 0.556701164546462, "grad_norm": 1.984375, "learning_rate": 0.00018353042755687654, "loss": 4.3385, "step": 5369 }, { "epoch": 0.55680485260095, "grad_norm": 1.8671875, "learning_rate": 0.0001835244551060096, "loss": 4.3639, "step": 5370 }, { "epoch": 0.5569085406554382, "grad_norm": 1.4140625, "learning_rate": 0.00018351848166964232, "loss": 4.3301, "step": 5371 }, { "epoch": 0.5570122287099262, "grad_norm": 1.3984375, "learning_rate": 0.00018351250724784528, "loss": 4.3551, "step": 5372 }, { "epoch": 0.5571159167644143, "grad_norm": 1.2421875, "learning_rate": 0.00018350653184068892, "loss": 4.3526, "step": 5373 }, { "epoch": 0.5572196048189023, "grad_norm": 1.1328125, "learning_rate": 0.0001835005554482437, "loss": 4.3247, "step": 5374 }, { "epoch": 0.5573232928733904, "grad_norm": 1.3671875, "learning_rate": 0.00018349457807058026, "loss": 4.366, "step": 5375 }, { "epoch": 0.5574269809278785, "grad_norm": 1.140625, "learning_rate": 0.00018348859970776903, "loss": 4.3393, "step": 5376 }, { "epoch": 0.5575306689823666, "grad_norm": 1.6875, "learning_rate": 0.0001834826203598806, "loss": 4.3672, "step": 5377 }, { "epoch": 0.5576343570368546, "grad_norm": 1.359375, "learning_rate": 0.00018347664002698545, "loss": 4.3426, "step": 5378 }, { "epoch": 0.5577380450913427, "grad_norm": 1.984375, "learning_rate": 0.00018347065870915423, "loss": 4.3356, "step": 5379 }, { "epoch": 0.5578417331458307, "grad_norm": 1.7890625, "learning_rate": 0.00018346467640645745, "loss": 4.3529, "step": 5380 }, { "epoch": 0.5579454212003189, "grad_norm": 1.8203125, "learning_rate": 0.0001834586931189657, "loss": 4.3789, "step": 5381 }, { "epoch": 0.5580491092548069, "grad_norm": 1.609375, "learning_rate": 0.0001834527088467496, "loss": 4.3493, "step": 5382 }, { "epoch": 0.558152797309295, "grad_norm": 1.765625, "learning_rate": 0.00018344672358987975, "loss": 4.2857, "step": 5383 }, { "epoch": 0.558256485363783, "grad_norm": 1.5234375, "learning_rate": 0.00018344073734842677, "loss": 4.3451, "step": 5384 }, { "epoch": 0.5583601734182712, "grad_norm": 1.9609375, "learning_rate": 0.00018343475012246133, "loss": 4.3336, "step": 5385 }, { "epoch": 0.5584638614727592, "grad_norm": 1.796875, "learning_rate": 0.00018342876191205395, "loss": 4.2956, "step": 5386 }, { "epoch": 0.5585675495272473, "grad_norm": 1.5625, "learning_rate": 0.00018342277271727543, "loss": 4.2989, "step": 5387 }, { "epoch": 0.5586712375817353, "grad_norm": 1.4765625, "learning_rate": 0.00018341678253819637, "loss": 4.3567, "step": 5388 }, { "epoch": 0.5587749256362234, "grad_norm": 1.5078125, "learning_rate": 0.00018341079137488745, "loss": 4.3328, "step": 5389 }, { "epoch": 0.5588786136907115, "grad_norm": 1.3359375, "learning_rate": 0.00018340479922741932, "loss": 4.3779, "step": 5390 }, { "epoch": 0.5589823017451996, "grad_norm": 1.6640625, "learning_rate": 0.00018339880609586276, "loss": 4.2602, "step": 5391 }, { "epoch": 0.5590859897996876, "grad_norm": 1.4921875, "learning_rate": 0.00018339281198028845, "loss": 4.3444, "step": 5392 }, { "epoch": 0.5591896778541757, "grad_norm": 1.7734375, "learning_rate": 0.00018338681688076709, "loss": 4.3121, "step": 5393 }, { "epoch": 0.5592933659086637, "grad_norm": 1.6875, "learning_rate": 0.0001833808207973694, "loss": 4.3673, "step": 5394 }, { "epoch": 0.5593970539631519, "grad_norm": 1.578125, "learning_rate": 0.00018337482373016623, "loss": 4.3323, "step": 5395 }, { "epoch": 0.5595007420176399, "grad_norm": 1.4609375, "learning_rate": 0.00018336882567922822, "loss": 4.3443, "step": 5396 }, { "epoch": 0.559604430072128, "grad_norm": 1.7890625, "learning_rate": 0.00018336282664462622, "loss": 4.3561, "step": 5397 }, { "epoch": 0.559708118126616, "grad_norm": 1.578125, "learning_rate": 0.00018335682662643099, "loss": 4.3319, "step": 5398 }, { "epoch": 0.5598118061811042, "grad_norm": 1.7109375, "learning_rate": 0.0001833508256247133, "loss": 4.3546, "step": 5399 }, { "epoch": 0.5599154942355922, "grad_norm": 1.671875, "learning_rate": 0.00018334482363954396, "loss": 4.3268, "step": 5400 }, { "epoch": 0.5600191822900803, "grad_norm": 1.4375, "learning_rate": 0.00018333882067099385, "loss": 4.3379, "step": 5401 }, { "epoch": 0.5601228703445683, "grad_norm": 1.390625, "learning_rate": 0.0001833328167191337, "loss": 4.3464, "step": 5402 }, { "epoch": 0.5602265583990564, "grad_norm": 1.5546875, "learning_rate": 0.0001833268117840344, "loss": 4.3033, "step": 5403 }, { "epoch": 0.5603302464535445, "grad_norm": 1.3359375, "learning_rate": 0.0001833208058657668, "loss": 4.3792, "step": 5404 }, { "epoch": 0.5604339345080326, "grad_norm": 1.6171875, "learning_rate": 0.0001833147989644018, "loss": 4.3486, "step": 5405 }, { "epoch": 0.5605376225625207, "grad_norm": 1.4609375, "learning_rate": 0.00018330879108001023, "loss": 4.3596, "step": 5406 }, { "epoch": 0.5606413106170087, "grad_norm": 1.7421875, "learning_rate": 0.00018330278221266298, "loss": 4.3626, "step": 5407 }, { "epoch": 0.5607449986714969, "grad_norm": 1.609375, "learning_rate": 0.00018329677236243096, "loss": 4.3301, "step": 5408 }, { "epoch": 0.5608486867259849, "grad_norm": 1.578125, "learning_rate": 0.00018329076152938506, "loss": 4.3098, "step": 5409 }, { "epoch": 0.560952374780473, "grad_norm": 1.4140625, "learning_rate": 0.00018328474971359622, "loss": 4.3482, "step": 5410 }, { "epoch": 0.561056062834961, "grad_norm": 1.6953125, "learning_rate": 0.00018327873691513537, "loss": 4.3548, "step": 5411 }, { "epoch": 0.5611597508894491, "grad_norm": 1.6484375, "learning_rate": 0.00018327272313407343, "loss": 4.368, "step": 5412 }, { "epoch": 0.5612634389439372, "grad_norm": 1.53125, "learning_rate": 0.00018326670837048143, "loss": 4.3198, "step": 5413 }, { "epoch": 0.5613671269984253, "grad_norm": 1.484375, "learning_rate": 0.00018326069262443025, "loss": 4.3299, "step": 5414 }, { "epoch": 0.5614708150529133, "grad_norm": 1.4609375, "learning_rate": 0.00018325467589599094, "loss": 4.3464, "step": 5415 }, { "epoch": 0.5615745031074014, "grad_norm": 1.3828125, "learning_rate": 0.00018324865818523445, "loss": 4.3521, "step": 5416 }, { "epoch": 0.5616781911618894, "grad_norm": 1.5390625, "learning_rate": 0.00018324263949223178, "loss": 4.3357, "step": 5417 }, { "epoch": 0.5617818792163776, "grad_norm": 1.5078125, "learning_rate": 0.00018323661981705394, "loss": 4.3585, "step": 5418 }, { "epoch": 0.5618855672708656, "grad_norm": 1.5, "learning_rate": 0.000183230599159772, "loss": 4.3358, "step": 5419 }, { "epoch": 0.5619892553253537, "grad_norm": 1.359375, "learning_rate": 0.00018322457752045695, "loss": 4.2999, "step": 5420 }, { "epoch": 0.5620929433798417, "grad_norm": 1.5859375, "learning_rate": 0.00018321855489917988, "loss": 4.3694, "step": 5421 }, { "epoch": 0.5621966314343299, "grad_norm": 1.453125, "learning_rate": 0.0001832125312960118, "loss": 4.3505, "step": 5422 }, { "epoch": 0.5623003194888179, "grad_norm": 1.8203125, "learning_rate": 0.00018320650671102383, "loss": 4.3697, "step": 5423 }, { "epoch": 0.562404007543306, "grad_norm": 1.671875, "learning_rate": 0.00018320048114428702, "loss": 4.3777, "step": 5424 }, { "epoch": 0.562507695597794, "grad_norm": 1.4453125, "learning_rate": 0.00018319445459587247, "loss": 4.3195, "step": 5425 }, { "epoch": 0.5626113836522821, "grad_norm": 1.3828125, "learning_rate": 0.0001831884270658513, "loss": 4.3189, "step": 5426 }, { "epoch": 0.5627150717067702, "grad_norm": 1.75, "learning_rate": 0.00018318239855429465, "loss": 4.3681, "step": 5427 }, { "epoch": 0.5628187597612583, "grad_norm": 1.59375, "learning_rate": 0.0001831763690612736, "loss": 4.3512, "step": 5428 }, { "epoch": 0.5629224478157463, "grad_norm": 1.6796875, "learning_rate": 0.00018317033858685934, "loss": 4.3531, "step": 5429 }, { "epoch": 0.5630261358702344, "grad_norm": 1.671875, "learning_rate": 0.000183164307131123, "loss": 4.3556, "step": 5430 }, { "epoch": 0.5631298239247224, "grad_norm": 1.515625, "learning_rate": 0.0001831582746941357, "loss": 4.3025, "step": 5431 }, { "epoch": 0.5632335119792106, "grad_norm": 1.4375, "learning_rate": 0.0001831522412759687, "loss": 4.3599, "step": 5432 }, { "epoch": 0.5633372000336986, "grad_norm": 1.6015625, "learning_rate": 0.00018314620687669316, "loss": 4.3102, "step": 5433 }, { "epoch": 0.5634408880881867, "grad_norm": 1.46875, "learning_rate": 0.00018314017149638026, "loss": 4.3832, "step": 5434 }, { "epoch": 0.5635445761426747, "grad_norm": 1.390625, "learning_rate": 0.0001831341351351012, "loss": 4.3497, "step": 5435 }, { "epoch": 0.5636482641971629, "grad_norm": 1.296875, "learning_rate": 0.00018312809779292722, "loss": 4.3164, "step": 5436 }, { "epoch": 0.5637519522516509, "grad_norm": 1.4453125, "learning_rate": 0.00018312205946992958, "loss": 4.3224, "step": 5437 }, { "epoch": 0.563855640306139, "grad_norm": 1.3359375, "learning_rate": 0.00018311602016617945, "loss": 4.3528, "step": 5438 }, { "epoch": 0.563959328360627, "grad_norm": 1.546875, "learning_rate": 0.0001831099798817482, "loss": 4.339, "step": 5439 }, { "epoch": 0.5640630164151151, "grad_norm": 1.390625, "learning_rate": 0.000183103938616707, "loss": 4.3813, "step": 5440 }, { "epoch": 0.5641667044696032, "grad_norm": 1.4921875, "learning_rate": 0.0001830978963711272, "loss": 4.3508, "step": 5441 }, { "epoch": 0.5642703925240913, "grad_norm": 1.359375, "learning_rate": 0.00018309185314508003, "loss": 4.3509, "step": 5442 }, { "epoch": 0.5643740805785793, "grad_norm": 1.640625, "learning_rate": 0.00018308580893863685, "loss": 4.3256, "step": 5443 }, { "epoch": 0.5644777686330674, "grad_norm": 1.4296875, "learning_rate": 0.00018307976375186896, "loss": 4.3288, "step": 5444 }, { "epoch": 0.5645814566875554, "grad_norm": 1.7109375, "learning_rate": 0.00018307371758484765, "loss": 4.3302, "step": 5445 }, { "epoch": 0.5646851447420436, "grad_norm": 1.5859375, "learning_rate": 0.00018306767043764429, "loss": 4.3248, "step": 5446 }, { "epoch": 0.5647888327965316, "grad_norm": 1.4609375, "learning_rate": 0.0001830616223103302, "loss": 4.3706, "step": 5447 }, { "epoch": 0.5648925208510197, "grad_norm": 1.3359375, "learning_rate": 0.00018305557320297678, "loss": 4.3251, "step": 5448 }, { "epoch": 0.5649962089055078, "grad_norm": 1.453125, "learning_rate": 0.00018304952311565544, "loss": 4.3554, "step": 5449 }, { "epoch": 0.5650998969599959, "grad_norm": 1.28125, "learning_rate": 0.00018304347204843748, "loss": 4.3653, "step": 5450 }, { "epoch": 0.565203585014484, "grad_norm": 1.7734375, "learning_rate": 0.00018303742000139433, "loss": 4.321, "step": 5451 }, { "epoch": 0.565307273068972, "grad_norm": 1.703125, "learning_rate": 0.00018303136697459736, "loss": 4.3021, "step": 5452 }, { "epoch": 0.5654109611234601, "grad_norm": 1.546875, "learning_rate": 0.0001830253129681181, "loss": 4.3297, "step": 5453 }, { "epoch": 0.5655146491779481, "grad_norm": 1.4609375, "learning_rate": 0.0001830192579820279, "loss": 4.3235, "step": 5454 }, { "epoch": 0.5656183372324363, "grad_norm": 1.4609375, "learning_rate": 0.00018301320201639815, "loss": 4.3163, "step": 5455 }, { "epoch": 0.5657220252869243, "grad_norm": 1.359375, "learning_rate": 0.0001830071450713004, "loss": 4.3826, "step": 5456 }, { "epoch": 0.5658257133414124, "grad_norm": 1.609375, "learning_rate": 0.0001830010871468061, "loss": 4.3021, "step": 5457 }, { "epoch": 0.5659294013959004, "grad_norm": 1.5546875, "learning_rate": 0.00018299502824298668, "loss": 4.2943, "step": 5458 }, { "epoch": 0.5660330894503885, "grad_norm": 1.359375, "learning_rate": 0.00018298896835991366, "loss": 4.3351, "step": 5459 }, { "epoch": 0.5661367775048766, "grad_norm": 1.2734375, "learning_rate": 0.00018298290749765853, "loss": 4.3269, "step": 5460 }, { "epoch": 0.5662404655593647, "grad_norm": 1.515625, "learning_rate": 0.00018297684565629283, "loss": 4.3372, "step": 5461 }, { "epoch": 0.5663441536138527, "grad_norm": 1.3671875, "learning_rate": 0.00018297078283588804, "loss": 4.3356, "step": 5462 }, { "epoch": 0.5664478416683408, "grad_norm": 1.5625, "learning_rate": 0.00018296471903651572, "loss": 4.3262, "step": 5463 }, { "epoch": 0.5665515297228289, "grad_norm": 1.4609375, "learning_rate": 0.0001829586542582474, "loss": 4.3409, "step": 5464 }, { "epoch": 0.566655217777317, "grad_norm": 1.5390625, "learning_rate": 0.0001829525885011547, "loss": 4.2715, "step": 5465 }, { "epoch": 0.566758905831805, "grad_norm": 1.40625, "learning_rate": 0.00018294652176530912, "loss": 4.325, "step": 5466 }, { "epoch": 0.5668625938862931, "grad_norm": 1.5625, "learning_rate": 0.00018294045405078225, "loss": 4.309, "step": 5467 }, { "epoch": 0.5669662819407811, "grad_norm": 1.421875, "learning_rate": 0.0001829343853576457, "loss": 4.347, "step": 5468 }, { "epoch": 0.5670699699952693, "grad_norm": 1.484375, "learning_rate": 0.00018292831568597105, "loss": 4.3978, "step": 5469 }, { "epoch": 0.5671736580497573, "grad_norm": 1.4140625, "learning_rate": 0.00018292224503582992, "loss": 4.2942, "step": 5470 }, { "epoch": 0.5672773461042454, "grad_norm": 1.578125, "learning_rate": 0.00018291617340729399, "loss": 4.3313, "step": 5471 }, { "epoch": 0.5673810341587334, "grad_norm": 1.390625, "learning_rate": 0.00018291010080043483, "loss": 4.344, "step": 5472 }, { "epoch": 0.5674847222132215, "grad_norm": 1.71875, "learning_rate": 0.0001829040272153242, "loss": 4.3467, "step": 5473 }, { "epoch": 0.5675884102677096, "grad_norm": 1.6015625, "learning_rate": 0.0001828979526520336, "loss": 4.3406, "step": 5474 }, { "epoch": 0.5676920983221977, "grad_norm": 1.640625, "learning_rate": 0.0001828918771106348, "loss": 4.3034, "step": 5475 }, { "epoch": 0.5677957863766857, "grad_norm": 1.53125, "learning_rate": 0.0001828858005911995, "loss": 4.3252, "step": 5476 }, { "epoch": 0.5678994744311738, "grad_norm": 1.3203125, "learning_rate": 0.00018287972309379932, "loss": 4.3511, "step": 5477 }, { "epoch": 0.5680031624856618, "grad_norm": 1.28125, "learning_rate": 0.00018287364461850604, "loss": 4.33, "step": 5478 }, { "epoch": 0.56810685054015, "grad_norm": 1.5625, "learning_rate": 0.00018286756516539137, "loss": 4.3105, "step": 5479 }, { "epoch": 0.568210538594638, "grad_norm": 1.4140625, "learning_rate": 0.000182861484734527, "loss": 4.2994, "step": 5480 }, { "epoch": 0.5683142266491261, "grad_norm": 1.65625, "learning_rate": 0.0001828554033259847, "loss": 4.3377, "step": 5481 }, { "epoch": 0.5684179147036141, "grad_norm": 1.5078125, "learning_rate": 0.00018284932093983624, "loss": 4.3631, "step": 5482 }, { "epoch": 0.5685216027581023, "grad_norm": 1.4296875, "learning_rate": 0.00018284323757615335, "loss": 4.3617, "step": 5483 }, { "epoch": 0.5686252908125903, "grad_norm": 1.34375, "learning_rate": 0.00018283715323500786, "loss": 4.3284, "step": 5484 }, { "epoch": 0.5687289788670784, "grad_norm": 1.3984375, "learning_rate": 0.0001828310679164715, "loss": 4.353, "step": 5485 }, { "epoch": 0.5688326669215664, "grad_norm": 1.203125, "learning_rate": 0.0001828249816206161, "loss": 4.36, "step": 5486 }, { "epoch": 0.5689363549760545, "grad_norm": 1.7890625, "learning_rate": 0.00018281889434751346, "loss": 4.372, "step": 5487 }, { "epoch": 0.5690400430305426, "grad_norm": 1.734375, "learning_rate": 0.0001828128060972354, "loss": 4.3062, "step": 5488 }, { "epoch": 0.5691437310850307, "grad_norm": 1.390625, "learning_rate": 0.00018280671686985377, "loss": 4.3524, "step": 5489 }, { "epoch": 0.5692474191395187, "grad_norm": 1.3359375, "learning_rate": 0.00018280062666544043, "loss": 4.3408, "step": 5490 }, { "epoch": 0.5693511071940068, "grad_norm": 1.2265625, "learning_rate": 0.00018279453548406723, "loss": 4.315, "step": 5491 }, { "epoch": 0.5694547952484948, "grad_norm": 1.1640625, "learning_rate": 0.00018278844332580597, "loss": 4.364, "step": 5492 }, { "epoch": 0.569558483302983, "grad_norm": 1.5859375, "learning_rate": 0.00018278235019072864, "loss": 4.2915, "step": 5493 }, { "epoch": 0.5696621713574711, "grad_norm": 1.3828125, "learning_rate": 0.00018277625607890708, "loss": 4.2776, "step": 5494 }, { "epoch": 0.5697658594119591, "grad_norm": 1.546875, "learning_rate": 0.00018277016099041318, "loss": 4.3762, "step": 5495 }, { "epoch": 0.5698695474664472, "grad_norm": 1.4453125, "learning_rate": 0.00018276406492531887, "loss": 4.3403, "step": 5496 }, { "epoch": 0.5699732355209353, "grad_norm": 1.2421875, "learning_rate": 0.0001827579678836961, "loss": 4.4015, "step": 5497 }, { "epoch": 0.5700769235754234, "grad_norm": 1.1171875, "learning_rate": 0.00018275186986561675, "loss": 4.3357, "step": 5498 }, { "epoch": 0.5701806116299114, "grad_norm": 1.6171875, "learning_rate": 0.00018274577087115285, "loss": 4.3691, "step": 5499 }, { "epoch": 0.5702842996843995, "grad_norm": 1.390625, "learning_rate": 0.0001827396709003763, "loss": 4.3375, "step": 5500 }, { "epoch": 0.5703879877388875, "grad_norm": 1.7109375, "learning_rate": 0.00018273356995335909, "loss": 4.3429, "step": 5501 }, { "epoch": 0.5704916757933757, "grad_norm": 1.546875, "learning_rate": 0.00018272746803017323, "loss": 4.3488, "step": 5502 }, { "epoch": 0.5705953638478637, "grad_norm": 1.3984375, "learning_rate": 0.0001827213651308907, "loss": 4.3569, "step": 5503 }, { "epoch": 0.5706990519023518, "grad_norm": 1.28125, "learning_rate": 0.00018271526125558345, "loss": 4.3455, "step": 5504 }, { "epoch": 0.5708027399568398, "grad_norm": 1.4765625, "learning_rate": 0.0001827091564043236, "loss": 4.3558, "step": 5505 }, { "epoch": 0.570906428011328, "grad_norm": 1.3203125, "learning_rate": 0.00018270305057718308, "loss": 4.3108, "step": 5506 }, { "epoch": 0.571010116065816, "grad_norm": 1.71875, "learning_rate": 0.00018269694377423404, "loss": 4.3547, "step": 5507 }, { "epoch": 0.5711138041203041, "grad_norm": 1.578125, "learning_rate": 0.00018269083599554845, "loss": 4.3332, "step": 5508 }, { "epoch": 0.5712174921747921, "grad_norm": 1.5078125, "learning_rate": 0.0001826847272411984, "loss": 4.3429, "step": 5509 }, { "epoch": 0.5713211802292802, "grad_norm": 1.421875, "learning_rate": 0.000182678617511256, "loss": 4.3294, "step": 5510 }, { "epoch": 0.5714248682837683, "grad_norm": 1.3125, "learning_rate": 0.00018267250680579328, "loss": 4.3342, "step": 5511 }, { "epoch": 0.5715285563382564, "grad_norm": 1.28125, "learning_rate": 0.00018266639512488236, "loss": 4.3545, "step": 5512 }, { "epoch": 0.5716322443927444, "grad_norm": 1.6796875, "learning_rate": 0.00018266028246859538, "loss": 4.3752, "step": 5513 }, { "epoch": 0.5717359324472325, "grad_norm": 1.515625, "learning_rate": 0.00018265416883700444, "loss": 4.3253, "step": 5514 }, { "epoch": 0.5718396205017205, "grad_norm": 1.3828125, "learning_rate": 0.00018264805423018164, "loss": 4.3538, "step": 5515 }, { "epoch": 0.5719433085562087, "grad_norm": 1.3359375, "learning_rate": 0.00018264193864819922, "loss": 4.3428, "step": 5516 }, { "epoch": 0.5720469966106967, "grad_norm": 1.3671875, "learning_rate": 0.00018263582209112925, "loss": 4.3088, "step": 5517 }, { "epoch": 0.5721506846651848, "grad_norm": 1.21875, "learning_rate": 0.00018262970455904394, "loss": 4.3636, "step": 5518 }, { "epoch": 0.5722543727196728, "grad_norm": 1.46875, "learning_rate": 0.00018262358605201546, "loss": 4.3314, "step": 5519 }, { "epoch": 0.572358060774161, "grad_norm": 1.2890625, "learning_rate": 0.000182617466570116, "loss": 4.3425, "step": 5520 }, { "epoch": 0.572461748828649, "grad_norm": 1.546875, "learning_rate": 0.00018261134611341774, "loss": 4.3332, "step": 5521 }, { "epoch": 0.5725654368831371, "grad_norm": 1.4375, "learning_rate": 0.00018260522468199297, "loss": 4.3129, "step": 5522 }, { "epoch": 0.5726691249376251, "grad_norm": 1.3515625, "learning_rate": 0.00018259910227591384, "loss": 4.3197, "step": 5523 }, { "epoch": 0.5727728129921132, "grad_norm": 1.296875, "learning_rate": 0.0001825929788952526, "loss": 4.3431, "step": 5524 }, { "epoch": 0.5728765010466013, "grad_norm": 1.6015625, "learning_rate": 0.00018258685454008154, "loss": 4.3203, "step": 5525 }, { "epoch": 0.5729801891010894, "grad_norm": 1.4296875, "learning_rate": 0.00018258072921047288, "loss": 4.3404, "step": 5526 }, { "epoch": 0.5730838771555774, "grad_norm": 1.59375, "learning_rate": 0.00018257460290649889, "loss": 4.3576, "step": 5527 }, { "epoch": 0.5731875652100655, "grad_norm": 1.5625, "learning_rate": 0.0001825684756282319, "loss": 4.413, "step": 5528 }, { "epoch": 0.5732912532645535, "grad_norm": 1.2421875, "learning_rate": 0.00018256234737574415, "loss": 4.3611, "step": 5529 }, { "epoch": 0.5733949413190417, "grad_norm": 1.171875, "learning_rate": 0.000182556218149108, "loss": 4.377, "step": 5530 }, { "epoch": 0.5734986293735297, "grad_norm": 1.546875, "learning_rate": 0.0001825500879483957, "loss": 4.336, "step": 5531 }, { "epoch": 0.5736023174280178, "grad_norm": 1.421875, "learning_rate": 0.00018254395677367967, "loss": 4.3242, "step": 5532 }, { "epoch": 0.5737060054825058, "grad_norm": 1.5, "learning_rate": 0.00018253782462503216, "loss": 4.3437, "step": 5533 }, { "epoch": 0.573809693536994, "grad_norm": 1.421875, "learning_rate": 0.00018253169150252561, "loss": 4.3471, "step": 5534 }, { "epoch": 0.573913381591482, "grad_norm": 1.296875, "learning_rate": 0.00018252555740623234, "loss": 4.358, "step": 5535 }, { "epoch": 0.5740170696459701, "grad_norm": 1.25, "learning_rate": 0.0001825194223362247, "loss": 4.3566, "step": 5536 }, { "epoch": 0.5741207577004581, "grad_norm": 1.4609375, "learning_rate": 0.0001825132862925751, "loss": 4.3145, "step": 5537 }, { "epoch": 0.5742244457549462, "grad_norm": 1.359375, "learning_rate": 0.00018250714927535596, "loss": 4.381, "step": 5538 }, { "epoch": 0.5743281338094344, "grad_norm": 1.4453125, "learning_rate": 0.00018250101128463965, "loss": 4.3207, "step": 5539 }, { "epoch": 0.5744318218639224, "grad_norm": 1.296875, "learning_rate": 0.0001824948723204986, "loss": 4.3251, "step": 5540 }, { "epoch": 0.5745355099184105, "grad_norm": 1.3671875, "learning_rate": 0.0001824887323830053, "loss": 4.3404, "step": 5541 }, { "epoch": 0.5746391979728985, "grad_norm": 1.2109375, "learning_rate": 0.00018248259147223215, "loss": 4.3781, "step": 5542 }, { "epoch": 0.5747428860273867, "grad_norm": 1.5234375, "learning_rate": 0.0001824764495882516, "loss": 4.3385, "step": 5543 }, { "epoch": 0.5748465740818747, "grad_norm": 1.40625, "learning_rate": 0.0001824703067311361, "loss": 4.339, "step": 5544 }, { "epoch": 0.5749502621363628, "grad_norm": 1.46875, "learning_rate": 0.0001824641629009582, "loss": 4.3373, "step": 5545 }, { "epoch": 0.5750539501908508, "grad_norm": 1.3671875, "learning_rate": 0.0001824580180977903, "loss": 4.2672, "step": 5546 }, { "epoch": 0.5751576382453389, "grad_norm": 1.515625, "learning_rate": 0.00018245187232170497, "loss": 4.3089, "step": 5547 }, { "epoch": 0.575261326299827, "grad_norm": 1.390625, "learning_rate": 0.00018244572557277473, "loss": 4.3049, "step": 5548 }, { "epoch": 0.5753650143543151, "grad_norm": 1.515625, "learning_rate": 0.00018243957785107204, "loss": 4.3171, "step": 5549 }, { "epoch": 0.5754687024088031, "grad_norm": 1.4375, "learning_rate": 0.00018243342915666948, "loss": 4.3574, "step": 5550 }, { "epoch": 0.5755723904632912, "grad_norm": 1.4296875, "learning_rate": 0.0001824272794896396, "loss": 4.3325, "step": 5551 }, { "epoch": 0.5756760785177792, "grad_norm": 1.328125, "learning_rate": 0.00018242112885005494, "loss": 4.3621, "step": 5552 }, { "epoch": 0.5757797665722674, "grad_norm": 1.3828125, "learning_rate": 0.0001824149772379881, "loss": 4.3562, "step": 5553 }, { "epoch": 0.5758834546267554, "grad_norm": 1.3203125, "learning_rate": 0.00018240882465351163, "loss": 4.3579, "step": 5554 }, { "epoch": 0.5759871426812435, "grad_norm": 1.3359375, "learning_rate": 0.00018240267109669814, "loss": 4.3047, "step": 5555 }, { "epoch": 0.5760908307357315, "grad_norm": 1.234375, "learning_rate": 0.00018239651656762026, "loss": 4.3361, "step": 5556 }, { "epoch": 0.5761945187902197, "grad_norm": 1.4375, "learning_rate": 0.00018239036106635056, "loss": 4.3471, "step": 5557 }, { "epoch": 0.5762982068447077, "grad_norm": 1.34375, "learning_rate": 0.00018238420459296167, "loss": 4.3176, "step": 5558 }, { "epoch": 0.5764018948991958, "grad_norm": 1.578125, "learning_rate": 0.00018237804714752627, "loss": 4.3554, "step": 5559 }, { "epoch": 0.5765055829536838, "grad_norm": 1.40625, "learning_rate": 0.000182371888730117, "loss": 4.3582, "step": 5560 }, { "epoch": 0.5766092710081719, "grad_norm": 1.46875, "learning_rate": 0.00018236572934080648, "loss": 4.3058, "step": 5561 }, { "epoch": 0.57671295906266, "grad_norm": 1.3515625, "learning_rate": 0.00018235956897966747, "loss": 4.3419, "step": 5562 }, { "epoch": 0.5768166471171481, "grad_norm": 1.5546875, "learning_rate": 0.00018235340764677255, "loss": 4.3409, "step": 5563 }, { "epoch": 0.5769203351716361, "grad_norm": 1.484375, "learning_rate": 0.00018234724534219453, "loss": 4.3695, "step": 5564 }, { "epoch": 0.5770240232261242, "grad_norm": 1.375, "learning_rate": 0.000182341082066006, "loss": 4.3446, "step": 5565 }, { "epoch": 0.5771277112806122, "grad_norm": 1.3125, "learning_rate": 0.00018233491781827977, "loss": 4.3477, "step": 5566 }, { "epoch": 0.5772313993351004, "grad_norm": 1.3125, "learning_rate": 0.00018232875259908854, "loss": 4.3555, "step": 5567 }, { "epoch": 0.5773350873895884, "grad_norm": 1.1953125, "learning_rate": 0.00018232258640850507, "loss": 4.3217, "step": 5568 }, { "epoch": 0.5774387754440765, "grad_norm": 1.40625, "learning_rate": 0.00018231641924660208, "loss": 4.2945, "step": 5569 }, { "epoch": 0.5775424634985645, "grad_norm": 1.296875, "learning_rate": 0.00018231025111345233, "loss": 4.3787, "step": 5570 }, { "epoch": 0.5776461515530527, "grad_norm": 1.328125, "learning_rate": 0.00018230408200912868, "loss": 4.3239, "step": 5571 }, { "epoch": 0.5777498396075407, "grad_norm": 1.25, "learning_rate": 0.00018229791193370384, "loss": 4.3548, "step": 5572 }, { "epoch": 0.5778535276620288, "grad_norm": 1.375, "learning_rate": 0.00018229174088725062, "loss": 4.3668, "step": 5573 }, { "epoch": 0.5779572157165168, "grad_norm": 1.203125, "learning_rate": 0.00018228556886984182, "loss": 4.3516, "step": 5574 }, { "epoch": 0.5780609037710049, "grad_norm": 1.578125, "learning_rate": 0.00018227939588155031, "loss": 4.344, "step": 5575 }, { "epoch": 0.578164591825493, "grad_norm": 1.4375, "learning_rate": 0.00018227322192244892, "loss": 4.3754, "step": 5576 }, { "epoch": 0.5782682798799811, "grad_norm": 1.4453125, "learning_rate": 0.00018226704699261047, "loss": 4.3347, "step": 5577 }, { "epoch": 0.5783719679344691, "grad_norm": 1.34375, "learning_rate": 0.0001822608710921078, "loss": 4.3517, "step": 5578 }, { "epoch": 0.5784756559889572, "grad_norm": 1.4609375, "learning_rate": 0.00018225469422101384, "loss": 4.3267, "step": 5579 }, { "epoch": 0.5785793440434452, "grad_norm": 1.28125, "learning_rate": 0.0001822485163794014, "loss": 4.3415, "step": 5580 }, { "epoch": 0.5786830320979334, "grad_norm": 1.609375, "learning_rate": 0.00018224233756734343, "loss": 4.3439, "step": 5581 }, { "epoch": 0.5787867201524214, "grad_norm": 1.5, "learning_rate": 0.0001822361577849128, "loss": 4.3165, "step": 5582 }, { "epoch": 0.5788904082069095, "grad_norm": 1.203125, "learning_rate": 0.00018222997703218246, "loss": 4.3216, "step": 5583 }, { "epoch": 0.5789940962613976, "grad_norm": 1.125, "learning_rate": 0.0001822237953092253, "loss": 4.3286, "step": 5584 }, { "epoch": 0.5790977843158857, "grad_norm": 1.4609375, "learning_rate": 0.00018221761261611423, "loss": 4.3354, "step": 5585 }, { "epoch": 0.5792014723703738, "grad_norm": 1.3125, "learning_rate": 0.0001822114289529223, "loss": 4.3254, "step": 5586 }, { "epoch": 0.5793051604248618, "grad_norm": 1.453125, "learning_rate": 0.00018220524431972237, "loss": 4.3203, "step": 5587 }, { "epoch": 0.5794088484793499, "grad_norm": 1.359375, "learning_rate": 0.00018219905871658747, "loss": 4.3531, "step": 5588 }, { "epoch": 0.5795125365338379, "grad_norm": 1.203125, "learning_rate": 0.00018219287214359055, "loss": 4.3582, "step": 5589 }, { "epoch": 0.5796162245883261, "grad_norm": 1.1015625, "learning_rate": 0.00018218668460080463, "loss": 4.3153, "step": 5590 }, { "epoch": 0.5797199126428141, "grad_norm": 1.40625, "learning_rate": 0.00018218049608830273, "loss": 4.3506, "step": 5591 }, { "epoch": 0.5798236006973022, "grad_norm": 1.2109375, "learning_rate": 0.0001821743066061578, "loss": 4.3519, "step": 5592 }, { "epoch": 0.5799272887517902, "grad_norm": 1.625, "learning_rate": 0.00018216811615444294, "loss": 4.3325, "step": 5593 }, { "epoch": 0.5800309768062784, "grad_norm": 1.4921875, "learning_rate": 0.00018216192473323114, "loss": 4.373, "step": 5594 }, { "epoch": 0.5801346648607664, "grad_norm": 1.3984375, "learning_rate": 0.0001821557323425955, "loss": 4.3325, "step": 5595 }, { "epoch": 0.5802383529152545, "grad_norm": 1.359375, "learning_rate": 0.00018214953898260908, "loss": 4.2523, "step": 5596 }, { "epoch": 0.5803420409697425, "grad_norm": 1.3515625, "learning_rate": 0.00018214334465334488, "loss": 4.3368, "step": 5597 }, { "epoch": 0.5804457290242306, "grad_norm": 1.25, "learning_rate": 0.0001821371493548761, "loss": 4.3503, "step": 5598 }, { "epoch": 0.5805494170787187, "grad_norm": 1.5703125, "learning_rate": 0.00018213095308727576, "loss": 4.3115, "step": 5599 }, { "epoch": 0.5806531051332068, "grad_norm": 1.5078125, "learning_rate": 0.000182124755850617, "loss": 4.3373, "step": 5600 }, { "epoch": 0.5807567931876948, "grad_norm": 1.4609375, "learning_rate": 0.0001821185576449729, "loss": 4.3589, "step": 5601 }, { "epoch": 0.5808604812421829, "grad_norm": 1.3515625, "learning_rate": 0.00018211235847041663, "loss": 4.3404, "step": 5602 }, { "epoch": 0.5809641692966709, "grad_norm": 1.4453125, "learning_rate": 0.00018210615832702133, "loss": 4.3841, "step": 5603 }, { "epoch": 0.5810678573511591, "grad_norm": 1.28125, "learning_rate": 0.00018209995721486016, "loss": 4.3259, "step": 5604 }, { "epoch": 0.5811715454056471, "grad_norm": 1.609375, "learning_rate": 0.00018209375513400628, "loss": 4.3524, "step": 5605 }, { "epoch": 0.5812752334601352, "grad_norm": 1.5, "learning_rate": 0.00018208755208453287, "loss": 4.3828, "step": 5606 }, { "epoch": 0.5813789215146232, "grad_norm": 1.390625, "learning_rate": 0.00018208134806651312, "loss": 4.3642, "step": 5607 }, { "epoch": 0.5814826095691114, "grad_norm": 1.3828125, "learning_rate": 0.00018207514308002018, "loss": 4.3149, "step": 5608 }, { "epoch": 0.5815862976235994, "grad_norm": 1.375, "learning_rate": 0.00018206893712512735, "loss": 4.3453, "step": 5609 }, { "epoch": 0.5816899856780875, "grad_norm": 1.203125, "learning_rate": 0.00018206273020190782, "loss": 4.343, "step": 5610 }, { "epoch": 0.5817936737325755, "grad_norm": 1.546875, "learning_rate": 0.0001820565223104348, "loss": 4.3722, "step": 5611 }, { "epoch": 0.5818973617870636, "grad_norm": 1.3984375, "learning_rate": 0.00018205031345078156, "loss": 4.3135, "step": 5612 }, { "epoch": 0.5820010498415517, "grad_norm": 1.4609375, "learning_rate": 0.00018204410362302134, "loss": 4.2871, "step": 5613 }, { "epoch": 0.5821047378960398, "grad_norm": 1.46875, "learning_rate": 0.00018203789282722743, "loss": 4.3021, "step": 5614 }, { "epoch": 0.5822084259505278, "grad_norm": 1.0703125, "learning_rate": 0.0001820316810634731, "loss": 4.338, "step": 5615 }, { "epoch": 0.5823121140050159, "grad_norm": 1.0234375, "learning_rate": 0.00018202546833183164, "loss": 4.2786, "step": 5616 }, { "epoch": 0.5824158020595039, "grad_norm": 1.375, "learning_rate": 0.00018201925463237636, "loss": 4.2837, "step": 5617 }, { "epoch": 0.5825194901139921, "grad_norm": 1.1875, "learning_rate": 0.0001820130399651806, "loss": 4.2989, "step": 5618 }, { "epoch": 0.5826231781684801, "grad_norm": 1.6484375, "learning_rate": 0.00018200682433031765, "loss": 4.2944, "step": 5619 }, { "epoch": 0.5827268662229682, "grad_norm": 1.5078125, "learning_rate": 0.00018200060772786083, "loss": 4.3102, "step": 5620 }, { "epoch": 0.5828305542774562, "grad_norm": 1.1875, "learning_rate": 0.00018199439015788356, "loss": 4.3416, "step": 5621 }, { "epoch": 0.5829342423319444, "grad_norm": 1.15625, "learning_rate": 0.00018198817162045912, "loss": 4.3264, "step": 5622 }, { "epoch": 0.5830379303864324, "grad_norm": 1.328125, "learning_rate": 0.00018198195211566095, "loss": 4.3834, "step": 5623 }, { "epoch": 0.5831416184409205, "grad_norm": 1.125, "learning_rate": 0.00018197573164356238, "loss": 4.3006, "step": 5624 }, { "epoch": 0.5832453064954085, "grad_norm": 1.6796875, "learning_rate": 0.00018196951020423683, "loss": 4.3308, "step": 5625 }, { "epoch": 0.5833489945498966, "grad_norm": 1.4609375, "learning_rate": 0.00018196328779775768, "loss": 4.3448, "step": 5626 }, { "epoch": 0.5834526826043847, "grad_norm": 1.515625, "learning_rate": 0.00018195706442419843, "loss": 4.3255, "step": 5627 }, { "epoch": 0.5835563706588728, "grad_norm": 1.375, "learning_rate": 0.0001819508400836324, "loss": 4.3572, "step": 5628 }, { "epoch": 0.5836600587133609, "grad_norm": 1.5703125, "learning_rate": 0.00018194461477613315, "loss": 4.3235, "step": 5629 }, { "epoch": 0.5837637467678489, "grad_norm": 1.4296875, "learning_rate": 0.000181938388501774, "loss": 4.3347, "step": 5630 }, { "epoch": 0.583867434822337, "grad_norm": 1.46875, "learning_rate": 0.00018193216126062851, "loss": 4.322, "step": 5631 }, { "epoch": 0.5839711228768251, "grad_norm": 1.4140625, "learning_rate": 0.0001819259330527701, "loss": 4.363, "step": 5632 }, { "epoch": 0.5840748109313132, "grad_norm": 1.3046875, "learning_rate": 0.0001819197038782723, "loss": 4.3823, "step": 5633 }, { "epoch": 0.5841784989858012, "grad_norm": 1.1875, "learning_rate": 0.00018191347373720858, "loss": 4.3072, "step": 5634 }, { "epoch": 0.5842821870402893, "grad_norm": 1.4765625, "learning_rate": 0.00018190724262965246, "loss": 4.3529, "step": 5635 }, { "epoch": 0.5843858750947774, "grad_norm": 1.328125, "learning_rate": 0.00018190101055567744, "loss": 4.3431, "step": 5636 }, { "epoch": 0.5844895631492655, "grad_norm": 1.4453125, "learning_rate": 0.0001818947775153571, "loss": 4.3289, "step": 5637 }, { "epoch": 0.5845932512037535, "grad_norm": 1.296875, "learning_rate": 0.00018188854350876494, "loss": 4.3134, "step": 5638 }, { "epoch": 0.5846969392582416, "grad_norm": 1.2421875, "learning_rate": 0.0001818823085359745, "loss": 4.3689, "step": 5639 }, { "epoch": 0.5848006273127296, "grad_norm": 1.1484375, "learning_rate": 0.00018187607259705942, "loss": 4.3616, "step": 5640 }, { "epoch": 0.5849043153672178, "grad_norm": 1.6171875, "learning_rate": 0.0001818698356920932, "loss": 4.3581, "step": 5641 }, { "epoch": 0.5850080034217058, "grad_norm": 1.421875, "learning_rate": 0.00018186359782114945, "loss": 4.3455, "step": 5642 }, { "epoch": 0.5851116914761939, "grad_norm": 1.40625, "learning_rate": 0.00018185735898430182, "loss": 4.3584, "step": 5643 }, { "epoch": 0.5852153795306819, "grad_norm": 1.3515625, "learning_rate": 0.00018185111918162384, "loss": 4.3607, "step": 5644 }, { "epoch": 0.58531906758517, "grad_norm": 1.265625, "learning_rate": 0.00018184487841318918, "loss": 4.3458, "step": 5645 }, { "epoch": 0.5854227556396581, "grad_norm": 1.15625, "learning_rate": 0.00018183863667907147, "loss": 4.3571, "step": 5646 }, { "epoch": 0.5855264436941462, "grad_norm": 1.4140625, "learning_rate": 0.00018183239397934436, "loss": 4.3665, "step": 5647 }, { "epoch": 0.5856301317486342, "grad_norm": 1.34375, "learning_rate": 0.0001818261503140815, "loss": 4.3623, "step": 5648 }, { "epoch": 0.5857338198031223, "grad_norm": 1.3828125, "learning_rate": 0.00018181990568335657, "loss": 4.3244, "step": 5649 }, { "epoch": 0.5858375078576104, "grad_norm": 1.2578125, "learning_rate": 0.00018181366008724324, "loss": 4.2745, "step": 5650 }, { "epoch": 0.5859411959120985, "grad_norm": 1.3984375, "learning_rate": 0.00018180741352581518, "loss": 4.3148, "step": 5651 }, { "epoch": 0.5860448839665865, "grad_norm": 1.296875, "learning_rate": 0.00018180116599914614, "loss": 4.3239, "step": 5652 }, { "epoch": 0.5861485720210746, "grad_norm": 1.53125, "learning_rate": 0.00018179491750730978, "loss": 4.3469, "step": 5653 }, { "epoch": 0.5862522600755626, "grad_norm": 1.3671875, "learning_rate": 0.00018178866805037988, "loss": 4.3679, "step": 5654 }, { "epoch": 0.5863559481300508, "grad_norm": 1.4609375, "learning_rate": 0.00018178241762843014, "loss": 4.3314, "step": 5655 }, { "epoch": 0.5864596361845388, "grad_norm": 1.328125, "learning_rate": 0.00018177616624153432, "loss": 4.3647, "step": 5656 }, { "epoch": 0.5865633242390269, "grad_norm": 1.421875, "learning_rate": 0.00018176991388976622, "loss": 4.3769, "step": 5657 }, { "epoch": 0.5866670122935149, "grad_norm": 1.2890625, "learning_rate": 0.0001817636605731995, "loss": 4.3395, "step": 5658 }, { "epoch": 0.586770700348003, "grad_norm": 1.453125, "learning_rate": 0.00018175740629190805, "loss": 4.3378, "step": 5659 }, { "epoch": 0.5868743884024911, "grad_norm": 1.3359375, "learning_rate": 0.0001817511510459656, "loss": 4.3157, "step": 5660 }, { "epoch": 0.5869780764569792, "grad_norm": 1.3828125, "learning_rate": 0.00018174489483544604, "loss": 4.3298, "step": 5661 }, { "epoch": 0.5870817645114672, "grad_norm": 1.2734375, "learning_rate": 0.00018173863766042308, "loss": 4.3223, "step": 5662 }, { "epoch": 0.5871854525659553, "grad_norm": 1.359375, "learning_rate": 0.00018173237952097063, "loss": 4.3173, "step": 5663 }, { "epoch": 0.5872891406204434, "grad_norm": 1.28125, "learning_rate": 0.00018172612041716246, "loss": 4.3169, "step": 5664 }, { "epoch": 0.5873928286749315, "grad_norm": 1.46875, "learning_rate": 0.0001817198603490725, "loss": 4.3136, "step": 5665 }, { "epoch": 0.5874965167294195, "grad_norm": 1.3125, "learning_rate": 0.00018171359931677453, "loss": 4.3398, "step": 5666 }, { "epoch": 0.5876002047839076, "grad_norm": 1.7109375, "learning_rate": 0.00018170733732034248, "loss": 4.3699, "step": 5667 }, { "epoch": 0.5877038928383956, "grad_norm": 1.4765625, "learning_rate": 0.00018170107435985021, "loss": 4.3358, "step": 5668 }, { "epoch": 0.5878075808928838, "grad_norm": 1.5, "learning_rate": 0.00018169481043537166, "loss": 4.3863, "step": 5669 }, { "epoch": 0.5879112689473718, "grad_norm": 1.375, "learning_rate": 0.00018168854554698064, "loss": 4.3141, "step": 5670 }, { "epoch": 0.5880149570018599, "grad_norm": 1.40625, "learning_rate": 0.00018168227969475118, "loss": 4.3231, "step": 5671 }, { "epoch": 0.5881186450563479, "grad_norm": 1.359375, "learning_rate": 0.00018167601287875712, "loss": 4.3552, "step": 5672 }, { "epoch": 0.588222333110836, "grad_norm": 1.4296875, "learning_rate": 0.00018166974509907247, "loss": 4.3648, "step": 5673 }, { "epoch": 0.5883260211653242, "grad_norm": 1.2734375, "learning_rate": 0.00018166347635577117, "loss": 4.3125, "step": 5674 }, { "epoch": 0.5884297092198122, "grad_norm": 1.453125, "learning_rate": 0.00018165720664892714, "loss": 4.2986, "step": 5675 }, { "epoch": 0.5885333972743003, "grad_norm": 1.3984375, "learning_rate": 0.0001816509359786144, "loss": 4.3168, "step": 5676 }, { "epoch": 0.5886370853287883, "grad_norm": 1.46875, "learning_rate": 0.00018164466434490692, "loss": 4.3265, "step": 5677 }, { "epoch": 0.5887407733832765, "grad_norm": 1.4140625, "learning_rate": 0.00018163839174787874, "loss": 4.3073, "step": 5678 }, { "epoch": 0.5888444614377645, "grad_norm": 1.4375, "learning_rate": 0.00018163211818760379, "loss": 4.3157, "step": 5679 }, { "epoch": 0.5889481494922526, "grad_norm": 1.375, "learning_rate": 0.00018162584366415615, "loss": 4.3241, "step": 5680 }, { "epoch": 0.5890518375467406, "grad_norm": 1.5234375, "learning_rate": 0.00018161956817760983, "loss": 4.3372, "step": 5681 }, { "epoch": 0.5891555256012287, "grad_norm": 1.4375, "learning_rate": 0.0001816132917280389, "loss": 4.3576, "step": 5682 }, { "epoch": 0.5892592136557168, "grad_norm": 1.609375, "learning_rate": 0.00018160701431551736, "loss": 4.3108, "step": 5683 }, { "epoch": 0.5893629017102049, "grad_norm": 1.515625, "learning_rate": 0.00018160073594011936, "loss": 4.3198, "step": 5684 }, { "epoch": 0.5894665897646929, "grad_norm": 1.390625, "learning_rate": 0.00018159445660191888, "loss": 4.3091, "step": 5685 }, { "epoch": 0.589570277819181, "grad_norm": 1.28125, "learning_rate": 0.0001815881763009901, "loss": 4.3254, "step": 5686 }, { "epoch": 0.589673965873669, "grad_norm": 1.46875, "learning_rate": 0.00018158189503740709, "loss": 4.3508, "step": 5687 }, { "epoch": 0.5897776539281572, "grad_norm": 1.3984375, "learning_rate": 0.00018157561281124392, "loss": 4.3215, "step": 5688 }, { "epoch": 0.5898813419826452, "grad_norm": 1.4140625, "learning_rate": 0.00018156932962257475, "loss": 4.2878, "step": 5689 }, { "epoch": 0.5899850300371333, "grad_norm": 1.3984375, "learning_rate": 0.00018156304547147374, "loss": 4.3404, "step": 5690 }, { "epoch": 0.5900887180916213, "grad_norm": 1.21875, "learning_rate": 0.00018155676035801498, "loss": 4.3673, "step": 5691 }, { "epoch": 0.5901924061461095, "grad_norm": 1.171875, "learning_rate": 0.00018155047428227268, "loss": 4.3134, "step": 5692 }, { "epoch": 0.5902960942005975, "grad_norm": 1.3828125, "learning_rate": 0.000181544187244321, "loss": 4.3343, "step": 5693 }, { "epoch": 0.5903997822550856, "grad_norm": 1.265625, "learning_rate": 0.00018153789924423407, "loss": 4.329, "step": 5694 }, { "epoch": 0.5905034703095736, "grad_norm": 1.421875, "learning_rate": 0.00018153161028208614, "loss": 4.3077, "step": 5695 }, { "epoch": 0.5906071583640617, "grad_norm": 1.3203125, "learning_rate": 0.00018152532035795136, "loss": 4.308, "step": 5696 }, { "epoch": 0.5907108464185498, "grad_norm": 1.3984375, "learning_rate": 0.00018151902947190402, "loss": 4.3066, "step": 5697 }, { "epoch": 0.5908145344730379, "grad_norm": 1.3359375, "learning_rate": 0.00018151273762401825, "loss": 4.3872, "step": 5698 }, { "epoch": 0.5909182225275259, "grad_norm": 1.3359375, "learning_rate": 0.0001815064448143684, "loss": 4.3014, "step": 5699 }, { "epoch": 0.591021910582014, "grad_norm": 1.21875, "learning_rate": 0.0001815001510430286, "loss": 4.3571, "step": 5700 }, { "epoch": 0.591125598636502, "grad_norm": 1.375, "learning_rate": 0.00018149385631007322, "loss": 4.3611, "step": 5701 }, { "epoch": 0.5912292866909902, "grad_norm": 1.265625, "learning_rate": 0.00018148756061557646, "loss": 4.3576, "step": 5702 }, { "epoch": 0.5913329747454782, "grad_norm": 1.2421875, "learning_rate": 0.0001814812639596126, "loss": 4.3244, "step": 5703 }, { "epoch": 0.5914366627999663, "grad_norm": 1.1953125, "learning_rate": 0.00018147496634225596, "loss": 4.3498, "step": 5704 }, { "epoch": 0.5915403508544543, "grad_norm": 1.2578125, "learning_rate": 0.00018146866776358084, "loss": 4.3045, "step": 5705 }, { "epoch": 0.5916440389089425, "grad_norm": 1.140625, "learning_rate": 0.0001814623682236616, "loss": 4.398, "step": 5706 }, { "epoch": 0.5917477269634305, "grad_norm": 1.2890625, "learning_rate": 0.00018145606772257246, "loss": 4.36, "step": 5707 }, { "epoch": 0.5918514150179186, "grad_norm": 1.1875, "learning_rate": 0.00018144976626038785, "loss": 4.3212, "step": 5708 }, { "epoch": 0.5919551030724066, "grad_norm": 1.3125, "learning_rate": 0.00018144346383718211, "loss": 4.3165, "step": 5709 }, { "epoch": 0.5920587911268947, "grad_norm": 1.140625, "learning_rate": 0.00018143716045302956, "loss": 4.3158, "step": 5710 }, { "epoch": 0.5921624791813828, "grad_norm": 1.34375, "learning_rate": 0.0001814308561080046, "loss": 4.3757, "step": 5711 }, { "epoch": 0.5922661672358709, "grad_norm": 1.2421875, "learning_rate": 0.00018142455080218163, "loss": 4.3474, "step": 5712 }, { "epoch": 0.5923698552903589, "grad_norm": 1.390625, "learning_rate": 0.00018141824453563504, "loss": 4.3471, "step": 5713 }, { "epoch": 0.592473543344847, "grad_norm": 1.3203125, "learning_rate": 0.00018141193730843923, "loss": 4.3295, "step": 5714 }, { "epoch": 0.592577231399335, "grad_norm": 1.3984375, "learning_rate": 0.00018140562912066858, "loss": 4.2494, "step": 5715 }, { "epoch": 0.5926809194538232, "grad_norm": 1.296875, "learning_rate": 0.00018139931997239757, "loss": 4.3036, "step": 5716 }, { "epoch": 0.5927846075083113, "grad_norm": 1.3359375, "learning_rate": 0.00018139300986370064, "loss": 4.3459, "step": 5717 }, { "epoch": 0.5928882955627993, "grad_norm": 1.2578125, "learning_rate": 0.00018138669879465223, "loss": 4.3585, "step": 5718 }, { "epoch": 0.5929919836172874, "grad_norm": 1.4296875, "learning_rate": 0.0001813803867653268, "loss": 4.3598, "step": 5719 }, { "epoch": 0.5930956716717755, "grad_norm": 1.2890625, "learning_rate": 0.00018137407377579883, "loss": 4.3545, "step": 5720 }, { "epoch": 0.5931993597262636, "grad_norm": 1.34375, "learning_rate": 0.00018136775982614277, "loss": 4.334, "step": 5721 }, { "epoch": 0.5933030477807516, "grad_norm": 1.2109375, "learning_rate": 0.00018136144491643318, "loss": 4.3667, "step": 5722 }, { "epoch": 0.5934067358352397, "grad_norm": 1.453125, "learning_rate": 0.00018135512904674458, "loss": 4.3182, "step": 5723 }, { "epoch": 0.5935104238897277, "grad_norm": 1.34375, "learning_rate": 0.0001813488122171514, "loss": 4.3276, "step": 5724 }, { "epoch": 0.5936141119442159, "grad_norm": 1.3046875, "learning_rate": 0.00018134249442772825, "loss": 4.2957, "step": 5725 }, { "epoch": 0.5937177999987039, "grad_norm": 1.3125, "learning_rate": 0.00018133617567854966, "loss": 4.3233, "step": 5726 }, { "epoch": 0.593821488053192, "grad_norm": 1.234375, "learning_rate": 0.00018132985596969013, "loss": 4.3506, "step": 5727 }, { "epoch": 0.59392517610768, "grad_norm": 1.1953125, "learning_rate": 0.00018132353530122433, "loss": 4.3018, "step": 5728 }, { "epoch": 0.5940288641621682, "grad_norm": 1.21875, "learning_rate": 0.00018131721367322672, "loss": 4.3681, "step": 5729 }, { "epoch": 0.5941325522166562, "grad_norm": 1.0859375, "learning_rate": 0.00018131089108577197, "loss": 4.3353, "step": 5730 }, { "epoch": 0.5942362402711443, "grad_norm": 1.4140625, "learning_rate": 0.00018130456753893466, "loss": 4.336, "step": 5731 }, { "epoch": 0.5943399283256323, "grad_norm": 1.234375, "learning_rate": 0.00018129824303278938, "loss": 4.3253, "step": 5732 }, { "epoch": 0.5944436163801204, "grad_norm": 1.421875, "learning_rate": 0.00018129191756741076, "loss": 4.3457, "step": 5733 }, { "epoch": 0.5945473044346085, "grad_norm": 1.359375, "learning_rate": 0.00018128559114287347, "loss": 4.3569, "step": 5734 }, { "epoch": 0.5946509924890966, "grad_norm": 1.1953125, "learning_rate": 0.0001812792637592521, "loss": 4.3368, "step": 5735 }, { "epoch": 0.5947546805435846, "grad_norm": 1.0546875, "learning_rate": 0.00018127293541662137, "loss": 4.3151, "step": 5736 }, { "epoch": 0.5948583685980727, "grad_norm": 1.40625, "learning_rate": 0.00018126660611505587, "loss": 4.3404, "step": 5737 }, { "epoch": 0.5949620566525607, "grad_norm": 1.234375, "learning_rate": 0.00018126027585463038, "loss": 4.3155, "step": 5738 }, { "epoch": 0.5950657447070489, "grad_norm": 1.5234375, "learning_rate": 0.00018125394463541948, "loss": 4.3546, "step": 5739 }, { "epoch": 0.5951694327615369, "grad_norm": 1.4609375, "learning_rate": 0.0001812476124574979, "loss": 4.2847, "step": 5740 }, { "epoch": 0.595273120816025, "grad_norm": 1.2109375, "learning_rate": 0.0001812412793209404, "loss": 4.338, "step": 5741 }, { "epoch": 0.595376808870513, "grad_norm": 1.2265625, "learning_rate": 0.0001812349452258217, "loss": 4.3296, "step": 5742 }, { "epoch": 0.5954804969250012, "grad_norm": 1.3515625, "learning_rate": 0.00018122861017221654, "loss": 4.3893, "step": 5743 }, { "epoch": 0.5955841849794892, "grad_norm": 1.2109375, "learning_rate": 0.00018122227416019957, "loss": 4.3603, "step": 5744 }, { "epoch": 0.5956878730339773, "grad_norm": 1.421875, "learning_rate": 0.00018121593718984567, "loss": 4.2856, "step": 5745 }, { "epoch": 0.5957915610884653, "grad_norm": 1.3359375, "learning_rate": 0.00018120959926122953, "loss": 4.3316, "step": 5746 }, { "epoch": 0.5958952491429534, "grad_norm": 1.25, "learning_rate": 0.000181203260374426, "loss": 4.3399, "step": 5747 }, { "epoch": 0.5959989371974415, "grad_norm": 1.1953125, "learning_rate": 0.00018119692052950977, "loss": 4.349, "step": 5748 }, { "epoch": 0.5961026252519296, "grad_norm": 1.3671875, "learning_rate": 0.0001811905797265558, "loss": 4.345, "step": 5749 }, { "epoch": 0.5962063133064176, "grad_norm": 1.2109375, "learning_rate": 0.00018118423796563874, "loss": 4.3052, "step": 5750 }, { "epoch": 0.5963100013609057, "grad_norm": 1.40625, "learning_rate": 0.00018117789524683348, "loss": 4.2979, "step": 5751 }, { "epoch": 0.5964136894153937, "grad_norm": 1.2890625, "learning_rate": 0.0001811715515702149, "loss": 4.3457, "step": 5752 }, { "epoch": 0.5965173774698819, "grad_norm": 1.34375, "learning_rate": 0.0001811652069358578, "loss": 4.3463, "step": 5753 }, { "epoch": 0.5966210655243699, "grad_norm": 1.2421875, "learning_rate": 0.00018115886134383705, "loss": 4.299, "step": 5754 }, { "epoch": 0.596724753578858, "grad_norm": 1.4375, "learning_rate": 0.00018115251479422755, "loss": 4.324, "step": 5755 }, { "epoch": 0.596828441633346, "grad_norm": 1.2890625, "learning_rate": 0.00018114616728710415, "loss": 4.3441, "step": 5756 }, { "epoch": 0.5969321296878342, "grad_norm": 1.484375, "learning_rate": 0.00018113981882254173, "loss": 4.3491, "step": 5757 }, { "epoch": 0.5970358177423222, "grad_norm": 1.3671875, "learning_rate": 0.0001811334694006152, "loss": 4.3078, "step": 5758 }, { "epoch": 0.5971395057968103, "grad_norm": 1.4296875, "learning_rate": 0.00018112711902139954, "loss": 4.3105, "step": 5759 }, { "epoch": 0.5972431938512983, "grad_norm": 1.34375, "learning_rate": 0.0001811207676849696, "loss": 4.3357, "step": 5760 }, { "epoch": 0.5973468819057864, "grad_norm": 1.5390625, "learning_rate": 0.00018111441539140038, "loss": 4.3783, "step": 5761 }, { "epoch": 0.5974505699602746, "grad_norm": 1.5, "learning_rate": 0.00018110806214076676, "loss": 4.3518, "step": 5762 }, { "epoch": 0.5975542580147626, "grad_norm": 1.21875, "learning_rate": 0.00018110170793314377, "loss": 4.2929, "step": 5763 }, { "epoch": 0.5976579460692507, "grad_norm": 1.1875, "learning_rate": 0.00018109535276860633, "loss": 4.3141, "step": 5764 }, { "epoch": 0.5977616341237387, "grad_norm": 1.4609375, "learning_rate": 0.0001810889966472295, "loss": 4.3106, "step": 5765 }, { "epoch": 0.5978653221782269, "grad_norm": 1.328125, "learning_rate": 0.0001810826395690882, "loss": 4.33, "step": 5766 }, { "epoch": 0.5979690102327149, "grad_norm": 1.40625, "learning_rate": 0.00018107628153425745, "loss": 4.3587, "step": 5767 }, { "epoch": 0.598072698287203, "grad_norm": 1.3359375, "learning_rate": 0.00018106992254281225, "loss": 4.3539, "step": 5768 }, { "epoch": 0.598176386341691, "grad_norm": 1.3671875, "learning_rate": 0.0001810635625948277, "loss": 4.3211, "step": 5769 }, { "epoch": 0.5982800743961791, "grad_norm": 1.3203125, "learning_rate": 0.0001810572016903788, "loss": 4.3466, "step": 5770 }, { "epoch": 0.5983837624506672, "grad_norm": 1.265625, "learning_rate": 0.00018105083982954058, "loss": 4.3471, "step": 5771 }, { "epoch": 0.5984874505051553, "grad_norm": 1.140625, "learning_rate": 0.00018104447701238814, "loss": 4.3135, "step": 5772 }, { "epoch": 0.5985911385596433, "grad_norm": 1.4375, "learning_rate": 0.00018103811323899653, "loss": 4.3606, "step": 5773 }, { "epoch": 0.5986948266141314, "grad_norm": 1.390625, "learning_rate": 0.00018103174850944085, "loss": 4.3562, "step": 5774 }, { "epoch": 0.5987985146686194, "grad_norm": 1.1875, "learning_rate": 0.00018102538282379618, "loss": 4.2699, "step": 5775 }, { "epoch": 0.5989022027231076, "grad_norm": 1.1875, "learning_rate": 0.00018101901618213767, "loss": 4.3522, "step": 5776 }, { "epoch": 0.5990058907775956, "grad_norm": 1.21875, "learning_rate": 0.00018101264858454036, "loss": 4.3327, "step": 5777 }, { "epoch": 0.5991095788320837, "grad_norm": 1.1015625, "learning_rate": 0.00018100628003107948, "loss": 4.3317, "step": 5778 }, { "epoch": 0.5992132668865717, "grad_norm": 1.390625, "learning_rate": 0.0001809999105218301, "loss": 4.3271, "step": 5779 }, { "epoch": 0.5993169549410599, "grad_norm": 1.2578125, "learning_rate": 0.0001809935400568674, "loss": 4.3283, "step": 5780 }, { "epoch": 0.5994206429955479, "grad_norm": 1.171875, "learning_rate": 0.0001809871686362665, "loss": 4.3304, "step": 5781 }, { "epoch": 0.599524331050036, "grad_norm": 1.1015625, "learning_rate": 0.0001809807962601027, "loss": 4.3292, "step": 5782 }, { "epoch": 0.599628019104524, "grad_norm": 1.25, "learning_rate": 0.00018097442292845106, "loss": 4.344, "step": 5783 }, { "epoch": 0.5997317071590121, "grad_norm": 1.0703125, "learning_rate": 0.0001809680486413868, "loss": 4.3426, "step": 5784 }, { "epoch": 0.5998353952135002, "grad_norm": 1.484375, "learning_rate": 0.0001809616733989852, "loss": 4.3199, "step": 5785 }, { "epoch": 0.5999390832679883, "grad_norm": 1.40625, "learning_rate": 0.0001809552972013214, "loss": 4.3017, "step": 5786 }, { "epoch": 0.6000427713224763, "grad_norm": 1.265625, "learning_rate": 0.00018094892004847068, "loss": 4.289, "step": 5787 }, { "epoch": 0.6001464593769644, "grad_norm": 1.171875, "learning_rate": 0.00018094254194050827, "loss": 4.3317, "step": 5788 }, { "epoch": 0.6002501474314524, "grad_norm": 1.3046875, "learning_rate": 0.00018093616287750942, "loss": 4.3347, "step": 5789 }, { "epoch": 0.6003538354859406, "grad_norm": 1.15625, "learning_rate": 0.00018092978285954943, "loss": 4.3655, "step": 5790 }, { "epoch": 0.6004575235404286, "grad_norm": 1.4140625, "learning_rate": 0.0001809234018867035, "loss": 4.3226, "step": 5791 }, { "epoch": 0.6005612115949167, "grad_norm": 1.359375, "learning_rate": 0.000180917019959047, "loss": 4.3014, "step": 5792 }, { "epoch": 0.6006648996494047, "grad_norm": 1.375, "learning_rate": 0.0001809106370766552, "loss": 4.3476, "step": 5793 }, { "epoch": 0.6007685877038929, "grad_norm": 1.375, "learning_rate": 0.0001809042532396034, "loss": 4.3053, "step": 5794 }, { "epoch": 0.6008722757583809, "grad_norm": 1.1953125, "learning_rate": 0.00018089786844796693, "loss": 4.3442, "step": 5795 }, { "epoch": 0.600975963812869, "grad_norm": 1.1015625, "learning_rate": 0.00018089148270182111, "loss": 4.3391, "step": 5796 }, { "epoch": 0.601079651867357, "grad_norm": 1.28125, "learning_rate": 0.00018088509600124134, "loss": 4.3439, "step": 5797 }, { "epoch": 0.6011833399218451, "grad_norm": 1.140625, "learning_rate": 0.0001808787083463029, "loss": 4.3031, "step": 5798 }, { "epoch": 0.6012870279763332, "grad_norm": 1.40625, "learning_rate": 0.0001808723197370812, "loss": 4.3131, "step": 5799 }, { "epoch": 0.6013907160308213, "grad_norm": 1.3125, "learning_rate": 0.00018086593017365164, "loss": 4.3422, "step": 5800 }, { "epoch": 0.6014944040853093, "grad_norm": 1.125, "learning_rate": 0.00018085953965608952, "loss": 4.3209, "step": 5801 }, { "epoch": 0.6015980921397974, "grad_norm": 1.125, "learning_rate": 0.00018085314818447036, "loss": 4.3095, "step": 5802 }, { "epoch": 0.6017017801942854, "grad_norm": 1.234375, "learning_rate": 0.00018084675575886952, "loss": 4.3618, "step": 5803 }, { "epoch": 0.6018054682487736, "grad_norm": 1.0703125, "learning_rate": 0.00018084036237936237, "loss": 4.3659, "step": 5804 }, { "epoch": 0.6019091563032616, "grad_norm": 1.65625, "learning_rate": 0.00018083396804602443, "loss": 4.3087, "step": 5805 }, { "epoch": 0.6020128443577497, "grad_norm": 1.5546875, "learning_rate": 0.00018082757275893113, "loss": 4.3818, "step": 5806 }, { "epoch": 0.6021165324122378, "grad_norm": 1.265625, "learning_rate": 0.0001808211765181579, "loss": 4.3672, "step": 5807 }, { "epoch": 0.6022202204667259, "grad_norm": 1.265625, "learning_rate": 0.0001808147793237802, "loss": 4.3323, "step": 5808 }, { "epoch": 0.602323908521214, "grad_norm": 1.21875, "learning_rate": 0.00018080838117587352, "loss": 4.3476, "step": 5809 }, { "epoch": 0.602427596575702, "grad_norm": 1.0859375, "learning_rate": 0.0001808019820745134, "loss": 4.3119, "step": 5810 }, { "epoch": 0.6025312846301901, "grad_norm": 1.4609375, "learning_rate": 0.00018079558201977526, "loss": 4.3303, "step": 5811 }, { "epoch": 0.6026349726846781, "grad_norm": 1.265625, "learning_rate": 0.0001807891810117347, "loss": 4.3118, "step": 5812 }, { "epoch": 0.6027386607391663, "grad_norm": 1.4140625, "learning_rate": 0.0001807827790504672, "loss": 4.3289, "step": 5813 }, { "epoch": 0.6028423487936543, "grad_norm": 1.3515625, "learning_rate": 0.00018077637613604826, "loss": 4.2843, "step": 5814 }, { "epoch": 0.6029460368481424, "grad_norm": 1.203125, "learning_rate": 0.0001807699722685535, "loss": 4.3415, "step": 5815 }, { "epoch": 0.6030497249026304, "grad_norm": 1.1640625, "learning_rate": 0.00018076356744805842, "loss": 4.2981, "step": 5816 }, { "epoch": 0.6031534129571186, "grad_norm": 1.265625, "learning_rate": 0.00018075716167463863, "loss": 4.2897, "step": 5817 }, { "epoch": 0.6032571010116066, "grad_norm": 1.171875, "learning_rate": 0.0001807507549483697, "loss": 4.3263, "step": 5818 }, { "epoch": 0.6033607890660947, "grad_norm": 1.4765625, "learning_rate": 0.0001807443472693272, "loss": 4.3334, "step": 5819 }, { "epoch": 0.6034644771205827, "grad_norm": 1.3828125, "learning_rate": 0.00018073793863758675, "loss": 4.3286, "step": 5820 }, { "epoch": 0.6035681651750708, "grad_norm": 1.296875, "learning_rate": 0.00018073152905322397, "loss": 4.3683, "step": 5821 }, { "epoch": 0.6036718532295589, "grad_norm": 1.234375, "learning_rate": 0.00018072511851631448, "loss": 4.3507, "step": 5822 }, { "epoch": 0.603775541284047, "grad_norm": 1.2734375, "learning_rate": 0.00018071870702693397, "loss": 4.2915, "step": 5823 }, { "epoch": 0.603879229338535, "grad_norm": 1.1484375, "learning_rate": 0.000180712294585158, "loss": 4.3375, "step": 5824 }, { "epoch": 0.6039829173930231, "grad_norm": 1.375, "learning_rate": 0.00018070588119106228, "loss": 4.3278, "step": 5825 }, { "epoch": 0.6040866054475111, "grad_norm": 1.28125, "learning_rate": 0.00018069946684472248, "loss": 4.3463, "step": 5826 }, { "epoch": 0.6041902935019993, "grad_norm": 1.2578125, "learning_rate": 0.00018069305154621424, "loss": 4.3182, "step": 5827 }, { "epoch": 0.6042939815564873, "grad_norm": 1.25, "learning_rate": 0.00018068663529561331, "loss": 4.3293, "step": 5828 }, { "epoch": 0.6043976696109754, "grad_norm": 1.1875, "learning_rate": 0.00018068021809299536, "loss": 4.3106, "step": 5829 }, { "epoch": 0.6045013576654634, "grad_norm": 1.109375, "learning_rate": 0.00018067379993843617, "loss": 4.3078, "step": 5830 }, { "epoch": 0.6046050457199516, "grad_norm": 1.375, "learning_rate": 0.00018066738083201135, "loss": 4.3269, "step": 5831 }, { "epoch": 0.6047087337744396, "grad_norm": 1.2578125, "learning_rate": 0.00018066096077379675, "loss": 4.321, "step": 5832 }, { "epoch": 0.6048124218289277, "grad_norm": 1.28125, "learning_rate": 0.00018065453976386805, "loss": 4.3406, "step": 5833 }, { "epoch": 0.6049161098834157, "grad_norm": 1.234375, "learning_rate": 0.00018064811780230103, "loss": 4.3474, "step": 5834 }, { "epoch": 0.6050197979379038, "grad_norm": 1.28125, "learning_rate": 0.0001806416948891715, "loss": 4.3533, "step": 5835 }, { "epoch": 0.6051234859923919, "grad_norm": 1.171875, "learning_rate": 0.0001806352710245552, "loss": 4.3238, "step": 5836 }, { "epoch": 0.60522717404688, "grad_norm": 1.3203125, "learning_rate": 0.00018062884620852792, "loss": 4.3164, "step": 5837 }, { "epoch": 0.605330862101368, "grad_norm": 1.2421875, "learning_rate": 0.00018062242044116552, "loss": 4.3437, "step": 5838 }, { "epoch": 0.6054345501558561, "grad_norm": 1.4609375, "learning_rate": 0.00018061599372254375, "loss": 4.2953, "step": 5839 }, { "epoch": 0.6055382382103441, "grad_norm": 1.3515625, "learning_rate": 0.0001806095660527385, "loss": 4.3134, "step": 5840 }, { "epoch": 0.6056419262648323, "grad_norm": 1.1171875, "learning_rate": 0.00018060313743182554, "loss": 4.3519, "step": 5841 }, { "epoch": 0.6057456143193203, "grad_norm": 1.203125, "learning_rate": 0.00018059670785988075, "loss": 4.355, "step": 5842 }, { "epoch": 0.6058493023738084, "grad_norm": 1.1640625, "learning_rate": 0.00018059027733698005, "loss": 4.3502, "step": 5843 }, { "epoch": 0.6059529904282964, "grad_norm": 1.0703125, "learning_rate": 0.00018058384586319926, "loss": 4.3536, "step": 5844 }, { "epoch": 0.6060566784827845, "grad_norm": 1.3203125, "learning_rate": 0.00018057741343861423, "loss": 4.3558, "step": 5845 }, { "epoch": 0.6061603665372726, "grad_norm": 1.1328125, "learning_rate": 0.0001805709800633009, "loss": 4.3174, "step": 5846 }, { "epoch": 0.6062640545917607, "grad_norm": 1.4609375, "learning_rate": 0.00018056454573733518, "loss": 4.364, "step": 5847 }, { "epoch": 0.6063677426462487, "grad_norm": 1.390625, "learning_rate": 0.000180558110460793, "loss": 4.3445, "step": 5848 }, { "epoch": 0.6064714307007368, "grad_norm": 1.171875, "learning_rate": 0.00018055167423375025, "loss": 4.2715, "step": 5849 }, { "epoch": 0.6065751187552249, "grad_norm": 1.1015625, "learning_rate": 0.00018054523705628292, "loss": 4.305, "step": 5850 }, { "epoch": 0.606678806809713, "grad_norm": 1.1875, "learning_rate": 0.0001805387989284669, "loss": 4.3408, "step": 5851 }, { "epoch": 0.6067824948642011, "grad_norm": 1.0390625, "learning_rate": 0.0001805323598503782, "loss": 4.34, "step": 5852 }, { "epoch": 0.6068861829186891, "grad_norm": 1.4140625, "learning_rate": 0.0001805259198220928, "loss": 4.3203, "step": 5853 }, { "epoch": 0.6069898709731772, "grad_norm": 1.1640625, "learning_rate": 0.00018051947884368662, "loss": 4.3286, "step": 5854 }, { "epoch": 0.6070935590276653, "grad_norm": 1.453125, "learning_rate": 0.00018051303691523575, "loss": 4.3236, "step": 5855 }, { "epoch": 0.6071972470821534, "grad_norm": 1.359375, "learning_rate": 0.0001805065940368161, "loss": 4.335, "step": 5856 }, { "epoch": 0.6073009351366414, "grad_norm": 1.234375, "learning_rate": 0.00018050015020850378, "loss": 4.3283, "step": 5857 }, { "epoch": 0.6074046231911295, "grad_norm": 1.0625, "learning_rate": 0.00018049370543037475, "loss": 4.3239, "step": 5858 }, { "epoch": 0.6075083112456175, "grad_norm": 1.4609375, "learning_rate": 0.00018048725970250514, "loss": 4.3298, "step": 5859 }, { "epoch": 0.6076119993001057, "grad_norm": 1.2109375, "learning_rate": 0.0001804808130249709, "loss": 4.3457, "step": 5860 }, { "epoch": 0.6077156873545937, "grad_norm": 1.4765625, "learning_rate": 0.00018047436539784812, "loss": 4.3558, "step": 5861 }, { "epoch": 0.6078193754090818, "grad_norm": 1.3828125, "learning_rate": 0.00018046791682121293, "loss": 4.3571, "step": 5862 }, { "epoch": 0.6079230634635698, "grad_norm": 1.3125, "learning_rate": 0.00018046146729514136, "loss": 4.295, "step": 5863 }, { "epoch": 0.608026751518058, "grad_norm": 1.2890625, "learning_rate": 0.00018045501681970954, "loss": 4.3485, "step": 5864 }, { "epoch": 0.608130439572546, "grad_norm": 1.296875, "learning_rate": 0.00018044856539499354, "loss": 4.3523, "step": 5865 }, { "epoch": 0.6082341276270341, "grad_norm": 1.125, "learning_rate": 0.00018044211302106953, "loss": 4.3211, "step": 5866 }, { "epoch": 0.6083378156815221, "grad_norm": 1.4453125, "learning_rate": 0.00018043565969801359, "loss": 4.3512, "step": 5867 }, { "epoch": 0.6084415037360102, "grad_norm": 1.21875, "learning_rate": 0.00018042920542590195, "loss": 4.3781, "step": 5868 }, { "epoch": 0.6085451917904983, "grad_norm": 1.578125, "learning_rate": 0.00018042275020481064, "loss": 4.3541, "step": 5869 }, { "epoch": 0.6086488798449864, "grad_norm": 1.390625, "learning_rate": 0.00018041629403481592, "loss": 4.3422, "step": 5870 }, { "epoch": 0.6087525678994744, "grad_norm": 1.4921875, "learning_rate": 0.00018040983691599395, "loss": 4.339, "step": 5871 }, { "epoch": 0.6088562559539625, "grad_norm": 1.375, "learning_rate": 0.00018040337884842086, "loss": 4.2955, "step": 5872 }, { "epoch": 0.6089599440084505, "grad_norm": 1.2578125, "learning_rate": 0.00018039691983217287, "loss": 4.3055, "step": 5873 }, { "epoch": 0.6090636320629387, "grad_norm": 1.21875, "learning_rate": 0.00018039045986732627, "loss": 4.352, "step": 5874 }, { "epoch": 0.6091673201174267, "grad_norm": 1.3828125, "learning_rate": 0.0001803839989539572, "loss": 4.3229, "step": 5875 }, { "epoch": 0.6092710081719148, "grad_norm": 1.2734375, "learning_rate": 0.0001803775370921419, "loss": 4.313, "step": 5876 }, { "epoch": 0.6093746962264028, "grad_norm": 1.421875, "learning_rate": 0.00018037107428195664, "loss": 4.3309, "step": 5877 }, { "epoch": 0.609478384280891, "grad_norm": 1.3515625, "learning_rate": 0.00018036461052347766, "loss": 4.3201, "step": 5878 }, { "epoch": 0.609582072335379, "grad_norm": 1.2734375, "learning_rate": 0.0001803581458167812, "loss": 4.3666, "step": 5879 }, { "epoch": 0.6096857603898671, "grad_norm": 1.25, "learning_rate": 0.0001803516801619436, "loss": 4.3787, "step": 5880 }, { "epoch": 0.6097894484443551, "grad_norm": 1.2734375, "learning_rate": 0.00018034521355904108, "loss": 4.3374, "step": 5881 }, { "epoch": 0.6098931364988432, "grad_norm": 1.1640625, "learning_rate": 0.00018033874600815, "loss": 4.3281, "step": 5882 }, { "epoch": 0.6099968245533313, "grad_norm": 1.4296875, "learning_rate": 0.0001803322775093466, "loss": 4.3247, "step": 5883 }, { "epoch": 0.6101005126078194, "grad_norm": 1.203125, "learning_rate": 0.00018032580806270725, "loss": 4.3396, "step": 5884 }, { "epoch": 0.6102042006623074, "grad_norm": 1.515625, "learning_rate": 0.0001803193376683083, "loss": 4.3337, "step": 5885 }, { "epoch": 0.6103078887167955, "grad_norm": 1.4375, "learning_rate": 0.00018031286632622603, "loss": 4.347, "step": 5886 }, { "epoch": 0.6104115767712835, "grad_norm": 1.2578125, "learning_rate": 0.00018030639403653683, "loss": 4.3089, "step": 5887 }, { "epoch": 0.6105152648257717, "grad_norm": 1.21875, "learning_rate": 0.00018029992079931711, "loss": 4.3356, "step": 5888 }, { "epoch": 0.6106189528802597, "grad_norm": 1.3203125, "learning_rate": 0.00018029344661464318, "loss": 4.2903, "step": 5889 }, { "epoch": 0.6107226409347478, "grad_norm": 1.1796875, "learning_rate": 0.00018028697148259144, "loss": 4.3385, "step": 5890 }, { "epoch": 0.6108263289892358, "grad_norm": 1.6484375, "learning_rate": 0.00018028049540323832, "loss": 4.3115, "step": 5891 }, { "epoch": 0.610930017043724, "grad_norm": 1.484375, "learning_rate": 0.0001802740183766602, "loss": 4.3394, "step": 5892 }, { "epoch": 0.611033705098212, "grad_norm": 1.6328125, "learning_rate": 0.00018026754040293354, "loss": 4.3097, "step": 5893 }, { "epoch": 0.6111373931527001, "grad_norm": 1.578125, "learning_rate": 0.00018026106148213476, "loss": 4.3807, "step": 5894 }, { "epoch": 0.6112410812071881, "grad_norm": 1.1796875, "learning_rate": 0.00018025458161434027, "loss": 4.3459, "step": 5895 }, { "epoch": 0.6113447692616762, "grad_norm": 1.15625, "learning_rate": 0.00018024810079962653, "loss": 4.3674, "step": 5896 }, { "epoch": 0.6114484573161644, "grad_norm": 1.1953125, "learning_rate": 0.00018024161903807006, "loss": 4.3333, "step": 5897 }, { "epoch": 0.6115521453706524, "grad_norm": 1.0078125, "learning_rate": 0.0001802351363297473, "loss": 4.3329, "step": 5898 }, { "epoch": 0.6116558334251405, "grad_norm": 1.421875, "learning_rate": 0.00018022865267473473, "loss": 4.3412, "step": 5899 }, { "epoch": 0.6117595214796285, "grad_norm": 1.328125, "learning_rate": 0.00018022216807310888, "loss": 4.3796, "step": 5900 }, { "epoch": 0.6118632095341167, "grad_norm": 1.1875, "learning_rate": 0.00018021568252494624, "loss": 4.3685, "step": 5901 }, { "epoch": 0.6119668975886047, "grad_norm": 1.1484375, "learning_rate": 0.00018020919603032334, "loss": 4.3144, "step": 5902 }, { "epoch": 0.6120705856430928, "grad_norm": 1.125, "learning_rate": 0.00018020270858931666, "loss": 4.3126, "step": 5903 }, { "epoch": 0.6121742736975808, "grad_norm": 1.0, "learning_rate": 0.00018019622020200285, "loss": 4.3694, "step": 5904 }, { "epoch": 0.6122779617520689, "grad_norm": 1.3203125, "learning_rate": 0.0001801897308684584, "loss": 4.3326, "step": 5905 }, { "epoch": 0.612381649806557, "grad_norm": 1.25, "learning_rate": 0.00018018324058875993, "loss": 4.3275, "step": 5906 }, { "epoch": 0.6124853378610451, "grad_norm": 1.359375, "learning_rate": 0.00018017674936298393, "loss": 4.3632, "step": 5907 }, { "epoch": 0.6125890259155331, "grad_norm": 1.1875, "learning_rate": 0.00018017025719120703, "loss": 4.3562, "step": 5908 }, { "epoch": 0.6126927139700212, "grad_norm": 1.3671875, "learning_rate": 0.00018016376407350588, "loss": 4.3318, "step": 5909 }, { "epoch": 0.6127964020245092, "grad_norm": 1.3046875, "learning_rate": 0.000180157270009957, "loss": 4.3231, "step": 5910 }, { "epoch": 0.6129000900789974, "grad_norm": 1.296875, "learning_rate": 0.00018015077500063714, "loss": 4.3291, "step": 5911 }, { "epoch": 0.6130037781334854, "grad_norm": 1.1328125, "learning_rate": 0.0001801442790456228, "loss": 4.3329, "step": 5912 }, { "epoch": 0.6131074661879735, "grad_norm": 1.3046875, "learning_rate": 0.00018013778214499067, "loss": 4.3003, "step": 5913 }, { "epoch": 0.6132111542424615, "grad_norm": 1.203125, "learning_rate": 0.00018013128429881747, "loss": 4.3611, "step": 5914 }, { "epoch": 0.6133148422969497, "grad_norm": 1.40625, "learning_rate": 0.0001801247855071798, "loss": 4.3372, "step": 5915 }, { "epoch": 0.6134185303514377, "grad_norm": 1.359375, "learning_rate": 0.00018011828577015434, "loss": 4.3413, "step": 5916 }, { "epoch": 0.6135222184059258, "grad_norm": 1.171875, "learning_rate": 0.0001801117850878178, "loss": 4.3753, "step": 5917 }, { "epoch": 0.6136259064604138, "grad_norm": 1.0859375, "learning_rate": 0.00018010528346024688, "loss": 4.3338, "step": 5918 }, { "epoch": 0.6137295945149019, "grad_norm": 1.25, "learning_rate": 0.0001800987808875183, "loss": 4.3024, "step": 5919 }, { "epoch": 0.61383328256939, "grad_norm": 1.109375, "learning_rate": 0.00018009227736970877, "loss": 4.3136, "step": 5920 }, { "epoch": 0.6139369706238781, "grad_norm": 1.2421875, "learning_rate": 0.00018008577290689503, "loss": 4.318, "step": 5921 }, { "epoch": 0.6140406586783661, "grad_norm": 1.125, "learning_rate": 0.00018007926749915383, "loss": 4.324, "step": 5922 }, { "epoch": 0.6141443467328542, "grad_norm": 1.296875, "learning_rate": 0.0001800727611465619, "loss": 4.3304, "step": 5923 }, { "epoch": 0.6142480347873422, "grad_norm": 1.171875, "learning_rate": 0.00018006625384919605, "loss": 4.3218, "step": 5924 }, { "epoch": 0.6143517228418304, "grad_norm": 1.359375, "learning_rate": 0.00018005974560713305, "loss": 4.3835, "step": 5925 }, { "epoch": 0.6144554108963184, "grad_norm": 1.2578125, "learning_rate": 0.00018005323642044966, "loss": 4.314, "step": 5926 }, { "epoch": 0.6145590989508065, "grad_norm": 1.4140625, "learning_rate": 0.00018004672628922267, "loss": 4.3165, "step": 5927 }, { "epoch": 0.6146627870052945, "grad_norm": 1.3125, "learning_rate": 0.00018004021521352893, "loss": 4.3353, "step": 5928 }, { "epoch": 0.6147664750597827, "grad_norm": 1.40625, "learning_rate": 0.0001800337031934453, "loss": 4.2861, "step": 5929 }, { "epoch": 0.6148701631142707, "grad_norm": 1.3046875, "learning_rate": 0.00018002719022904855, "loss": 4.3535, "step": 5930 }, { "epoch": 0.6149738511687588, "grad_norm": 1.3671875, "learning_rate": 0.00018002067632041555, "loss": 4.3158, "step": 5931 }, { "epoch": 0.6150775392232468, "grad_norm": 1.28125, "learning_rate": 0.00018001416146762314, "loss": 4.3222, "step": 5932 }, { "epoch": 0.6151812272777349, "grad_norm": 1.421875, "learning_rate": 0.00018000764567074822, "loss": 4.3179, "step": 5933 }, { "epoch": 0.615284915332223, "grad_norm": 1.34375, "learning_rate": 0.00018000112892986765, "loss": 4.3686, "step": 5934 }, { "epoch": 0.6153886033867111, "grad_norm": 1.265625, "learning_rate": 0.0001799946112450583, "loss": 4.3321, "step": 5935 }, { "epoch": 0.6154922914411991, "grad_norm": 1.125, "learning_rate": 0.00017998809261639712, "loss": 4.3088, "step": 5936 }, { "epoch": 0.6155959794956872, "grad_norm": 1.2421875, "learning_rate": 0.000179981573043961, "loss": 4.3076, "step": 5937 }, { "epoch": 0.6156996675501752, "grad_norm": 1.1171875, "learning_rate": 0.00017997505252782687, "loss": 4.3049, "step": 5938 }, { "epoch": 0.6158033556046634, "grad_norm": 1.390625, "learning_rate": 0.00017996853106807165, "loss": 4.357, "step": 5939 }, { "epoch": 0.6159070436591514, "grad_norm": 1.359375, "learning_rate": 0.00017996200866477228, "loss": 4.3391, "step": 5940 }, { "epoch": 0.6160107317136395, "grad_norm": 1.2734375, "learning_rate": 0.00017995548531800573, "loss": 4.372, "step": 5941 }, { "epoch": 0.6161144197681276, "grad_norm": 1.1171875, "learning_rate": 0.000179948961027849, "loss": 4.2656, "step": 5942 }, { "epoch": 0.6162181078226157, "grad_norm": 1.2734375, "learning_rate": 0.00017994243579437898, "loss": 4.3352, "step": 5943 }, { "epoch": 0.6163217958771038, "grad_norm": 1.1171875, "learning_rate": 0.0001799359096176728, "loss": 4.3499, "step": 5944 }, { "epoch": 0.6164254839315918, "grad_norm": 1.4921875, "learning_rate": 0.00017992938249780733, "loss": 4.3459, "step": 5945 }, { "epoch": 0.6165291719860799, "grad_norm": 1.3828125, "learning_rate": 0.00017992285443485965, "loss": 4.3025, "step": 5946 }, { "epoch": 0.6166328600405679, "grad_norm": 1.15625, "learning_rate": 0.00017991632542890677, "loss": 4.3394, "step": 5947 }, { "epoch": 0.6167365480950561, "grad_norm": 1.1171875, "learning_rate": 0.00017990979548002572, "loss": 4.3118, "step": 5948 }, { "epoch": 0.6168402361495441, "grad_norm": 1.2734375, "learning_rate": 0.00017990326458829355, "loss": 4.2628, "step": 5949 }, { "epoch": 0.6169439242040322, "grad_norm": 1.109375, "learning_rate": 0.0001798967327537873, "loss": 4.3008, "step": 5950 }, { "epoch": 0.6170476122585202, "grad_norm": 1.4609375, "learning_rate": 0.0001798901999765841, "loss": 4.3193, "step": 5951 }, { "epoch": 0.6171513003130084, "grad_norm": 1.3515625, "learning_rate": 0.00017988366625676098, "loss": 4.3051, "step": 5952 }, { "epoch": 0.6172549883674964, "grad_norm": 1.1484375, "learning_rate": 0.00017987713159439502, "loss": 4.3361, "step": 5953 }, { "epoch": 0.6173586764219845, "grad_norm": 1.125, "learning_rate": 0.00017987059598956336, "loss": 4.3399, "step": 5954 }, { "epoch": 0.6174623644764725, "grad_norm": 1.3203125, "learning_rate": 0.00017986405944234307, "loss": 4.3643, "step": 5955 }, { "epoch": 0.6175660525309606, "grad_norm": 1.0859375, "learning_rate": 0.0001798575219528113, "loss": 4.3694, "step": 5956 }, { "epoch": 0.6176697405854487, "grad_norm": 1.40625, "learning_rate": 0.0001798509835210452, "loss": 4.3734, "step": 5957 }, { "epoch": 0.6177734286399368, "grad_norm": 1.328125, "learning_rate": 0.0001798444441471219, "loss": 4.3513, "step": 5958 }, { "epoch": 0.6178771166944248, "grad_norm": 1.265625, "learning_rate": 0.00017983790383111856, "loss": 4.3163, "step": 5959 }, { "epoch": 0.6179808047489129, "grad_norm": 1.1484375, "learning_rate": 0.00017983136257311233, "loss": 4.2875, "step": 5960 }, { "epoch": 0.6180844928034009, "grad_norm": 1.203125, "learning_rate": 0.00017982482037318042, "loss": 4.3255, "step": 5961 }, { "epoch": 0.6181881808578891, "grad_norm": 1.09375, "learning_rate": 0.00017981827723140002, "loss": 4.3877, "step": 5962 }, { "epoch": 0.6182918689123771, "grad_norm": 1.2734375, "learning_rate": 0.0001798117331478483, "loss": 4.3281, "step": 5963 }, { "epoch": 0.6183955569668652, "grad_norm": 1.1484375, "learning_rate": 0.0001798051881226025, "loss": 4.3363, "step": 5964 }, { "epoch": 0.6184992450213532, "grad_norm": 1.34375, "learning_rate": 0.00017979864215573983, "loss": 4.3321, "step": 5965 }, { "epoch": 0.6186029330758414, "grad_norm": 1.203125, "learning_rate": 0.00017979209524733754, "loss": 4.2988, "step": 5966 }, { "epoch": 0.6187066211303294, "grad_norm": 1.2265625, "learning_rate": 0.00017978554739747288, "loss": 4.3493, "step": 5967 }, { "epoch": 0.6188103091848175, "grad_norm": 1.15625, "learning_rate": 0.0001797789986062231, "loss": 4.2884, "step": 5968 }, { "epoch": 0.6189139972393055, "grad_norm": 1.2578125, "learning_rate": 0.00017977244887366545, "loss": 4.3441, "step": 5969 }, { "epoch": 0.6190176852937936, "grad_norm": 1.15625, "learning_rate": 0.00017976589819987724, "loss": 4.2903, "step": 5970 }, { "epoch": 0.6191213733482817, "grad_norm": 1.375, "learning_rate": 0.00017975934658493573, "loss": 4.3436, "step": 5971 }, { "epoch": 0.6192250614027698, "grad_norm": 1.2890625, "learning_rate": 0.00017975279402891826, "loss": 4.3744, "step": 5972 }, { "epoch": 0.6193287494572578, "grad_norm": 1.2109375, "learning_rate": 0.0001797462405319021, "loss": 4.3048, "step": 5973 }, { "epoch": 0.6194324375117459, "grad_norm": 1.0859375, "learning_rate": 0.0001797396860939646, "loss": 4.3472, "step": 5974 }, { "epoch": 0.6195361255662339, "grad_norm": 1.171875, "learning_rate": 0.0001797331307151831, "loss": 4.3844, "step": 5975 }, { "epoch": 0.6196398136207221, "grad_norm": 1.0625, "learning_rate": 0.00017972657439563493, "loss": 4.3364, "step": 5976 }, { "epoch": 0.6197435016752101, "grad_norm": 1.4296875, "learning_rate": 0.00017972001713539748, "loss": 4.3088, "step": 5977 }, { "epoch": 0.6198471897296982, "grad_norm": 1.3046875, "learning_rate": 0.00017971345893454807, "loss": 4.3343, "step": 5978 }, { "epoch": 0.6199508777841862, "grad_norm": 1.328125, "learning_rate": 0.00017970689979316412, "loss": 4.3309, "step": 5979 }, { "epoch": 0.6200545658386744, "grad_norm": 1.1875, "learning_rate": 0.00017970033971132301, "loss": 4.3378, "step": 5980 }, { "epoch": 0.6201582538931624, "grad_norm": 1.203125, "learning_rate": 0.00017969377868910216, "loss": 4.2783, "step": 5981 }, { "epoch": 0.6202619419476505, "grad_norm": 1.140625, "learning_rate": 0.00017968721672657892, "loss": 4.3228, "step": 5982 }, { "epoch": 0.6203656300021385, "grad_norm": 1.5078125, "learning_rate": 0.00017968065382383076, "loss": 4.3054, "step": 5983 }, { "epoch": 0.6204693180566266, "grad_norm": 1.390625, "learning_rate": 0.00017967408998093514, "loss": 4.336, "step": 5984 }, { "epoch": 0.6205730061111148, "grad_norm": 1.2421875, "learning_rate": 0.00017966752519796945, "loss": 4.3644, "step": 5985 }, { "epoch": 0.6206766941656028, "grad_norm": 1.25, "learning_rate": 0.00017966095947501119, "loss": 4.3458, "step": 5986 }, { "epoch": 0.6207803822200909, "grad_norm": 1.1171875, "learning_rate": 0.00017965439281213778, "loss": 4.3109, "step": 5987 }, { "epoch": 0.6208840702745789, "grad_norm": 1.0625, "learning_rate": 0.0001796478252094268, "loss": 4.3486, "step": 5988 }, { "epoch": 0.620987758329067, "grad_norm": 1.2890625, "learning_rate": 0.00017964125666695562, "loss": 4.3259, "step": 5989 }, { "epoch": 0.6210914463835551, "grad_norm": 1.1640625, "learning_rate": 0.00017963468718480181, "loss": 4.2994, "step": 5990 }, { "epoch": 0.6211951344380432, "grad_norm": 1.40625, "learning_rate": 0.00017962811676304285, "loss": 4.3223, "step": 5991 }, { "epoch": 0.6212988224925312, "grad_norm": 1.359375, "learning_rate": 0.00017962154540175632, "loss": 4.3949, "step": 5992 }, { "epoch": 0.6214025105470193, "grad_norm": 0.98046875, "learning_rate": 0.0001796149731010197, "loss": 4.2671, "step": 5993 }, { "epoch": 0.6215061986015074, "grad_norm": 1.0625, "learning_rate": 0.00017960839986091057, "loss": 4.3448, "step": 5994 }, { "epoch": 0.6216098866559955, "grad_norm": 1.0234375, "learning_rate": 0.00017960182568150642, "loss": 4.3655, "step": 5995 }, { "epoch": 0.6217135747104835, "grad_norm": 1.0, "learning_rate": 0.0001795952505628849, "loss": 4.284, "step": 5996 }, { "epoch": 0.6218172627649716, "grad_norm": 1.046875, "learning_rate": 0.00017958867450512358, "loss": 4.3335, "step": 5997 }, { "epoch": 0.6219209508194596, "grad_norm": 0.890625, "learning_rate": 0.0001795820975083, "loss": 4.3414, "step": 5998 }, { "epoch": 0.6220246388739478, "grad_norm": 1.03125, "learning_rate": 0.00017957551957249182, "loss": 4.2983, "step": 5999 }, { "epoch": 0.6221283269284358, "grad_norm": 0.875, "learning_rate": 0.0001795689406977766, "loss": 4.3232, "step": 6000 }, { "epoch": 0.6222320149829239, "grad_norm": 1.109375, "learning_rate": 0.000179562360884232, "loss": 4.3616, "step": 6001 }, { "epoch": 0.6223357030374119, "grad_norm": 0.92578125, "learning_rate": 0.00017955578013193564, "loss": 4.3173, "step": 6002 }, { "epoch": 0.6224393910919, "grad_norm": 1.265625, "learning_rate": 0.00017954919844096517, "loss": 4.3492, "step": 6003 }, { "epoch": 0.6225430791463881, "grad_norm": 1.078125, "learning_rate": 0.00017954261581139825, "loss": 4.3315, "step": 6004 }, { "epoch": 0.6226467672008762, "grad_norm": 1.390625, "learning_rate": 0.00017953603224331254, "loss": 4.3123, "step": 6005 }, { "epoch": 0.6227504552553642, "grad_norm": 1.2734375, "learning_rate": 0.0001795294477367857, "loss": 4.332, "step": 6006 }, { "epoch": 0.6228541433098523, "grad_norm": 1.234375, "learning_rate": 0.00017952286229189546, "loss": 4.3646, "step": 6007 }, { "epoch": 0.6229578313643404, "grad_norm": 1.1953125, "learning_rate": 0.00017951627590871952, "loss": 4.31, "step": 6008 }, { "epoch": 0.6230615194188285, "grad_norm": 1.1796875, "learning_rate": 0.00017950968858733557, "loss": 4.3498, "step": 6009 }, { "epoch": 0.6231652074733165, "grad_norm": 1.046875, "learning_rate": 0.00017950310032782132, "loss": 4.3115, "step": 6010 }, { "epoch": 0.6232688955278046, "grad_norm": 1.40625, "learning_rate": 0.00017949651113025454, "loss": 4.3336, "step": 6011 }, { "epoch": 0.6233725835822926, "grad_norm": 1.265625, "learning_rate": 0.00017948992099471296, "loss": 4.36, "step": 6012 }, { "epoch": 0.6234762716367808, "grad_norm": 1.3828125, "learning_rate": 0.00017948332992127433, "loss": 4.3652, "step": 6013 }, { "epoch": 0.6235799596912688, "grad_norm": 1.3203125, "learning_rate": 0.00017947673791001643, "loss": 4.3192, "step": 6014 }, { "epoch": 0.6236836477457569, "grad_norm": 1.203125, "learning_rate": 0.00017947014496101703, "loss": 4.3289, "step": 6015 }, { "epoch": 0.6237873358002449, "grad_norm": 1.125, "learning_rate": 0.00017946355107435391, "loss": 4.3483, "step": 6016 }, { "epoch": 0.623891023854733, "grad_norm": 1.40625, "learning_rate": 0.0001794569562501049, "loss": 4.3262, "step": 6017 }, { "epoch": 0.6239947119092211, "grad_norm": 1.234375, "learning_rate": 0.0001794503604883478, "loss": 4.3498, "step": 6018 }, { "epoch": 0.6240983999637092, "grad_norm": 1.3515625, "learning_rate": 0.00017944376378916044, "loss": 4.3238, "step": 6019 }, { "epoch": 0.6242020880181972, "grad_norm": 1.28125, "learning_rate": 0.00017943716615262062, "loss": 4.3432, "step": 6020 }, { "epoch": 0.6243057760726853, "grad_norm": 1.0859375, "learning_rate": 0.0001794305675788062, "loss": 4.3482, "step": 6021 }, { "epoch": 0.6244094641271734, "grad_norm": 1.0703125, "learning_rate": 0.00017942396806779507, "loss": 4.3173, "step": 6022 }, { "epoch": 0.6245131521816615, "grad_norm": 1.1484375, "learning_rate": 0.00017941736761966506, "loss": 4.304, "step": 6023 }, { "epoch": 0.6246168402361495, "grad_norm": 1.0625, "learning_rate": 0.00017941076623449406, "loss": 4.3422, "step": 6024 }, { "epoch": 0.6247205282906376, "grad_norm": 1.390625, "learning_rate": 0.00017940416391235995, "loss": 4.3239, "step": 6025 }, { "epoch": 0.6248242163451256, "grad_norm": 1.328125, "learning_rate": 0.00017939756065334068, "loss": 4.3214, "step": 6026 }, { "epoch": 0.6249279043996138, "grad_norm": 1.109375, "learning_rate": 0.00017939095645751408, "loss": 4.3207, "step": 6027 }, { "epoch": 0.6250315924541018, "grad_norm": 1.0625, "learning_rate": 0.0001793843513249581, "loss": 4.3462, "step": 6028 }, { "epoch": 0.6251352805085899, "grad_norm": 1.2421875, "learning_rate": 0.00017937774525575073, "loss": 4.3265, "step": 6029 }, { "epoch": 0.625238968563078, "grad_norm": 1.1328125, "learning_rate": 0.00017937113824996985, "loss": 4.3543, "step": 6030 }, { "epoch": 0.625342656617566, "grad_norm": 1.3515625, "learning_rate": 0.00017936453030769346, "loss": 4.3666, "step": 6031 }, { "epoch": 0.6254463446720542, "grad_norm": 1.1796875, "learning_rate": 0.00017935792142899948, "loss": 4.3258, "step": 6032 }, { "epoch": 0.6255500327265422, "grad_norm": 1.28125, "learning_rate": 0.00017935131161396592, "loss": 4.3247, "step": 6033 }, { "epoch": 0.6256537207810303, "grad_norm": 1.1875, "learning_rate": 0.00017934470086267075, "loss": 4.3449, "step": 6034 }, { "epoch": 0.6257574088355183, "grad_norm": 1.3046875, "learning_rate": 0.000179338089175192, "loss": 4.3614, "step": 6035 }, { "epoch": 0.6258610968900065, "grad_norm": 1.2109375, "learning_rate": 0.00017933147655160766, "loss": 4.3474, "step": 6036 }, { "epoch": 0.6259647849444945, "grad_norm": 1.2578125, "learning_rate": 0.00017932486299199573, "loss": 4.3562, "step": 6037 }, { "epoch": 0.6260684729989826, "grad_norm": 1.1640625, "learning_rate": 0.0001793182484964343, "loss": 4.3562, "step": 6038 }, { "epoch": 0.6261721610534706, "grad_norm": 1.3125, "learning_rate": 0.0001793116330650013, "loss": 4.3347, "step": 6039 }, { "epoch": 0.6262758491079587, "grad_norm": 1.1484375, "learning_rate": 0.00017930501669777496, "loss": 4.3492, "step": 6040 }, { "epoch": 0.6263795371624468, "grad_norm": 1.359375, "learning_rate": 0.00017929839939483322, "loss": 4.3075, "step": 6041 }, { "epoch": 0.6264832252169349, "grad_norm": 1.2578125, "learning_rate": 0.00017929178115625417, "loss": 4.3827, "step": 6042 }, { "epoch": 0.6265869132714229, "grad_norm": 1.3828125, "learning_rate": 0.00017928516198211595, "loss": 4.3436, "step": 6043 }, { "epoch": 0.626690601325911, "grad_norm": 1.265625, "learning_rate": 0.0001792785418724966, "loss": 4.3256, "step": 6044 }, { "epoch": 0.626794289380399, "grad_norm": 1.3984375, "learning_rate": 0.00017927192082747427, "loss": 4.3112, "step": 6045 }, { "epoch": 0.6268979774348872, "grad_norm": 1.2578125, "learning_rate": 0.0001792652988471271, "loss": 4.3048, "step": 6046 }, { "epoch": 0.6270016654893752, "grad_norm": 1.546875, "learning_rate": 0.00017925867593153317, "loss": 4.3716, "step": 6047 }, { "epoch": 0.6271053535438633, "grad_norm": 1.4296875, "learning_rate": 0.0001792520520807706, "loss": 4.2846, "step": 6048 }, { "epoch": 0.6272090415983513, "grad_norm": 1.2265625, "learning_rate": 0.00017924542729491765, "loss": 4.3488, "step": 6049 }, { "epoch": 0.6273127296528395, "grad_norm": 1.25, "learning_rate": 0.00017923880157405238, "loss": 4.318, "step": 6050 }, { "epoch": 0.6274164177073275, "grad_norm": 1.234375, "learning_rate": 0.000179232174918253, "loss": 4.3829, "step": 6051 }, { "epoch": 0.6275201057618156, "grad_norm": 1.1484375, "learning_rate": 0.00017922554732759775, "loss": 4.3599, "step": 6052 }, { "epoch": 0.6276237938163036, "grad_norm": 1.3359375, "learning_rate": 0.00017921891880216478, "loss": 4.3358, "step": 6053 }, { "epoch": 0.6277274818707917, "grad_norm": 1.2109375, "learning_rate": 0.0001792122893420323, "loss": 4.3376, "step": 6054 }, { "epoch": 0.6278311699252798, "grad_norm": 1.4296875, "learning_rate": 0.00017920565894727854, "loss": 4.3467, "step": 6055 }, { "epoch": 0.6279348579797679, "grad_norm": 1.2890625, "learning_rate": 0.00017919902761798172, "loss": 4.3125, "step": 6056 }, { "epoch": 0.6280385460342559, "grad_norm": 1.3671875, "learning_rate": 0.0001791923953542201, "loss": 4.292, "step": 6057 }, { "epoch": 0.628142234088744, "grad_norm": 1.3046875, "learning_rate": 0.00017918576215607192, "loss": 4.3364, "step": 6058 }, { "epoch": 0.628245922143232, "grad_norm": 1.140625, "learning_rate": 0.00017917912802361543, "loss": 4.3216, "step": 6059 }, { "epoch": 0.6283496101977202, "grad_norm": 1.109375, "learning_rate": 0.00017917249295692895, "loss": 4.3368, "step": 6060 }, { "epoch": 0.6284532982522082, "grad_norm": 1.2421875, "learning_rate": 0.00017916585695609073, "loss": 4.2879, "step": 6061 }, { "epoch": 0.6285569863066963, "grad_norm": 1.109375, "learning_rate": 0.0001791592200211791, "loss": 4.348, "step": 6062 }, { "epoch": 0.6286606743611843, "grad_norm": 1.421875, "learning_rate": 0.00017915258215227232, "loss": 4.3623, "step": 6063 }, { "epoch": 0.6287643624156725, "grad_norm": 1.3671875, "learning_rate": 0.00017914594334944873, "loss": 4.3399, "step": 6064 }, { "epoch": 0.6288680504701605, "grad_norm": 1.1171875, "learning_rate": 0.00017913930361278672, "loss": 4.325, "step": 6065 }, { "epoch": 0.6289717385246486, "grad_norm": 1.03125, "learning_rate": 0.00017913266294236456, "loss": 4.31, "step": 6066 }, { "epoch": 0.6290754265791366, "grad_norm": 1.328125, "learning_rate": 0.0001791260213382606, "loss": 4.3083, "step": 6067 }, { "epoch": 0.6291791146336247, "grad_norm": 1.1640625, "learning_rate": 0.00017911937880055323, "loss": 4.3409, "step": 6068 }, { "epoch": 0.6292828026881128, "grad_norm": 1.4609375, "learning_rate": 0.00017911273532932086, "loss": 4.3099, "step": 6069 }, { "epoch": 0.6293864907426009, "grad_norm": 1.359375, "learning_rate": 0.00017910609092464181, "loss": 4.3098, "step": 6070 }, { "epoch": 0.6294901787970889, "grad_norm": 1.1171875, "learning_rate": 0.0001790994455865945, "loss": 4.2889, "step": 6071 }, { "epoch": 0.629593866851577, "grad_norm": 1.1015625, "learning_rate": 0.00017909279931525735, "loss": 4.3456, "step": 6072 }, { "epoch": 0.629697554906065, "grad_norm": 1.234375, "learning_rate": 0.00017908615211070878, "loss": 4.2922, "step": 6073 }, { "epoch": 0.6298012429605532, "grad_norm": 1.0703125, "learning_rate": 0.00017907950397302722, "loss": 4.3142, "step": 6074 }, { "epoch": 0.6299049310150413, "grad_norm": 1.5703125, "learning_rate": 0.00017907285490229109, "loss": 4.3715, "step": 6075 }, { "epoch": 0.6300086190695293, "grad_norm": 1.3671875, "learning_rate": 0.00017906620489857887, "loss": 4.3564, "step": 6076 }, { "epoch": 0.6301123071240174, "grad_norm": 1.3046875, "learning_rate": 0.000179059553961969, "loss": 4.3453, "step": 6077 }, { "epoch": 0.6302159951785055, "grad_norm": 1.1640625, "learning_rate": 0.00017905290209253996, "loss": 4.2826, "step": 6078 }, { "epoch": 0.6303196832329936, "grad_norm": 1.3046875, "learning_rate": 0.00017904624929037025, "loss": 4.3096, "step": 6079 }, { "epoch": 0.6304233712874816, "grad_norm": 1.234375, "learning_rate": 0.00017903959555553832, "loss": 4.3031, "step": 6080 }, { "epoch": 0.6305270593419697, "grad_norm": 1.4453125, "learning_rate": 0.00017903294088812273, "loss": 4.2875, "step": 6081 }, { "epoch": 0.6306307473964577, "grad_norm": 1.34375, "learning_rate": 0.000179026285288202, "loss": 4.3089, "step": 6082 }, { "epoch": 0.6307344354509459, "grad_norm": 1.1328125, "learning_rate": 0.00017901962875585463, "loss": 4.3298, "step": 6083 }, { "epoch": 0.6308381235054339, "grad_norm": 1.0625, "learning_rate": 0.00017901297129115914, "loss": 4.2848, "step": 6084 }, { "epoch": 0.630941811559922, "grad_norm": 1.140625, "learning_rate": 0.00017900631289419417, "loss": 4.2869, "step": 6085 }, { "epoch": 0.63104549961441, "grad_norm": 1.0546875, "learning_rate": 0.00017899965356503816, "loss": 4.3072, "step": 6086 }, { "epoch": 0.6311491876688982, "grad_norm": 1.6015625, "learning_rate": 0.00017899299330376977, "loss": 4.3425, "step": 6087 }, { "epoch": 0.6312528757233862, "grad_norm": 1.3984375, "learning_rate": 0.00017898633211046753, "loss": 4.3198, "step": 6088 }, { "epoch": 0.6313565637778743, "grad_norm": 1.28125, "learning_rate": 0.00017897966998521011, "loss": 4.3226, "step": 6089 }, { "epoch": 0.6314602518323623, "grad_norm": 1.3046875, "learning_rate": 0.00017897300692807603, "loss": 4.3609, "step": 6090 }, { "epoch": 0.6315639398868504, "grad_norm": 1.0234375, "learning_rate": 0.00017896634293914398, "loss": 4.2785, "step": 6091 }, { "epoch": 0.6316676279413385, "grad_norm": 0.95703125, "learning_rate": 0.00017895967801849253, "loss": 4.2822, "step": 6092 }, { "epoch": 0.6317713159958266, "grad_norm": 0.98828125, "learning_rate": 0.00017895301216620032, "loss": 4.2794, "step": 6093 }, { "epoch": 0.6318750040503146, "grad_norm": 0.85546875, "learning_rate": 0.00017894634538234607, "loss": 4.3705, "step": 6094 }, { "epoch": 0.6319786921048027, "grad_norm": 0.94140625, "learning_rate": 0.0001789396776670084, "loss": 4.3103, "step": 6095 }, { "epoch": 0.6320823801592907, "grad_norm": 0.796875, "learning_rate": 0.00017893300902026594, "loss": 4.2928, "step": 6096 }, { "epoch": 0.6321860682137789, "grad_norm": 0.9375, "learning_rate": 0.00017892633944219743, "loss": 4.3577, "step": 6097 }, { "epoch": 0.6322897562682669, "grad_norm": 0.76953125, "learning_rate": 0.00017891966893288154, "loss": 4.3371, "step": 6098 }, { "epoch": 0.632393444322755, "grad_norm": 0.91015625, "learning_rate": 0.00017891299749239696, "loss": 4.336, "step": 6099 }, { "epoch": 0.632497132377243, "grad_norm": 0.75, "learning_rate": 0.00017890632512082243, "loss": 4.2812, "step": 6100 }, { "epoch": 0.6326008204317312, "grad_norm": 0.83984375, "learning_rate": 0.0001788996518182367, "loss": 4.2998, "step": 6101 }, { "epoch": 0.6327045084862192, "grad_norm": 0.7421875, "learning_rate": 0.00017889297758471846, "loss": 4.3527, "step": 6102 }, { "epoch": 0.6328081965407073, "grad_norm": 0.84765625, "learning_rate": 0.00017888630242034648, "loss": 4.3369, "step": 6103 }, { "epoch": 0.6329118845951953, "grad_norm": 0.76171875, "learning_rate": 0.0001788796263251995, "loss": 4.3144, "step": 6104 }, { "epoch": 0.6330155726496834, "grad_norm": 0.84765625, "learning_rate": 0.00017887294929935633, "loss": 4.2951, "step": 6105 }, { "epoch": 0.6331192607041715, "grad_norm": 0.76171875, "learning_rate": 0.00017886627134289573, "loss": 4.3436, "step": 6106 }, { "epoch": 0.6332229487586596, "grad_norm": 0.87890625, "learning_rate": 0.0001788595924558965, "loss": 4.3389, "step": 6107 }, { "epoch": 0.6333266368131476, "grad_norm": 0.734375, "learning_rate": 0.0001788529126384374, "loss": 4.3352, "step": 6108 }, { "epoch": 0.6334303248676357, "grad_norm": 0.86328125, "learning_rate": 0.00017884623189059733, "loss": 4.3023, "step": 6109 }, { "epoch": 0.6335340129221237, "grad_norm": 0.80078125, "learning_rate": 0.00017883955021245505, "loss": 4.2569, "step": 6110 }, { "epoch": 0.6336377009766119, "grad_norm": 0.81640625, "learning_rate": 0.0001788328676040894, "loss": 4.3254, "step": 6111 }, { "epoch": 0.6337413890310999, "grad_norm": 0.9140625, "learning_rate": 0.00017882618406557922, "loss": 4.2926, "step": 6112 }, { "epoch": 0.633845077085588, "grad_norm": 0.7421875, "learning_rate": 0.00017881949959700343, "loss": 4.3386, "step": 6113 }, { "epoch": 0.633948765140076, "grad_norm": 0.765625, "learning_rate": 0.00017881281419844088, "loss": 4.3414, "step": 6114 }, { "epoch": 0.6340524531945642, "grad_norm": 0.7265625, "learning_rate": 0.00017880612786997039, "loss": 4.2778, "step": 6115 }, { "epoch": 0.6341561412490522, "grad_norm": 0.828125, "learning_rate": 0.00017879944061167092, "loss": 4.3278, "step": 6116 }, { "epoch": 0.6342598293035403, "grad_norm": 0.65234375, "learning_rate": 0.0001787927524236213, "loss": 4.3591, "step": 6117 }, { "epoch": 0.6343635173580283, "grad_norm": 0.78125, "learning_rate": 0.00017878606330590054, "loss": 4.3536, "step": 6118 }, { "epoch": 0.6344672054125164, "grad_norm": 0.66015625, "learning_rate": 0.00017877937325858748, "loss": 4.3563, "step": 6119 }, { "epoch": 0.6345708934670046, "grad_norm": 0.7421875, "learning_rate": 0.00017877268228176112, "loss": 4.3259, "step": 6120 }, { "epoch": 0.6346745815214926, "grad_norm": 0.59765625, "learning_rate": 0.00017876599037550036, "loss": 4.317, "step": 6121 }, { "epoch": 0.6347782695759807, "grad_norm": 0.77734375, "learning_rate": 0.00017875929753988416, "loss": 4.3243, "step": 6122 }, { "epoch": 0.6348819576304687, "grad_norm": 0.62109375, "learning_rate": 0.00017875260377499152, "loss": 4.2985, "step": 6123 }, { "epoch": 0.6349856456849569, "grad_norm": 0.68359375, "learning_rate": 0.0001787459090809014, "loss": 4.3338, "step": 6124 }, { "epoch": 0.6350893337394449, "grad_norm": 0.6171875, "learning_rate": 0.0001787392134576928, "loss": 4.3434, "step": 6125 }, { "epoch": 0.635193021793933, "grad_norm": 0.7890625, "learning_rate": 0.00017873251690544469, "loss": 4.3351, "step": 6126 }, { "epoch": 0.635296709848421, "grad_norm": 0.66015625, "learning_rate": 0.0001787258194242361, "loss": 4.3537, "step": 6127 }, { "epoch": 0.6354003979029091, "grad_norm": 0.69140625, "learning_rate": 0.00017871912101414609, "loss": 4.293, "step": 6128 }, { "epoch": 0.6355040859573972, "grad_norm": 0.671875, "learning_rate": 0.0001787124216752536, "loss": 4.3296, "step": 6129 }, { "epoch": 0.6356077740118853, "grad_norm": 0.69140625, "learning_rate": 0.0001787057214076378, "loss": 4.2974, "step": 6130 }, { "epoch": 0.6357114620663733, "grad_norm": 0.61328125, "learning_rate": 0.00017869902021137765, "loss": 4.3578, "step": 6131 }, { "epoch": 0.6358151501208614, "grad_norm": 0.625, "learning_rate": 0.00017869231808655226, "loss": 4.3119, "step": 6132 }, { "epoch": 0.6359188381753494, "grad_norm": 0.66796875, "learning_rate": 0.00017868561503324071, "loss": 4.3112, "step": 6133 }, { "epoch": 0.6360225262298376, "grad_norm": 0.62109375, "learning_rate": 0.00017867891105152205, "loss": 4.3483, "step": 6134 }, { "epoch": 0.6361262142843256, "grad_norm": 0.69140625, "learning_rate": 0.00017867220614147544, "loss": 4.2971, "step": 6135 }, { "epoch": 0.6362299023388137, "grad_norm": 0.66015625, "learning_rate": 0.00017866550030317993, "loss": 4.2936, "step": 6136 }, { "epoch": 0.6363335903933017, "grad_norm": 0.6953125, "learning_rate": 0.0001786587935367147, "loss": 4.294, "step": 6137 }, { "epoch": 0.6364372784477899, "grad_norm": 0.71875, "learning_rate": 0.0001786520858421588, "loss": 4.2954, "step": 6138 }, { "epoch": 0.6365409665022779, "grad_norm": 0.73828125, "learning_rate": 0.00017864537721959148, "loss": 4.3349, "step": 6139 }, { "epoch": 0.636644654556766, "grad_norm": 0.68359375, "learning_rate": 0.00017863866766909181, "loss": 4.3433, "step": 6140 }, { "epoch": 0.636748342611254, "grad_norm": 0.72265625, "learning_rate": 0.00017863195719073897, "loss": 4.3523, "step": 6141 }, { "epoch": 0.6368520306657421, "grad_norm": 0.69140625, "learning_rate": 0.0001786252457846122, "loss": 4.3301, "step": 6142 }, { "epoch": 0.6369557187202302, "grad_norm": 0.71875, "learning_rate": 0.0001786185334507906, "loss": 4.3225, "step": 6143 }, { "epoch": 0.6370594067747183, "grad_norm": 0.73828125, "learning_rate": 0.00017861182018935343, "loss": 4.3684, "step": 6144 }, { "epoch": 0.6371630948292063, "grad_norm": 0.6640625, "learning_rate": 0.0001786051060003799, "loss": 4.3492, "step": 6145 }, { "epoch": 0.6372667828836944, "grad_norm": 0.7265625, "learning_rate": 0.00017859839088394915, "loss": 4.3687, "step": 6146 }, { "epoch": 0.6373704709381824, "grad_norm": 0.671875, "learning_rate": 0.00017859167484014053, "loss": 4.2824, "step": 6147 }, { "epoch": 0.6374741589926706, "grad_norm": 0.69140625, "learning_rate": 0.00017858495786903317, "loss": 4.3049, "step": 6148 }, { "epoch": 0.6375778470471586, "grad_norm": 0.640625, "learning_rate": 0.00017857823997070643, "loss": 4.2993, "step": 6149 }, { "epoch": 0.6376815351016467, "grad_norm": 0.671875, "learning_rate": 0.00017857152114523944, "loss": 4.3058, "step": 6150 }, { "epoch": 0.6377852231561347, "grad_norm": 0.65625, "learning_rate": 0.0001785648013927116, "loss": 4.346, "step": 6151 }, { "epoch": 0.6378889112106229, "grad_norm": 0.64453125, "learning_rate": 0.00017855808071320217, "loss": 4.3001, "step": 6152 }, { "epoch": 0.6379925992651109, "grad_norm": 0.65625, "learning_rate": 0.0001785513591067904, "loss": 4.3162, "step": 6153 }, { "epoch": 0.638096287319599, "grad_norm": 0.65625, "learning_rate": 0.00017854463657355566, "loss": 4.3288, "step": 6154 }, { "epoch": 0.638199975374087, "grad_norm": 0.609375, "learning_rate": 0.0001785379131135772, "loss": 4.3129, "step": 6155 }, { "epoch": 0.6383036634285751, "grad_norm": 0.6796875, "learning_rate": 0.0001785311887269344, "loss": 4.3475, "step": 6156 }, { "epoch": 0.6384073514830632, "grad_norm": 0.65234375, "learning_rate": 0.00017852446341370658, "loss": 4.3083, "step": 6157 }, { "epoch": 0.6385110395375513, "grad_norm": 0.734375, "learning_rate": 0.00017851773717397307, "loss": 4.3457, "step": 6158 }, { "epoch": 0.6386147275920393, "grad_norm": 0.671875, "learning_rate": 0.0001785110100078133, "loss": 4.318, "step": 6159 }, { "epoch": 0.6387184156465274, "grad_norm": 0.6484375, "learning_rate": 0.00017850428191530657, "loss": 4.365, "step": 6160 }, { "epoch": 0.6388221037010154, "grad_norm": 0.63671875, "learning_rate": 0.0001784975528965323, "loss": 4.3537, "step": 6161 }, { "epoch": 0.6389257917555036, "grad_norm": 0.609375, "learning_rate": 0.00017849082295156988, "loss": 4.369, "step": 6162 }, { "epoch": 0.6390294798099916, "grad_norm": 0.671875, "learning_rate": 0.00017848409208049874, "loss": 4.2885, "step": 6163 }, { "epoch": 0.6391331678644797, "grad_norm": 0.65234375, "learning_rate": 0.00017847736028339824, "loss": 4.3276, "step": 6164 }, { "epoch": 0.6392368559189678, "grad_norm": 0.67578125, "learning_rate": 0.00017847062756034786, "loss": 4.2959, "step": 6165 }, { "epoch": 0.6393405439734559, "grad_norm": 0.62890625, "learning_rate": 0.00017846389391142705, "loss": 4.3502, "step": 6166 }, { "epoch": 0.639444232027944, "grad_norm": 0.67578125, "learning_rate": 0.0001784571593367152, "loss": 4.3225, "step": 6167 }, { "epoch": 0.639547920082432, "grad_norm": 0.64453125, "learning_rate": 0.0001784504238362918, "loss": 4.3102, "step": 6168 }, { "epoch": 0.6396516081369201, "grad_norm": 0.7109375, "learning_rate": 0.00017844368741023634, "loss": 4.3177, "step": 6169 }, { "epoch": 0.6397552961914081, "grad_norm": 0.73828125, "learning_rate": 0.00017843695005862828, "loss": 4.3462, "step": 6170 }, { "epoch": 0.6398589842458963, "grad_norm": 0.6484375, "learning_rate": 0.00017843021178154712, "loss": 4.338, "step": 6171 }, { "epoch": 0.6399626723003843, "grad_norm": 0.65625, "learning_rate": 0.00017842347257907237, "loss": 4.2944, "step": 6172 }, { "epoch": 0.6400663603548724, "grad_norm": 0.734375, "learning_rate": 0.00017841673245128355, "loss": 4.3055, "step": 6173 }, { "epoch": 0.6401700484093604, "grad_norm": 0.6484375, "learning_rate": 0.00017840999139826015, "loss": 4.3246, "step": 6174 }, { "epoch": 0.6402737364638486, "grad_norm": 0.72265625, "learning_rate": 0.00017840324942008175, "loss": 4.2996, "step": 6175 }, { "epoch": 0.6403774245183366, "grad_norm": 0.640625, "learning_rate": 0.0001783965065168279, "loss": 4.3144, "step": 6176 }, { "epoch": 0.6404811125728247, "grad_norm": 0.7109375, "learning_rate": 0.00017838976268857813, "loss": 4.3124, "step": 6177 }, { "epoch": 0.6405848006273127, "grad_norm": 0.65625, "learning_rate": 0.000178383017935412, "loss": 4.3134, "step": 6178 }, { "epoch": 0.6406884886818008, "grad_norm": 0.671875, "learning_rate": 0.0001783762722574091, "loss": 4.3734, "step": 6179 }, { "epoch": 0.6407921767362889, "grad_norm": 0.66015625, "learning_rate": 0.0001783695256546491, "loss": 4.359, "step": 6180 }, { "epoch": 0.640895864790777, "grad_norm": 0.640625, "learning_rate": 0.00017836277812721148, "loss": 4.3412, "step": 6181 }, { "epoch": 0.640999552845265, "grad_norm": 0.6796875, "learning_rate": 0.00017835602967517593, "loss": 4.3363, "step": 6182 }, { "epoch": 0.6411032408997531, "grad_norm": 0.640625, "learning_rate": 0.00017834928029862205, "loss": 4.3228, "step": 6183 }, { "epoch": 0.6412069289542411, "grad_norm": 0.70703125, "learning_rate": 0.0001783425299976295, "loss": 4.3238, "step": 6184 }, { "epoch": 0.6413106170087293, "grad_norm": 0.70703125, "learning_rate": 0.00017833577877227793, "loss": 4.2995, "step": 6185 }, { "epoch": 0.6414143050632173, "grad_norm": 0.6640625, "learning_rate": 0.0001783290266226469, "loss": 4.3071, "step": 6186 }, { "epoch": 0.6415179931177054, "grad_norm": 0.6953125, "learning_rate": 0.0001783222735488162, "loss": 4.3645, "step": 6187 }, { "epoch": 0.6416216811721934, "grad_norm": 0.68359375, "learning_rate": 0.00017831551955086545, "loss": 4.345, "step": 6188 }, { "epoch": 0.6417253692266816, "grad_norm": 0.72265625, "learning_rate": 0.0001783087646288744, "loss": 4.3403, "step": 6189 }, { "epoch": 0.6418290572811696, "grad_norm": 0.71875, "learning_rate": 0.00017830200878292263, "loss": 4.3157, "step": 6190 }, { "epoch": 0.6419327453356577, "grad_norm": 0.76171875, "learning_rate": 0.00017829525201308998, "loss": 4.3469, "step": 6191 }, { "epoch": 0.6420364333901457, "grad_norm": 0.73828125, "learning_rate": 0.00017828849431945608, "loss": 4.3377, "step": 6192 }, { "epoch": 0.6421401214446338, "grad_norm": 0.75, "learning_rate": 0.00017828173570210072, "loss": 4.3062, "step": 6193 }, { "epoch": 0.6422438094991219, "grad_norm": 0.6875, "learning_rate": 0.0001782749761611036, "loss": 4.3202, "step": 6194 }, { "epoch": 0.64234749755361, "grad_norm": 0.703125, "learning_rate": 0.00017826821569654454, "loss": 4.301, "step": 6195 }, { "epoch": 0.642451185608098, "grad_norm": 0.66015625, "learning_rate": 0.00017826145430850329, "loss": 4.2708, "step": 6196 }, { "epoch": 0.6425548736625861, "grad_norm": 0.70703125, "learning_rate": 0.00017825469199705954, "loss": 4.3532, "step": 6197 }, { "epoch": 0.6426585617170741, "grad_norm": 0.6640625, "learning_rate": 0.00017824792876229318, "loss": 4.2841, "step": 6198 }, { "epoch": 0.6427622497715623, "grad_norm": 0.65234375, "learning_rate": 0.00017824116460428394, "loss": 4.3245, "step": 6199 }, { "epoch": 0.6428659378260503, "grad_norm": 0.70703125, "learning_rate": 0.00017823439952311168, "loss": 4.3055, "step": 6200 }, { "epoch": 0.6429696258805384, "grad_norm": 0.62890625, "learning_rate": 0.00017822763351885623, "loss": 4.308, "step": 6201 }, { "epoch": 0.6430733139350264, "grad_norm": 0.671875, "learning_rate": 0.00017822086659159738, "loss": 4.3525, "step": 6202 }, { "epoch": 0.6431770019895146, "grad_norm": 0.62890625, "learning_rate": 0.00017821409874141497, "loss": 4.3367, "step": 6203 }, { "epoch": 0.6432806900440026, "grad_norm": 0.70703125, "learning_rate": 0.0001782073299683889, "loss": 4.332, "step": 6204 }, { "epoch": 0.6433843780984907, "grad_norm": 0.58203125, "learning_rate": 0.00017820056027259895, "loss": 4.3704, "step": 6205 }, { "epoch": 0.6434880661529787, "grad_norm": 0.6875, "learning_rate": 0.0001781937896541251, "loss": 4.3155, "step": 6206 }, { "epoch": 0.6435917542074668, "grad_norm": 0.5703125, "learning_rate": 0.00017818701811304717, "loss": 4.2937, "step": 6207 }, { "epoch": 0.6436954422619549, "grad_norm": 0.671875, "learning_rate": 0.00017818024564944507, "loss": 4.2761, "step": 6208 }, { "epoch": 0.643799130316443, "grad_norm": 0.5703125, "learning_rate": 0.00017817347226339872, "loss": 4.2736, "step": 6209 }, { "epoch": 0.6439028183709311, "grad_norm": 0.640625, "learning_rate": 0.00017816669795498805, "loss": 4.309, "step": 6210 }, { "epoch": 0.6440065064254191, "grad_norm": 0.62890625, "learning_rate": 0.00017815992272429294, "loss": 4.2983, "step": 6211 }, { "epoch": 0.6441101944799072, "grad_norm": 0.66015625, "learning_rate": 0.00017815314657139336, "loss": 4.315, "step": 6212 }, { "epoch": 0.6442138825343953, "grad_norm": 0.6640625, "learning_rate": 0.00017814636949636928, "loss": 4.2786, "step": 6213 }, { "epoch": 0.6443175705888834, "grad_norm": 0.73828125, "learning_rate": 0.0001781395914993006, "loss": 4.2994, "step": 6214 }, { "epoch": 0.6444212586433714, "grad_norm": 0.65234375, "learning_rate": 0.0001781328125802674, "loss": 4.3356, "step": 6215 }, { "epoch": 0.6445249466978595, "grad_norm": 0.71875, "learning_rate": 0.00017812603273934956, "loss": 4.3215, "step": 6216 }, { "epoch": 0.6446286347523476, "grad_norm": 0.70703125, "learning_rate": 0.00017811925197662714, "loss": 4.3373, "step": 6217 }, { "epoch": 0.6447323228068357, "grad_norm": 0.7578125, "learning_rate": 0.0001781124702921801, "loss": 4.317, "step": 6218 }, { "epoch": 0.6448360108613237, "grad_norm": 0.66796875, "learning_rate": 0.00017810568768608848, "loss": 4.3405, "step": 6219 }, { "epoch": 0.6449396989158118, "grad_norm": 0.6953125, "learning_rate": 0.00017809890415843236, "loss": 4.3034, "step": 6220 }, { "epoch": 0.6450433869702998, "grad_norm": 0.6484375, "learning_rate": 0.00017809211970929166, "loss": 4.3331, "step": 6221 }, { "epoch": 0.645147075024788, "grad_norm": 0.67578125, "learning_rate": 0.00017808533433874652, "loss": 4.3232, "step": 6222 }, { "epoch": 0.645250763079276, "grad_norm": 0.6875, "learning_rate": 0.00017807854804687695, "loss": 4.3423, "step": 6223 }, { "epoch": 0.6453544511337641, "grad_norm": 0.6796875, "learning_rate": 0.0001780717608337631, "loss": 4.3282, "step": 6224 }, { "epoch": 0.6454581391882521, "grad_norm": 0.80078125, "learning_rate": 0.00017806497269948497, "loss": 4.3107, "step": 6225 }, { "epoch": 0.6455618272427402, "grad_norm": 0.72265625, "learning_rate": 0.00017805818364412268, "loss": 4.3284, "step": 6226 }, { "epoch": 0.6456655152972283, "grad_norm": 0.703125, "learning_rate": 0.0001780513936677563, "loss": 4.3306, "step": 6227 }, { "epoch": 0.6457692033517164, "grad_norm": 0.77734375, "learning_rate": 0.000178044602770466, "loss": 4.3592, "step": 6228 }, { "epoch": 0.6458728914062044, "grad_norm": 0.72265625, "learning_rate": 0.00017803781095233193, "loss": 4.3433, "step": 6229 }, { "epoch": 0.6459765794606925, "grad_norm": 0.671875, "learning_rate": 0.00017803101821343411, "loss": 4.3218, "step": 6230 }, { "epoch": 0.6460802675151806, "grad_norm": 0.71484375, "learning_rate": 0.00017802422455385282, "loss": 4.3262, "step": 6231 }, { "epoch": 0.6461839555696687, "grad_norm": 0.6953125, "learning_rate": 0.0001780174299736681, "loss": 4.3295, "step": 6232 }, { "epoch": 0.6462876436241567, "grad_norm": 0.703125, "learning_rate": 0.0001780106344729602, "loss": 4.3481, "step": 6233 }, { "epoch": 0.6463913316786448, "grad_norm": 0.70703125, "learning_rate": 0.0001780038380518093, "loss": 4.3348, "step": 6234 }, { "epoch": 0.6464950197331328, "grad_norm": 0.72265625, "learning_rate": 0.00017799704071029554, "loss": 4.3457, "step": 6235 }, { "epoch": 0.646598707787621, "grad_norm": 0.734375, "learning_rate": 0.00017799024244849912, "loss": 4.3509, "step": 6236 }, { "epoch": 0.646702395842109, "grad_norm": 0.63671875, "learning_rate": 0.00017798344326650032, "loss": 4.3605, "step": 6237 }, { "epoch": 0.6468060838965971, "grad_norm": 0.703125, "learning_rate": 0.00017797664316437928, "loss": 4.3658, "step": 6238 }, { "epoch": 0.6469097719510851, "grad_norm": 0.671875, "learning_rate": 0.00017796984214221633, "loss": 4.3078, "step": 6239 }, { "epoch": 0.6470134600055732, "grad_norm": 0.671875, "learning_rate": 0.00017796304020009163, "loss": 4.3317, "step": 6240 }, { "epoch": 0.6471171480600613, "grad_norm": 0.6328125, "learning_rate": 0.00017795623733808544, "loss": 4.3386, "step": 6241 }, { "epoch": 0.6472208361145494, "grad_norm": 0.6875, "learning_rate": 0.0001779494335562781, "loss": 4.2955, "step": 6242 }, { "epoch": 0.6473245241690374, "grad_norm": 0.64453125, "learning_rate": 0.0001779426288547498, "loss": 4.3589, "step": 6243 }, { "epoch": 0.6474282122235255, "grad_norm": 0.65625, "learning_rate": 0.00017793582323358088, "loss": 4.3091, "step": 6244 }, { "epoch": 0.6475319002780136, "grad_norm": 0.7265625, "learning_rate": 0.00017792901669285165, "loss": 4.3449, "step": 6245 }, { "epoch": 0.6476355883325017, "grad_norm": 0.625, "learning_rate": 0.00017792220923264237, "loss": 4.3256, "step": 6246 }, { "epoch": 0.6477392763869897, "grad_norm": 0.71875, "learning_rate": 0.0001779154008530334, "loss": 4.3174, "step": 6247 }, { "epoch": 0.6478429644414778, "grad_norm": 0.64453125, "learning_rate": 0.00017790859155410508, "loss": 4.2901, "step": 6248 }, { "epoch": 0.6479466524959658, "grad_norm": 0.69921875, "learning_rate": 0.00017790178133593768, "loss": 4.3499, "step": 6249 }, { "epoch": 0.648050340550454, "grad_norm": 0.7734375, "learning_rate": 0.00017789497019861166, "loss": 4.3747, "step": 6250 }, { "epoch": 0.648154028604942, "grad_norm": 0.68359375, "learning_rate": 0.00017788815814220733, "loss": 4.291, "step": 6251 }, { "epoch": 0.6482577166594301, "grad_norm": 0.8125, "learning_rate": 0.00017788134516680504, "loss": 4.2845, "step": 6252 }, { "epoch": 0.6483614047139182, "grad_norm": 0.76953125, "learning_rate": 0.00017787453127248522, "loss": 4.3211, "step": 6253 }, { "epoch": 0.6484650927684062, "grad_norm": 0.76953125, "learning_rate": 0.00017786771645932824, "loss": 4.3539, "step": 6254 }, { "epoch": 0.6485687808228944, "grad_norm": 0.68359375, "learning_rate": 0.00017786090072741448, "loss": 4.329, "step": 6255 }, { "epoch": 0.6486724688773824, "grad_norm": 0.7734375, "learning_rate": 0.00017785408407682444, "loss": 4.3404, "step": 6256 }, { "epoch": 0.6487761569318705, "grad_norm": 0.74609375, "learning_rate": 0.00017784726650763846, "loss": 4.3098, "step": 6257 }, { "epoch": 0.6488798449863585, "grad_norm": 0.7265625, "learning_rate": 0.00017784044801993706, "loss": 4.305, "step": 6258 }, { "epoch": 0.6489835330408467, "grad_norm": 0.73046875, "learning_rate": 0.00017783362861380065, "loss": 4.3093, "step": 6259 }, { "epoch": 0.6490872210953347, "grad_norm": 0.67578125, "learning_rate": 0.00017782680828930968, "loss": 4.3413, "step": 6260 }, { "epoch": 0.6491909091498228, "grad_norm": 0.671875, "learning_rate": 0.00017781998704654466, "loss": 4.2956, "step": 6261 }, { "epoch": 0.6492945972043108, "grad_norm": 0.64453125, "learning_rate": 0.00017781316488558603, "loss": 4.3282, "step": 6262 }, { "epoch": 0.649398285258799, "grad_norm": 0.68359375, "learning_rate": 0.00017780634180651432, "loss": 4.2974, "step": 6263 }, { "epoch": 0.649501973313287, "grad_norm": 0.609375, "learning_rate": 0.00017779951780941, "loss": 4.3213, "step": 6264 }, { "epoch": 0.6496056613677751, "grad_norm": 0.6484375, "learning_rate": 0.00017779269289435365, "loss": 4.3303, "step": 6265 }, { "epoch": 0.6497093494222631, "grad_norm": 0.67578125, "learning_rate": 0.00017778586706142572, "loss": 4.3236, "step": 6266 }, { "epoch": 0.6498130374767512, "grad_norm": 0.66015625, "learning_rate": 0.00017777904031070682, "loss": 4.3598, "step": 6267 }, { "epoch": 0.6499167255312392, "grad_norm": 0.64453125, "learning_rate": 0.0001777722126422774, "loss": 4.3371, "step": 6268 }, { "epoch": 0.6500204135857274, "grad_norm": 0.6953125, "learning_rate": 0.00017776538405621812, "loss": 4.3058, "step": 6269 }, { "epoch": 0.6501241016402154, "grad_norm": 0.6328125, "learning_rate": 0.00017775855455260954, "loss": 4.2711, "step": 6270 }, { "epoch": 0.6502277896947035, "grad_norm": 0.6875, "learning_rate": 0.00017775172413153216, "loss": 4.3393, "step": 6271 }, { "epoch": 0.6503314777491915, "grad_norm": 0.640625, "learning_rate": 0.00017774489279306663, "loss": 4.3036, "step": 6272 }, { "epoch": 0.6504351658036797, "grad_norm": 0.63671875, "learning_rate": 0.00017773806053729353, "loss": 4.3239, "step": 6273 }, { "epoch": 0.6505388538581677, "grad_norm": 0.6328125, "learning_rate": 0.00017773122736429353, "loss": 4.3377, "step": 6274 }, { "epoch": 0.6506425419126558, "grad_norm": 0.59375, "learning_rate": 0.0001777243932741472, "loss": 4.3155, "step": 6275 }, { "epoch": 0.6507462299671438, "grad_norm": 0.625, "learning_rate": 0.00017771755826693518, "loss": 4.2653, "step": 6276 }, { "epoch": 0.650849918021632, "grad_norm": 0.6640625, "learning_rate": 0.00017771072234273812, "loss": 4.3229, "step": 6277 }, { "epoch": 0.65095360607612, "grad_norm": 0.640625, "learning_rate": 0.00017770388550163667, "loss": 4.3287, "step": 6278 }, { "epoch": 0.6510572941306081, "grad_norm": 0.67578125, "learning_rate": 0.0001776970477437115, "loss": 4.2721, "step": 6279 }, { "epoch": 0.6511609821850961, "grad_norm": 0.6015625, "learning_rate": 0.00017769020906904332, "loss": 4.3, "step": 6280 }, { "epoch": 0.6512646702395842, "grad_norm": 0.66796875, "learning_rate": 0.00017768336947771282, "loss": 4.3336, "step": 6281 }, { "epoch": 0.6513683582940722, "grad_norm": 0.64453125, "learning_rate": 0.00017767652896980062, "loss": 4.3592, "step": 6282 }, { "epoch": 0.6514720463485604, "grad_norm": 0.625, "learning_rate": 0.00017766968754538755, "loss": 4.3167, "step": 6283 }, { "epoch": 0.6515757344030484, "grad_norm": 0.71875, "learning_rate": 0.0001776628452045542, "loss": 4.278, "step": 6284 }, { "epoch": 0.6516794224575365, "grad_norm": 0.68359375, "learning_rate": 0.0001776560019473814, "loss": 4.3557, "step": 6285 }, { "epoch": 0.6517831105120245, "grad_norm": 0.72265625, "learning_rate": 0.00017764915777394985, "loss": 4.3205, "step": 6286 }, { "epoch": 0.6518867985665127, "grad_norm": 0.74609375, "learning_rate": 0.00017764231268434035, "loss": 4.2882, "step": 6287 }, { "epoch": 0.6519904866210007, "grad_norm": 0.64453125, "learning_rate": 0.0001776354666786336, "loss": 4.3018, "step": 6288 }, { "epoch": 0.6520941746754888, "grad_norm": 0.75390625, "learning_rate": 0.00017762861975691044, "loss": 4.3372, "step": 6289 }, { "epoch": 0.6521978627299768, "grad_norm": 0.66015625, "learning_rate": 0.0001776217719192516, "loss": 4.342, "step": 6290 }, { "epoch": 0.652301550784465, "grad_norm": 0.84375, "learning_rate": 0.0001776149231657379, "loss": 4.2978, "step": 6291 }, { "epoch": 0.652405238838953, "grad_norm": 0.6171875, "learning_rate": 0.00017760807349645016, "loss": 4.348, "step": 6292 }, { "epoch": 0.6525089268934411, "grad_norm": 0.72265625, "learning_rate": 0.00017760122291146917, "loss": 4.2836, "step": 6293 }, { "epoch": 0.6526126149479291, "grad_norm": 0.67578125, "learning_rate": 0.0001775943714108758, "loss": 4.3148, "step": 6294 }, { "epoch": 0.6527163030024172, "grad_norm": 0.68359375, "learning_rate": 0.00017758751899475087, "loss": 4.304, "step": 6295 }, { "epoch": 0.6528199910569052, "grad_norm": 0.6640625, "learning_rate": 0.0001775806656631752, "loss": 4.3578, "step": 6296 }, { "epoch": 0.6529236791113934, "grad_norm": 0.7109375, "learning_rate": 0.00017757381141622971, "loss": 4.3294, "step": 6297 }, { "epoch": 0.6530273671658815, "grad_norm": 0.62109375, "learning_rate": 0.00017756695625399522, "loss": 4.3513, "step": 6298 }, { "epoch": 0.6531310552203695, "grad_norm": 0.609375, "learning_rate": 0.00017756010017655266, "loss": 4.3071, "step": 6299 }, { "epoch": 0.6532347432748576, "grad_norm": 0.65625, "learning_rate": 0.00017755324318398288, "loss": 4.326, "step": 6300 }, { "epoch": 0.6533384313293457, "grad_norm": 0.6484375, "learning_rate": 0.00017754638527636682, "loss": 4.2787, "step": 6301 }, { "epoch": 0.6534421193838338, "grad_norm": 0.72265625, "learning_rate": 0.0001775395264537854, "loss": 4.2901, "step": 6302 }, { "epoch": 0.6535458074383218, "grad_norm": 0.6640625, "learning_rate": 0.0001775326667163195, "loss": 4.3453, "step": 6303 }, { "epoch": 0.6536494954928099, "grad_norm": 0.6796875, "learning_rate": 0.0001775258060640501, "loss": 4.3247, "step": 6304 }, { "epoch": 0.653753183547298, "grad_norm": 0.7265625, "learning_rate": 0.00017751894449705814, "loss": 4.3338, "step": 6305 }, { "epoch": 0.6538568716017861, "grad_norm": 0.61328125, "learning_rate": 0.00017751208201542457, "loss": 4.3341, "step": 6306 }, { "epoch": 0.6539605596562741, "grad_norm": 0.7109375, "learning_rate": 0.00017750521861923036, "loss": 4.334, "step": 6307 }, { "epoch": 0.6540642477107622, "grad_norm": 0.69921875, "learning_rate": 0.00017749835430855653, "loss": 4.3381, "step": 6308 }, { "epoch": 0.6541679357652502, "grad_norm": 0.7265625, "learning_rate": 0.000177491489083484, "loss": 4.3229, "step": 6309 }, { "epoch": 0.6542716238197384, "grad_norm": 0.66015625, "learning_rate": 0.00017748462294409385, "loss": 4.2798, "step": 6310 }, { "epoch": 0.6543753118742264, "grad_norm": 0.6953125, "learning_rate": 0.00017747775589046702, "loss": 4.3167, "step": 6311 }, { "epoch": 0.6544789999287145, "grad_norm": 0.72265625, "learning_rate": 0.0001774708879226846, "loss": 4.3234, "step": 6312 }, { "epoch": 0.6545826879832025, "grad_norm": 0.80078125, "learning_rate": 0.00017746401904082757, "loss": 4.3201, "step": 6313 }, { "epoch": 0.6546863760376906, "grad_norm": 0.6328125, "learning_rate": 0.00017745714924497698, "loss": 4.3389, "step": 6314 }, { "epoch": 0.6547900640921787, "grad_norm": 0.796875, "learning_rate": 0.00017745027853521394, "loss": 4.2997, "step": 6315 }, { "epoch": 0.6548937521466668, "grad_norm": 0.67578125, "learning_rate": 0.00017744340691161947, "loss": 4.3189, "step": 6316 }, { "epoch": 0.6549974402011548, "grad_norm": 0.7734375, "learning_rate": 0.00017743653437427465, "loss": 4.2672, "step": 6317 }, { "epoch": 0.6551011282556429, "grad_norm": 0.73046875, "learning_rate": 0.0001774296609232606, "loss": 4.3556, "step": 6318 }, { "epoch": 0.6552048163101309, "grad_norm": 0.6953125, "learning_rate": 0.00017742278655865837, "loss": 4.3441, "step": 6319 }, { "epoch": 0.6553085043646191, "grad_norm": 0.71875, "learning_rate": 0.00017741591128054912, "loss": 4.3296, "step": 6320 }, { "epoch": 0.6554121924191071, "grad_norm": 0.7578125, "learning_rate": 0.00017740903508901395, "loss": 4.3605, "step": 6321 }, { "epoch": 0.6555158804735952, "grad_norm": 0.6640625, "learning_rate": 0.00017740215798413397, "loss": 4.3189, "step": 6322 }, { "epoch": 0.6556195685280832, "grad_norm": 0.70703125, "learning_rate": 0.00017739527996599034, "loss": 4.3237, "step": 6323 }, { "epoch": 0.6557232565825714, "grad_norm": 0.6796875, "learning_rate": 0.00017738840103466422, "loss": 4.3346, "step": 6324 }, { "epoch": 0.6558269446370594, "grad_norm": 0.6953125, "learning_rate": 0.0001773815211902368, "loss": 4.2839, "step": 6325 }, { "epoch": 0.6559306326915475, "grad_norm": 0.6953125, "learning_rate": 0.0001773746404327892, "loss": 4.2721, "step": 6326 }, { "epoch": 0.6560343207460355, "grad_norm": 0.734375, "learning_rate": 0.00017736775876240265, "loss": 4.2895, "step": 6327 }, { "epoch": 0.6561380088005236, "grad_norm": 0.6875, "learning_rate": 0.0001773608761791583, "loss": 4.3336, "step": 6328 }, { "epoch": 0.6562416968550117, "grad_norm": 0.71875, "learning_rate": 0.00017735399268313743, "loss": 4.2716, "step": 6329 }, { "epoch": 0.6563453849094998, "grad_norm": 0.76171875, "learning_rate": 0.00017734710827442118, "loss": 4.2834, "step": 6330 }, { "epoch": 0.6564490729639878, "grad_norm": 0.75, "learning_rate": 0.0001773402229530908, "loss": 4.35, "step": 6331 }, { "epoch": 0.6565527610184759, "grad_norm": 0.71875, "learning_rate": 0.00017733333671922756, "loss": 4.3204, "step": 6332 }, { "epoch": 0.6566564490729639, "grad_norm": 0.64453125, "learning_rate": 0.0001773264495729127, "loss": 4.3374, "step": 6333 }, { "epoch": 0.6567601371274521, "grad_norm": 0.70703125, "learning_rate": 0.00017731956151422745, "loss": 4.3615, "step": 6334 }, { "epoch": 0.6568638251819401, "grad_norm": 0.67578125, "learning_rate": 0.00017731267254325316, "loss": 4.3116, "step": 6335 }, { "epoch": 0.6569675132364282, "grad_norm": 0.765625, "learning_rate": 0.00017730578266007097, "loss": 4.3278, "step": 6336 }, { "epoch": 0.6570712012909162, "grad_norm": 0.63671875, "learning_rate": 0.00017729889186476232, "loss": 4.3253, "step": 6337 }, { "epoch": 0.6571748893454044, "grad_norm": 0.7421875, "learning_rate": 0.00017729200015740844, "loss": 4.2923, "step": 6338 }, { "epoch": 0.6572785773998924, "grad_norm": 0.65625, "learning_rate": 0.00017728510753809066, "loss": 4.2721, "step": 6339 }, { "epoch": 0.6573822654543805, "grad_norm": 0.76171875, "learning_rate": 0.0001772782140068903, "loss": 4.3146, "step": 6340 }, { "epoch": 0.6574859535088685, "grad_norm": 0.62890625, "learning_rate": 0.00017727131956388873, "loss": 4.3398, "step": 6341 }, { "epoch": 0.6575896415633566, "grad_norm": 0.78125, "learning_rate": 0.00017726442420916723, "loss": 4.3239, "step": 6342 }, { "epoch": 0.6576933296178448, "grad_norm": 0.75, "learning_rate": 0.0001772575279428072, "loss": 4.3379, "step": 6343 }, { "epoch": 0.6577970176723328, "grad_norm": 0.7421875, "learning_rate": 0.00017725063076489003, "loss": 4.3078, "step": 6344 }, { "epoch": 0.6579007057268209, "grad_norm": 0.80078125, "learning_rate": 0.00017724373267549704, "loss": 4.3641, "step": 6345 }, { "epoch": 0.6580043937813089, "grad_norm": 0.72265625, "learning_rate": 0.00017723683367470966, "loss": 4.3075, "step": 6346 }, { "epoch": 0.658108081835797, "grad_norm": 0.75, "learning_rate": 0.00017722993376260933, "loss": 4.3449, "step": 6347 }, { "epoch": 0.6582117698902851, "grad_norm": 0.64453125, "learning_rate": 0.00017722303293927737, "loss": 4.345, "step": 6348 }, { "epoch": 0.6583154579447732, "grad_norm": 0.71484375, "learning_rate": 0.00017721613120479524, "loss": 4.3421, "step": 6349 }, { "epoch": 0.6584191459992612, "grad_norm": 0.640625, "learning_rate": 0.00017720922855924442, "loss": 4.31, "step": 6350 }, { "epoch": 0.6585228340537493, "grad_norm": 0.68359375, "learning_rate": 0.0001772023250027063, "loss": 4.2911, "step": 6351 }, { "epoch": 0.6586265221082374, "grad_norm": 0.6328125, "learning_rate": 0.0001771954205352624, "loss": 4.3133, "step": 6352 }, { "epoch": 0.6587302101627255, "grad_norm": 0.68359375, "learning_rate": 0.00017718851515699407, "loss": 4.2883, "step": 6353 }, { "epoch": 0.6588338982172135, "grad_norm": 0.75390625, "learning_rate": 0.00017718160886798288, "loss": 4.3507, "step": 6354 }, { "epoch": 0.6589375862717016, "grad_norm": 0.703125, "learning_rate": 0.00017717470166831028, "loss": 4.3606, "step": 6355 }, { "epoch": 0.6590412743261896, "grad_norm": 0.69140625, "learning_rate": 0.0001771677935580578, "loss": 4.3079, "step": 6356 }, { "epoch": 0.6591449623806778, "grad_norm": 0.72265625, "learning_rate": 0.00017716088453730692, "loss": 4.3126, "step": 6357 }, { "epoch": 0.6592486504351658, "grad_norm": 0.71484375, "learning_rate": 0.0001771539746061391, "loss": 4.3268, "step": 6358 }, { "epoch": 0.6593523384896539, "grad_norm": 0.75, "learning_rate": 0.00017714706376463602, "loss": 4.3424, "step": 6359 }, { "epoch": 0.6594560265441419, "grad_norm": 0.67578125, "learning_rate": 0.00017714015201287912, "loss": 4.3265, "step": 6360 }, { "epoch": 0.65955971459863, "grad_norm": 0.7421875, "learning_rate": 0.00017713323935094995, "loss": 4.3295, "step": 6361 }, { "epoch": 0.6596634026531181, "grad_norm": 0.6640625, "learning_rate": 0.0001771263257789301, "loss": 4.3506, "step": 6362 }, { "epoch": 0.6597670907076062, "grad_norm": 0.75390625, "learning_rate": 0.0001771194112969011, "loss": 4.3425, "step": 6363 }, { "epoch": 0.6598707787620942, "grad_norm": 0.68359375, "learning_rate": 0.0001771124959049446, "loss": 4.3069, "step": 6364 }, { "epoch": 0.6599744668165823, "grad_norm": 0.66015625, "learning_rate": 0.00017710557960314214, "loss": 4.3119, "step": 6365 }, { "epoch": 0.6600781548710704, "grad_norm": 0.68359375, "learning_rate": 0.00017709866239157536, "loss": 4.3332, "step": 6366 }, { "epoch": 0.6601818429255585, "grad_norm": 0.6875, "learning_rate": 0.00017709174427032583, "loss": 4.3551, "step": 6367 }, { "epoch": 0.6602855309800465, "grad_norm": 0.74609375, "learning_rate": 0.00017708482523947524, "loss": 4.3009, "step": 6368 }, { "epoch": 0.6603892190345346, "grad_norm": 0.65234375, "learning_rate": 0.0001770779052991052, "loss": 4.2607, "step": 6369 }, { "epoch": 0.6604929070890226, "grad_norm": 0.73046875, "learning_rate": 0.00017707098444929732, "loss": 4.277, "step": 6370 }, { "epoch": 0.6605965951435108, "grad_norm": 0.64453125, "learning_rate": 0.00017706406269013333, "loss": 4.3004, "step": 6371 }, { "epoch": 0.6607002831979988, "grad_norm": 0.7578125, "learning_rate": 0.0001770571400216948, "loss": 4.325, "step": 6372 }, { "epoch": 0.6608039712524869, "grad_norm": 0.6640625, "learning_rate": 0.00017705021644406354, "loss": 4.2894, "step": 6373 }, { "epoch": 0.6609076593069749, "grad_norm": 0.734375, "learning_rate": 0.00017704329195732113, "loss": 4.3139, "step": 6374 }, { "epoch": 0.661011347361463, "grad_norm": 0.734375, "learning_rate": 0.0001770363665615493, "loss": 4.3121, "step": 6375 }, { "epoch": 0.6611150354159511, "grad_norm": 0.765625, "learning_rate": 0.00017702944025682981, "loss": 4.3245, "step": 6376 }, { "epoch": 0.6612187234704392, "grad_norm": 0.83203125, "learning_rate": 0.00017702251304324435, "loss": 4.328, "step": 6377 }, { "epoch": 0.6613224115249272, "grad_norm": 0.7265625, "learning_rate": 0.00017701558492087463, "loss": 4.329, "step": 6378 }, { "epoch": 0.6614260995794153, "grad_norm": 0.67578125, "learning_rate": 0.00017700865588980244, "loss": 4.3363, "step": 6379 }, { "epoch": 0.6615297876339034, "grad_norm": 0.82421875, "learning_rate": 0.0001770017259501095, "loss": 4.2961, "step": 6380 }, { "epoch": 0.6616334756883915, "grad_norm": 0.6484375, "learning_rate": 0.0001769947951018776, "loss": 4.3053, "step": 6381 }, { "epoch": 0.6617371637428795, "grad_norm": 0.7734375, "learning_rate": 0.00017698786334518848, "loss": 4.3419, "step": 6382 }, { "epoch": 0.6618408517973676, "grad_norm": 0.640625, "learning_rate": 0.00017698093068012398, "loss": 4.3493, "step": 6383 }, { "epoch": 0.6619445398518556, "grad_norm": 0.74609375, "learning_rate": 0.00017697399710676586, "loss": 4.2962, "step": 6384 }, { "epoch": 0.6620482279063438, "grad_norm": 0.66796875, "learning_rate": 0.00017696706262519592, "loss": 4.3222, "step": 6385 }, { "epoch": 0.6621519159608318, "grad_norm": 0.73828125, "learning_rate": 0.00017696012723549602, "loss": 4.2923, "step": 6386 }, { "epoch": 0.6622556040153199, "grad_norm": 0.6953125, "learning_rate": 0.00017695319093774796, "loss": 4.3247, "step": 6387 }, { "epoch": 0.662359292069808, "grad_norm": 0.70703125, "learning_rate": 0.0001769462537320336, "loss": 4.2963, "step": 6388 }, { "epoch": 0.662462980124296, "grad_norm": 0.640625, "learning_rate": 0.00017693931561843477, "loss": 4.3119, "step": 6389 }, { "epoch": 0.6625666681787842, "grad_norm": 0.71484375, "learning_rate": 0.00017693237659703335, "loss": 4.3639, "step": 6390 }, { "epoch": 0.6626703562332722, "grad_norm": 0.68359375, "learning_rate": 0.00017692543666791123, "loss": 4.2999, "step": 6391 }, { "epoch": 0.6627740442877603, "grad_norm": 0.72265625, "learning_rate": 0.00017691849583115023, "loss": 4.3166, "step": 6392 }, { "epoch": 0.6628777323422483, "grad_norm": 0.7109375, "learning_rate": 0.0001769115540868323, "loss": 4.3059, "step": 6393 }, { "epoch": 0.6629814203967365, "grad_norm": 0.78125, "learning_rate": 0.00017690461143503932, "loss": 4.3199, "step": 6394 }, { "epoch": 0.6630851084512245, "grad_norm": 0.625, "learning_rate": 0.00017689766787585325, "loss": 4.3414, "step": 6395 }, { "epoch": 0.6631887965057126, "grad_norm": 0.8203125, "learning_rate": 0.00017689072340935596, "loss": 4.2836, "step": 6396 }, { "epoch": 0.6632924845602006, "grad_norm": 0.7109375, "learning_rate": 0.00017688377803562938, "loss": 4.3232, "step": 6397 }, { "epoch": 0.6633961726146888, "grad_norm": 0.7265625, "learning_rate": 0.00017687683175475556, "loss": 4.3835, "step": 6398 }, { "epoch": 0.6634998606691768, "grad_norm": 0.77734375, "learning_rate": 0.00017686988456681632, "loss": 4.3314, "step": 6399 }, { "epoch": 0.6636035487236649, "grad_norm": 0.79296875, "learning_rate": 0.00017686293647189373, "loss": 4.3314, "step": 6400 }, { "epoch": 0.6637072367781529, "grad_norm": 0.75390625, "learning_rate": 0.00017685598747006976, "loss": 4.2765, "step": 6401 }, { "epoch": 0.663810924832641, "grad_norm": 0.84765625, "learning_rate": 0.00017684903756142635, "loss": 4.2805, "step": 6402 }, { "epoch": 0.663914612887129, "grad_norm": 0.671875, "learning_rate": 0.0001768420867460455, "loss": 4.3317, "step": 6403 }, { "epoch": 0.6640183009416172, "grad_norm": 0.76171875, "learning_rate": 0.0001768351350240093, "loss": 4.3102, "step": 6404 }, { "epoch": 0.6641219889961052, "grad_norm": 0.71875, "learning_rate": 0.00017682818239539972, "loss": 4.3507, "step": 6405 }, { "epoch": 0.6642256770505933, "grad_norm": 0.73828125, "learning_rate": 0.0001768212288602988, "loss": 4.2997, "step": 6406 }, { "epoch": 0.6643293651050813, "grad_norm": 0.73046875, "learning_rate": 0.00017681427441878855, "loss": 4.2939, "step": 6407 }, { "epoch": 0.6644330531595695, "grad_norm": 0.7265625, "learning_rate": 0.00017680731907095107, "loss": 4.3014, "step": 6408 }, { "epoch": 0.6645367412140575, "grad_norm": 0.76953125, "learning_rate": 0.00017680036281686844, "loss": 4.3149, "step": 6409 }, { "epoch": 0.6646404292685456, "grad_norm": 0.7578125, "learning_rate": 0.00017679340565662267, "loss": 4.2542, "step": 6410 }, { "epoch": 0.6647441173230336, "grad_norm": 0.79296875, "learning_rate": 0.00017678644759029592, "loss": 4.333, "step": 6411 }, { "epoch": 0.6648478053775218, "grad_norm": 0.69921875, "learning_rate": 0.00017677948861797026, "loss": 4.3367, "step": 6412 }, { "epoch": 0.6649514934320098, "grad_norm": 0.7421875, "learning_rate": 0.00017677252873972776, "loss": 4.3524, "step": 6413 }, { "epoch": 0.6650551814864979, "grad_norm": 0.734375, "learning_rate": 0.00017676556795565061, "loss": 4.318, "step": 6414 }, { "epoch": 0.6651588695409859, "grad_norm": 0.7578125, "learning_rate": 0.0001767586062658209, "loss": 4.3213, "step": 6415 }, { "epoch": 0.665262557595474, "grad_norm": 0.8125, "learning_rate": 0.00017675164367032077, "loss": 4.3024, "step": 6416 }, { "epoch": 0.665366245649962, "grad_norm": 0.8359375, "learning_rate": 0.00017674468016923232, "loss": 4.2829, "step": 6417 }, { "epoch": 0.6654699337044502, "grad_norm": 0.6953125, "learning_rate": 0.00017673771576263782, "loss": 4.3319, "step": 6418 }, { "epoch": 0.6655736217589382, "grad_norm": 0.87890625, "learning_rate": 0.00017673075045061937, "loss": 4.3104, "step": 6419 }, { "epoch": 0.6656773098134263, "grad_norm": 0.74609375, "learning_rate": 0.0001767237842332592, "loss": 4.3173, "step": 6420 }, { "epoch": 0.6657809978679143, "grad_norm": 0.90625, "learning_rate": 0.00017671681711063945, "loss": 4.3071, "step": 6421 }, { "epoch": 0.6658846859224025, "grad_norm": 0.828125, "learning_rate": 0.00017670984908284236, "loss": 4.338, "step": 6422 }, { "epoch": 0.6659883739768905, "grad_norm": 0.85546875, "learning_rate": 0.00017670288014995014, "loss": 4.3091, "step": 6423 }, { "epoch": 0.6660920620313786, "grad_norm": 0.77734375, "learning_rate": 0.000176695910312045, "loss": 4.3015, "step": 6424 }, { "epoch": 0.6661957500858666, "grad_norm": 0.8046875, "learning_rate": 0.0001766889395692092, "loss": 4.2442, "step": 6425 }, { "epoch": 0.6662994381403547, "grad_norm": 0.6953125, "learning_rate": 0.00017668196792152498, "loss": 4.2853, "step": 6426 }, { "epoch": 0.6664031261948428, "grad_norm": 0.91015625, "learning_rate": 0.00017667499536907458, "loss": 4.2546, "step": 6427 }, { "epoch": 0.6665068142493309, "grad_norm": 0.75390625, "learning_rate": 0.00017666802191194033, "loss": 4.3191, "step": 6428 }, { "epoch": 0.6666105023038189, "grad_norm": 0.85546875, "learning_rate": 0.0001766610475502044, "loss": 4.2634, "step": 6429 }, { "epoch": 0.666714190358307, "grad_norm": 0.734375, "learning_rate": 0.00017665407228394916, "loss": 4.3394, "step": 6430 }, { "epoch": 0.666817878412795, "grad_norm": 0.796875, "learning_rate": 0.0001766470961132569, "loss": 4.3419, "step": 6431 }, { "epoch": 0.6669215664672832, "grad_norm": 0.828125, "learning_rate": 0.00017664011903820994, "loss": 4.2822, "step": 6432 }, { "epoch": 0.6670252545217713, "grad_norm": 0.7421875, "learning_rate": 0.00017663314105889058, "loss": 4.3083, "step": 6433 }, { "epoch": 0.6671289425762593, "grad_norm": 0.91796875, "learning_rate": 0.00017662616217538118, "loss": 4.3549, "step": 6434 }, { "epoch": 0.6672326306307474, "grad_norm": 0.8046875, "learning_rate": 0.00017661918238776403, "loss": 4.2849, "step": 6435 }, { "epoch": 0.6673363186852355, "grad_norm": 0.7890625, "learning_rate": 0.0001766122016961215, "loss": 4.3556, "step": 6436 }, { "epoch": 0.6674400067397236, "grad_norm": 0.78515625, "learning_rate": 0.00017660522010053603, "loss": 4.3536, "step": 6437 }, { "epoch": 0.6675436947942116, "grad_norm": 0.6875, "learning_rate": 0.0001765982376010899, "loss": 4.3415, "step": 6438 }, { "epoch": 0.6676473828486997, "grad_norm": 0.7421875, "learning_rate": 0.00017659125419786556, "loss": 4.2903, "step": 6439 }, { "epoch": 0.6677510709031877, "grad_norm": 0.765625, "learning_rate": 0.00017658426989094534, "loss": 4.297, "step": 6440 }, { "epoch": 0.6678547589576759, "grad_norm": 0.8125, "learning_rate": 0.00017657728468041173, "loss": 4.2782, "step": 6441 }, { "epoch": 0.6679584470121639, "grad_norm": 0.81640625, "learning_rate": 0.00017657029856634707, "loss": 4.2852, "step": 6442 }, { "epoch": 0.668062135066652, "grad_norm": 0.87109375, "learning_rate": 0.00017656331154883385, "loss": 4.3173, "step": 6443 }, { "epoch": 0.66816582312114, "grad_norm": 0.72265625, "learning_rate": 0.00017655632362795448, "loss": 4.3095, "step": 6444 }, { "epoch": 0.6682695111756282, "grad_norm": 0.8046875, "learning_rate": 0.0001765493348037914, "loss": 4.2713, "step": 6445 }, { "epoch": 0.6683731992301162, "grad_norm": 0.703125, "learning_rate": 0.00017654234507642711, "loss": 4.3224, "step": 6446 }, { "epoch": 0.6684768872846043, "grad_norm": 0.7109375, "learning_rate": 0.00017653535444594405, "loss": 4.3031, "step": 6447 }, { "epoch": 0.6685805753390923, "grad_norm": 0.7421875, "learning_rate": 0.0001765283629124247, "loss": 4.3493, "step": 6448 }, { "epoch": 0.6686842633935804, "grad_norm": 0.6875, "learning_rate": 0.00017652137047595155, "loss": 4.3091, "step": 6449 }, { "epoch": 0.6687879514480685, "grad_norm": 0.66015625, "learning_rate": 0.00017651437713660714, "loss": 4.3148, "step": 6450 }, { "epoch": 0.6688916395025566, "grad_norm": 0.66796875, "learning_rate": 0.00017650738289447398, "loss": 4.3442, "step": 6451 }, { "epoch": 0.6689953275570446, "grad_norm": 0.6640625, "learning_rate": 0.00017650038774963452, "loss": 4.352, "step": 6452 }, { "epoch": 0.6690990156115327, "grad_norm": 0.625, "learning_rate": 0.00017649339170217138, "loss": 4.3011, "step": 6453 }, { "epoch": 0.6692027036660207, "grad_norm": 0.72265625, "learning_rate": 0.00017648639475216705, "loss": 4.3289, "step": 6454 }, { "epoch": 0.6693063917205089, "grad_norm": 0.6875, "learning_rate": 0.00017647939689970413, "loss": 4.3572, "step": 6455 }, { "epoch": 0.6694100797749969, "grad_norm": 0.734375, "learning_rate": 0.00017647239814486517, "loss": 4.2908, "step": 6456 }, { "epoch": 0.669513767829485, "grad_norm": 0.70703125, "learning_rate": 0.00017646539848773275, "loss": 4.339, "step": 6457 }, { "epoch": 0.669617455883973, "grad_norm": 0.7421875, "learning_rate": 0.00017645839792838946, "loss": 4.3345, "step": 6458 }, { "epoch": 0.6697211439384612, "grad_norm": 0.8828125, "learning_rate": 0.00017645139646691788, "loss": 4.3301, "step": 6459 }, { "epoch": 0.6698248319929492, "grad_norm": 0.75, "learning_rate": 0.00017644439410340062, "loss": 4.3495, "step": 6460 }, { "epoch": 0.6699285200474373, "grad_norm": 0.796875, "learning_rate": 0.00017643739083792034, "loss": 4.2701, "step": 6461 }, { "epoch": 0.6700322081019253, "grad_norm": 0.7890625, "learning_rate": 0.00017643038667055966, "loss": 4.3344, "step": 6462 }, { "epoch": 0.6701358961564134, "grad_norm": 0.8125, "learning_rate": 0.00017642338160140118, "loss": 4.2788, "step": 6463 }, { "epoch": 0.6702395842109015, "grad_norm": 0.7734375, "learning_rate": 0.00017641637563052756, "loss": 4.3089, "step": 6464 }, { "epoch": 0.6703432722653896, "grad_norm": 0.76953125, "learning_rate": 0.00017640936875802155, "loss": 4.2687, "step": 6465 }, { "epoch": 0.6704469603198776, "grad_norm": 0.81640625, "learning_rate": 0.0001764023609839657, "loss": 4.303, "step": 6466 }, { "epoch": 0.6705506483743657, "grad_norm": 0.86328125, "learning_rate": 0.00017639535230844276, "loss": 4.3321, "step": 6467 }, { "epoch": 0.6706543364288537, "grad_norm": 0.7421875, "learning_rate": 0.00017638834273153546, "loss": 4.3334, "step": 6468 }, { "epoch": 0.6707580244833419, "grad_norm": 0.828125, "learning_rate": 0.0001763813322533264, "loss": 4.3177, "step": 6469 }, { "epoch": 0.6708617125378299, "grad_norm": 0.76953125, "learning_rate": 0.0001763743208738984, "loss": 4.3005, "step": 6470 }, { "epoch": 0.670965400592318, "grad_norm": 0.76171875, "learning_rate": 0.00017636730859333413, "loss": 4.256, "step": 6471 }, { "epoch": 0.671069088646806, "grad_norm": 0.765625, "learning_rate": 0.00017636029541171633, "loss": 4.2535, "step": 6472 }, { "epoch": 0.6711727767012942, "grad_norm": 0.82421875, "learning_rate": 0.00017635328132912777, "loss": 4.3104, "step": 6473 }, { "epoch": 0.6712764647557822, "grad_norm": 0.78515625, "learning_rate": 0.0001763462663456512, "loss": 4.3169, "step": 6474 }, { "epoch": 0.6713801528102703, "grad_norm": 0.81640625, "learning_rate": 0.0001763392504613694, "loss": 4.3292, "step": 6475 }, { "epoch": 0.6714838408647583, "grad_norm": 0.765625, "learning_rate": 0.00017633223367636514, "loss": 4.317, "step": 6476 }, { "epoch": 0.6715875289192464, "grad_norm": 0.77734375, "learning_rate": 0.00017632521599072118, "loss": 4.3153, "step": 6477 }, { "epoch": 0.6716912169737346, "grad_norm": 0.76171875, "learning_rate": 0.00017631819740452037, "loss": 4.3043, "step": 6478 }, { "epoch": 0.6717949050282226, "grad_norm": 0.76953125, "learning_rate": 0.0001763111779178455, "loss": 4.3376, "step": 6479 }, { "epoch": 0.6718985930827107, "grad_norm": 0.609375, "learning_rate": 0.0001763041575307794, "loss": 4.3351, "step": 6480 }, { "epoch": 0.6720022811371987, "grad_norm": 0.75, "learning_rate": 0.0001762971362434049, "loss": 4.3147, "step": 6481 }, { "epoch": 0.6721059691916869, "grad_norm": 0.65625, "learning_rate": 0.00017629011405580482, "loss": 4.3209, "step": 6482 }, { "epoch": 0.6722096572461749, "grad_norm": 0.6875, "learning_rate": 0.00017628309096806206, "loss": 4.3264, "step": 6483 }, { "epoch": 0.672313345300663, "grad_norm": 0.6640625, "learning_rate": 0.00017627606698025945, "loss": 4.2596, "step": 6484 }, { "epoch": 0.672417033355151, "grad_norm": 0.765625, "learning_rate": 0.00017626904209247987, "loss": 4.3037, "step": 6485 }, { "epoch": 0.6725207214096391, "grad_norm": 0.734375, "learning_rate": 0.00017626201630480622, "loss": 4.3338, "step": 6486 }, { "epoch": 0.6726244094641272, "grad_norm": 0.69921875, "learning_rate": 0.00017625498961732142, "loss": 4.3313, "step": 6487 }, { "epoch": 0.6727280975186153, "grad_norm": 0.734375, "learning_rate": 0.0001762479620301083, "loss": 4.2831, "step": 6488 }, { "epoch": 0.6728317855731033, "grad_norm": 0.65234375, "learning_rate": 0.00017624093354324987, "loss": 4.3158, "step": 6489 }, { "epoch": 0.6729354736275914, "grad_norm": 0.734375, "learning_rate": 0.000176233904156829, "loss": 4.3364, "step": 6490 }, { "epoch": 0.6730391616820794, "grad_norm": 0.63671875, "learning_rate": 0.0001762268738709286, "loss": 4.3593, "step": 6491 }, { "epoch": 0.6731428497365676, "grad_norm": 0.76953125, "learning_rate": 0.00017621984268563173, "loss": 4.3485, "step": 6492 }, { "epoch": 0.6732465377910556, "grad_norm": 0.6953125, "learning_rate": 0.00017621281060102123, "loss": 4.3344, "step": 6493 }, { "epoch": 0.6733502258455437, "grad_norm": 0.7578125, "learning_rate": 0.00017620577761718015, "loss": 4.2714, "step": 6494 }, { "epoch": 0.6734539139000317, "grad_norm": 0.65234375, "learning_rate": 0.00017619874373419144, "loss": 4.3282, "step": 6495 }, { "epoch": 0.6735576019545199, "grad_norm": 0.796875, "learning_rate": 0.00017619170895213812, "loss": 4.2696, "step": 6496 }, { "epoch": 0.6736612900090079, "grad_norm": 0.671875, "learning_rate": 0.00017618467327110317, "loss": 4.3398, "step": 6497 }, { "epoch": 0.673764978063496, "grad_norm": 0.7734375, "learning_rate": 0.00017617763669116958, "loss": 4.3381, "step": 6498 }, { "epoch": 0.673868666117984, "grad_norm": 0.6796875, "learning_rate": 0.00017617059921242042, "loss": 4.3148, "step": 6499 }, { "epoch": 0.6739723541724721, "grad_norm": 0.71484375, "learning_rate": 0.0001761635608349387, "loss": 4.2602, "step": 6500 }, { "epoch": 0.6740760422269602, "grad_norm": 0.65625, "learning_rate": 0.00017615652155880747, "loss": 4.3324, "step": 6501 }, { "epoch": 0.6741797302814483, "grad_norm": 0.78515625, "learning_rate": 0.00017614948138410975, "loss": 4.341, "step": 6502 }, { "epoch": 0.6742834183359363, "grad_norm": 0.69921875, "learning_rate": 0.0001761424403109287, "loss": 4.3384, "step": 6503 }, { "epoch": 0.6743871063904244, "grad_norm": 0.76171875, "learning_rate": 0.0001761353983393473, "loss": 4.2924, "step": 6504 }, { "epoch": 0.6744907944449124, "grad_norm": 0.75, "learning_rate": 0.0001761283554694487, "loss": 4.3056, "step": 6505 }, { "epoch": 0.6745944824994006, "grad_norm": 0.71484375, "learning_rate": 0.000176121311701316, "loss": 4.3578, "step": 6506 }, { "epoch": 0.6746981705538886, "grad_norm": 0.765625, "learning_rate": 0.00017611426703503224, "loss": 4.327, "step": 6507 }, { "epoch": 0.6748018586083767, "grad_norm": 0.6796875, "learning_rate": 0.0001761072214706806, "loss": 4.3091, "step": 6508 }, { "epoch": 0.6749055466628647, "grad_norm": 0.7890625, "learning_rate": 0.0001761001750083442, "loss": 4.3275, "step": 6509 }, { "epoch": 0.6750092347173529, "grad_norm": 0.70703125, "learning_rate": 0.0001760931276481062, "loss": 4.316, "step": 6510 }, { "epoch": 0.6751129227718409, "grad_norm": 0.76171875, "learning_rate": 0.0001760860793900497, "loss": 4.3116, "step": 6511 }, { "epoch": 0.675216610826329, "grad_norm": 0.7265625, "learning_rate": 0.00017607903023425788, "loss": 4.3461, "step": 6512 }, { "epoch": 0.675320298880817, "grad_norm": 0.80859375, "learning_rate": 0.00017607198018081396, "loss": 4.3162, "step": 6513 }, { "epoch": 0.6754239869353051, "grad_norm": 0.82421875, "learning_rate": 0.00017606492922980104, "loss": 4.3131, "step": 6514 }, { "epoch": 0.6755276749897932, "grad_norm": 0.8359375, "learning_rate": 0.0001760578773813024, "loss": 4.2896, "step": 6515 }, { "epoch": 0.6756313630442813, "grad_norm": 0.8828125, "learning_rate": 0.00017605082463540117, "loss": 4.3247, "step": 6516 }, { "epoch": 0.6757350510987693, "grad_norm": 0.78125, "learning_rate": 0.0001760437709921806, "loss": 4.336, "step": 6517 }, { "epoch": 0.6758387391532574, "grad_norm": 0.90234375, "learning_rate": 0.00017603671645172395, "loss": 4.3142, "step": 6518 }, { "epoch": 0.6759424272077454, "grad_norm": 0.7734375, "learning_rate": 0.00017602966101411437, "loss": 4.2958, "step": 6519 }, { "epoch": 0.6760461152622336, "grad_norm": 0.8046875, "learning_rate": 0.00017602260467943517, "loss": 4.301, "step": 6520 }, { "epoch": 0.6761498033167217, "grad_norm": 0.734375, "learning_rate": 0.00017601554744776964, "loss": 4.3737, "step": 6521 }, { "epoch": 0.6762534913712097, "grad_norm": 0.9140625, "learning_rate": 0.00017600848931920098, "loss": 4.3691, "step": 6522 }, { "epoch": 0.6763571794256978, "grad_norm": 0.65234375, "learning_rate": 0.00017600143029381247, "loss": 4.3371, "step": 6523 }, { "epoch": 0.6764608674801859, "grad_norm": 0.8359375, "learning_rate": 0.00017599437037168746, "loss": 4.3148, "step": 6524 }, { "epoch": 0.676564555534674, "grad_norm": 0.58984375, "learning_rate": 0.00017598730955290917, "loss": 4.3323, "step": 6525 }, { "epoch": 0.676668243589162, "grad_norm": 0.77734375, "learning_rate": 0.00017598024783756095, "loss": 4.3187, "step": 6526 }, { "epoch": 0.6767719316436501, "grad_norm": 0.68359375, "learning_rate": 0.00017597318522572612, "loss": 4.3184, "step": 6527 }, { "epoch": 0.6768756196981381, "grad_norm": 0.66015625, "learning_rate": 0.00017596612171748803, "loss": 4.3484, "step": 6528 }, { "epoch": 0.6769793077526263, "grad_norm": 0.7265625, "learning_rate": 0.00017595905731292998, "loss": 4.3241, "step": 6529 }, { "epoch": 0.6770829958071143, "grad_norm": 0.6875, "learning_rate": 0.00017595199201213534, "loss": 4.3255, "step": 6530 }, { "epoch": 0.6771866838616024, "grad_norm": 0.8046875, "learning_rate": 0.00017594492581518752, "loss": 4.3215, "step": 6531 }, { "epoch": 0.6772903719160904, "grad_norm": 0.65625, "learning_rate": 0.00017593785872216982, "loss": 4.3453, "step": 6532 }, { "epoch": 0.6773940599705786, "grad_norm": 0.8359375, "learning_rate": 0.00017593079073316566, "loss": 4.3257, "step": 6533 }, { "epoch": 0.6774977480250666, "grad_norm": 0.69921875, "learning_rate": 0.00017592372184825847, "loss": 4.318, "step": 6534 }, { "epoch": 0.6776014360795547, "grad_norm": 0.7734375, "learning_rate": 0.0001759166520675316, "loss": 4.3414, "step": 6535 }, { "epoch": 0.6777051241340427, "grad_norm": 0.76171875, "learning_rate": 0.00017590958139106845, "loss": 4.3062, "step": 6536 }, { "epoch": 0.6778088121885308, "grad_norm": 0.73828125, "learning_rate": 0.0001759025098189525, "loss": 4.3148, "step": 6537 }, { "epoch": 0.6779125002430189, "grad_norm": 0.7421875, "learning_rate": 0.00017589543735126718, "loss": 4.2926, "step": 6538 }, { "epoch": 0.678016188297507, "grad_norm": 0.77734375, "learning_rate": 0.0001758883639880959, "loss": 4.3369, "step": 6539 }, { "epoch": 0.678119876351995, "grad_norm": 0.765625, "learning_rate": 0.00017588128972952216, "loss": 4.2984, "step": 6540 }, { "epoch": 0.6782235644064831, "grad_norm": 0.75, "learning_rate": 0.0001758742145756294, "loss": 4.3083, "step": 6541 }, { "epoch": 0.6783272524609711, "grad_norm": 0.73046875, "learning_rate": 0.0001758671385265011, "loss": 4.2944, "step": 6542 }, { "epoch": 0.6784309405154593, "grad_norm": 0.734375, "learning_rate": 0.00017586006158222078, "loss": 4.3253, "step": 6543 }, { "epoch": 0.6785346285699473, "grad_norm": 0.765625, "learning_rate": 0.0001758529837428719, "loss": 4.3334, "step": 6544 }, { "epoch": 0.6786383166244354, "grad_norm": 0.76953125, "learning_rate": 0.00017584590500853802, "loss": 4.3115, "step": 6545 }, { "epoch": 0.6787420046789234, "grad_norm": 0.71484375, "learning_rate": 0.0001758388253793026, "loss": 4.3329, "step": 6546 }, { "epoch": 0.6788456927334116, "grad_norm": 0.73046875, "learning_rate": 0.00017583174485524925, "loss": 4.2879, "step": 6547 }, { "epoch": 0.6789493807878996, "grad_norm": 0.7578125, "learning_rate": 0.00017582466343646144, "loss": 4.32, "step": 6548 }, { "epoch": 0.6790530688423877, "grad_norm": 0.77734375, "learning_rate": 0.00017581758112302276, "loss": 4.375, "step": 6549 }, { "epoch": 0.6791567568968757, "grad_norm": 0.6953125, "learning_rate": 0.00017581049791501677, "loss": 4.3272, "step": 6550 }, { "epoch": 0.6792604449513638, "grad_norm": 0.7109375, "learning_rate": 0.00017580341381252703, "loss": 4.2906, "step": 6551 }, { "epoch": 0.6793641330058519, "grad_norm": 0.796875, "learning_rate": 0.0001757963288156371, "loss": 4.3129, "step": 6552 }, { "epoch": 0.67946782106034, "grad_norm": 0.7734375, "learning_rate": 0.00017578924292443066, "loss": 4.2775, "step": 6553 }, { "epoch": 0.679571509114828, "grad_norm": 0.734375, "learning_rate": 0.00017578215613899128, "loss": 4.3718, "step": 6554 }, { "epoch": 0.6796751971693161, "grad_norm": 0.82421875, "learning_rate": 0.00017577506845940254, "loss": 4.2961, "step": 6555 }, { "epoch": 0.6797788852238041, "grad_norm": 0.78125, "learning_rate": 0.00017576797988574808, "loss": 4.294, "step": 6556 }, { "epoch": 0.6798825732782923, "grad_norm": 0.96875, "learning_rate": 0.00017576089041811151, "loss": 4.3179, "step": 6557 }, { "epoch": 0.6799862613327803, "grad_norm": 0.82421875, "learning_rate": 0.00017575380005657658, "loss": 4.2949, "step": 6558 }, { "epoch": 0.6800899493872684, "grad_norm": 0.98828125, "learning_rate": 0.00017574670880122687, "loss": 4.3174, "step": 6559 }, { "epoch": 0.6801936374417564, "grad_norm": 0.8203125, "learning_rate": 0.00017573961665214607, "loss": 4.3556, "step": 6560 }, { "epoch": 0.6802973254962446, "grad_norm": 0.84375, "learning_rate": 0.00017573252360941785, "loss": 4.3147, "step": 6561 }, { "epoch": 0.6804010135507326, "grad_norm": 0.93359375, "learning_rate": 0.00017572542967312586, "loss": 4.2997, "step": 6562 }, { "epoch": 0.6805047016052207, "grad_norm": 0.67578125, "learning_rate": 0.0001757183348433539, "loss": 4.3231, "step": 6563 }, { "epoch": 0.6806083896597087, "grad_norm": 0.89453125, "learning_rate": 0.0001757112391201856, "loss": 4.2858, "step": 6564 }, { "epoch": 0.6807120777141968, "grad_norm": 0.71875, "learning_rate": 0.00017570414250370472, "loss": 4.3065, "step": 6565 }, { "epoch": 0.680815765768685, "grad_norm": 0.78515625, "learning_rate": 0.00017569704499399496, "loss": 4.2943, "step": 6566 }, { "epoch": 0.680919453823173, "grad_norm": 0.6953125, "learning_rate": 0.00017568994659114008, "loss": 4.3219, "step": 6567 }, { "epoch": 0.6810231418776611, "grad_norm": 0.80078125, "learning_rate": 0.0001756828472952239, "loss": 4.3509, "step": 6568 }, { "epoch": 0.6811268299321491, "grad_norm": 0.76953125, "learning_rate": 0.00017567574710633006, "loss": 4.3214, "step": 6569 }, { "epoch": 0.6812305179866373, "grad_norm": 0.6796875, "learning_rate": 0.00017566864602454243, "loss": 4.3195, "step": 6570 }, { "epoch": 0.6813342060411253, "grad_norm": 0.76171875, "learning_rate": 0.00017566154404994471, "loss": 4.3133, "step": 6571 }, { "epoch": 0.6814378940956134, "grad_norm": 0.66015625, "learning_rate": 0.00017565444118262081, "loss": 4.3164, "step": 6572 }, { "epoch": 0.6815415821501014, "grad_norm": 0.69140625, "learning_rate": 0.00017564733742265445, "loss": 4.3009, "step": 6573 }, { "epoch": 0.6816452702045895, "grad_norm": 0.6171875, "learning_rate": 0.0001756402327701295, "loss": 4.3113, "step": 6574 }, { "epoch": 0.6817489582590776, "grad_norm": 0.6796875, "learning_rate": 0.0001756331272251297, "loss": 4.3586, "step": 6575 }, { "epoch": 0.6818526463135657, "grad_norm": 0.62890625, "learning_rate": 0.00017562602078773898, "loss": 4.3112, "step": 6576 }, { "epoch": 0.6819563343680537, "grad_norm": 0.65625, "learning_rate": 0.00017561891345804117, "loss": 4.3211, "step": 6577 }, { "epoch": 0.6820600224225418, "grad_norm": 0.66796875, "learning_rate": 0.00017561180523612008, "loss": 4.3358, "step": 6578 }, { "epoch": 0.6821637104770298, "grad_norm": 0.7109375, "learning_rate": 0.00017560469612205965, "loss": 4.3131, "step": 6579 }, { "epoch": 0.682267398531518, "grad_norm": 0.64453125, "learning_rate": 0.0001755975861159437, "loss": 4.3321, "step": 6580 }, { "epoch": 0.682371086586006, "grad_norm": 0.6640625, "learning_rate": 0.00017559047521785613, "loss": 4.2691, "step": 6581 }, { "epoch": 0.6824747746404941, "grad_norm": 0.71875, "learning_rate": 0.00017558336342788088, "loss": 4.3178, "step": 6582 }, { "epoch": 0.6825784626949821, "grad_norm": 0.7265625, "learning_rate": 0.00017557625074610185, "loss": 4.3003, "step": 6583 }, { "epoch": 0.6826821507494703, "grad_norm": 0.69140625, "learning_rate": 0.0001755691371726029, "loss": 4.2979, "step": 6584 }, { "epoch": 0.6827858388039583, "grad_norm": 0.71484375, "learning_rate": 0.00017556202270746805, "loss": 4.3183, "step": 6585 }, { "epoch": 0.6828895268584464, "grad_norm": 0.7109375, "learning_rate": 0.0001755549073507812, "loss": 4.2924, "step": 6586 }, { "epoch": 0.6829932149129344, "grad_norm": 0.65234375, "learning_rate": 0.0001755477911026263, "loss": 4.3058, "step": 6587 }, { "epoch": 0.6830969029674225, "grad_norm": 0.7421875, "learning_rate": 0.00017554067396308731, "loss": 4.2995, "step": 6588 }, { "epoch": 0.6832005910219106, "grad_norm": 0.76953125, "learning_rate": 0.00017553355593224822, "loss": 4.3376, "step": 6589 }, { "epoch": 0.6833042790763987, "grad_norm": 0.69921875, "learning_rate": 0.00017552643701019305, "loss": 4.2894, "step": 6590 }, { "epoch": 0.6834079671308867, "grad_norm": 0.67578125, "learning_rate": 0.0001755193171970057, "loss": 4.297, "step": 6591 }, { "epoch": 0.6835116551853748, "grad_norm": 0.73046875, "learning_rate": 0.00017551219649277028, "loss": 4.3418, "step": 6592 }, { "epoch": 0.6836153432398628, "grad_norm": 0.65234375, "learning_rate": 0.00017550507489757076, "loss": 4.3075, "step": 6593 }, { "epoch": 0.683719031294351, "grad_norm": 0.76953125, "learning_rate": 0.00017549795241149116, "loss": 4.3406, "step": 6594 }, { "epoch": 0.683822719348839, "grad_norm": 0.65234375, "learning_rate": 0.00017549082903461552, "loss": 4.2859, "step": 6595 }, { "epoch": 0.6839264074033271, "grad_norm": 0.76953125, "learning_rate": 0.0001754837047670279, "loss": 4.3241, "step": 6596 }, { "epoch": 0.6840300954578151, "grad_norm": 0.66796875, "learning_rate": 0.00017547657960881235, "loss": 4.3338, "step": 6597 }, { "epoch": 0.6841337835123033, "grad_norm": 0.73828125, "learning_rate": 0.00017546945356005294, "loss": 4.3292, "step": 6598 }, { "epoch": 0.6842374715667913, "grad_norm": 0.62109375, "learning_rate": 0.00017546232662083377, "loss": 4.3014, "step": 6599 }, { "epoch": 0.6843411596212794, "grad_norm": 0.6796875, "learning_rate": 0.00017545519879123887, "loss": 4.2827, "step": 6600 }, { "epoch": 0.6844448476757674, "grad_norm": 0.64453125, "learning_rate": 0.00017544807007135243, "loss": 4.2965, "step": 6601 }, { "epoch": 0.6845485357302555, "grad_norm": 0.671875, "learning_rate": 0.0001754409404612585, "loss": 4.2946, "step": 6602 }, { "epoch": 0.6846522237847436, "grad_norm": 0.63671875, "learning_rate": 0.00017543380996104123, "loss": 4.2934, "step": 6603 }, { "epoch": 0.6847559118392317, "grad_norm": 0.71875, "learning_rate": 0.00017542667857078472, "loss": 4.3368, "step": 6604 }, { "epoch": 0.6848595998937197, "grad_norm": 0.71484375, "learning_rate": 0.00017541954629057314, "loss": 4.3014, "step": 6605 }, { "epoch": 0.6849632879482078, "grad_norm": 0.71484375, "learning_rate": 0.00017541241312049062, "loss": 4.312, "step": 6606 }, { "epoch": 0.6850669760026958, "grad_norm": 0.6875, "learning_rate": 0.00017540527906062135, "loss": 4.3363, "step": 6607 }, { "epoch": 0.685170664057184, "grad_norm": 0.69921875, "learning_rate": 0.00017539814411104949, "loss": 4.2708, "step": 6608 }, { "epoch": 0.685274352111672, "grad_norm": 0.69921875, "learning_rate": 0.00017539100827185925, "loss": 4.3057, "step": 6609 }, { "epoch": 0.6853780401661601, "grad_norm": 0.80078125, "learning_rate": 0.0001753838715431348, "loss": 4.3111, "step": 6610 }, { "epoch": 0.6854817282206482, "grad_norm": 0.68359375, "learning_rate": 0.0001753767339249603, "loss": 4.2907, "step": 6611 }, { "epoch": 0.6855854162751363, "grad_norm": 0.7421875, "learning_rate": 0.00017536959541742007, "loss": 4.2412, "step": 6612 }, { "epoch": 0.6856891043296244, "grad_norm": 0.7265625, "learning_rate": 0.00017536245602059827, "loss": 4.2948, "step": 6613 }, { "epoch": 0.6857927923841124, "grad_norm": 0.77734375, "learning_rate": 0.00017535531573457914, "loss": 4.3325, "step": 6614 }, { "epoch": 0.6858964804386005, "grad_norm": 0.6875, "learning_rate": 0.00017534817455944698, "loss": 4.2668, "step": 6615 }, { "epoch": 0.6860001684930885, "grad_norm": 0.66015625, "learning_rate": 0.00017534103249528595, "loss": 4.3183, "step": 6616 }, { "epoch": 0.6861038565475767, "grad_norm": 0.7265625, "learning_rate": 0.0001753338895421804, "loss": 4.3117, "step": 6617 }, { "epoch": 0.6862075446020647, "grad_norm": 0.6484375, "learning_rate": 0.00017532674570021458, "loss": 4.3003, "step": 6618 }, { "epoch": 0.6863112326565528, "grad_norm": 0.62890625, "learning_rate": 0.0001753196009694728, "loss": 4.3061, "step": 6619 }, { "epoch": 0.6864149207110408, "grad_norm": 0.6953125, "learning_rate": 0.00017531245535003934, "loss": 4.3236, "step": 6620 }, { "epoch": 0.686518608765529, "grad_norm": 0.66015625, "learning_rate": 0.0001753053088419985, "loss": 4.3037, "step": 6621 }, { "epoch": 0.686622296820017, "grad_norm": 0.72265625, "learning_rate": 0.00017529816144543463, "loss": 4.302, "step": 6622 }, { "epoch": 0.6867259848745051, "grad_norm": 0.78515625, "learning_rate": 0.00017529101316043203, "loss": 4.3131, "step": 6623 }, { "epoch": 0.6868296729289931, "grad_norm": 0.734375, "learning_rate": 0.0001752838639870751, "loss": 4.3355, "step": 6624 }, { "epoch": 0.6869333609834812, "grad_norm": 0.73828125, "learning_rate": 0.00017527671392544812, "loss": 4.3358, "step": 6625 }, { "epoch": 0.6870370490379692, "grad_norm": 0.7734375, "learning_rate": 0.0001752695629756355, "loss": 4.2912, "step": 6626 }, { "epoch": 0.6871407370924574, "grad_norm": 0.76171875, "learning_rate": 0.00017526241113772158, "loss": 4.2923, "step": 6627 }, { "epoch": 0.6872444251469454, "grad_norm": 0.71875, "learning_rate": 0.00017525525841179077, "loss": 4.3022, "step": 6628 }, { "epoch": 0.6873481132014335, "grad_norm": 0.75, "learning_rate": 0.00017524810479792747, "loss": 4.3173, "step": 6629 }, { "epoch": 0.6874518012559215, "grad_norm": 0.6796875, "learning_rate": 0.0001752409502962161, "loss": 4.3274, "step": 6630 }, { "epoch": 0.6875554893104097, "grad_norm": 0.66796875, "learning_rate": 0.00017523379490674102, "loss": 4.3059, "step": 6631 }, { "epoch": 0.6876591773648977, "grad_norm": 0.7109375, "learning_rate": 0.00017522663862958667, "loss": 4.2875, "step": 6632 }, { "epoch": 0.6877628654193858, "grad_norm": 0.6953125, "learning_rate": 0.00017521948146483754, "loss": 4.2669, "step": 6633 }, { "epoch": 0.6878665534738738, "grad_norm": 0.671875, "learning_rate": 0.000175212323412578, "loss": 4.3299, "step": 6634 }, { "epoch": 0.687970241528362, "grad_norm": 0.6953125, "learning_rate": 0.00017520516447289258, "loss": 4.3181, "step": 6635 }, { "epoch": 0.68807392958285, "grad_norm": 0.66796875, "learning_rate": 0.00017519800464586572, "loss": 4.3058, "step": 6636 }, { "epoch": 0.6881776176373381, "grad_norm": 0.6875, "learning_rate": 0.00017519084393158185, "loss": 4.3505, "step": 6637 }, { "epoch": 0.6882813056918261, "grad_norm": 0.6015625, "learning_rate": 0.0001751836823301255, "loss": 4.3358, "step": 6638 }, { "epoch": 0.6883849937463142, "grad_norm": 0.70703125, "learning_rate": 0.00017517651984158122, "loss": 4.3308, "step": 6639 }, { "epoch": 0.6884886818008022, "grad_norm": 0.640625, "learning_rate": 0.00017516935646603345, "loss": 4.3068, "step": 6640 }, { "epoch": 0.6885923698552904, "grad_norm": 0.69921875, "learning_rate": 0.00017516219220356673, "loss": 4.3641, "step": 6641 }, { "epoch": 0.6886960579097784, "grad_norm": 0.64453125, "learning_rate": 0.0001751550270542656, "loss": 4.2864, "step": 6642 }, { "epoch": 0.6887997459642665, "grad_norm": 0.68359375, "learning_rate": 0.00017514786101821458, "loss": 4.2931, "step": 6643 }, { "epoch": 0.6889034340187545, "grad_norm": 0.70703125, "learning_rate": 0.00017514069409549823, "loss": 4.2846, "step": 6644 }, { "epoch": 0.6890071220732427, "grad_norm": 0.71875, "learning_rate": 0.00017513352628620115, "loss": 4.3249, "step": 6645 }, { "epoch": 0.6891108101277307, "grad_norm": 0.6796875, "learning_rate": 0.00017512635759040784, "loss": 4.2103, "step": 6646 }, { "epoch": 0.6892144981822188, "grad_norm": 0.71484375, "learning_rate": 0.00017511918800820292, "loss": 4.3311, "step": 6647 }, { "epoch": 0.6893181862367068, "grad_norm": 0.74609375, "learning_rate": 0.00017511201753967102, "loss": 4.3075, "step": 6648 }, { "epoch": 0.689421874291195, "grad_norm": 0.765625, "learning_rate": 0.00017510484618489668, "loss": 4.323, "step": 6649 }, { "epoch": 0.689525562345683, "grad_norm": 0.7109375, "learning_rate": 0.00017509767394396458, "loss": 4.3195, "step": 6650 }, { "epoch": 0.6896292504001711, "grad_norm": 0.81640625, "learning_rate": 0.00017509050081695925, "loss": 4.3298, "step": 6651 }, { "epoch": 0.6897329384546591, "grad_norm": 0.72265625, "learning_rate": 0.00017508332680396543, "loss": 4.2887, "step": 6652 }, { "epoch": 0.6898366265091472, "grad_norm": 0.76953125, "learning_rate": 0.0001750761519050677, "loss": 4.3267, "step": 6653 }, { "epoch": 0.6899403145636352, "grad_norm": 0.6796875, "learning_rate": 0.0001750689761203508, "loss": 4.3083, "step": 6654 }, { "epoch": 0.6900440026181234, "grad_norm": 0.72265625, "learning_rate": 0.00017506179944989928, "loss": 4.2902, "step": 6655 }, { "epoch": 0.6901476906726115, "grad_norm": 0.7109375, "learning_rate": 0.00017505462189379783, "loss": 4.3244, "step": 6656 }, { "epoch": 0.6902513787270995, "grad_norm": 0.82421875, "learning_rate": 0.00017504744345213122, "loss": 4.3278, "step": 6657 }, { "epoch": 0.6903550667815876, "grad_norm": 0.7265625, "learning_rate": 0.00017504026412498412, "loss": 4.3396, "step": 6658 }, { "epoch": 0.6904587548360757, "grad_norm": 0.74609375, "learning_rate": 0.0001750330839124412, "loss": 4.2807, "step": 6659 }, { "epoch": 0.6905624428905638, "grad_norm": 0.765625, "learning_rate": 0.0001750259028145872, "loss": 4.2965, "step": 6660 }, { "epoch": 0.6906661309450518, "grad_norm": 0.69921875, "learning_rate": 0.00017501872083150688, "loss": 4.3143, "step": 6661 }, { "epoch": 0.6907698189995399, "grad_norm": 0.74609375, "learning_rate": 0.00017501153796328493, "loss": 4.314, "step": 6662 }, { "epoch": 0.690873507054028, "grad_norm": 0.71484375, "learning_rate": 0.00017500435421000611, "loss": 4.2631, "step": 6663 }, { "epoch": 0.6909771951085161, "grad_norm": 0.75, "learning_rate": 0.00017499716957175524, "loss": 4.277, "step": 6664 }, { "epoch": 0.6910808831630041, "grad_norm": 0.73046875, "learning_rate": 0.00017498998404861702, "loss": 4.3147, "step": 6665 }, { "epoch": 0.6911845712174922, "grad_norm": 0.66796875, "learning_rate": 0.00017498279764067623, "loss": 4.2968, "step": 6666 }, { "epoch": 0.6912882592719802, "grad_norm": 0.7421875, "learning_rate": 0.00017497561034801772, "loss": 4.3201, "step": 6667 }, { "epoch": 0.6913919473264684, "grad_norm": 0.671875, "learning_rate": 0.00017496842217072626, "loss": 4.2557, "step": 6668 }, { "epoch": 0.6914956353809564, "grad_norm": 0.76171875, "learning_rate": 0.00017496123310888667, "loss": 4.3024, "step": 6669 }, { "epoch": 0.6915993234354445, "grad_norm": 0.703125, "learning_rate": 0.00017495404316258376, "loss": 4.3402, "step": 6670 }, { "epoch": 0.6917030114899325, "grad_norm": 0.83984375, "learning_rate": 0.00017494685233190234, "loss": 4.3003, "step": 6671 }, { "epoch": 0.6918066995444206, "grad_norm": 0.7421875, "learning_rate": 0.0001749396606169273, "loss": 4.2904, "step": 6672 }, { "epoch": 0.6919103875989087, "grad_norm": 0.85546875, "learning_rate": 0.0001749324680177435, "loss": 4.2703, "step": 6673 }, { "epoch": 0.6920140756533968, "grad_norm": 0.75, "learning_rate": 0.00017492527453443578, "loss": 4.3059, "step": 6674 }, { "epoch": 0.6921177637078848, "grad_norm": 0.87890625, "learning_rate": 0.00017491808016708899, "loss": 4.2864, "step": 6675 }, { "epoch": 0.6922214517623729, "grad_norm": 0.6953125, "learning_rate": 0.00017491088491578807, "loss": 4.2779, "step": 6676 }, { "epoch": 0.692325139816861, "grad_norm": 0.87890625, "learning_rate": 0.0001749036887806179, "loss": 4.3136, "step": 6677 }, { "epoch": 0.6924288278713491, "grad_norm": 0.79296875, "learning_rate": 0.00017489649176166336, "loss": 4.32, "step": 6678 }, { "epoch": 0.6925325159258371, "grad_norm": 0.78515625, "learning_rate": 0.0001748892938590094, "loss": 4.3044, "step": 6679 }, { "epoch": 0.6926362039803252, "grad_norm": 0.8046875, "learning_rate": 0.00017488209507274095, "loss": 4.3141, "step": 6680 }, { "epoch": 0.6927398920348132, "grad_norm": 0.828125, "learning_rate": 0.0001748748954029429, "loss": 4.3011, "step": 6681 }, { "epoch": 0.6928435800893014, "grad_norm": 0.8046875, "learning_rate": 0.00017486769484970026, "loss": 4.2975, "step": 6682 }, { "epoch": 0.6929472681437894, "grad_norm": 0.76953125, "learning_rate": 0.0001748604934130979, "loss": 4.323, "step": 6683 }, { "epoch": 0.6930509561982775, "grad_norm": 0.828125, "learning_rate": 0.00017485329109322089, "loss": 4.3262, "step": 6684 }, { "epoch": 0.6931546442527655, "grad_norm": 0.875, "learning_rate": 0.00017484608789015418, "loss": 4.2892, "step": 6685 }, { "epoch": 0.6932583323072536, "grad_norm": 0.80078125, "learning_rate": 0.0001748388838039827, "loss": 4.3314, "step": 6686 }, { "epoch": 0.6933620203617417, "grad_norm": 0.828125, "learning_rate": 0.00017483167883479157, "loss": 4.3239, "step": 6687 }, { "epoch": 0.6934657084162298, "grad_norm": 0.8125, "learning_rate": 0.0001748244729826657, "loss": 4.2554, "step": 6688 }, { "epoch": 0.6935693964707178, "grad_norm": 0.83203125, "learning_rate": 0.00017481726624769012, "loss": 4.301, "step": 6689 }, { "epoch": 0.6936730845252059, "grad_norm": 0.83203125, "learning_rate": 0.0001748100586299499, "loss": 4.2807, "step": 6690 }, { "epoch": 0.693776772579694, "grad_norm": 0.76171875, "learning_rate": 0.00017480285012953006, "loss": 4.3359, "step": 6691 }, { "epoch": 0.6938804606341821, "grad_norm": 0.765625, "learning_rate": 0.00017479564074651568, "loss": 4.3141, "step": 6692 }, { "epoch": 0.6939841486886701, "grad_norm": 0.81640625, "learning_rate": 0.00017478843048099178, "loss": 4.2948, "step": 6693 }, { "epoch": 0.6940878367431582, "grad_norm": 0.78125, "learning_rate": 0.00017478121933304345, "loss": 4.3005, "step": 6694 }, { "epoch": 0.6941915247976462, "grad_norm": 0.8125, "learning_rate": 0.0001747740073027558, "loss": 4.3416, "step": 6695 }, { "epoch": 0.6942952128521344, "grad_norm": 0.7421875, "learning_rate": 0.0001747667943902139, "loss": 4.3027, "step": 6696 }, { "epoch": 0.6943989009066224, "grad_norm": 0.80859375, "learning_rate": 0.00017475958059550285, "loss": 4.3, "step": 6697 }, { "epoch": 0.6945025889611105, "grad_norm": 0.7734375, "learning_rate": 0.0001747523659187078, "loss": 4.3291, "step": 6698 }, { "epoch": 0.6946062770155985, "grad_norm": 0.7734375, "learning_rate": 0.0001747451503599138, "loss": 4.3234, "step": 6699 }, { "epoch": 0.6947099650700866, "grad_norm": 0.83203125, "learning_rate": 0.00017473793391920608, "loss": 4.3066, "step": 6700 }, { "epoch": 0.6948136531245748, "grad_norm": 0.796875, "learning_rate": 0.0001747307165966697, "loss": 4.3168, "step": 6701 }, { "epoch": 0.6949173411790628, "grad_norm": 0.76171875, "learning_rate": 0.00017472349839238989, "loss": 4.3572, "step": 6702 }, { "epoch": 0.6950210292335509, "grad_norm": 0.796875, "learning_rate": 0.00017471627930645175, "loss": 4.2868, "step": 6703 }, { "epoch": 0.6951247172880389, "grad_norm": 0.7265625, "learning_rate": 0.00017470905933894052, "loss": 4.3121, "step": 6704 }, { "epoch": 0.6952284053425271, "grad_norm": 0.78125, "learning_rate": 0.00017470183848994139, "loss": 4.3402, "step": 6705 }, { "epoch": 0.6953320933970151, "grad_norm": 0.76953125, "learning_rate": 0.0001746946167595395, "loss": 4.301, "step": 6706 }, { "epoch": 0.6954357814515032, "grad_norm": 0.69140625, "learning_rate": 0.00017468739414782007, "loss": 4.2396, "step": 6707 }, { "epoch": 0.6955394695059912, "grad_norm": 0.73046875, "learning_rate": 0.00017468017065486836, "loss": 4.2664, "step": 6708 }, { "epoch": 0.6956431575604793, "grad_norm": 0.73046875, "learning_rate": 0.00017467294628076955, "loss": 4.303, "step": 6709 }, { "epoch": 0.6957468456149674, "grad_norm": 0.68359375, "learning_rate": 0.00017466572102560894, "loss": 4.3181, "step": 6710 }, { "epoch": 0.6958505336694555, "grad_norm": 0.7265625, "learning_rate": 0.0001746584948894717, "loss": 4.329, "step": 6711 }, { "epoch": 0.6959542217239435, "grad_norm": 0.65234375, "learning_rate": 0.0001746512678724432, "loss": 4.2602, "step": 6712 }, { "epoch": 0.6960579097784316, "grad_norm": 0.80078125, "learning_rate": 0.0001746440399746086, "loss": 4.2684, "step": 6713 }, { "epoch": 0.6961615978329196, "grad_norm": 0.61328125, "learning_rate": 0.00017463681119605324, "loss": 4.3137, "step": 6714 }, { "epoch": 0.6962652858874078, "grad_norm": 0.76171875, "learning_rate": 0.00017462958153686243, "loss": 4.3403, "step": 6715 }, { "epoch": 0.6963689739418958, "grad_norm": 0.65625, "learning_rate": 0.00017462235099712143, "loss": 4.313, "step": 6716 }, { "epoch": 0.6964726619963839, "grad_norm": 0.66796875, "learning_rate": 0.00017461511957691554, "loss": 4.2659, "step": 6717 }, { "epoch": 0.6965763500508719, "grad_norm": 0.6875, "learning_rate": 0.00017460788727633014, "loss": 4.3121, "step": 6718 }, { "epoch": 0.69668003810536, "grad_norm": 0.625, "learning_rate": 0.00017460065409545053, "loss": 4.2943, "step": 6719 }, { "epoch": 0.6967837261598481, "grad_norm": 0.66015625, "learning_rate": 0.00017459342003436204, "loss": 4.2951, "step": 6720 }, { "epoch": 0.6968874142143362, "grad_norm": 0.7109375, "learning_rate": 0.00017458618509315005, "loss": 4.3544, "step": 6721 }, { "epoch": 0.6969911022688242, "grad_norm": 0.6015625, "learning_rate": 0.00017457894927189996, "loss": 4.286, "step": 6722 }, { "epoch": 0.6970947903233123, "grad_norm": 0.70703125, "learning_rate": 0.00017457171257069707, "loss": 4.3755, "step": 6723 }, { "epoch": 0.6971984783778004, "grad_norm": 0.62890625, "learning_rate": 0.00017456447498962678, "loss": 4.3603, "step": 6724 }, { "epoch": 0.6973021664322885, "grad_norm": 0.6640625, "learning_rate": 0.0001745572365287745, "loss": 4.3034, "step": 6725 }, { "epoch": 0.6974058544867765, "grad_norm": 0.66015625, "learning_rate": 0.00017454999718822566, "loss": 4.3108, "step": 6726 }, { "epoch": 0.6975095425412646, "grad_norm": 0.765625, "learning_rate": 0.00017454275696806567, "loss": 4.2932, "step": 6727 }, { "epoch": 0.6976132305957526, "grad_norm": 0.671875, "learning_rate": 0.0001745355158683799, "loss": 4.2865, "step": 6728 }, { "epoch": 0.6977169186502408, "grad_norm": 0.74609375, "learning_rate": 0.00017452827388925388, "loss": 4.3343, "step": 6729 }, { "epoch": 0.6978206067047288, "grad_norm": 0.73046875, "learning_rate": 0.000174521031030773, "loss": 4.2954, "step": 6730 }, { "epoch": 0.6979242947592169, "grad_norm": 0.7578125, "learning_rate": 0.00017451378729302271, "loss": 4.3043, "step": 6731 }, { "epoch": 0.6980279828137049, "grad_norm": 0.7109375, "learning_rate": 0.00017450654267608847, "loss": 4.3119, "step": 6732 }, { "epoch": 0.698131670868193, "grad_norm": 0.77734375, "learning_rate": 0.00017449929718005582, "loss": 4.2998, "step": 6733 }, { "epoch": 0.6982353589226811, "grad_norm": 0.67578125, "learning_rate": 0.00017449205080501018, "loss": 4.2894, "step": 6734 }, { "epoch": 0.6983390469771692, "grad_norm": 0.703125, "learning_rate": 0.0001744848035510371, "loss": 4.2802, "step": 6735 }, { "epoch": 0.6984427350316572, "grad_norm": 0.72265625, "learning_rate": 0.00017447755541822208, "loss": 4.2742, "step": 6736 }, { "epoch": 0.6985464230861453, "grad_norm": 0.7578125, "learning_rate": 0.00017447030640665062, "loss": 4.2813, "step": 6737 }, { "epoch": 0.6986501111406334, "grad_norm": 0.78125, "learning_rate": 0.00017446305651640825, "loss": 4.2958, "step": 6738 }, { "epoch": 0.6987537991951215, "grad_norm": 0.7578125, "learning_rate": 0.00017445580574758056, "loss": 4.2717, "step": 6739 }, { "epoch": 0.6988574872496095, "grad_norm": 0.84765625, "learning_rate": 0.00017444855410025305, "loss": 4.3151, "step": 6740 }, { "epoch": 0.6989611753040976, "grad_norm": 0.7734375, "learning_rate": 0.0001744413015745113, "loss": 4.3139, "step": 6741 }, { "epoch": 0.6990648633585856, "grad_norm": 0.8046875, "learning_rate": 0.00017443404817044087, "loss": 4.3068, "step": 6742 }, { "epoch": 0.6991685514130738, "grad_norm": 0.7890625, "learning_rate": 0.00017442679388812737, "loss": 4.3164, "step": 6743 }, { "epoch": 0.6992722394675618, "grad_norm": 0.8359375, "learning_rate": 0.00017441953872765638, "loss": 4.2802, "step": 6744 }, { "epoch": 0.6993759275220499, "grad_norm": 0.71484375, "learning_rate": 0.00017441228268911347, "loss": 4.332, "step": 6745 }, { "epoch": 0.699479615576538, "grad_norm": 0.9453125, "learning_rate": 0.00017440502577258427, "loss": 4.2957, "step": 6746 }, { "epoch": 0.699583303631026, "grad_norm": 0.8046875, "learning_rate": 0.00017439776797815445, "loss": 4.3194, "step": 6747 }, { "epoch": 0.6996869916855142, "grad_norm": 0.92578125, "learning_rate": 0.00017439050930590958, "loss": 4.2783, "step": 6748 }, { "epoch": 0.6997906797400022, "grad_norm": 0.8203125, "learning_rate": 0.00017438324975593538, "loss": 4.2853, "step": 6749 }, { "epoch": 0.6998943677944903, "grad_norm": 0.8671875, "learning_rate": 0.0001743759893283174, "loss": 4.2653, "step": 6750 }, { "epoch": 0.6999980558489783, "grad_norm": 0.9375, "learning_rate": 0.00017436872802314141, "loss": 4.309, "step": 6751 }, { "epoch": 0.7001017439034665, "grad_norm": 0.8203125, "learning_rate": 0.00017436146584049302, "loss": 4.3477, "step": 6752 }, { "epoch": 0.7002054319579545, "grad_norm": 0.9765625, "learning_rate": 0.00017435420278045794, "loss": 4.3174, "step": 6753 }, { "epoch": 0.7003091200124426, "grad_norm": 0.79296875, "learning_rate": 0.00017434693884312184, "loss": 4.2983, "step": 6754 }, { "epoch": 0.7004128080669306, "grad_norm": 0.94140625, "learning_rate": 0.0001743396740285705, "loss": 4.3341, "step": 6755 }, { "epoch": 0.7005164961214188, "grad_norm": 0.80078125, "learning_rate": 0.00017433240833688955, "loss": 4.3021, "step": 6756 }, { "epoch": 0.7006201841759068, "grad_norm": 0.734375, "learning_rate": 0.00017432514176816478, "loss": 4.3089, "step": 6757 }, { "epoch": 0.7007238722303949, "grad_norm": 0.80859375, "learning_rate": 0.00017431787432248188, "loss": 4.3165, "step": 6758 }, { "epoch": 0.7008275602848829, "grad_norm": 0.8125, "learning_rate": 0.00017431060599992662, "loss": 4.321, "step": 6759 }, { "epoch": 0.700931248339371, "grad_norm": 0.83203125, "learning_rate": 0.00017430333680058476, "loss": 4.3034, "step": 6760 }, { "epoch": 0.701034936393859, "grad_norm": 0.828125, "learning_rate": 0.0001742960667245421, "loss": 4.2945, "step": 6761 }, { "epoch": 0.7011386244483472, "grad_norm": 0.95703125, "learning_rate": 0.00017428879577188435, "loss": 4.2914, "step": 6762 }, { "epoch": 0.7012423125028352, "grad_norm": 0.7265625, "learning_rate": 0.00017428152394269731, "loss": 4.3338, "step": 6763 }, { "epoch": 0.7013460005573233, "grad_norm": 0.79296875, "learning_rate": 0.00017427425123706688, "loss": 4.3143, "step": 6764 }, { "epoch": 0.7014496886118113, "grad_norm": 0.78125, "learning_rate": 0.00017426697765507876, "loss": 4.2955, "step": 6765 }, { "epoch": 0.7015533766662995, "grad_norm": 0.75390625, "learning_rate": 0.00017425970319681882, "loss": 4.2712, "step": 6766 }, { "epoch": 0.7016570647207875, "grad_norm": 0.84765625, "learning_rate": 0.00017425242786237285, "loss": 4.2621, "step": 6767 }, { "epoch": 0.7017607527752756, "grad_norm": 0.80078125, "learning_rate": 0.00017424515165182674, "loss": 4.3243, "step": 6768 }, { "epoch": 0.7018644408297636, "grad_norm": 0.78515625, "learning_rate": 0.00017423787456526634, "loss": 4.2579, "step": 6769 }, { "epoch": 0.7019681288842518, "grad_norm": 0.78515625, "learning_rate": 0.00017423059660277742, "loss": 4.2895, "step": 6770 }, { "epoch": 0.7020718169387398, "grad_norm": 0.859375, "learning_rate": 0.00017422331776444598, "loss": 4.3058, "step": 6771 }, { "epoch": 0.7021755049932279, "grad_norm": 0.91796875, "learning_rate": 0.00017421603805035785, "loss": 4.2943, "step": 6772 }, { "epoch": 0.7022791930477159, "grad_norm": 0.7890625, "learning_rate": 0.00017420875746059893, "loss": 4.2972, "step": 6773 }, { "epoch": 0.702382881102204, "grad_norm": 0.90625, "learning_rate": 0.0001742014759952551, "loss": 4.3516, "step": 6774 }, { "epoch": 0.702486569156692, "grad_norm": 0.78515625, "learning_rate": 0.00017419419365441227, "loss": 4.3031, "step": 6775 }, { "epoch": 0.7025902572111802, "grad_norm": 0.890625, "learning_rate": 0.0001741869104381564, "loss": 4.3006, "step": 6776 }, { "epoch": 0.7026939452656682, "grad_norm": 0.796875, "learning_rate": 0.0001741796263465734, "loss": 4.3177, "step": 6777 }, { "epoch": 0.7027976333201563, "grad_norm": 0.8125, "learning_rate": 0.00017417234137974923, "loss": 4.354, "step": 6778 }, { "epoch": 0.7029013213746443, "grad_norm": 0.83203125, "learning_rate": 0.00017416505553776983, "loss": 4.3367, "step": 6779 }, { "epoch": 0.7030050094291325, "grad_norm": 0.79296875, "learning_rate": 0.00017415776882072118, "loss": 4.2742, "step": 6780 }, { "epoch": 0.7031086974836205, "grad_norm": 0.90625, "learning_rate": 0.00017415048122868923, "loss": 4.2843, "step": 6781 }, { "epoch": 0.7032123855381086, "grad_norm": 0.765625, "learning_rate": 0.00017414319276175995, "loss": 4.3533, "step": 6782 }, { "epoch": 0.7033160735925966, "grad_norm": 0.90234375, "learning_rate": 0.00017413590342001944, "loss": 4.3043, "step": 6783 }, { "epoch": 0.7034197616470848, "grad_norm": 0.71875, "learning_rate": 0.0001741286132035536, "loss": 4.3679, "step": 6784 }, { "epoch": 0.7035234497015728, "grad_norm": 0.8359375, "learning_rate": 0.00017412132211244846, "loss": 4.2908, "step": 6785 }, { "epoch": 0.7036271377560609, "grad_norm": 0.71484375, "learning_rate": 0.0001741140301467901, "loss": 4.2883, "step": 6786 }, { "epoch": 0.7037308258105489, "grad_norm": 0.80859375, "learning_rate": 0.00017410673730666452, "loss": 4.3207, "step": 6787 }, { "epoch": 0.703834513865037, "grad_norm": 0.72265625, "learning_rate": 0.00017409944359215779, "loss": 4.2686, "step": 6788 }, { "epoch": 0.7039382019195252, "grad_norm": 0.83984375, "learning_rate": 0.00017409214900335592, "loss": 4.3197, "step": 6789 }, { "epoch": 0.7040418899740132, "grad_norm": 0.671875, "learning_rate": 0.000174084853540345, "loss": 4.2796, "step": 6790 }, { "epoch": 0.7041455780285013, "grad_norm": 0.89453125, "learning_rate": 0.00017407755720321116, "loss": 4.2983, "step": 6791 }, { "epoch": 0.7042492660829893, "grad_norm": 0.69921875, "learning_rate": 0.00017407025999204042, "loss": 4.2933, "step": 6792 }, { "epoch": 0.7043529541374774, "grad_norm": 0.76953125, "learning_rate": 0.00017406296190691892, "loss": 4.3228, "step": 6793 }, { "epoch": 0.7044566421919655, "grad_norm": 0.6953125, "learning_rate": 0.00017405566294793277, "loss": 4.2952, "step": 6794 }, { "epoch": 0.7045603302464536, "grad_norm": 0.83984375, "learning_rate": 0.00017404836311516806, "loss": 4.3146, "step": 6795 }, { "epoch": 0.7046640183009416, "grad_norm": 0.71484375, "learning_rate": 0.00017404106240871093, "loss": 4.316, "step": 6796 }, { "epoch": 0.7047677063554297, "grad_norm": 0.74609375, "learning_rate": 0.00017403376082864754, "loss": 4.2951, "step": 6797 }, { "epoch": 0.7048713944099178, "grad_norm": 0.7734375, "learning_rate": 0.000174026458375064, "loss": 4.2902, "step": 6798 }, { "epoch": 0.7049750824644059, "grad_norm": 0.75390625, "learning_rate": 0.00017401915504804656, "loss": 4.2828, "step": 6799 }, { "epoch": 0.7050787705188939, "grad_norm": 0.85546875, "learning_rate": 0.0001740118508476813, "loss": 4.34, "step": 6800 }, { "epoch": 0.705182458573382, "grad_norm": 0.7890625, "learning_rate": 0.00017400454577405443, "loss": 4.3162, "step": 6801 }, { "epoch": 0.70528614662787, "grad_norm": 0.86328125, "learning_rate": 0.00017399723982725217, "loss": 4.3065, "step": 6802 }, { "epoch": 0.7053898346823582, "grad_norm": 0.83984375, "learning_rate": 0.00017398993300736065, "loss": 4.2622, "step": 6803 }, { "epoch": 0.7054935227368462, "grad_norm": 0.7734375, "learning_rate": 0.00017398262531446616, "loss": 4.3342, "step": 6804 }, { "epoch": 0.7055972107913343, "grad_norm": 0.83984375, "learning_rate": 0.0001739753167486549, "loss": 4.2971, "step": 6805 }, { "epoch": 0.7057008988458223, "grad_norm": 0.8125, "learning_rate": 0.0001739680073100131, "loss": 4.3237, "step": 6806 }, { "epoch": 0.7058045869003104, "grad_norm": 0.8984375, "learning_rate": 0.000173960696998627, "loss": 4.3068, "step": 6807 }, { "epoch": 0.7059082749547985, "grad_norm": 0.76953125, "learning_rate": 0.00017395338581458286, "loss": 4.2803, "step": 6808 }, { "epoch": 0.7060119630092866, "grad_norm": 0.796875, "learning_rate": 0.00017394607375796693, "loss": 4.2956, "step": 6809 }, { "epoch": 0.7061156510637746, "grad_norm": 0.76953125, "learning_rate": 0.00017393876082886546, "loss": 4.3073, "step": 6810 }, { "epoch": 0.7062193391182627, "grad_norm": 0.73828125, "learning_rate": 0.0001739314470273648, "loss": 4.2763, "step": 6811 }, { "epoch": 0.7063230271727508, "grad_norm": 0.81640625, "learning_rate": 0.00017392413235355124, "loss": 4.3287, "step": 6812 }, { "epoch": 0.7064267152272389, "grad_norm": 0.73046875, "learning_rate": 0.000173916816807511, "loss": 4.2653, "step": 6813 }, { "epoch": 0.7065304032817269, "grad_norm": 0.765625, "learning_rate": 0.0001739095003893305, "loss": 4.3285, "step": 6814 }, { "epoch": 0.706634091336215, "grad_norm": 0.8203125, "learning_rate": 0.00017390218309909603, "loss": 4.3328, "step": 6815 }, { "epoch": 0.706737779390703, "grad_norm": 0.72265625, "learning_rate": 0.00017389486493689388, "loss": 4.2716, "step": 6816 }, { "epoch": 0.7068414674451912, "grad_norm": 0.921875, "learning_rate": 0.00017388754590281046, "loss": 4.2941, "step": 6817 }, { "epoch": 0.7069451554996792, "grad_norm": 0.7421875, "learning_rate": 0.0001738802259969321, "loss": 4.3477, "step": 6818 }, { "epoch": 0.7070488435541673, "grad_norm": 0.99609375, "learning_rate": 0.00017387290521934517, "loss": 4.2487, "step": 6819 }, { "epoch": 0.7071525316086553, "grad_norm": 0.7109375, "learning_rate": 0.00017386558357013602, "loss": 4.2578, "step": 6820 }, { "epoch": 0.7072562196631434, "grad_norm": 0.95703125, "learning_rate": 0.00017385826104939108, "loss": 4.2883, "step": 6821 }, { "epoch": 0.7073599077176315, "grad_norm": 0.72265625, "learning_rate": 0.00017385093765719673, "loss": 4.2993, "step": 6822 }, { "epoch": 0.7074635957721196, "grad_norm": 0.890625, "learning_rate": 0.0001738436133936394, "loss": 4.2944, "step": 6823 }, { "epoch": 0.7075672838266076, "grad_norm": 0.75, "learning_rate": 0.00017383628825880546, "loss": 4.2836, "step": 6824 }, { "epoch": 0.7076709718810957, "grad_norm": 0.8046875, "learning_rate": 0.0001738289622527814, "loss": 4.2791, "step": 6825 }, { "epoch": 0.7077746599355837, "grad_norm": 0.80859375, "learning_rate": 0.00017382163537565357, "loss": 4.2917, "step": 6826 }, { "epoch": 0.7078783479900719, "grad_norm": 0.7734375, "learning_rate": 0.0001738143076275085, "loss": 4.2644, "step": 6827 }, { "epoch": 0.7079820360445599, "grad_norm": 0.71875, "learning_rate": 0.00017380697900843263, "loss": 4.3196, "step": 6828 }, { "epoch": 0.708085724099048, "grad_norm": 0.7734375, "learning_rate": 0.00017379964951851244, "loss": 4.3053, "step": 6829 }, { "epoch": 0.708189412153536, "grad_norm": 0.734375, "learning_rate": 0.0001737923191578344, "loss": 4.2747, "step": 6830 }, { "epoch": 0.7082931002080242, "grad_norm": 0.78515625, "learning_rate": 0.00017378498792648496, "loss": 4.2809, "step": 6831 }, { "epoch": 0.7083967882625122, "grad_norm": 0.82421875, "learning_rate": 0.00017377765582455069, "loss": 4.3281, "step": 6832 }, { "epoch": 0.7085004763170003, "grad_norm": 0.8203125, "learning_rate": 0.00017377032285211804, "loss": 4.3215, "step": 6833 }, { "epoch": 0.7086041643714884, "grad_norm": 0.7109375, "learning_rate": 0.00017376298900927356, "loss": 4.3022, "step": 6834 }, { "epoch": 0.7087078524259764, "grad_norm": 0.8515625, "learning_rate": 0.0001737556542961038, "loss": 4.2739, "step": 6835 }, { "epoch": 0.7088115404804646, "grad_norm": 0.69921875, "learning_rate": 0.00017374831871269528, "loss": 4.3283, "step": 6836 }, { "epoch": 0.7089152285349526, "grad_norm": 0.78125, "learning_rate": 0.00017374098225913454, "loss": 4.2811, "step": 6837 }, { "epoch": 0.7090189165894407, "grad_norm": 0.6484375, "learning_rate": 0.00017373364493550815, "loss": 4.3492, "step": 6838 }, { "epoch": 0.7091226046439287, "grad_norm": 0.8125, "learning_rate": 0.00017372630674190274, "loss": 4.249, "step": 6839 }, { "epoch": 0.7092262926984169, "grad_norm": 0.6953125, "learning_rate": 0.00017371896767840478, "loss": 4.3491, "step": 6840 }, { "epoch": 0.7093299807529049, "grad_norm": 0.8671875, "learning_rate": 0.00017371162774510097, "loss": 4.3126, "step": 6841 }, { "epoch": 0.709433668807393, "grad_norm": 0.7109375, "learning_rate": 0.00017370428694207782, "loss": 4.3509, "step": 6842 }, { "epoch": 0.709537356861881, "grad_norm": 0.75390625, "learning_rate": 0.00017369694526942208, "loss": 4.2861, "step": 6843 }, { "epoch": 0.7096410449163691, "grad_norm": 0.75390625, "learning_rate": 0.00017368960272722022, "loss": 4.3139, "step": 6844 }, { "epoch": 0.7097447329708572, "grad_norm": 0.625, "learning_rate": 0.00017368225931555892, "loss": 4.3315, "step": 6845 }, { "epoch": 0.7098484210253453, "grad_norm": 0.75390625, "learning_rate": 0.0001736749150345249, "loss": 4.2999, "step": 6846 }, { "epoch": 0.7099521090798333, "grad_norm": 0.6484375, "learning_rate": 0.00017366756988420473, "loss": 4.3054, "step": 6847 }, { "epoch": 0.7100557971343214, "grad_norm": 0.73046875, "learning_rate": 0.00017366022386468513, "loss": 4.2595, "step": 6848 }, { "epoch": 0.7101594851888094, "grad_norm": 0.6953125, "learning_rate": 0.00017365287697605273, "loss": 4.2923, "step": 6849 }, { "epoch": 0.7102631732432976, "grad_norm": 0.75, "learning_rate": 0.00017364552921839423, "loss": 4.3015, "step": 6850 }, { "epoch": 0.7103668612977856, "grad_norm": 0.79296875, "learning_rate": 0.00017363818059179634, "loss": 4.3324, "step": 6851 }, { "epoch": 0.7104705493522737, "grad_norm": 0.69921875, "learning_rate": 0.00017363083109634577, "loss": 4.3018, "step": 6852 }, { "epoch": 0.7105742374067617, "grad_norm": 0.8125, "learning_rate": 0.0001736234807321292, "loss": 4.2822, "step": 6853 }, { "epoch": 0.7106779254612499, "grad_norm": 0.703125, "learning_rate": 0.00017361612949923344, "loss": 4.3018, "step": 6854 }, { "epoch": 0.7107816135157379, "grad_norm": 0.84765625, "learning_rate": 0.0001736087773977451, "loss": 4.309, "step": 6855 }, { "epoch": 0.710885301570226, "grad_norm": 0.78515625, "learning_rate": 0.00017360142442775104, "loss": 4.3042, "step": 6856 }, { "epoch": 0.710988989624714, "grad_norm": 0.74609375, "learning_rate": 0.00017359407058933793, "loss": 4.3146, "step": 6857 }, { "epoch": 0.7110926776792021, "grad_norm": 0.73046875, "learning_rate": 0.00017358671588259261, "loss": 4.298, "step": 6858 }, { "epoch": 0.7111963657336902, "grad_norm": 0.796875, "learning_rate": 0.00017357936030760183, "loss": 4.3325, "step": 6859 }, { "epoch": 0.7113000537881783, "grad_norm": 0.828125, "learning_rate": 0.00017357200386445233, "loss": 4.3342, "step": 6860 }, { "epoch": 0.7114037418426663, "grad_norm": 0.76953125, "learning_rate": 0.000173564646553231, "loss": 4.321, "step": 6861 }, { "epoch": 0.7115074298971544, "grad_norm": 0.8046875, "learning_rate": 0.00017355728837402458, "loss": 4.274, "step": 6862 }, { "epoch": 0.7116111179516424, "grad_norm": 0.76953125, "learning_rate": 0.00017354992932691992, "loss": 4.2502, "step": 6863 }, { "epoch": 0.7117148060061306, "grad_norm": 0.7734375, "learning_rate": 0.00017354256941200385, "loss": 4.2805, "step": 6864 }, { "epoch": 0.7118184940606186, "grad_norm": 0.66015625, "learning_rate": 0.00017353520862936316, "loss": 4.2811, "step": 6865 }, { "epoch": 0.7119221821151067, "grad_norm": 0.7578125, "learning_rate": 0.00017352784697908478, "loss": 4.3037, "step": 6866 }, { "epoch": 0.7120258701695947, "grad_norm": 0.7109375, "learning_rate": 0.00017352048446125551, "loss": 4.2977, "step": 6867 }, { "epoch": 0.7121295582240829, "grad_norm": 0.85546875, "learning_rate": 0.00017351312107596225, "loss": 4.3676, "step": 6868 }, { "epoch": 0.7122332462785709, "grad_norm": 0.7578125, "learning_rate": 0.00017350575682329185, "loss": 4.3402, "step": 6869 }, { "epoch": 0.712336934333059, "grad_norm": 0.83203125, "learning_rate": 0.0001734983917033312, "loss": 4.3002, "step": 6870 }, { "epoch": 0.712440622387547, "grad_norm": 0.83203125, "learning_rate": 0.00017349102571616727, "loss": 4.3039, "step": 6871 }, { "epoch": 0.7125443104420351, "grad_norm": 0.796875, "learning_rate": 0.0001734836588618869, "loss": 4.3131, "step": 6872 }, { "epoch": 0.7126479984965232, "grad_norm": 0.73046875, "learning_rate": 0.00017347629114057705, "loss": 4.3066, "step": 6873 }, { "epoch": 0.7127516865510113, "grad_norm": 0.703125, "learning_rate": 0.0001734689225523246, "loss": 4.2851, "step": 6874 }, { "epoch": 0.7128553746054993, "grad_norm": 0.8359375, "learning_rate": 0.00017346155309721652, "loss": 4.2915, "step": 6875 }, { "epoch": 0.7129590626599874, "grad_norm": 0.7109375, "learning_rate": 0.00017345418277533978, "loss": 4.3027, "step": 6876 }, { "epoch": 0.7130627507144754, "grad_norm": 0.77734375, "learning_rate": 0.00017344681158678132, "loss": 4.3315, "step": 6877 }, { "epoch": 0.7131664387689636, "grad_norm": 0.70703125, "learning_rate": 0.0001734394395316281, "loss": 4.3191, "step": 6878 }, { "epoch": 0.7132701268234517, "grad_norm": 0.7421875, "learning_rate": 0.0001734320666099672, "loss": 4.2969, "step": 6879 }, { "epoch": 0.7133738148779397, "grad_norm": 0.79296875, "learning_rate": 0.00017342469282188546, "loss": 4.3088, "step": 6880 }, { "epoch": 0.7134775029324278, "grad_norm": 0.76953125, "learning_rate": 0.00017341731816746997, "loss": 4.2874, "step": 6881 }, { "epoch": 0.7135811909869159, "grad_norm": 0.7578125, "learning_rate": 0.00017340994264680774, "loss": 4.2924, "step": 6882 }, { "epoch": 0.713684879041404, "grad_norm": 0.74609375, "learning_rate": 0.0001734025662599858, "loss": 4.31, "step": 6883 }, { "epoch": 0.713788567095892, "grad_norm": 0.71484375, "learning_rate": 0.00017339518900709116, "loss": 4.3129, "step": 6884 }, { "epoch": 0.7138922551503801, "grad_norm": 0.71875, "learning_rate": 0.00017338781088821085, "loss": 4.3051, "step": 6885 }, { "epoch": 0.7139959432048681, "grad_norm": 0.74609375, "learning_rate": 0.00017338043190343196, "loss": 4.3367, "step": 6886 }, { "epoch": 0.7140996312593563, "grad_norm": 0.7890625, "learning_rate": 0.00017337305205284155, "loss": 4.326, "step": 6887 }, { "epoch": 0.7142033193138443, "grad_norm": 0.62890625, "learning_rate": 0.00017336567133652668, "loss": 4.3013, "step": 6888 }, { "epoch": 0.7143070073683324, "grad_norm": 0.7734375, "learning_rate": 0.00017335828975457445, "loss": 4.3218, "step": 6889 }, { "epoch": 0.7144106954228204, "grad_norm": 0.6171875, "learning_rate": 0.0001733509073070719, "loss": 4.2486, "step": 6890 }, { "epoch": 0.7145143834773086, "grad_norm": 0.7890625, "learning_rate": 0.00017334352399410623, "loss": 4.285, "step": 6891 }, { "epoch": 0.7146180715317966, "grad_norm": 0.6875, "learning_rate": 0.0001733361398157645, "loss": 4.2822, "step": 6892 }, { "epoch": 0.7147217595862847, "grad_norm": 0.8203125, "learning_rate": 0.00017332875477213378, "loss": 4.3236, "step": 6893 }, { "epoch": 0.7148254476407727, "grad_norm": 0.6953125, "learning_rate": 0.00017332136886330136, "loss": 4.2788, "step": 6894 }, { "epoch": 0.7149291356952608, "grad_norm": 0.765625, "learning_rate": 0.0001733139820893542, "loss": 4.3271, "step": 6895 }, { "epoch": 0.7150328237497489, "grad_norm": 0.61328125, "learning_rate": 0.0001733065944503796, "loss": 4.2728, "step": 6896 }, { "epoch": 0.715136511804237, "grad_norm": 0.77734375, "learning_rate": 0.00017329920594646466, "loss": 4.3192, "step": 6897 }, { "epoch": 0.715240199858725, "grad_norm": 0.64453125, "learning_rate": 0.00017329181657769658, "loss": 4.2962, "step": 6898 }, { "epoch": 0.7153438879132131, "grad_norm": 0.85546875, "learning_rate": 0.00017328442634416253, "loss": 4.3042, "step": 6899 }, { "epoch": 0.7154475759677011, "grad_norm": 0.78515625, "learning_rate": 0.00017327703524594971, "loss": 4.3336, "step": 6900 }, { "epoch": 0.7155512640221893, "grad_norm": 0.734375, "learning_rate": 0.00017326964328314532, "loss": 4.2664, "step": 6901 }, { "epoch": 0.7156549520766773, "grad_norm": 0.8828125, "learning_rate": 0.0001732622504558366, "loss": 4.2763, "step": 6902 }, { "epoch": 0.7157586401311654, "grad_norm": 0.7421875, "learning_rate": 0.0001732548567641108, "loss": 4.2979, "step": 6903 }, { "epoch": 0.7158623281856534, "grad_norm": 0.83203125, "learning_rate": 0.00017324746220805508, "loss": 4.3024, "step": 6904 }, { "epoch": 0.7159660162401416, "grad_norm": 0.81640625, "learning_rate": 0.00017324006678775674, "loss": 4.3234, "step": 6905 }, { "epoch": 0.7160697042946296, "grad_norm": 0.83984375, "learning_rate": 0.00017323267050330302, "loss": 4.3004, "step": 6906 }, { "epoch": 0.7161733923491177, "grad_norm": 0.7265625, "learning_rate": 0.00017322527335478122, "loss": 4.3169, "step": 6907 }, { "epoch": 0.7162770804036057, "grad_norm": 0.86328125, "learning_rate": 0.00017321787534227862, "loss": 4.3255, "step": 6908 }, { "epoch": 0.7163807684580938, "grad_norm": 0.75, "learning_rate": 0.00017321047646588243, "loss": 4.3095, "step": 6909 }, { "epoch": 0.7164844565125819, "grad_norm": 0.8046875, "learning_rate": 0.00017320307672568004, "loss": 4.3014, "step": 6910 }, { "epoch": 0.71658814456707, "grad_norm": 0.84765625, "learning_rate": 0.00017319567612175872, "loss": 4.3117, "step": 6911 }, { "epoch": 0.716691832621558, "grad_norm": 0.75, "learning_rate": 0.00017318827465420577, "loss": 4.273, "step": 6912 }, { "epoch": 0.7167955206760461, "grad_norm": 0.90625, "learning_rate": 0.00017318087232310857, "loss": 4.2808, "step": 6913 }, { "epoch": 0.7168992087305341, "grad_norm": 0.6953125, "learning_rate": 0.00017317346912855443, "loss": 4.2904, "step": 6914 }, { "epoch": 0.7170028967850223, "grad_norm": 0.83203125, "learning_rate": 0.0001731660650706307, "loss": 4.3138, "step": 6915 }, { "epoch": 0.7171065848395103, "grad_norm": 0.76171875, "learning_rate": 0.00017315866014942473, "loss": 4.2877, "step": 6916 }, { "epoch": 0.7172102728939984, "grad_norm": 0.80078125, "learning_rate": 0.0001731512543650239, "loss": 4.3118, "step": 6917 }, { "epoch": 0.7173139609484864, "grad_norm": 0.70703125, "learning_rate": 0.00017314384771751563, "loss": 4.3041, "step": 6918 }, { "epoch": 0.7174176490029746, "grad_norm": 0.75, "learning_rate": 0.00017313644020698724, "loss": 4.282, "step": 6919 }, { "epoch": 0.7175213370574626, "grad_norm": 0.73828125, "learning_rate": 0.00017312903183352616, "loss": 4.266, "step": 6920 }, { "epoch": 0.7176250251119507, "grad_norm": 0.75, "learning_rate": 0.0001731216225972198, "loss": 4.2921, "step": 6921 }, { "epoch": 0.7177287131664387, "grad_norm": 0.75390625, "learning_rate": 0.0001731142124981556, "loss": 4.2855, "step": 6922 }, { "epoch": 0.7178324012209268, "grad_norm": 0.703125, "learning_rate": 0.00017310680153642097, "loss": 4.2506, "step": 6923 }, { "epoch": 0.717936089275415, "grad_norm": 0.73828125, "learning_rate": 0.00017309938971210337, "loss": 4.3639, "step": 6924 }, { "epoch": 0.718039777329903, "grad_norm": 0.69140625, "learning_rate": 0.00017309197702529026, "loss": 4.2782, "step": 6925 }, { "epoch": 0.7181434653843911, "grad_norm": 0.76171875, "learning_rate": 0.00017308456347606905, "loss": 4.332, "step": 6926 }, { "epoch": 0.7182471534388791, "grad_norm": 0.71484375, "learning_rate": 0.00017307714906452724, "loss": 4.3266, "step": 6927 }, { "epoch": 0.7183508414933673, "grad_norm": 0.71875, "learning_rate": 0.00017306973379075234, "loss": 4.3125, "step": 6928 }, { "epoch": 0.7184545295478553, "grad_norm": 0.74609375, "learning_rate": 0.0001730623176548318, "loss": 4.2936, "step": 6929 }, { "epoch": 0.7185582176023434, "grad_norm": 0.75390625, "learning_rate": 0.00017305490065685318, "loss": 4.3082, "step": 6930 }, { "epoch": 0.7186619056568314, "grad_norm": 0.7109375, "learning_rate": 0.00017304748279690392, "loss": 4.3166, "step": 6931 }, { "epoch": 0.7187655937113195, "grad_norm": 0.69140625, "learning_rate": 0.0001730400640750716, "loss": 4.3189, "step": 6932 }, { "epoch": 0.7188692817658076, "grad_norm": 0.73046875, "learning_rate": 0.00017303264449144369, "loss": 4.2606, "step": 6933 }, { "epoch": 0.7189729698202957, "grad_norm": 0.80859375, "learning_rate": 0.0001730252240461078, "loss": 4.28, "step": 6934 }, { "epoch": 0.7190766578747837, "grad_norm": 0.65625, "learning_rate": 0.00017301780273915144, "loss": 4.3002, "step": 6935 }, { "epoch": 0.7191803459292718, "grad_norm": 0.7109375, "learning_rate": 0.00017301038057066222, "loss": 4.3399, "step": 6936 }, { "epoch": 0.7192840339837598, "grad_norm": 0.72265625, "learning_rate": 0.00017300295754072768, "loss": 4.2772, "step": 6937 }, { "epoch": 0.719387722038248, "grad_norm": 0.70703125, "learning_rate": 0.0001729955336494354, "loss": 4.319, "step": 6938 }, { "epoch": 0.719491410092736, "grad_norm": 0.76171875, "learning_rate": 0.00017298810889687297, "loss": 4.3156, "step": 6939 }, { "epoch": 0.7195950981472241, "grad_norm": 0.81640625, "learning_rate": 0.00017298068328312804, "loss": 4.3124, "step": 6940 }, { "epoch": 0.7196987862017121, "grad_norm": 0.7734375, "learning_rate": 0.00017297325680828816, "loss": 4.323, "step": 6941 }, { "epoch": 0.7198024742562003, "grad_norm": 0.80859375, "learning_rate": 0.000172965829472441, "loss": 4.3011, "step": 6942 }, { "epoch": 0.7199061623106883, "grad_norm": 0.7734375, "learning_rate": 0.00017295840127567417, "loss": 4.2752, "step": 6943 }, { "epoch": 0.7200098503651764, "grad_norm": 0.7890625, "learning_rate": 0.00017295097221807534, "loss": 4.2404, "step": 6944 }, { "epoch": 0.7201135384196644, "grad_norm": 0.77734375, "learning_rate": 0.00017294354229973214, "loss": 4.2955, "step": 6945 }, { "epoch": 0.7202172264741525, "grad_norm": 0.79296875, "learning_rate": 0.00017293611152073224, "loss": 4.3117, "step": 6946 }, { "epoch": 0.7203209145286406, "grad_norm": 0.734375, "learning_rate": 0.00017292867988116334, "loss": 4.307, "step": 6947 }, { "epoch": 0.7204246025831287, "grad_norm": 0.74609375, "learning_rate": 0.00017292124738111308, "loss": 4.3032, "step": 6948 }, { "epoch": 0.7205282906376167, "grad_norm": 0.83984375, "learning_rate": 0.00017291381402066922, "loss": 4.306, "step": 6949 }, { "epoch": 0.7206319786921048, "grad_norm": 0.76171875, "learning_rate": 0.0001729063797999194, "loss": 4.2715, "step": 6950 }, { "epoch": 0.7207356667465928, "grad_norm": 0.80078125, "learning_rate": 0.0001728989447189514, "loss": 4.3337, "step": 6951 }, { "epoch": 0.720839354801081, "grad_norm": 0.84375, "learning_rate": 0.00017289150877785282, "loss": 4.2688, "step": 6952 }, { "epoch": 0.720943042855569, "grad_norm": 0.7265625, "learning_rate": 0.00017288407197671156, "loss": 4.3162, "step": 6953 }, { "epoch": 0.7210467309100571, "grad_norm": 0.77734375, "learning_rate": 0.00017287663431561528, "loss": 4.2812, "step": 6954 }, { "epoch": 0.7211504189645451, "grad_norm": 0.73046875, "learning_rate": 0.00017286919579465176, "loss": 4.3023, "step": 6955 }, { "epoch": 0.7212541070190333, "grad_norm": 0.734375, "learning_rate": 0.00017286175641390873, "loss": 4.2777, "step": 6956 }, { "epoch": 0.7213577950735213, "grad_norm": 0.7421875, "learning_rate": 0.000172854316173474, "loss": 4.3601, "step": 6957 }, { "epoch": 0.7214614831280094, "grad_norm": 0.828125, "learning_rate": 0.0001728468750734354, "loss": 4.3323, "step": 6958 }, { "epoch": 0.7215651711824974, "grad_norm": 0.66796875, "learning_rate": 0.0001728394331138806, "loss": 4.2778, "step": 6959 }, { "epoch": 0.7216688592369855, "grad_norm": 0.8046875, "learning_rate": 0.00017283199029489752, "loss": 4.3155, "step": 6960 }, { "epoch": 0.7217725472914736, "grad_norm": 0.67578125, "learning_rate": 0.00017282454661657391, "loss": 4.2807, "step": 6961 }, { "epoch": 0.7218762353459617, "grad_norm": 0.78515625, "learning_rate": 0.00017281710207899765, "loss": 4.3196, "step": 6962 }, { "epoch": 0.7219799234004497, "grad_norm": 0.72265625, "learning_rate": 0.0001728096566822566, "loss": 4.3321, "step": 6963 }, { "epoch": 0.7220836114549378, "grad_norm": 0.71875, "learning_rate": 0.0001728022104264385, "loss": 4.2515, "step": 6964 }, { "epoch": 0.7221872995094258, "grad_norm": 0.81640625, "learning_rate": 0.0001727947633116313, "loss": 4.2605, "step": 6965 }, { "epoch": 0.722290987563914, "grad_norm": 0.70703125, "learning_rate": 0.00017278731533792283, "loss": 4.3116, "step": 6966 }, { "epoch": 0.722394675618402, "grad_norm": 0.953125, "learning_rate": 0.000172779866505401, "loss": 4.3079, "step": 6967 }, { "epoch": 0.7224983636728901, "grad_norm": 0.8515625, "learning_rate": 0.00017277241681415366, "loss": 4.2762, "step": 6968 }, { "epoch": 0.7226020517273782, "grad_norm": 0.765625, "learning_rate": 0.00017276496626426874, "loss": 4.2775, "step": 6969 }, { "epoch": 0.7227057397818663, "grad_norm": 0.82421875, "learning_rate": 0.0001727575148558341, "loss": 4.3017, "step": 6970 }, { "epoch": 0.7228094278363544, "grad_norm": 0.75390625, "learning_rate": 0.00017275006258893775, "loss": 4.2978, "step": 6971 }, { "epoch": 0.7229131158908424, "grad_norm": 0.79296875, "learning_rate": 0.0001727426094636675, "loss": 4.2782, "step": 6972 }, { "epoch": 0.7230168039453305, "grad_norm": 0.78125, "learning_rate": 0.00017273515548011138, "loss": 4.2956, "step": 6973 }, { "epoch": 0.7231204919998185, "grad_norm": 0.77734375, "learning_rate": 0.00017272770063835732, "loss": 4.2611, "step": 6974 }, { "epoch": 0.7232241800543067, "grad_norm": 0.71484375, "learning_rate": 0.00017272024493849325, "loss": 4.2621, "step": 6975 }, { "epoch": 0.7233278681087947, "grad_norm": 0.8125, "learning_rate": 0.00017271278838060719, "loss": 4.3214, "step": 6976 }, { "epoch": 0.7234315561632828, "grad_norm": 0.73828125, "learning_rate": 0.00017270533096478704, "loss": 4.3202, "step": 6977 }, { "epoch": 0.7235352442177708, "grad_norm": 0.7734375, "learning_rate": 0.00017269787269112084, "loss": 4.2535, "step": 6978 }, { "epoch": 0.723638932272259, "grad_norm": 0.78515625, "learning_rate": 0.0001726904135596966, "loss": 4.2887, "step": 6979 }, { "epoch": 0.723742620326747, "grad_norm": 0.74609375, "learning_rate": 0.00017268295357060235, "loss": 4.3077, "step": 6980 }, { "epoch": 0.7238463083812351, "grad_norm": 0.85546875, "learning_rate": 0.00017267549272392607, "loss": 4.289, "step": 6981 }, { "epoch": 0.7239499964357231, "grad_norm": 0.70703125, "learning_rate": 0.00017266803101975576, "loss": 4.2946, "step": 6982 }, { "epoch": 0.7240536844902112, "grad_norm": 0.9453125, "learning_rate": 0.0001726605684581795, "loss": 4.262, "step": 6983 }, { "epoch": 0.7241573725446993, "grad_norm": 0.625, "learning_rate": 0.0001726531050392854, "loss": 4.3065, "step": 6984 }, { "epoch": 0.7242610605991874, "grad_norm": 0.8671875, "learning_rate": 0.00017264564076316136, "loss": 4.2718, "step": 6985 }, { "epoch": 0.7243647486536754, "grad_norm": 0.71484375, "learning_rate": 0.00017263817562989563, "loss": 4.326, "step": 6986 }, { "epoch": 0.7244684367081635, "grad_norm": 0.8125, "learning_rate": 0.0001726307096395762, "loss": 4.2844, "step": 6987 }, { "epoch": 0.7245721247626515, "grad_norm": 0.8203125, "learning_rate": 0.00017262324279229113, "loss": 4.3137, "step": 6988 }, { "epoch": 0.7246758128171397, "grad_norm": 0.75390625, "learning_rate": 0.0001726157750881286, "loss": 4.2779, "step": 6989 }, { "epoch": 0.7247795008716277, "grad_norm": 0.796875, "learning_rate": 0.00017260830652717665, "loss": 4.364, "step": 6990 }, { "epoch": 0.7248831889261158, "grad_norm": 0.70703125, "learning_rate": 0.00017260083710952343, "loss": 4.298, "step": 6991 }, { "epoch": 0.7249868769806038, "grad_norm": 0.84375, "learning_rate": 0.0001725933668352571, "loss": 4.3133, "step": 6992 }, { "epoch": 0.725090565035092, "grad_norm": 0.72265625, "learning_rate": 0.00017258589570446576, "loss": 4.3327, "step": 6993 }, { "epoch": 0.72519425308958, "grad_norm": 0.8046875, "learning_rate": 0.0001725784237172376, "loss": 4.3136, "step": 6994 }, { "epoch": 0.7252979411440681, "grad_norm": 0.7734375, "learning_rate": 0.00017257095087366074, "loss": 4.3083, "step": 6995 }, { "epoch": 0.7254016291985561, "grad_norm": 0.80859375, "learning_rate": 0.00017256347717382338, "loss": 4.2735, "step": 6996 }, { "epoch": 0.7255053172530442, "grad_norm": 0.71484375, "learning_rate": 0.0001725560026178137, "loss": 4.3301, "step": 6997 }, { "epoch": 0.7256090053075323, "grad_norm": 0.8125, "learning_rate": 0.00017254852720571982, "loss": 4.2935, "step": 6998 }, { "epoch": 0.7257126933620204, "grad_norm": 0.734375, "learning_rate": 0.00017254105093763008, "loss": 4.2975, "step": 6999 }, { "epoch": 0.7258163814165084, "grad_norm": 0.828125, "learning_rate": 0.0001725335738136326, "loss": 4.2722, "step": 7000 }, { "epoch": 0.7259200694709965, "grad_norm": 0.70703125, "learning_rate": 0.0001725260958338156, "loss": 4.297, "step": 7001 }, { "epoch": 0.7260237575254845, "grad_norm": 0.88671875, "learning_rate": 0.00017251861699826737, "loss": 4.2929, "step": 7002 }, { "epoch": 0.7261274455799727, "grad_norm": 0.63671875, "learning_rate": 0.00017251113730707608, "loss": 4.3054, "step": 7003 }, { "epoch": 0.7262311336344607, "grad_norm": 0.8359375, "learning_rate": 0.00017250365676033002, "loss": 4.2856, "step": 7004 }, { "epoch": 0.7263348216889488, "grad_norm": 0.84375, "learning_rate": 0.0001724961753581175, "loss": 4.2623, "step": 7005 }, { "epoch": 0.7264385097434368, "grad_norm": 0.7890625, "learning_rate": 0.0001724886931005267, "loss": 4.3196, "step": 7006 }, { "epoch": 0.726542197797925, "grad_norm": 0.796875, "learning_rate": 0.00017248120998764591, "loss": 4.3348, "step": 7007 }, { "epoch": 0.726645885852413, "grad_norm": 0.77734375, "learning_rate": 0.0001724737260195635, "loss": 4.2863, "step": 7008 }, { "epoch": 0.7267495739069011, "grad_norm": 0.8203125, "learning_rate": 0.00017246624119636773, "loss": 4.2792, "step": 7009 }, { "epoch": 0.7268532619613891, "grad_norm": 0.71875, "learning_rate": 0.00017245875551814689, "loss": 4.3015, "step": 7010 }, { "epoch": 0.7269569500158772, "grad_norm": 0.80859375, "learning_rate": 0.00017245126898498934, "loss": 4.3235, "step": 7011 }, { "epoch": 0.7270606380703653, "grad_norm": 0.74609375, "learning_rate": 0.00017244378159698344, "loss": 4.2926, "step": 7012 }, { "epoch": 0.7271643261248534, "grad_norm": 0.81640625, "learning_rate": 0.00017243629335421748, "loss": 4.2849, "step": 7013 }, { "epoch": 0.7272680141793415, "grad_norm": 0.83984375, "learning_rate": 0.0001724288042567798, "loss": 4.2794, "step": 7014 }, { "epoch": 0.7273717022338295, "grad_norm": 0.86328125, "learning_rate": 0.00017242131430475878, "loss": 4.2771, "step": 7015 }, { "epoch": 0.7274753902883176, "grad_norm": 0.80859375, "learning_rate": 0.00017241382349824283, "loss": 4.3026, "step": 7016 }, { "epoch": 0.7275790783428057, "grad_norm": 0.765625, "learning_rate": 0.00017240633183732032, "loss": 4.3207, "step": 7017 }, { "epoch": 0.7276827663972938, "grad_norm": 0.7578125, "learning_rate": 0.0001723988393220796, "loss": 4.3208, "step": 7018 }, { "epoch": 0.7277864544517818, "grad_norm": 0.6953125, "learning_rate": 0.0001723913459526091, "loss": 4.3088, "step": 7019 }, { "epoch": 0.7278901425062699, "grad_norm": 0.73828125, "learning_rate": 0.00017238385172899727, "loss": 4.257, "step": 7020 }, { "epoch": 0.727993830560758, "grad_norm": 0.72265625, "learning_rate": 0.00017237635665133248, "loss": 4.2831, "step": 7021 }, { "epoch": 0.7280975186152461, "grad_norm": 0.68359375, "learning_rate": 0.0001723688607197032, "loss": 4.2535, "step": 7022 }, { "epoch": 0.7282012066697341, "grad_norm": 0.76953125, "learning_rate": 0.00017236136393419783, "loss": 4.3271, "step": 7023 }, { "epoch": 0.7283048947242222, "grad_norm": 0.671875, "learning_rate": 0.0001723538662949049, "loss": 4.2639, "step": 7024 }, { "epoch": 0.7284085827787102, "grad_norm": 0.7734375, "learning_rate": 0.00017234636780191282, "loss": 4.2348, "step": 7025 }, { "epoch": 0.7285122708331984, "grad_norm": 0.71875, "learning_rate": 0.00017233886845531004, "loss": 4.3681, "step": 7026 }, { "epoch": 0.7286159588876864, "grad_norm": 0.7734375, "learning_rate": 0.00017233136825518512, "loss": 4.2736, "step": 7027 }, { "epoch": 0.7287196469421745, "grad_norm": 0.73828125, "learning_rate": 0.00017232386720162648, "loss": 4.3037, "step": 7028 }, { "epoch": 0.7288233349966625, "grad_norm": 0.78125, "learning_rate": 0.00017231636529472266, "loss": 4.2867, "step": 7029 }, { "epoch": 0.7289270230511506, "grad_norm": 0.765625, "learning_rate": 0.00017230886253456217, "loss": 4.2942, "step": 7030 }, { "epoch": 0.7290307111056387, "grad_norm": 0.7890625, "learning_rate": 0.00017230135892123358, "loss": 4.3403, "step": 7031 }, { "epoch": 0.7291343991601268, "grad_norm": 0.93359375, "learning_rate": 0.00017229385445482532, "loss": 4.3001, "step": 7032 }, { "epoch": 0.7292380872146148, "grad_norm": 0.7109375, "learning_rate": 0.00017228634913542604, "loss": 4.3063, "step": 7033 }, { "epoch": 0.7293417752691029, "grad_norm": 0.79296875, "learning_rate": 0.0001722788429631242, "loss": 4.3232, "step": 7034 }, { "epoch": 0.729445463323591, "grad_norm": 0.7578125, "learning_rate": 0.00017227133593800847, "loss": 4.2879, "step": 7035 }, { "epoch": 0.7295491513780791, "grad_norm": 0.76171875, "learning_rate": 0.00017226382806016734, "loss": 4.3101, "step": 7036 }, { "epoch": 0.7296528394325671, "grad_norm": 0.7578125, "learning_rate": 0.00017225631932968945, "loss": 4.2876, "step": 7037 }, { "epoch": 0.7297565274870552, "grad_norm": 0.78515625, "learning_rate": 0.00017224880974666337, "loss": 4.2636, "step": 7038 }, { "epoch": 0.7298602155415432, "grad_norm": 0.78515625, "learning_rate": 0.00017224129931117768, "loss": 4.3189, "step": 7039 }, { "epoch": 0.7299639035960314, "grad_norm": 0.78515625, "learning_rate": 0.00017223378802332098, "loss": 4.3109, "step": 7040 }, { "epoch": 0.7300675916505194, "grad_norm": 0.78515625, "learning_rate": 0.000172226275883182, "loss": 4.2895, "step": 7041 }, { "epoch": 0.7301712797050075, "grad_norm": 0.8046875, "learning_rate": 0.0001722187628908493, "loss": 4.2554, "step": 7042 }, { "epoch": 0.7302749677594955, "grad_norm": 0.8671875, "learning_rate": 0.00017221124904641153, "loss": 4.3007, "step": 7043 }, { "epoch": 0.7303786558139836, "grad_norm": 0.7578125, "learning_rate": 0.00017220373434995737, "loss": 4.2808, "step": 7044 }, { "epoch": 0.7304823438684717, "grad_norm": 0.796875, "learning_rate": 0.00017219621880157544, "loss": 4.275, "step": 7045 }, { "epoch": 0.7305860319229598, "grad_norm": 0.9140625, "learning_rate": 0.00017218870240135446, "loss": 4.3002, "step": 7046 }, { "epoch": 0.7306897199774478, "grad_norm": 0.703125, "learning_rate": 0.00017218118514938309, "loss": 4.3309, "step": 7047 }, { "epoch": 0.7307934080319359, "grad_norm": 0.8046875, "learning_rate": 0.00017217366704575, "loss": 4.3054, "step": 7048 }, { "epoch": 0.730897096086424, "grad_norm": 0.734375, "learning_rate": 0.00017216614809054402, "loss": 4.311, "step": 7049 }, { "epoch": 0.7310007841409121, "grad_norm": 0.69140625, "learning_rate": 0.0001721586282838537, "loss": 4.2963, "step": 7050 }, { "epoch": 0.7311044721954001, "grad_norm": 0.765625, "learning_rate": 0.00017215110762576786, "loss": 4.289, "step": 7051 }, { "epoch": 0.7312081602498882, "grad_norm": 0.6484375, "learning_rate": 0.00017214358611637522, "loss": 4.3274, "step": 7052 }, { "epoch": 0.7313118483043762, "grad_norm": 0.75, "learning_rate": 0.0001721360637557645, "loss": 4.2937, "step": 7053 }, { "epoch": 0.7314155363588644, "grad_norm": 0.7734375, "learning_rate": 0.00017212854054402451, "loss": 4.2802, "step": 7054 }, { "epoch": 0.7315192244133524, "grad_norm": 0.71875, "learning_rate": 0.00017212101648124398, "loss": 4.301, "step": 7055 }, { "epoch": 0.7316229124678405, "grad_norm": 0.72265625, "learning_rate": 0.00017211349156751168, "loss": 4.2797, "step": 7056 }, { "epoch": 0.7317266005223286, "grad_norm": 0.6953125, "learning_rate": 0.00017210596580291644, "loss": 4.3053, "step": 7057 }, { "epoch": 0.7318302885768166, "grad_norm": 0.7578125, "learning_rate": 0.00017209843918754698, "loss": 4.2942, "step": 7058 }, { "epoch": 0.7319339766313048, "grad_norm": 0.6875, "learning_rate": 0.00017209091172149215, "loss": 4.3132, "step": 7059 }, { "epoch": 0.7320376646857928, "grad_norm": 0.703125, "learning_rate": 0.0001720833834048408, "loss": 4.2949, "step": 7060 }, { "epoch": 0.7321413527402809, "grad_norm": 0.76171875, "learning_rate": 0.0001720758542376817, "loss": 4.3509, "step": 7061 }, { "epoch": 0.7322450407947689, "grad_norm": 0.6640625, "learning_rate": 0.0001720683242201037, "loss": 4.3095, "step": 7062 }, { "epoch": 0.7323487288492571, "grad_norm": 0.765625, "learning_rate": 0.00017206079335219568, "loss": 4.2731, "step": 7063 }, { "epoch": 0.7324524169037451, "grad_norm": 0.6953125, "learning_rate": 0.00017205326163404646, "loss": 4.3128, "step": 7064 }, { "epoch": 0.7325561049582332, "grad_norm": 0.7734375, "learning_rate": 0.0001720457290657449, "loss": 4.3371, "step": 7065 }, { "epoch": 0.7326597930127212, "grad_norm": 0.74609375, "learning_rate": 0.0001720381956473799, "loss": 4.3332, "step": 7066 }, { "epoch": 0.7327634810672093, "grad_norm": 0.74609375, "learning_rate": 0.00017203066137904034, "loss": 4.2822, "step": 7067 }, { "epoch": 0.7328671691216974, "grad_norm": 0.734375, "learning_rate": 0.0001720231262608151, "loss": 4.305, "step": 7068 }, { "epoch": 0.7329708571761855, "grad_norm": 0.74609375, "learning_rate": 0.00017201559029279313, "loss": 4.3058, "step": 7069 }, { "epoch": 0.7330745452306735, "grad_norm": 0.73046875, "learning_rate": 0.0001720080534750633, "loss": 4.3136, "step": 7070 }, { "epoch": 0.7331782332851616, "grad_norm": 0.7109375, "learning_rate": 0.00017200051580771458, "loss": 4.2996, "step": 7071 }, { "epoch": 0.7332819213396496, "grad_norm": 0.75390625, "learning_rate": 0.00017199297729083584, "loss": 4.2357, "step": 7072 }, { "epoch": 0.7333856093941378, "grad_norm": 0.71484375, "learning_rate": 0.00017198543792451608, "loss": 4.2651, "step": 7073 }, { "epoch": 0.7334892974486258, "grad_norm": 0.828125, "learning_rate": 0.00017197789770884424, "loss": 4.2955, "step": 7074 }, { "epoch": 0.7335929855031139, "grad_norm": 0.76953125, "learning_rate": 0.00017197035664390936, "loss": 4.308, "step": 7075 }, { "epoch": 0.7336966735576019, "grad_norm": 0.78515625, "learning_rate": 0.00017196281472980026, "loss": 4.2533, "step": 7076 }, { "epoch": 0.7338003616120901, "grad_norm": 0.7265625, "learning_rate": 0.00017195527196660603, "loss": 4.3313, "step": 7077 }, { "epoch": 0.7339040496665781, "grad_norm": 0.80078125, "learning_rate": 0.0001719477283544157, "loss": 4.2987, "step": 7078 }, { "epoch": 0.7340077377210662, "grad_norm": 0.75390625, "learning_rate": 0.00017194018389331817, "loss": 4.3059, "step": 7079 }, { "epoch": 0.7341114257755542, "grad_norm": 0.8671875, "learning_rate": 0.00017193263858340254, "loss": 4.2652, "step": 7080 }, { "epoch": 0.7342151138300423, "grad_norm": 0.66796875, "learning_rate": 0.00017192509242475783, "loss": 4.3037, "step": 7081 }, { "epoch": 0.7343188018845304, "grad_norm": 0.796875, "learning_rate": 0.00017191754541747301, "loss": 4.2889, "step": 7082 }, { "epoch": 0.7344224899390185, "grad_norm": 0.7265625, "learning_rate": 0.00017190999756163723, "loss": 4.3051, "step": 7083 }, { "epoch": 0.7345261779935065, "grad_norm": 0.765625, "learning_rate": 0.00017190244885733946, "loss": 4.3293, "step": 7084 }, { "epoch": 0.7346298660479946, "grad_norm": 0.796875, "learning_rate": 0.00017189489930466878, "loss": 4.3515, "step": 7085 }, { "epoch": 0.7347335541024826, "grad_norm": 0.75, "learning_rate": 0.0001718873489037143, "loss": 4.309, "step": 7086 }, { "epoch": 0.7348372421569708, "grad_norm": 0.859375, "learning_rate": 0.00017187979765456512, "loss": 4.2999, "step": 7087 }, { "epoch": 0.7349409302114588, "grad_norm": 0.71875, "learning_rate": 0.0001718722455573103, "loss": 4.2773, "step": 7088 }, { "epoch": 0.7350446182659469, "grad_norm": 0.73828125, "learning_rate": 0.0001718646926120389, "loss": 4.3381, "step": 7089 }, { "epoch": 0.7351483063204349, "grad_norm": 0.73828125, "learning_rate": 0.00017185713881884014, "loss": 4.2982, "step": 7090 }, { "epoch": 0.7352519943749231, "grad_norm": 0.74609375, "learning_rate": 0.00017184958417780307, "loss": 4.272, "step": 7091 }, { "epoch": 0.7353556824294111, "grad_norm": 0.7265625, "learning_rate": 0.00017184202868901691, "loss": 4.3178, "step": 7092 }, { "epoch": 0.7354593704838992, "grad_norm": 0.828125, "learning_rate": 0.0001718344723525707, "loss": 4.3005, "step": 7093 }, { "epoch": 0.7355630585383872, "grad_norm": 0.6953125, "learning_rate": 0.00017182691516855368, "loss": 4.3045, "step": 7094 }, { "epoch": 0.7356667465928753, "grad_norm": 0.72265625, "learning_rate": 0.00017181935713705496, "loss": 4.3003, "step": 7095 }, { "epoch": 0.7357704346473634, "grad_norm": 0.68359375, "learning_rate": 0.00017181179825816374, "loss": 4.3082, "step": 7096 }, { "epoch": 0.7358741227018515, "grad_norm": 0.65625, "learning_rate": 0.00017180423853196923, "loss": 4.3297, "step": 7097 }, { "epoch": 0.7359778107563395, "grad_norm": 0.68359375, "learning_rate": 0.0001717966779585606, "loss": 4.2416, "step": 7098 }, { "epoch": 0.7360814988108276, "grad_norm": 0.71484375, "learning_rate": 0.00017178911653802705, "loss": 4.2868, "step": 7099 }, { "epoch": 0.7361851868653156, "grad_norm": 0.66015625, "learning_rate": 0.00017178155427045782, "loss": 4.3129, "step": 7100 }, { "epoch": 0.7362888749198038, "grad_norm": 0.73828125, "learning_rate": 0.00017177399115594213, "loss": 4.3162, "step": 7101 }, { "epoch": 0.7363925629742919, "grad_norm": 0.6953125, "learning_rate": 0.0001717664271945692, "loss": 4.2818, "step": 7102 }, { "epoch": 0.7364962510287799, "grad_norm": 0.81640625, "learning_rate": 0.00017175886238642832, "loss": 4.3238, "step": 7103 }, { "epoch": 0.736599939083268, "grad_norm": 0.703125, "learning_rate": 0.00017175129673160865, "loss": 4.2752, "step": 7104 }, { "epoch": 0.7367036271377561, "grad_norm": 0.76171875, "learning_rate": 0.00017174373023019958, "loss": 4.3085, "step": 7105 }, { "epoch": 0.7368073151922442, "grad_norm": 0.78515625, "learning_rate": 0.0001717361628822903, "loss": 4.2717, "step": 7106 }, { "epoch": 0.7369110032467322, "grad_norm": 0.6953125, "learning_rate": 0.00017172859468797015, "loss": 4.3096, "step": 7107 }, { "epoch": 0.7370146913012203, "grad_norm": 0.8671875, "learning_rate": 0.0001717210256473284, "loss": 4.3106, "step": 7108 }, { "epoch": 0.7371183793557083, "grad_norm": 0.703125, "learning_rate": 0.00017171345576045437, "loss": 4.3068, "step": 7109 }, { "epoch": 0.7372220674101965, "grad_norm": 0.890625, "learning_rate": 0.00017170588502743735, "loss": 4.2665, "step": 7110 }, { "epoch": 0.7373257554646845, "grad_norm": 0.69921875, "learning_rate": 0.00017169831344836668, "loss": 4.3333, "step": 7111 }, { "epoch": 0.7374294435191726, "grad_norm": 0.79296875, "learning_rate": 0.0001716907410233317, "loss": 4.2849, "step": 7112 }, { "epoch": 0.7375331315736606, "grad_norm": 0.71875, "learning_rate": 0.00017168316775242174, "loss": 4.2733, "step": 7113 }, { "epoch": 0.7376368196281488, "grad_norm": 0.74609375, "learning_rate": 0.0001716755936357262, "loss": 4.2969, "step": 7114 }, { "epoch": 0.7377405076826368, "grad_norm": 0.8125, "learning_rate": 0.00017166801867333443, "loss": 4.3362, "step": 7115 }, { "epoch": 0.7378441957371249, "grad_norm": 0.75390625, "learning_rate": 0.00017166044286533576, "loss": 4.2884, "step": 7116 }, { "epoch": 0.7379478837916129, "grad_norm": 0.75, "learning_rate": 0.00017165286621181961, "loss": 4.2443, "step": 7117 }, { "epoch": 0.738051571846101, "grad_norm": 0.734375, "learning_rate": 0.0001716452887128754, "loss": 4.3151, "step": 7118 }, { "epoch": 0.7381552599005891, "grad_norm": 0.671875, "learning_rate": 0.00017163771036859252, "loss": 4.3142, "step": 7119 }, { "epoch": 0.7382589479550772, "grad_norm": 0.7109375, "learning_rate": 0.0001716301311790604, "loss": 4.2677, "step": 7120 }, { "epoch": 0.7383626360095652, "grad_norm": 0.66796875, "learning_rate": 0.00017162255114436842, "loss": 4.2915, "step": 7121 }, { "epoch": 0.7384663240640533, "grad_norm": 0.6953125, "learning_rate": 0.00017161497026460605, "loss": 4.2741, "step": 7122 }, { "epoch": 0.7385700121185413, "grad_norm": 0.66796875, "learning_rate": 0.00017160738853986272, "loss": 4.2836, "step": 7123 }, { "epoch": 0.7386737001730295, "grad_norm": 0.6640625, "learning_rate": 0.0001715998059702279, "loss": 4.2986, "step": 7124 }, { "epoch": 0.7387773882275175, "grad_norm": 0.68359375, "learning_rate": 0.00017159222255579105, "loss": 4.3167, "step": 7125 }, { "epoch": 0.7388810762820056, "grad_norm": 0.6953125, "learning_rate": 0.00017158463829664169, "loss": 4.3071, "step": 7126 }, { "epoch": 0.7389847643364936, "grad_norm": 0.66015625, "learning_rate": 0.00017157705319286923, "loss": 4.2997, "step": 7127 }, { "epoch": 0.7390884523909818, "grad_norm": 0.734375, "learning_rate": 0.00017156946724456321, "loss": 4.3084, "step": 7128 }, { "epoch": 0.7391921404454698, "grad_norm": 0.65625, "learning_rate": 0.00017156188045181313, "loss": 4.3035, "step": 7129 }, { "epoch": 0.7392958284999579, "grad_norm": 0.74609375, "learning_rate": 0.00017155429281470852, "loss": 4.3053, "step": 7130 }, { "epoch": 0.7393995165544459, "grad_norm": 0.625, "learning_rate": 0.00017154670433333887, "loss": 4.2988, "step": 7131 }, { "epoch": 0.739503204608934, "grad_norm": 0.69921875, "learning_rate": 0.00017153911500779377, "loss": 4.2948, "step": 7132 }, { "epoch": 0.739606892663422, "grad_norm": 0.7109375, "learning_rate": 0.00017153152483816267, "loss": 4.2857, "step": 7133 }, { "epoch": 0.7397105807179102, "grad_norm": 0.73828125, "learning_rate": 0.00017152393382453523, "loss": 4.2967, "step": 7134 }, { "epoch": 0.7398142687723982, "grad_norm": 0.78515625, "learning_rate": 0.00017151634196700097, "loss": 4.3033, "step": 7135 }, { "epoch": 0.7399179568268863, "grad_norm": 0.70703125, "learning_rate": 0.00017150874926564948, "loss": 4.2786, "step": 7136 }, { "epoch": 0.7400216448813743, "grad_norm": 0.73828125, "learning_rate": 0.00017150115572057032, "loss": 4.2795, "step": 7137 }, { "epoch": 0.7401253329358625, "grad_norm": 0.74609375, "learning_rate": 0.00017149356133185312, "loss": 4.2819, "step": 7138 }, { "epoch": 0.7402290209903505, "grad_norm": 0.796875, "learning_rate": 0.00017148596609958746, "loss": 4.2671, "step": 7139 }, { "epoch": 0.7403327090448386, "grad_norm": 0.7109375, "learning_rate": 0.00017147837002386295, "loss": 4.2576, "step": 7140 }, { "epoch": 0.7404363970993266, "grad_norm": 0.8125, "learning_rate": 0.00017147077310476923, "loss": 4.2497, "step": 7141 }, { "epoch": 0.7405400851538148, "grad_norm": 0.7265625, "learning_rate": 0.00017146317534239597, "loss": 4.3075, "step": 7142 }, { "epoch": 0.7406437732083028, "grad_norm": 0.75390625, "learning_rate": 0.00017145557673683273, "loss": 4.3005, "step": 7143 }, { "epoch": 0.7407474612627909, "grad_norm": 0.7421875, "learning_rate": 0.00017144797728816928, "loss": 4.3209, "step": 7144 }, { "epoch": 0.7408511493172789, "grad_norm": 0.80859375, "learning_rate": 0.0001714403769964952, "loss": 4.284, "step": 7145 }, { "epoch": 0.740954837371767, "grad_norm": 0.6328125, "learning_rate": 0.00017143277586190015, "loss": 4.3253, "step": 7146 }, { "epoch": 0.7410585254262552, "grad_norm": 0.7890625, "learning_rate": 0.00017142517388447388, "loss": 4.3014, "step": 7147 }, { "epoch": 0.7411622134807432, "grad_norm": 0.67578125, "learning_rate": 0.00017141757106430605, "loss": 4.2871, "step": 7148 }, { "epoch": 0.7412659015352313, "grad_norm": 0.7421875, "learning_rate": 0.00017140996740148636, "loss": 4.2668, "step": 7149 }, { "epoch": 0.7413695895897193, "grad_norm": 0.65625, "learning_rate": 0.00017140236289610457, "loss": 4.3317, "step": 7150 }, { "epoch": 0.7414732776442075, "grad_norm": 0.7734375, "learning_rate": 0.00017139475754825037, "loss": 4.279, "step": 7151 }, { "epoch": 0.7415769656986955, "grad_norm": 0.67578125, "learning_rate": 0.00017138715135801347, "loss": 4.2713, "step": 7152 }, { "epoch": 0.7416806537531836, "grad_norm": 0.796875, "learning_rate": 0.00017137954432548365, "loss": 4.271, "step": 7153 }, { "epoch": 0.7417843418076716, "grad_norm": 0.72265625, "learning_rate": 0.00017137193645075068, "loss": 4.3096, "step": 7154 }, { "epoch": 0.7418880298621597, "grad_norm": 0.765625, "learning_rate": 0.00017136432773390427, "loss": 4.3234, "step": 7155 }, { "epoch": 0.7419917179166478, "grad_norm": 0.796875, "learning_rate": 0.00017135671817503426, "loss": 4.3376, "step": 7156 }, { "epoch": 0.7420954059711359, "grad_norm": 0.76171875, "learning_rate": 0.0001713491077742304, "loss": 4.2886, "step": 7157 }, { "epoch": 0.7421990940256239, "grad_norm": 0.77734375, "learning_rate": 0.0001713414965315825, "loss": 4.299, "step": 7158 }, { "epoch": 0.742302782080112, "grad_norm": 0.7421875, "learning_rate": 0.0001713338844471803, "loss": 4.2673, "step": 7159 }, { "epoch": 0.7424064701346, "grad_norm": 0.75390625, "learning_rate": 0.00017132627152111372, "loss": 4.297, "step": 7160 }, { "epoch": 0.7425101581890882, "grad_norm": 0.71875, "learning_rate": 0.00017131865775347249, "loss": 4.2811, "step": 7161 }, { "epoch": 0.7426138462435762, "grad_norm": 0.75, "learning_rate": 0.00017131104314434652, "loss": 4.2929, "step": 7162 }, { "epoch": 0.7427175342980643, "grad_norm": 0.8046875, "learning_rate": 0.00017130342769382562, "loss": 4.3183, "step": 7163 }, { "epoch": 0.7428212223525523, "grad_norm": 0.6875, "learning_rate": 0.00017129581140199962, "loss": 4.2843, "step": 7164 }, { "epoch": 0.7429249104070405, "grad_norm": 0.76953125, "learning_rate": 0.00017128819426895841, "loss": 4.288, "step": 7165 }, { "epoch": 0.7430285984615285, "grad_norm": 0.71875, "learning_rate": 0.00017128057629479186, "loss": 4.2776, "step": 7166 }, { "epoch": 0.7431322865160166, "grad_norm": 0.72265625, "learning_rate": 0.0001712729574795899, "loss": 4.2689, "step": 7167 }, { "epoch": 0.7432359745705046, "grad_norm": 0.8203125, "learning_rate": 0.00017126533782344235, "loss": 4.3065, "step": 7168 }, { "epoch": 0.7433396626249927, "grad_norm": 0.69921875, "learning_rate": 0.00017125771732643915, "loss": 4.3338, "step": 7169 }, { "epoch": 0.7434433506794808, "grad_norm": 0.8125, "learning_rate": 0.00017125009598867018, "loss": 4.2874, "step": 7170 }, { "epoch": 0.7435470387339689, "grad_norm": 0.79296875, "learning_rate": 0.0001712424738102254, "loss": 4.2767, "step": 7171 }, { "epoch": 0.7436507267884569, "grad_norm": 0.80859375, "learning_rate": 0.00017123485079119477, "loss": 4.3092, "step": 7172 }, { "epoch": 0.743754414842945, "grad_norm": 0.73046875, "learning_rate": 0.00017122722693166815, "loss": 4.2933, "step": 7173 }, { "epoch": 0.743858102897433, "grad_norm": 0.80859375, "learning_rate": 0.00017121960223173558, "loss": 4.2672, "step": 7174 }, { "epoch": 0.7439617909519212, "grad_norm": 0.80859375, "learning_rate": 0.000171211976691487, "loss": 4.2863, "step": 7175 }, { "epoch": 0.7440654790064092, "grad_norm": 0.7734375, "learning_rate": 0.00017120435031101232, "loss": 4.2946, "step": 7176 }, { "epoch": 0.7441691670608973, "grad_norm": 0.78125, "learning_rate": 0.0001711967230904016, "loss": 4.2853, "step": 7177 }, { "epoch": 0.7442728551153853, "grad_norm": 0.74609375, "learning_rate": 0.00017118909502974482, "loss": 4.3053, "step": 7178 }, { "epoch": 0.7443765431698735, "grad_norm": 0.8828125, "learning_rate": 0.00017118146612913192, "loss": 4.251, "step": 7179 }, { "epoch": 0.7444802312243615, "grad_norm": 0.8046875, "learning_rate": 0.00017117383638865302, "loss": 4.2856, "step": 7180 }, { "epoch": 0.7445839192788496, "grad_norm": 0.8046875, "learning_rate": 0.00017116620580839804, "loss": 4.3232, "step": 7181 }, { "epoch": 0.7446876073333376, "grad_norm": 0.83984375, "learning_rate": 0.00017115857438845708, "loss": 4.2889, "step": 7182 }, { "epoch": 0.7447912953878257, "grad_norm": 0.72265625, "learning_rate": 0.00017115094212892017, "loss": 4.2871, "step": 7183 }, { "epoch": 0.7448949834423138, "grad_norm": 0.79296875, "learning_rate": 0.00017114330902987733, "loss": 4.2325, "step": 7184 }, { "epoch": 0.7449986714968019, "grad_norm": 0.80859375, "learning_rate": 0.00017113567509141863, "loss": 4.2844, "step": 7185 }, { "epoch": 0.7451023595512899, "grad_norm": 0.75, "learning_rate": 0.0001711280403136342, "loss": 4.2512, "step": 7186 }, { "epoch": 0.745206047605778, "grad_norm": 0.90625, "learning_rate": 0.00017112040469661409, "loss": 4.3283, "step": 7187 }, { "epoch": 0.745309735660266, "grad_norm": 0.703125, "learning_rate": 0.00017111276824044832, "loss": 4.2993, "step": 7188 }, { "epoch": 0.7454134237147542, "grad_norm": 0.9140625, "learning_rate": 0.00017110513094522708, "loss": 4.2461, "step": 7189 }, { "epoch": 0.7455171117692422, "grad_norm": 0.71484375, "learning_rate": 0.00017109749281104048, "loss": 4.3055, "step": 7190 }, { "epoch": 0.7456207998237303, "grad_norm": 0.890625, "learning_rate": 0.0001710898538379786, "loss": 4.2779, "step": 7191 }, { "epoch": 0.7457244878782184, "grad_norm": 0.80078125, "learning_rate": 0.0001710822140261316, "loss": 4.2817, "step": 7192 }, { "epoch": 0.7458281759327064, "grad_norm": 0.81640625, "learning_rate": 0.00017107457337558958, "loss": 4.3003, "step": 7193 }, { "epoch": 0.7459318639871946, "grad_norm": 0.78125, "learning_rate": 0.00017106693188644276, "loss": 4.2541, "step": 7194 }, { "epoch": 0.7460355520416826, "grad_norm": 0.75, "learning_rate": 0.00017105928955878127, "loss": 4.2842, "step": 7195 }, { "epoch": 0.7461392400961707, "grad_norm": 0.859375, "learning_rate": 0.00017105164639269526, "loss": 4.2931, "step": 7196 }, { "epoch": 0.7462429281506587, "grad_norm": 0.79296875, "learning_rate": 0.0001710440023882749, "loss": 4.3405, "step": 7197 }, { "epoch": 0.7463466162051469, "grad_norm": 0.890625, "learning_rate": 0.00017103635754561045, "loss": 4.3018, "step": 7198 }, { "epoch": 0.7464503042596349, "grad_norm": 0.80078125, "learning_rate": 0.00017102871186479206, "loss": 4.3131, "step": 7199 }, { "epoch": 0.746553992314123, "grad_norm": 0.77734375, "learning_rate": 0.00017102106534590993, "loss": 4.3459, "step": 7200 }, { "epoch": 0.746657680368611, "grad_norm": 0.734375, "learning_rate": 0.00017101341798905434, "loss": 4.3024, "step": 7201 }, { "epoch": 0.7467613684230991, "grad_norm": 0.75390625, "learning_rate": 0.00017100576979431543, "loss": 4.2866, "step": 7202 }, { "epoch": 0.7468650564775872, "grad_norm": 0.7421875, "learning_rate": 0.00017099812076178355, "loss": 4.295, "step": 7203 }, { "epoch": 0.7469687445320753, "grad_norm": 0.69921875, "learning_rate": 0.00017099047089154886, "loss": 4.2849, "step": 7204 }, { "epoch": 0.7470724325865633, "grad_norm": 0.8203125, "learning_rate": 0.00017098282018370163, "loss": 4.3026, "step": 7205 }, { "epoch": 0.7471761206410514, "grad_norm": 0.7109375, "learning_rate": 0.00017097516863833222, "loss": 4.2857, "step": 7206 }, { "epoch": 0.7472798086955394, "grad_norm": 0.84375, "learning_rate": 0.0001709675162555308, "loss": 4.2794, "step": 7207 }, { "epoch": 0.7473834967500276, "grad_norm": 0.76953125, "learning_rate": 0.0001709598630353877, "loss": 4.2753, "step": 7208 }, { "epoch": 0.7474871848045156, "grad_norm": 0.8046875, "learning_rate": 0.00017095220897799323, "loss": 4.2956, "step": 7209 }, { "epoch": 0.7475908728590037, "grad_norm": 0.875, "learning_rate": 0.00017094455408343768, "loss": 4.2803, "step": 7210 }, { "epoch": 0.7476945609134917, "grad_norm": 0.76953125, "learning_rate": 0.0001709368983518114, "loss": 4.2632, "step": 7211 }, { "epoch": 0.7477982489679799, "grad_norm": 0.7890625, "learning_rate": 0.0001709292417832047, "loss": 4.3143, "step": 7212 }, { "epoch": 0.7479019370224679, "grad_norm": 0.79296875, "learning_rate": 0.00017092158437770794, "loss": 4.2838, "step": 7213 }, { "epoch": 0.748005625076956, "grad_norm": 0.8046875, "learning_rate": 0.00017091392613541144, "loss": 4.284, "step": 7214 }, { "epoch": 0.748109313131444, "grad_norm": 0.73046875, "learning_rate": 0.00017090626705640557, "loss": 4.2753, "step": 7215 }, { "epoch": 0.7482130011859321, "grad_norm": 0.7421875, "learning_rate": 0.0001708986071407807, "loss": 4.3097, "step": 7216 }, { "epoch": 0.7483166892404202, "grad_norm": 0.703125, "learning_rate": 0.0001708909463886272, "loss": 4.2965, "step": 7217 }, { "epoch": 0.7484203772949083, "grad_norm": 0.8203125, "learning_rate": 0.00017088328480003545, "loss": 4.2942, "step": 7218 }, { "epoch": 0.7485240653493963, "grad_norm": 0.63671875, "learning_rate": 0.00017087562237509592, "loss": 4.3179, "step": 7219 }, { "epoch": 0.7486277534038844, "grad_norm": 0.8125, "learning_rate": 0.00017086795911389895, "loss": 4.289, "step": 7220 }, { "epoch": 0.7487314414583724, "grad_norm": 0.7265625, "learning_rate": 0.00017086029501653496, "loss": 4.2506, "step": 7221 }, { "epoch": 0.7488351295128606, "grad_norm": 0.9140625, "learning_rate": 0.00017085263008309438, "loss": 4.3148, "step": 7222 }, { "epoch": 0.7489388175673486, "grad_norm": 0.80859375, "learning_rate": 0.00017084496431366767, "loss": 4.2945, "step": 7223 }, { "epoch": 0.7490425056218367, "grad_norm": 0.921875, "learning_rate": 0.00017083729770834527, "loss": 4.3027, "step": 7224 }, { "epoch": 0.7491461936763247, "grad_norm": 0.87890625, "learning_rate": 0.00017082963026721762, "loss": 4.2845, "step": 7225 }, { "epoch": 0.7492498817308129, "grad_norm": 0.86328125, "learning_rate": 0.0001708219619903752, "loss": 4.3067, "step": 7226 }, { "epoch": 0.7493535697853009, "grad_norm": 0.921875, "learning_rate": 0.00017081429287790854, "loss": 4.3316, "step": 7227 }, { "epoch": 0.749457257839789, "grad_norm": 0.80859375, "learning_rate": 0.00017080662292990803, "loss": 4.309, "step": 7228 }, { "epoch": 0.749560945894277, "grad_norm": 0.9765625, "learning_rate": 0.0001707989521464642, "loss": 4.2783, "step": 7229 }, { "epoch": 0.7496646339487651, "grad_norm": 0.86328125, "learning_rate": 0.00017079128052766764, "loss": 4.2817, "step": 7230 }, { "epoch": 0.7497683220032532, "grad_norm": 0.82421875, "learning_rate": 0.00017078360807360875, "loss": 4.2768, "step": 7231 }, { "epoch": 0.7498720100577413, "grad_norm": 0.9453125, "learning_rate": 0.00017077593478437812, "loss": 4.2631, "step": 7232 }, { "epoch": 0.7499756981122293, "grad_norm": 0.80078125, "learning_rate": 0.00017076826066006627, "loss": 4.2689, "step": 7233 }, { "epoch": 0.7499756981122293, "eval_loss": 4.309548377990723, "eval_runtime": 0.441, "eval_samples_per_second": 337.873, "eval_steps_per_second": 15.873, "step": 7233 }, { "epoch": 0.7500793861667174, "grad_norm": 0.890625, "learning_rate": 0.00017076058570076374, "loss": 4.2999, "step": 7234 }, { "epoch": 0.7501830742212054, "grad_norm": 0.96484375, "learning_rate": 0.0001707529099065611, "loss": 4.2887, "step": 7235 }, { "epoch": 0.7502867622756936, "grad_norm": 0.84375, "learning_rate": 0.00017074523327754895, "loss": 4.2617, "step": 7236 }, { "epoch": 0.7503904503301817, "grad_norm": 0.9140625, "learning_rate": 0.00017073755581381778, "loss": 4.3132, "step": 7237 }, { "epoch": 0.7504941383846697, "grad_norm": 0.8671875, "learning_rate": 0.00017072987751545827, "loss": 4.2769, "step": 7238 }, { "epoch": 0.7505978264391578, "grad_norm": 0.8125, "learning_rate": 0.00017072219838256092, "loss": 4.3216, "step": 7239 }, { "epoch": 0.7507015144936459, "grad_norm": 0.97265625, "learning_rate": 0.0001707145184152164, "loss": 4.3209, "step": 7240 }, { "epoch": 0.750805202548134, "grad_norm": 0.81640625, "learning_rate": 0.00017070683761351532, "loss": 4.3313, "step": 7241 }, { "epoch": 0.750908890602622, "grad_norm": 0.79296875, "learning_rate": 0.00017069915597754827, "loss": 4.2942, "step": 7242 }, { "epoch": 0.7510125786571101, "grad_norm": 0.984375, "learning_rate": 0.00017069147350740594, "loss": 4.3034, "step": 7243 }, { "epoch": 0.7511162667115981, "grad_norm": 0.84375, "learning_rate": 0.00017068379020317894, "loss": 4.3169, "step": 7244 }, { "epoch": 0.7512199547660863, "grad_norm": 0.74609375, "learning_rate": 0.00017067610606495795, "loss": 4.3012, "step": 7245 }, { "epoch": 0.7513236428205743, "grad_norm": 0.9296875, "learning_rate": 0.00017066842109283358, "loss": 4.3494, "step": 7246 }, { "epoch": 0.7514273308750624, "grad_norm": 0.765625, "learning_rate": 0.00017066073528689658, "loss": 4.3039, "step": 7247 }, { "epoch": 0.7515310189295504, "grad_norm": 0.94921875, "learning_rate": 0.00017065304864723757, "loss": 4.3302, "step": 7248 }, { "epoch": 0.7516347069840386, "grad_norm": 0.9453125, "learning_rate": 0.00017064536117394728, "loss": 4.2775, "step": 7249 }, { "epoch": 0.7517383950385266, "grad_norm": 0.796875, "learning_rate": 0.0001706376728671164, "loss": 4.2653, "step": 7250 }, { "epoch": 0.7518420830930147, "grad_norm": 1.1015625, "learning_rate": 0.00017062998372683566, "loss": 4.3025, "step": 7251 }, { "epoch": 0.7519457711475027, "grad_norm": 0.73828125, "learning_rate": 0.00017062229375319573, "loss": 4.2539, "step": 7252 }, { "epoch": 0.7520494592019908, "grad_norm": 1.2578125, "learning_rate": 0.00017061460294628744, "loss": 4.2528, "step": 7253 }, { "epoch": 0.7521531472564789, "grad_norm": 0.8359375, "learning_rate": 0.00017060691130620144, "loss": 4.318, "step": 7254 }, { "epoch": 0.752256835310967, "grad_norm": 1.3125, "learning_rate": 0.00017059921883302853, "loss": 4.3094, "step": 7255 }, { "epoch": 0.752360523365455, "grad_norm": 1.0390625, "learning_rate": 0.0001705915255268595, "loss": 4.3801, "step": 7256 }, { "epoch": 0.7524642114199431, "grad_norm": 1.828125, "learning_rate": 0.00017058383138778504, "loss": 4.319, "step": 7257 }, { "epoch": 0.7525678994744311, "grad_norm": 1.6328125, "learning_rate": 0.00017057613641589597, "loss": 4.2533, "step": 7258 }, { "epoch": 0.7526715875289193, "grad_norm": 1.5, "learning_rate": 0.00017056844061128312, "loss": 4.2892, "step": 7259 }, { "epoch": 0.7527752755834073, "grad_norm": 1.484375, "learning_rate": 0.00017056074397403726, "loss": 4.2962, "step": 7260 }, { "epoch": 0.7528789636378954, "grad_norm": 1.203125, "learning_rate": 0.00017055304650424923, "loss": 4.3135, "step": 7261 }, { "epoch": 0.7529826516923834, "grad_norm": 1.25, "learning_rate": 0.00017054534820200982, "loss": 4.2944, "step": 7262 }, { "epoch": 0.7530863397468716, "grad_norm": 1.3671875, "learning_rate": 0.0001705376490674098, "loss": 4.2739, "step": 7263 }, { "epoch": 0.7531900278013596, "grad_norm": 1.21875, "learning_rate": 0.00017052994910054018, "loss": 4.2843, "step": 7264 }, { "epoch": 0.7532937158558477, "grad_norm": 1.5703125, "learning_rate": 0.00017052224830149166, "loss": 4.3169, "step": 7265 }, { "epoch": 0.7533974039103357, "grad_norm": 1.2890625, "learning_rate": 0.00017051454667035517, "loss": 4.2898, "step": 7266 }, { "epoch": 0.7535010919648238, "grad_norm": 1.6171875, "learning_rate": 0.00017050684420722155, "loss": 4.244, "step": 7267 }, { "epoch": 0.7536047800193119, "grad_norm": 1.4375, "learning_rate": 0.00017049914091218175, "loss": 4.3089, "step": 7268 }, { "epoch": 0.7537084680738, "grad_norm": 1.7265625, "learning_rate": 0.00017049143678532654, "loss": 4.284, "step": 7269 }, { "epoch": 0.753812156128288, "grad_norm": 1.6796875, "learning_rate": 0.00017048373182674694, "loss": 4.3093, "step": 7270 }, { "epoch": 0.7539158441827761, "grad_norm": 1.28125, "learning_rate": 0.0001704760260365338, "loss": 4.2962, "step": 7271 }, { "epoch": 0.7540195322372641, "grad_norm": 1.2578125, "learning_rate": 0.00017046831941477803, "loss": 4.3126, "step": 7272 }, { "epoch": 0.7541232202917523, "grad_norm": 1.5, "learning_rate": 0.0001704606119615706, "loss": 4.2415, "step": 7273 }, { "epoch": 0.7542269083462403, "grad_norm": 1.3984375, "learning_rate": 0.0001704529036770024, "loss": 4.3483, "step": 7274 }, { "epoch": 0.7543305964007284, "grad_norm": 1.578125, "learning_rate": 0.00017044519456116443, "loss": 4.3222, "step": 7275 }, { "epoch": 0.7544342844552164, "grad_norm": 1.40625, "learning_rate": 0.00017043748461414762, "loss": 4.2517, "step": 7276 }, { "epoch": 0.7545379725097046, "grad_norm": 1.4921875, "learning_rate": 0.00017042977383604295, "loss": 4.3102, "step": 7277 }, { "epoch": 0.7546416605641926, "grad_norm": 1.3515625, "learning_rate": 0.0001704220622269414, "loss": 4.3405, "step": 7278 }, { "epoch": 0.7547453486186807, "grad_norm": 1.5703125, "learning_rate": 0.00017041434978693393, "loss": 4.3233, "step": 7279 }, { "epoch": 0.7548490366731688, "grad_norm": 1.4921875, "learning_rate": 0.00017040663651611158, "loss": 4.2985, "step": 7280 }, { "epoch": 0.7549527247276568, "grad_norm": 1.390625, "learning_rate": 0.00017039892241456537, "loss": 4.2694, "step": 7281 }, { "epoch": 0.755056412782145, "grad_norm": 1.2734375, "learning_rate": 0.00017039120748238627, "loss": 4.3193, "step": 7282 }, { "epoch": 0.755160100836633, "grad_norm": 1.5859375, "learning_rate": 0.0001703834917196653, "loss": 4.2847, "step": 7283 }, { "epoch": 0.7552637888911211, "grad_norm": 1.484375, "learning_rate": 0.00017037577512649357, "loss": 4.2836, "step": 7284 }, { "epoch": 0.7553674769456091, "grad_norm": 1.28125, "learning_rate": 0.00017036805770296207, "loss": 4.2952, "step": 7285 }, { "epoch": 0.7554711650000973, "grad_norm": 1.2890625, "learning_rate": 0.00017036033944916188, "loss": 4.3062, "step": 7286 }, { "epoch": 0.7555748530545853, "grad_norm": 1.3671875, "learning_rate": 0.00017035262036518403, "loss": 4.2864, "step": 7287 }, { "epoch": 0.7556785411090734, "grad_norm": 1.28125, "learning_rate": 0.00017034490045111963, "loss": 4.2936, "step": 7288 }, { "epoch": 0.7557822291635614, "grad_norm": 1.484375, "learning_rate": 0.0001703371797070598, "loss": 4.265, "step": 7289 }, { "epoch": 0.7558859172180495, "grad_norm": 1.3828125, "learning_rate": 0.00017032945813309555, "loss": 4.3142, "step": 7290 }, { "epoch": 0.7559896052725376, "grad_norm": 1.234375, "learning_rate": 0.00017032173572931807, "loss": 4.2957, "step": 7291 }, { "epoch": 0.7560932933270257, "grad_norm": 1.1484375, "learning_rate": 0.00017031401249581842, "loss": 4.3144, "step": 7292 }, { "epoch": 0.7561969813815137, "grad_norm": 1.359375, "learning_rate": 0.00017030628843268776, "loss": 4.2702, "step": 7293 }, { "epoch": 0.7563006694360018, "grad_norm": 1.2890625, "learning_rate": 0.0001702985635400172, "loss": 4.3049, "step": 7294 }, { "epoch": 0.7564043574904898, "grad_norm": 1.3828125, "learning_rate": 0.00017029083781789793, "loss": 4.278, "step": 7295 }, { "epoch": 0.756508045544978, "grad_norm": 1.3203125, "learning_rate": 0.00017028311126642107, "loss": 4.2731, "step": 7296 }, { "epoch": 0.756611733599466, "grad_norm": 1.21875, "learning_rate": 0.00017027538388567778, "loss": 4.3164, "step": 7297 }, { "epoch": 0.7567154216539541, "grad_norm": 1.1015625, "learning_rate": 0.00017026765567575923, "loss": 4.3086, "step": 7298 }, { "epoch": 0.7568191097084421, "grad_norm": 1.4140625, "learning_rate": 0.0001702599266367566, "loss": 4.3135, "step": 7299 }, { "epoch": 0.7569227977629303, "grad_norm": 1.25, "learning_rate": 0.00017025219676876114, "loss": 4.2603, "step": 7300 }, { "epoch": 0.7570264858174183, "grad_norm": 1.5859375, "learning_rate": 0.00017024446607186402, "loss": 4.2973, "step": 7301 }, { "epoch": 0.7571301738719064, "grad_norm": 1.375, "learning_rate": 0.00017023673454615645, "loss": 4.3145, "step": 7302 }, { "epoch": 0.7572338619263944, "grad_norm": 1.4140625, "learning_rate": 0.00017022900219172964, "loss": 4.2712, "step": 7303 }, { "epoch": 0.7573375499808825, "grad_norm": 1.40625, "learning_rate": 0.00017022126900867484, "loss": 4.313, "step": 7304 }, { "epoch": 0.7574412380353706, "grad_norm": 1.328125, "learning_rate": 0.00017021353499708331, "loss": 4.3111, "step": 7305 }, { "epoch": 0.7575449260898587, "grad_norm": 1.2265625, "learning_rate": 0.00017020580015704627, "loss": 4.3204, "step": 7306 }, { "epoch": 0.7576486141443467, "grad_norm": 1.4765625, "learning_rate": 0.00017019806448865502, "loss": 4.3063, "step": 7307 }, { "epoch": 0.7577523021988348, "grad_norm": 1.28125, "learning_rate": 0.0001701903279920008, "loss": 4.3014, "step": 7308 }, { "epoch": 0.7578559902533228, "grad_norm": 1.796875, "learning_rate": 0.00017018259066717487, "loss": 4.2751, "step": 7309 }, { "epoch": 0.757959678307811, "grad_norm": 1.6484375, "learning_rate": 0.0001701748525142686, "loss": 4.2672, "step": 7310 }, { "epoch": 0.758063366362299, "grad_norm": 1.203125, "learning_rate": 0.00017016711353337325, "loss": 4.2977, "step": 7311 }, { "epoch": 0.7581670544167871, "grad_norm": 1.2109375, "learning_rate": 0.00017015937372458009, "loss": 4.2664, "step": 7312 }, { "epoch": 0.7582707424712751, "grad_norm": 1.25, "learning_rate": 0.0001701516330879805, "loss": 4.3269, "step": 7313 }, { "epoch": 0.7583744305257633, "grad_norm": 1.09375, "learning_rate": 0.0001701438916236658, "loss": 4.2875, "step": 7314 }, { "epoch": 0.7584781185802513, "grad_norm": 1.6015625, "learning_rate": 0.00017013614933172733, "loss": 4.3359, "step": 7315 }, { "epoch": 0.7585818066347394, "grad_norm": 1.53125, "learning_rate": 0.00017012840621225643, "loss": 4.3346, "step": 7316 }, { "epoch": 0.7586854946892274, "grad_norm": 1.4453125, "learning_rate": 0.00017012066226534446, "loss": 4.321, "step": 7317 }, { "epoch": 0.7587891827437155, "grad_norm": 1.359375, "learning_rate": 0.00017011291749108282, "loss": 4.2992, "step": 7318 }, { "epoch": 0.7588928707982036, "grad_norm": 1.28125, "learning_rate": 0.00017010517188956284, "loss": 4.2377, "step": 7319 }, { "epoch": 0.7589965588526917, "grad_norm": 1.2265625, "learning_rate": 0.00017009742546087594, "loss": 4.2905, "step": 7320 }, { "epoch": 0.7591002469071797, "grad_norm": 1.328125, "learning_rate": 0.00017008967820511352, "loss": 4.273, "step": 7321 }, { "epoch": 0.7592039349616678, "grad_norm": 1.2421875, "learning_rate": 0.000170081930122367, "loss": 4.2929, "step": 7322 }, { "epoch": 0.7593076230161558, "grad_norm": 1.4453125, "learning_rate": 0.00017007418121272775, "loss": 4.3323, "step": 7323 }, { "epoch": 0.759411311070644, "grad_norm": 1.3515625, "learning_rate": 0.00017006643147628726, "loss": 4.3453, "step": 7324 }, { "epoch": 0.7595149991251321, "grad_norm": 1.296875, "learning_rate": 0.00017005868091313694, "loss": 4.3426, "step": 7325 }, { "epoch": 0.7596186871796201, "grad_norm": 1.1953125, "learning_rate": 0.00017005092952336823, "loss": 4.2514, "step": 7326 }, { "epoch": 0.7597223752341082, "grad_norm": 1.4609375, "learning_rate": 0.00017004317730707263, "loss": 4.2899, "step": 7327 }, { "epoch": 0.7598260632885963, "grad_norm": 1.375, "learning_rate": 0.0001700354242643415, "loss": 4.2561, "step": 7328 }, { "epoch": 0.7599297513430844, "grad_norm": 1.46875, "learning_rate": 0.00017002767039526646, "loss": 4.3167, "step": 7329 }, { "epoch": 0.7600334393975724, "grad_norm": 1.4140625, "learning_rate": 0.0001700199156999389, "loss": 4.3333, "step": 7330 }, { "epoch": 0.7601371274520605, "grad_norm": 1.2734375, "learning_rate": 0.00017001216017845038, "loss": 4.2794, "step": 7331 }, { "epoch": 0.7602408155065485, "grad_norm": 1.15625, "learning_rate": 0.00017000440383089239, "loss": 4.2827, "step": 7332 }, { "epoch": 0.7603445035610367, "grad_norm": 1.5703125, "learning_rate": 0.00016999664665735638, "loss": 4.2929, "step": 7333 }, { "epoch": 0.7604481916155247, "grad_norm": 1.3203125, "learning_rate": 0.00016998888865793396, "loss": 4.3017, "step": 7334 }, { "epoch": 0.7605518796700128, "grad_norm": 1.515625, "learning_rate": 0.0001699811298327166, "loss": 4.2976, "step": 7335 }, { "epoch": 0.7606555677245008, "grad_norm": 1.3984375, "learning_rate": 0.0001699733701817959, "loss": 4.2537, "step": 7336 }, { "epoch": 0.760759255778989, "grad_norm": 1.2890625, "learning_rate": 0.0001699656097052634, "loss": 4.3015, "step": 7337 }, { "epoch": 0.760862943833477, "grad_norm": 1.1953125, "learning_rate": 0.0001699578484032107, "loss": 4.2592, "step": 7338 }, { "epoch": 0.7609666318879651, "grad_norm": 1.3671875, "learning_rate": 0.00016995008627572933, "loss": 4.3026, "step": 7339 }, { "epoch": 0.7610703199424531, "grad_norm": 1.140625, "learning_rate": 0.00016994232332291084, "loss": 4.2917, "step": 7340 }, { "epoch": 0.7611740079969412, "grad_norm": 1.6015625, "learning_rate": 0.00016993455954484687, "loss": 4.2922, "step": 7341 }, { "epoch": 0.7612776960514293, "grad_norm": 1.5078125, "learning_rate": 0.00016992679494162903, "loss": 4.2461, "step": 7342 }, { "epoch": 0.7613813841059174, "grad_norm": 1.4296875, "learning_rate": 0.00016991902951334894, "loss": 4.3031, "step": 7343 }, { "epoch": 0.7614850721604054, "grad_norm": 1.3046875, "learning_rate": 0.00016991126326009818, "loss": 4.2936, "step": 7344 }, { "epoch": 0.7615887602148935, "grad_norm": 1.3046875, "learning_rate": 0.00016990349618196845, "loss": 4.2804, "step": 7345 }, { "epoch": 0.7616924482693815, "grad_norm": 1.2734375, "learning_rate": 0.00016989572827905134, "loss": 4.2966, "step": 7346 }, { "epoch": 0.7617961363238697, "grad_norm": 1.4765625, "learning_rate": 0.00016988795955143852, "loss": 4.275, "step": 7347 }, { "epoch": 0.7618998243783577, "grad_norm": 1.2890625, "learning_rate": 0.00016988018999922167, "loss": 4.256, "step": 7348 }, { "epoch": 0.7620035124328458, "grad_norm": 1.4140625, "learning_rate": 0.0001698724196224924, "loss": 4.3285, "step": 7349 }, { "epoch": 0.7621072004873338, "grad_norm": 1.28125, "learning_rate": 0.00016986464842134246, "loss": 4.3024, "step": 7350 }, { "epoch": 0.762210888541822, "grad_norm": 1.2890625, "learning_rate": 0.00016985687639586354, "loss": 4.3106, "step": 7351 }, { "epoch": 0.76231457659631, "grad_norm": 1.296875, "learning_rate": 0.00016984910354614732, "loss": 4.3155, "step": 7352 }, { "epoch": 0.7624182646507981, "grad_norm": 1.3359375, "learning_rate": 0.00016984132987228547, "loss": 4.3108, "step": 7353 }, { "epoch": 0.7625219527052861, "grad_norm": 1.1953125, "learning_rate": 0.00016983355537436977, "loss": 4.3093, "step": 7354 }, { "epoch": 0.7626256407597742, "grad_norm": 1.3203125, "learning_rate": 0.00016982578005249197, "loss": 4.3132, "step": 7355 }, { "epoch": 0.7627293288142623, "grad_norm": 1.234375, "learning_rate": 0.0001698180039067437, "loss": 4.2788, "step": 7356 }, { "epoch": 0.7628330168687504, "grad_norm": 1.4140625, "learning_rate": 0.00016981022693721688, "loss": 4.299, "step": 7357 }, { "epoch": 0.7629367049232384, "grad_norm": 1.28125, "learning_rate": 0.0001698024491440031, "loss": 4.2963, "step": 7358 }, { "epoch": 0.7630403929777265, "grad_norm": 1.1953125, "learning_rate": 0.00016979467052719423, "loss": 4.2889, "step": 7359 }, { "epoch": 0.7631440810322145, "grad_norm": 1.109375, "learning_rate": 0.00016978689108688202, "loss": 4.2818, "step": 7360 }, { "epoch": 0.7632477690867027, "grad_norm": 1.328125, "learning_rate": 0.00016977911082315827, "loss": 4.2491, "step": 7361 }, { "epoch": 0.7633514571411907, "grad_norm": 1.1171875, "learning_rate": 0.00016977132973611475, "loss": 4.2968, "step": 7362 }, { "epoch": 0.7634551451956788, "grad_norm": 1.3671875, "learning_rate": 0.00016976354782584333, "loss": 4.3111, "step": 7363 }, { "epoch": 0.7635588332501668, "grad_norm": 1.2265625, "learning_rate": 0.00016975576509243578, "loss": 4.296, "step": 7364 }, { "epoch": 0.763662521304655, "grad_norm": 1.2578125, "learning_rate": 0.00016974798153598393, "loss": 4.289, "step": 7365 }, { "epoch": 0.763766209359143, "grad_norm": 1.171875, "learning_rate": 0.0001697401971565796, "loss": 4.3073, "step": 7366 }, { "epoch": 0.7638698974136311, "grad_norm": 1.3359375, "learning_rate": 0.00016973241195431468, "loss": 4.3217, "step": 7367 }, { "epoch": 0.7639735854681191, "grad_norm": 1.203125, "learning_rate": 0.000169724625929281, "loss": 4.3015, "step": 7368 }, { "epoch": 0.7640772735226072, "grad_norm": 1.421875, "learning_rate": 0.00016971683908157046, "loss": 4.2788, "step": 7369 }, { "epoch": 0.7641809615770954, "grad_norm": 1.3359375, "learning_rate": 0.0001697090514112749, "loss": 4.2878, "step": 7370 }, { "epoch": 0.7642846496315834, "grad_norm": 1.1875, "learning_rate": 0.0001697012629184862, "loss": 4.2664, "step": 7371 }, { "epoch": 0.7643883376860715, "grad_norm": 1.078125, "learning_rate": 0.0001696934736032963, "loss": 4.3435, "step": 7372 }, { "epoch": 0.7644920257405595, "grad_norm": 1.328125, "learning_rate": 0.00016968568346579707, "loss": 4.2861, "step": 7373 }, { "epoch": 0.7645957137950476, "grad_norm": 1.1953125, "learning_rate": 0.00016967789250608046, "loss": 4.2991, "step": 7374 }, { "epoch": 0.7646994018495357, "grad_norm": 1.3984375, "learning_rate": 0.00016967010072423835, "loss": 4.2715, "step": 7375 }, { "epoch": 0.7648030899040238, "grad_norm": 1.359375, "learning_rate": 0.00016966230812036267, "loss": 4.3264, "step": 7376 }, { "epoch": 0.7649067779585118, "grad_norm": 1.265625, "learning_rate": 0.00016965451469454546, "loss": 4.2816, "step": 7377 }, { "epoch": 0.7650104660129999, "grad_norm": 1.125, "learning_rate": 0.00016964672044687853, "loss": 4.2958, "step": 7378 }, { "epoch": 0.765114154067488, "grad_norm": 1.1328125, "learning_rate": 0.00016963892537745395, "loss": 4.2681, "step": 7379 }, { "epoch": 0.7652178421219761, "grad_norm": 1.078125, "learning_rate": 0.0001696311294863637, "loss": 4.2735, "step": 7380 }, { "epoch": 0.7653215301764641, "grad_norm": 1.328125, "learning_rate": 0.00016962333277369967, "loss": 4.2949, "step": 7381 }, { "epoch": 0.7654252182309522, "grad_norm": 1.25, "learning_rate": 0.00016961553523955393, "loss": 4.2902, "step": 7382 }, { "epoch": 0.7655289062854402, "grad_norm": 1.28125, "learning_rate": 0.00016960773688401846, "loss": 4.2752, "step": 7383 }, { "epoch": 0.7656325943399284, "grad_norm": 1.2734375, "learning_rate": 0.00016959993770718524, "loss": 4.3044, "step": 7384 }, { "epoch": 0.7657362823944164, "grad_norm": 1.125, "learning_rate": 0.00016959213770914637, "loss": 4.3009, "step": 7385 }, { "epoch": 0.7658399704489045, "grad_norm": 1.0390625, "learning_rate": 0.00016958433688999381, "loss": 4.2998, "step": 7386 }, { "epoch": 0.7659436585033925, "grad_norm": 1.3515625, "learning_rate": 0.00016957653524981968, "loss": 4.2673, "step": 7387 }, { "epoch": 0.7660473465578806, "grad_norm": 1.2578125, "learning_rate": 0.00016956873278871592, "loss": 4.3262, "step": 7388 }, { "epoch": 0.7661510346123687, "grad_norm": 1.5546875, "learning_rate": 0.0001695609295067747, "loss": 4.3042, "step": 7389 }, { "epoch": 0.7662547226668568, "grad_norm": 1.4453125, "learning_rate": 0.000169553125404088, "loss": 4.2929, "step": 7390 }, { "epoch": 0.7663584107213448, "grad_norm": 1.2890625, "learning_rate": 0.00016954532048074793, "loss": 4.287, "step": 7391 }, { "epoch": 0.7664620987758329, "grad_norm": 1.2578125, "learning_rate": 0.00016953751473684664, "loss": 4.276, "step": 7392 }, { "epoch": 0.766565786830321, "grad_norm": 1.25, "learning_rate": 0.00016952970817247614, "loss": 4.2847, "step": 7393 }, { "epoch": 0.7666694748848091, "grad_norm": 1.21875, "learning_rate": 0.0001695219007877286, "loss": 4.2912, "step": 7394 }, { "epoch": 0.7667731629392971, "grad_norm": 1.4453125, "learning_rate": 0.00016951409258269612, "loss": 4.2992, "step": 7395 }, { "epoch": 0.7668768509937852, "grad_norm": 1.2890625, "learning_rate": 0.0001695062835574708, "loss": 4.3023, "step": 7396 }, { "epoch": 0.7669805390482732, "grad_norm": 1.2890625, "learning_rate": 0.00016949847371214484, "loss": 4.2982, "step": 7397 }, { "epoch": 0.7670842271027614, "grad_norm": 1.21875, "learning_rate": 0.00016949066304681033, "loss": 4.2983, "step": 7398 }, { "epoch": 0.7671879151572494, "grad_norm": 1.15625, "learning_rate": 0.00016948285156155945, "loss": 4.2468, "step": 7399 }, { "epoch": 0.7672916032117375, "grad_norm": 0.98046875, "learning_rate": 0.0001694750392564844, "loss": 4.3253, "step": 7400 }, { "epoch": 0.7673952912662255, "grad_norm": 1.2890625, "learning_rate": 0.0001694672261316773, "loss": 4.3069, "step": 7401 }, { "epoch": 0.7674989793207136, "grad_norm": 1.1484375, "learning_rate": 0.00016945941218723037, "loss": 4.3027, "step": 7402 }, { "epoch": 0.7676026673752017, "grad_norm": 1.53125, "learning_rate": 0.0001694515974232358, "loss": 4.3055, "step": 7403 }, { "epoch": 0.7677063554296898, "grad_norm": 1.4140625, "learning_rate": 0.00016944378183978578, "loss": 4.3045, "step": 7404 }, { "epoch": 0.7678100434841778, "grad_norm": 1.2734375, "learning_rate": 0.00016943596543697256, "loss": 4.3241, "step": 7405 }, { "epoch": 0.7679137315386659, "grad_norm": 1.2109375, "learning_rate": 0.00016942814821488834, "loss": 4.2945, "step": 7406 }, { "epoch": 0.768017419593154, "grad_norm": 1.375, "learning_rate": 0.00016942033017362533, "loss": 4.2977, "step": 7407 }, { "epoch": 0.7681211076476421, "grad_norm": 1.1484375, "learning_rate": 0.00016941251131327581, "loss": 4.2918, "step": 7408 }, { "epoch": 0.7682247957021301, "grad_norm": 1.46875, "learning_rate": 0.00016940469163393207, "loss": 4.3023, "step": 7409 }, { "epoch": 0.7683284837566182, "grad_norm": 1.328125, "learning_rate": 0.0001693968711356863, "loss": 4.2654, "step": 7410 }, { "epoch": 0.7684321718111062, "grad_norm": 1.3515625, "learning_rate": 0.0001693890498186308, "loss": 4.3465, "step": 7411 }, { "epoch": 0.7685358598655944, "grad_norm": 1.2109375, "learning_rate": 0.00016938122768285786, "loss": 4.2624, "step": 7412 }, { "epoch": 0.7686395479200824, "grad_norm": 1.3828125, "learning_rate": 0.0001693734047284598, "loss": 4.3084, "step": 7413 }, { "epoch": 0.7687432359745705, "grad_norm": 1.3046875, "learning_rate": 0.00016936558095552887, "loss": 4.3405, "step": 7414 }, { "epoch": 0.7688469240290586, "grad_norm": 1.40625, "learning_rate": 0.00016935775636415742, "loss": 4.2755, "step": 7415 }, { "epoch": 0.7689506120835466, "grad_norm": 1.3359375, "learning_rate": 0.00016934993095443776, "loss": 4.3349, "step": 7416 }, { "epoch": 0.7690543001380348, "grad_norm": 1.203125, "learning_rate": 0.0001693421047264622, "loss": 4.2602, "step": 7417 }, { "epoch": 0.7691579881925228, "grad_norm": 1.1484375, "learning_rate": 0.00016933427768032314, "loss": 4.2741, "step": 7418 }, { "epoch": 0.7692616762470109, "grad_norm": 1.4375, "learning_rate": 0.00016932644981611288, "loss": 4.312, "step": 7419 }, { "epoch": 0.7693653643014989, "grad_norm": 1.25, "learning_rate": 0.0001693186211339238, "loss": 4.2978, "step": 7420 }, { "epoch": 0.7694690523559871, "grad_norm": 1.4609375, "learning_rate": 0.00016931079163384823, "loss": 4.2716, "step": 7421 }, { "epoch": 0.7695727404104751, "grad_norm": 1.4453125, "learning_rate": 0.00016930296131597862, "loss": 4.3112, "step": 7422 }, { "epoch": 0.7696764284649632, "grad_norm": 1.2109375, "learning_rate": 0.00016929513018040728, "loss": 4.2339, "step": 7423 }, { "epoch": 0.7697801165194512, "grad_norm": 1.140625, "learning_rate": 0.00016928729822722667, "loss": 4.2802, "step": 7424 }, { "epoch": 0.7698838045739393, "grad_norm": 1.203125, "learning_rate": 0.00016927946545652918, "loss": 4.2905, "step": 7425 }, { "epoch": 0.7699874926284274, "grad_norm": 1.109375, "learning_rate": 0.00016927163186840726, "loss": 4.2913, "step": 7426 }, { "epoch": 0.7700911806829155, "grad_norm": 1.46875, "learning_rate": 0.0001692637974629533, "loss": 4.2782, "step": 7427 }, { "epoch": 0.7701948687374035, "grad_norm": 1.421875, "learning_rate": 0.0001692559622402597, "loss": 4.2655, "step": 7428 }, { "epoch": 0.7702985567918916, "grad_norm": 1.203125, "learning_rate": 0.000169248126200419, "loss": 4.2924, "step": 7429 }, { "epoch": 0.7704022448463796, "grad_norm": 1.1875, "learning_rate": 0.0001692402893435236, "loss": 4.307, "step": 7430 }, { "epoch": 0.7705059329008678, "grad_norm": 1.2890625, "learning_rate": 0.00016923245166966595, "loss": 4.2712, "step": 7431 }, { "epoch": 0.7706096209553558, "grad_norm": 1.0703125, "learning_rate": 0.00016922461317893855, "loss": 4.3062, "step": 7432 }, { "epoch": 0.7707133090098439, "grad_norm": 1.5078125, "learning_rate": 0.00016921677387143392, "loss": 4.2883, "step": 7433 }, { "epoch": 0.7708169970643319, "grad_norm": 1.2421875, "learning_rate": 0.00016920893374724455, "loss": 4.2437, "step": 7434 }, { "epoch": 0.7709206851188201, "grad_norm": 1.4609375, "learning_rate": 0.00016920109280646285, "loss": 4.2196, "step": 7435 }, { "epoch": 0.7710243731733081, "grad_norm": 1.3828125, "learning_rate": 0.00016919325104918143, "loss": 4.3101, "step": 7436 }, { "epoch": 0.7711280612277962, "grad_norm": 1.3828125, "learning_rate": 0.00016918540847549277, "loss": 4.3135, "step": 7437 }, { "epoch": 0.7712317492822842, "grad_norm": 1.3125, "learning_rate": 0.00016917756508548948, "loss": 4.3182, "step": 7438 }, { "epoch": 0.7713354373367723, "grad_norm": 1.4453125, "learning_rate": 0.000169169720879264, "loss": 4.2481, "step": 7439 }, { "epoch": 0.7714391253912604, "grad_norm": 1.3671875, "learning_rate": 0.00016916187585690895, "loss": 4.3098, "step": 7440 }, { "epoch": 0.7715428134457485, "grad_norm": 1.4375, "learning_rate": 0.00016915403001851683, "loss": 4.2693, "step": 7441 }, { "epoch": 0.7716465015002365, "grad_norm": 1.3203125, "learning_rate": 0.00016914618336418032, "loss": 4.2724, "step": 7442 }, { "epoch": 0.7717501895547246, "grad_norm": 1.3984375, "learning_rate": 0.0001691383358939919, "loss": 4.3085, "step": 7443 }, { "epoch": 0.7718538776092126, "grad_norm": 1.3828125, "learning_rate": 0.00016913048760804417, "loss": 4.2871, "step": 7444 }, { "epoch": 0.7719575656637008, "grad_norm": 1.234375, "learning_rate": 0.00016912263850642981, "loss": 4.3041, "step": 7445 }, { "epoch": 0.7720612537181888, "grad_norm": 1.15625, "learning_rate": 0.00016911478858924137, "loss": 4.2777, "step": 7446 }, { "epoch": 0.7721649417726769, "grad_norm": 1.1875, "learning_rate": 0.00016910693785657146, "loss": 4.2965, "step": 7447 }, { "epoch": 0.7722686298271649, "grad_norm": 1.125, "learning_rate": 0.00016909908630851277, "loss": 4.3142, "step": 7448 }, { "epoch": 0.7723723178816531, "grad_norm": 1.453125, "learning_rate": 0.00016909123394515785, "loss": 4.3168, "step": 7449 }, { "epoch": 0.7724760059361411, "grad_norm": 1.390625, "learning_rate": 0.00016908338076659945, "loss": 4.3266, "step": 7450 }, { "epoch": 0.7725796939906292, "grad_norm": 1.3203125, "learning_rate": 0.00016907552677293018, "loss": 4.3167, "step": 7451 }, { "epoch": 0.7726833820451172, "grad_norm": 1.2109375, "learning_rate": 0.0001690676719642427, "loss": 4.2969, "step": 7452 }, { "epoch": 0.7727870700996053, "grad_norm": 1.2421875, "learning_rate": 0.00016905981634062967, "loss": 4.2735, "step": 7453 }, { "epoch": 0.7728907581540934, "grad_norm": 1.1640625, "learning_rate": 0.0001690519599021838, "loss": 4.3345, "step": 7454 }, { "epoch": 0.7729944462085815, "grad_norm": 1.578125, "learning_rate": 0.00016904410264899787, "loss": 4.3112, "step": 7455 }, { "epoch": 0.7730981342630695, "grad_norm": 1.4609375, "learning_rate": 0.00016903624458116444, "loss": 4.2882, "step": 7456 }, { "epoch": 0.7732018223175576, "grad_norm": 1.234375, "learning_rate": 0.0001690283856987763, "loss": 4.271, "step": 7457 }, { "epoch": 0.7733055103720456, "grad_norm": 1.203125, "learning_rate": 0.0001690205260019262, "loss": 4.3036, "step": 7458 }, { "epoch": 0.7734091984265338, "grad_norm": 1.125, "learning_rate": 0.00016901266549070688, "loss": 4.2912, "step": 7459 }, { "epoch": 0.7735128864810219, "grad_norm": 1.125, "learning_rate": 0.00016900480416521103, "loss": 4.2563, "step": 7460 }, { "epoch": 0.7736165745355099, "grad_norm": 1.3359375, "learning_rate": 0.00016899694202553143, "loss": 4.2761, "step": 7461 }, { "epoch": 0.773720262589998, "grad_norm": 1.25, "learning_rate": 0.00016898907907176084, "loss": 4.2598, "step": 7462 }, { "epoch": 0.7738239506444861, "grad_norm": 1.296875, "learning_rate": 0.00016898121530399202, "loss": 4.2726, "step": 7463 }, { "epoch": 0.7739276386989742, "grad_norm": 1.1796875, "learning_rate": 0.0001689733507223178, "loss": 4.2995, "step": 7464 }, { "epoch": 0.7740313267534622, "grad_norm": 1.2890625, "learning_rate": 0.00016896548532683098, "loss": 4.31, "step": 7465 }, { "epoch": 0.7741350148079503, "grad_norm": 1.25, "learning_rate": 0.0001689576191176243, "loss": 4.2907, "step": 7466 }, { "epoch": 0.7742387028624383, "grad_norm": 1.1796875, "learning_rate": 0.0001689497520947906, "loss": 4.3051, "step": 7467 }, { "epoch": 0.7743423909169265, "grad_norm": 1.125, "learning_rate": 0.00016894188425842273, "loss": 4.3089, "step": 7468 }, { "epoch": 0.7744460789714145, "grad_norm": 1.2265625, "learning_rate": 0.00016893401560861347, "loss": 4.2659, "step": 7469 }, { "epoch": 0.7745497670259026, "grad_norm": 1.1328125, "learning_rate": 0.00016892614614545572, "loss": 4.281, "step": 7470 }, { "epoch": 0.7746534550803906, "grad_norm": 1.2421875, "learning_rate": 0.00016891827586904226, "loss": 4.2811, "step": 7471 }, { "epoch": 0.7747571431348788, "grad_norm": 1.2109375, "learning_rate": 0.00016891040477946606, "loss": 4.2619, "step": 7472 }, { "epoch": 0.7748608311893668, "grad_norm": 1.1953125, "learning_rate": 0.0001689025328768199, "loss": 4.2999, "step": 7473 }, { "epoch": 0.7749645192438549, "grad_norm": 1.1484375, "learning_rate": 0.00016889466016119665, "loss": 4.3273, "step": 7474 }, { "epoch": 0.7750682072983429, "grad_norm": 1.078125, "learning_rate": 0.00016888678663268925, "loss": 4.2815, "step": 7475 }, { "epoch": 0.775171895352831, "grad_norm": 0.96484375, "learning_rate": 0.00016887891229139064, "loss": 4.2958, "step": 7476 }, { "epoch": 0.7752755834073191, "grad_norm": 1.3046875, "learning_rate": 0.00016887103713739363, "loss": 4.2667, "step": 7477 }, { "epoch": 0.7753792714618072, "grad_norm": 1.109375, "learning_rate": 0.0001688631611707912, "loss": 4.3166, "step": 7478 }, { "epoch": 0.7754829595162952, "grad_norm": 1.2890625, "learning_rate": 0.0001688552843916762, "loss": 4.3385, "step": 7479 }, { "epoch": 0.7755866475707833, "grad_norm": 1.1796875, "learning_rate": 0.00016884740680014169, "loss": 4.2935, "step": 7480 }, { "epoch": 0.7756903356252713, "grad_norm": 1.140625, "learning_rate": 0.00016883952839628053, "loss": 4.2766, "step": 7481 }, { "epoch": 0.7757940236797595, "grad_norm": 1.046875, "learning_rate": 0.0001688316491801857, "loss": 4.2529, "step": 7482 }, { "epoch": 0.7758977117342475, "grad_norm": 1.2890625, "learning_rate": 0.00016882376915195019, "loss": 4.3134, "step": 7483 }, { "epoch": 0.7760013997887356, "grad_norm": 1.109375, "learning_rate": 0.00016881588831166696, "loss": 4.2798, "step": 7484 }, { "epoch": 0.7761050878432236, "grad_norm": 1.421875, "learning_rate": 0.00016880800665942895, "loss": 4.297, "step": 7485 }, { "epoch": 0.7762087758977118, "grad_norm": 1.2734375, "learning_rate": 0.0001688001241953292, "loss": 4.3174, "step": 7486 }, { "epoch": 0.7763124639521998, "grad_norm": 1.359375, "learning_rate": 0.0001687922409194607, "loss": 4.2795, "step": 7487 }, { "epoch": 0.7764161520066879, "grad_norm": 1.28125, "learning_rate": 0.0001687843568319165, "loss": 4.3163, "step": 7488 }, { "epoch": 0.7765198400611759, "grad_norm": 1.21875, "learning_rate": 0.0001687764719327896, "loss": 4.2761, "step": 7489 }, { "epoch": 0.776623528115664, "grad_norm": 1.0625, "learning_rate": 0.00016876858622217304, "loss": 4.299, "step": 7490 }, { "epoch": 0.7767272161701521, "grad_norm": 1.34375, "learning_rate": 0.00016876069970015985, "loss": 4.2744, "step": 7491 }, { "epoch": 0.7768309042246402, "grad_norm": 1.2734375, "learning_rate": 0.00016875281236684305, "loss": 4.2877, "step": 7492 }, { "epoch": 0.7769345922791282, "grad_norm": 1.359375, "learning_rate": 0.00016874492422231578, "loss": 4.313, "step": 7493 }, { "epoch": 0.7770382803336163, "grad_norm": 1.3203125, "learning_rate": 0.00016873703526667106, "loss": 4.2781, "step": 7494 }, { "epoch": 0.7771419683881043, "grad_norm": 1.140625, "learning_rate": 0.00016872914550000197, "loss": 4.2608, "step": 7495 }, { "epoch": 0.7772456564425925, "grad_norm": 1.0546875, "learning_rate": 0.00016872125492240161, "loss": 4.2584, "step": 7496 }, { "epoch": 0.7773493444970805, "grad_norm": 1.1953125, "learning_rate": 0.0001687133635339631, "loss": 4.2925, "step": 7497 }, { "epoch": 0.7774530325515686, "grad_norm": 1.0078125, "learning_rate": 0.0001687054713347795, "loss": 4.3133, "step": 7498 }, { "epoch": 0.7775567206060566, "grad_norm": 1.46875, "learning_rate": 0.000168697578324944, "loss": 4.315, "step": 7499 }, { "epoch": 0.7776604086605448, "grad_norm": 1.3515625, "learning_rate": 0.00016868968450454966, "loss": 4.2717, "step": 7500 }, { "epoch": 0.7777640967150328, "grad_norm": 1.34375, "learning_rate": 0.0001686817898736897, "loss": 4.2838, "step": 7501 }, { "epoch": 0.7778677847695209, "grad_norm": 1.3125, "learning_rate": 0.0001686738944324572, "loss": 4.2746, "step": 7502 }, { "epoch": 0.7779714728240089, "grad_norm": 1.109375, "learning_rate": 0.00016866599818094531, "loss": 4.3302, "step": 7503 }, { "epoch": 0.778075160878497, "grad_norm": 1.078125, "learning_rate": 0.00016865810111924728, "loss": 4.2894, "step": 7504 }, { "epoch": 0.7781788489329852, "grad_norm": 1.390625, "learning_rate": 0.00016865020324745617, "loss": 4.2608, "step": 7505 }, { "epoch": 0.7782825369874732, "grad_norm": 1.1328125, "learning_rate": 0.00016864230456566527, "loss": 4.2797, "step": 7506 }, { "epoch": 0.7783862250419613, "grad_norm": 1.5546875, "learning_rate": 0.0001686344050739677, "loss": 4.2473, "step": 7507 }, { "epoch": 0.7784899130964493, "grad_norm": 1.453125, "learning_rate": 0.00016862650477245672, "loss": 4.289, "step": 7508 }, { "epoch": 0.7785936011509375, "grad_norm": 1.3359375, "learning_rate": 0.00016861860366122552, "loss": 4.3078, "step": 7509 }, { "epoch": 0.7786972892054255, "grad_norm": 1.1953125, "learning_rate": 0.00016861070174036734, "loss": 4.31, "step": 7510 }, { "epoch": 0.7788009772599136, "grad_norm": 1.2734375, "learning_rate": 0.0001686027990099754, "loss": 4.2991, "step": 7511 }, { "epoch": 0.7789046653144016, "grad_norm": 1.0703125, "learning_rate": 0.00016859489547014293, "loss": 4.2701, "step": 7512 }, { "epoch": 0.7790083533688897, "grad_norm": 1.640625, "learning_rate": 0.00016858699112096322, "loss": 4.2792, "step": 7513 }, { "epoch": 0.7791120414233778, "grad_norm": 1.578125, "learning_rate": 0.00016857908596252952, "loss": 4.3019, "step": 7514 }, { "epoch": 0.7792157294778659, "grad_norm": 1.2890625, "learning_rate": 0.0001685711799949351, "loss": 4.3028, "step": 7515 }, { "epoch": 0.7793194175323539, "grad_norm": 1.2578125, "learning_rate": 0.0001685632732182732, "loss": 4.3022, "step": 7516 }, { "epoch": 0.779423105586842, "grad_norm": 1.234375, "learning_rate": 0.00016855536563263715, "loss": 4.2835, "step": 7517 }, { "epoch": 0.77952679364133, "grad_norm": 1.15625, "learning_rate": 0.00016854745723812032, "loss": 4.332, "step": 7518 }, { "epoch": 0.7796304816958182, "grad_norm": 1.5234375, "learning_rate": 0.0001685395480348159, "loss": 4.3164, "step": 7519 }, { "epoch": 0.7797341697503062, "grad_norm": 1.4140625, "learning_rate": 0.00016853163802281728, "loss": 4.2839, "step": 7520 }, { "epoch": 0.7798378578047943, "grad_norm": 1.3515625, "learning_rate": 0.00016852372720221775, "loss": 4.3423, "step": 7521 }, { "epoch": 0.7799415458592823, "grad_norm": 1.2421875, "learning_rate": 0.00016851581557311072, "loss": 4.3084, "step": 7522 }, { "epoch": 0.7800452339137705, "grad_norm": 1.28125, "learning_rate": 0.00016850790313558943, "loss": 4.3181, "step": 7523 }, { "epoch": 0.7801489219682585, "grad_norm": 1.1640625, "learning_rate": 0.00016849998988974733, "loss": 4.2593, "step": 7524 }, { "epoch": 0.7802526100227466, "grad_norm": 1.25, "learning_rate": 0.00016849207583567776, "loss": 4.2949, "step": 7525 }, { "epoch": 0.7803562980772346, "grad_norm": 1.1171875, "learning_rate": 0.00016848416097347407, "loss": 4.2945, "step": 7526 }, { "epoch": 0.7804599861317227, "grad_norm": 1.3671875, "learning_rate": 0.0001684762453032297, "loss": 4.3089, "step": 7527 }, { "epoch": 0.7805636741862108, "grad_norm": 1.2890625, "learning_rate": 0.000168468328825038, "loss": 4.2943, "step": 7528 }, { "epoch": 0.7806673622406989, "grad_norm": 1.328125, "learning_rate": 0.00016846041153899242, "loss": 4.333, "step": 7529 }, { "epoch": 0.7807710502951869, "grad_norm": 1.2109375, "learning_rate": 0.00016845249344518633, "loss": 4.3041, "step": 7530 }, { "epoch": 0.780874738349675, "grad_norm": 1.15625, "learning_rate": 0.00016844457454371316, "loss": 4.2783, "step": 7531 }, { "epoch": 0.780978426404163, "grad_norm": 1.109375, "learning_rate": 0.00016843665483466636, "loss": 4.3132, "step": 7532 }, { "epoch": 0.7810821144586512, "grad_norm": 1.421875, "learning_rate": 0.0001684287343181394, "loss": 4.2628, "step": 7533 }, { "epoch": 0.7811858025131392, "grad_norm": 1.265625, "learning_rate": 0.0001684208129942257, "loss": 4.2853, "step": 7534 }, { "epoch": 0.7812894905676273, "grad_norm": 1.4140625, "learning_rate": 0.00016841289086301874, "loss": 4.3297, "step": 7535 }, { "epoch": 0.7813931786221153, "grad_norm": 1.328125, "learning_rate": 0.00016840496792461194, "loss": 4.2628, "step": 7536 }, { "epoch": 0.7814968666766035, "grad_norm": 1.1484375, "learning_rate": 0.00016839704417909886, "loss": 4.2701, "step": 7537 }, { "epoch": 0.7816005547310915, "grad_norm": 1.1015625, "learning_rate": 0.00016838911962657296, "loss": 4.3012, "step": 7538 }, { "epoch": 0.7817042427855796, "grad_norm": 1.2109375, "learning_rate": 0.00016838119426712775, "loss": 4.3009, "step": 7539 }, { "epoch": 0.7818079308400676, "grad_norm": 1.1484375, "learning_rate": 0.00016837326810085668, "loss": 4.2807, "step": 7540 }, { "epoch": 0.7819116188945557, "grad_norm": 1.2578125, "learning_rate": 0.00016836534112785336, "loss": 4.3013, "step": 7541 }, { "epoch": 0.7820153069490438, "grad_norm": 1.203125, "learning_rate": 0.00016835741334821126, "loss": 4.2881, "step": 7542 }, { "epoch": 0.7821189950035319, "grad_norm": 1.2734375, "learning_rate": 0.000168349484762024, "loss": 4.2784, "step": 7543 }, { "epoch": 0.7822226830580199, "grad_norm": 1.1953125, "learning_rate": 0.000168341555369385, "loss": 4.2688, "step": 7544 }, { "epoch": 0.782326371112508, "grad_norm": 1.2421875, "learning_rate": 0.0001683336251703879, "loss": 4.3094, "step": 7545 }, { "epoch": 0.782430059166996, "grad_norm": 1.140625, "learning_rate": 0.00016832569416512624, "loss": 4.2929, "step": 7546 }, { "epoch": 0.7825337472214842, "grad_norm": 1.1796875, "learning_rate": 0.00016831776235369365, "loss": 4.2823, "step": 7547 }, { "epoch": 0.7826374352759723, "grad_norm": 1.0546875, "learning_rate": 0.00016830982973618368, "loss": 4.2762, "step": 7548 }, { "epoch": 0.7827411233304603, "grad_norm": 1.3359375, "learning_rate": 0.00016830189631268988, "loss": 4.3096, "step": 7549 }, { "epoch": 0.7828448113849484, "grad_norm": 1.15625, "learning_rate": 0.00016829396208330595, "loss": 4.2435, "step": 7550 }, { "epoch": 0.7829484994394365, "grad_norm": 1.25, "learning_rate": 0.00016828602704812543, "loss": 4.2706, "step": 7551 }, { "epoch": 0.7830521874939246, "grad_norm": 1.1328125, "learning_rate": 0.00016827809120724199, "loss": 4.3124, "step": 7552 }, { "epoch": 0.7831558755484126, "grad_norm": 1.296875, "learning_rate": 0.00016827015456074925, "loss": 4.3115, "step": 7553 }, { "epoch": 0.7832595636029007, "grad_norm": 1.1953125, "learning_rate": 0.0001682622171087408, "loss": 4.2603, "step": 7554 }, { "epoch": 0.7833632516573887, "grad_norm": 1.1953125, "learning_rate": 0.0001682542788513104, "loss": 4.2819, "step": 7555 }, { "epoch": 0.7834669397118769, "grad_norm": 1.1484375, "learning_rate": 0.00016824633978855164, "loss": 4.3383, "step": 7556 }, { "epoch": 0.7835706277663649, "grad_norm": 1.296875, "learning_rate": 0.0001682383999205582, "loss": 4.2508, "step": 7557 }, { "epoch": 0.783674315820853, "grad_norm": 1.15625, "learning_rate": 0.0001682304592474238, "loss": 4.2703, "step": 7558 }, { "epoch": 0.783778003875341, "grad_norm": 1.34375, "learning_rate": 0.00016822251776924208, "loss": 4.2566, "step": 7559 }, { "epoch": 0.7838816919298291, "grad_norm": 1.2734375, "learning_rate": 0.00016821457548610677, "loss": 4.3067, "step": 7560 }, { "epoch": 0.7839853799843172, "grad_norm": 1.4921875, "learning_rate": 0.00016820663239811156, "loss": 4.3048, "step": 7561 }, { "epoch": 0.7840890680388053, "grad_norm": 1.296875, "learning_rate": 0.0001681986885053502, "loss": 4.2958, "step": 7562 }, { "epoch": 0.7841927560932933, "grad_norm": 1.3828125, "learning_rate": 0.00016819074380791638, "loss": 4.3104, "step": 7563 }, { "epoch": 0.7842964441477814, "grad_norm": 1.2421875, "learning_rate": 0.00016818279830590392, "loss": 4.3193, "step": 7564 }, { "epoch": 0.7844001322022695, "grad_norm": 1.2734375, "learning_rate": 0.00016817485199940648, "loss": 4.2839, "step": 7565 }, { "epoch": 0.7845038202567576, "grad_norm": 1.1953125, "learning_rate": 0.00016816690488851785, "loss": 4.2268, "step": 7566 }, { "epoch": 0.7846075083112456, "grad_norm": 1.265625, "learning_rate": 0.00016815895697333178, "loss": 4.2412, "step": 7567 }, { "epoch": 0.7847111963657337, "grad_norm": 1.171875, "learning_rate": 0.0001681510082539421, "loss": 4.2996, "step": 7568 }, { "epoch": 0.7848148844202217, "grad_norm": 1.328125, "learning_rate": 0.00016814305873044254, "loss": 4.2629, "step": 7569 }, { "epoch": 0.7849185724747099, "grad_norm": 1.2578125, "learning_rate": 0.0001681351084029269, "loss": 4.2047, "step": 7570 }, { "epoch": 0.7850222605291979, "grad_norm": 1.078125, "learning_rate": 0.00016812715727148906, "loss": 4.2992, "step": 7571 }, { "epoch": 0.785125948583686, "grad_norm": 1.09375, "learning_rate": 0.00016811920533622274, "loss": 4.31, "step": 7572 }, { "epoch": 0.785229636638174, "grad_norm": 1.21875, "learning_rate": 0.0001681112525972218, "loss": 4.2708, "step": 7573 }, { "epoch": 0.7853333246926621, "grad_norm": 1.09375, "learning_rate": 0.00016810329905458008, "loss": 4.2971, "step": 7574 }, { "epoch": 0.7854370127471502, "grad_norm": 1.3671875, "learning_rate": 0.0001680953447083914, "loss": 4.2911, "step": 7575 }, { "epoch": 0.7855407008016383, "grad_norm": 1.2890625, "learning_rate": 0.00016808738955874968, "loss": 4.3078, "step": 7576 }, { "epoch": 0.7856443888561263, "grad_norm": 1.1796875, "learning_rate": 0.0001680794336057487, "loss": 4.266, "step": 7577 }, { "epoch": 0.7857480769106144, "grad_norm": 1.1015625, "learning_rate": 0.00016807147684948237, "loss": 4.2838, "step": 7578 }, { "epoch": 0.7858517649651025, "grad_norm": 1.234375, "learning_rate": 0.00016806351929004457, "loss": 4.3373, "step": 7579 }, { "epoch": 0.7859554530195906, "grad_norm": 1.140625, "learning_rate": 0.00016805556092752921, "loss": 4.2938, "step": 7580 }, { "epoch": 0.7860591410740786, "grad_norm": 1.3046875, "learning_rate": 0.00016804760176203015, "loss": 4.2999, "step": 7581 }, { "epoch": 0.7861628291285667, "grad_norm": 1.1640625, "learning_rate": 0.00016803964179364132, "loss": 4.3438, "step": 7582 }, { "epoch": 0.7862665171830547, "grad_norm": 1.140625, "learning_rate": 0.0001680316810224566, "loss": 4.3247, "step": 7583 }, { "epoch": 0.7863702052375429, "grad_norm": 1.09375, "learning_rate": 0.00016802371944857, "loss": 4.3308, "step": 7584 }, { "epoch": 0.7864738932920309, "grad_norm": 1.140625, "learning_rate": 0.00016801575707207542, "loss": 4.2712, "step": 7585 }, { "epoch": 0.786577581346519, "grad_norm": 0.984375, "learning_rate": 0.00016800779389306678, "loss": 4.3176, "step": 7586 }, { "epoch": 0.786681269401007, "grad_norm": 1.2734375, "learning_rate": 0.00016799982991163808, "loss": 4.2888, "step": 7587 }, { "epoch": 0.7867849574554951, "grad_norm": 1.1171875, "learning_rate": 0.00016799186512788324, "loss": 4.2784, "step": 7588 }, { "epoch": 0.7868886455099832, "grad_norm": 1.3984375, "learning_rate": 0.0001679838995418963, "loss": 4.2813, "step": 7589 }, { "epoch": 0.7869923335644713, "grad_norm": 1.3125, "learning_rate": 0.0001679759331537712, "loss": 4.3295, "step": 7590 }, { "epoch": 0.7870960216189593, "grad_norm": 1.1953125, "learning_rate": 0.0001679679659636019, "loss": 4.2676, "step": 7591 }, { "epoch": 0.7871997096734474, "grad_norm": 1.1015625, "learning_rate": 0.00016795999797148244, "loss": 4.2144, "step": 7592 }, { "epoch": 0.7873033977279356, "grad_norm": 1.2890625, "learning_rate": 0.00016795202917750692, "loss": 4.2928, "step": 7593 }, { "epoch": 0.7874070857824236, "grad_norm": 1.1640625, "learning_rate": 0.0001679440595817692, "loss": 4.2746, "step": 7594 }, { "epoch": 0.7875107738369117, "grad_norm": 1.3984375, "learning_rate": 0.00016793608918436347, "loss": 4.308, "step": 7595 }, { "epoch": 0.7876144618913997, "grad_norm": 1.3203125, "learning_rate": 0.00016792811798538364, "loss": 4.2943, "step": 7596 }, { "epoch": 0.7877181499458878, "grad_norm": 1.1640625, "learning_rate": 0.00016792014598492386, "loss": 4.2543, "step": 7597 }, { "epoch": 0.7878218380003759, "grad_norm": 1.125, "learning_rate": 0.00016791217318307814, "loss": 4.2815, "step": 7598 }, { "epoch": 0.787925526054864, "grad_norm": 1.109375, "learning_rate": 0.00016790419957994054, "loss": 4.2256, "step": 7599 }, { "epoch": 0.788029214109352, "grad_norm": 1.0234375, "learning_rate": 0.0001678962251756052, "loss": 4.2607, "step": 7600 }, { "epoch": 0.7881329021638401, "grad_norm": 1.234375, "learning_rate": 0.00016788824997016616, "loss": 4.2988, "step": 7601 }, { "epoch": 0.7882365902183281, "grad_norm": 1.0546875, "learning_rate": 0.00016788027396371751, "loss": 4.2565, "step": 7602 }, { "epoch": 0.7883402782728163, "grad_norm": 1.421875, "learning_rate": 0.00016787229715635343, "loss": 4.3026, "step": 7603 }, { "epoch": 0.7884439663273043, "grad_norm": 1.3671875, "learning_rate": 0.00016786431954816792, "loss": 4.3119, "step": 7604 }, { "epoch": 0.7885476543817924, "grad_norm": 1.21875, "learning_rate": 0.00016785634113925522, "loss": 4.232, "step": 7605 }, { "epoch": 0.7886513424362804, "grad_norm": 1.21875, "learning_rate": 0.0001678483619297094, "loss": 4.3448, "step": 7606 }, { "epoch": 0.7887550304907686, "grad_norm": 1.109375, "learning_rate": 0.00016784038191962463, "loss": 4.272, "step": 7607 }, { "epoch": 0.7888587185452566, "grad_norm": 1.0, "learning_rate": 0.00016783240110909508, "loss": 4.3144, "step": 7608 }, { "epoch": 0.7889624065997447, "grad_norm": 1.34375, "learning_rate": 0.0001678244194982149, "loss": 4.2749, "step": 7609 }, { "epoch": 0.7890660946542327, "grad_norm": 1.1875, "learning_rate": 0.00016781643708707828, "loss": 4.2794, "step": 7610 }, { "epoch": 0.7891697827087208, "grad_norm": 1.34375, "learning_rate": 0.00016780845387577936, "loss": 4.295, "step": 7611 }, { "epoch": 0.7892734707632089, "grad_norm": 1.2109375, "learning_rate": 0.00016780046986441236, "loss": 4.3052, "step": 7612 }, { "epoch": 0.789377158817697, "grad_norm": 1.3125, "learning_rate": 0.0001677924850530715, "loss": 4.3186, "step": 7613 }, { "epoch": 0.789480846872185, "grad_norm": 1.25, "learning_rate": 0.00016778449944185092, "loss": 4.3061, "step": 7614 }, { "epoch": 0.7895845349266731, "grad_norm": 1.1640625, "learning_rate": 0.00016777651303084495, "loss": 4.2802, "step": 7615 }, { "epoch": 0.7896882229811611, "grad_norm": 1.0625, "learning_rate": 0.00016776852582014778, "loss": 4.2582, "step": 7616 }, { "epoch": 0.7897919110356493, "grad_norm": 1.2578125, "learning_rate": 0.0001677605378098536, "loss": 4.3021, "step": 7617 }, { "epoch": 0.7898955990901373, "grad_norm": 1.171875, "learning_rate": 0.00016775254900005675, "loss": 4.2635, "step": 7618 }, { "epoch": 0.7899992871446254, "grad_norm": 1.1875, "learning_rate": 0.00016774455939085137, "loss": 4.3136, "step": 7619 }, { "epoch": 0.7901029751991134, "grad_norm": 1.0703125, "learning_rate": 0.00016773656898233185, "loss": 4.2791, "step": 7620 }, { "epoch": 0.7902066632536016, "grad_norm": 1.1328125, "learning_rate": 0.0001677285777745924, "loss": 4.2588, "step": 7621 }, { "epoch": 0.7903103513080896, "grad_norm": 1.046875, "learning_rate": 0.00016772058576772735, "loss": 4.2855, "step": 7622 }, { "epoch": 0.7904140393625777, "grad_norm": 1.46875, "learning_rate": 0.00016771259296183093, "loss": 4.2701, "step": 7623 }, { "epoch": 0.7905177274170657, "grad_norm": 1.3359375, "learning_rate": 0.0001677045993569975, "loss": 4.2737, "step": 7624 }, { "epoch": 0.7906214154715538, "grad_norm": 1.1640625, "learning_rate": 0.00016769660495332138, "loss": 4.2514, "step": 7625 }, { "epoch": 0.7907251035260419, "grad_norm": 1.1171875, "learning_rate": 0.0001676886097508969, "loss": 4.3126, "step": 7626 }, { "epoch": 0.79082879158053, "grad_norm": 1.171875, "learning_rate": 0.00016768061374981832, "loss": 4.3186, "step": 7627 }, { "epoch": 0.790932479635018, "grad_norm": 1.0703125, "learning_rate": 0.00016767261695018007, "loss": 4.2789, "step": 7628 }, { "epoch": 0.7910361676895061, "grad_norm": 1.359375, "learning_rate": 0.00016766461935207647, "loss": 4.2943, "step": 7629 }, { "epoch": 0.7911398557439941, "grad_norm": 1.3203125, "learning_rate": 0.0001676566209556019, "loss": 4.3317, "step": 7630 }, { "epoch": 0.7912435437984823, "grad_norm": 1.2265625, "learning_rate": 0.0001676486217608507, "loss": 4.2931, "step": 7631 }, { "epoch": 0.7913472318529703, "grad_norm": 1.1171875, "learning_rate": 0.00016764062176791725, "loss": 4.2701, "step": 7632 }, { "epoch": 0.7914509199074584, "grad_norm": 1.1796875, "learning_rate": 0.000167632620976896, "loss": 4.2703, "step": 7633 }, { "epoch": 0.7915546079619464, "grad_norm": 1.1171875, "learning_rate": 0.00016762461938788132, "loss": 4.2488, "step": 7634 }, { "epoch": 0.7916582960164346, "grad_norm": 1.234375, "learning_rate": 0.0001676166170009676, "loss": 4.2902, "step": 7635 }, { "epoch": 0.7917619840709226, "grad_norm": 1.21875, "learning_rate": 0.00016760861381624926, "loss": 4.312, "step": 7636 }, { "epoch": 0.7918656721254107, "grad_norm": 1.3359375, "learning_rate": 0.00016760060983382077, "loss": 4.2857, "step": 7637 }, { "epoch": 0.7919693601798988, "grad_norm": 1.21875, "learning_rate": 0.00016759260505377652, "loss": 4.2868, "step": 7638 }, { "epoch": 0.7920730482343868, "grad_norm": 1.2265625, "learning_rate": 0.00016758459947621097, "loss": 4.2934, "step": 7639 }, { "epoch": 0.792176736288875, "grad_norm": 1.1328125, "learning_rate": 0.0001675765931012186, "loss": 4.3025, "step": 7640 }, { "epoch": 0.792280424343363, "grad_norm": 1.4453125, "learning_rate": 0.00016756858592889383, "loss": 4.3105, "step": 7641 }, { "epoch": 0.7923841123978511, "grad_norm": 1.2421875, "learning_rate": 0.00016756057795933122, "loss": 4.2634, "step": 7642 }, { "epoch": 0.7924878004523391, "grad_norm": 1.5, "learning_rate": 0.00016755256919262517, "loss": 4.2992, "step": 7643 }, { "epoch": 0.7925914885068273, "grad_norm": 1.3671875, "learning_rate": 0.00016754455962887023, "loss": 4.2771, "step": 7644 }, { "epoch": 0.7926951765613153, "grad_norm": 1.3984375, "learning_rate": 0.00016753654926816088, "loss": 4.3138, "step": 7645 }, { "epoch": 0.7927988646158034, "grad_norm": 1.3828125, "learning_rate": 0.00016752853811059163, "loss": 4.2902, "step": 7646 }, { "epoch": 0.7929025526702914, "grad_norm": 1.21875, "learning_rate": 0.00016752052615625704, "loss": 4.3219, "step": 7647 }, { "epoch": 0.7930062407247795, "grad_norm": 1.1953125, "learning_rate": 0.00016751251340525159, "loss": 4.3061, "step": 7648 }, { "epoch": 0.7931099287792676, "grad_norm": 1.2890625, "learning_rate": 0.00016750449985766984, "loss": 4.3073, "step": 7649 }, { "epoch": 0.7932136168337557, "grad_norm": 1.140625, "learning_rate": 0.00016749648551360634, "loss": 4.2367, "step": 7650 }, { "epoch": 0.7933173048882437, "grad_norm": 1.4609375, "learning_rate": 0.00016748847037315566, "loss": 4.284, "step": 7651 }, { "epoch": 0.7934209929427318, "grad_norm": 1.34375, "learning_rate": 0.0001674804544364124, "loss": 4.3115, "step": 7652 }, { "epoch": 0.7935246809972198, "grad_norm": 1.21875, "learning_rate": 0.0001674724377034711, "loss": 4.2805, "step": 7653 }, { "epoch": 0.793628369051708, "grad_norm": 1.203125, "learning_rate": 0.00016746442017442632, "loss": 4.3198, "step": 7654 }, { "epoch": 0.793732057106196, "grad_norm": 1.09375, "learning_rate": 0.00016745640184937272, "loss": 4.3037, "step": 7655 }, { "epoch": 0.7938357451606841, "grad_norm": 0.94921875, "learning_rate": 0.00016744838272840488, "loss": 4.2644, "step": 7656 }, { "epoch": 0.7939394332151721, "grad_norm": 1.171875, "learning_rate": 0.0001674403628116174, "loss": 4.2905, "step": 7657 }, { "epoch": 0.7940431212696603, "grad_norm": 0.953125, "learning_rate": 0.00016743234209910498, "loss": 4.3112, "step": 7658 }, { "epoch": 0.7941468093241483, "grad_norm": 1.1796875, "learning_rate": 0.00016742432059096214, "loss": 4.2794, "step": 7659 }, { "epoch": 0.7942504973786364, "grad_norm": 1.0703125, "learning_rate": 0.00016741629828728364, "loss": 4.2744, "step": 7660 }, { "epoch": 0.7943541854331244, "grad_norm": 1.5546875, "learning_rate": 0.00016740827518816405, "loss": 4.2451, "step": 7661 }, { "epoch": 0.7944578734876125, "grad_norm": 1.4765625, "learning_rate": 0.00016740025129369807, "loss": 4.3041, "step": 7662 }, { "epoch": 0.7945615615421006, "grad_norm": 1.1171875, "learning_rate": 0.00016739222660398038, "loss": 4.3188, "step": 7663 }, { "epoch": 0.7946652495965887, "grad_norm": 1.15625, "learning_rate": 0.00016738420111910566, "loss": 4.2473, "step": 7664 }, { "epoch": 0.7947689376510767, "grad_norm": 1.1484375, "learning_rate": 0.0001673761748391686, "loss": 4.2878, "step": 7665 }, { "epoch": 0.7948726257055648, "grad_norm": 0.98828125, "learning_rate": 0.00016736814776426387, "loss": 4.2988, "step": 7666 }, { "epoch": 0.7949763137600528, "grad_norm": 1.4921875, "learning_rate": 0.0001673601198944862, "loss": 4.2549, "step": 7667 }, { "epoch": 0.795080001814541, "grad_norm": 1.2890625, "learning_rate": 0.00016735209122993033, "loss": 4.3131, "step": 7668 }, { "epoch": 0.795183689869029, "grad_norm": 1.4140625, "learning_rate": 0.000167344061770691, "loss": 4.3129, "step": 7669 }, { "epoch": 0.7952873779235171, "grad_norm": 1.3671875, "learning_rate": 0.00016733603151686288, "loss": 4.2754, "step": 7670 }, { "epoch": 0.7953910659780051, "grad_norm": 1.0078125, "learning_rate": 0.00016732800046854082, "loss": 4.3063, "step": 7671 }, { "epoch": 0.7954947540324933, "grad_norm": 1.09375, "learning_rate": 0.0001673199686258195, "loss": 4.2615, "step": 7672 }, { "epoch": 0.7955984420869813, "grad_norm": 1.046875, "learning_rate": 0.0001673119359887937, "loss": 4.3246, "step": 7673 }, { "epoch": 0.7957021301414694, "grad_norm": 0.8828125, "learning_rate": 0.0001673039025575582, "loss": 4.2785, "step": 7674 }, { "epoch": 0.7958058181959574, "grad_norm": 1.2578125, "learning_rate": 0.00016729586833220782, "loss": 4.2913, "step": 7675 }, { "epoch": 0.7959095062504455, "grad_norm": 0.93359375, "learning_rate": 0.00016728783331283734, "loss": 4.3111, "step": 7676 }, { "epoch": 0.7960131943049336, "grad_norm": 1.390625, "learning_rate": 0.00016727979749954153, "loss": 4.3188, "step": 7677 }, { "epoch": 0.7961168823594217, "grad_norm": 1.2421875, "learning_rate": 0.00016727176089241521, "loss": 4.2945, "step": 7678 }, { "epoch": 0.7962205704139097, "grad_norm": 1.359375, "learning_rate": 0.00016726372349155325, "loss": 4.2384, "step": 7679 }, { "epoch": 0.7963242584683978, "grad_norm": 1.2578125, "learning_rate": 0.0001672556852970504, "loss": 4.2974, "step": 7680 }, { "epoch": 0.7964279465228858, "grad_norm": 1.3125, "learning_rate": 0.00016724764630900163, "loss": 4.2904, "step": 7681 }, { "epoch": 0.796531634577374, "grad_norm": 1.171875, "learning_rate": 0.0001672396065275017, "loss": 4.2817, "step": 7682 }, { "epoch": 0.7966353226318621, "grad_norm": 1.4765625, "learning_rate": 0.00016723156595264545, "loss": 4.2707, "step": 7683 }, { "epoch": 0.7967390106863501, "grad_norm": 1.421875, "learning_rate": 0.00016722352458452782, "loss": 4.3052, "step": 7684 }, { "epoch": 0.7968426987408382, "grad_norm": 1.296875, "learning_rate": 0.00016721548242324365, "loss": 4.3118, "step": 7685 }, { "epoch": 0.7969463867953263, "grad_norm": 1.21875, "learning_rate": 0.00016720743946888785, "loss": 4.2763, "step": 7686 }, { "epoch": 0.7970500748498144, "grad_norm": 1.1875, "learning_rate": 0.00016719939572155528, "loss": 4.2655, "step": 7687 }, { "epoch": 0.7971537629043024, "grad_norm": 1.0390625, "learning_rate": 0.00016719135118134092, "loss": 4.3199, "step": 7688 }, { "epoch": 0.7972574509587905, "grad_norm": 1.3203125, "learning_rate": 0.00016718330584833958, "loss": 4.3064, "step": 7689 }, { "epoch": 0.7973611390132785, "grad_norm": 1.2421875, "learning_rate": 0.00016717525972264626, "loss": 4.2913, "step": 7690 }, { "epoch": 0.7974648270677667, "grad_norm": 1.375, "learning_rate": 0.0001671672128043559, "loss": 4.3011, "step": 7691 }, { "epoch": 0.7975685151222547, "grad_norm": 1.2734375, "learning_rate": 0.00016715916509356344, "loss": 4.2835, "step": 7692 }, { "epoch": 0.7976722031767428, "grad_norm": 1.3046875, "learning_rate": 0.0001671511165903638, "loss": 4.272, "step": 7693 }, { "epoch": 0.7977758912312308, "grad_norm": 1.2734375, "learning_rate": 0.00016714306729485195, "loss": 4.2619, "step": 7694 }, { "epoch": 0.797879579285719, "grad_norm": 1.0703125, "learning_rate": 0.0001671350172071229, "loss": 4.2933, "step": 7695 }, { "epoch": 0.797983267340207, "grad_norm": 1.0859375, "learning_rate": 0.00016712696632727164, "loss": 4.3161, "step": 7696 }, { "epoch": 0.7980869553946951, "grad_norm": 1.1328125, "learning_rate": 0.0001671189146553931, "loss": 4.2422, "step": 7697 }, { "epoch": 0.7981906434491831, "grad_norm": 0.97265625, "learning_rate": 0.0001671108621915823, "loss": 4.2794, "step": 7698 }, { "epoch": 0.7982943315036712, "grad_norm": 1.34375, "learning_rate": 0.0001671028089359343, "loss": 4.2902, "step": 7699 }, { "epoch": 0.7983980195581593, "grad_norm": 1.2578125, "learning_rate": 0.00016709475488854407, "loss": 4.3202, "step": 7700 }, { "epoch": 0.7985017076126474, "grad_norm": 1.296875, "learning_rate": 0.00016708670004950666, "loss": 4.3195, "step": 7701 }, { "epoch": 0.7986053956671354, "grad_norm": 1.296875, "learning_rate": 0.0001670786444189171, "loss": 4.3011, "step": 7702 }, { "epoch": 0.7987090837216235, "grad_norm": 1.09375, "learning_rate": 0.00016707058799687044, "loss": 4.3031, "step": 7703 }, { "epoch": 0.7988127717761115, "grad_norm": 1.046875, "learning_rate": 0.00016706253078346175, "loss": 4.3169, "step": 7704 }, { "epoch": 0.7989164598305997, "grad_norm": 1.2734375, "learning_rate": 0.00016705447277878607, "loss": 4.2572, "step": 7705 }, { "epoch": 0.7990201478850877, "grad_norm": 1.140625, "learning_rate": 0.0001670464139829385, "loss": 4.294, "step": 7706 }, { "epoch": 0.7991238359395758, "grad_norm": 1.3671875, "learning_rate": 0.00016703835439601413, "loss": 4.3035, "step": 7707 }, { "epoch": 0.7992275239940638, "grad_norm": 1.2734375, "learning_rate": 0.00016703029401810802, "loss": 4.2962, "step": 7708 }, { "epoch": 0.799331212048552, "grad_norm": 1.171875, "learning_rate": 0.00016702223284931528, "loss": 4.3065, "step": 7709 }, { "epoch": 0.79943490010304, "grad_norm": 1.1328125, "learning_rate": 0.00016701417088973107, "loss": 4.3254, "step": 7710 }, { "epoch": 0.7995385881575281, "grad_norm": 1.09375, "learning_rate": 0.00016700610813945044, "loss": 4.3224, "step": 7711 }, { "epoch": 0.7996422762120161, "grad_norm": 1.109375, "learning_rate": 0.00016699804459856862, "loss": 4.3194, "step": 7712 }, { "epoch": 0.7997459642665042, "grad_norm": 1.0703125, "learning_rate": 0.00016698998026718064, "loss": 4.2604, "step": 7713 }, { "epoch": 0.7998496523209923, "grad_norm": 0.94921875, "learning_rate": 0.00016698191514538172, "loss": 4.3033, "step": 7714 }, { "epoch": 0.7999533403754804, "grad_norm": 1.1484375, "learning_rate": 0.00016697384923326704, "loss": 4.3104, "step": 7715 }, { "epoch": 0.8000570284299684, "grad_norm": 1.0390625, "learning_rate": 0.0001669657825309317, "loss": 4.315, "step": 7716 }, { "epoch": 0.8001607164844565, "grad_norm": 1.4375, "learning_rate": 0.00016695771503847092, "loss": 4.3228, "step": 7717 }, { "epoch": 0.8002644045389445, "grad_norm": 1.3359375, "learning_rate": 0.00016694964675597986, "loss": 4.2683, "step": 7718 }, { "epoch": 0.8003680925934327, "grad_norm": 1.2421875, "learning_rate": 0.00016694157768355376, "loss": 4.298, "step": 7719 }, { "epoch": 0.8004717806479207, "grad_norm": 1.1953125, "learning_rate": 0.00016693350782128778, "loss": 4.2719, "step": 7720 }, { "epoch": 0.8005754687024088, "grad_norm": 1.1875, "learning_rate": 0.00016692543716927718, "loss": 4.2916, "step": 7721 }, { "epoch": 0.8006791567568968, "grad_norm": 1.0859375, "learning_rate": 0.00016691736572761715, "loss": 4.3065, "step": 7722 }, { "epoch": 0.800782844811385, "grad_norm": 1.2578125, "learning_rate": 0.00016690929349640296, "loss": 4.3154, "step": 7723 }, { "epoch": 0.800886532865873, "grad_norm": 1.1015625, "learning_rate": 0.00016690122047572983, "loss": 4.3292, "step": 7724 }, { "epoch": 0.8009902209203611, "grad_norm": 1.3671875, "learning_rate": 0.000166893146665693, "loss": 4.3223, "step": 7725 }, { "epoch": 0.8010939089748491, "grad_norm": 1.3671875, "learning_rate": 0.00016688507206638777, "loss": 4.2767, "step": 7726 }, { "epoch": 0.8011975970293372, "grad_norm": 1.0234375, "learning_rate": 0.00016687699667790936, "loss": 4.3026, "step": 7727 }, { "epoch": 0.8013012850838254, "grad_norm": 1.0234375, "learning_rate": 0.0001668689205003531, "loss": 4.2776, "step": 7728 }, { "epoch": 0.8014049731383134, "grad_norm": 1.09375, "learning_rate": 0.00016686084353381426, "loss": 4.2744, "step": 7729 }, { "epoch": 0.8015086611928015, "grad_norm": 0.921875, "learning_rate": 0.00016685276577838815, "loss": 4.2787, "step": 7730 }, { "epoch": 0.8016123492472895, "grad_norm": 1.328125, "learning_rate": 0.00016684468723417005, "loss": 4.2959, "step": 7731 }, { "epoch": 0.8017160373017777, "grad_norm": 1.171875, "learning_rate": 0.00016683660790125533, "loss": 4.3154, "step": 7732 }, { "epoch": 0.8018197253562657, "grad_norm": 1.453125, "learning_rate": 0.0001668285277797393, "loss": 4.2811, "step": 7733 }, { "epoch": 0.8019234134107538, "grad_norm": 1.3046875, "learning_rate": 0.0001668204468697172, "loss": 4.2388, "step": 7734 }, { "epoch": 0.8020271014652418, "grad_norm": 1.0703125, "learning_rate": 0.0001668123651712845, "loss": 4.3022, "step": 7735 }, { "epoch": 0.8021307895197299, "grad_norm": 1.0390625, "learning_rate": 0.00016680428268453653, "loss": 4.2492, "step": 7736 }, { "epoch": 0.802234477574218, "grad_norm": 1.234375, "learning_rate": 0.00016679619940956864, "loss": 4.2857, "step": 7737 }, { "epoch": 0.8023381656287061, "grad_norm": 1.1015625, "learning_rate": 0.0001667881153464762, "loss": 4.3029, "step": 7738 }, { "epoch": 0.8024418536831941, "grad_norm": 1.3046875, "learning_rate": 0.00016678003049535459, "loss": 4.2936, "step": 7739 }, { "epoch": 0.8025455417376822, "grad_norm": 1.171875, "learning_rate": 0.0001667719448562992, "loss": 4.3072, "step": 7740 }, { "epoch": 0.8026492297921702, "grad_norm": 1.2578125, "learning_rate": 0.00016676385842940547, "loss": 4.2703, "step": 7741 }, { "epoch": 0.8027529178466584, "grad_norm": 1.171875, "learning_rate": 0.00016675577121476876, "loss": 4.2924, "step": 7742 }, { "epoch": 0.8028566059011464, "grad_norm": 1.21875, "learning_rate": 0.00016674768321248452, "loss": 4.3096, "step": 7743 }, { "epoch": 0.8029602939556345, "grad_norm": 1.1328125, "learning_rate": 0.0001667395944226482, "loss": 4.3014, "step": 7744 }, { "epoch": 0.8030639820101225, "grad_norm": 1.2578125, "learning_rate": 0.00016673150484535518, "loss": 4.2482, "step": 7745 }, { "epoch": 0.8031676700646107, "grad_norm": 1.265625, "learning_rate": 0.00016672341448070095, "loss": 4.2823, "step": 7746 }, { "epoch": 0.8032713581190987, "grad_norm": 1.171875, "learning_rate": 0.00016671532332878094, "loss": 4.3267, "step": 7747 }, { "epoch": 0.8033750461735868, "grad_norm": 1.15625, "learning_rate": 0.00016670723138969065, "loss": 4.3252, "step": 7748 }, { "epoch": 0.8034787342280748, "grad_norm": 1.25, "learning_rate": 0.00016669913866352556, "loss": 4.2556, "step": 7749 }, { "epoch": 0.8035824222825629, "grad_norm": 1.1953125, "learning_rate": 0.0001666910451503811, "loss": 4.2379, "step": 7750 }, { "epoch": 0.803686110337051, "grad_norm": 1.171875, "learning_rate": 0.00016668295085035286, "loss": 4.2545, "step": 7751 }, { "epoch": 0.8037897983915391, "grad_norm": 1.1015625, "learning_rate": 0.00016667485576353624, "loss": 4.3009, "step": 7752 }, { "epoch": 0.8038934864460271, "grad_norm": 1.1953125, "learning_rate": 0.0001666667598900268, "loss": 4.3247, "step": 7753 }, { "epoch": 0.8039971745005152, "grad_norm": 1.0234375, "learning_rate": 0.00016665866322992007, "loss": 4.2759, "step": 7754 }, { "epoch": 0.8041008625550032, "grad_norm": 1.265625, "learning_rate": 0.0001666505657833116, "loss": 4.2795, "step": 7755 }, { "epoch": 0.8042045506094914, "grad_norm": 1.1953125, "learning_rate": 0.0001666424675502969, "loss": 4.2908, "step": 7756 }, { "epoch": 0.8043082386639794, "grad_norm": 1.078125, "learning_rate": 0.0001666343685309715, "loss": 4.2934, "step": 7757 }, { "epoch": 0.8044119267184675, "grad_norm": 1.078125, "learning_rate": 0.000166626268725431, "loss": 4.2682, "step": 7758 }, { "epoch": 0.8045156147729555, "grad_norm": 0.97265625, "learning_rate": 0.00016661816813377095, "loss": 4.2481, "step": 7759 }, { "epoch": 0.8046193028274436, "grad_norm": 0.99609375, "learning_rate": 0.00016661006675608694, "loss": 4.2937, "step": 7760 }, { "epoch": 0.8047229908819317, "grad_norm": 1.2109375, "learning_rate": 0.00016660196459247458, "loss": 4.3113, "step": 7761 }, { "epoch": 0.8048266789364198, "grad_norm": 1.0703125, "learning_rate": 0.0001665938616430294, "loss": 4.3276, "step": 7762 }, { "epoch": 0.8049303669909078, "grad_norm": 1.2734375, "learning_rate": 0.00016658575790784704, "loss": 4.2966, "step": 7763 }, { "epoch": 0.8050340550453959, "grad_norm": 1.15625, "learning_rate": 0.00016657765338702317, "loss": 4.3491, "step": 7764 }, { "epoch": 0.805137743099884, "grad_norm": 1.265625, "learning_rate": 0.00016656954808065333, "loss": 4.3034, "step": 7765 }, { "epoch": 0.8052414311543721, "grad_norm": 1.125, "learning_rate": 0.0001665614419888332, "loss": 4.3113, "step": 7766 }, { "epoch": 0.8053451192088601, "grad_norm": 1.25, "learning_rate": 0.0001665533351116584, "loss": 4.3144, "step": 7767 }, { "epoch": 0.8054488072633482, "grad_norm": 1.1953125, "learning_rate": 0.00016654522744922461, "loss": 4.32, "step": 7768 }, { "epoch": 0.8055524953178362, "grad_norm": 1.03125, "learning_rate": 0.00016653711900162748, "loss": 4.293, "step": 7769 }, { "epoch": 0.8056561833723244, "grad_norm": 0.96875, "learning_rate": 0.00016652900976896263, "loss": 4.3441, "step": 7770 }, { "epoch": 0.8057598714268124, "grad_norm": 1.296875, "learning_rate": 0.00016652089975132585, "loss": 4.2961, "step": 7771 }, { "epoch": 0.8058635594813005, "grad_norm": 1.1484375, "learning_rate": 0.00016651278894881278, "loss": 4.2946, "step": 7772 }, { "epoch": 0.8059672475357886, "grad_norm": 1.3046875, "learning_rate": 0.00016650467736151905, "loss": 4.3085, "step": 7773 }, { "epoch": 0.8060709355902766, "grad_norm": 1.2734375, "learning_rate": 0.00016649656498954042, "loss": 4.2685, "step": 7774 }, { "epoch": 0.8061746236447648, "grad_norm": 1.21875, "learning_rate": 0.00016648845183297266, "loss": 4.2989, "step": 7775 }, { "epoch": 0.8062783116992528, "grad_norm": 1.09375, "learning_rate": 0.00016648033789191144, "loss": 4.3006, "step": 7776 }, { "epoch": 0.8063819997537409, "grad_norm": 1.265625, "learning_rate": 0.0001664722231664525, "loss": 4.2543, "step": 7777 }, { "epoch": 0.8064856878082289, "grad_norm": 1.125, "learning_rate": 0.0001664641076566916, "loss": 4.2665, "step": 7778 }, { "epoch": 0.8065893758627171, "grad_norm": 1.2421875, "learning_rate": 0.00016645599136272447, "loss": 4.3282, "step": 7779 }, { "epoch": 0.8066930639172051, "grad_norm": 1.1171875, "learning_rate": 0.0001664478742846469, "loss": 4.3053, "step": 7780 }, { "epoch": 0.8067967519716932, "grad_norm": 1.390625, "learning_rate": 0.00016643975642255466, "loss": 4.293, "step": 7781 }, { "epoch": 0.8069004400261812, "grad_norm": 1.2734375, "learning_rate": 0.0001664316377765435, "loss": 4.2947, "step": 7782 }, { "epoch": 0.8070041280806693, "grad_norm": 1.421875, "learning_rate": 0.00016642351834670924, "loss": 4.2965, "step": 7783 }, { "epoch": 0.8071078161351574, "grad_norm": 1.328125, "learning_rate": 0.00016641539813314768, "loss": 4.2896, "step": 7784 }, { "epoch": 0.8072115041896455, "grad_norm": 1.2421875, "learning_rate": 0.0001664072771359546, "loss": 4.2974, "step": 7785 }, { "epoch": 0.8073151922441335, "grad_norm": 1.140625, "learning_rate": 0.0001663991553552259, "loss": 4.2654, "step": 7786 }, { "epoch": 0.8074188802986216, "grad_norm": 1.234375, "learning_rate": 0.0001663910327910573, "loss": 4.2828, "step": 7787 }, { "epoch": 0.8075225683531096, "grad_norm": 1.1875, "learning_rate": 0.00016638290944354474, "loss": 4.2936, "step": 7788 }, { "epoch": 0.8076262564075978, "grad_norm": 1.3828125, "learning_rate": 0.000166374785312784, "loss": 4.3182, "step": 7789 }, { "epoch": 0.8077299444620858, "grad_norm": 1.296875, "learning_rate": 0.00016636666039887094, "loss": 4.2962, "step": 7790 }, { "epoch": 0.8078336325165739, "grad_norm": 1.28125, "learning_rate": 0.00016635853470190146, "loss": 4.316, "step": 7791 }, { "epoch": 0.8079373205710619, "grad_norm": 1.15625, "learning_rate": 0.00016635040822197142, "loss": 4.2575, "step": 7792 }, { "epoch": 0.8080410086255501, "grad_norm": 1.1953125, "learning_rate": 0.00016634228095917667, "loss": 4.2889, "step": 7793 }, { "epoch": 0.8081446966800381, "grad_norm": 1.1328125, "learning_rate": 0.00016633415291361314, "loss": 4.2599, "step": 7794 }, { "epoch": 0.8082483847345262, "grad_norm": 1.3515625, "learning_rate": 0.0001663260240853767, "loss": 4.2845, "step": 7795 }, { "epoch": 0.8083520727890142, "grad_norm": 1.2578125, "learning_rate": 0.00016631789447456337, "loss": 4.2388, "step": 7796 }, { "epoch": 0.8084557608435023, "grad_norm": 1.421875, "learning_rate": 0.00016630976408126891, "loss": 4.2798, "step": 7797 }, { "epoch": 0.8085594488979904, "grad_norm": 1.34375, "learning_rate": 0.00016630163290558937, "loss": 4.2705, "step": 7798 }, { "epoch": 0.8086631369524785, "grad_norm": 1.0390625, "learning_rate": 0.0001662935009476206, "loss": 4.2865, "step": 7799 }, { "epoch": 0.8087668250069665, "grad_norm": 1.0078125, "learning_rate": 0.0001662853682074586, "loss": 4.2664, "step": 7800 }, { "epoch": 0.8088705130614546, "grad_norm": 1.2734375, "learning_rate": 0.00016627723468519935, "loss": 4.2632, "step": 7801 }, { "epoch": 0.8089742011159426, "grad_norm": 1.09375, "learning_rate": 0.00016626910038093878, "loss": 4.296, "step": 7802 }, { "epoch": 0.8090778891704308, "grad_norm": 1.46875, "learning_rate": 0.00016626096529477288, "loss": 4.3453, "step": 7803 }, { "epoch": 0.8091815772249188, "grad_norm": 1.390625, "learning_rate": 0.0001662528294267976, "loss": 4.2582, "step": 7804 }, { "epoch": 0.8092852652794069, "grad_norm": 1.078125, "learning_rate": 0.000166244692777109, "loss": 4.2668, "step": 7805 }, { "epoch": 0.8093889533338949, "grad_norm": 1.078125, "learning_rate": 0.00016623655534580303, "loss": 4.2773, "step": 7806 }, { "epoch": 0.8094926413883831, "grad_norm": 1.1328125, "learning_rate": 0.0001662284171329757, "loss": 4.3475, "step": 7807 }, { "epoch": 0.8095963294428711, "grad_norm": 1.0546875, "learning_rate": 0.00016622027813872312, "loss": 4.3152, "step": 7808 }, { "epoch": 0.8097000174973592, "grad_norm": 1.3125, "learning_rate": 0.00016621213836314123, "loss": 4.2213, "step": 7809 }, { "epoch": 0.8098037055518472, "grad_norm": 1.2109375, "learning_rate": 0.00016620399780632608, "loss": 4.2859, "step": 7810 }, { "epoch": 0.8099073936063353, "grad_norm": 1.2265625, "learning_rate": 0.00016619585646837374, "loss": 4.2515, "step": 7811 }, { "epoch": 0.8100110816608234, "grad_norm": 1.1953125, "learning_rate": 0.00016618771434938025, "loss": 4.2569, "step": 7812 }, { "epoch": 0.8101147697153115, "grad_norm": 1.0859375, "learning_rate": 0.00016617957144944172, "loss": 4.2405, "step": 7813 }, { "epoch": 0.8102184577697995, "grad_norm": 1.03125, "learning_rate": 0.0001661714277686542, "loss": 4.2934, "step": 7814 }, { "epoch": 0.8103221458242876, "grad_norm": 1.171875, "learning_rate": 0.00016616328330711376, "loss": 4.3069, "step": 7815 }, { "epoch": 0.8104258338787758, "grad_norm": 1.078125, "learning_rate": 0.00016615513806491654, "loss": 4.2408, "step": 7816 }, { "epoch": 0.8105295219332638, "grad_norm": 1.21875, "learning_rate": 0.0001661469920421586, "loss": 4.334, "step": 7817 }, { "epoch": 0.8106332099877519, "grad_norm": 1.1015625, "learning_rate": 0.00016613884523893608, "loss": 4.2613, "step": 7818 }, { "epoch": 0.8107368980422399, "grad_norm": 1.234375, "learning_rate": 0.0001661306976553451, "loss": 4.3069, "step": 7819 }, { "epoch": 0.810840586096728, "grad_norm": 1.171875, "learning_rate": 0.00016612254929148179, "loss": 4.3041, "step": 7820 }, { "epoch": 0.8109442741512161, "grad_norm": 1.171875, "learning_rate": 0.0001661144001474423, "loss": 4.3017, "step": 7821 }, { "epoch": 0.8110479622057042, "grad_norm": 1.125, "learning_rate": 0.00016610625022332276, "loss": 4.2651, "step": 7822 }, { "epoch": 0.8111516502601922, "grad_norm": 1.1328125, "learning_rate": 0.00016609809951921936, "loss": 4.3191, "step": 7823 }, { "epoch": 0.8112553383146803, "grad_norm": 1.0546875, "learning_rate": 0.00016608994803522824, "loss": 4.2134, "step": 7824 }, { "epoch": 0.8113590263691683, "grad_norm": 1.375, "learning_rate": 0.0001660817957714456, "loss": 4.3125, "step": 7825 }, { "epoch": 0.8114627144236565, "grad_norm": 1.25, "learning_rate": 0.00016607364272796762, "loss": 4.2822, "step": 7826 }, { "epoch": 0.8115664024781445, "grad_norm": 1.2109375, "learning_rate": 0.0001660654889048905, "loss": 4.2994, "step": 7827 }, { "epoch": 0.8116700905326326, "grad_norm": 1.1796875, "learning_rate": 0.00016605733430231044, "loss": 4.2685, "step": 7828 }, { "epoch": 0.8117737785871206, "grad_norm": 1.1171875, "learning_rate": 0.00016604917892032366, "loss": 4.2875, "step": 7829 }, { "epoch": 0.8118774666416088, "grad_norm": 1.078125, "learning_rate": 0.0001660410227590264, "loss": 4.2983, "step": 7830 }, { "epoch": 0.8119811546960968, "grad_norm": 1.1796875, "learning_rate": 0.00016603286581851488, "loss": 4.2994, "step": 7831 }, { "epoch": 0.8120848427505849, "grad_norm": 1.0546875, "learning_rate": 0.0001660247080988853, "loss": 4.2526, "step": 7832 }, { "epoch": 0.8121885308050729, "grad_norm": 1.21875, "learning_rate": 0.00016601654960023398, "loss": 4.2908, "step": 7833 }, { "epoch": 0.812292218859561, "grad_norm": 1.109375, "learning_rate": 0.0001660083903226572, "loss": 4.2445, "step": 7834 }, { "epoch": 0.8123959069140491, "grad_norm": 1.25, "learning_rate": 0.00016600023026625116, "loss": 4.2771, "step": 7835 }, { "epoch": 0.8124995949685372, "grad_norm": 1.1484375, "learning_rate": 0.00016599206943111215, "loss": 4.2637, "step": 7836 }, { "epoch": 0.8126032830230252, "grad_norm": 1.1328125, "learning_rate": 0.0001659839078173365, "loss": 4.2666, "step": 7837 }, { "epoch": 0.8127069710775133, "grad_norm": 1.0546875, "learning_rate": 0.00016597574542502047, "loss": 4.2428, "step": 7838 }, { "epoch": 0.8128106591320013, "grad_norm": 1.2578125, "learning_rate": 0.0001659675822542604, "loss": 4.28, "step": 7839 }, { "epoch": 0.8129143471864895, "grad_norm": 1.15625, "learning_rate": 0.00016595941830515256, "loss": 4.2637, "step": 7840 }, { "epoch": 0.8130180352409775, "grad_norm": 1.375, "learning_rate": 0.00016595125357779332, "loss": 4.272, "step": 7841 }, { "epoch": 0.8131217232954656, "grad_norm": 1.2109375, "learning_rate": 0.00016594308807227904, "loss": 4.265, "step": 7842 }, { "epoch": 0.8132254113499536, "grad_norm": 1.09375, "learning_rate": 0.00016593492178870598, "loss": 4.2538, "step": 7843 }, { "epoch": 0.8133290994044418, "grad_norm": 1.0390625, "learning_rate": 0.00016592675472717054, "loss": 4.2471, "step": 7844 }, { "epoch": 0.8134327874589298, "grad_norm": 1.171875, "learning_rate": 0.0001659185868877691, "loss": 4.2553, "step": 7845 }, { "epoch": 0.8135364755134179, "grad_norm": 1.1015625, "learning_rate": 0.00016591041827059802, "loss": 4.3116, "step": 7846 }, { "epoch": 0.8136401635679059, "grad_norm": 1.2890625, "learning_rate": 0.00016590224887575366, "loss": 4.2769, "step": 7847 }, { "epoch": 0.813743851622394, "grad_norm": 1.171875, "learning_rate": 0.00016589407870333243, "loss": 4.3086, "step": 7848 }, { "epoch": 0.8138475396768821, "grad_norm": 1.1796875, "learning_rate": 0.00016588590775343072, "loss": 4.2312, "step": 7849 }, { "epoch": 0.8139512277313702, "grad_norm": 1.25, "learning_rate": 0.00016587773602614494, "loss": 4.3059, "step": 7850 }, { "epoch": 0.8140549157858582, "grad_norm": 1.0703125, "learning_rate": 0.0001658695635215715, "loss": 4.2705, "step": 7851 }, { "epoch": 0.8141586038403463, "grad_norm": 1.0703125, "learning_rate": 0.00016586139023980686, "loss": 4.2674, "step": 7852 }, { "epoch": 0.8142622918948343, "grad_norm": 1.1953125, "learning_rate": 0.00016585321618094742, "loss": 4.3153, "step": 7853 }, { "epoch": 0.8143659799493225, "grad_norm": 1.140625, "learning_rate": 0.00016584504134508965, "loss": 4.3233, "step": 7854 }, { "epoch": 0.8144696680038105, "grad_norm": 1.2890625, "learning_rate": 0.00016583686573233, "loss": 4.304, "step": 7855 }, { "epoch": 0.8145733560582986, "grad_norm": 1.2109375, "learning_rate": 0.0001658286893427649, "loss": 4.2762, "step": 7856 }, { "epoch": 0.8146770441127866, "grad_norm": 1.09375, "learning_rate": 0.00016582051217649087, "loss": 4.3054, "step": 7857 }, { "epoch": 0.8147807321672748, "grad_norm": 1.0703125, "learning_rate": 0.00016581233423360433, "loss": 4.2704, "step": 7858 }, { "epoch": 0.8148844202217628, "grad_norm": 1.2109375, "learning_rate": 0.00016580415551420189, "loss": 4.3082, "step": 7859 }, { "epoch": 0.8149881082762509, "grad_norm": 1.125, "learning_rate": 0.00016579597601837993, "loss": 4.2897, "step": 7860 }, { "epoch": 0.815091796330739, "grad_norm": 1.234375, "learning_rate": 0.000165787795746235, "loss": 4.3225, "step": 7861 }, { "epoch": 0.815195484385227, "grad_norm": 1.203125, "learning_rate": 0.00016577961469786364, "loss": 4.3007, "step": 7862 }, { "epoch": 0.8152991724397152, "grad_norm": 1.109375, "learning_rate": 0.00016577143287336234, "loss": 4.3154, "step": 7863 }, { "epoch": 0.8154028604942032, "grad_norm": 1.109375, "learning_rate": 0.00016576325027282764, "loss": 4.2933, "step": 7864 }, { "epoch": 0.8155065485486913, "grad_norm": 1.109375, "learning_rate": 0.00016575506689635612, "loss": 4.2656, "step": 7865 }, { "epoch": 0.8156102366031793, "grad_norm": 1.0703125, "learning_rate": 0.00016574688274404432, "loss": 4.239, "step": 7866 }, { "epoch": 0.8157139246576675, "grad_norm": 1.3125, "learning_rate": 0.00016573869781598882, "loss": 4.2593, "step": 7867 }, { "epoch": 0.8158176127121555, "grad_norm": 1.2109375, "learning_rate": 0.00016573051211228614, "loss": 4.2934, "step": 7868 }, { "epoch": 0.8159213007666436, "grad_norm": 1.265625, "learning_rate": 0.00016572232563303292, "loss": 4.2886, "step": 7869 }, { "epoch": 0.8160249888211316, "grad_norm": 1.203125, "learning_rate": 0.0001657141383783257, "loss": 4.2693, "step": 7870 }, { "epoch": 0.8161286768756197, "grad_norm": 1.078125, "learning_rate": 0.00016570595034826115, "loss": 4.3098, "step": 7871 }, { "epoch": 0.8162323649301078, "grad_norm": 1.0625, "learning_rate": 0.0001656977615429358, "loss": 4.2957, "step": 7872 }, { "epoch": 0.8163360529845959, "grad_norm": 1.1953125, "learning_rate": 0.00016568957196244632, "loss": 4.2775, "step": 7873 }, { "epoch": 0.8164397410390839, "grad_norm": 1.046875, "learning_rate": 0.00016568138160688936, "loss": 4.2855, "step": 7874 }, { "epoch": 0.816543429093572, "grad_norm": 1.3125, "learning_rate": 0.0001656731904763615, "loss": 4.2679, "step": 7875 }, { "epoch": 0.81664711714806, "grad_norm": 1.1640625, "learning_rate": 0.00016566499857095942, "loss": 4.3152, "step": 7876 }, { "epoch": 0.8167508052025482, "grad_norm": 1.3984375, "learning_rate": 0.00016565680589077976, "loss": 4.2852, "step": 7877 }, { "epoch": 0.8168544932570362, "grad_norm": 1.25, "learning_rate": 0.00016564861243591918, "loss": 4.2883, "step": 7878 }, { "epoch": 0.8169581813115243, "grad_norm": 1.171875, "learning_rate": 0.00016564041820647438, "loss": 4.2654, "step": 7879 }, { "epoch": 0.8170618693660123, "grad_norm": 1.1484375, "learning_rate": 0.00016563222320254206, "loss": 4.3057, "step": 7880 }, { "epoch": 0.8171655574205005, "grad_norm": 1.2578125, "learning_rate": 0.00016562402742421883, "loss": 4.2914, "step": 7881 }, { "epoch": 0.8172692454749885, "grad_norm": 1.1484375, "learning_rate": 0.00016561583087160145, "loss": 4.2848, "step": 7882 }, { "epoch": 0.8173729335294766, "grad_norm": 1.3359375, "learning_rate": 0.00016560763354478666, "loss": 4.2953, "step": 7883 }, { "epoch": 0.8174766215839646, "grad_norm": 1.25, "learning_rate": 0.00016559943544387114, "loss": 4.2588, "step": 7884 }, { "epoch": 0.8175803096384527, "grad_norm": 1.2421875, "learning_rate": 0.00016559123656895158, "loss": 4.2727, "step": 7885 }, { "epoch": 0.8176839976929408, "grad_norm": 1.2265625, "learning_rate": 0.00016558303692012482, "loss": 4.2575, "step": 7886 }, { "epoch": 0.8177876857474289, "grad_norm": 1.1171875, "learning_rate": 0.0001655748364974875, "loss": 4.293, "step": 7887 }, { "epoch": 0.8178913738019169, "grad_norm": 1.0234375, "learning_rate": 0.00016556663530113648, "loss": 4.267, "step": 7888 }, { "epoch": 0.817995061856405, "grad_norm": 1.359375, "learning_rate": 0.0001655584333311684, "loss": 4.3019, "step": 7889 }, { "epoch": 0.818098749910893, "grad_norm": 1.2578125, "learning_rate": 0.00016555023058768015, "loss": 4.2912, "step": 7890 }, { "epoch": 0.8182024379653812, "grad_norm": 1.1796875, "learning_rate": 0.00016554202707076847, "loss": 4.3066, "step": 7891 }, { "epoch": 0.8183061260198692, "grad_norm": 1.21875, "learning_rate": 0.00016553382278053014, "loss": 4.2105, "step": 7892 }, { "epoch": 0.8184098140743573, "grad_norm": 0.96875, "learning_rate": 0.00016552561771706196, "loss": 4.2934, "step": 7893 }, { "epoch": 0.8185135021288453, "grad_norm": 0.984375, "learning_rate": 0.00016551741188046076, "loss": 4.2425, "step": 7894 }, { "epoch": 0.8186171901833335, "grad_norm": 1.125, "learning_rate": 0.00016550920527082336, "loss": 4.2912, "step": 7895 }, { "epoch": 0.8187208782378215, "grad_norm": 0.8828125, "learning_rate": 0.0001655009978882466, "loss": 4.2229, "step": 7896 }, { "epoch": 0.8188245662923096, "grad_norm": 1.15625, "learning_rate": 0.0001654927897328273, "loss": 4.3013, "step": 7897 }, { "epoch": 0.8189282543467976, "grad_norm": 0.95703125, "learning_rate": 0.0001654845808046623, "loss": 4.2932, "step": 7898 }, { "epoch": 0.8190319424012857, "grad_norm": 1.328125, "learning_rate": 0.00016547637110384846, "loss": 4.3038, "step": 7899 }, { "epoch": 0.8191356304557738, "grad_norm": 1.1484375, "learning_rate": 0.00016546816063048268, "loss": 4.2764, "step": 7900 }, { "epoch": 0.8192393185102619, "grad_norm": 1.2265625, "learning_rate": 0.00016545994938466175, "loss": 4.2641, "step": 7901 }, { "epoch": 0.8193430065647499, "grad_norm": 1.203125, "learning_rate": 0.00016545173736648265, "loss": 4.2777, "step": 7902 }, { "epoch": 0.819446694619238, "grad_norm": 1.015625, "learning_rate": 0.00016544352457604225, "loss": 4.3514, "step": 7903 }, { "epoch": 0.819550382673726, "grad_norm": 0.99609375, "learning_rate": 0.0001654353110134374, "loss": 4.266, "step": 7904 }, { "epoch": 0.8196540707282142, "grad_norm": 1.1015625, "learning_rate": 0.0001654270966787651, "loss": 4.2791, "step": 7905 }, { "epoch": 0.8197577587827023, "grad_norm": 0.96484375, "learning_rate": 0.00016541888157212218, "loss": 4.2612, "step": 7906 }, { "epoch": 0.8198614468371903, "grad_norm": 1.3515625, "learning_rate": 0.0001654106656936056, "loss": 4.3454, "step": 7907 }, { "epoch": 0.8199651348916784, "grad_norm": 1.25, "learning_rate": 0.0001654024490433123, "loss": 4.2956, "step": 7908 }, { "epoch": 0.8200688229461665, "grad_norm": 1.2109375, "learning_rate": 0.00016539423162133926, "loss": 4.2367, "step": 7909 }, { "epoch": 0.8201725110006546, "grad_norm": 1.1328125, "learning_rate": 0.00016538601342778344, "loss": 4.2403, "step": 7910 }, { "epoch": 0.8202761990551426, "grad_norm": 0.98046875, "learning_rate": 0.00016537779446274174, "loss": 4.3065, "step": 7911 }, { "epoch": 0.8203798871096307, "grad_norm": 0.91015625, "learning_rate": 0.00016536957472631115, "loss": 4.2632, "step": 7912 }, { "epoch": 0.8204835751641187, "grad_norm": 0.96484375, "learning_rate": 0.00016536135421858875, "loss": 4.2896, "step": 7913 }, { "epoch": 0.8205872632186069, "grad_norm": 0.84375, "learning_rate": 0.0001653531329396714, "loss": 4.2405, "step": 7914 }, { "epoch": 0.8206909512730949, "grad_norm": 0.94140625, "learning_rate": 0.00016534491088965615, "loss": 4.275, "step": 7915 }, { "epoch": 0.820794639327583, "grad_norm": 0.8515625, "learning_rate": 0.00016533668806864007, "loss": 4.2577, "step": 7916 }, { "epoch": 0.820898327382071, "grad_norm": 0.9296875, "learning_rate": 0.00016532846447672012, "loss": 4.2587, "step": 7917 }, { "epoch": 0.8210020154365592, "grad_norm": 0.796875, "learning_rate": 0.00016532024011399335, "loss": 4.3051, "step": 7918 }, { "epoch": 0.8211057034910472, "grad_norm": 0.94140625, "learning_rate": 0.00016531201498055676, "loss": 4.3112, "step": 7919 }, { "epoch": 0.8212093915455353, "grad_norm": 0.79296875, "learning_rate": 0.00016530378907650745, "loss": 4.28, "step": 7920 }, { "epoch": 0.8213130796000233, "grad_norm": 0.87890625, "learning_rate": 0.0001652955624019425, "loss": 4.2912, "step": 7921 }, { "epoch": 0.8214167676545114, "grad_norm": 0.796875, "learning_rate": 0.00016528733495695886, "loss": 4.2139, "step": 7922 }, { "epoch": 0.8215204557089995, "grad_norm": 0.86328125, "learning_rate": 0.00016527910674165374, "loss": 4.289, "step": 7923 }, { "epoch": 0.8216241437634876, "grad_norm": 0.75390625, "learning_rate": 0.00016527087775612413, "loss": 4.2447, "step": 7924 }, { "epoch": 0.8217278318179756, "grad_norm": 0.9375, "learning_rate": 0.00016526264800046717, "loss": 4.305, "step": 7925 }, { "epoch": 0.8218315198724637, "grad_norm": 0.71484375, "learning_rate": 0.00016525441747477994, "loss": 4.3099, "step": 7926 }, { "epoch": 0.8219352079269517, "grad_norm": 0.87890625, "learning_rate": 0.00016524618617915957, "loss": 4.2869, "step": 7927 }, { "epoch": 0.8220388959814399, "grad_norm": 0.79296875, "learning_rate": 0.00016523795411370316, "loss": 4.2937, "step": 7928 }, { "epoch": 0.8221425840359279, "grad_norm": 0.86328125, "learning_rate": 0.00016522972127850784, "loss": 4.2678, "step": 7929 }, { "epoch": 0.822246272090416, "grad_norm": 0.76953125, "learning_rate": 0.0001652214876736708, "loss": 4.2553, "step": 7930 }, { "epoch": 0.822349960144904, "grad_norm": 0.85546875, "learning_rate": 0.00016521325329928911, "loss": 4.3384, "step": 7931 }, { "epoch": 0.8224536481993922, "grad_norm": 0.8515625, "learning_rate": 0.00016520501815546, "loss": 4.3044, "step": 7932 }, { "epoch": 0.8225573362538802, "grad_norm": 0.796875, "learning_rate": 0.00016519678224228055, "loss": 4.2717, "step": 7933 }, { "epoch": 0.8226610243083683, "grad_norm": 0.84765625, "learning_rate": 0.00016518854555984803, "loss": 4.2566, "step": 7934 }, { "epoch": 0.8227647123628563, "grad_norm": 0.7421875, "learning_rate": 0.00016518030810825957, "loss": 4.3309, "step": 7935 }, { "epoch": 0.8228684004173444, "grad_norm": 0.8125, "learning_rate": 0.00016517206988761238, "loss": 4.3035, "step": 7936 }, { "epoch": 0.8229720884718325, "grad_norm": 0.6796875, "learning_rate": 0.00016516383089800363, "loss": 4.2951, "step": 7937 }, { "epoch": 0.8230757765263206, "grad_norm": 0.79296875, "learning_rate": 0.00016515559113953056, "loss": 4.2759, "step": 7938 }, { "epoch": 0.8231794645808086, "grad_norm": 0.7109375, "learning_rate": 0.0001651473506122904, "loss": 4.2353, "step": 7939 }, { "epoch": 0.8232831526352967, "grad_norm": 0.7109375, "learning_rate": 0.00016513910931638038, "loss": 4.3049, "step": 7940 }, { "epoch": 0.8233868406897847, "grad_norm": 0.77734375, "learning_rate": 0.0001651308672518977, "loss": 4.2876, "step": 7941 }, { "epoch": 0.8234905287442729, "grad_norm": 0.734375, "learning_rate": 0.00016512262441893967, "loss": 4.2671, "step": 7942 }, { "epoch": 0.8235942167987609, "grad_norm": 0.73828125, "learning_rate": 0.00016511438081760348, "loss": 4.2758, "step": 7943 }, { "epoch": 0.823697904853249, "grad_norm": 0.734375, "learning_rate": 0.00016510613644798642, "loss": 4.3016, "step": 7944 }, { "epoch": 0.823801592907737, "grad_norm": 0.82421875, "learning_rate": 0.00016509789131018577, "loss": 4.2983, "step": 7945 }, { "epoch": 0.8239052809622252, "grad_norm": 0.7890625, "learning_rate": 0.00016508964540429882, "loss": 4.2565, "step": 7946 }, { "epoch": 0.8240089690167132, "grad_norm": 0.7578125, "learning_rate": 0.00016508139873042287, "loss": 4.3073, "step": 7947 }, { "epoch": 0.8241126570712013, "grad_norm": 0.75390625, "learning_rate": 0.00016507315128865517, "loss": 4.3019, "step": 7948 }, { "epoch": 0.8242163451256893, "grad_norm": 0.75, "learning_rate": 0.0001650649030790931, "loss": 4.2778, "step": 7949 }, { "epoch": 0.8243200331801774, "grad_norm": 0.77734375, "learning_rate": 0.00016505665410183396, "loss": 4.2676, "step": 7950 }, { "epoch": 0.8244237212346656, "grad_norm": 0.75, "learning_rate": 0.00016504840435697504, "loss": 4.2584, "step": 7951 }, { "epoch": 0.8245274092891536, "grad_norm": 0.74609375, "learning_rate": 0.0001650401538446137, "loss": 4.2756, "step": 7952 }, { "epoch": 0.8246310973436417, "grad_norm": 0.71875, "learning_rate": 0.00016503190256484732, "loss": 4.3243, "step": 7953 }, { "epoch": 0.8247347853981297, "grad_norm": 0.73046875, "learning_rate": 0.00016502365051777326, "loss": 4.3209, "step": 7954 }, { "epoch": 0.8248384734526178, "grad_norm": 0.72265625, "learning_rate": 0.0001650153977034888, "loss": 4.3155, "step": 7955 }, { "epoch": 0.8249421615071059, "grad_norm": 0.703125, "learning_rate": 0.0001650071441220914, "loss": 4.3073, "step": 7956 }, { "epoch": 0.825045849561594, "grad_norm": 0.67578125, "learning_rate": 0.00016499888977367842, "loss": 4.2505, "step": 7957 }, { "epoch": 0.825149537616082, "grad_norm": 0.75, "learning_rate": 0.00016499063465834723, "loss": 4.2585, "step": 7958 }, { "epoch": 0.8252532256705701, "grad_norm": 0.60546875, "learning_rate": 0.00016498237877619526, "loss": 4.2695, "step": 7959 }, { "epoch": 0.8253569137250581, "grad_norm": 0.765625, "learning_rate": 0.00016497412212731992, "loss": 4.2895, "step": 7960 }, { "epoch": 0.8254606017795463, "grad_norm": 0.6640625, "learning_rate": 0.00016496586471181863, "loss": 4.3072, "step": 7961 }, { "epoch": 0.8255642898340343, "grad_norm": 0.62109375, "learning_rate": 0.00016495760652978877, "loss": 4.2812, "step": 7962 }, { "epoch": 0.8256679778885224, "grad_norm": 0.66796875, "learning_rate": 0.00016494934758132782, "loss": 4.2581, "step": 7963 }, { "epoch": 0.8257716659430104, "grad_norm": 0.7421875, "learning_rate": 0.00016494108786653327, "loss": 4.2804, "step": 7964 }, { "epoch": 0.8258753539974986, "grad_norm": 0.6875, "learning_rate": 0.00016493282738550246, "loss": 4.2712, "step": 7965 }, { "epoch": 0.8259790420519866, "grad_norm": 0.75390625, "learning_rate": 0.00016492456613833299, "loss": 4.3037, "step": 7966 }, { "epoch": 0.8260827301064747, "grad_norm": 0.69921875, "learning_rate": 0.00016491630412512223, "loss": 4.311, "step": 7967 }, { "epoch": 0.8261864181609627, "grad_norm": 0.65625, "learning_rate": 0.0001649080413459677, "loss": 4.2494, "step": 7968 }, { "epoch": 0.8262901062154508, "grad_norm": 0.65234375, "learning_rate": 0.0001648997778009669, "loss": 4.2881, "step": 7969 }, { "epoch": 0.8263937942699389, "grad_norm": 0.70703125, "learning_rate": 0.00016489151349021732, "loss": 4.3125, "step": 7970 }, { "epoch": 0.826497482324427, "grad_norm": 0.578125, "learning_rate": 0.00016488324841381648, "loss": 4.2862, "step": 7971 }, { "epoch": 0.826601170378915, "grad_norm": 0.70703125, "learning_rate": 0.0001648749825718619, "loss": 4.286, "step": 7972 }, { "epoch": 0.8267048584334031, "grad_norm": 0.625, "learning_rate": 0.00016486671596445109, "loss": 4.2535, "step": 7973 }, { "epoch": 0.8268085464878911, "grad_norm": 0.63671875, "learning_rate": 0.0001648584485916816, "loss": 4.2822, "step": 7974 }, { "epoch": 0.8269122345423793, "grad_norm": 0.7109375, "learning_rate": 0.000164850180453651, "loss": 4.2299, "step": 7975 }, { "epoch": 0.8270159225968673, "grad_norm": 0.59375, "learning_rate": 0.0001648419115504568, "loss": 4.3058, "step": 7976 }, { "epoch": 0.8271196106513554, "grad_norm": 0.69921875, "learning_rate": 0.00016483364188219656, "loss": 4.3092, "step": 7977 }, { "epoch": 0.8272232987058434, "grad_norm": 0.60546875, "learning_rate": 0.0001648253714489679, "loss": 4.3129, "step": 7978 }, { "epoch": 0.8273269867603316, "grad_norm": 0.6484375, "learning_rate": 0.0001648171002508684, "loss": 4.3436, "step": 7979 }, { "epoch": 0.8274306748148196, "grad_norm": 0.6953125, "learning_rate": 0.0001648088282879956, "loss": 4.2833, "step": 7980 }, { "epoch": 0.8275343628693077, "grad_norm": 0.625, "learning_rate": 0.00016480055556044714, "loss": 4.2834, "step": 7981 }, { "epoch": 0.8276380509237957, "grad_norm": 0.71484375, "learning_rate": 0.00016479228206832064, "loss": 4.2804, "step": 7982 }, { "epoch": 0.8277417389782838, "grad_norm": 0.65234375, "learning_rate": 0.00016478400781171366, "loss": 4.2894, "step": 7983 }, { "epoch": 0.8278454270327719, "grad_norm": 0.6796875, "learning_rate": 0.00016477573279072391, "loss": 4.2667, "step": 7984 }, { "epoch": 0.82794911508726, "grad_norm": 0.6640625, "learning_rate": 0.00016476745700544896, "loss": 4.3018, "step": 7985 }, { "epoch": 0.828052803141748, "grad_norm": 0.62890625, "learning_rate": 0.00016475918045598647, "loss": 4.2352, "step": 7986 }, { "epoch": 0.8281564911962361, "grad_norm": 0.6953125, "learning_rate": 0.00016475090314243412, "loss": 4.2554, "step": 7987 }, { "epoch": 0.8282601792507241, "grad_norm": 0.64453125, "learning_rate": 0.00016474262506488958, "loss": 4.3044, "step": 7988 }, { "epoch": 0.8283638673052123, "grad_norm": 0.640625, "learning_rate": 0.00016473434622345047, "loss": 4.2943, "step": 7989 }, { "epoch": 0.8284675553597003, "grad_norm": 0.64453125, "learning_rate": 0.0001647260666182145, "loss": 4.3148, "step": 7990 }, { "epoch": 0.8285712434141884, "grad_norm": 0.59375, "learning_rate": 0.00016471778624927938, "loss": 4.2165, "step": 7991 }, { "epoch": 0.8286749314686764, "grad_norm": 0.7109375, "learning_rate": 0.00016470950511674278, "loss": 4.2778, "step": 7992 }, { "epoch": 0.8287786195231646, "grad_norm": 0.640625, "learning_rate": 0.00016470122322070245, "loss": 4.274, "step": 7993 }, { "epoch": 0.8288823075776526, "grad_norm": 0.68359375, "learning_rate": 0.00016469294056125602, "loss": 4.2821, "step": 7994 }, { "epoch": 0.8289859956321407, "grad_norm": 0.62890625, "learning_rate": 0.0001646846571385013, "loss": 4.2289, "step": 7995 }, { "epoch": 0.8290896836866288, "grad_norm": 0.72265625, "learning_rate": 0.00016467637295253603, "loss": 4.3043, "step": 7996 }, { "epoch": 0.8291933717411168, "grad_norm": 0.62109375, "learning_rate": 0.0001646680880034579, "loss": 4.2625, "step": 7997 }, { "epoch": 0.829297059795605, "grad_norm": 0.6875, "learning_rate": 0.00016465980229136471, "loss": 4.274, "step": 7998 }, { "epoch": 0.829400747850093, "grad_norm": 0.7265625, "learning_rate": 0.00016465151581635415, "loss": 4.2753, "step": 7999 }, { "epoch": 0.8295044359045811, "grad_norm": 0.7421875, "learning_rate": 0.0001646432285785241, "loss": 4.2849, "step": 8000 }, { "epoch": 0.8296081239590691, "grad_norm": 0.73828125, "learning_rate": 0.00016463494057797226, "loss": 4.2604, "step": 8001 }, { "epoch": 0.8297118120135573, "grad_norm": 0.73828125, "learning_rate": 0.00016462665181479644, "loss": 4.2643, "step": 8002 }, { "epoch": 0.8298155000680453, "grad_norm": 0.77734375, "learning_rate": 0.00016461836228909445, "loss": 4.3256, "step": 8003 }, { "epoch": 0.8299191881225334, "grad_norm": 0.70703125, "learning_rate": 0.00016461007200096407, "loss": 4.309, "step": 8004 }, { "epoch": 0.8300228761770214, "grad_norm": 0.69921875, "learning_rate": 0.00016460178095050316, "loss": 4.2878, "step": 8005 }, { "epoch": 0.8301265642315095, "grad_norm": 0.76171875, "learning_rate": 0.0001645934891378095, "loss": 4.3001, "step": 8006 }, { "epoch": 0.8302302522859976, "grad_norm": 0.6640625, "learning_rate": 0.00016458519656298095, "loss": 4.3005, "step": 8007 }, { "epoch": 0.8303339403404857, "grad_norm": 0.72265625, "learning_rate": 0.00016457690322611537, "loss": 4.2724, "step": 8008 }, { "epoch": 0.8304376283949737, "grad_norm": 0.69921875, "learning_rate": 0.0001645686091273106, "loss": 4.2699, "step": 8009 }, { "epoch": 0.8305413164494618, "grad_norm": 0.75, "learning_rate": 0.00016456031426666446, "loss": 4.3013, "step": 8010 }, { "epoch": 0.8306450045039498, "grad_norm": 0.63671875, "learning_rate": 0.00016455201864427486, "loss": 4.2032, "step": 8011 }, { "epoch": 0.830748692558438, "grad_norm": 0.734375, "learning_rate": 0.0001645437222602397, "loss": 4.276, "step": 8012 }, { "epoch": 0.830852380612926, "grad_norm": 0.59375, "learning_rate": 0.00016453542511465682, "loss": 4.2788, "step": 8013 }, { "epoch": 0.8309560686674141, "grad_norm": 0.68359375, "learning_rate": 0.00016452712720762416, "loss": 4.2797, "step": 8014 }, { "epoch": 0.8310597567219021, "grad_norm": 0.65625, "learning_rate": 0.00016451882853923958, "loss": 4.2979, "step": 8015 }, { "epoch": 0.8311634447763903, "grad_norm": 0.609375, "learning_rate": 0.00016451052910960106, "loss": 4.3467, "step": 8016 }, { "epoch": 0.8312671328308783, "grad_norm": 0.66796875, "learning_rate": 0.00016450222891880646, "loss": 4.2835, "step": 8017 }, { "epoch": 0.8313708208853664, "grad_norm": 0.6015625, "learning_rate": 0.00016449392796695373, "loss": 4.284, "step": 8018 }, { "epoch": 0.8314745089398544, "grad_norm": 0.671875, "learning_rate": 0.00016448562625414085, "loss": 4.32, "step": 8019 }, { "epoch": 0.8315781969943425, "grad_norm": 0.5703125, "learning_rate": 0.0001644773237804657, "loss": 4.2855, "step": 8020 }, { "epoch": 0.8316818850488306, "grad_norm": 0.63671875, "learning_rate": 0.00016446902054602635, "loss": 4.294, "step": 8021 }, { "epoch": 0.8317855731033187, "grad_norm": 0.54296875, "learning_rate": 0.00016446071655092068, "loss": 4.2968, "step": 8022 }, { "epoch": 0.8318892611578067, "grad_norm": 0.6640625, "learning_rate": 0.00016445241179524668, "loss": 4.2859, "step": 8023 }, { "epoch": 0.8319929492122948, "grad_norm": 0.6328125, "learning_rate": 0.00016444410627910235, "loss": 4.2405, "step": 8024 }, { "epoch": 0.8320966372667828, "grad_norm": 0.62890625, "learning_rate": 0.0001644358000025857, "loss": 4.2623, "step": 8025 }, { "epoch": 0.832200325321271, "grad_norm": 0.6328125, "learning_rate": 0.0001644274929657947, "loss": 4.2707, "step": 8026 }, { "epoch": 0.832304013375759, "grad_norm": 0.62890625, "learning_rate": 0.0001644191851688274, "loss": 4.2848, "step": 8027 }, { "epoch": 0.8324077014302471, "grad_norm": 0.65625, "learning_rate": 0.00016441087661178181, "loss": 4.3119, "step": 8028 }, { "epoch": 0.8325113894847351, "grad_norm": 0.64453125, "learning_rate": 0.00016440256729475597, "loss": 4.3303, "step": 8029 }, { "epoch": 0.8326150775392233, "grad_norm": 0.671875, "learning_rate": 0.0001643942572178479, "loss": 4.3078, "step": 8030 }, { "epoch": 0.8327187655937113, "grad_norm": 0.66015625, "learning_rate": 0.00016438594638115567, "loss": 4.3261, "step": 8031 }, { "epoch": 0.8328224536481994, "grad_norm": 0.62109375, "learning_rate": 0.00016437763478477732, "loss": 4.2575, "step": 8032 }, { "epoch": 0.8329261417026874, "grad_norm": 0.72265625, "learning_rate": 0.00016436932242881093, "loss": 4.2534, "step": 8033 }, { "epoch": 0.8330298297571755, "grad_norm": 0.60546875, "learning_rate": 0.00016436100931335458, "loss": 4.2907, "step": 8034 }, { "epoch": 0.8331335178116636, "grad_norm": 0.671875, "learning_rate": 0.00016435269543850636, "loss": 4.2756, "step": 8035 }, { "epoch": 0.8332372058661517, "grad_norm": 0.6875, "learning_rate": 0.00016434438080436436, "loss": 4.267, "step": 8036 }, { "epoch": 0.8333408939206397, "grad_norm": 0.73828125, "learning_rate": 0.00016433606541102667, "loss": 4.2986, "step": 8037 }, { "epoch": 0.8334445819751278, "grad_norm": 0.6328125, "learning_rate": 0.00016432774925859142, "loss": 4.2979, "step": 8038 }, { "epoch": 0.8335482700296158, "grad_norm": 0.71875, "learning_rate": 0.00016431943234715673, "loss": 4.2951, "step": 8039 }, { "epoch": 0.833651958084104, "grad_norm": 0.67578125, "learning_rate": 0.0001643111146768207, "loss": 4.2659, "step": 8040 }, { "epoch": 0.8337556461385921, "grad_norm": 0.66796875, "learning_rate": 0.00016430279624768152, "loss": 4.2604, "step": 8041 }, { "epoch": 0.8338593341930801, "grad_norm": 0.6015625, "learning_rate": 0.00016429447705983732, "loss": 4.3355, "step": 8042 }, { "epoch": 0.8339630222475682, "grad_norm": 0.70703125, "learning_rate": 0.00016428615711338624, "loss": 4.2892, "step": 8043 }, { "epoch": 0.8340667103020563, "grad_norm": 0.6328125, "learning_rate": 0.00016427783640842646, "loss": 4.282, "step": 8044 }, { "epoch": 0.8341703983565444, "grad_norm": 0.7265625, "learning_rate": 0.00016426951494505617, "loss": 4.3052, "step": 8045 }, { "epoch": 0.8342740864110324, "grad_norm": 0.64453125, "learning_rate": 0.00016426119272337352, "loss": 4.2935, "step": 8046 }, { "epoch": 0.8343777744655205, "grad_norm": 0.73046875, "learning_rate": 0.00016425286974347674, "loss": 4.2614, "step": 8047 }, { "epoch": 0.8344814625200085, "grad_norm": 0.6171875, "learning_rate": 0.000164244546005464, "loss": 4.2344, "step": 8048 }, { "epoch": 0.8345851505744967, "grad_norm": 0.7578125, "learning_rate": 0.00016423622150943355, "loss": 4.2243, "step": 8049 }, { "epoch": 0.8346888386289847, "grad_norm": 0.65234375, "learning_rate": 0.00016422789625548356, "loss": 4.2501, "step": 8050 }, { "epoch": 0.8347925266834728, "grad_norm": 0.7734375, "learning_rate": 0.0001642195702437123, "loss": 4.2824, "step": 8051 }, { "epoch": 0.8348962147379608, "grad_norm": 0.66796875, "learning_rate": 0.000164211243474218, "loss": 4.2817, "step": 8052 }, { "epoch": 0.834999902792449, "grad_norm": 0.80859375, "learning_rate": 0.00016420291594709889, "loss": 4.3298, "step": 8053 }, { "epoch": 0.835103590846937, "grad_norm": 0.6796875, "learning_rate": 0.00016419458766245323, "loss": 4.283, "step": 8054 }, { "epoch": 0.8352072789014251, "grad_norm": 0.86328125, "learning_rate": 0.00016418625862037932, "loss": 4.334, "step": 8055 }, { "epoch": 0.8353109669559131, "grad_norm": 0.734375, "learning_rate": 0.00016417792882097537, "loss": 4.2788, "step": 8056 }, { "epoch": 0.8354146550104012, "grad_norm": 0.85546875, "learning_rate": 0.00016416959826433974, "loss": 4.3003, "step": 8057 }, { "epoch": 0.8355183430648893, "grad_norm": 0.7265625, "learning_rate": 0.00016416126695057063, "loss": 4.269, "step": 8058 }, { "epoch": 0.8356220311193774, "grad_norm": 0.875, "learning_rate": 0.00016415293487976644, "loss": 4.2525, "step": 8059 }, { "epoch": 0.8357257191738654, "grad_norm": 0.703125, "learning_rate": 0.00016414460205202539, "loss": 4.2904, "step": 8060 }, { "epoch": 0.8358294072283535, "grad_norm": 0.92578125, "learning_rate": 0.00016413626846744584, "loss": 4.2732, "step": 8061 }, { "epoch": 0.8359330952828415, "grad_norm": 0.74609375, "learning_rate": 0.00016412793412612614, "loss": 4.2605, "step": 8062 }, { "epoch": 0.8360367833373297, "grad_norm": 0.94921875, "learning_rate": 0.00016411959902816462, "loss": 4.2769, "step": 8063 }, { "epoch": 0.8361404713918177, "grad_norm": 0.7421875, "learning_rate": 0.00016411126317365958, "loss": 4.2547, "step": 8064 }, { "epoch": 0.8362441594463058, "grad_norm": 0.8671875, "learning_rate": 0.00016410292656270943, "loss": 4.2837, "step": 8065 }, { "epoch": 0.8363478475007938, "grad_norm": 0.82421875, "learning_rate": 0.00016409458919541248, "loss": 4.2996, "step": 8066 }, { "epoch": 0.836451535555282, "grad_norm": 0.8671875, "learning_rate": 0.00016408625107186713, "loss": 4.2966, "step": 8067 }, { "epoch": 0.83655522360977, "grad_norm": 0.84765625, "learning_rate": 0.00016407791219217178, "loss": 4.2681, "step": 8068 }, { "epoch": 0.8366589116642581, "grad_norm": 0.73828125, "learning_rate": 0.0001640695725564248, "loss": 4.2706, "step": 8069 }, { "epoch": 0.8367625997187461, "grad_norm": 0.7890625, "learning_rate": 0.00016406123216472452, "loss": 4.2618, "step": 8070 }, { "epoch": 0.8368662877732342, "grad_norm": 0.765625, "learning_rate": 0.00016405289101716953, "loss": 4.3088, "step": 8071 }, { "epoch": 0.8369699758277223, "grad_norm": 0.75, "learning_rate": 0.00016404454911385805, "loss": 4.2826, "step": 8072 }, { "epoch": 0.8370736638822104, "grad_norm": 0.81640625, "learning_rate": 0.00016403620645488858, "loss": 4.2745, "step": 8073 }, { "epoch": 0.8371773519366984, "grad_norm": 0.71875, "learning_rate": 0.00016402786304035963, "loss": 4.3094, "step": 8074 }, { "epoch": 0.8372810399911865, "grad_norm": 0.73828125, "learning_rate": 0.00016401951887036952, "loss": 4.2542, "step": 8075 }, { "epoch": 0.8373847280456745, "grad_norm": 0.68359375, "learning_rate": 0.0001640111739450168, "loss": 4.3313, "step": 8076 }, { "epoch": 0.8374884161001627, "grad_norm": 0.69921875, "learning_rate": 0.00016400282826439986, "loss": 4.31, "step": 8077 }, { "epoch": 0.8375921041546507, "grad_norm": 0.8125, "learning_rate": 0.0001639944818286172, "loss": 4.2806, "step": 8078 }, { "epoch": 0.8376957922091388, "grad_norm": 0.7109375, "learning_rate": 0.0001639861346377673, "loss": 4.2812, "step": 8079 }, { "epoch": 0.8377994802636268, "grad_norm": 0.90625, "learning_rate": 0.0001639777866919487, "loss": 4.2328, "step": 8080 }, { "epoch": 0.837903168318115, "grad_norm": 0.6875, "learning_rate": 0.0001639694379912598, "loss": 4.2796, "step": 8081 }, { "epoch": 0.838006856372603, "grad_norm": 0.85546875, "learning_rate": 0.00016396108853579912, "loss": 4.2733, "step": 8082 }, { "epoch": 0.8381105444270911, "grad_norm": 0.69140625, "learning_rate": 0.00016395273832566523, "loss": 4.292, "step": 8083 }, { "epoch": 0.8382142324815792, "grad_norm": 0.90234375, "learning_rate": 0.00016394438736095667, "loss": 4.304, "step": 8084 }, { "epoch": 0.8383179205360672, "grad_norm": 0.6875, "learning_rate": 0.0001639360356417719, "loss": 4.2959, "step": 8085 }, { "epoch": 0.8384216085905554, "grad_norm": 0.84375, "learning_rate": 0.00016392768316820947, "loss": 4.2942, "step": 8086 }, { "epoch": 0.8385252966450434, "grad_norm": 0.671875, "learning_rate": 0.000163919329940368, "loss": 4.2869, "step": 8087 }, { "epoch": 0.8386289846995315, "grad_norm": 0.859375, "learning_rate": 0.00016391097595834598, "loss": 4.2924, "step": 8088 }, { "epoch": 0.8387326727540195, "grad_norm": 0.66015625, "learning_rate": 0.00016390262122224195, "loss": 4.3011, "step": 8089 }, { "epoch": 0.8388363608085077, "grad_norm": 0.90625, "learning_rate": 0.00016389426573215458, "loss": 4.2705, "step": 8090 }, { "epoch": 0.8389400488629957, "grad_norm": 0.67578125, "learning_rate": 0.00016388590948818245, "loss": 4.2856, "step": 8091 }, { "epoch": 0.8390437369174838, "grad_norm": 0.83984375, "learning_rate": 0.00016387755249042406, "loss": 4.2641, "step": 8092 }, { "epoch": 0.8391474249719718, "grad_norm": 0.6796875, "learning_rate": 0.00016386919473897812, "loss": 4.3175, "step": 8093 }, { "epoch": 0.8392511130264599, "grad_norm": 0.7890625, "learning_rate": 0.00016386083623394314, "loss": 4.2739, "step": 8094 }, { "epoch": 0.839354801080948, "grad_norm": 0.734375, "learning_rate": 0.00016385247697541785, "loss": 4.2844, "step": 8095 }, { "epoch": 0.8394584891354361, "grad_norm": 0.7890625, "learning_rate": 0.00016384411696350083, "loss": 4.2292, "step": 8096 }, { "epoch": 0.8395621771899241, "grad_norm": 0.75, "learning_rate": 0.00016383575619829065, "loss": 4.2973, "step": 8097 }, { "epoch": 0.8396658652444122, "grad_norm": 0.74609375, "learning_rate": 0.0001638273946798861, "loss": 4.2837, "step": 8098 }, { "epoch": 0.8397695532989002, "grad_norm": 0.7734375, "learning_rate": 0.0001638190324083857, "loss": 4.2605, "step": 8099 }, { "epoch": 0.8398732413533884, "grad_norm": 0.75390625, "learning_rate": 0.0001638106693838882, "loss": 4.2699, "step": 8100 }, { "epoch": 0.8399769294078764, "grad_norm": 0.75390625, "learning_rate": 0.00016380230560649226, "loss": 4.2393, "step": 8101 }, { "epoch": 0.8400806174623645, "grad_norm": 0.71875, "learning_rate": 0.00016379394107629656, "loss": 4.314, "step": 8102 }, { "epoch": 0.8401843055168525, "grad_norm": 0.73828125, "learning_rate": 0.00016378557579339978, "loss": 4.2962, "step": 8103 }, { "epoch": 0.8402879935713407, "grad_norm": 0.71875, "learning_rate": 0.00016377720975790062, "loss": 4.301, "step": 8104 }, { "epoch": 0.8403916816258287, "grad_norm": 0.75390625, "learning_rate": 0.00016376884296989783, "loss": 4.2674, "step": 8105 }, { "epoch": 0.8404953696803168, "grad_norm": 0.69140625, "learning_rate": 0.00016376047542949007, "loss": 4.3346, "step": 8106 }, { "epoch": 0.8405990577348048, "grad_norm": 0.74609375, "learning_rate": 0.00016375210713677612, "loss": 4.28, "step": 8107 }, { "epoch": 0.8407027457892929, "grad_norm": 0.703125, "learning_rate": 0.0001637437380918547, "loss": 4.3053, "step": 8108 }, { "epoch": 0.840806433843781, "grad_norm": 0.69140625, "learning_rate": 0.0001637353682948245, "loss": 4.3224, "step": 8109 }, { "epoch": 0.8409101218982691, "grad_norm": 0.76171875, "learning_rate": 0.00016372699774578435, "loss": 4.3216, "step": 8110 }, { "epoch": 0.8410138099527571, "grad_norm": 0.65234375, "learning_rate": 0.000163718626444833, "loss": 4.2668, "step": 8111 }, { "epoch": 0.8411174980072452, "grad_norm": 0.76953125, "learning_rate": 0.00016371025439206922, "loss": 4.2696, "step": 8112 }, { "epoch": 0.8412211860617332, "grad_norm": 0.6484375, "learning_rate": 0.00016370188158759176, "loss": 4.2908, "step": 8113 }, { "epoch": 0.8413248741162214, "grad_norm": 0.78515625, "learning_rate": 0.00016369350803149946, "loss": 4.3051, "step": 8114 }, { "epoch": 0.8414285621707094, "grad_norm": 0.6875, "learning_rate": 0.00016368513372389107, "loss": 4.2693, "step": 8115 }, { "epoch": 0.8415322502251975, "grad_norm": 0.6640625, "learning_rate": 0.0001636767586648654, "loss": 4.3012, "step": 8116 }, { "epoch": 0.8416359382796855, "grad_norm": 0.59375, "learning_rate": 0.00016366838285452134, "loss": 4.2654, "step": 8117 }, { "epoch": 0.8417396263341737, "grad_norm": 0.66796875, "learning_rate": 0.00016366000629295763, "loss": 4.2762, "step": 8118 }, { "epoch": 0.8418433143886617, "grad_norm": 0.59375, "learning_rate": 0.00016365162898027315, "loss": 4.2977, "step": 8119 }, { "epoch": 0.8419470024431498, "grad_norm": 0.69140625, "learning_rate": 0.0001636432509165667, "loss": 4.2502, "step": 8120 }, { "epoch": 0.8420506904976378, "grad_norm": 0.59375, "learning_rate": 0.00016363487210193723, "loss": 4.3008, "step": 8121 }, { "epoch": 0.8421543785521259, "grad_norm": 0.6484375, "learning_rate": 0.0001636264925364835, "loss": 4.2472, "step": 8122 }, { "epoch": 0.842258066606614, "grad_norm": 0.62109375, "learning_rate": 0.0001636181122203044, "loss": 4.2992, "step": 8123 }, { "epoch": 0.8423617546611021, "grad_norm": 0.6484375, "learning_rate": 0.0001636097311534988, "loss": 4.2512, "step": 8124 }, { "epoch": 0.8424654427155901, "grad_norm": 0.59765625, "learning_rate": 0.00016360134933616566, "loss": 4.288, "step": 8125 }, { "epoch": 0.8425691307700782, "grad_norm": 0.65234375, "learning_rate": 0.00016359296676840378, "loss": 4.3224, "step": 8126 }, { "epoch": 0.8426728188245662, "grad_norm": 0.63671875, "learning_rate": 0.00016358458345031216, "loss": 4.2729, "step": 8127 }, { "epoch": 0.8427765068790544, "grad_norm": 0.65625, "learning_rate": 0.00016357619938198962, "loss": 4.3161, "step": 8128 }, { "epoch": 0.8428801949335425, "grad_norm": 0.64453125, "learning_rate": 0.00016356781456353518, "loss": 4.3149, "step": 8129 }, { "epoch": 0.8429838829880305, "grad_norm": 0.73828125, "learning_rate": 0.0001635594289950477, "loss": 4.2801, "step": 8130 }, { "epoch": 0.8430875710425186, "grad_norm": 0.68359375, "learning_rate": 0.00016355104267662613, "loss": 4.2477, "step": 8131 }, { "epoch": 0.8431912590970067, "grad_norm": 0.6484375, "learning_rate": 0.00016354265560836946, "loss": 4.2534, "step": 8132 }, { "epoch": 0.8432949471514948, "grad_norm": 0.72265625, "learning_rate": 0.0001635342677903766, "loss": 4.2756, "step": 8133 }, { "epoch": 0.8433986352059828, "grad_norm": 0.66015625, "learning_rate": 0.00016352587922274653, "loss": 4.2894, "step": 8134 }, { "epoch": 0.8435023232604709, "grad_norm": 0.71484375, "learning_rate": 0.00016351748990557825, "loss": 4.2701, "step": 8135 }, { "epoch": 0.8436060113149589, "grad_norm": 0.77734375, "learning_rate": 0.00016350909983897075, "loss": 4.2833, "step": 8136 }, { "epoch": 0.8437096993694471, "grad_norm": 0.64453125, "learning_rate": 0.00016350070902302298, "loss": 4.301, "step": 8137 }, { "epoch": 0.8438133874239351, "grad_norm": 0.78515625, "learning_rate": 0.00016349231745783397, "loss": 4.3139, "step": 8138 }, { "epoch": 0.8439170754784232, "grad_norm": 0.6875, "learning_rate": 0.00016348392514350273, "loss": 4.3215, "step": 8139 }, { "epoch": 0.8440207635329112, "grad_norm": 0.72265625, "learning_rate": 0.00016347553208012828, "loss": 4.2747, "step": 8140 }, { "epoch": 0.8441244515873993, "grad_norm": 0.74609375, "learning_rate": 0.00016346713826780965, "loss": 4.277, "step": 8141 }, { "epoch": 0.8442281396418874, "grad_norm": 0.734375, "learning_rate": 0.00016345874370664583, "loss": 4.2962, "step": 8142 }, { "epoch": 0.8443318276963755, "grad_norm": 0.71484375, "learning_rate": 0.00016345034839673598, "loss": 4.2753, "step": 8143 }, { "epoch": 0.8444355157508635, "grad_norm": 0.70703125, "learning_rate": 0.00016344195233817908, "loss": 4.2507, "step": 8144 }, { "epoch": 0.8445392038053516, "grad_norm": 0.80078125, "learning_rate": 0.00016343355553107415, "loss": 4.2846, "step": 8145 }, { "epoch": 0.8446428918598397, "grad_norm": 0.69921875, "learning_rate": 0.0001634251579755204, "loss": 4.2818, "step": 8146 }, { "epoch": 0.8447465799143278, "grad_norm": 0.8359375, "learning_rate": 0.00016341675967161676, "loss": 4.3059, "step": 8147 }, { "epoch": 0.8448502679688158, "grad_norm": 0.7109375, "learning_rate": 0.00016340836061946244, "loss": 4.2929, "step": 8148 }, { "epoch": 0.8449539560233039, "grad_norm": 0.84375, "learning_rate": 0.00016339996081915644, "loss": 4.3118, "step": 8149 }, { "epoch": 0.8450576440777919, "grad_norm": 0.76953125, "learning_rate": 0.00016339156027079797, "loss": 4.2649, "step": 8150 }, { "epoch": 0.8451613321322801, "grad_norm": 0.77734375, "learning_rate": 0.00016338315897448605, "loss": 4.2986, "step": 8151 }, { "epoch": 0.8452650201867681, "grad_norm": 0.7734375, "learning_rate": 0.00016337475693031984, "loss": 4.3008, "step": 8152 }, { "epoch": 0.8453687082412562, "grad_norm": 0.76953125, "learning_rate": 0.00016336635413839853, "loss": 4.2551, "step": 8153 }, { "epoch": 0.8454723962957442, "grad_norm": 0.78125, "learning_rate": 0.00016335795059882125, "loss": 4.3054, "step": 8154 }, { "epoch": 0.8455760843502323, "grad_norm": 0.76171875, "learning_rate": 0.00016334954631168708, "loss": 4.2829, "step": 8155 }, { "epoch": 0.8456797724047204, "grad_norm": 0.7734375, "learning_rate": 0.00016334114127709523, "loss": 4.2798, "step": 8156 }, { "epoch": 0.8457834604592085, "grad_norm": 0.85546875, "learning_rate": 0.00016333273549514488, "loss": 4.3232, "step": 8157 }, { "epoch": 0.8458871485136965, "grad_norm": 0.828125, "learning_rate": 0.0001633243289659352, "loss": 4.2421, "step": 8158 }, { "epoch": 0.8459908365681846, "grad_norm": 0.796875, "learning_rate": 0.00016331592168956533, "loss": 4.2662, "step": 8159 }, { "epoch": 0.8460945246226727, "grad_norm": 0.77734375, "learning_rate": 0.00016330751366613454, "loss": 4.2964, "step": 8160 }, { "epoch": 0.8461982126771608, "grad_norm": 0.765625, "learning_rate": 0.00016329910489574202, "loss": 4.3097, "step": 8161 }, { "epoch": 0.8463019007316488, "grad_norm": 0.82421875, "learning_rate": 0.00016329069537848696, "loss": 4.249, "step": 8162 }, { "epoch": 0.8464055887861369, "grad_norm": 0.73828125, "learning_rate": 0.0001632822851144686, "loss": 4.308, "step": 8163 }, { "epoch": 0.8465092768406249, "grad_norm": 0.82421875, "learning_rate": 0.00016327387410378617, "loss": 4.2608, "step": 8164 }, { "epoch": 0.8466129648951131, "grad_norm": 0.73828125, "learning_rate": 0.00016326546234653893, "loss": 4.3032, "step": 8165 }, { "epoch": 0.8467166529496011, "grad_norm": 0.76953125, "learning_rate": 0.0001632570498428261, "loss": 4.2588, "step": 8166 }, { "epoch": 0.8468203410040892, "grad_norm": 0.66015625, "learning_rate": 0.00016324863659274692, "loss": 4.2746, "step": 8167 }, { "epoch": 0.8469240290585772, "grad_norm": 0.734375, "learning_rate": 0.0001632402225964007, "loss": 4.2657, "step": 8168 }, { "epoch": 0.8470277171130653, "grad_norm": 0.73828125, "learning_rate": 0.0001632318078538867, "loss": 4.2658, "step": 8169 }, { "epoch": 0.8471314051675534, "grad_norm": 0.75, "learning_rate": 0.0001632233923653042, "loss": 4.2676, "step": 8170 }, { "epoch": 0.8472350932220415, "grad_norm": 0.7734375, "learning_rate": 0.00016321497613075252, "loss": 4.2574, "step": 8171 }, { "epoch": 0.8473387812765295, "grad_norm": 0.8359375, "learning_rate": 0.0001632065591503309, "loss": 4.2547, "step": 8172 }, { "epoch": 0.8474424693310176, "grad_norm": 0.69921875, "learning_rate": 0.00016319814142413874, "loss": 4.2806, "step": 8173 }, { "epoch": 0.8475461573855058, "grad_norm": 0.81640625, "learning_rate": 0.00016318972295227527, "loss": 4.2752, "step": 8174 }, { "epoch": 0.8476498454399938, "grad_norm": 0.66015625, "learning_rate": 0.00016318130373483994, "loss": 4.2638, "step": 8175 }, { "epoch": 0.8477535334944819, "grad_norm": 0.8046875, "learning_rate": 0.00016317288377193197, "loss": 4.2541, "step": 8176 }, { "epoch": 0.8478572215489699, "grad_norm": 0.72265625, "learning_rate": 0.00016316446306365072, "loss": 4.3027, "step": 8177 }, { "epoch": 0.847960909603458, "grad_norm": 0.83984375, "learning_rate": 0.0001631560416100956, "loss": 4.2608, "step": 8178 }, { "epoch": 0.8480645976579461, "grad_norm": 0.7734375, "learning_rate": 0.00016314761941136594, "loss": 4.2515, "step": 8179 }, { "epoch": 0.8481682857124342, "grad_norm": 0.79296875, "learning_rate": 0.00016313919646756113, "loss": 4.2605, "step": 8180 }, { "epoch": 0.8482719737669222, "grad_norm": 0.7265625, "learning_rate": 0.00016313077277878052, "loss": 4.3121, "step": 8181 }, { "epoch": 0.8483756618214103, "grad_norm": 0.71875, "learning_rate": 0.00016312234834512355, "loss": 4.2779, "step": 8182 }, { "epoch": 0.8484793498758983, "grad_norm": 0.671875, "learning_rate": 0.0001631139231666896, "loss": 4.2677, "step": 8183 }, { "epoch": 0.8485830379303865, "grad_norm": 0.7109375, "learning_rate": 0.00016310549724357802, "loss": 4.2662, "step": 8184 }, { "epoch": 0.8486867259848745, "grad_norm": 0.65625, "learning_rate": 0.00016309707057588833, "loss": 4.305, "step": 8185 }, { "epoch": 0.8487904140393626, "grad_norm": 0.6875, "learning_rate": 0.00016308864316371988, "loss": 4.2841, "step": 8186 }, { "epoch": 0.8488941020938506, "grad_norm": 0.7421875, "learning_rate": 0.00016308021500717213, "loss": 4.2788, "step": 8187 }, { "epoch": 0.8489977901483388, "grad_norm": 0.71484375, "learning_rate": 0.00016307178610634453, "loss": 4.307, "step": 8188 }, { "epoch": 0.8491014782028268, "grad_norm": 0.796875, "learning_rate": 0.00016306335646133652, "loss": 4.2861, "step": 8189 }, { "epoch": 0.8492051662573149, "grad_norm": 0.7265625, "learning_rate": 0.00016305492607224755, "loss": 4.2639, "step": 8190 }, { "epoch": 0.8493088543118029, "grad_norm": 0.75, "learning_rate": 0.00016304649493917715, "loss": 4.324, "step": 8191 }, { "epoch": 0.849412542366291, "grad_norm": 0.66015625, "learning_rate": 0.00016303806306222472, "loss": 4.2783, "step": 8192 }, { "epoch": 0.8495162304207791, "grad_norm": 0.79296875, "learning_rate": 0.00016302963044148978, "loss": 4.2854, "step": 8193 }, { "epoch": 0.8496199184752672, "grad_norm": 0.69921875, "learning_rate": 0.00016302119707707182, "loss": 4.2542, "step": 8194 }, { "epoch": 0.8497236065297552, "grad_norm": 0.7421875, "learning_rate": 0.00016301276296907033, "loss": 4.2758, "step": 8195 }, { "epoch": 0.8498272945842433, "grad_norm": 0.6875, "learning_rate": 0.0001630043281175849, "loss": 4.248, "step": 8196 }, { "epoch": 0.8499309826387313, "grad_norm": 0.7890625, "learning_rate": 0.00016299589252271495, "loss": 4.2591, "step": 8197 }, { "epoch": 0.8500346706932195, "grad_norm": 0.78125, "learning_rate": 0.0001629874561845601, "loss": 4.2892, "step": 8198 }, { "epoch": 0.8501383587477075, "grad_norm": 0.8046875, "learning_rate": 0.0001629790191032198, "loss": 4.3016, "step": 8199 }, { "epoch": 0.8502420468021956, "grad_norm": 0.70703125, "learning_rate": 0.00016297058127879367, "loss": 4.3234, "step": 8200 }, { "epoch": 0.8503457348566836, "grad_norm": 0.71484375, "learning_rate": 0.00016296214271138126, "loss": 4.3115, "step": 8201 }, { "epoch": 0.8504494229111718, "grad_norm": 0.73046875, "learning_rate": 0.0001629537034010821, "loss": 4.2885, "step": 8202 }, { "epoch": 0.8505531109656598, "grad_norm": 0.65234375, "learning_rate": 0.00016294526334799576, "loss": 4.2767, "step": 8203 }, { "epoch": 0.8506567990201479, "grad_norm": 0.77734375, "learning_rate": 0.00016293682255222192, "loss": 4.2572, "step": 8204 }, { "epoch": 0.8507604870746359, "grad_norm": 0.640625, "learning_rate": 0.00016292838101386004, "loss": 4.2418, "step": 8205 }, { "epoch": 0.850864175129124, "grad_norm": 0.73828125, "learning_rate": 0.00016291993873300979, "loss": 4.2631, "step": 8206 }, { "epoch": 0.8509678631836121, "grad_norm": 0.80078125, "learning_rate": 0.00016291149570977077, "loss": 4.2422, "step": 8207 }, { "epoch": 0.8510715512381002, "grad_norm": 0.703125, "learning_rate": 0.00016290305194424263, "loss": 4.3361, "step": 8208 }, { "epoch": 0.8511752392925882, "grad_norm": 0.72265625, "learning_rate": 0.00016289460743652495, "loss": 4.3048, "step": 8209 }, { "epoch": 0.8512789273470763, "grad_norm": 0.671875, "learning_rate": 0.0001628861621867174, "loss": 4.2793, "step": 8210 }, { "epoch": 0.8513826154015643, "grad_norm": 0.81640625, "learning_rate": 0.00016287771619491958, "loss": 4.2686, "step": 8211 }, { "epoch": 0.8514863034560525, "grad_norm": 0.703125, "learning_rate": 0.0001628692694612312, "loss": 4.2783, "step": 8212 }, { "epoch": 0.8515899915105405, "grad_norm": 0.859375, "learning_rate": 0.00016286082198575187, "loss": 4.2605, "step": 8213 }, { "epoch": 0.8516936795650286, "grad_norm": 0.67578125, "learning_rate": 0.0001628523737685813, "loss": 4.3076, "step": 8214 }, { "epoch": 0.8517973676195166, "grad_norm": 0.82421875, "learning_rate": 0.00016284392480981916, "loss": 4.2711, "step": 8215 }, { "epoch": 0.8519010556740048, "grad_norm": 0.68359375, "learning_rate": 0.00016283547510956514, "loss": 4.2939, "step": 8216 }, { "epoch": 0.8520047437284928, "grad_norm": 0.8125, "learning_rate": 0.00016282702466791893, "loss": 4.2644, "step": 8217 }, { "epoch": 0.8521084317829809, "grad_norm": 0.75390625, "learning_rate": 0.00016281857348498027, "loss": 4.3043, "step": 8218 }, { "epoch": 0.852212119837469, "grad_norm": 0.83203125, "learning_rate": 0.0001628101215608488, "loss": 4.2732, "step": 8219 }, { "epoch": 0.852315807891957, "grad_norm": 0.7109375, "learning_rate": 0.0001628016688956243, "loss": 4.248, "step": 8220 }, { "epoch": 0.8524194959464452, "grad_norm": 0.79296875, "learning_rate": 0.0001627932154894065, "loss": 4.26, "step": 8221 }, { "epoch": 0.8525231840009332, "grad_norm": 0.7109375, "learning_rate": 0.00016278476134229514, "loss": 4.2633, "step": 8222 }, { "epoch": 0.8526268720554213, "grad_norm": 0.8515625, "learning_rate": 0.00016277630645438991, "loss": 4.2904, "step": 8223 }, { "epoch": 0.8527305601099093, "grad_norm": 0.7578125, "learning_rate": 0.0001627678508257907, "loss": 4.2674, "step": 8224 }, { "epoch": 0.8528342481643975, "grad_norm": 0.8046875, "learning_rate": 0.00016275939445659714, "loss": 4.2886, "step": 8225 }, { "epoch": 0.8529379362188855, "grad_norm": 0.83984375, "learning_rate": 0.0001627509373469091, "loss": 4.2872, "step": 8226 }, { "epoch": 0.8530416242733736, "grad_norm": 0.78515625, "learning_rate": 0.0001627424794968263, "loss": 4.26, "step": 8227 }, { "epoch": 0.8531453123278616, "grad_norm": 0.828125, "learning_rate": 0.00016273402090644855, "loss": 4.2756, "step": 8228 }, { "epoch": 0.8532490003823497, "grad_norm": 0.79296875, "learning_rate": 0.00016272556157587574, "loss": 4.264, "step": 8229 }, { "epoch": 0.8533526884368378, "grad_norm": 0.7890625, "learning_rate": 0.00016271710150520753, "loss": 4.2563, "step": 8230 }, { "epoch": 0.8534563764913259, "grad_norm": 0.7890625, "learning_rate": 0.00016270864069454385, "loss": 4.2632, "step": 8231 }, { "epoch": 0.8535600645458139, "grad_norm": 0.76953125, "learning_rate": 0.00016270017914398453, "loss": 4.2908, "step": 8232 }, { "epoch": 0.853663752600302, "grad_norm": 0.80078125, "learning_rate": 0.00016269171685362932, "loss": 4.3089, "step": 8233 }, { "epoch": 0.85376744065479, "grad_norm": 0.703125, "learning_rate": 0.00016268325382357815, "loss": 4.3015, "step": 8234 }, { "epoch": 0.8538711287092782, "grad_norm": 0.76953125, "learning_rate": 0.0001626747900539308, "loss": 4.271, "step": 8235 }, { "epoch": 0.8539748167637662, "grad_norm": 0.75390625, "learning_rate": 0.00016266632554478723, "loss": 4.2738, "step": 8236 }, { "epoch": 0.8540785048182543, "grad_norm": 0.875, "learning_rate": 0.00016265786029624724, "loss": 4.2862, "step": 8237 }, { "epoch": 0.8541821928727423, "grad_norm": 0.7890625, "learning_rate": 0.00016264939430841072, "loss": 4.2891, "step": 8238 }, { "epoch": 0.8542858809272305, "grad_norm": 0.875, "learning_rate": 0.00016264092758137757, "loss": 4.2754, "step": 8239 }, { "epoch": 0.8543895689817185, "grad_norm": 0.8125, "learning_rate": 0.0001626324601152477, "loss": 4.2848, "step": 8240 }, { "epoch": 0.8544932570362066, "grad_norm": 0.79296875, "learning_rate": 0.00016262399191012102, "loss": 4.2629, "step": 8241 }, { "epoch": 0.8545969450906946, "grad_norm": 0.796875, "learning_rate": 0.00016261552296609742, "loss": 4.2785, "step": 8242 }, { "epoch": 0.8547006331451827, "grad_norm": 0.7890625, "learning_rate": 0.00016260705328327682, "loss": 4.2961, "step": 8243 }, { "epoch": 0.8548043211996708, "grad_norm": 0.7578125, "learning_rate": 0.0001625985828617592, "loss": 4.2976, "step": 8244 }, { "epoch": 0.8549080092541589, "grad_norm": 0.68359375, "learning_rate": 0.00016259011170164443, "loss": 4.3077, "step": 8245 }, { "epoch": 0.8550116973086469, "grad_norm": 0.69921875, "learning_rate": 0.00016258163980303254, "loss": 4.264, "step": 8246 }, { "epoch": 0.855115385363135, "grad_norm": 0.72265625, "learning_rate": 0.00016257316716602343, "loss": 4.3042, "step": 8247 }, { "epoch": 0.855219073417623, "grad_norm": 0.73828125, "learning_rate": 0.0001625646937907171, "loss": 4.2716, "step": 8248 }, { "epoch": 0.8553227614721112, "grad_norm": 0.76171875, "learning_rate": 0.0001625562196772135, "loss": 4.199, "step": 8249 }, { "epoch": 0.8554264495265992, "grad_norm": 0.765625, "learning_rate": 0.00016254774482561267, "loss": 4.315, "step": 8250 }, { "epoch": 0.8555301375810873, "grad_norm": 0.70703125, "learning_rate": 0.00016253926923601454, "loss": 4.2734, "step": 8251 }, { "epoch": 0.8556338256355753, "grad_norm": 0.73828125, "learning_rate": 0.00016253079290851915, "loss": 4.2881, "step": 8252 }, { "epoch": 0.8557375136900635, "grad_norm": 0.71484375, "learning_rate": 0.00016252231584322648, "loss": 4.3141, "step": 8253 }, { "epoch": 0.8558412017445515, "grad_norm": 0.77734375, "learning_rate": 0.0001625138380402366, "loss": 4.3066, "step": 8254 }, { "epoch": 0.8559448897990396, "grad_norm": 0.796875, "learning_rate": 0.00016250535949964952, "loss": 4.3038, "step": 8255 }, { "epoch": 0.8560485778535276, "grad_norm": 0.6796875, "learning_rate": 0.00016249688022156524, "loss": 4.2631, "step": 8256 }, { "epoch": 0.8561522659080157, "grad_norm": 0.69921875, "learning_rate": 0.00016248840020608384, "loss": 4.2473, "step": 8257 }, { "epoch": 0.8562559539625038, "grad_norm": 0.69140625, "learning_rate": 0.0001624799194533054, "loss": 4.2545, "step": 8258 }, { "epoch": 0.8563596420169919, "grad_norm": 0.75, "learning_rate": 0.0001624714379633299, "loss": 4.2926, "step": 8259 }, { "epoch": 0.8564633300714799, "grad_norm": 0.6796875, "learning_rate": 0.0001624629557362575, "loss": 4.2821, "step": 8260 }, { "epoch": 0.856567018125968, "grad_norm": 0.76171875, "learning_rate": 0.00016245447277218823, "loss": 4.2727, "step": 8261 }, { "epoch": 0.856670706180456, "grad_norm": 0.6171875, "learning_rate": 0.00016244598907122222, "loss": 4.2537, "step": 8262 }, { "epoch": 0.8567743942349442, "grad_norm": 0.66015625, "learning_rate": 0.00016243750463345953, "loss": 4.274, "step": 8263 }, { "epoch": 0.8568780822894323, "grad_norm": 0.671875, "learning_rate": 0.00016242901945900031, "loss": 4.2731, "step": 8264 }, { "epoch": 0.8569817703439203, "grad_norm": 0.5625, "learning_rate": 0.00016242053354794463, "loss": 4.2518, "step": 8265 }, { "epoch": 0.8570854583984084, "grad_norm": 0.63671875, "learning_rate": 0.00016241204690039266, "loss": 4.2789, "step": 8266 }, { "epoch": 0.8571891464528965, "grad_norm": 0.6796875, "learning_rate": 0.00016240355951644447, "loss": 4.2943, "step": 8267 }, { "epoch": 0.8572928345073846, "grad_norm": 0.6640625, "learning_rate": 0.00016239507139620028, "loss": 4.2876, "step": 8268 }, { "epoch": 0.8573965225618726, "grad_norm": 0.68359375, "learning_rate": 0.00016238658253976018, "loss": 4.2763, "step": 8269 }, { "epoch": 0.8575002106163607, "grad_norm": 0.6953125, "learning_rate": 0.00016237809294722435, "loss": 4.2659, "step": 8270 }, { "epoch": 0.8576038986708487, "grad_norm": 0.6796875, "learning_rate": 0.00016236960261869297, "loss": 4.2733, "step": 8271 }, { "epoch": 0.8577075867253369, "grad_norm": 0.69921875, "learning_rate": 0.0001623611115542662, "loss": 4.2487, "step": 8272 }, { "epoch": 0.8578112747798249, "grad_norm": 0.7734375, "learning_rate": 0.00016235261975404422, "loss": 4.2883, "step": 8273 }, { "epoch": 0.857914962834313, "grad_norm": 0.71875, "learning_rate": 0.00016234412721812726, "loss": 4.3047, "step": 8274 }, { "epoch": 0.858018650888801, "grad_norm": 0.72265625, "learning_rate": 0.0001623356339466155, "loss": 4.28, "step": 8275 }, { "epoch": 0.8581223389432892, "grad_norm": 0.6796875, "learning_rate": 0.00016232713993960914, "loss": 4.255, "step": 8276 }, { "epoch": 0.8582260269977772, "grad_norm": 0.76953125, "learning_rate": 0.0001623186451972084, "loss": 4.2602, "step": 8277 }, { "epoch": 0.8583297150522653, "grad_norm": 0.68359375, "learning_rate": 0.00016231014971951352, "loss": 4.2697, "step": 8278 }, { "epoch": 0.8584334031067533, "grad_norm": 0.7578125, "learning_rate": 0.00016230165350662477, "loss": 4.2785, "step": 8279 }, { "epoch": 0.8585370911612414, "grad_norm": 0.6875, "learning_rate": 0.00016229315655864234, "loss": 4.3177, "step": 8280 }, { "epoch": 0.8586407792157295, "grad_norm": 0.71875, "learning_rate": 0.00016228465887566652, "loss": 4.2407, "step": 8281 }, { "epoch": 0.8587444672702176, "grad_norm": 0.6953125, "learning_rate": 0.00016227616045779754, "loss": 4.2322, "step": 8282 }, { "epoch": 0.8588481553247056, "grad_norm": 0.71484375, "learning_rate": 0.00016226766130513574, "loss": 4.2513, "step": 8283 }, { "epoch": 0.8589518433791937, "grad_norm": 0.6171875, "learning_rate": 0.00016225916141778132, "loss": 4.274, "step": 8284 }, { "epoch": 0.8590555314336817, "grad_norm": 0.67578125, "learning_rate": 0.0001622506607958346, "loss": 4.2683, "step": 8285 }, { "epoch": 0.8591592194881699, "grad_norm": 0.68359375, "learning_rate": 0.00016224215943939593, "loss": 4.2584, "step": 8286 }, { "epoch": 0.8592629075426579, "grad_norm": 0.73046875, "learning_rate": 0.00016223365734856552, "loss": 4.2968, "step": 8287 }, { "epoch": 0.859366595597146, "grad_norm": 0.73828125, "learning_rate": 0.00016222515452344376, "loss": 4.32, "step": 8288 }, { "epoch": 0.859470283651634, "grad_norm": 0.66015625, "learning_rate": 0.00016221665096413095, "loss": 4.2641, "step": 8289 }, { "epoch": 0.8595739717061222, "grad_norm": 0.65625, "learning_rate": 0.0001622081466707274, "loss": 4.2229, "step": 8290 }, { "epoch": 0.8596776597606102, "grad_norm": 0.72265625, "learning_rate": 0.00016219964164333351, "loss": 4.2653, "step": 8291 }, { "epoch": 0.8597813478150983, "grad_norm": 0.65625, "learning_rate": 0.0001621911358820496, "loss": 4.2972, "step": 8292 }, { "epoch": 0.8598850358695863, "grad_norm": 0.69921875, "learning_rate": 0.00016218262938697602, "loss": 4.3156, "step": 8293 }, { "epoch": 0.8599887239240744, "grad_norm": 0.67578125, "learning_rate": 0.00016217412215821313, "loss": 4.29, "step": 8294 }, { "epoch": 0.8600924119785625, "grad_norm": 0.69140625, "learning_rate": 0.0001621656141958613, "loss": 4.2752, "step": 8295 }, { "epoch": 0.8601961000330506, "grad_norm": 0.6484375, "learning_rate": 0.00016215710550002098, "loss": 4.2789, "step": 8296 }, { "epoch": 0.8602997880875386, "grad_norm": 0.67578125, "learning_rate": 0.0001621485960707925, "loss": 4.2865, "step": 8297 }, { "epoch": 0.8604034761420267, "grad_norm": 0.63671875, "learning_rate": 0.00016214008590827628, "loss": 4.2618, "step": 8298 }, { "epoch": 0.8605071641965147, "grad_norm": 0.671875, "learning_rate": 0.0001621315750125727, "loss": 4.2809, "step": 8299 }, { "epoch": 0.8606108522510029, "grad_norm": 0.63671875, "learning_rate": 0.00016212306338378226, "loss": 4.286, "step": 8300 }, { "epoch": 0.8607145403054909, "grad_norm": 0.74609375, "learning_rate": 0.00016211455102200533, "loss": 4.2798, "step": 8301 }, { "epoch": 0.860818228359979, "grad_norm": 0.72265625, "learning_rate": 0.00016210603792734233, "loss": 4.2832, "step": 8302 }, { "epoch": 0.860921916414467, "grad_norm": 0.7109375, "learning_rate": 0.00016209752409989374, "loss": 4.271, "step": 8303 }, { "epoch": 0.8610256044689552, "grad_norm": 0.6875, "learning_rate": 0.00016208900953976004, "loss": 4.2956, "step": 8304 }, { "epoch": 0.8611292925234432, "grad_norm": 0.70703125, "learning_rate": 0.00016208049424704162, "loss": 4.2594, "step": 8305 }, { "epoch": 0.8612329805779313, "grad_norm": 0.75390625, "learning_rate": 0.000162071978221839, "loss": 4.2507, "step": 8306 }, { "epoch": 0.8613366686324193, "grad_norm": 0.69140625, "learning_rate": 0.00016206346146425263, "loss": 4.2584, "step": 8307 }, { "epoch": 0.8614403566869074, "grad_norm": 0.703125, "learning_rate": 0.00016205494397438303, "loss": 4.2283, "step": 8308 }, { "epoch": 0.8615440447413956, "grad_norm": 0.75, "learning_rate": 0.00016204642575233072, "loss": 4.3046, "step": 8309 }, { "epoch": 0.8616477327958836, "grad_norm": 0.75390625, "learning_rate": 0.00016203790679819612, "loss": 4.2764, "step": 8310 }, { "epoch": 0.8617514208503717, "grad_norm": 0.75390625, "learning_rate": 0.00016202938711207984, "loss": 4.2966, "step": 8311 }, { "epoch": 0.8618551089048597, "grad_norm": 0.6953125, "learning_rate": 0.00016202086669408233, "loss": 4.3013, "step": 8312 }, { "epoch": 0.8619587969593479, "grad_norm": 0.76171875, "learning_rate": 0.00016201234554430417, "loss": 4.2819, "step": 8313 }, { "epoch": 0.8620624850138359, "grad_norm": 0.73046875, "learning_rate": 0.00016200382366284588, "loss": 4.2945, "step": 8314 }, { "epoch": 0.862166173068324, "grad_norm": 0.77734375, "learning_rate": 0.000161995301049808, "loss": 4.2843, "step": 8315 }, { "epoch": 0.862269861122812, "grad_norm": 0.671875, "learning_rate": 0.00016198677770529112, "loss": 4.2686, "step": 8316 }, { "epoch": 0.8623735491773001, "grad_norm": 0.74609375, "learning_rate": 0.00016197825362939577, "loss": 4.3057, "step": 8317 }, { "epoch": 0.8624772372317882, "grad_norm": 0.68359375, "learning_rate": 0.00016196972882222255, "loss": 4.3086, "step": 8318 }, { "epoch": 0.8625809252862763, "grad_norm": 0.703125, "learning_rate": 0.00016196120328387204, "loss": 4.262, "step": 8319 }, { "epoch": 0.8626846133407643, "grad_norm": 0.7109375, "learning_rate": 0.0001619526770144448, "loss": 4.3007, "step": 8320 }, { "epoch": 0.8627883013952524, "grad_norm": 0.6484375, "learning_rate": 0.00016194415001404147, "loss": 4.2464, "step": 8321 }, { "epoch": 0.8628919894497404, "grad_norm": 0.671875, "learning_rate": 0.00016193562228276263, "loss": 4.273, "step": 8322 }, { "epoch": 0.8629956775042286, "grad_norm": 0.734375, "learning_rate": 0.00016192709382070896, "loss": 4.2781, "step": 8323 }, { "epoch": 0.8630993655587166, "grad_norm": 0.6484375, "learning_rate": 0.000161918564627981, "loss": 4.2916, "step": 8324 }, { "epoch": 0.8632030536132047, "grad_norm": 0.671875, "learning_rate": 0.00016191003470467948, "loss": 4.3182, "step": 8325 }, { "epoch": 0.8633067416676927, "grad_norm": 0.66015625, "learning_rate": 0.00016190150405090496, "loss": 4.2624, "step": 8326 }, { "epoch": 0.8634104297221808, "grad_norm": 0.74609375, "learning_rate": 0.0001618929726667581, "loss": 4.2549, "step": 8327 }, { "epoch": 0.8635141177766689, "grad_norm": 0.7265625, "learning_rate": 0.00016188444055233961, "loss": 4.253, "step": 8328 }, { "epoch": 0.863617805831157, "grad_norm": 0.71875, "learning_rate": 0.00016187590770775016, "loss": 4.2926, "step": 8329 }, { "epoch": 0.863721493885645, "grad_norm": 0.6953125, "learning_rate": 0.00016186737413309037, "loss": 4.2453, "step": 8330 }, { "epoch": 0.8638251819401331, "grad_norm": 0.71484375, "learning_rate": 0.00016185883982846097, "loss": 4.2331, "step": 8331 }, { "epoch": 0.8639288699946212, "grad_norm": 0.66796875, "learning_rate": 0.00016185030479396267, "loss": 4.2789, "step": 8332 }, { "epoch": 0.8640325580491093, "grad_norm": 0.7109375, "learning_rate": 0.00016184176902969615, "loss": 4.2669, "step": 8333 }, { "epoch": 0.8641362461035973, "grad_norm": 0.703125, "learning_rate": 0.0001618332325357621, "loss": 4.2263, "step": 8334 }, { "epoch": 0.8642399341580854, "grad_norm": 0.76171875, "learning_rate": 0.00016182469531226125, "loss": 4.2698, "step": 8335 }, { "epoch": 0.8643436222125734, "grad_norm": 0.703125, "learning_rate": 0.0001618161573592944, "loss": 4.314, "step": 8336 }, { "epoch": 0.8644473102670616, "grad_norm": 0.75, "learning_rate": 0.0001618076186769622, "loss": 4.282, "step": 8337 }, { "epoch": 0.8645509983215496, "grad_norm": 0.74609375, "learning_rate": 0.00016179907926536547, "loss": 4.2735, "step": 8338 }, { "epoch": 0.8646546863760377, "grad_norm": 0.796875, "learning_rate": 0.00016179053912460486, "loss": 4.3129, "step": 8339 }, { "epoch": 0.8647583744305257, "grad_norm": 0.80078125, "learning_rate": 0.00016178199825478125, "loss": 4.3089, "step": 8340 }, { "epoch": 0.8648620624850138, "grad_norm": 0.7734375, "learning_rate": 0.00016177345665599536, "loss": 4.2849, "step": 8341 }, { "epoch": 0.8649657505395019, "grad_norm": 0.80859375, "learning_rate": 0.00016176491432834798, "loss": 4.2581, "step": 8342 }, { "epoch": 0.86506943859399, "grad_norm": 0.87890625, "learning_rate": 0.0001617563712719399, "loss": 4.2763, "step": 8343 }, { "epoch": 0.865173126648478, "grad_norm": 0.8203125, "learning_rate": 0.00016174782748687192, "loss": 4.2793, "step": 8344 }, { "epoch": 0.8652768147029661, "grad_norm": 0.75390625, "learning_rate": 0.00016173928297324484, "loss": 4.2949, "step": 8345 }, { "epoch": 0.8653805027574542, "grad_norm": 0.7890625, "learning_rate": 0.0001617307377311595, "loss": 4.2183, "step": 8346 }, { "epoch": 0.8654841908119423, "grad_norm": 0.87109375, "learning_rate": 0.0001617221917607167, "loss": 4.2548, "step": 8347 }, { "epoch": 0.8655878788664303, "grad_norm": 0.76171875, "learning_rate": 0.00016171364506201727, "loss": 4.2516, "step": 8348 }, { "epoch": 0.8656915669209184, "grad_norm": 0.80859375, "learning_rate": 0.00016170509763516205, "loss": 4.2631, "step": 8349 }, { "epoch": 0.8657952549754064, "grad_norm": 0.8125, "learning_rate": 0.00016169654948025193, "loss": 4.2793, "step": 8350 }, { "epoch": 0.8658989430298946, "grad_norm": 0.79296875, "learning_rate": 0.00016168800059738773, "loss": 4.2804, "step": 8351 }, { "epoch": 0.8660026310843827, "grad_norm": 0.86328125, "learning_rate": 0.0001616794509866703, "loss": 4.2818, "step": 8352 }, { "epoch": 0.8661063191388707, "grad_norm": 0.7421875, "learning_rate": 0.0001616709006482006, "loss": 4.232, "step": 8353 }, { "epoch": 0.8662100071933588, "grad_norm": 0.83203125, "learning_rate": 0.00016166234958207946, "loss": 4.2797, "step": 8354 }, { "epoch": 0.8663136952478468, "grad_norm": 0.82421875, "learning_rate": 0.00016165379778840776, "loss": 4.2438, "step": 8355 }, { "epoch": 0.866417383302335, "grad_norm": 0.78125, "learning_rate": 0.00016164524526728638, "loss": 4.2721, "step": 8356 }, { "epoch": 0.866521071356823, "grad_norm": 0.8125, "learning_rate": 0.0001616366920188163, "loss": 4.3177, "step": 8357 }, { "epoch": 0.8666247594113111, "grad_norm": 0.66796875, "learning_rate": 0.00016162813804309841, "loss": 4.2867, "step": 8358 }, { "epoch": 0.8667284474657991, "grad_norm": 0.8203125, "learning_rate": 0.00016161958334023365, "loss": 4.2791, "step": 8359 }, { "epoch": 0.8668321355202873, "grad_norm": 0.703125, "learning_rate": 0.0001616110279103229, "loss": 4.2265, "step": 8360 }, { "epoch": 0.8669358235747753, "grad_norm": 0.71484375, "learning_rate": 0.00016160247175346716, "loss": 4.282, "step": 8361 }, { "epoch": 0.8670395116292634, "grad_norm": 0.7265625, "learning_rate": 0.00016159391486976737, "loss": 4.2605, "step": 8362 }, { "epoch": 0.8671431996837514, "grad_norm": 0.73046875, "learning_rate": 0.00016158535725932453, "loss": 4.2556, "step": 8363 }, { "epoch": 0.8672468877382395, "grad_norm": 0.71875, "learning_rate": 0.00016157679892223953, "loss": 4.2746, "step": 8364 }, { "epoch": 0.8673505757927276, "grad_norm": 0.69921875, "learning_rate": 0.0001615682398586134, "loss": 4.2715, "step": 8365 }, { "epoch": 0.8674542638472157, "grad_norm": 0.69921875, "learning_rate": 0.00016155968006854713, "loss": 4.3402, "step": 8366 }, { "epoch": 0.8675579519017037, "grad_norm": 0.68359375, "learning_rate": 0.00016155111955214172, "loss": 4.2307, "step": 8367 }, { "epoch": 0.8676616399561918, "grad_norm": 0.7578125, "learning_rate": 0.00016154255830949814, "loss": 4.3315, "step": 8368 }, { "epoch": 0.8677653280106798, "grad_norm": 0.6953125, "learning_rate": 0.00016153399634071742, "loss": 4.3183, "step": 8369 }, { "epoch": 0.867869016065168, "grad_norm": 0.734375, "learning_rate": 0.00016152543364590058, "loss": 4.2694, "step": 8370 }, { "epoch": 0.867972704119656, "grad_norm": 0.65234375, "learning_rate": 0.0001615168702251487, "loss": 4.2861, "step": 8371 }, { "epoch": 0.8680763921741441, "grad_norm": 0.75, "learning_rate": 0.00016150830607856276, "loss": 4.2575, "step": 8372 }, { "epoch": 0.8681800802286321, "grad_norm": 0.69140625, "learning_rate": 0.0001614997412062438, "loss": 4.2933, "step": 8373 }, { "epoch": 0.8682837682831203, "grad_norm": 0.703125, "learning_rate": 0.0001614911756082929, "loss": 4.3062, "step": 8374 }, { "epoch": 0.8683874563376083, "grad_norm": 0.71875, "learning_rate": 0.00016148260928481117, "loss": 4.3121, "step": 8375 }, { "epoch": 0.8684911443920964, "grad_norm": 0.72265625, "learning_rate": 0.0001614740422358996, "loss": 4.3087, "step": 8376 }, { "epoch": 0.8685948324465844, "grad_norm": 0.66796875, "learning_rate": 0.00016146547446165935, "loss": 4.2649, "step": 8377 }, { "epoch": 0.8686985205010725, "grad_norm": 0.68359375, "learning_rate": 0.00016145690596219142, "loss": 4.2673, "step": 8378 }, { "epoch": 0.8688022085555606, "grad_norm": 0.69921875, "learning_rate": 0.000161448336737597, "loss": 4.2901, "step": 8379 }, { "epoch": 0.8689058966100487, "grad_norm": 0.74609375, "learning_rate": 0.00016143976678797717, "loss": 4.2934, "step": 8380 }, { "epoch": 0.8690095846645367, "grad_norm": 0.69140625, "learning_rate": 0.00016143119611343302, "loss": 4.2689, "step": 8381 }, { "epoch": 0.8691132727190248, "grad_norm": 0.78515625, "learning_rate": 0.00016142262471406568, "loss": 4.2696, "step": 8382 }, { "epoch": 0.8692169607735128, "grad_norm": 0.6953125, "learning_rate": 0.00016141405258997632, "loss": 4.2788, "step": 8383 }, { "epoch": 0.869320648828001, "grad_norm": 0.75, "learning_rate": 0.00016140547974126602, "loss": 4.2743, "step": 8384 }, { "epoch": 0.869424336882489, "grad_norm": 0.66015625, "learning_rate": 0.000161396906168036, "loss": 4.2375, "step": 8385 }, { "epoch": 0.8695280249369771, "grad_norm": 0.76953125, "learning_rate": 0.00016138833187038737, "loss": 4.2954, "step": 8386 }, { "epoch": 0.8696317129914651, "grad_norm": 0.66796875, "learning_rate": 0.00016137975684842135, "loss": 4.2602, "step": 8387 }, { "epoch": 0.8697354010459533, "grad_norm": 0.77734375, "learning_rate": 0.00016137118110223905, "loss": 4.2751, "step": 8388 }, { "epoch": 0.8698390891004413, "grad_norm": 0.7421875, "learning_rate": 0.00016136260463194167, "loss": 4.2718, "step": 8389 }, { "epoch": 0.8699427771549294, "grad_norm": 0.77734375, "learning_rate": 0.00016135402743763043, "loss": 4.2387, "step": 8390 }, { "epoch": 0.8700464652094174, "grad_norm": 0.734375, "learning_rate": 0.00016134544951940656, "loss": 4.2696, "step": 8391 }, { "epoch": 0.8701501532639055, "grad_norm": 0.703125, "learning_rate": 0.00016133687087737118, "loss": 4.2762, "step": 8392 }, { "epoch": 0.8702538413183936, "grad_norm": 0.734375, "learning_rate": 0.00016132829151162557, "loss": 4.299, "step": 8393 }, { "epoch": 0.8703575293728817, "grad_norm": 0.63671875, "learning_rate": 0.00016131971142227097, "loss": 4.2447, "step": 8394 }, { "epoch": 0.8704612174273697, "grad_norm": 0.7890625, "learning_rate": 0.00016131113060940857, "loss": 4.2158, "step": 8395 }, { "epoch": 0.8705649054818578, "grad_norm": 0.7265625, "learning_rate": 0.00016130254907313965, "loss": 4.2522, "step": 8396 }, { "epoch": 0.870668593536346, "grad_norm": 0.70703125, "learning_rate": 0.00016129396681356545, "loss": 4.2729, "step": 8397 }, { "epoch": 0.870772281590834, "grad_norm": 0.71484375, "learning_rate": 0.0001612853838307872, "loss": 4.303, "step": 8398 }, { "epoch": 0.8708759696453221, "grad_norm": 0.75, "learning_rate": 0.00016127680012490624, "loss": 4.2124, "step": 8399 }, { "epoch": 0.8709796576998101, "grad_norm": 0.74609375, "learning_rate": 0.00016126821569602381, "loss": 4.2845, "step": 8400 }, { "epoch": 0.8710833457542982, "grad_norm": 0.68359375, "learning_rate": 0.0001612596305442412, "loss": 4.3315, "step": 8401 }, { "epoch": 0.8711870338087863, "grad_norm": 0.87109375, "learning_rate": 0.00016125104466965966, "loss": 4.3241, "step": 8402 }, { "epoch": 0.8712907218632744, "grad_norm": 0.6796875, "learning_rate": 0.0001612424580723806, "loss": 4.2597, "step": 8403 }, { "epoch": 0.8713944099177624, "grad_norm": 0.91796875, "learning_rate": 0.00016123387075250522, "loss": 4.2711, "step": 8404 }, { "epoch": 0.8714980979722505, "grad_norm": 0.7265625, "learning_rate": 0.0001612252827101349, "loss": 4.3072, "step": 8405 }, { "epoch": 0.8716017860267385, "grad_norm": 0.96875, "learning_rate": 0.00016121669394537097, "loss": 4.2542, "step": 8406 }, { "epoch": 0.8717054740812267, "grad_norm": 0.7421875, "learning_rate": 0.00016120810445831478, "loss": 4.231, "step": 8407 }, { "epoch": 0.8718091621357147, "grad_norm": 1.0234375, "learning_rate": 0.00016119951424906764, "loss": 4.3089, "step": 8408 }, { "epoch": 0.8719128501902028, "grad_norm": 0.71875, "learning_rate": 0.00016119092331773094, "loss": 4.2775, "step": 8409 }, { "epoch": 0.8720165382446908, "grad_norm": 1.015625, "learning_rate": 0.000161182331664406, "loss": 4.2568, "step": 8410 }, { "epoch": 0.872120226299179, "grad_norm": 0.7109375, "learning_rate": 0.00016117373928919423, "loss": 4.248, "step": 8411 }, { "epoch": 0.872223914353667, "grad_norm": 1.0234375, "learning_rate": 0.000161165146192197, "loss": 4.237, "step": 8412 }, { "epoch": 0.8723276024081551, "grad_norm": 0.6640625, "learning_rate": 0.00016115655237351572, "loss": 4.2702, "step": 8413 }, { "epoch": 0.8724312904626431, "grad_norm": 0.890625, "learning_rate": 0.00016114795783325173, "loss": 4.2676, "step": 8414 }, { "epoch": 0.8725349785171312, "grad_norm": 0.69921875, "learning_rate": 0.00016113936257150649, "loss": 4.2378, "step": 8415 }, { "epoch": 0.8726386665716193, "grad_norm": 0.953125, "learning_rate": 0.0001611307665883814, "loss": 4.3031, "step": 8416 }, { "epoch": 0.8727423546261074, "grad_norm": 0.83984375, "learning_rate": 0.00016112216988397788, "loss": 4.276, "step": 8417 }, { "epoch": 0.8728460426805954, "grad_norm": 0.9375, "learning_rate": 0.0001611135724583974, "loss": 4.2516, "step": 8418 }, { "epoch": 0.8729497307350835, "grad_norm": 0.68359375, "learning_rate": 0.0001611049743117413, "loss": 4.3003, "step": 8419 }, { "epoch": 0.8730534187895715, "grad_norm": 0.9921875, "learning_rate": 0.00016109637544411113, "loss": 4.2494, "step": 8420 }, { "epoch": 0.8731571068440597, "grad_norm": 0.75, "learning_rate": 0.0001610877758556083, "loss": 4.3208, "step": 8421 }, { "epoch": 0.8732607948985477, "grad_norm": 0.98046875, "learning_rate": 0.00016107917554633429, "loss": 4.2399, "step": 8422 }, { "epoch": 0.8733644829530358, "grad_norm": 0.68359375, "learning_rate": 0.00016107057451639057, "loss": 4.2707, "step": 8423 }, { "epoch": 0.8734681710075238, "grad_norm": 0.9296875, "learning_rate": 0.00016106197276587864, "loss": 4.274, "step": 8424 }, { "epoch": 0.873571859062012, "grad_norm": 0.69140625, "learning_rate": 0.00016105337029489997, "loss": 4.256, "step": 8425 }, { "epoch": 0.8736755471165, "grad_norm": 0.94140625, "learning_rate": 0.00016104476710355608, "loss": 4.2639, "step": 8426 }, { "epoch": 0.8737792351709881, "grad_norm": 0.73046875, "learning_rate": 0.0001610361631919484, "loss": 4.2571, "step": 8427 }, { "epoch": 0.8738829232254761, "grad_norm": 0.91015625, "learning_rate": 0.00016102755856017858, "loss": 4.3151, "step": 8428 }, { "epoch": 0.8739866112799642, "grad_norm": 0.7421875, "learning_rate": 0.00016101895320834805, "loss": 4.3228, "step": 8429 }, { "epoch": 0.8740902993344523, "grad_norm": 0.76953125, "learning_rate": 0.0001610103471365584, "loss": 4.2533, "step": 8430 }, { "epoch": 0.8741939873889404, "grad_norm": 0.74609375, "learning_rate": 0.0001610017403449111, "loss": 4.2683, "step": 8431 }, { "epoch": 0.8742976754434284, "grad_norm": 0.76953125, "learning_rate": 0.0001609931328335078, "loss": 4.2586, "step": 8432 }, { "epoch": 0.8744013634979165, "grad_norm": 0.7265625, "learning_rate": 0.00016098452460245, "loss": 4.3005, "step": 8433 }, { "epoch": 0.8745050515524045, "grad_norm": 0.73828125, "learning_rate": 0.0001609759156518392, "loss": 4.3092, "step": 8434 }, { "epoch": 0.8746087396068927, "grad_norm": 0.65625, "learning_rate": 0.0001609673059817771, "loss": 4.2785, "step": 8435 }, { "epoch": 0.8747124276613807, "grad_norm": 0.8203125, "learning_rate": 0.00016095869559236526, "loss": 4.2505, "step": 8436 }, { "epoch": 0.8748161157158688, "grad_norm": 0.77734375, "learning_rate": 0.0001609500844837052, "loss": 4.2825, "step": 8437 }, { "epoch": 0.8749198037703568, "grad_norm": 0.85546875, "learning_rate": 0.0001609414726558986, "loss": 4.2787, "step": 8438 }, { "epoch": 0.875023491824845, "grad_norm": 0.73046875, "learning_rate": 0.00016093286010904705, "loss": 4.2771, "step": 8439 }, { "epoch": 0.875127179879333, "grad_norm": 0.84765625, "learning_rate": 0.00016092424684325214, "loss": 4.2597, "step": 8440 }, { "epoch": 0.8752308679338211, "grad_norm": 0.734375, "learning_rate": 0.00016091563285861553, "loss": 4.2585, "step": 8441 }, { "epoch": 0.8753345559883092, "grad_norm": 0.796875, "learning_rate": 0.0001609070181552388, "loss": 4.2992, "step": 8442 }, { "epoch": 0.8754382440427972, "grad_norm": 0.66796875, "learning_rate": 0.00016089840273322367, "loss": 4.262, "step": 8443 }, { "epoch": 0.8755419320972854, "grad_norm": 0.76171875, "learning_rate": 0.00016088978659267177, "loss": 4.2614, "step": 8444 }, { "epoch": 0.8756456201517734, "grad_norm": 0.640625, "learning_rate": 0.00016088116973368477, "loss": 4.2499, "step": 8445 }, { "epoch": 0.8757493082062615, "grad_norm": 0.828125, "learning_rate": 0.00016087255215636428, "loss": 4.2869, "step": 8446 }, { "epoch": 0.8758529962607495, "grad_norm": 0.609375, "learning_rate": 0.000160863933860812, "loss": 4.2898, "step": 8447 }, { "epoch": 0.8759566843152377, "grad_norm": 0.76171875, "learning_rate": 0.00016085531484712968, "loss": 4.2726, "step": 8448 }, { "epoch": 0.8760603723697257, "grad_norm": 0.66796875, "learning_rate": 0.00016084669511541897, "loss": 4.2572, "step": 8449 }, { "epoch": 0.8761640604242138, "grad_norm": 0.7265625, "learning_rate": 0.00016083807466578157, "loss": 4.2457, "step": 8450 }, { "epoch": 0.8762677484787018, "grad_norm": 0.6484375, "learning_rate": 0.0001608294534983192, "loss": 4.2877, "step": 8451 }, { "epoch": 0.8763714365331899, "grad_norm": 0.73046875, "learning_rate": 0.00016082083161313355, "loss": 4.3083, "step": 8452 }, { "epoch": 0.876475124587678, "grad_norm": 0.66796875, "learning_rate": 0.00016081220901032638, "loss": 4.3249, "step": 8453 }, { "epoch": 0.8765788126421661, "grad_norm": 0.71875, "learning_rate": 0.00016080358568999948, "loss": 4.2733, "step": 8454 }, { "epoch": 0.8766825006966541, "grad_norm": 0.6953125, "learning_rate": 0.00016079496165225448, "loss": 4.2688, "step": 8455 }, { "epoch": 0.8767861887511422, "grad_norm": 0.73828125, "learning_rate": 0.00016078633689719322, "loss": 4.3026, "step": 8456 }, { "epoch": 0.8768898768056302, "grad_norm": 0.68359375, "learning_rate": 0.00016077771142491745, "loss": 4.2397, "step": 8457 }, { "epoch": 0.8769935648601184, "grad_norm": 0.734375, "learning_rate": 0.00016076908523552893, "loss": 4.2921, "step": 8458 }, { "epoch": 0.8770972529146064, "grad_norm": 0.7578125, "learning_rate": 0.00016076045832912942, "loss": 4.2783, "step": 8459 }, { "epoch": 0.8772009409690945, "grad_norm": 0.703125, "learning_rate": 0.00016075183070582072, "loss": 4.2605, "step": 8460 }, { "epoch": 0.8773046290235825, "grad_norm": 0.7265625, "learning_rate": 0.00016074320236570463, "loss": 4.2831, "step": 8461 }, { "epoch": 0.8774083170780707, "grad_norm": 0.69140625, "learning_rate": 0.00016073457330888303, "loss": 4.2776, "step": 8462 }, { "epoch": 0.8775120051325587, "grad_norm": 0.703125, "learning_rate": 0.0001607259435354576, "loss": 4.2924, "step": 8463 }, { "epoch": 0.8776156931870468, "grad_norm": 0.6875, "learning_rate": 0.00016071731304553025, "loss": 4.2821, "step": 8464 }, { "epoch": 0.8777193812415348, "grad_norm": 0.68359375, "learning_rate": 0.00016070868183920277, "loss": 4.3323, "step": 8465 }, { "epoch": 0.8778230692960229, "grad_norm": 0.671875, "learning_rate": 0.00016070004991657703, "loss": 4.2644, "step": 8466 }, { "epoch": 0.877926757350511, "grad_norm": 0.64453125, "learning_rate": 0.00016069141727775485, "loss": 4.2329, "step": 8467 }, { "epoch": 0.8780304454049991, "grad_norm": 0.7421875, "learning_rate": 0.00016068278392283812, "loss": 4.2735, "step": 8468 }, { "epoch": 0.8781341334594871, "grad_norm": 0.64453125, "learning_rate": 0.0001606741498519287, "loss": 4.2749, "step": 8469 }, { "epoch": 0.8782378215139752, "grad_norm": 0.69140625, "learning_rate": 0.0001606655150651284, "loss": 4.2961, "step": 8470 }, { "epoch": 0.8783415095684632, "grad_norm": 0.6875, "learning_rate": 0.00016065687956253916, "loss": 4.2416, "step": 8471 }, { "epoch": 0.8784451976229514, "grad_norm": 0.69921875, "learning_rate": 0.00016064824334426287, "loss": 4.3016, "step": 8472 }, { "epoch": 0.8785488856774394, "grad_norm": 0.73828125, "learning_rate": 0.0001606396064104014, "loss": 4.2524, "step": 8473 }, { "epoch": 0.8786525737319275, "grad_norm": 0.734375, "learning_rate": 0.0001606309687610567, "loss": 4.2683, "step": 8474 }, { "epoch": 0.8787562617864155, "grad_norm": 0.7421875, "learning_rate": 0.00016062233039633065, "loss": 4.2676, "step": 8475 }, { "epoch": 0.8788599498409037, "grad_norm": 0.734375, "learning_rate": 0.00016061369131632516, "loss": 4.2734, "step": 8476 }, { "epoch": 0.8789636378953917, "grad_norm": 0.671875, "learning_rate": 0.00016060505152114222, "loss": 4.277, "step": 8477 }, { "epoch": 0.8790673259498798, "grad_norm": 0.69921875, "learning_rate": 0.00016059641101088369, "loss": 4.2818, "step": 8478 }, { "epoch": 0.8791710140043678, "grad_norm": 0.58203125, "learning_rate": 0.00016058776978565163, "loss": 4.3112, "step": 8479 }, { "epoch": 0.8792747020588559, "grad_norm": 0.7109375, "learning_rate": 0.00016057912784554786, "loss": 4.2279, "step": 8480 }, { "epoch": 0.879378390113344, "grad_norm": 0.68359375, "learning_rate": 0.00016057048519067446, "loss": 4.2666, "step": 8481 }, { "epoch": 0.8794820781678321, "grad_norm": 0.765625, "learning_rate": 0.00016056184182113335, "loss": 4.2834, "step": 8482 }, { "epoch": 0.8795857662223201, "grad_norm": 0.60546875, "learning_rate": 0.00016055319773702653, "loss": 4.2535, "step": 8483 }, { "epoch": 0.8796894542768082, "grad_norm": 0.7734375, "learning_rate": 0.00016054455293845597, "loss": 4.2957, "step": 8484 }, { "epoch": 0.8797931423312962, "grad_norm": 0.59375, "learning_rate": 0.00016053590742552367, "loss": 4.2143, "step": 8485 }, { "epoch": 0.8798968303857844, "grad_norm": 0.73046875, "learning_rate": 0.00016052726119833167, "loss": 4.3376, "step": 8486 }, { "epoch": 0.8800005184402725, "grad_norm": 0.703125, "learning_rate": 0.000160518614256982, "loss": 4.227, "step": 8487 }, { "epoch": 0.8801042064947605, "grad_norm": 0.65625, "learning_rate": 0.0001605099666015766, "loss": 4.2465, "step": 8488 }, { "epoch": 0.8802078945492486, "grad_norm": 0.67578125, "learning_rate": 0.00016050131823221756, "loss": 4.2975, "step": 8489 }, { "epoch": 0.8803115826037367, "grad_norm": 0.671875, "learning_rate": 0.00016049266914900694, "loss": 4.2436, "step": 8490 }, { "epoch": 0.8804152706582248, "grad_norm": 0.66015625, "learning_rate": 0.00016048401935204676, "loss": 4.3068, "step": 8491 }, { "epoch": 0.8805189587127128, "grad_norm": 0.66796875, "learning_rate": 0.0001604753688414391, "loss": 4.2671, "step": 8492 }, { "epoch": 0.8806226467672009, "grad_norm": 0.703125, "learning_rate": 0.00016046671761728597, "loss": 4.2261, "step": 8493 }, { "epoch": 0.8807263348216889, "grad_norm": 0.76953125, "learning_rate": 0.00016045806567968954, "loss": 4.3031, "step": 8494 }, { "epoch": 0.8808300228761771, "grad_norm": 0.78125, "learning_rate": 0.0001604494130287518, "loss": 4.2663, "step": 8495 }, { "epoch": 0.8809337109306651, "grad_norm": 0.6875, "learning_rate": 0.0001604407596645749, "loss": 4.2643, "step": 8496 }, { "epoch": 0.8810373989851532, "grad_norm": 0.78125, "learning_rate": 0.00016043210558726095, "loss": 4.2479, "step": 8497 }, { "epoch": 0.8811410870396412, "grad_norm": 0.671875, "learning_rate": 0.000160423450796912, "loss": 4.2624, "step": 8498 }, { "epoch": 0.8812447750941294, "grad_norm": 0.73046875, "learning_rate": 0.0001604147952936302, "loss": 4.2587, "step": 8499 }, { "epoch": 0.8813484631486174, "grad_norm": 0.66015625, "learning_rate": 0.00016040613907751769, "loss": 4.2823, "step": 8500 }, { "epoch": 0.8814521512031055, "grad_norm": 0.68359375, "learning_rate": 0.00016039748214867662, "loss": 4.2809, "step": 8501 }, { "epoch": 0.8815558392575935, "grad_norm": 0.68359375, "learning_rate": 0.00016038882450720906, "loss": 4.2661, "step": 8502 }, { "epoch": 0.8816595273120816, "grad_norm": 0.6484375, "learning_rate": 0.0001603801661532172, "loss": 4.2395, "step": 8503 }, { "epoch": 0.8817632153665697, "grad_norm": 0.74609375, "learning_rate": 0.00016037150708680324, "loss": 4.2503, "step": 8504 }, { "epoch": 0.8818669034210578, "grad_norm": 0.67578125, "learning_rate": 0.0001603628473080693, "loss": 4.3186, "step": 8505 }, { "epoch": 0.8819705914755458, "grad_norm": 0.6875, "learning_rate": 0.00016035418681711755, "loss": 4.2744, "step": 8506 }, { "epoch": 0.8820742795300339, "grad_norm": 0.66015625, "learning_rate": 0.00016034552561405022, "loss": 4.2869, "step": 8507 }, { "epoch": 0.8821779675845219, "grad_norm": 0.77734375, "learning_rate": 0.00016033686369896946, "loss": 4.2335, "step": 8508 }, { "epoch": 0.8822816556390101, "grad_norm": 0.69140625, "learning_rate": 0.0001603282010719775, "loss": 4.3039, "step": 8509 }, { "epoch": 0.8823853436934981, "grad_norm": 0.76171875, "learning_rate": 0.0001603195377331765, "loss": 4.3127, "step": 8510 }, { "epoch": 0.8824890317479862, "grad_norm": 0.6484375, "learning_rate": 0.00016031087368266876, "loss": 4.2854, "step": 8511 }, { "epoch": 0.8825927198024742, "grad_norm": 0.77734375, "learning_rate": 0.00016030220892055642, "loss": 4.2955, "step": 8512 }, { "epoch": 0.8826964078569624, "grad_norm": 0.6953125, "learning_rate": 0.0001602935434469418, "loss": 4.2926, "step": 8513 }, { "epoch": 0.8828000959114504, "grad_norm": 0.7578125, "learning_rate": 0.00016028487726192707, "loss": 4.3063, "step": 8514 }, { "epoch": 0.8829037839659385, "grad_norm": 0.76953125, "learning_rate": 0.00016027621036561454, "loss": 4.3108, "step": 8515 }, { "epoch": 0.8830074720204265, "grad_norm": 0.734375, "learning_rate": 0.0001602675427581064, "loss": 4.2623, "step": 8516 }, { "epoch": 0.8831111600749146, "grad_norm": 0.71875, "learning_rate": 0.000160258874439505, "loss": 4.2864, "step": 8517 }, { "epoch": 0.8832148481294027, "grad_norm": 0.75, "learning_rate": 0.00016025020540991257, "loss": 4.2584, "step": 8518 }, { "epoch": 0.8833185361838908, "grad_norm": 0.71484375, "learning_rate": 0.00016024153566943138, "loss": 4.2823, "step": 8519 }, { "epoch": 0.8834222242383788, "grad_norm": 0.72265625, "learning_rate": 0.00016023286521816375, "loss": 4.2925, "step": 8520 }, { "epoch": 0.8835259122928669, "grad_norm": 0.796875, "learning_rate": 0.000160224194056212, "loss": 4.3236, "step": 8521 }, { "epoch": 0.8836296003473549, "grad_norm": 0.77734375, "learning_rate": 0.00016021552218367844, "loss": 4.3257, "step": 8522 }, { "epoch": 0.8837332884018431, "grad_norm": 0.73046875, "learning_rate": 0.00016020684960066532, "loss": 4.2686, "step": 8523 }, { "epoch": 0.8838369764563311, "grad_norm": 0.703125, "learning_rate": 0.00016019817630727505, "loss": 4.2799, "step": 8524 }, { "epoch": 0.8839406645108192, "grad_norm": 0.75, "learning_rate": 0.00016018950230360993, "loss": 4.3084, "step": 8525 }, { "epoch": 0.8840443525653072, "grad_norm": 0.69921875, "learning_rate": 0.00016018082758977227, "loss": 4.3094, "step": 8526 }, { "epoch": 0.8841480406197954, "grad_norm": 0.76171875, "learning_rate": 0.00016017215216586447, "loss": 4.262, "step": 8527 }, { "epoch": 0.8842517286742834, "grad_norm": 0.703125, "learning_rate": 0.00016016347603198892, "loss": 4.3118, "step": 8528 }, { "epoch": 0.8843554167287715, "grad_norm": 0.7890625, "learning_rate": 0.0001601547991882479, "loss": 4.2696, "step": 8529 }, { "epoch": 0.8844591047832595, "grad_norm": 0.73828125, "learning_rate": 0.00016014612163474387, "loss": 4.2951, "step": 8530 }, { "epoch": 0.8845627928377476, "grad_norm": 0.8203125, "learning_rate": 0.00016013744337157917, "loss": 4.2564, "step": 8531 }, { "epoch": 0.8846664808922358, "grad_norm": 0.6953125, "learning_rate": 0.0001601287643988562, "loss": 4.2667, "step": 8532 }, { "epoch": 0.8847701689467238, "grad_norm": 0.82421875, "learning_rate": 0.0001601200847166774, "loss": 4.2845, "step": 8533 }, { "epoch": 0.8848738570012119, "grad_norm": 0.7109375, "learning_rate": 0.00016011140432514511, "loss": 4.271, "step": 8534 }, { "epoch": 0.8849775450556999, "grad_norm": 0.796875, "learning_rate": 0.0001601027232243618, "loss": 4.2858, "step": 8535 }, { "epoch": 0.885081233110188, "grad_norm": 0.734375, "learning_rate": 0.00016009404141442988, "loss": 4.2776, "step": 8536 }, { "epoch": 0.8851849211646761, "grad_norm": 0.7265625, "learning_rate": 0.00016008535889545182, "loss": 4.2547, "step": 8537 }, { "epoch": 0.8852886092191642, "grad_norm": 0.72265625, "learning_rate": 0.00016007667566753, "loss": 4.2769, "step": 8538 }, { "epoch": 0.8853922972736522, "grad_norm": 0.70703125, "learning_rate": 0.00016006799173076694, "loss": 4.2438, "step": 8539 }, { "epoch": 0.8854959853281403, "grad_norm": 0.6875, "learning_rate": 0.00016005930708526506, "loss": 4.2793, "step": 8540 }, { "epoch": 0.8855996733826283, "grad_norm": 0.6640625, "learning_rate": 0.00016005062173112688, "loss": 4.243, "step": 8541 }, { "epoch": 0.8857033614371165, "grad_norm": 0.66796875, "learning_rate": 0.00016004193566845478, "loss": 4.2848, "step": 8542 }, { "epoch": 0.8858070494916045, "grad_norm": 0.66796875, "learning_rate": 0.00016003324889735134, "loss": 4.2932, "step": 8543 }, { "epoch": 0.8859107375460926, "grad_norm": 0.62890625, "learning_rate": 0.00016002456141791903, "loss": 4.2527, "step": 8544 }, { "epoch": 0.8860144256005806, "grad_norm": 0.7265625, "learning_rate": 0.0001600158732302603, "loss": 4.2389, "step": 8545 }, { "epoch": 0.8861181136550688, "grad_norm": 0.66015625, "learning_rate": 0.00016000718433447774, "loss": 4.2734, "step": 8546 }, { "epoch": 0.8862218017095568, "grad_norm": 0.703125, "learning_rate": 0.00015999849473067386, "loss": 4.236, "step": 8547 }, { "epoch": 0.8863254897640449, "grad_norm": 0.6796875, "learning_rate": 0.0001599898044189511, "loss": 4.2682, "step": 8548 }, { "epoch": 0.8864291778185329, "grad_norm": 0.72265625, "learning_rate": 0.0001599811133994121, "loss": 4.2913, "step": 8549 }, { "epoch": 0.886532865873021, "grad_norm": 0.69921875, "learning_rate": 0.00015997242167215935, "loss": 4.2738, "step": 8550 }, { "epoch": 0.8866365539275091, "grad_norm": 0.74609375, "learning_rate": 0.00015996372923729544, "loss": 4.2653, "step": 8551 }, { "epoch": 0.8867402419819972, "grad_norm": 0.73828125, "learning_rate": 0.00015995503609492288, "loss": 4.2559, "step": 8552 }, { "epoch": 0.8868439300364852, "grad_norm": 0.76171875, "learning_rate": 0.00015994634224514428, "loss": 4.2434, "step": 8553 }, { "epoch": 0.8869476180909733, "grad_norm": 0.76171875, "learning_rate": 0.00015993764768806222, "loss": 4.2631, "step": 8554 }, { "epoch": 0.8870513061454613, "grad_norm": 0.765625, "learning_rate": 0.00015992895242377927, "loss": 4.2876, "step": 8555 }, { "epoch": 0.8871549941999495, "grad_norm": 0.765625, "learning_rate": 0.000159920256452398, "loss": 4.2772, "step": 8556 }, { "epoch": 0.8872586822544375, "grad_norm": 0.8515625, "learning_rate": 0.00015991155977402108, "loss": 4.29, "step": 8557 }, { "epoch": 0.8873623703089256, "grad_norm": 0.6796875, "learning_rate": 0.00015990286238875108, "loss": 4.2843, "step": 8558 }, { "epoch": 0.8874660583634136, "grad_norm": 0.84765625, "learning_rate": 0.00015989416429669062, "loss": 4.3156, "step": 8559 }, { "epoch": 0.8875697464179018, "grad_norm": 0.7890625, "learning_rate": 0.0001598854654979423, "loss": 4.2318, "step": 8560 }, { "epoch": 0.8876734344723898, "grad_norm": 0.87109375, "learning_rate": 0.00015987676599260882, "loss": 4.2924, "step": 8561 }, { "epoch": 0.8877771225268779, "grad_norm": 0.74609375, "learning_rate": 0.0001598680657807928, "loss": 4.2523, "step": 8562 }, { "epoch": 0.8878808105813659, "grad_norm": 0.890625, "learning_rate": 0.00015985936486259688, "loss": 4.253, "step": 8563 }, { "epoch": 0.887984498635854, "grad_norm": 0.81640625, "learning_rate": 0.00015985066323812372, "loss": 4.2691, "step": 8564 }, { "epoch": 0.8880881866903421, "grad_norm": 0.828125, "learning_rate": 0.000159841960907476, "loss": 4.3306, "step": 8565 }, { "epoch": 0.8881918747448302, "grad_norm": 0.87109375, "learning_rate": 0.00015983325787075642, "loss": 4.2717, "step": 8566 }, { "epoch": 0.8882955627993182, "grad_norm": 0.8203125, "learning_rate": 0.00015982455412806763, "loss": 4.271, "step": 8567 }, { "epoch": 0.8883992508538063, "grad_norm": 0.8671875, "learning_rate": 0.0001598158496795123, "loss": 4.2797, "step": 8568 }, { "epoch": 0.8885029389082943, "grad_norm": 0.7734375, "learning_rate": 0.00015980714452519323, "loss": 4.2774, "step": 8569 }, { "epoch": 0.8886066269627825, "grad_norm": 0.9375, "learning_rate": 0.00015979843866521302, "loss": 4.2678, "step": 8570 }, { "epoch": 0.8887103150172705, "grad_norm": 0.86328125, "learning_rate": 0.0001597897320996745, "loss": 4.2739, "step": 8571 }, { "epoch": 0.8888140030717586, "grad_norm": 0.9140625, "learning_rate": 0.00015978102482868032, "loss": 4.2616, "step": 8572 }, { "epoch": 0.8889176911262466, "grad_norm": 0.8515625, "learning_rate": 0.0001597723168523332, "loss": 4.2571, "step": 8573 }, { "epoch": 0.8890213791807348, "grad_norm": 0.83203125, "learning_rate": 0.00015976360817073596, "loss": 4.3135, "step": 8574 }, { "epoch": 0.8891250672352228, "grad_norm": 0.7265625, "learning_rate": 0.0001597548987839913, "loss": 4.3112, "step": 8575 }, { "epoch": 0.8892287552897109, "grad_norm": 0.734375, "learning_rate": 0.00015974618869220203, "loss": 4.273, "step": 8576 }, { "epoch": 0.889332443344199, "grad_norm": 0.70703125, "learning_rate": 0.00015973747789547086, "loss": 4.2647, "step": 8577 }, { "epoch": 0.889436131398687, "grad_norm": 0.74609375, "learning_rate": 0.00015972876639390058, "loss": 4.2989, "step": 8578 }, { "epoch": 0.8895398194531752, "grad_norm": 0.72265625, "learning_rate": 0.00015972005418759405, "loss": 4.256, "step": 8579 }, { "epoch": 0.8896435075076632, "grad_norm": 0.7734375, "learning_rate": 0.00015971134127665395, "loss": 4.3002, "step": 8580 }, { "epoch": 0.8897471955621513, "grad_norm": 0.79296875, "learning_rate": 0.00015970262766118318, "loss": 4.2529, "step": 8581 }, { "epoch": 0.8898508836166393, "grad_norm": 0.71875, "learning_rate": 0.00015969391334128447, "loss": 4.2892, "step": 8582 }, { "epoch": 0.8899545716711275, "grad_norm": 0.75390625, "learning_rate": 0.00015968519831706073, "loss": 4.2553, "step": 8583 }, { "epoch": 0.8900582597256155, "grad_norm": 0.7109375, "learning_rate": 0.00015967648258861472, "loss": 4.2492, "step": 8584 }, { "epoch": 0.8901619477801036, "grad_norm": 0.78125, "learning_rate": 0.00015966776615604928, "loss": 4.2728, "step": 8585 }, { "epoch": 0.8902656358345916, "grad_norm": 0.7265625, "learning_rate": 0.0001596590490194673, "loss": 4.2634, "step": 8586 }, { "epoch": 0.8903693238890797, "grad_norm": 0.76953125, "learning_rate": 0.0001596503311789716, "loss": 4.2454, "step": 8587 }, { "epoch": 0.8904730119435678, "grad_norm": 0.703125, "learning_rate": 0.00015964161263466503, "loss": 4.3347, "step": 8588 }, { "epoch": 0.8905766999980559, "grad_norm": 0.6953125, "learning_rate": 0.00015963289338665046, "loss": 4.294, "step": 8589 }, { "epoch": 0.8906803880525439, "grad_norm": 0.65234375, "learning_rate": 0.0001596241734350308, "loss": 4.3227, "step": 8590 }, { "epoch": 0.890784076107032, "grad_norm": 0.75, "learning_rate": 0.0001596154527799089, "loss": 4.2758, "step": 8591 }, { "epoch": 0.89088776416152, "grad_norm": 0.6875, "learning_rate": 0.00015960673142138775, "loss": 4.3, "step": 8592 }, { "epoch": 0.8909914522160082, "grad_norm": 0.703125, "learning_rate": 0.0001595980093595701, "loss": 4.278, "step": 8593 }, { "epoch": 0.8910951402704962, "grad_norm": 0.69140625, "learning_rate": 0.00015958928659455895, "loss": 4.2745, "step": 8594 }, { "epoch": 0.8911988283249843, "grad_norm": 0.6953125, "learning_rate": 0.0001595805631264572, "loss": 4.2686, "step": 8595 }, { "epoch": 0.8913025163794723, "grad_norm": 0.6796875, "learning_rate": 0.0001595718389553678, "loss": 4.2547, "step": 8596 }, { "epoch": 0.8914062044339605, "grad_norm": 0.640625, "learning_rate": 0.00015956311408139365, "loss": 4.2825, "step": 8597 }, { "epoch": 0.8915098924884485, "grad_norm": 0.66796875, "learning_rate": 0.00015955438850463776, "loss": 4.2491, "step": 8598 }, { "epoch": 0.8916135805429366, "grad_norm": 0.703125, "learning_rate": 0.000159545662225203, "loss": 4.2302, "step": 8599 }, { "epoch": 0.8917172685974246, "grad_norm": 0.64453125, "learning_rate": 0.0001595369352431924, "loss": 4.2561, "step": 8600 }, { "epoch": 0.8918209566519127, "grad_norm": 0.63671875, "learning_rate": 0.00015952820755870886, "loss": 4.2302, "step": 8601 }, { "epoch": 0.8919246447064008, "grad_norm": 0.64453125, "learning_rate": 0.0001595194791718554, "loss": 4.2646, "step": 8602 }, { "epoch": 0.8920283327608889, "grad_norm": 0.68359375, "learning_rate": 0.00015951075008273504, "loss": 4.2478, "step": 8603 }, { "epoch": 0.8921320208153769, "grad_norm": 0.66796875, "learning_rate": 0.00015950202029145068, "loss": 4.305, "step": 8604 }, { "epoch": 0.892235708869865, "grad_norm": 0.6953125, "learning_rate": 0.0001594932897981054, "loss": 4.3149, "step": 8605 }, { "epoch": 0.892339396924353, "grad_norm": 0.68359375, "learning_rate": 0.0001594845586028022, "loss": 4.2881, "step": 8606 }, { "epoch": 0.8924430849788412, "grad_norm": 0.73046875, "learning_rate": 0.00015947582670564406, "loss": 4.222, "step": 8607 }, { "epoch": 0.8925467730333292, "grad_norm": 0.69921875, "learning_rate": 0.0001594670941067341, "loss": 4.2469, "step": 8608 }, { "epoch": 0.8926504610878173, "grad_norm": 0.7265625, "learning_rate": 0.0001594583608061752, "loss": 4.244, "step": 8609 }, { "epoch": 0.8927541491423053, "grad_norm": 0.703125, "learning_rate": 0.00015944962680407057, "loss": 4.2382, "step": 8610 }, { "epoch": 0.8928578371967935, "grad_norm": 0.8515625, "learning_rate": 0.00015944089210052312, "loss": 4.2642, "step": 8611 }, { "epoch": 0.8929615252512815, "grad_norm": 0.6328125, "learning_rate": 0.000159432156695636, "loss": 4.2569, "step": 8612 }, { "epoch": 0.8930652133057696, "grad_norm": 0.81640625, "learning_rate": 0.00015942342058951225, "loss": 4.2415, "step": 8613 }, { "epoch": 0.8931689013602576, "grad_norm": 0.66015625, "learning_rate": 0.00015941468378225497, "loss": 4.2533, "step": 8614 }, { "epoch": 0.8932725894147457, "grad_norm": 0.87109375, "learning_rate": 0.00015940594627396722, "loss": 4.2619, "step": 8615 }, { "epoch": 0.8933762774692338, "grad_norm": 0.68359375, "learning_rate": 0.0001593972080647521, "loss": 4.2925, "step": 8616 }, { "epoch": 0.8934799655237219, "grad_norm": 0.84765625, "learning_rate": 0.0001593884691547127, "loss": 4.2796, "step": 8617 }, { "epoch": 0.8935836535782099, "grad_norm": 0.66015625, "learning_rate": 0.00015937972954395213, "loss": 4.3201, "step": 8618 }, { "epoch": 0.893687341632698, "grad_norm": 0.72265625, "learning_rate": 0.00015937098923257353, "loss": 4.2301, "step": 8619 }, { "epoch": 0.8937910296871862, "grad_norm": 0.7578125, "learning_rate": 0.00015936224822068, "loss": 4.3146, "step": 8620 }, { "epoch": 0.8938947177416742, "grad_norm": 0.71875, "learning_rate": 0.0001593535065083747, "loss": 4.2895, "step": 8621 }, { "epoch": 0.8939984057961623, "grad_norm": 0.77734375, "learning_rate": 0.00015934476409576077, "loss": 4.2772, "step": 8622 }, { "epoch": 0.8941020938506503, "grad_norm": 0.6640625, "learning_rate": 0.00015933602098294137, "loss": 4.2764, "step": 8623 }, { "epoch": 0.8942057819051384, "grad_norm": 0.7578125, "learning_rate": 0.00015932727717001962, "loss": 4.2412, "step": 8624 }, { "epoch": 0.8943094699596265, "grad_norm": 0.6328125, "learning_rate": 0.0001593185326570987, "loss": 4.2683, "step": 8625 }, { "epoch": 0.8944131580141146, "grad_norm": 0.70703125, "learning_rate": 0.00015930978744428182, "loss": 4.2657, "step": 8626 }, { "epoch": 0.8945168460686026, "grad_norm": 0.69140625, "learning_rate": 0.00015930104153167213, "loss": 4.2425, "step": 8627 }, { "epoch": 0.8946205341230907, "grad_norm": 0.671875, "learning_rate": 0.00015929229491937286, "loss": 4.2625, "step": 8628 }, { "epoch": 0.8947242221775787, "grad_norm": 0.734375, "learning_rate": 0.00015928354760748716, "loss": 4.2919, "step": 8629 }, { "epoch": 0.8948279102320669, "grad_norm": 0.68359375, "learning_rate": 0.00015927479959611826, "loss": 4.2587, "step": 8630 }, { "epoch": 0.8949315982865549, "grad_norm": 0.69921875, "learning_rate": 0.00015926605088536938, "loss": 4.2266, "step": 8631 }, { "epoch": 0.895035286341043, "grad_norm": 0.6796875, "learning_rate": 0.00015925730147534374, "loss": 4.301, "step": 8632 }, { "epoch": 0.895138974395531, "grad_norm": 0.69921875, "learning_rate": 0.0001592485513661446, "loss": 4.289, "step": 8633 }, { "epoch": 0.8952426624500192, "grad_norm": 0.703125, "learning_rate": 0.00015923980055787518, "loss": 4.2844, "step": 8634 }, { "epoch": 0.8953463505045072, "grad_norm": 0.6640625, "learning_rate": 0.0001592310490506387, "loss": 4.2731, "step": 8635 }, { "epoch": 0.8954500385589953, "grad_norm": 0.73828125, "learning_rate": 0.0001592222968445385, "loss": 4.2905, "step": 8636 }, { "epoch": 0.8955537266134833, "grad_norm": 0.69921875, "learning_rate": 0.00015921354393967779, "loss": 4.3063, "step": 8637 }, { "epoch": 0.8956574146679714, "grad_norm": 0.6484375, "learning_rate": 0.00015920479033615983, "loss": 4.2803, "step": 8638 }, { "epoch": 0.8957611027224595, "grad_norm": 0.73828125, "learning_rate": 0.00015919603603408792, "loss": 4.3242, "step": 8639 }, { "epoch": 0.8958647907769476, "grad_norm": 0.62109375, "learning_rate": 0.00015918728103356538, "loss": 4.2296, "step": 8640 }, { "epoch": 0.8959684788314356, "grad_norm": 0.71484375, "learning_rate": 0.00015917852533469548, "loss": 4.3023, "step": 8641 }, { "epoch": 0.8960721668859237, "grad_norm": 0.765625, "learning_rate": 0.00015916976893758154, "loss": 4.2442, "step": 8642 }, { "epoch": 0.8961758549404117, "grad_norm": 0.796875, "learning_rate": 0.00015916101184232687, "loss": 4.278, "step": 8643 }, { "epoch": 0.8962795429948999, "grad_norm": 0.6640625, "learning_rate": 0.0001591522540490348, "loss": 4.2436, "step": 8644 }, { "epoch": 0.8963832310493879, "grad_norm": 0.78515625, "learning_rate": 0.00015914349555780865, "loss": 4.2421, "step": 8645 }, { "epoch": 0.896486919103876, "grad_norm": 0.70703125, "learning_rate": 0.0001591347363687518, "loss": 4.3135, "step": 8646 }, { "epoch": 0.896590607158364, "grad_norm": 0.7890625, "learning_rate": 0.00015912597648196753, "loss": 4.2696, "step": 8647 }, { "epoch": 0.8966942952128522, "grad_norm": 0.69921875, "learning_rate": 0.0001591172158975592, "loss": 4.2535, "step": 8648 }, { "epoch": 0.8967979832673402, "grad_norm": 0.66015625, "learning_rate": 0.00015910845461563028, "loss": 4.2705, "step": 8649 }, { "epoch": 0.8969016713218283, "grad_norm": 0.75, "learning_rate": 0.00015909969263628408, "loss": 4.2643, "step": 8650 }, { "epoch": 0.8970053593763163, "grad_norm": 0.6953125, "learning_rate": 0.00015909092995962393, "loss": 4.2681, "step": 8651 }, { "epoch": 0.8971090474308044, "grad_norm": 0.6875, "learning_rate": 0.0001590821665857533, "loss": 4.302, "step": 8652 }, { "epoch": 0.8972127354852925, "grad_norm": 0.79296875, "learning_rate": 0.00015907340251477558, "loss": 4.2141, "step": 8653 }, { "epoch": 0.8973164235397806, "grad_norm": 0.66796875, "learning_rate": 0.0001590646377467941, "loss": 4.324, "step": 8654 }, { "epoch": 0.8974201115942686, "grad_norm": 0.77734375, "learning_rate": 0.00015905587228191235, "loss": 4.2819, "step": 8655 }, { "epoch": 0.8975237996487567, "grad_norm": 0.71875, "learning_rate": 0.00015904710612023372, "loss": 4.284, "step": 8656 }, { "epoch": 0.8976274877032447, "grad_norm": 0.75390625, "learning_rate": 0.00015903833926186168, "loss": 4.228, "step": 8657 }, { "epoch": 0.8977311757577329, "grad_norm": 0.6796875, "learning_rate": 0.00015902957170689966, "loss": 4.2526, "step": 8658 }, { "epoch": 0.8978348638122209, "grad_norm": 0.69140625, "learning_rate": 0.00015902080345545102, "loss": 4.2731, "step": 8659 }, { "epoch": 0.897938551866709, "grad_norm": 0.6953125, "learning_rate": 0.00015901203450761932, "loss": 4.303, "step": 8660 }, { "epoch": 0.898042239921197, "grad_norm": 0.7578125, "learning_rate": 0.000159003264863508, "loss": 4.3071, "step": 8661 }, { "epoch": 0.8981459279756852, "grad_norm": 0.72265625, "learning_rate": 0.00015899449452322055, "loss": 4.2771, "step": 8662 }, { "epoch": 0.8982496160301732, "grad_norm": 0.734375, "learning_rate": 0.00015898572348686037, "loss": 4.3085, "step": 8663 }, { "epoch": 0.8983533040846613, "grad_norm": 0.8203125, "learning_rate": 0.00015897695175453105, "loss": 4.2636, "step": 8664 }, { "epoch": 0.8984569921391494, "grad_norm": 0.6953125, "learning_rate": 0.000158968179326336, "loss": 4.2446, "step": 8665 }, { "epoch": 0.8985606801936374, "grad_norm": 0.82421875, "learning_rate": 0.0001589594062023788, "loss": 4.2981, "step": 8666 }, { "epoch": 0.8986643682481256, "grad_norm": 0.69140625, "learning_rate": 0.00015895063238276292, "loss": 4.2697, "step": 8667 }, { "epoch": 0.8987680563026136, "grad_norm": 0.73828125, "learning_rate": 0.00015894185786759189, "loss": 4.2807, "step": 8668 }, { "epoch": 0.8988717443571017, "grad_norm": 0.6796875, "learning_rate": 0.00015893308265696923, "loss": 4.2924, "step": 8669 }, { "epoch": 0.8989754324115897, "grad_norm": 0.8359375, "learning_rate": 0.0001589243067509985, "loss": 4.2421, "step": 8670 }, { "epoch": 0.8990791204660779, "grad_norm": 0.671875, "learning_rate": 0.00015891553014978324, "loss": 4.2539, "step": 8671 }, { "epoch": 0.8991828085205659, "grad_norm": 0.76953125, "learning_rate": 0.000158906752853427, "loss": 4.2523, "step": 8672 }, { "epoch": 0.899286496575054, "grad_norm": 0.6796875, "learning_rate": 0.00015889797486203332, "loss": 4.2962, "step": 8673 }, { "epoch": 0.899390184629542, "grad_norm": 0.78515625, "learning_rate": 0.00015888919617570584, "loss": 4.2683, "step": 8674 }, { "epoch": 0.8994938726840301, "grad_norm": 0.7265625, "learning_rate": 0.00015888041679454806, "loss": 4.2513, "step": 8675 }, { "epoch": 0.8995975607385182, "grad_norm": 0.80859375, "learning_rate": 0.0001588716367186636, "loss": 4.2272, "step": 8676 }, { "epoch": 0.8997012487930063, "grad_norm": 0.6875, "learning_rate": 0.00015886285594815606, "loss": 4.2718, "step": 8677 }, { "epoch": 0.8998049368474943, "grad_norm": 0.76953125, "learning_rate": 0.00015885407448312905, "loss": 4.2413, "step": 8678 }, { "epoch": 0.8999086249019824, "grad_norm": 0.69921875, "learning_rate": 0.0001588452923236862, "loss": 4.304, "step": 8679 }, { "epoch": 0.9000123129564704, "grad_norm": 0.75, "learning_rate": 0.00015883650946993104, "loss": 4.2845, "step": 8680 }, { "epoch": 0.9001160010109586, "grad_norm": 0.71484375, "learning_rate": 0.0001588277259219673, "loss": 4.2831, "step": 8681 }, { "epoch": 0.9002196890654466, "grad_norm": 0.734375, "learning_rate": 0.00015881894167989855, "loss": 4.2684, "step": 8682 }, { "epoch": 0.9003233771199347, "grad_norm": 0.703125, "learning_rate": 0.0001588101567438285, "loss": 4.2956, "step": 8683 }, { "epoch": 0.9004270651744227, "grad_norm": 0.80859375, "learning_rate": 0.00015880137111386075, "loss": 4.2602, "step": 8684 }, { "epoch": 0.9005307532289109, "grad_norm": 0.7109375, "learning_rate": 0.00015879258479009896, "loss": 4.257, "step": 8685 }, { "epoch": 0.9006344412833989, "grad_norm": 0.765625, "learning_rate": 0.0001587837977726468, "loss": 4.2761, "step": 8686 }, { "epoch": 0.900738129337887, "grad_norm": 0.75390625, "learning_rate": 0.00015877501006160804, "loss": 4.2571, "step": 8687 }, { "epoch": 0.900841817392375, "grad_norm": 0.76171875, "learning_rate": 0.00015876622165708623, "loss": 4.2385, "step": 8688 }, { "epoch": 0.9009455054468631, "grad_norm": 0.7890625, "learning_rate": 0.00015875743255918512, "loss": 4.2775, "step": 8689 }, { "epoch": 0.9010491935013512, "grad_norm": 0.80078125, "learning_rate": 0.00015874864276800845, "loss": 4.27, "step": 8690 }, { "epoch": 0.9011528815558393, "grad_norm": 0.81640625, "learning_rate": 0.00015873985228365988, "loss": 4.2789, "step": 8691 }, { "epoch": 0.9012565696103273, "grad_norm": 0.75390625, "learning_rate": 0.00015873106110624315, "loss": 4.3142, "step": 8692 }, { "epoch": 0.9013602576648154, "grad_norm": 0.80078125, "learning_rate": 0.00015872226923586196, "loss": 4.2239, "step": 8693 }, { "epoch": 0.9014639457193034, "grad_norm": 0.71875, "learning_rate": 0.00015871347667262007, "loss": 4.2556, "step": 8694 }, { "epoch": 0.9015676337737916, "grad_norm": 0.74609375, "learning_rate": 0.00015870468341662124, "loss": 4.3006, "step": 8695 }, { "epoch": 0.9016713218282796, "grad_norm": 0.76171875, "learning_rate": 0.00015869588946796918, "loss": 4.2797, "step": 8696 }, { "epoch": 0.9017750098827677, "grad_norm": 0.86328125, "learning_rate": 0.00015868709482676766, "loss": 4.3279, "step": 8697 }, { "epoch": 0.9018786979372557, "grad_norm": 0.80859375, "learning_rate": 0.00015867829949312045, "loss": 4.3145, "step": 8698 }, { "epoch": 0.9019823859917439, "grad_norm": 0.8203125, "learning_rate": 0.00015866950346713136, "loss": 4.3113, "step": 8699 }, { "epoch": 0.9020860740462319, "grad_norm": 0.828125, "learning_rate": 0.00015866070674890414, "loss": 4.2536, "step": 8700 }, { "epoch": 0.90218976210072, "grad_norm": 0.734375, "learning_rate": 0.00015865190933854259, "loss": 4.2619, "step": 8701 }, { "epoch": 0.902293450155208, "grad_norm": 0.71875, "learning_rate": 0.0001586431112361505, "loss": 4.2475, "step": 8702 }, { "epoch": 0.9023971382096961, "grad_norm": 0.74609375, "learning_rate": 0.00015863431244183168, "loss": 4.2817, "step": 8703 }, { "epoch": 0.9025008262641842, "grad_norm": 0.75, "learning_rate": 0.00015862551295568996, "loss": 4.2685, "step": 8704 }, { "epoch": 0.9026045143186723, "grad_norm": 0.6796875, "learning_rate": 0.00015861671277782918, "loss": 4.2415, "step": 8705 }, { "epoch": 0.9027082023731603, "grad_norm": 0.85546875, "learning_rate": 0.0001586079119083531, "loss": 4.2506, "step": 8706 }, { "epoch": 0.9028118904276484, "grad_norm": 0.69140625, "learning_rate": 0.00015859911034736562, "loss": 4.2706, "step": 8707 }, { "epoch": 0.9029155784821364, "grad_norm": 0.83984375, "learning_rate": 0.0001585903080949706, "loss": 4.2659, "step": 8708 }, { "epoch": 0.9030192665366246, "grad_norm": 0.6953125, "learning_rate": 0.0001585815051512719, "loss": 4.2601, "step": 8709 }, { "epoch": 0.9031229545911127, "grad_norm": 0.76171875, "learning_rate": 0.0001585727015163733, "loss": 4.2822, "step": 8710 }, { "epoch": 0.9032266426456007, "grad_norm": 0.73046875, "learning_rate": 0.00015856389719037876, "loss": 4.2895, "step": 8711 }, { "epoch": 0.9033303307000888, "grad_norm": 0.6953125, "learning_rate": 0.00015855509217339219, "loss": 4.2873, "step": 8712 }, { "epoch": 0.9034340187545769, "grad_norm": 0.71875, "learning_rate": 0.00015854628646551737, "loss": 4.2613, "step": 8713 }, { "epoch": 0.903537706809065, "grad_norm": 0.69140625, "learning_rate": 0.0001585374800668583, "loss": 4.2723, "step": 8714 }, { "epoch": 0.903641394863553, "grad_norm": 0.72265625, "learning_rate": 0.00015852867297751877, "loss": 4.2495, "step": 8715 }, { "epoch": 0.9037450829180411, "grad_norm": 0.7421875, "learning_rate": 0.00015851986519760286, "loss": 4.2634, "step": 8716 }, { "epoch": 0.9038487709725291, "grad_norm": 0.7890625, "learning_rate": 0.00015851105672721433, "loss": 4.2665, "step": 8717 }, { "epoch": 0.9039524590270173, "grad_norm": 0.7265625, "learning_rate": 0.00015850224756645717, "loss": 4.3059, "step": 8718 }, { "epoch": 0.9040561470815053, "grad_norm": 0.79296875, "learning_rate": 0.00015849343771543538, "loss": 4.2917, "step": 8719 }, { "epoch": 0.9041598351359934, "grad_norm": 0.76953125, "learning_rate": 0.00015848462717425282, "loss": 4.2838, "step": 8720 }, { "epoch": 0.9042635231904814, "grad_norm": 0.9765625, "learning_rate": 0.0001584758159430135, "loss": 4.2943, "step": 8721 }, { "epoch": 0.9043672112449695, "grad_norm": 0.76953125, "learning_rate": 0.00015846700402182134, "loss": 4.267, "step": 8722 }, { "epoch": 0.9044708992994576, "grad_norm": 0.83984375, "learning_rate": 0.00015845819141078037, "loss": 4.2886, "step": 8723 }, { "epoch": 0.9045745873539457, "grad_norm": 0.80859375, "learning_rate": 0.00015844937810999453, "loss": 4.2663, "step": 8724 }, { "epoch": 0.9046782754084337, "grad_norm": 0.78125, "learning_rate": 0.00015844056411956777, "loss": 4.2901, "step": 8725 }, { "epoch": 0.9047819634629218, "grad_norm": 0.75, "learning_rate": 0.00015843174943960418, "loss": 4.3164, "step": 8726 }, { "epoch": 0.9048856515174099, "grad_norm": 0.7734375, "learning_rate": 0.00015842293407020765, "loss": 4.2537, "step": 8727 }, { "epoch": 0.904989339571898, "grad_norm": 0.73046875, "learning_rate": 0.00015841411801148232, "loss": 4.27, "step": 8728 }, { "epoch": 0.905093027626386, "grad_norm": 0.82421875, "learning_rate": 0.00015840530126353213, "loss": 4.2543, "step": 8729 }, { "epoch": 0.9051967156808741, "grad_norm": 0.71875, "learning_rate": 0.0001583964838264611, "loss": 4.2713, "step": 8730 }, { "epoch": 0.9053004037353621, "grad_norm": 0.734375, "learning_rate": 0.00015838766570037326, "loss": 4.2698, "step": 8731 }, { "epoch": 0.9054040917898503, "grad_norm": 0.76953125, "learning_rate": 0.0001583788468853727, "loss": 4.2757, "step": 8732 }, { "epoch": 0.9055077798443383, "grad_norm": 0.7890625, "learning_rate": 0.0001583700273815635, "loss": 4.291, "step": 8733 }, { "epoch": 0.9056114678988264, "grad_norm": 0.77734375, "learning_rate": 0.00015836120718904967, "loss": 4.2981, "step": 8734 }, { "epoch": 0.9057151559533144, "grad_norm": 0.765625, "learning_rate": 0.00015835238630793524, "loss": 4.3023, "step": 8735 }, { "epoch": 0.9058188440078025, "grad_norm": 0.76953125, "learning_rate": 0.00015834356473832438, "loss": 4.2939, "step": 8736 }, { "epoch": 0.9059225320622906, "grad_norm": 0.76953125, "learning_rate": 0.00015833474248032111, "loss": 4.2536, "step": 8737 }, { "epoch": 0.9060262201167787, "grad_norm": 0.765625, "learning_rate": 0.0001583259195340295, "loss": 4.2877, "step": 8738 }, { "epoch": 0.9061299081712667, "grad_norm": 0.7421875, "learning_rate": 0.00015831709589955376, "loss": 4.2692, "step": 8739 }, { "epoch": 0.9062335962257548, "grad_norm": 0.7734375, "learning_rate": 0.0001583082715769979, "loss": 4.2686, "step": 8740 }, { "epoch": 0.9063372842802428, "grad_norm": 0.7578125, "learning_rate": 0.00015829944656646608, "loss": 4.2784, "step": 8741 }, { "epoch": 0.906440972334731, "grad_norm": 0.8515625, "learning_rate": 0.0001582906208680624, "loss": 4.2617, "step": 8742 }, { "epoch": 0.906544660389219, "grad_norm": 0.73828125, "learning_rate": 0.00015828179448189103, "loss": 4.2634, "step": 8743 }, { "epoch": 0.9066483484437071, "grad_norm": 0.828125, "learning_rate": 0.00015827296740805608, "loss": 4.2801, "step": 8744 }, { "epoch": 0.9067520364981951, "grad_norm": 0.703125, "learning_rate": 0.00015826413964666172, "loss": 4.2969, "step": 8745 }, { "epoch": 0.9068557245526833, "grad_norm": 0.8046875, "learning_rate": 0.0001582553111978121, "loss": 4.2976, "step": 8746 }, { "epoch": 0.9069594126071713, "grad_norm": 0.6953125, "learning_rate": 0.00015824648206161138, "loss": 4.2384, "step": 8747 }, { "epoch": 0.9070631006616594, "grad_norm": 0.7265625, "learning_rate": 0.00015823765223816372, "loss": 4.2754, "step": 8748 }, { "epoch": 0.9071667887161474, "grad_norm": 0.75, "learning_rate": 0.00015822882172757333, "loss": 4.3017, "step": 8749 }, { "epoch": 0.9072704767706355, "grad_norm": 0.7578125, "learning_rate": 0.00015821999052994441, "loss": 4.2288, "step": 8750 }, { "epoch": 0.9073741648251236, "grad_norm": 0.78515625, "learning_rate": 0.00015821115864538113, "loss": 4.2528, "step": 8751 }, { "epoch": 0.9074778528796117, "grad_norm": 0.7578125, "learning_rate": 0.00015820232607398772, "loss": 4.2473, "step": 8752 }, { "epoch": 0.9075815409340997, "grad_norm": 0.78515625, "learning_rate": 0.0001581934928158684, "loss": 4.2968, "step": 8753 }, { "epoch": 0.9076852289885878, "grad_norm": 0.69140625, "learning_rate": 0.00015818465887112738, "loss": 4.24, "step": 8754 }, { "epoch": 0.907788917043076, "grad_norm": 0.73828125, "learning_rate": 0.00015817582423986886, "loss": 4.2909, "step": 8755 }, { "epoch": 0.907892605097564, "grad_norm": 0.71484375, "learning_rate": 0.0001581669889221971, "loss": 4.2621, "step": 8756 }, { "epoch": 0.9079962931520521, "grad_norm": 0.75, "learning_rate": 0.00015815815291821638, "loss": 4.3012, "step": 8757 }, { "epoch": 0.9080999812065401, "grad_norm": 0.703125, "learning_rate": 0.00015814931622803094, "loss": 4.2564, "step": 8758 }, { "epoch": 0.9082036692610282, "grad_norm": 0.6875, "learning_rate": 0.000158140478851745, "loss": 4.299, "step": 8759 }, { "epoch": 0.9083073573155163, "grad_norm": 0.7734375, "learning_rate": 0.0001581316407894629, "loss": 4.2498, "step": 8760 }, { "epoch": 0.9084110453700044, "grad_norm": 0.71875, "learning_rate": 0.00015812280204128887, "loss": 4.2857, "step": 8761 }, { "epoch": 0.9085147334244924, "grad_norm": 0.78515625, "learning_rate": 0.00015811396260732722, "loss": 4.2647, "step": 8762 }, { "epoch": 0.9086184214789805, "grad_norm": 0.75, "learning_rate": 0.00015810512248768226, "loss": 4.2407, "step": 8763 }, { "epoch": 0.9087221095334685, "grad_norm": 0.69921875, "learning_rate": 0.00015809628168245821, "loss": 4.2563, "step": 8764 }, { "epoch": 0.9088257975879567, "grad_norm": 0.6640625, "learning_rate": 0.00015808744019175951, "loss": 4.2673, "step": 8765 }, { "epoch": 0.9089294856424447, "grad_norm": 0.72265625, "learning_rate": 0.00015807859801569043, "loss": 4.2512, "step": 8766 }, { "epoch": 0.9090331736969328, "grad_norm": 0.609375, "learning_rate": 0.00015806975515435522, "loss": 4.3002, "step": 8767 }, { "epoch": 0.9091368617514208, "grad_norm": 0.8046875, "learning_rate": 0.0001580609116078583, "loss": 4.2859, "step": 8768 }, { "epoch": 0.909240549805909, "grad_norm": 0.66015625, "learning_rate": 0.00015805206737630402, "loss": 4.2534, "step": 8769 }, { "epoch": 0.909344237860397, "grad_norm": 0.67578125, "learning_rate": 0.0001580432224597967, "loss": 4.3108, "step": 8770 }, { "epoch": 0.9094479259148851, "grad_norm": 0.6875, "learning_rate": 0.00015803437685844073, "loss": 4.2701, "step": 8771 }, { "epoch": 0.9095516139693731, "grad_norm": 0.69140625, "learning_rate": 0.0001580255305723404, "loss": 4.2773, "step": 8772 }, { "epoch": 0.9096553020238612, "grad_norm": 0.671875, "learning_rate": 0.0001580166836016002, "loss": 4.2535, "step": 8773 }, { "epoch": 0.9097589900783493, "grad_norm": 0.68359375, "learning_rate": 0.00015800783594632444, "loss": 4.2504, "step": 8774 }, { "epoch": 0.9098626781328374, "grad_norm": 0.640625, "learning_rate": 0.00015799898760661757, "loss": 4.2086, "step": 8775 }, { "epoch": 0.9099663661873254, "grad_norm": 0.7109375, "learning_rate": 0.00015799013858258388, "loss": 4.2892, "step": 8776 }, { "epoch": 0.9100700542418135, "grad_norm": 0.6640625, "learning_rate": 0.0001579812888743279, "loss": 4.2683, "step": 8777 }, { "epoch": 0.9101737422963015, "grad_norm": 0.765625, "learning_rate": 0.000157972438481954, "loss": 4.2804, "step": 8778 }, { "epoch": 0.9102774303507897, "grad_norm": 0.68359375, "learning_rate": 0.00015796358740556658, "loss": 4.2349, "step": 8779 }, { "epoch": 0.9103811184052777, "grad_norm": 0.79296875, "learning_rate": 0.00015795473564527012, "loss": 4.2952, "step": 8780 }, { "epoch": 0.9104848064597658, "grad_norm": 0.68359375, "learning_rate": 0.00015794588320116904, "loss": 4.3084, "step": 8781 }, { "epoch": 0.9105884945142538, "grad_norm": 0.8125, "learning_rate": 0.0001579370300733678, "loss": 4.2992, "step": 8782 }, { "epoch": 0.910692182568742, "grad_norm": 0.69921875, "learning_rate": 0.00015792817626197084, "loss": 4.2066, "step": 8783 }, { "epoch": 0.91079587062323, "grad_norm": 0.83984375, "learning_rate": 0.0001579193217670826, "loss": 4.2721, "step": 8784 }, { "epoch": 0.9108995586777181, "grad_norm": 0.73046875, "learning_rate": 0.00015791046658880759, "loss": 4.3195, "step": 8785 }, { "epoch": 0.9110032467322061, "grad_norm": 0.84765625, "learning_rate": 0.00015790161072725033, "loss": 4.2495, "step": 8786 }, { "epoch": 0.9111069347866942, "grad_norm": 0.79296875, "learning_rate": 0.00015789275418251527, "loss": 4.2673, "step": 8787 }, { "epoch": 0.9112106228411823, "grad_norm": 0.78515625, "learning_rate": 0.00015788389695470687, "loss": 4.2598, "step": 8788 }, { "epoch": 0.9113143108956704, "grad_norm": 0.79296875, "learning_rate": 0.00015787503904392969, "loss": 4.2678, "step": 8789 }, { "epoch": 0.9114179989501584, "grad_norm": 0.78125, "learning_rate": 0.0001578661804502882, "loss": 4.2586, "step": 8790 }, { "epoch": 0.9115216870046465, "grad_norm": 0.6875, "learning_rate": 0.00015785732117388698, "loss": 4.2912, "step": 8791 }, { "epoch": 0.9116253750591345, "grad_norm": 0.73046875, "learning_rate": 0.00015784846121483053, "loss": 4.3045, "step": 8792 }, { "epoch": 0.9117290631136227, "grad_norm": 0.69921875, "learning_rate": 0.00015783960057322335, "loss": 4.2654, "step": 8793 }, { "epoch": 0.9118327511681107, "grad_norm": 0.75, "learning_rate": 0.00015783073924917004, "loss": 4.2305, "step": 8794 }, { "epoch": 0.9119364392225988, "grad_norm": 0.72265625, "learning_rate": 0.00015782187724277519, "loss": 4.2843, "step": 8795 }, { "epoch": 0.9120401272770868, "grad_norm": 0.78125, "learning_rate": 0.00015781301455414324, "loss": 4.2485, "step": 8796 }, { "epoch": 0.912143815331575, "grad_norm": 0.66796875, "learning_rate": 0.00015780415118337885, "loss": 4.2562, "step": 8797 }, { "epoch": 0.912247503386063, "grad_norm": 0.76953125, "learning_rate": 0.00015779528713058655, "loss": 4.2176, "step": 8798 }, { "epoch": 0.9123511914405511, "grad_norm": 0.72265625, "learning_rate": 0.000157786422395871, "loss": 4.3066, "step": 8799 }, { "epoch": 0.9124548794950392, "grad_norm": 0.7734375, "learning_rate": 0.00015777755697933673, "loss": 4.2327, "step": 8800 }, { "epoch": 0.9125585675495272, "grad_norm": 0.7421875, "learning_rate": 0.00015776869088108834, "loss": 4.2274, "step": 8801 }, { "epoch": 0.9126622556040154, "grad_norm": 0.71484375, "learning_rate": 0.0001577598241012305, "loss": 4.2839, "step": 8802 }, { "epoch": 0.9127659436585034, "grad_norm": 0.74609375, "learning_rate": 0.00015775095663986778, "loss": 4.2804, "step": 8803 }, { "epoch": 0.9128696317129915, "grad_norm": 0.66796875, "learning_rate": 0.00015774208849710482, "loss": 4.299, "step": 8804 }, { "epoch": 0.9129733197674795, "grad_norm": 0.7578125, "learning_rate": 0.00015773321967304624, "loss": 4.272, "step": 8805 }, { "epoch": 0.9130770078219677, "grad_norm": 0.71484375, "learning_rate": 0.0001577243501677967, "loss": 4.2984, "step": 8806 }, { "epoch": 0.9131806958764557, "grad_norm": 0.75, "learning_rate": 0.00015771547998146086, "loss": 4.2805, "step": 8807 }, { "epoch": 0.9132843839309438, "grad_norm": 0.73046875, "learning_rate": 0.00015770660911414332, "loss": 4.2923, "step": 8808 }, { "epoch": 0.9133880719854318, "grad_norm": 0.65234375, "learning_rate": 0.00015769773756594885, "loss": 4.2588, "step": 8809 }, { "epoch": 0.9134917600399199, "grad_norm": 0.7421875, "learning_rate": 0.000157688865336982, "loss": 4.2737, "step": 8810 }, { "epoch": 0.913595448094408, "grad_norm": 0.67578125, "learning_rate": 0.00015767999242734756, "loss": 4.2774, "step": 8811 }, { "epoch": 0.9136991361488961, "grad_norm": 0.796875, "learning_rate": 0.00015767111883715018, "loss": 4.2914, "step": 8812 }, { "epoch": 0.9138028242033841, "grad_norm": 0.68359375, "learning_rate": 0.00015766224456649453, "loss": 4.2476, "step": 8813 }, { "epoch": 0.9139065122578722, "grad_norm": 0.8515625, "learning_rate": 0.00015765336961548538, "loss": 4.2929, "step": 8814 }, { "epoch": 0.9140102003123602, "grad_norm": 0.73046875, "learning_rate": 0.00015764449398422738, "loss": 4.2975, "step": 8815 }, { "epoch": 0.9141138883668484, "grad_norm": 0.79296875, "learning_rate": 0.00015763561767282534, "loss": 4.2574, "step": 8816 }, { "epoch": 0.9142175764213364, "grad_norm": 0.75390625, "learning_rate": 0.0001576267406813839, "loss": 4.2037, "step": 8817 }, { "epoch": 0.9143212644758245, "grad_norm": 0.81640625, "learning_rate": 0.0001576178630100078, "loss": 4.2451, "step": 8818 }, { "epoch": 0.9144249525303125, "grad_norm": 0.78125, "learning_rate": 0.00015760898465880184, "loss": 4.2579, "step": 8819 }, { "epoch": 0.9145286405848007, "grad_norm": 0.8203125, "learning_rate": 0.00015760010562787077, "loss": 4.289, "step": 8820 }, { "epoch": 0.9146323286392887, "grad_norm": 0.6953125, "learning_rate": 0.00015759122591731936, "loss": 4.2257, "step": 8821 }, { "epoch": 0.9147360166937768, "grad_norm": 0.671875, "learning_rate": 0.00015758234552725234, "loss": 4.224, "step": 8822 }, { "epoch": 0.9148397047482648, "grad_norm": 0.734375, "learning_rate": 0.00015757346445777448, "loss": 4.2634, "step": 8823 }, { "epoch": 0.9149433928027529, "grad_norm": 0.75390625, "learning_rate": 0.00015756458270899066, "loss": 4.3201, "step": 8824 }, { "epoch": 0.915047080857241, "grad_norm": 0.72265625, "learning_rate": 0.00015755570028100558, "loss": 4.2593, "step": 8825 }, { "epoch": 0.9151507689117291, "grad_norm": 0.70703125, "learning_rate": 0.00015754681717392407, "loss": 4.2888, "step": 8826 }, { "epoch": 0.9152544569662171, "grad_norm": 0.71875, "learning_rate": 0.00015753793338785092, "loss": 4.2838, "step": 8827 }, { "epoch": 0.9153581450207052, "grad_norm": 0.79296875, "learning_rate": 0.00015752904892289102, "loss": 4.2517, "step": 8828 }, { "epoch": 0.9154618330751932, "grad_norm": 0.6953125, "learning_rate": 0.00015752016377914915, "loss": 4.3019, "step": 8829 }, { "epoch": 0.9155655211296814, "grad_norm": 0.76953125, "learning_rate": 0.00015751127795673013, "loss": 4.283, "step": 8830 }, { "epoch": 0.9156692091841694, "grad_norm": 0.65625, "learning_rate": 0.00015750239145573885, "loss": 4.2745, "step": 8831 }, { "epoch": 0.9157728972386575, "grad_norm": 0.75390625, "learning_rate": 0.00015749350427628008, "loss": 4.3084, "step": 8832 }, { "epoch": 0.9158765852931455, "grad_norm": 0.66015625, "learning_rate": 0.00015748461641845878, "loss": 4.2416, "step": 8833 }, { "epoch": 0.9159802733476337, "grad_norm": 0.734375, "learning_rate": 0.00015747572788237977, "loss": 4.2328, "step": 8834 }, { "epoch": 0.9160839614021217, "grad_norm": 0.78515625, "learning_rate": 0.0001574668386681479, "loss": 4.2722, "step": 8835 }, { "epoch": 0.9161876494566098, "grad_norm": 0.80859375, "learning_rate": 0.0001574579487758681, "loss": 4.2735, "step": 8836 }, { "epoch": 0.9162913375110978, "grad_norm": 0.78515625, "learning_rate": 0.00015744905820564524, "loss": 4.2824, "step": 8837 }, { "epoch": 0.9163950255655859, "grad_norm": 0.69921875, "learning_rate": 0.0001574401669575842, "loss": 4.2671, "step": 8838 }, { "epoch": 0.916498713620074, "grad_norm": 0.80078125, "learning_rate": 0.00015743127503178994, "loss": 4.2662, "step": 8839 }, { "epoch": 0.9166024016745621, "grad_norm": 0.69921875, "learning_rate": 0.0001574223824283673, "loss": 4.2068, "step": 8840 }, { "epoch": 0.9167060897290501, "grad_norm": 0.703125, "learning_rate": 0.00015741348914742126, "loss": 4.2622, "step": 8841 }, { "epoch": 0.9168097777835382, "grad_norm": 0.73828125, "learning_rate": 0.00015740459518905677, "loss": 4.3, "step": 8842 }, { "epoch": 0.9169134658380262, "grad_norm": 0.64453125, "learning_rate": 0.0001573957005533787, "loss": 4.2615, "step": 8843 }, { "epoch": 0.9170171538925144, "grad_norm": 0.71484375, "learning_rate": 0.00015738680524049202, "loss": 4.2809, "step": 8844 }, { "epoch": 0.9171208419470025, "grad_norm": 0.69140625, "learning_rate": 0.00015737790925050173, "loss": 4.2593, "step": 8845 }, { "epoch": 0.9172245300014905, "grad_norm": 0.6875, "learning_rate": 0.00015736901258351277, "loss": 4.271, "step": 8846 }, { "epoch": 0.9173282180559786, "grad_norm": 0.67578125, "learning_rate": 0.00015736011523963006, "loss": 4.2784, "step": 8847 }, { "epoch": 0.9174319061104667, "grad_norm": 0.70703125, "learning_rate": 0.00015735121721895866, "loss": 4.2922, "step": 8848 }, { "epoch": 0.9175355941649548, "grad_norm": 0.6875, "learning_rate": 0.0001573423185216035, "loss": 4.236, "step": 8849 }, { "epoch": 0.9176392822194428, "grad_norm": 0.73828125, "learning_rate": 0.00015733341914766962, "loss": 4.2735, "step": 8850 }, { "epoch": 0.9177429702739309, "grad_norm": 0.70703125, "learning_rate": 0.00015732451909726195, "loss": 4.2984, "step": 8851 }, { "epoch": 0.9178466583284189, "grad_norm": 0.7265625, "learning_rate": 0.0001573156183704856, "loss": 4.2864, "step": 8852 }, { "epoch": 0.9179503463829071, "grad_norm": 0.6875, "learning_rate": 0.00015730671696744555, "loss": 4.2688, "step": 8853 }, { "epoch": 0.9180540344373951, "grad_norm": 0.8203125, "learning_rate": 0.0001572978148882468, "loss": 4.3005, "step": 8854 }, { "epoch": 0.9181577224918832, "grad_norm": 0.71875, "learning_rate": 0.00015728891213299435, "loss": 4.2281, "step": 8855 }, { "epoch": 0.9182614105463712, "grad_norm": 0.7265625, "learning_rate": 0.0001572800087017933, "loss": 4.2397, "step": 8856 }, { "epoch": 0.9183650986008594, "grad_norm": 0.6953125, "learning_rate": 0.00015727110459474878, "loss": 4.1826, "step": 8857 }, { "epoch": 0.9184687866553474, "grad_norm": 0.70703125, "learning_rate": 0.00015726219981196567, "loss": 4.2488, "step": 8858 }, { "epoch": 0.9185724747098355, "grad_norm": 0.80078125, "learning_rate": 0.00015725329435354918, "loss": 4.24, "step": 8859 }, { "epoch": 0.9186761627643235, "grad_norm": 0.6015625, "learning_rate": 0.00015724438821960432, "loss": 4.2979, "step": 8860 }, { "epoch": 0.9187798508188116, "grad_norm": 0.734375, "learning_rate": 0.00015723548141023617, "loss": 4.3016, "step": 8861 }, { "epoch": 0.9188835388732997, "grad_norm": 0.625, "learning_rate": 0.00015722657392554988, "loss": 4.274, "step": 8862 }, { "epoch": 0.9189872269277878, "grad_norm": 0.71484375, "learning_rate": 0.00015721766576565048, "loss": 4.2504, "step": 8863 }, { "epoch": 0.9190909149822758, "grad_norm": 0.73046875, "learning_rate": 0.00015720875693064313, "loss": 4.2652, "step": 8864 }, { "epoch": 0.9191946030367639, "grad_norm": 0.65234375, "learning_rate": 0.00015719984742063292, "loss": 4.2431, "step": 8865 }, { "epoch": 0.9192982910912519, "grad_norm": 0.7734375, "learning_rate": 0.00015719093723572496, "loss": 4.2611, "step": 8866 }, { "epoch": 0.9194019791457401, "grad_norm": 0.6484375, "learning_rate": 0.0001571820263760244, "loss": 4.2941, "step": 8867 }, { "epoch": 0.9195056672002281, "grad_norm": 0.80078125, "learning_rate": 0.00015717311484163634, "loss": 4.2652, "step": 8868 }, { "epoch": 0.9196093552547162, "grad_norm": 0.6953125, "learning_rate": 0.000157164202632666, "loss": 4.2666, "step": 8869 }, { "epoch": 0.9197130433092042, "grad_norm": 0.7265625, "learning_rate": 0.00015715528974921848, "loss": 4.2376, "step": 8870 }, { "epoch": 0.9198167313636924, "grad_norm": 0.73828125, "learning_rate": 0.00015714637619139898, "loss": 4.3034, "step": 8871 }, { "epoch": 0.9199204194181804, "grad_norm": 0.70703125, "learning_rate": 0.00015713746195931264, "loss": 4.2802, "step": 8872 }, { "epoch": 0.9200241074726685, "grad_norm": 0.7890625, "learning_rate": 0.0001571285470530646, "loss": 4.2629, "step": 8873 }, { "epoch": 0.9201277955271565, "grad_norm": 0.7421875, "learning_rate": 0.00015711963147276015, "loss": 4.2792, "step": 8874 }, { "epoch": 0.9202314835816446, "grad_norm": 0.828125, "learning_rate": 0.00015711071521850437, "loss": 4.2617, "step": 8875 }, { "epoch": 0.9203351716361327, "grad_norm": 0.71484375, "learning_rate": 0.00015710179829040257, "loss": 4.3119, "step": 8876 }, { "epoch": 0.9204388596906208, "grad_norm": 0.87109375, "learning_rate": 0.00015709288068855987, "loss": 4.3074, "step": 8877 }, { "epoch": 0.9205425477451088, "grad_norm": 0.765625, "learning_rate": 0.00015708396241308155, "loss": 4.2577, "step": 8878 }, { "epoch": 0.9206462357995969, "grad_norm": 0.8671875, "learning_rate": 0.00015707504346407282, "loss": 4.2658, "step": 8879 }, { "epoch": 0.9207499238540849, "grad_norm": 0.71875, "learning_rate": 0.00015706612384163888, "loss": 4.2884, "step": 8880 }, { "epoch": 0.9208536119085731, "grad_norm": 0.8046875, "learning_rate": 0.00015705720354588503, "loss": 4.2734, "step": 8881 }, { "epoch": 0.9209572999630611, "grad_norm": 0.6796875, "learning_rate": 0.00015704828257691647, "loss": 4.2624, "step": 8882 }, { "epoch": 0.9210609880175492, "grad_norm": 0.80078125, "learning_rate": 0.00015703936093483848, "loss": 4.2541, "step": 8883 }, { "epoch": 0.9211646760720372, "grad_norm": 0.73046875, "learning_rate": 0.00015703043861975635, "loss": 4.2992, "step": 8884 }, { "epoch": 0.9212683641265254, "grad_norm": 0.8203125, "learning_rate": 0.00015702151563177531, "loss": 4.2204, "step": 8885 }, { "epoch": 0.9213720521810134, "grad_norm": 0.75, "learning_rate": 0.00015701259197100067, "loss": 4.2905, "step": 8886 }, { "epoch": 0.9214757402355015, "grad_norm": 0.79296875, "learning_rate": 0.00015700366763753772, "loss": 4.2781, "step": 8887 }, { "epoch": 0.9215794282899896, "grad_norm": 0.76953125, "learning_rate": 0.0001569947426314917, "loss": 4.2797, "step": 8888 }, { "epoch": 0.9216831163444776, "grad_norm": 0.79296875, "learning_rate": 0.00015698581695296798, "loss": 4.2928, "step": 8889 }, { "epoch": 0.9217868043989658, "grad_norm": 0.76953125, "learning_rate": 0.00015697689060207188, "loss": 4.2258, "step": 8890 }, { "epoch": 0.9218904924534538, "grad_norm": 0.7734375, "learning_rate": 0.00015696796357890868, "loss": 4.2968, "step": 8891 }, { "epoch": 0.9219941805079419, "grad_norm": 0.8203125, "learning_rate": 0.00015695903588358373, "loss": 4.2534, "step": 8892 }, { "epoch": 0.9220978685624299, "grad_norm": 0.8046875, "learning_rate": 0.00015695010751620237, "loss": 4.2481, "step": 8893 }, { "epoch": 0.922201556616918, "grad_norm": 0.6796875, "learning_rate": 0.0001569411784768699, "loss": 4.2957, "step": 8894 }, { "epoch": 0.9223052446714061, "grad_norm": 0.78515625, "learning_rate": 0.00015693224876569178, "loss": 4.2965, "step": 8895 }, { "epoch": 0.9224089327258942, "grad_norm": 0.63671875, "learning_rate": 0.00015692331838277327, "loss": 4.2929, "step": 8896 }, { "epoch": 0.9225126207803822, "grad_norm": 0.7421875, "learning_rate": 0.00015691438732821979, "loss": 4.3019, "step": 8897 }, { "epoch": 0.9226163088348703, "grad_norm": 0.75, "learning_rate": 0.00015690545560213668, "loss": 4.2573, "step": 8898 }, { "epoch": 0.9227199968893584, "grad_norm": 0.71484375, "learning_rate": 0.00015689652320462937, "loss": 4.2578, "step": 8899 }, { "epoch": 0.9228236849438465, "grad_norm": 0.796875, "learning_rate": 0.0001568875901358032, "loss": 4.2717, "step": 8900 }, { "epoch": 0.9229273729983345, "grad_norm": 0.703125, "learning_rate": 0.00015687865639576361, "loss": 4.1983, "step": 8901 }, { "epoch": 0.9230310610528226, "grad_norm": 0.8046875, "learning_rate": 0.000156869721984616, "loss": 4.2582, "step": 8902 }, { "epoch": 0.9231347491073106, "grad_norm": 0.75, "learning_rate": 0.0001568607869024658, "loss": 4.295, "step": 8903 }, { "epoch": 0.9232384371617988, "grad_norm": 0.8046875, "learning_rate": 0.0001568518511494184, "loss": 4.3088, "step": 8904 }, { "epoch": 0.9233421252162868, "grad_norm": 0.8125, "learning_rate": 0.00015684291472557927, "loss": 4.2766, "step": 8905 }, { "epoch": 0.9234458132707749, "grad_norm": 0.76953125, "learning_rate": 0.0001568339776310538, "loss": 4.2754, "step": 8906 }, { "epoch": 0.9235495013252629, "grad_norm": 0.80078125, "learning_rate": 0.0001568250398659475, "loss": 4.3055, "step": 8907 }, { "epoch": 0.923653189379751, "grad_norm": 0.7578125, "learning_rate": 0.00015681610143036579, "loss": 4.2631, "step": 8908 }, { "epoch": 0.9237568774342391, "grad_norm": 0.83984375, "learning_rate": 0.0001568071623244141, "loss": 4.2663, "step": 8909 }, { "epoch": 0.9238605654887272, "grad_norm": 0.6953125, "learning_rate": 0.00015679822254819798, "loss": 4.2789, "step": 8910 }, { "epoch": 0.9239642535432152, "grad_norm": 0.8125, "learning_rate": 0.0001567892821018229, "loss": 4.2898, "step": 8911 }, { "epoch": 0.9240679415977033, "grad_norm": 0.7421875, "learning_rate": 0.00015678034098539427, "loss": 4.267, "step": 8912 }, { "epoch": 0.9241716296521914, "grad_norm": 0.8125, "learning_rate": 0.00015677139919901764, "loss": 4.2639, "step": 8913 }, { "epoch": 0.9242753177066795, "grad_norm": 0.80859375, "learning_rate": 0.00015676245674279855, "loss": 4.2804, "step": 8914 }, { "epoch": 0.9243790057611675, "grad_norm": 0.828125, "learning_rate": 0.00015675351361684242, "loss": 4.2755, "step": 8915 }, { "epoch": 0.9244826938156556, "grad_norm": 0.76171875, "learning_rate": 0.00015674456982125485, "loss": 4.2354, "step": 8916 }, { "epoch": 0.9245863818701436, "grad_norm": 0.83203125, "learning_rate": 0.00015673562535614135, "loss": 4.2736, "step": 8917 }, { "epoch": 0.9246900699246318, "grad_norm": 0.68359375, "learning_rate": 0.0001567266802216074, "loss": 4.2275, "step": 8918 }, { "epoch": 0.9247937579791198, "grad_norm": 0.78515625, "learning_rate": 0.0001567177344177586, "loss": 4.2584, "step": 8919 }, { "epoch": 0.9248974460336079, "grad_norm": 0.7109375, "learning_rate": 0.0001567087879447005, "loss": 4.2091, "step": 8920 }, { "epoch": 0.9250011340880959, "grad_norm": 0.78125, "learning_rate": 0.00015669984080253865, "loss": 4.2761, "step": 8921 }, { "epoch": 0.925104822142584, "grad_norm": 0.70703125, "learning_rate": 0.00015669089299137862, "loss": 4.3291, "step": 8922 }, { "epoch": 0.9252085101970721, "grad_norm": 0.70703125, "learning_rate": 0.00015668194451132596, "loss": 4.2769, "step": 8923 }, { "epoch": 0.9253121982515602, "grad_norm": 0.74609375, "learning_rate": 0.00015667299536248626, "loss": 4.3015, "step": 8924 }, { "epoch": 0.9254158863060482, "grad_norm": 0.69140625, "learning_rate": 0.00015666404554496514, "loss": 4.275, "step": 8925 }, { "epoch": 0.9255195743605363, "grad_norm": 0.703125, "learning_rate": 0.0001566550950588682, "loss": 4.2754, "step": 8926 }, { "epoch": 0.9256232624150244, "grad_norm": 0.640625, "learning_rate": 0.000156646143904301, "loss": 4.298, "step": 8927 }, { "epoch": 0.9257269504695125, "grad_norm": 0.6953125, "learning_rate": 0.00015663719208136917, "loss": 4.2431, "step": 8928 }, { "epoch": 0.9258306385240005, "grad_norm": 0.6484375, "learning_rate": 0.00015662823959017836, "loss": 4.1848, "step": 8929 }, { "epoch": 0.9259343265784886, "grad_norm": 0.66796875, "learning_rate": 0.00015661928643083417, "loss": 4.2754, "step": 8930 }, { "epoch": 0.9260380146329766, "grad_norm": 0.5703125, "learning_rate": 0.00015661033260344224, "loss": 4.2398, "step": 8931 }, { "epoch": 0.9261417026874648, "grad_norm": 0.609375, "learning_rate": 0.00015660137810810825, "loss": 4.2746, "step": 8932 }, { "epoch": 0.9262453907419529, "grad_norm": 0.67578125, "learning_rate": 0.0001565924229449378, "loss": 4.2448, "step": 8933 }, { "epoch": 0.9263490787964409, "grad_norm": 0.7421875, "learning_rate": 0.00015658346711403662, "loss": 4.2526, "step": 8934 }, { "epoch": 0.926452766850929, "grad_norm": 0.6953125, "learning_rate": 0.0001565745106155103, "loss": 4.3112, "step": 8935 }, { "epoch": 0.926556454905417, "grad_norm": 0.70703125, "learning_rate": 0.0001565655534494646, "loss": 4.2805, "step": 8936 }, { "epoch": 0.9266601429599052, "grad_norm": 0.625, "learning_rate": 0.0001565565956160051, "loss": 4.2638, "step": 8937 }, { "epoch": 0.9267638310143932, "grad_norm": 0.73828125, "learning_rate": 0.0001565476371152376, "loss": 4.2515, "step": 8938 }, { "epoch": 0.9268675190688813, "grad_norm": 0.66796875, "learning_rate": 0.00015653867794726773, "loss": 4.2495, "step": 8939 }, { "epoch": 0.9269712071233693, "grad_norm": 0.78125, "learning_rate": 0.00015652971811220125, "loss": 4.2629, "step": 8940 }, { "epoch": 0.9270748951778575, "grad_norm": 0.7265625, "learning_rate": 0.00015652075761014384, "loss": 4.2958, "step": 8941 }, { "epoch": 0.9271785832323455, "grad_norm": 0.68359375, "learning_rate": 0.00015651179644120123, "loss": 4.1946, "step": 8942 }, { "epoch": 0.9272822712868336, "grad_norm": 0.71875, "learning_rate": 0.00015650283460547914, "loss": 4.259, "step": 8943 }, { "epoch": 0.9273859593413216, "grad_norm": 0.69140625, "learning_rate": 0.00015649387210308336, "loss": 4.2503, "step": 8944 }, { "epoch": 0.9274896473958097, "grad_norm": 0.7265625, "learning_rate": 0.0001564849089341196, "loss": 4.2597, "step": 8945 }, { "epoch": 0.9275933354502978, "grad_norm": 0.7109375, "learning_rate": 0.0001564759450986936, "loss": 4.2625, "step": 8946 }, { "epoch": 0.9276970235047859, "grad_norm": 0.71875, "learning_rate": 0.0001564669805969112, "loss": 4.3084, "step": 8947 }, { "epoch": 0.9278007115592739, "grad_norm": 0.72265625, "learning_rate": 0.00015645801542887806, "loss": 4.2666, "step": 8948 }, { "epoch": 0.927904399613762, "grad_norm": 0.71875, "learning_rate": 0.00015644904959470003, "loss": 4.2831, "step": 8949 }, { "epoch": 0.92800808766825, "grad_norm": 0.734375, "learning_rate": 0.00015644008309448291, "loss": 4.2836, "step": 8950 }, { "epoch": 0.9281117757227382, "grad_norm": 0.70703125, "learning_rate": 0.00015643111592833245, "loss": 4.2428, "step": 8951 }, { "epoch": 0.9282154637772262, "grad_norm": 0.69140625, "learning_rate": 0.00015642214809635444, "loss": 4.2951, "step": 8952 }, { "epoch": 0.9283191518317143, "grad_norm": 0.71484375, "learning_rate": 0.00015641317959865476, "loss": 4.2916, "step": 8953 }, { "epoch": 0.9284228398862023, "grad_norm": 0.703125, "learning_rate": 0.0001564042104353392, "loss": 4.3157, "step": 8954 }, { "epoch": 0.9285265279406905, "grad_norm": 0.71875, "learning_rate": 0.00015639524060651356, "loss": 4.2512, "step": 8955 }, { "epoch": 0.9286302159951785, "grad_norm": 0.72265625, "learning_rate": 0.0001563862701122837, "loss": 4.2779, "step": 8956 }, { "epoch": 0.9287339040496666, "grad_norm": 0.75, "learning_rate": 0.00015637729895275545, "loss": 4.2798, "step": 8957 }, { "epoch": 0.9288375921041546, "grad_norm": 0.75390625, "learning_rate": 0.0001563683271280347, "loss": 4.2495, "step": 8958 }, { "epoch": 0.9289412801586427, "grad_norm": 0.78515625, "learning_rate": 0.00015635935463822722, "loss": 4.2622, "step": 8959 }, { "epoch": 0.9290449682131308, "grad_norm": 0.7578125, "learning_rate": 0.00015635038148343895, "loss": 4.2685, "step": 8960 }, { "epoch": 0.9291486562676189, "grad_norm": 0.85546875, "learning_rate": 0.00015634140766377573, "loss": 4.264, "step": 8961 }, { "epoch": 0.9292523443221069, "grad_norm": 0.734375, "learning_rate": 0.0001563324331793435, "loss": 4.31, "step": 8962 }, { "epoch": 0.929356032376595, "grad_norm": 0.97265625, "learning_rate": 0.00015632345803024805, "loss": 4.2839, "step": 8963 }, { "epoch": 0.929459720431083, "grad_norm": 0.73828125, "learning_rate": 0.00015631448221659536, "loss": 4.3095, "step": 8964 }, { "epoch": 0.9295634084855712, "grad_norm": 0.86328125, "learning_rate": 0.00015630550573849132, "loss": 4.2685, "step": 8965 }, { "epoch": 0.9296670965400592, "grad_norm": 0.7890625, "learning_rate": 0.00015629652859604182, "loss": 4.2873, "step": 8966 }, { "epoch": 0.9297707845945473, "grad_norm": 0.6953125, "learning_rate": 0.00015628755078935278, "loss": 4.2562, "step": 8967 }, { "epoch": 0.9298744726490353, "grad_norm": 0.87890625, "learning_rate": 0.00015627857231853014, "loss": 4.2917, "step": 8968 }, { "epoch": 0.9299781607035235, "grad_norm": 0.6953125, "learning_rate": 0.0001562695931836798, "loss": 4.2779, "step": 8969 }, { "epoch": 0.9300818487580115, "grad_norm": 0.796875, "learning_rate": 0.00015626061338490783, "loss": 4.2989, "step": 8970 }, { "epoch": 0.9301855368124996, "grad_norm": 0.8046875, "learning_rate": 0.00015625163292232004, "loss": 4.2377, "step": 8971 }, { "epoch": 0.9302892248669876, "grad_norm": 0.85546875, "learning_rate": 0.00015624265179602244, "loss": 4.2382, "step": 8972 }, { "epoch": 0.9303929129214757, "grad_norm": 0.78515625, "learning_rate": 0.00015623367000612098, "loss": 4.2848, "step": 8973 }, { "epoch": 0.9304966009759638, "grad_norm": 0.859375, "learning_rate": 0.0001562246875527217, "loss": 4.2626, "step": 8974 }, { "epoch": 0.9306002890304519, "grad_norm": 0.765625, "learning_rate": 0.00015621570443593053, "loss": 4.2344, "step": 8975 }, { "epoch": 0.9307039770849399, "grad_norm": 0.8359375, "learning_rate": 0.00015620672065585345, "loss": 4.2367, "step": 8976 }, { "epoch": 0.930807665139428, "grad_norm": 0.7578125, "learning_rate": 0.00015619773621259648, "loss": 4.2997, "step": 8977 }, { "epoch": 0.9309113531939162, "grad_norm": 0.7734375, "learning_rate": 0.00015618875110626562, "loss": 4.2668, "step": 8978 }, { "epoch": 0.9310150412484042, "grad_norm": 0.77734375, "learning_rate": 0.00015617976533696692, "loss": 4.2893, "step": 8979 }, { "epoch": 0.9311187293028923, "grad_norm": 0.81640625, "learning_rate": 0.00015617077890480637, "loss": 4.2655, "step": 8980 }, { "epoch": 0.9312224173573803, "grad_norm": 0.70703125, "learning_rate": 0.00015616179180988996, "loss": 4.2781, "step": 8981 }, { "epoch": 0.9313261054118684, "grad_norm": 0.80859375, "learning_rate": 0.00015615280405232383, "loss": 4.2794, "step": 8982 }, { "epoch": 0.9314297934663565, "grad_norm": 0.828125, "learning_rate": 0.00015614381563221395, "loss": 4.307, "step": 8983 }, { "epoch": 0.9315334815208446, "grad_norm": 0.7578125, "learning_rate": 0.00015613482654966638, "loss": 4.286, "step": 8984 }, { "epoch": 0.9316371695753326, "grad_norm": 0.78125, "learning_rate": 0.00015612583680478718, "loss": 4.23, "step": 8985 }, { "epoch": 0.9317408576298207, "grad_norm": 0.765625, "learning_rate": 0.0001561168463976825, "loss": 4.2303, "step": 8986 }, { "epoch": 0.9318445456843087, "grad_norm": 0.79296875, "learning_rate": 0.0001561078553284583, "loss": 4.33, "step": 8987 }, { "epoch": 0.9319482337387969, "grad_norm": 0.8359375, "learning_rate": 0.0001560988635972207, "loss": 4.2243, "step": 8988 }, { "epoch": 0.9320519217932849, "grad_norm": 0.859375, "learning_rate": 0.0001560898712040758, "loss": 4.2901, "step": 8989 }, { "epoch": 0.932155609847773, "grad_norm": 0.80078125, "learning_rate": 0.00015608087814912978, "loss": 4.2306, "step": 8990 }, { "epoch": 0.932259297902261, "grad_norm": 0.76953125, "learning_rate": 0.00015607188443248862, "loss": 4.2759, "step": 8991 }, { "epoch": 0.9323629859567492, "grad_norm": 0.7734375, "learning_rate": 0.00015606289005425848, "loss": 4.2388, "step": 8992 }, { "epoch": 0.9324666740112372, "grad_norm": 0.84375, "learning_rate": 0.00015605389501454554, "loss": 4.2662, "step": 8993 }, { "epoch": 0.9325703620657253, "grad_norm": 0.75390625, "learning_rate": 0.00015604489931345588, "loss": 4.246, "step": 8994 }, { "epoch": 0.9326740501202133, "grad_norm": 0.83203125, "learning_rate": 0.00015603590295109565, "loss": 4.2991, "step": 8995 }, { "epoch": 0.9327777381747014, "grad_norm": 0.76953125, "learning_rate": 0.000156026905927571, "loss": 4.2641, "step": 8996 }, { "epoch": 0.9328814262291895, "grad_norm": 0.98828125, "learning_rate": 0.00015601790824298808, "loss": 4.2938, "step": 8997 }, { "epoch": 0.9329851142836776, "grad_norm": 0.82421875, "learning_rate": 0.00015600890989745305, "loss": 4.2682, "step": 8998 }, { "epoch": 0.9330888023381656, "grad_norm": 0.91796875, "learning_rate": 0.00015599991089107208, "loss": 4.2853, "step": 8999 }, { "epoch": 0.9331924903926537, "grad_norm": 0.8203125, "learning_rate": 0.00015599091122395139, "loss": 4.2797, "step": 9000 }, { "epoch": 0.9332961784471417, "grad_norm": 0.921875, "learning_rate": 0.0001559819108961971, "loss": 4.2417, "step": 9001 }, { "epoch": 0.9333998665016299, "grad_norm": 0.77734375, "learning_rate": 0.00015597290990791544, "loss": 4.2504, "step": 9002 }, { "epoch": 0.9335035545561179, "grad_norm": 0.91796875, "learning_rate": 0.00015596390825921264, "loss": 4.2459, "step": 9003 }, { "epoch": 0.933607242610606, "grad_norm": 0.71875, "learning_rate": 0.00015595490595019483, "loss": 4.2283, "step": 9004 }, { "epoch": 0.933710930665094, "grad_norm": 0.86328125, "learning_rate": 0.00015594590298096832, "loss": 4.2622, "step": 9005 }, { "epoch": 0.9338146187195822, "grad_norm": 0.7890625, "learning_rate": 0.00015593689935163924, "loss": 4.2009, "step": 9006 }, { "epoch": 0.9339183067740702, "grad_norm": 0.76953125, "learning_rate": 0.00015592789506231394, "loss": 4.2585, "step": 9007 }, { "epoch": 0.9340219948285583, "grad_norm": 0.75390625, "learning_rate": 0.00015591889011309858, "loss": 4.2724, "step": 9008 }, { "epoch": 0.9341256828830463, "grad_norm": 0.75390625, "learning_rate": 0.00015590988450409939, "loss": 4.301, "step": 9009 }, { "epoch": 0.9342293709375344, "grad_norm": 0.68359375, "learning_rate": 0.00015590087823542267, "loss": 4.2327, "step": 9010 }, { "epoch": 0.9343330589920225, "grad_norm": 0.78515625, "learning_rate": 0.0001558918713071747, "loss": 4.2382, "step": 9011 }, { "epoch": 0.9344367470465106, "grad_norm": 0.6328125, "learning_rate": 0.0001558828637194617, "loss": 4.2565, "step": 9012 }, { "epoch": 0.9345404351009986, "grad_norm": 0.7734375, "learning_rate": 0.00015587385547239, "loss": 4.2808, "step": 9013 }, { "epoch": 0.9346441231554867, "grad_norm": 0.671875, "learning_rate": 0.0001558648465660659, "loss": 4.2589, "step": 9014 }, { "epoch": 0.9347478112099747, "grad_norm": 0.83984375, "learning_rate": 0.00015585583700059564, "loss": 4.2889, "step": 9015 }, { "epoch": 0.9348514992644629, "grad_norm": 0.73046875, "learning_rate": 0.0001558468267760855, "loss": 4.2689, "step": 9016 }, { "epoch": 0.9349551873189509, "grad_norm": 0.74609375, "learning_rate": 0.00015583781589264186, "loss": 4.2566, "step": 9017 }, { "epoch": 0.935058875373439, "grad_norm": 0.703125, "learning_rate": 0.00015582880435037103, "loss": 4.2746, "step": 9018 }, { "epoch": 0.935162563427927, "grad_norm": 0.73828125, "learning_rate": 0.00015581979214937932, "loss": 4.2494, "step": 9019 }, { "epoch": 0.9352662514824152, "grad_norm": 0.71875, "learning_rate": 0.00015581077928977308, "loss": 4.3052, "step": 9020 }, { "epoch": 0.9353699395369032, "grad_norm": 0.78515625, "learning_rate": 0.0001558017657716586, "loss": 4.2869, "step": 9021 }, { "epoch": 0.9354736275913913, "grad_norm": 0.7421875, "learning_rate": 0.0001557927515951423, "loss": 4.1821, "step": 9022 }, { "epoch": 0.9355773156458794, "grad_norm": 0.765625, "learning_rate": 0.0001557837367603305, "loss": 4.2827, "step": 9023 }, { "epoch": 0.9356810037003674, "grad_norm": 0.73828125, "learning_rate": 0.0001557747212673296, "loss": 4.2785, "step": 9024 }, { "epoch": 0.9357846917548556, "grad_norm": 0.6875, "learning_rate": 0.0001557657051162459, "loss": 4.2271, "step": 9025 }, { "epoch": 0.9358883798093436, "grad_norm": 0.6796875, "learning_rate": 0.00015575668830718583, "loss": 4.26, "step": 9026 }, { "epoch": 0.9359920678638317, "grad_norm": 0.77734375, "learning_rate": 0.0001557476708402558, "loss": 4.2341, "step": 9027 }, { "epoch": 0.9360957559183197, "grad_norm": 0.7265625, "learning_rate": 0.00015573865271556217, "loss": 4.2817, "step": 9028 }, { "epoch": 0.9361994439728079, "grad_norm": 0.72265625, "learning_rate": 0.00015572963393321136, "loss": 4.294, "step": 9029 }, { "epoch": 0.9363031320272959, "grad_norm": 0.67578125, "learning_rate": 0.00015572061449330976, "loss": 4.2312, "step": 9030 }, { "epoch": 0.936406820081784, "grad_norm": 0.7109375, "learning_rate": 0.00015571159439596382, "loss": 4.2447, "step": 9031 }, { "epoch": 0.936510508136272, "grad_norm": 0.6875, "learning_rate": 0.00015570257364127995, "loss": 4.2631, "step": 9032 }, { "epoch": 0.9366141961907601, "grad_norm": 0.65625, "learning_rate": 0.0001556935522293646, "loss": 4.2454, "step": 9033 }, { "epoch": 0.9367178842452482, "grad_norm": 0.765625, "learning_rate": 0.0001556845301603242, "loss": 4.2791, "step": 9034 }, { "epoch": 0.9368215722997363, "grad_norm": 0.69140625, "learning_rate": 0.0001556755074342652, "loss": 4.2614, "step": 9035 }, { "epoch": 0.9369252603542243, "grad_norm": 0.72265625, "learning_rate": 0.0001556664840512941, "loss": 4.2768, "step": 9036 }, { "epoch": 0.9370289484087124, "grad_norm": 0.734375, "learning_rate": 0.0001556574600115173, "loss": 4.2986, "step": 9037 }, { "epoch": 0.9371326364632004, "grad_norm": 0.7421875, "learning_rate": 0.0001556484353150413, "loss": 4.3204, "step": 9038 }, { "epoch": 0.9372363245176886, "grad_norm": 0.6953125, "learning_rate": 0.0001556394099619726, "loss": 4.262, "step": 9039 }, { "epoch": 0.9373400125721766, "grad_norm": 0.7421875, "learning_rate": 0.00015563038395241766, "loss": 4.3063, "step": 9040 }, { "epoch": 0.9374437006266647, "grad_norm": 0.734375, "learning_rate": 0.00015562135728648303, "loss": 4.2485, "step": 9041 }, { "epoch": 0.9375473886811527, "grad_norm": 0.703125, "learning_rate": 0.00015561232996427513, "loss": 4.2451, "step": 9042 }, { "epoch": 0.9376510767356409, "grad_norm": 0.6328125, "learning_rate": 0.00015560330198590054, "loss": 4.2971, "step": 9043 }, { "epoch": 0.9377547647901289, "grad_norm": 0.6875, "learning_rate": 0.00015559427335146579, "loss": 4.2891, "step": 9044 }, { "epoch": 0.937858452844617, "grad_norm": 0.71875, "learning_rate": 0.00015558524406107733, "loss": 4.27, "step": 9045 }, { "epoch": 0.937962140899105, "grad_norm": 0.64453125, "learning_rate": 0.00015557621411484176, "loss": 4.2278, "step": 9046 }, { "epoch": 0.9380658289535931, "grad_norm": 0.73828125, "learning_rate": 0.00015556718351286564, "loss": 4.2598, "step": 9047 }, { "epoch": 0.9381695170080812, "grad_norm": 0.65234375, "learning_rate": 0.00015555815225525547, "loss": 4.2699, "step": 9048 }, { "epoch": 0.9382732050625693, "grad_norm": 0.65625, "learning_rate": 0.00015554912034211783, "loss": 4.2374, "step": 9049 }, { "epoch": 0.9383768931170573, "grad_norm": 0.6640625, "learning_rate": 0.00015554008777355927, "loss": 4.2409, "step": 9050 }, { "epoch": 0.9384805811715454, "grad_norm": 0.63671875, "learning_rate": 0.00015553105454968642, "loss": 4.2517, "step": 9051 }, { "epoch": 0.9385842692260334, "grad_norm": 0.65234375, "learning_rate": 0.0001555220206706058, "loss": 4.2676, "step": 9052 }, { "epoch": 0.9386879572805216, "grad_norm": 0.65234375, "learning_rate": 0.00015551298613642405, "loss": 4.2632, "step": 9053 }, { "epoch": 0.9387916453350096, "grad_norm": 0.73828125, "learning_rate": 0.0001555039509472477, "loss": 4.2829, "step": 9054 }, { "epoch": 0.9388953333894977, "grad_norm": 0.6171875, "learning_rate": 0.00015549491510318344, "loss": 4.2757, "step": 9055 }, { "epoch": 0.9389990214439857, "grad_norm": 0.7578125, "learning_rate": 0.00015548587860433782, "loss": 4.2739, "step": 9056 }, { "epoch": 0.9391027094984739, "grad_norm": 0.70703125, "learning_rate": 0.0001554768414508175, "loss": 4.3261, "step": 9057 }, { "epoch": 0.9392063975529619, "grad_norm": 0.72265625, "learning_rate": 0.0001554678036427291, "loss": 4.2985, "step": 9058 }, { "epoch": 0.93931008560745, "grad_norm": 0.7421875, "learning_rate": 0.00015545876518017923, "loss": 4.2876, "step": 9059 }, { "epoch": 0.939413773661938, "grad_norm": 0.734375, "learning_rate": 0.00015544972606327458, "loss": 4.256, "step": 9060 }, { "epoch": 0.9395174617164261, "grad_norm": 0.78125, "learning_rate": 0.00015544068629212173, "loss": 4.2775, "step": 9061 }, { "epoch": 0.9396211497709142, "grad_norm": 0.71484375, "learning_rate": 0.00015543164586682742, "loss": 4.28, "step": 9062 }, { "epoch": 0.9397248378254023, "grad_norm": 0.71484375, "learning_rate": 0.00015542260478749827, "loss": 4.2796, "step": 9063 }, { "epoch": 0.9398285258798903, "grad_norm": 0.7109375, "learning_rate": 0.00015541356305424095, "loss": 4.2735, "step": 9064 }, { "epoch": 0.9399322139343784, "grad_norm": 0.6640625, "learning_rate": 0.0001554045206671622, "loss": 4.2218, "step": 9065 }, { "epoch": 0.9400359019888664, "grad_norm": 0.72265625, "learning_rate": 0.00015539547762636864, "loss": 4.2663, "step": 9066 }, { "epoch": 0.9401395900433546, "grad_norm": 0.6484375, "learning_rate": 0.00015538643393196703, "loss": 4.2504, "step": 9067 }, { "epoch": 0.9402432780978427, "grad_norm": 0.71875, "learning_rate": 0.000155377389584064, "loss": 4.1995, "step": 9068 }, { "epoch": 0.9403469661523307, "grad_norm": 0.66796875, "learning_rate": 0.00015536834458276634, "loss": 4.2034, "step": 9069 }, { "epoch": 0.9404506542068188, "grad_norm": 0.75390625, "learning_rate": 0.00015535929892818073, "loss": 4.2722, "step": 9070 }, { "epoch": 0.9405543422613069, "grad_norm": 0.76171875, "learning_rate": 0.0001553502526204139, "loss": 4.2463, "step": 9071 }, { "epoch": 0.940658030315795, "grad_norm": 0.7421875, "learning_rate": 0.00015534120565957263, "loss": 4.2895, "step": 9072 }, { "epoch": 0.940761718370283, "grad_norm": 0.7890625, "learning_rate": 0.00015533215804576362, "loss": 4.2403, "step": 9073 }, { "epoch": 0.9408654064247711, "grad_norm": 0.68359375, "learning_rate": 0.0001553231097790936, "loss": 4.2882, "step": 9074 }, { "epoch": 0.9409690944792591, "grad_norm": 0.703125, "learning_rate": 0.0001553140608596694, "loss": 4.2911, "step": 9075 }, { "epoch": 0.9410727825337473, "grad_norm": 0.72265625, "learning_rate": 0.00015530501128759773, "loss": 4.2612, "step": 9076 }, { "epoch": 0.9411764705882353, "grad_norm": 0.79296875, "learning_rate": 0.00015529596106298542, "loss": 4.2227, "step": 9077 }, { "epoch": 0.9412801586427234, "grad_norm": 0.75, "learning_rate": 0.00015528691018593918, "loss": 4.2334, "step": 9078 }, { "epoch": 0.9413838466972114, "grad_norm": 0.7578125, "learning_rate": 0.00015527785865656588, "loss": 4.2482, "step": 9079 }, { "epoch": 0.9414875347516996, "grad_norm": 0.76171875, "learning_rate": 0.00015526880647497224, "loss": 4.2654, "step": 9080 }, { "epoch": 0.9415912228061876, "grad_norm": 0.7109375, "learning_rate": 0.00015525975364126513, "loss": 4.3063, "step": 9081 }, { "epoch": 0.9416949108606757, "grad_norm": 0.83203125, "learning_rate": 0.00015525070015555135, "loss": 4.2706, "step": 9082 }, { "epoch": 0.9417985989151637, "grad_norm": 0.73046875, "learning_rate": 0.00015524164601793768, "loss": 4.2785, "step": 9083 }, { "epoch": 0.9419022869696518, "grad_norm": 0.8671875, "learning_rate": 0.000155232591228531, "loss": 4.234, "step": 9084 }, { "epoch": 0.9420059750241399, "grad_norm": 0.70703125, "learning_rate": 0.0001552235357874381, "loss": 4.2799, "step": 9085 }, { "epoch": 0.942109663078628, "grad_norm": 0.78125, "learning_rate": 0.0001552144796947659, "loss": 4.2756, "step": 9086 }, { "epoch": 0.942213351133116, "grad_norm": 0.765625, "learning_rate": 0.00015520542295062117, "loss": 4.2696, "step": 9087 }, { "epoch": 0.9423170391876041, "grad_norm": 0.71484375, "learning_rate": 0.00015519636555511078, "loss": 4.2314, "step": 9088 }, { "epoch": 0.9424207272420921, "grad_norm": 0.77734375, "learning_rate": 0.00015518730750834167, "loss": 4.2627, "step": 9089 }, { "epoch": 0.9425244152965803, "grad_norm": 0.74609375, "learning_rate": 0.00015517824881042066, "loss": 4.2633, "step": 9090 }, { "epoch": 0.9426281033510683, "grad_norm": 0.7265625, "learning_rate": 0.0001551691894614546, "loss": 4.2876, "step": 9091 }, { "epoch": 0.9427317914055564, "grad_norm": 0.8046875, "learning_rate": 0.00015516012946155047, "loss": 4.231, "step": 9092 }, { "epoch": 0.9428354794600444, "grad_norm": 0.7109375, "learning_rate": 0.00015515106881081508, "loss": 4.3029, "step": 9093 }, { "epoch": 0.9429391675145326, "grad_norm": 0.67578125, "learning_rate": 0.00015514200750935543, "loss": 4.28, "step": 9094 }, { "epoch": 0.9430428555690206, "grad_norm": 0.80078125, "learning_rate": 0.00015513294555727833, "loss": 4.2345, "step": 9095 }, { "epoch": 0.9431465436235087, "grad_norm": 0.69921875, "learning_rate": 0.00015512388295469077, "loss": 4.2922, "step": 9096 }, { "epoch": 0.9432502316779967, "grad_norm": 0.8046875, "learning_rate": 0.00015511481970169964, "loss": 4.2515, "step": 9097 }, { "epoch": 0.9433539197324848, "grad_norm": 0.734375, "learning_rate": 0.00015510575579841192, "loss": 4.3032, "step": 9098 }, { "epoch": 0.9434576077869729, "grad_norm": 0.8828125, "learning_rate": 0.00015509669124493452, "loss": 4.2944, "step": 9099 }, { "epoch": 0.943561295841461, "grad_norm": 0.71875, "learning_rate": 0.00015508762604137438, "loss": 4.2555, "step": 9100 }, { "epoch": 0.943664983895949, "grad_norm": 0.83203125, "learning_rate": 0.0001550785601878385, "loss": 4.2433, "step": 9101 }, { "epoch": 0.9437686719504371, "grad_norm": 0.71875, "learning_rate": 0.00015506949368443386, "loss": 4.2553, "step": 9102 }, { "epoch": 0.9438723600049251, "grad_norm": 0.80078125, "learning_rate": 0.00015506042653126736, "loss": 4.2619, "step": 9103 }, { "epoch": 0.9439760480594133, "grad_norm": 0.79296875, "learning_rate": 0.00015505135872844608, "loss": 4.2375, "step": 9104 }, { "epoch": 0.9440797361139013, "grad_norm": 0.8984375, "learning_rate": 0.0001550422902760769, "loss": 4.322, "step": 9105 }, { "epoch": 0.9441834241683894, "grad_norm": 0.77734375, "learning_rate": 0.0001550332211742669, "loss": 4.2436, "step": 9106 }, { "epoch": 0.9442871122228774, "grad_norm": 0.91015625, "learning_rate": 0.0001550241514231231, "loss": 4.2417, "step": 9107 }, { "epoch": 0.9443908002773655, "grad_norm": 0.72265625, "learning_rate": 0.0001550150810227524, "loss": 4.3237, "step": 9108 }, { "epoch": 0.9444944883318536, "grad_norm": 0.9453125, "learning_rate": 0.00015500600997326195, "loss": 4.2396, "step": 9109 }, { "epoch": 0.9445981763863417, "grad_norm": 0.6796875, "learning_rate": 0.00015499693827475874, "loss": 4.2631, "step": 9110 }, { "epoch": 0.9447018644408297, "grad_norm": 1.0, "learning_rate": 0.00015498786592734977, "loss": 4.2638, "step": 9111 }, { "epoch": 0.9448055524953178, "grad_norm": 0.6640625, "learning_rate": 0.0001549787929311421, "loss": 4.2598, "step": 9112 }, { "epoch": 0.944909240549806, "grad_norm": 0.90625, "learning_rate": 0.0001549697192862428, "loss": 4.2533, "step": 9113 }, { "epoch": 0.945012928604294, "grad_norm": 0.703125, "learning_rate": 0.0001549606449927589, "loss": 4.2861, "step": 9114 }, { "epoch": 0.9451166166587821, "grad_norm": 0.85546875, "learning_rate": 0.00015495157005079751, "loss": 4.2727, "step": 9115 }, { "epoch": 0.9452203047132701, "grad_norm": 0.76171875, "learning_rate": 0.00015494249446046566, "loss": 4.3136, "step": 9116 }, { "epoch": 0.9453239927677582, "grad_norm": 0.81640625, "learning_rate": 0.00015493341822187047, "loss": 4.2814, "step": 9117 }, { "epoch": 0.9454276808222463, "grad_norm": 0.7734375, "learning_rate": 0.000154924341335119, "loss": 4.2213, "step": 9118 }, { "epoch": 0.9455313688767344, "grad_norm": 0.75, "learning_rate": 0.00015491526380031836, "loss": 4.257, "step": 9119 }, { "epoch": 0.9456350569312224, "grad_norm": 0.71484375, "learning_rate": 0.00015490618561757566, "loss": 4.3009, "step": 9120 }, { "epoch": 0.9457387449857105, "grad_norm": 0.78125, "learning_rate": 0.000154897106786998, "loss": 4.2634, "step": 9121 }, { "epoch": 0.9458424330401985, "grad_norm": 0.77734375, "learning_rate": 0.00015488802730869252, "loss": 4.3353, "step": 9122 }, { "epoch": 0.9459461210946867, "grad_norm": 0.8203125, "learning_rate": 0.00015487894718276633, "loss": 4.2733, "step": 9123 }, { "epoch": 0.9460498091491747, "grad_norm": 0.68359375, "learning_rate": 0.00015486986640932658, "loss": 4.2482, "step": 9124 }, { "epoch": 0.9461534972036628, "grad_norm": 0.71875, "learning_rate": 0.0001548607849884804, "loss": 4.2753, "step": 9125 }, { "epoch": 0.9462571852581508, "grad_norm": 0.625, "learning_rate": 0.00015485170292033496, "loss": 4.2713, "step": 9126 }, { "epoch": 0.946360873312639, "grad_norm": 0.78125, "learning_rate": 0.0001548426202049974, "loss": 4.2871, "step": 9127 }, { "epoch": 0.946464561367127, "grad_norm": 0.68359375, "learning_rate": 0.00015483353684257487, "loss": 4.2852, "step": 9128 }, { "epoch": 0.9465682494216151, "grad_norm": 0.71875, "learning_rate": 0.0001548244528331746, "loss": 4.2846, "step": 9129 }, { "epoch": 0.9466719374761031, "grad_norm": 0.72265625, "learning_rate": 0.0001548153681769037, "loss": 4.2558, "step": 9130 }, { "epoch": 0.9467756255305912, "grad_norm": 0.81640625, "learning_rate": 0.0001548062828738694, "loss": 4.2772, "step": 9131 }, { "epoch": 0.9468793135850793, "grad_norm": 0.74609375, "learning_rate": 0.00015479719692417892, "loss": 4.2572, "step": 9132 }, { "epoch": 0.9469830016395674, "grad_norm": 0.8984375, "learning_rate": 0.00015478811032793946, "loss": 4.288, "step": 9133 }, { "epoch": 0.9470866896940554, "grad_norm": 0.734375, "learning_rate": 0.00015477902308525817, "loss": 4.2884, "step": 9134 }, { "epoch": 0.9471903777485435, "grad_norm": 1.015625, "learning_rate": 0.00015476993519624233, "loss": 4.2595, "step": 9135 }, { "epoch": 0.9472940658030315, "grad_norm": 0.703125, "learning_rate": 0.00015476084666099914, "loss": 4.2354, "step": 9136 }, { "epoch": 0.9473977538575197, "grad_norm": 1.015625, "learning_rate": 0.00015475175747963586, "loss": 4.1977, "step": 9137 }, { "epoch": 0.9475014419120077, "grad_norm": 0.68359375, "learning_rate": 0.00015474266765225968, "loss": 4.294, "step": 9138 }, { "epoch": 0.9476051299664958, "grad_norm": 1.03125, "learning_rate": 0.00015473357717897792, "loss": 4.2882, "step": 9139 }, { "epoch": 0.9477088180209838, "grad_norm": 0.75, "learning_rate": 0.00015472448605989783, "loss": 4.2827, "step": 9140 }, { "epoch": 0.947812506075472, "grad_norm": 1.0859375, "learning_rate": 0.0001547153942951266, "loss": 4.2591, "step": 9141 }, { "epoch": 0.94791619412996, "grad_norm": 0.796875, "learning_rate": 0.0001547063018847716, "loss": 4.2223, "step": 9142 }, { "epoch": 0.9480198821844481, "grad_norm": 1.265625, "learning_rate": 0.00015469720882894, "loss": 4.2614, "step": 9143 }, { "epoch": 0.9481235702389361, "grad_norm": 0.85546875, "learning_rate": 0.00015468811512773923, "loss": 4.2703, "step": 9144 }, { "epoch": 0.9482272582934242, "grad_norm": 1.4453125, "learning_rate": 0.00015467902078127645, "loss": 4.2755, "step": 9145 }, { "epoch": 0.9483309463479123, "grad_norm": 1.1875, "learning_rate": 0.00015466992578965907, "loss": 4.2887, "step": 9146 }, { "epoch": 0.9484346344024004, "grad_norm": 1.578125, "learning_rate": 0.00015466083015299432, "loss": 4.2957, "step": 9147 }, { "epoch": 0.9485383224568884, "grad_norm": 1.5390625, "learning_rate": 0.00015465173387138955, "loss": 4.2571, "step": 9148 }, { "epoch": 0.9486420105113765, "grad_norm": 1.0625, "learning_rate": 0.0001546426369449521, "loss": 4.2698, "step": 9149 }, { "epoch": 0.9487456985658645, "grad_norm": 1.046875, "learning_rate": 0.0001546335393737893, "loss": 4.2932, "step": 9150 }, { "epoch": 0.9488493866203527, "grad_norm": 1.1953125, "learning_rate": 0.0001546244411580085, "loss": 4.2408, "step": 9151 }, { "epoch": 0.9489530746748407, "grad_norm": 1.015625, "learning_rate": 0.00015461534229771698, "loss": 4.2668, "step": 9152 }, { "epoch": 0.9490567627293288, "grad_norm": 1.46875, "learning_rate": 0.0001546062427930222, "loss": 4.2251, "step": 9153 }, { "epoch": 0.9491604507838168, "grad_norm": 1.2734375, "learning_rate": 0.00015459714264403149, "loss": 4.2717, "step": 9154 }, { "epoch": 0.949264138838305, "grad_norm": 1.515625, "learning_rate": 0.00015458804185085215, "loss": 4.2636, "step": 9155 }, { "epoch": 0.9493678268927931, "grad_norm": 1.4375, "learning_rate": 0.00015457894041359164, "loss": 4.2856, "step": 9156 }, { "epoch": 0.9494715149472811, "grad_norm": 1.1640625, "learning_rate": 0.00015456983833235735, "loss": 4.2362, "step": 9157 }, { "epoch": 0.9495752030017692, "grad_norm": 1.203125, "learning_rate": 0.00015456073560725665, "loss": 4.2561, "step": 9158 }, { "epoch": 0.9496788910562572, "grad_norm": 1.015625, "learning_rate": 0.00015455163223839692, "loss": 4.2903, "step": 9159 }, { "epoch": 0.9497825791107454, "grad_norm": 1.1484375, "learning_rate": 0.0001545425282258856, "loss": 4.3087, "step": 9160 }, { "epoch": 0.9498862671652334, "grad_norm": 1.078125, "learning_rate": 0.00015453342356983014, "loss": 4.3171, "step": 9161 }, { "epoch": 0.9499899552197215, "grad_norm": 1.0078125, "learning_rate": 0.0001545243182703379, "loss": 4.2506, "step": 9162 }, { "epoch": 0.9500936432742095, "grad_norm": 1.4765625, "learning_rate": 0.00015451521232751628, "loss": 4.2497, "step": 9163 }, { "epoch": 0.9501973313286977, "grad_norm": 1.2109375, "learning_rate": 0.00015450610574147284, "loss": 4.2478, "step": 9164 }, { "epoch": 0.9503010193831857, "grad_norm": 1.4609375, "learning_rate": 0.000154496998512315, "loss": 4.2293, "step": 9165 }, { "epoch": 0.9504047074376738, "grad_norm": 1.421875, "learning_rate": 0.0001544878906401501, "loss": 4.2779, "step": 9166 }, { "epoch": 0.9505083954921618, "grad_norm": 1.171875, "learning_rate": 0.00015447878212508572, "loss": 4.226, "step": 9167 }, { "epoch": 0.9506120835466499, "grad_norm": 1.2265625, "learning_rate": 0.0001544696729672293, "loss": 4.2892, "step": 9168 }, { "epoch": 0.950715771601138, "grad_norm": 1.1171875, "learning_rate": 0.00015446056316668837, "loss": 4.2678, "step": 9169 }, { "epoch": 0.9508194596556261, "grad_norm": 0.98046875, "learning_rate": 0.00015445145272357028, "loss": 4.2994, "step": 9170 }, { "epoch": 0.9509231477101141, "grad_norm": 1.375, "learning_rate": 0.00015444234163798263, "loss": 4.2739, "step": 9171 }, { "epoch": 0.9510268357646022, "grad_norm": 1.0234375, "learning_rate": 0.0001544332299100329, "loss": 4.2484, "step": 9172 }, { "epoch": 0.9511305238190902, "grad_norm": 1.703125, "learning_rate": 0.00015442411753982863, "loss": 4.2454, "step": 9173 }, { "epoch": 0.9512342118735784, "grad_norm": 1.671875, "learning_rate": 0.00015441500452747725, "loss": 4.2962, "step": 9174 }, { "epoch": 0.9513378999280664, "grad_norm": 1.1640625, "learning_rate": 0.00015440589087308636, "loss": 4.2961, "step": 9175 }, { "epoch": 0.9514415879825545, "grad_norm": 1.1875, "learning_rate": 0.00015439677657676346, "loss": 4.2194, "step": 9176 }, { "epoch": 0.9515452760370425, "grad_norm": 0.98828125, "learning_rate": 0.0001543876616386161, "loss": 4.3136, "step": 9177 }, { "epoch": 0.9516489640915307, "grad_norm": 0.92578125, "learning_rate": 0.0001543785460587518, "loss": 4.2778, "step": 9178 }, { "epoch": 0.9517526521460187, "grad_norm": 1.0390625, "learning_rate": 0.0001543694298372782, "loss": 4.2829, "step": 9179 }, { "epoch": 0.9518563402005068, "grad_norm": 0.79296875, "learning_rate": 0.00015436031297430276, "loss": 4.2372, "step": 9180 }, { "epoch": 0.9519600282549948, "grad_norm": 0.96875, "learning_rate": 0.00015435119546993307, "loss": 4.2937, "step": 9181 }, { "epoch": 0.9520637163094829, "grad_norm": 0.77734375, "learning_rate": 0.00015434207732427675, "loss": 4.2077, "step": 9182 }, { "epoch": 0.952167404363971, "grad_norm": 0.9921875, "learning_rate": 0.00015433295853744134, "loss": 4.2331, "step": 9183 }, { "epoch": 0.9522710924184591, "grad_norm": 0.77734375, "learning_rate": 0.00015432383910953448, "loss": 4.2744, "step": 9184 }, { "epoch": 0.9523747804729471, "grad_norm": 1.03125, "learning_rate": 0.00015431471904066372, "loss": 4.2522, "step": 9185 }, { "epoch": 0.9524784685274352, "grad_norm": 0.796875, "learning_rate": 0.00015430559833093672, "loss": 4.2428, "step": 9186 }, { "epoch": 0.9525821565819232, "grad_norm": 1.140625, "learning_rate": 0.00015429647698046104, "loss": 4.2723, "step": 9187 }, { "epoch": 0.9526858446364114, "grad_norm": 0.85546875, "learning_rate": 0.0001542873549893443, "loss": 4.2884, "step": 9188 }, { "epoch": 0.9527895326908994, "grad_norm": 1.4375, "learning_rate": 0.00015427823235769416, "loss": 4.2651, "step": 9189 }, { "epoch": 0.9528932207453875, "grad_norm": 1.25, "learning_rate": 0.0001542691090856183, "loss": 4.2941, "step": 9190 }, { "epoch": 0.9529969087998755, "grad_norm": 1.4765625, "learning_rate": 0.00015425998517322426, "loss": 4.2693, "step": 9191 }, { "epoch": 0.9531005968543637, "grad_norm": 1.4609375, "learning_rate": 0.0001542508606206198, "loss": 4.2732, "step": 9192 }, { "epoch": 0.9532042849088517, "grad_norm": 1.0625, "learning_rate": 0.00015424173542791253, "loss": 4.2487, "step": 9193 }, { "epoch": 0.9533079729633398, "grad_norm": 1.015625, "learning_rate": 0.00015423260959521008, "loss": 4.2602, "step": 9194 }, { "epoch": 0.9534116610178278, "grad_norm": 1.234375, "learning_rate": 0.0001542234831226202, "loss": 4.325, "step": 9195 }, { "epoch": 0.9535153490723159, "grad_norm": 1.03125, "learning_rate": 0.00015421435601025048, "loss": 4.2692, "step": 9196 }, { "epoch": 0.953619037126804, "grad_norm": 1.546875, "learning_rate": 0.0001542052282582087, "loss": 4.251, "step": 9197 }, { "epoch": 0.9537227251812921, "grad_norm": 1.390625, "learning_rate": 0.00015419609986660256, "loss": 4.2267, "step": 9198 }, { "epoch": 0.9538264132357801, "grad_norm": 1.3203125, "learning_rate": 0.00015418697083553968, "loss": 4.2704, "step": 9199 }, { "epoch": 0.9539301012902682, "grad_norm": 1.3046875, "learning_rate": 0.00015417784116512784, "loss": 4.2562, "step": 9200 }, { "epoch": 0.9540337893447564, "grad_norm": 1.234375, "learning_rate": 0.00015416871085547474, "loss": 4.2652, "step": 9201 }, { "epoch": 0.9541374773992444, "grad_norm": 1.171875, "learning_rate": 0.00015415957990668808, "loss": 4.2949, "step": 9202 }, { "epoch": 0.9542411654537325, "grad_norm": 1.0546875, "learning_rate": 0.00015415044831887568, "loss": 4.2267, "step": 9203 }, { "epoch": 0.9543448535082205, "grad_norm": 1.0078125, "learning_rate": 0.00015414131609214522, "loss": 4.2524, "step": 9204 }, { "epoch": 0.9544485415627086, "grad_norm": 1.203125, "learning_rate": 0.00015413218322660443, "loss": 4.2644, "step": 9205 }, { "epoch": 0.9545522296171967, "grad_norm": 1.1328125, "learning_rate": 0.00015412304972236112, "loss": 4.2714, "step": 9206 }, { "epoch": 0.9546559176716848, "grad_norm": 1.2734375, "learning_rate": 0.00015411391557952303, "loss": 4.3073, "step": 9207 }, { "epoch": 0.9547596057261728, "grad_norm": 1.171875, "learning_rate": 0.00015410478079819795, "loss": 4.2954, "step": 9208 }, { "epoch": 0.9548632937806609, "grad_norm": 1.203125, "learning_rate": 0.0001540956453784936, "loss": 4.2384, "step": 9209 }, { "epoch": 0.9549669818351489, "grad_norm": 1.15625, "learning_rate": 0.00015408650932051786, "loss": 4.2725, "step": 9210 }, { "epoch": 0.9550706698896371, "grad_norm": 1.15625, "learning_rate": 0.00015407737262437848, "loss": 4.2239, "step": 9211 }, { "epoch": 0.9551743579441251, "grad_norm": 1.1015625, "learning_rate": 0.00015406823529018327, "loss": 4.247, "step": 9212 }, { "epoch": 0.9552780459986132, "grad_norm": 1.2578125, "learning_rate": 0.00015405909731804003, "loss": 4.2726, "step": 9213 }, { "epoch": 0.9553817340531012, "grad_norm": 1.15625, "learning_rate": 0.00015404995870805656, "loss": 4.25, "step": 9214 }, { "epoch": 0.9554854221075894, "grad_norm": 1.2578125, "learning_rate": 0.00015404081946034074, "loss": 4.2884, "step": 9215 }, { "epoch": 0.9555891101620774, "grad_norm": 1.1171875, "learning_rate": 0.00015403167957500036, "loss": 4.2743, "step": 9216 }, { "epoch": 0.9556927982165655, "grad_norm": 1.28125, "learning_rate": 0.00015402253905214325, "loss": 4.2711, "step": 9217 }, { "epoch": 0.9557964862710535, "grad_norm": 1.2734375, "learning_rate": 0.00015401339789187731, "loss": 4.2759, "step": 9218 }, { "epoch": 0.9559001743255416, "grad_norm": 1.2109375, "learning_rate": 0.0001540042560943104, "loss": 4.2727, "step": 9219 }, { "epoch": 0.9560038623800297, "grad_norm": 1.140625, "learning_rate": 0.00015399511365955034, "loss": 4.2178, "step": 9220 }, { "epoch": 0.9561075504345178, "grad_norm": 1.09375, "learning_rate": 0.00015398597058770497, "loss": 4.2876, "step": 9221 }, { "epoch": 0.9562112384890058, "grad_norm": 1.015625, "learning_rate": 0.00015397682687888224, "loss": 4.274, "step": 9222 }, { "epoch": 0.9563149265434939, "grad_norm": 1.28125, "learning_rate": 0.00015396768253319004, "loss": 4.2447, "step": 9223 }, { "epoch": 0.9564186145979819, "grad_norm": 1.125, "learning_rate": 0.00015395853755073617, "loss": 4.2406, "step": 9224 }, { "epoch": 0.9565223026524701, "grad_norm": 1.5703125, "learning_rate": 0.0001539493919316286, "loss": 4.2687, "step": 9225 }, { "epoch": 0.9566259907069581, "grad_norm": 1.46875, "learning_rate": 0.00015394024567597528, "loss": 4.2737, "step": 9226 }, { "epoch": 0.9567296787614462, "grad_norm": 1.28125, "learning_rate": 0.00015393109878388404, "loss": 4.2817, "step": 9227 }, { "epoch": 0.9568333668159342, "grad_norm": 1.1796875, "learning_rate": 0.00015392195125546286, "loss": 4.2752, "step": 9228 }, { "epoch": 0.9569370548704224, "grad_norm": 1.15625, "learning_rate": 0.00015391280309081968, "loss": 4.2593, "step": 9229 }, { "epoch": 0.9570407429249104, "grad_norm": 1.0859375, "learning_rate": 0.00015390365429006236, "loss": 4.2702, "step": 9230 }, { "epoch": 0.9571444309793985, "grad_norm": 1.140625, "learning_rate": 0.00015389450485329894, "loss": 4.2631, "step": 9231 }, { "epoch": 0.9572481190338865, "grad_norm": 1.0234375, "learning_rate": 0.0001538853547806373, "loss": 4.2499, "step": 9232 }, { "epoch": 0.9573518070883746, "grad_norm": 1.2421875, "learning_rate": 0.00015387620407218545, "loss": 4.2652, "step": 9233 }, { "epoch": 0.9574554951428627, "grad_norm": 1.078125, "learning_rate": 0.00015386705272805133, "loss": 4.2738, "step": 9234 }, { "epoch": 0.9575591831973508, "grad_norm": 1.46875, "learning_rate": 0.0001538579007483429, "loss": 4.2667, "step": 9235 }, { "epoch": 0.9576628712518388, "grad_norm": 1.4453125, "learning_rate": 0.00015384874813316826, "loss": 4.2577, "step": 9236 }, { "epoch": 0.9577665593063269, "grad_norm": 1.046875, "learning_rate": 0.00015383959488263525, "loss": 4.2821, "step": 9237 }, { "epoch": 0.9578702473608149, "grad_norm": 1.109375, "learning_rate": 0.00015383044099685192, "loss": 4.3023, "step": 9238 }, { "epoch": 0.9579739354153031, "grad_norm": 0.9375, "learning_rate": 0.00015382128647592632, "loss": 4.2862, "step": 9239 }, { "epoch": 0.9580776234697911, "grad_norm": 0.90625, "learning_rate": 0.00015381213131996643, "loss": 4.2631, "step": 9240 }, { "epoch": 0.9581813115242792, "grad_norm": 0.98046875, "learning_rate": 0.00015380297552908026, "loss": 4.2545, "step": 9241 }, { "epoch": 0.9582849995787672, "grad_norm": 0.765625, "learning_rate": 0.00015379381910337583, "loss": 4.2514, "step": 9242 }, { "epoch": 0.9583886876332554, "grad_norm": 0.99609375, "learning_rate": 0.00015378466204296121, "loss": 4.2767, "step": 9243 }, { "epoch": 0.9584923756877434, "grad_norm": 0.72265625, "learning_rate": 0.00015377550434794447, "loss": 4.29, "step": 9244 }, { "epoch": 0.9585960637422315, "grad_norm": 0.9296875, "learning_rate": 0.00015376634601843358, "loss": 4.2716, "step": 9245 }, { "epoch": 0.9586997517967196, "grad_norm": 0.72265625, "learning_rate": 0.00015375718705453663, "loss": 4.2961, "step": 9246 }, { "epoch": 0.9588034398512076, "grad_norm": 0.9375, "learning_rate": 0.00015374802745636172, "loss": 4.2452, "step": 9247 }, { "epoch": 0.9589071279056958, "grad_norm": 0.75390625, "learning_rate": 0.00015373886722401693, "loss": 4.2632, "step": 9248 }, { "epoch": 0.9590108159601838, "grad_norm": 0.77734375, "learning_rate": 0.00015372970635761025, "loss": 4.2766, "step": 9249 }, { "epoch": 0.9591145040146719, "grad_norm": 0.734375, "learning_rate": 0.00015372054485724988, "loss": 4.2929, "step": 9250 }, { "epoch": 0.9592181920691599, "grad_norm": 0.73046875, "learning_rate": 0.00015371138272304383, "loss": 4.2799, "step": 9251 }, { "epoch": 0.959321880123648, "grad_norm": 0.75, "learning_rate": 0.00015370221995510025, "loss": 4.2487, "step": 9252 }, { "epoch": 0.9594255681781361, "grad_norm": 0.83203125, "learning_rate": 0.0001536930565535273, "loss": 4.2844, "step": 9253 }, { "epoch": 0.9595292562326242, "grad_norm": 0.6953125, "learning_rate": 0.00015368389251843297, "loss": 4.258, "step": 9254 }, { "epoch": 0.9596329442871122, "grad_norm": 0.71875, "learning_rate": 0.00015367472784992545, "loss": 4.2792, "step": 9255 }, { "epoch": 0.9597366323416003, "grad_norm": 0.8359375, "learning_rate": 0.0001536655625481129, "loss": 4.2807, "step": 9256 }, { "epoch": 0.9598403203960884, "grad_norm": 0.6875, "learning_rate": 0.00015365639661310342, "loss": 4.2627, "step": 9257 }, { "epoch": 0.9599440084505765, "grad_norm": 0.796875, "learning_rate": 0.00015364723004500518, "loss": 4.2875, "step": 9258 }, { "epoch": 0.9600476965050645, "grad_norm": 0.72265625, "learning_rate": 0.00015363806284392637, "loss": 4.2563, "step": 9259 }, { "epoch": 0.9601513845595526, "grad_norm": 0.86328125, "learning_rate": 0.0001536288950099751, "loss": 4.266, "step": 9260 }, { "epoch": 0.9602550726140406, "grad_norm": 0.69921875, "learning_rate": 0.00015361972654325952, "loss": 4.2698, "step": 9261 }, { "epoch": 0.9603587606685288, "grad_norm": 0.79296875, "learning_rate": 0.00015361055744388789, "loss": 4.2405, "step": 9262 }, { "epoch": 0.9604624487230168, "grad_norm": 0.76953125, "learning_rate": 0.00015360138771196833, "loss": 4.2959, "step": 9263 }, { "epoch": 0.9605661367775049, "grad_norm": 0.87890625, "learning_rate": 0.00015359221734760907, "loss": 4.2977, "step": 9264 }, { "epoch": 0.9606698248319929, "grad_norm": 0.7421875, "learning_rate": 0.0001535830463509183, "loss": 4.2779, "step": 9265 }, { "epoch": 0.960773512886481, "grad_norm": 0.7890625, "learning_rate": 0.00015357387472200417, "loss": 4.2732, "step": 9266 }, { "epoch": 0.9608772009409691, "grad_norm": 0.7421875, "learning_rate": 0.000153564702460975, "loss": 4.266, "step": 9267 }, { "epoch": 0.9609808889954572, "grad_norm": 0.84375, "learning_rate": 0.00015355552956793891, "loss": 4.2521, "step": 9268 }, { "epoch": 0.9610845770499452, "grad_norm": 0.75, "learning_rate": 0.00015354635604300425, "loss": 4.2541, "step": 9269 }, { "epoch": 0.9611882651044333, "grad_norm": 0.79296875, "learning_rate": 0.00015353718188627915, "loss": 4.2916, "step": 9270 }, { "epoch": 0.9612919531589214, "grad_norm": 0.75390625, "learning_rate": 0.0001535280070978719, "loss": 4.2589, "step": 9271 }, { "epoch": 0.9613956412134095, "grad_norm": 0.7890625, "learning_rate": 0.00015351883167789073, "loss": 4.2507, "step": 9272 }, { "epoch": 0.9614993292678975, "grad_norm": 0.8125, "learning_rate": 0.00015350965562644397, "loss": 4.2826, "step": 9273 }, { "epoch": 0.9616030173223856, "grad_norm": 0.828125, "learning_rate": 0.00015350047894363978, "loss": 4.2858, "step": 9274 }, { "epoch": 0.9617067053768736, "grad_norm": 0.8046875, "learning_rate": 0.00015349130162958653, "loss": 4.3188, "step": 9275 }, { "epoch": 0.9618103934313618, "grad_norm": 0.7890625, "learning_rate": 0.00015348212368439246, "loss": 4.2959, "step": 9276 }, { "epoch": 0.9619140814858498, "grad_norm": 0.765625, "learning_rate": 0.00015347294510816584, "loss": 4.2796, "step": 9277 }, { "epoch": 0.9620177695403379, "grad_norm": 0.72265625, "learning_rate": 0.000153463765901015, "loss": 4.2404, "step": 9278 }, { "epoch": 0.9621214575948259, "grad_norm": 0.8125, "learning_rate": 0.00015345458606304827, "loss": 4.2171, "step": 9279 }, { "epoch": 0.962225145649314, "grad_norm": 0.7421875, "learning_rate": 0.0001534454055943739, "loss": 4.3025, "step": 9280 }, { "epoch": 0.9623288337038021, "grad_norm": 0.70703125, "learning_rate": 0.0001534362244951002, "loss": 4.2821, "step": 9281 }, { "epoch": 0.9624325217582902, "grad_norm": 0.7421875, "learning_rate": 0.00015342704276533558, "loss": 4.2664, "step": 9282 }, { "epoch": 0.9625362098127782, "grad_norm": 0.66796875, "learning_rate": 0.0001534178604051883, "loss": 4.2356, "step": 9283 }, { "epoch": 0.9626398978672663, "grad_norm": 0.77734375, "learning_rate": 0.00015340867741476676, "loss": 4.2559, "step": 9284 }, { "epoch": 0.9627435859217544, "grad_norm": 0.640625, "learning_rate": 0.00015339949379417927, "loss": 4.2702, "step": 9285 }, { "epoch": 0.9628472739762425, "grad_norm": 0.69140625, "learning_rate": 0.00015339030954353424, "loss": 4.2583, "step": 9286 }, { "epoch": 0.9629509620307305, "grad_norm": 0.68359375, "learning_rate": 0.00015338112466293992, "loss": 4.2732, "step": 9287 }, { "epoch": 0.9630546500852186, "grad_norm": 0.7265625, "learning_rate": 0.0001533719391525048, "loss": 4.2976, "step": 9288 }, { "epoch": 0.9631583381397066, "grad_norm": 0.796875, "learning_rate": 0.00015336275301233723, "loss": 4.2614, "step": 9289 }, { "epoch": 0.9632620261941948, "grad_norm": 0.69921875, "learning_rate": 0.00015335356624254556, "loss": 4.2133, "step": 9290 }, { "epoch": 0.9633657142486829, "grad_norm": 0.7421875, "learning_rate": 0.0001533443788432382, "loss": 4.2687, "step": 9291 }, { "epoch": 0.9634694023031709, "grad_norm": 0.71875, "learning_rate": 0.00015333519081452357, "loss": 4.3103, "step": 9292 }, { "epoch": 0.963573090357659, "grad_norm": 0.64453125, "learning_rate": 0.00015332600215651004, "loss": 4.2408, "step": 9293 }, { "epoch": 0.963676778412147, "grad_norm": 0.73828125, "learning_rate": 0.0001533168128693061, "loss": 4.2903, "step": 9294 }, { "epoch": 0.9637804664666352, "grad_norm": 0.6796875, "learning_rate": 0.00015330762295302008, "loss": 4.2581, "step": 9295 }, { "epoch": 0.9638841545211232, "grad_norm": 0.73828125, "learning_rate": 0.00015329843240776049, "loss": 4.2235, "step": 9296 }, { "epoch": 0.9639878425756113, "grad_norm": 0.77734375, "learning_rate": 0.0001532892412336357, "loss": 4.2837, "step": 9297 }, { "epoch": 0.9640915306300993, "grad_norm": 0.75390625, "learning_rate": 0.0001532800494307542, "loss": 4.261, "step": 9298 }, { "epoch": 0.9641952186845875, "grad_norm": 0.75, "learning_rate": 0.00015327085699922446, "loss": 4.2868, "step": 9299 }, { "epoch": 0.9642989067390755, "grad_norm": 0.734375, "learning_rate": 0.0001532616639391549, "loss": 4.2561, "step": 9300 }, { "epoch": 0.9644025947935636, "grad_norm": 0.76953125, "learning_rate": 0.00015325247025065403, "loss": 4.2627, "step": 9301 }, { "epoch": 0.9645062828480516, "grad_norm": 0.82421875, "learning_rate": 0.00015324327593383027, "loss": 4.2958, "step": 9302 }, { "epoch": 0.9646099709025397, "grad_norm": 0.81640625, "learning_rate": 0.00015323408098879217, "loss": 4.2615, "step": 9303 }, { "epoch": 0.9647136589570278, "grad_norm": 0.86328125, "learning_rate": 0.00015322488541564817, "loss": 4.2625, "step": 9304 }, { "epoch": 0.9648173470115159, "grad_norm": 0.78125, "learning_rate": 0.00015321568921450676, "loss": 4.2513, "step": 9305 }, { "epoch": 0.9649210350660039, "grad_norm": 0.86328125, "learning_rate": 0.0001532064923854765, "loss": 4.2893, "step": 9306 }, { "epoch": 0.965024723120492, "grad_norm": 0.7578125, "learning_rate": 0.00015319729492866584, "loss": 4.2635, "step": 9307 }, { "epoch": 0.96512841117498, "grad_norm": 0.875, "learning_rate": 0.00015318809684418337, "loss": 4.2596, "step": 9308 }, { "epoch": 0.9652320992294682, "grad_norm": 0.7421875, "learning_rate": 0.00015317889813213753, "loss": 4.2766, "step": 9309 }, { "epoch": 0.9653357872839562, "grad_norm": 0.8828125, "learning_rate": 0.00015316969879263694, "loss": 4.2442, "step": 9310 }, { "epoch": 0.9654394753384443, "grad_norm": 0.7265625, "learning_rate": 0.0001531604988257901, "loss": 4.2975, "step": 9311 }, { "epoch": 0.9655431633929323, "grad_norm": 0.9140625, "learning_rate": 0.00015315129823170555, "loss": 4.2573, "step": 9312 }, { "epoch": 0.9656468514474205, "grad_norm": 0.6953125, "learning_rate": 0.0001531420970104919, "loss": 4.2364, "step": 9313 }, { "epoch": 0.9657505395019085, "grad_norm": 0.953125, "learning_rate": 0.00015313289516225766, "loss": 4.2463, "step": 9314 }, { "epoch": 0.9658542275563966, "grad_norm": 0.72265625, "learning_rate": 0.00015312369268711144, "loss": 4.2555, "step": 9315 }, { "epoch": 0.9659579156108846, "grad_norm": 0.87890625, "learning_rate": 0.00015311448958516176, "loss": 4.2938, "step": 9316 }, { "epoch": 0.9660616036653727, "grad_norm": 0.64453125, "learning_rate": 0.00015310528585651725, "loss": 4.2862, "step": 9317 }, { "epoch": 0.9661652917198608, "grad_norm": 0.90234375, "learning_rate": 0.00015309608150128654, "loss": 4.2608, "step": 9318 }, { "epoch": 0.9662689797743489, "grad_norm": 0.66015625, "learning_rate": 0.00015308687651957817, "loss": 4.2827, "step": 9319 }, { "epoch": 0.9663726678288369, "grad_norm": 0.84765625, "learning_rate": 0.00015307767091150078, "loss": 4.31, "step": 9320 }, { "epoch": 0.966476355883325, "grad_norm": 0.66015625, "learning_rate": 0.00015306846467716295, "loss": 4.2828, "step": 9321 }, { "epoch": 0.966580043937813, "grad_norm": 0.80859375, "learning_rate": 0.00015305925781667335, "loss": 4.2935, "step": 9322 }, { "epoch": 0.9666837319923012, "grad_norm": 0.79296875, "learning_rate": 0.00015305005033014064, "loss": 4.2706, "step": 9323 }, { "epoch": 0.9667874200467892, "grad_norm": 0.75, "learning_rate": 0.00015304084221767335, "loss": 4.2371, "step": 9324 }, { "epoch": 0.9668911081012773, "grad_norm": 0.73828125, "learning_rate": 0.0001530316334793802, "loss": 4.2946, "step": 9325 }, { "epoch": 0.9669947961557653, "grad_norm": 0.69921875, "learning_rate": 0.00015302242411536988, "loss": 4.2853, "step": 9326 }, { "epoch": 0.9670984842102535, "grad_norm": 0.8046875, "learning_rate": 0.00015301321412575095, "loss": 4.273, "step": 9327 }, { "epoch": 0.9672021722647415, "grad_norm": 0.68359375, "learning_rate": 0.00015300400351063215, "loss": 4.2475, "step": 9328 }, { "epoch": 0.9673058603192296, "grad_norm": 0.83203125, "learning_rate": 0.00015299479227012214, "loss": 4.3119, "step": 9329 }, { "epoch": 0.9674095483737176, "grad_norm": 0.68359375, "learning_rate": 0.0001529855804043296, "loss": 4.2729, "step": 9330 }, { "epoch": 0.9675132364282057, "grad_norm": 0.90625, "learning_rate": 0.0001529763679133632, "loss": 4.2845, "step": 9331 }, { "epoch": 0.9676169244826938, "grad_norm": 0.71875, "learning_rate": 0.0001529671547973317, "loss": 4.2634, "step": 9332 }, { "epoch": 0.9677206125371819, "grad_norm": 0.97265625, "learning_rate": 0.00015295794105634372, "loss": 4.2497, "step": 9333 }, { "epoch": 0.9678243005916699, "grad_norm": 0.7109375, "learning_rate": 0.00015294872669050798, "loss": 4.2846, "step": 9334 }, { "epoch": 0.967927988646158, "grad_norm": 0.87109375, "learning_rate": 0.00015293951169993331, "loss": 4.272, "step": 9335 }, { "epoch": 0.9680316767006462, "grad_norm": 0.7109375, "learning_rate": 0.00015293029608472834, "loss": 4.2757, "step": 9336 }, { "epoch": 0.9681353647551342, "grad_norm": 0.80859375, "learning_rate": 0.00015292107984500182, "loss": 4.2667, "step": 9337 }, { "epoch": 0.9682390528096223, "grad_norm": 0.6796875, "learning_rate": 0.00015291186298086248, "loss": 4.2901, "step": 9338 }, { "epoch": 0.9683427408641103, "grad_norm": 0.828125, "learning_rate": 0.0001529026454924191, "loss": 4.284, "step": 9339 }, { "epoch": 0.9684464289185984, "grad_norm": 0.703125, "learning_rate": 0.00015289342737978044, "loss": 4.2652, "step": 9340 }, { "epoch": 0.9685501169730865, "grad_norm": 0.78125, "learning_rate": 0.00015288420864305522, "loss": 4.2741, "step": 9341 }, { "epoch": 0.9686538050275746, "grad_norm": 0.76171875, "learning_rate": 0.00015287498928235227, "loss": 4.2619, "step": 9342 }, { "epoch": 0.9687574930820626, "grad_norm": 0.796875, "learning_rate": 0.0001528657692977803, "loss": 4.2337, "step": 9343 }, { "epoch": 0.9688611811365507, "grad_norm": 0.8515625, "learning_rate": 0.00015285654868944817, "loss": 4.2751, "step": 9344 }, { "epoch": 0.9689648691910387, "grad_norm": 0.890625, "learning_rate": 0.0001528473274574646, "loss": 4.2606, "step": 9345 }, { "epoch": 0.9690685572455269, "grad_norm": 0.80859375, "learning_rate": 0.00015283810560193846, "loss": 4.2429, "step": 9346 }, { "epoch": 0.9691722453000149, "grad_norm": 0.9609375, "learning_rate": 0.0001528288831229785, "loss": 4.2399, "step": 9347 }, { "epoch": 0.969275933354503, "grad_norm": 0.76171875, "learning_rate": 0.00015281966002069362, "loss": 4.284, "step": 9348 }, { "epoch": 0.969379621408991, "grad_norm": 0.80078125, "learning_rate": 0.00015281043629519252, "loss": 4.2841, "step": 9349 }, { "epoch": 0.9694833094634792, "grad_norm": 0.7265625, "learning_rate": 0.0001528012119465841, "loss": 4.292, "step": 9350 }, { "epoch": 0.9695869975179672, "grad_norm": 0.77734375, "learning_rate": 0.00015279198697497722, "loss": 4.2625, "step": 9351 }, { "epoch": 0.9696906855724553, "grad_norm": 0.7109375, "learning_rate": 0.00015278276138048068, "loss": 4.2883, "step": 9352 }, { "epoch": 0.9697943736269433, "grad_norm": 0.70703125, "learning_rate": 0.00015277353516320337, "loss": 4.2763, "step": 9353 }, { "epoch": 0.9698980616814314, "grad_norm": 0.75390625, "learning_rate": 0.0001527643083232541, "loss": 4.2784, "step": 9354 }, { "epoch": 0.9700017497359195, "grad_norm": 0.6953125, "learning_rate": 0.00015275508086074176, "loss": 4.235, "step": 9355 }, { "epoch": 0.9701054377904076, "grad_norm": 0.68359375, "learning_rate": 0.0001527458527757753, "loss": 4.3079, "step": 9356 }, { "epoch": 0.9702091258448956, "grad_norm": 0.640625, "learning_rate": 0.0001527366240684635, "loss": 4.2871, "step": 9357 }, { "epoch": 0.9703128138993837, "grad_norm": 0.69921875, "learning_rate": 0.0001527273947389152, "loss": 4.2377, "step": 9358 }, { "epoch": 0.9704165019538717, "grad_norm": 0.671875, "learning_rate": 0.00015271816478723945, "loss": 4.3026, "step": 9359 }, { "epoch": 0.9705201900083599, "grad_norm": 0.72265625, "learning_rate": 0.00015270893421354508, "loss": 4.2523, "step": 9360 }, { "epoch": 0.9706238780628479, "grad_norm": 0.69921875, "learning_rate": 0.00015269970301794102, "loss": 4.2463, "step": 9361 }, { "epoch": 0.970727566117336, "grad_norm": 0.671875, "learning_rate": 0.0001526904712005361, "loss": 4.2646, "step": 9362 }, { "epoch": 0.970831254171824, "grad_norm": 0.6640625, "learning_rate": 0.00015268123876143938, "loss": 4.2801, "step": 9363 }, { "epoch": 0.9709349422263122, "grad_norm": 0.66015625, "learning_rate": 0.00015267200570075973, "loss": 4.2408, "step": 9364 }, { "epoch": 0.9710386302808002, "grad_norm": 0.68359375, "learning_rate": 0.00015266277201860608, "loss": 4.1902, "step": 9365 }, { "epoch": 0.9711423183352883, "grad_norm": 0.671875, "learning_rate": 0.00015265353771508737, "loss": 4.2369, "step": 9366 }, { "epoch": 0.9712460063897763, "grad_norm": 0.6875, "learning_rate": 0.00015264430279031256, "loss": 4.2613, "step": 9367 }, { "epoch": 0.9713496944442644, "grad_norm": 0.7421875, "learning_rate": 0.00015263506724439064, "loss": 4.2568, "step": 9368 }, { "epoch": 0.9714533824987525, "grad_norm": 0.74609375, "learning_rate": 0.00015262583107743057, "loss": 4.2521, "step": 9369 }, { "epoch": 0.9715570705532406, "grad_norm": 0.8828125, "learning_rate": 0.00015261659428954133, "loss": 4.2344, "step": 9370 }, { "epoch": 0.9716607586077286, "grad_norm": 0.796875, "learning_rate": 0.0001526073568808319, "loss": 4.2492, "step": 9371 }, { "epoch": 0.9717644466622167, "grad_norm": 0.9140625, "learning_rate": 0.0001525981188514112, "loss": 4.2453, "step": 9372 }, { "epoch": 0.9718681347167047, "grad_norm": 0.78125, "learning_rate": 0.0001525888802013884, "loss": 4.2665, "step": 9373 }, { "epoch": 0.9719718227711929, "grad_norm": 0.7890625, "learning_rate": 0.00015257964093087233, "loss": 4.2541, "step": 9374 }, { "epoch": 0.9720755108256809, "grad_norm": 0.7578125, "learning_rate": 0.00015257040103997208, "loss": 4.2422, "step": 9375 }, { "epoch": 0.972179198880169, "grad_norm": 0.8984375, "learning_rate": 0.0001525611605287967, "loss": 4.2632, "step": 9376 }, { "epoch": 0.972282886934657, "grad_norm": 0.75390625, "learning_rate": 0.00015255191939745518, "loss": 4.2566, "step": 9377 }, { "epoch": 0.9723865749891452, "grad_norm": 0.82421875, "learning_rate": 0.00015254267764605657, "loss": 4.2434, "step": 9378 }, { "epoch": 0.9724902630436332, "grad_norm": 0.74609375, "learning_rate": 0.00015253343527470986, "loss": 4.287, "step": 9379 }, { "epoch": 0.9725939510981213, "grad_norm": 0.80078125, "learning_rate": 0.00015252419228352414, "loss": 4.2895, "step": 9380 }, { "epoch": 0.9726976391526094, "grad_norm": 0.82421875, "learning_rate": 0.00015251494867260854, "loss": 4.2692, "step": 9381 }, { "epoch": 0.9728013272070974, "grad_norm": 0.74609375, "learning_rate": 0.000152505704442072, "loss": 4.3054, "step": 9382 }, { "epoch": 0.9729050152615856, "grad_norm": 0.8046875, "learning_rate": 0.00015249645959202366, "loss": 4.2397, "step": 9383 }, { "epoch": 0.9730087033160736, "grad_norm": 0.71875, "learning_rate": 0.0001524872141225726, "loss": 4.2267, "step": 9384 }, { "epoch": 0.9731123913705617, "grad_norm": 0.80859375, "learning_rate": 0.00015247796803382789, "loss": 4.2755, "step": 9385 }, { "epoch": 0.9732160794250497, "grad_norm": 0.6015625, "learning_rate": 0.00015246872132589862, "loss": 4.2202, "step": 9386 }, { "epoch": 0.9733197674795379, "grad_norm": 0.85546875, "learning_rate": 0.0001524594739988939, "loss": 4.2612, "step": 9387 }, { "epoch": 0.9734234555340259, "grad_norm": 0.6171875, "learning_rate": 0.00015245022605292285, "loss": 4.2896, "step": 9388 }, { "epoch": 0.973527143588514, "grad_norm": 0.77734375, "learning_rate": 0.00015244097748809456, "loss": 4.2922, "step": 9389 }, { "epoch": 0.973630831643002, "grad_norm": 0.69140625, "learning_rate": 0.0001524317283045182, "loss": 4.2702, "step": 9390 }, { "epoch": 0.9737345196974901, "grad_norm": 0.79296875, "learning_rate": 0.00015242247850230282, "loss": 4.2298, "step": 9391 }, { "epoch": 0.9738382077519782, "grad_norm": 0.703125, "learning_rate": 0.00015241322808155763, "loss": 4.2585, "step": 9392 }, { "epoch": 0.9739418958064663, "grad_norm": 0.75390625, "learning_rate": 0.00015240397704239177, "loss": 4.3028, "step": 9393 }, { "epoch": 0.9740455838609543, "grad_norm": 0.72265625, "learning_rate": 0.00015239472538491439, "loss": 4.2511, "step": 9394 }, { "epoch": 0.9741492719154424, "grad_norm": 0.7265625, "learning_rate": 0.00015238547310923457, "loss": 4.2455, "step": 9395 }, { "epoch": 0.9742529599699304, "grad_norm": 0.69921875, "learning_rate": 0.00015237622021546158, "loss": 4.2601, "step": 9396 }, { "epoch": 0.9743566480244186, "grad_norm": 0.71875, "learning_rate": 0.00015236696670370455, "loss": 4.2497, "step": 9397 }, { "epoch": 0.9744603360789066, "grad_norm": 0.7421875, "learning_rate": 0.00015235771257407268, "loss": 4.1968, "step": 9398 }, { "epoch": 0.9745640241333947, "grad_norm": 0.83984375, "learning_rate": 0.0001523484578266751, "loss": 4.2809, "step": 9399 }, { "epoch": 0.9746677121878827, "grad_norm": 0.7421875, "learning_rate": 0.00015233920246162107, "loss": 4.2493, "step": 9400 }, { "epoch": 0.9747714002423709, "grad_norm": 0.80859375, "learning_rate": 0.00015232994647901982, "loss": 4.2857, "step": 9401 }, { "epoch": 0.9748750882968589, "grad_norm": 0.84765625, "learning_rate": 0.0001523206898789805, "loss": 4.2667, "step": 9402 }, { "epoch": 0.974978776351347, "grad_norm": 0.81640625, "learning_rate": 0.00015231143266161232, "loss": 4.2546, "step": 9403 }, { "epoch": 0.975082464405835, "grad_norm": 0.80078125, "learning_rate": 0.00015230217482702454, "loss": 4.2398, "step": 9404 }, { "epoch": 0.9751861524603231, "grad_norm": 0.8125, "learning_rate": 0.0001522929163753264, "loss": 4.2702, "step": 9405 }, { "epoch": 0.9752898405148112, "grad_norm": 0.75, "learning_rate": 0.00015228365730662712, "loss": 4.2692, "step": 9406 }, { "epoch": 0.9753935285692993, "grad_norm": 0.71484375, "learning_rate": 0.00015227439762103594, "loss": 4.3059, "step": 9407 }, { "epoch": 0.9754972166237873, "grad_norm": 0.71484375, "learning_rate": 0.0001522651373186621, "loss": 4.2772, "step": 9408 }, { "epoch": 0.9756009046782754, "grad_norm": 0.72265625, "learning_rate": 0.00015225587639961492, "loss": 4.2697, "step": 9409 }, { "epoch": 0.9757045927327634, "grad_norm": 0.7734375, "learning_rate": 0.0001522466148640036, "loss": 4.2494, "step": 9410 }, { "epoch": 0.9758082807872516, "grad_norm": 0.7734375, "learning_rate": 0.00015223735271193748, "loss": 4.2737, "step": 9411 }, { "epoch": 0.9759119688417396, "grad_norm": 0.75, "learning_rate": 0.00015222808994352582, "loss": 4.2676, "step": 9412 }, { "epoch": 0.9760156568962277, "grad_norm": 0.77734375, "learning_rate": 0.0001522188265588779, "loss": 4.2648, "step": 9413 }, { "epoch": 0.9761193449507157, "grad_norm": 0.7890625, "learning_rate": 0.00015220956255810304, "loss": 4.2446, "step": 9414 }, { "epoch": 0.9762230330052039, "grad_norm": 0.73828125, "learning_rate": 0.0001522002979413105, "loss": 4.2817, "step": 9415 }, { "epoch": 0.9763267210596919, "grad_norm": 0.765625, "learning_rate": 0.00015219103270860965, "loss": 4.2879, "step": 9416 }, { "epoch": 0.97643040911418, "grad_norm": 0.8359375, "learning_rate": 0.00015218176686010974, "loss": 4.2554, "step": 9417 }, { "epoch": 0.976534097168668, "grad_norm": 0.8046875, "learning_rate": 0.00015217250039592018, "loss": 4.2662, "step": 9418 }, { "epoch": 0.9766377852231561, "grad_norm": 0.77734375, "learning_rate": 0.00015216323331615023, "loss": 4.271, "step": 9419 }, { "epoch": 0.9767414732776442, "grad_norm": 0.76171875, "learning_rate": 0.0001521539656209093, "loss": 4.261, "step": 9420 }, { "epoch": 0.9768451613321323, "grad_norm": 0.7890625, "learning_rate": 0.00015214469731030666, "loss": 4.3136, "step": 9421 }, { "epoch": 0.9769488493866203, "grad_norm": 0.71484375, "learning_rate": 0.00015213542838445176, "loss": 4.2493, "step": 9422 }, { "epoch": 0.9770525374411084, "grad_norm": 0.7890625, "learning_rate": 0.0001521261588434539, "loss": 4.2347, "step": 9423 }, { "epoch": 0.9771562254955966, "grad_norm": 0.66796875, "learning_rate": 0.00015211688868742247, "loss": 4.2623, "step": 9424 }, { "epoch": 0.9772599135500846, "grad_norm": 0.7109375, "learning_rate": 0.00015210761791646684, "loss": 4.2435, "step": 9425 }, { "epoch": 0.9773636016045727, "grad_norm": 0.72265625, "learning_rate": 0.0001520983465306964, "loss": 4.2485, "step": 9426 }, { "epoch": 0.9774672896590607, "grad_norm": 0.75390625, "learning_rate": 0.0001520890745302205, "loss": 4.2038, "step": 9427 }, { "epoch": 0.9775709777135488, "grad_norm": 0.703125, "learning_rate": 0.00015207980191514863, "loss": 4.2652, "step": 9428 }, { "epoch": 0.9776746657680369, "grad_norm": 0.69921875, "learning_rate": 0.00015207052868559012, "loss": 4.2598, "step": 9429 }, { "epoch": 0.977778353822525, "grad_norm": 0.7421875, "learning_rate": 0.00015206125484165445, "loss": 4.2875, "step": 9430 }, { "epoch": 0.977882041877013, "grad_norm": 0.74609375, "learning_rate": 0.000152051980383451, "loss": 4.2917, "step": 9431 }, { "epoch": 0.9779857299315011, "grad_norm": 0.73828125, "learning_rate": 0.00015204270531108915, "loss": 4.2378, "step": 9432 }, { "epoch": 0.9780894179859891, "grad_norm": 0.76171875, "learning_rate": 0.00015203342962467843, "loss": 4.2538, "step": 9433 }, { "epoch": 0.9781931060404773, "grad_norm": 0.671875, "learning_rate": 0.00015202415332432826, "loss": 4.2516, "step": 9434 }, { "epoch": 0.9782967940949653, "grad_norm": 0.703125, "learning_rate": 0.00015201487641014803, "loss": 4.2188, "step": 9435 }, { "epoch": 0.9784004821494534, "grad_norm": 0.73046875, "learning_rate": 0.00015200559888224727, "loss": 4.2706, "step": 9436 }, { "epoch": 0.9785041702039414, "grad_norm": 0.77734375, "learning_rate": 0.00015199632074073538, "loss": 4.294, "step": 9437 }, { "epoch": 0.9786078582584296, "grad_norm": 0.79296875, "learning_rate": 0.00015198704198572189, "loss": 4.2657, "step": 9438 }, { "epoch": 0.9787115463129176, "grad_norm": 0.80859375, "learning_rate": 0.00015197776261731627, "loss": 4.3236, "step": 9439 }, { "epoch": 0.9788152343674057, "grad_norm": 0.70703125, "learning_rate": 0.000151968482635628, "loss": 4.2368, "step": 9440 }, { "epoch": 0.9789189224218937, "grad_norm": 0.81640625, "learning_rate": 0.00015195920204076654, "loss": 4.2963, "step": 9441 }, { "epoch": 0.9790226104763818, "grad_norm": 0.78125, "learning_rate": 0.00015194992083284142, "loss": 4.2503, "step": 9442 }, { "epoch": 0.9791262985308699, "grad_norm": 0.83203125, "learning_rate": 0.00015194063901196217, "loss": 4.2708, "step": 9443 }, { "epoch": 0.979229986585358, "grad_norm": 0.70703125, "learning_rate": 0.00015193135657823827, "loss": 4.2709, "step": 9444 }, { "epoch": 0.979333674639846, "grad_norm": 0.8359375, "learning_rate": 0.00015192207353177922, "loss": 4.2549, "step": 9445 }, { "epoch": 0.9794373626943341, "grad_norm": 0.73046875, "learning_rate": 0.00015191278987269463, "loss": 4.2305, "step": 9446 }, { "epoch": 0.9795410507488221, "grad_norm": 0.82421875, "learning_rate": 0.00015190350560109398, "loss": 4.2507, "step": 9447 }, { "epoch": 0.9796447388033103, "grad_norm": 0.76171875, "learning_rate": 0.00015189422071708685, "loss": 4.2778, "step": 9448 }, { "epoch": 0.9797484268577983, "grad_norm": 0.91015625, "learning_rate": 0.00015188493522078276, "loss": 4.2954, "step": 9449 }, { "epoch": 0.9798521149122864, "grad_norm": 0.765625, "learning_rate": 0.00015187564911229125, "loss": 4.298, "step": 9450 }, { "epoch": 0.9799558029667744, "grad_norm": 0.94921875, "learning_rate": 0.00015186636239172198, "loss": 4.2874, "step": 9451 }, { "epoch": 0.9800594910212626, "grad_norm": 0.87109375, "learning_rate": 0.00015185707505918438, "loss": 4.2759, "step": 9452 }, { "epoch": 0.9801631790757506, "grad_norm": 0.890625, "learning_rate": 0.00015184778711478818, "loss": 4.3086, "step": 9453 }, { "epoch": 0.9802668671302387, "grad_norm": 0.81640625, "learning_rate": 0.00015183849855864286, "loss": 4.2788, "step": 9454 }, { "epoch": 0.9803705551847267, "grad_norm": 0.91015625, "learning_rate": 0.00015182920939085806, "loss": 4.2358, "step": 9455 }, { "epoch": 0.9804742432392148, "grad_norm": 0.85546875, "learning_rate": 0.0001518199196115434, "loss": 4.2496, "step": 9456 }, { "epoch": 0.9805779312937029, "grad_norm": 0.8125, "learning_rate": 0.0001518106292208084, "loss": 4.2617, "step": 9457 }, { "epoch": 0.980681619348191, "grad_norm": 0.80859375, "learning_rate": 0.00015180133821876278, "loss": 4.2754, "step": 9458 }, { "epoch": 0.980785307402679, "grad_norm": 0.8125, "learning_rate": 0.00015179204660551614, "loss": 4.2655, "step": 9459 }, { "epoch": 0.9808889954571671, "grad_norm": 0.83203125, "learning_rate": 0.00015178275438117807, "loss": 4.2661, "step": 9460 }, { "epoch": 0.9809926835116551, "grad_norm": 0.77734375, "learning_rate": 0.00015177346154585824, "loss": 4.2846, "step": 9461 }, { "epoch": 0.9810963715661433, "grad_norm": 0.8984375, "learning_rate": 0.0001517641680996663, "loss": 4.2724, "step": 9462 }, { "epoch": 0.9812000596206313, "grad_norm": 0.78125, "learning_rate": 0.0001517548740427119, "loss": 4.2302, "step": 9463 }, { "epoch": 0.9813037476751194, "grad_norm": 0.7890625, "learning_rate": 0.00015174557937510467, "loss": 4.2434, "step": 9464 }, { "epoch": 0.9814074357296074, "grad_norm": 0.84765625, "learning_rate": 0.00015173628409695432, "loss": 4.257, "step": 9465 }, { "epoch": 0.9815111237840956, "grad_norm": 0.90234375, "learning_rate": 0.0001517269882083705, "loss": 4.2477, "step": 9466 }, { "epoch": 0.9816148118385836, "grad_norm": 0.77734375, "learning_rate": 0.00015171769170946287, "loss": 4.2774, "step": 9467 }, { "epoch": 0.9817184998930717, "grad_norm": 0.9453125, "learning_rate": 0.00015170839460034122, "loss": 4.2568, "step": 9468 }, { "epoch": 0.9818221879475598, "grad_norm": 0.7890625, "learning_rate": 0.00015169909688111512, "loss": 4.2727, "step": 9469 }, { "epoch": 0.9819258760020478, "grad_norm": 0.8359375, "learning_rate": 0.0001516897985518943, "loss": 4.2676, "step": 9470 }, { "epoch": 0.982029564056536, "grad_norm": 0.796875, "learning_rate": 0.00015168049961278854, "loss": 4.2741, "step": 9471 }, { "epoch": 0.982133252111024, "grad_norm": 0.8203125, "learning_rate": 0.0001516712000639075, "loss": 4.2287, "step": 9472 }, { "epoch": 0.9822369401655121, "grad_norm": 0.8125, "learning_rate": 0.0001516618999053609, "loss": 4.2734, "step": 9473 }, { "epoch": 0.9823406282200001, "grad_norm": 0.83203125, "learning_rate": 0.0001516525991372585, "loss": 4.2906, "step": 9474 }, { "epoch": 0.9824443162744882, "grad_norm": 0.80859375, "learning_rate": 0.00015164329775971003, "loss": 4.2236, "step": 9475 }, { "epoch": 0.9825480043289763, "grad_norm": 0.7890625, "learning_rate": 0.00015163399577282526, "loss": 4.2571, "step": 9476 }, { "epoch": 0.9826516923834644, "grad_norm": 0.7890625, "learning_rate": 0.00015162469317671392, "loss": 4.2449, "step": 9477 }, { "epoch": 0.9827553804379524, "grad_norm": 0.7734375, "learning_rate": 0.00015161538997148573, "loss": 4.2457, "step": 9478 }, { "epoch": 0.9828590684924405, "grad_norm": 0.84375, "learning_rate": 0.00015160608615725054, "loss": 4.2468, "step": 9479 }, { "epoch": 0.9829627565469286, "grad_norm": 0.80078125, "learning_rate": 0.00015159678173411805, "loss": 4.2502, "step": 9480 }, { "epoch": 0.9830664446014167, "grad_norm": 0.83203125, "learning_rate": 0.0001515874767021981, "loss": 4.2675, "step": 9481 }, { "epoch": 0.9831701326559047, "grad_norm": 0.85546875, "learning_rate": 0.00015157817106160044, "loss": 4.3162, "step": 9482 }, { "epoch": 0.9832738207103928, "grad_norm": 0.73046875, "learning_rate": 0.0001515688648124349, "loss": 4.2683, "step": 9483 }, { "epoch": 0.9833775087648808, "grad_norm": 0.81640625, "learning_rate": 0.00015155955795481124, "loss": 4.2951, "step": 9484 }, { "epoch": 0.983481196819369, "grad_norm": 0.734375, "learning_rate": 0.0001515502504888393, "loss": 4.2818, "step": 9485 }, { "epoch": 0.983584884873857, "grad_norm": 0.765625, "learning_rate": 0.0001515409424146289, "loss": 4.3042, "step": 9486 }, { "epoch": 0.9836885729283451, "grad_norm": 0.80078125, "learning_rate": 0.00015153163373228987, "loss": 4.2414, "step": 9487 }, { "epoch": 0.9837922609828331, "grad_norm": 0.7890625, "learning_rate": 0.00015152232444193202, "loss": 4.2496, "step": 9488 }, { "epoch": 0.9838959490373212, "grad_norm": 0.77734375, "learning_rate": 0.00015151301454366522, "loss": 4.2784, "step": 9489 }, { "epoch": 0.9839996370918093, "grad_norm": 0.73828125, "learning_rate": 0.00015150370403759929, "loss": 4.2508, "step": 9490 }, { "epoch": 0.9841033251462974, "grad_norm": 0.84765625, "learning_rate": 0.0001514943929238441, "loss": 4.2928, "step": 9491 }, { "epoch": 0.9842070132007854, "grad_norm": 0.69921875, "learning_rate": 0.00015148508120250948, "loss": 4.2469, "step": 9492 }, { "epoch": 0.9843107012552735, "grad_norm": 0.80078125, "learning_rate": 0.00015147576887370535, "loss": 4.2814, "step": 9493 }, { "epoch": 0.9844143893097616, "grad_norm": 0.70703125, "learning_rate": 0.00015146645593754155, "loss": 4.2694, "step": 9494 }, { "epoch": 0.9845180773642497, "grad_norm": 0.75390625, "learning_rate": 0.000151457142394128, "loss": 4.2363, "step": 9495 }, { "epoch": 0.9846217654187377, "grad_norm": 0.671875, "learning_rate": 0.0001514478282435745, "loss": 4.2998, "step": 9496 }, { "epoch": 0.9847254534732258, "grad_norm": 0.7734375, "learning_rate": 0.00015143851348599108, "loss": 4.242, "step": 9497 }, { "epoch": 0.9848291415277138, "grad_norm": 0.765625, "learning_rate": 0.00015142919812148752, "loss": 4.2483, "step": 9498 }, { "epoch": 0.984932829582202, "grad_norm": 0.7734375, "learning_rate": 0.0001514198821501738, "loss": 4.2775, "step": 9499 }, { "epoch": 0.98503651763669, "grad_norm": 0.78515625, "learning_rate": 0.00015141056557215984, "loss": 4.2735, "step": 9500 }, { "epoch": 0.9851402056911781, "grad_norm": 0.80078125, "learning_rate": 0.00015140124838755554, "loss": 4.2642, "step": 9501 }, { "epoch": 0.9852438937456661, "grad_norm": 0.73828125, "learning_rate": 0.00015139193059647086, "loss": 4.2407, "step": 9502 }, { "epoch": 0.9853475818001542, "grad_norm": 0.78125, "learning_rate": 0.0001513826121990157, "loss": 4.3053, "step": 9503 }, { "epoch": 0.9854512698546423, "grad_norm": 0.7421875, "learning_rate": 0.00015137329319530002, "loss": 4.2636, "step": 9504 }, { "epoch": 0.9855549579091304, "grad_norm": 0.7578125, "learning_rate": 0.00015136397358543382, "loss": 4.2455, "step": 9505 }, { "epoch": 0.9856586459636184, "grad_norm": 0.8203125, "learning_rate": 0.00015135465336952702, "loss": 4.2578, "step": 9506 }, { "epoch": 0.9857623340181065, "grad_norm": 0.71484375, "learning_rate": 0.00015134533254768958, "loss": 4.2418, "step": 9507 }, { "epoch": 0.9858660220725945, "grad_norm": 0.84375, "learning_rate": 0.00015133601112003145, "loss": 4.2535, "step": 9508 }, { "epoch": 0.9859697101270827, "grad_norm": 0.7578125, "learning_rate": 0.0001513266890866627, "loss": 4.2542, "step": 9509 }, { "epoch": 0.9860733981815707, "grad_norm": 0.78125, "learning_rate": 0.00015131736644769328, "loss": 4.308, "step": 9510 }, { "epoch": 0.9861770862360588, "grad_norm": 0.8046875, "learning_rate": 0.00015130804320323318, "loss": 4.2822, "step": 9511 }, { "epoch": 0.9862807742905468, "grad_norm": 0.79296875, "learning_rate": 0.00015129871935339238, "loss": 4.2731, "step": 9512 }, { "epoch": 0.986384462345035, "grad_norm": 0.80859375, "learning_rate": 0.0001512893948982809, "loss": 4.2935, "step": 9513 }, { "epoch": 0.9864881503995231, "grad_norm": 0.8203125, "learning_rate": 0.00015128006983800883, "loss": 4.2167, "step": 9514 }, { "epoch": 0.9865918384540111, "grad_norm": 0.83984375, "learning_rate": 0.00015127074417268613, "loss": 4.2594, "step": 9515 }, { "epoch": 0.9866955265084992, "grad_norm": 0.78125, "learning_rate": 0.00015126141790242282, "loss": 4.2714, "step": 9516 }, { "epoch": 0.9867992145629872, "grad_norm": 0.87109375, "learning_rate": 0.00015125209102732895, "loss": 4.2274, "step": 9517 }, { "epoch": 0.9869029026174754, "grad_norm": 0.71484375, "learning_rate": 0.0001512427635475146, "loss": 4.2527, "step": 9518 }, { "epoch": 0.9870065906719634, "grad_norm": 0.828125, "learning_rate": 0.00015123343546308984, "loss": 4.2661, "step": 9519 }, { "epoch": 0.9871102787264515, "grad_norm": 0.75390625, "learning_rate": 0.00015122410677416467, "loss": 4.2873, "step": 9520 }, { "epoch": 0.9872139667809395, "grad_norm": 0.859375, "learning_rate": 0.00015121477748084917, "loss": 4.259, "step": 9521 }, { "epoch": 0.9873176548354277, "grad_norm": 0.6953125, "learning_rate": 0.00015120544758325346, "loss": 4.2133, "step": 9522 }, { "epoch": 0.9874213428899157, "grad_norm": 0.7265625, "learning_rate": 0.0001511961170814876, "loss": 4.252, "step": 9523 }, { "epoch": 0.9875250309444038, "grad_norm": 0.72265625, "learning_rate": 0.00015118678597566163, "loss": 4.2915, "step": 9524 }, { "epoch": 0.9876287189988918, "grad_norm": 0.75, "learning_rate": 0.0001511774542658857, "loss": 4.2505, "step": 9525 }, { "epoch": 0.98773240705338, "grad_norm": 0.78515625, "learning_rate": 0.00015116812195226995, "loss": 4.2576, "step": 9526 }, { "epoch": 0.987836095107868, "grad_norm": 0.69140625, "learning_rate": 0.00015115878903492443, "loss": 4.3013, "step": 9527 }, { "epoch": 0.9879397831623561, "grad_norm": 0.72265625, "learning_rate": 0.0001511494555139593, "loss": 4.2584, "step": 9528 }, { "epoch": 0.9880434712168441, "grad_norm": 0.69140625, "learning_rate": 0.0001511401213894846, "loss": 4.2957, "step": 9529 }, { "epoch": 0.9881471592713322, "grad_norm": 0.69140625, "learning_rate": 0.00015113078666161055, "loss": 4.2904, "step": 9530 }, { "epoch": 0.9882508473258202, "grad_norm": 0.6953125, "learning_rate": 0.00015112145133044732, "loss": 4.2956, "step": 9531 }, { "epoch": 0.9883545353803084, "grad_norm": 0.71875, "learning_rate": 0.00015111211539610496, "loss": 4.2502, "step": 9532 }, { "epoch": 0.9884582234347964, "grad_norm": 0.6875, "learning_rate": 0.00015110277885869362, "loss": 4.2664, "step": 9533 }, { "epoch": 0.9885619114892845, "grad_norm": 0.796875, "learning_rate": 0.00015109344171832355, "loss": 4.2533, "step": 9534 }, { "epoch": 0.9886655995437725, "grad_norm": 0.67578125, "learning_rate": 0.00015108410397510487, "loss": 4.2616, "step": 9535 }, { "epoch": 0.9887692875982607, "grad_norm": 0.8359375, "learning_rate": 0.0001510747656291478, "loss": 4.2467, "step": 9536 }, { "epoch": 0.9888729756527487, "grad_norm": 0.65625, "learning_rate": 0.00015106542668056241, "loss": 4.2971, "step": 9537 }, { "epoch": 0.9889766637072368, "grad_norm": 0.80859375, "learning_rate": 0.00015105608712945903, "loss": 4.2468, "step": 9538 }, { "epoch": 0.9890803517617248, "grad_norm": 0.671875, "learning_rate": 0.00015104674697594776, "loss": 4.2745, "step": 9539 }, { "epoch": 0.989184039816213, "grad_norm": 0.8203125, "learning_rate": 0.00015103740622013882, "loss": 4.2288, "step": 9540 }, { "epoch": 0.989287727870701, "grad_norm": 0.625, "learning_rate": 0.0001510280648621425, "loss": 4.3209, "step": 9541 }, { "epoch": 0.9893914159251891, "grad_norm": 0.74609375, "learning_rate": 0.00015101872290206888, "loss": 4.286, "step": 9542 }, { "epoch": 0.9894951039796771, "grad_norm": 0.72265625, "learning_rate": 0.00015100938034002826, "loss": 4.2625, "step": 9543 }, { "epoch": 0.9895987920341652, "grad_norm": 0.7734375, "learning_rate": 0.00015100003717613095, "loss": 4.249, "step": 9544 }, { "epoch": 0.9897024800886532, "grad_norm": 0.70703125, "learning_rate": 0.00015099069341048703, "loss": 4.2947, "step": 9545 }, { "epoch": 0.9898061681431414, "grad_norm": 0.80859375, "learning_rate": 0.00015098134904320686, "loss": 4.2678, "step": 9546 }, { "epoch": 0.9899098561976294, "grad_norm": 0.71484375, "learning_rate": 0.00015097200407440065, "loss": 4.2481, "step": 9547 }, { "epoch": 0.9900135442521175, "grad_norm": 0.76953125, "learning_rate": 0.0001509626585041787, "loss": 4.2628, "step": 9548 }, { "epoch": 0.9901172323066055, "grad_norm": 0.7890625, "learning_rate": 0.00015095331233265123, "loss": 4.3012, "step": 9549 }, { "epoch": 0.9902209203610937, "grad_norm": 0.7890625, "learning_rate": 0.00015094396555992852, "loss": 4.2533, "step": 9550 }, { "epoch": 0.9903246084155817, "grad_norm": 0.73046875, "learning_rate": 0.00015093461818612087, "loss": 4.263, "step": 9551 }, { "epoch": 0.9904282964700698, "grad_norm": 0.7734375, "learning_rate": 0.00015092527021133857, "loss": 4.2751, "step": 9552 }, { "epoch": 0.9905319845245578, "grad_norm": 0.77734375, "learning_rate": 0.00015091592163569195, "loss": 4.2127, "step": 9553 }, { "epoch": 0.990635672579046, "grad_norm": 0.73046875, "learning_rate": 0.00015090657245929123, "loss": 4.2407, "step": 9554 }, { "epoch": 0.990739360633534, "grad_norm": 0.71484375, "learning_rate": 0.00015089722268224676, "loss": 4.247, "step": 9555 }, { "epoch": 0.9908430486880221, "grad_norm": 0.796875, "learning_rate": 0.0001508878723046689, "loss": 4.2134, "step": 9556 }, { "epoch": 0.9909467367425101, "grad_norm": 0.71875, "learning_rate": 0.0001508785213266679, "loss": 4.2577, "step": 9557 }, { "epoch": 0.9910504247969982, "grad_norm": 0.7734375, "learning_rate": 0.00015086916974835413, "loss": 4.2612, "step": 9558 }, { "epoch": 0.9911541128514864, "grad_norm": 0.71875, "learning_rate": 0.00015085981756983796, "loss": 4.259, "step": 9559 }, { "epoch": 0.9912578009059744, "grad_norm": 0.8125, "learning_rate": 0.0001508504647912297, "loss": 4.2905, "step": 9560 }, { "epoch": 0.9913614889604625, "grad_norm": 0.78515625, "learning_rate": 0.0001508411114126397, "loss": 4.2423, "step": 9561 }, { "epoch": 0.9914651770149505, "grad_norm": 0.7265625, "learning_rate": 0.00015083175743417832, "loss": 4.2527, "step": 9562 }, { "epoch": 0.9915688650694386, "grad_norm": 0.84765625, "learning_rate": 0.00015082240285595592, "loss": 4.2274, "step": 9563 }, { "epoch": 0.9916725531239267, "grad_norm": 0.8046875, "learning_rate": 0.00015081304767808294, "loss": 4.2877, "step": 9564 }, { "epoch": 0.9917762411784148, "grad_norm": 0.765625, "learning_rate": 0.00015080369190066965, "loss": 4.2442, "step": 9565 }, { "epoch": 0.9918799292329028, "grad_norm": 0.74609375, "learning_rate": 0.00015079433552382654, "loss": 4.2374, "step": 9566 }, { "epoch": 0.9919836172873909, "grad_norm": 0.75390625, "learning_rate": 0.00015078497854766393, "loss": 4.2583, "step": 9567 }, { "epoch": 0.9920873053418789, "grad_norm": 0.73828125, "learning_rate": 0.00015077562097229227, "loss": 4.2587, "step": 9568 }, { "epoch": 0.9921909933963671, "grad_norm": 0.8046875, "learning_rate": 0.00015076626279782196, "loss": 4.2521, "step": 9569 }, { "epoch": 0.9922946814508551, "grad_norm": 0.7421875, "learning_rate": 0.0001507569040243634, "loss": 4.2325, "step": 9570 }, { "epoch": 0.9923983695053432, "grad_norm": 0.77734375, "learning_rate": 0.00015074754465202702, "loss": 4.2582, "step": 9571 }, { "epoch": 0.9925020575598312, "grad_norm": 0.71875, "learning_rate": 0.00015073818468092326, "loss": 4.2995, "step": 9572 }, { "epoch": 0.9926057456143194, "grad_norm": 0.77734375, "learning_rate": 0.00015072882411116257, "loss": 4.2835, "step": 9573 }, { "epoch": 0.9927094336688074, "grad_norm": 0.78515625, "learning_rate": 0.00015071946294285536, "loss": 4.2457, "step": 9574 }, { "epoch": 0.9928131217232955, "grad_norm": 0.75390625, "learning_rate": 0.0001507101011761121, "loss": 4.3043, "step": 9575 }, { "epoch": 0.9929168097777835, "grad_norm": 0.796875, "learning_rate": 0.00015070073881104325, "loss": 4.3035, "step": 9576 }, { "epoch": 0.9930204978322716, "grad_norm": 0.7421875, "learning_rate": 0.00015069137584775933, "loss": 4.2266, "step": 9577 }, { "epoch": 0.9931241858867597, "grad_norm": 0.79296875, "learning_rate": 0.00015068201228637072, "loss": 4.2521, "step": 9578 }, { "epoch": 0.9932278739412478, "grad_norm": 0.73828125, "learning_rate": 0.0001506726481269879, "loss": 4.2694, "step": 9579 }, { "epoch": 0.9933315619957358, "grad_norm": 0.75390625, "learning_rate": 0.00015066328336972142, "loss": 4.2715, "step": 9580 }, { "epoch": 0.9934352500502239, "grad_norm": 0.76171875, "learning_rate": 0.00015065391801468176, "loss": 4.2333, "step": 9581 }, { "epoch": 0.9935389381047119, "grad_norm": 0.71875, "learning_rate": 0.00015064455206197937, "loss": 4.285, "step": 9582 }, { "epoch": 0.9936426261592001, "grad_norm": 0.796875, "learning_rate": 0.00015063518551172486, "loss": 4.2574, "step": 9583 }, { "epoch": 0.9937463142136881, "grad_norm": 0.72265625, "learning_rate": 0.00015062581836402866, "loss": 4.2694, "step": 9584 }, { "epoch": 0.9938500022681762, "grad_norm": 0.74609375, "learning_rate": 0.00015061645061900132, "loss": 4.2823, "step": 9585 }, { "epoch": 0.9939536903226642, "grad_norm": 0.7265625, "learning_rate": 0.00015060708227675337, "loss": 4.2766, "step": 9586 }, { "epoch": 0.9940573783771524, "grad_norm": 0.734375, "learning_rate": 0.0001505977133373953, "loss": 4.2578, "step": 9587 }, { "epoch": 0.9941610664316404, "grad_norm": 0.76171875, "learning_rate": 0.00015058834380103772, "loss": 4.2511, "step": 9588 }, { "epoch": 0.9942647544861285, "grad_norm": 0.73046875, "learning_rate": 0.0001505789736677912, "loss": 4.2933, "step": 9589 }, { "epoch": 0.9943684425406165, "grad_norm": 0.87109375, "learning_rate": 0.0001505696029377662, "loss": 4.248, "step": 9590 }, { "epoch": 0.9944721305951046, "grad_norm": 0.8046875, "learning_rate": 0.0001505602316110734, "loss": 4.2504, "step": 9591 }, { "epoch": 0.9945758186495927, "grad_norm": 0.78125, "learning_rate": 0.00015055085968782326, "loss": 4.2362, "step": 9592 }, { "epoch": 0.9946795067040808, "grad_norm": 0.7890625, "learning_rate": 0.00015054148716812642, "loss": 4.2888, "step": 9593 }, { "epoch": 0.9947831947585688, "grad_norm": 0.78515625, "learning_rate": 0.00015053211405209347, "loss": 4.2504, "step": 9594 }, { "epoch": 0.9948868828130569, "grad_norm": 0.75, "learning_rate": 0.00015052274033983495, "loss": 4.2843, "step": 9595 }, { "epoch": 0.9949905708675449, "grad_norm": 0.75, "learning_rate": 0.00015051336603146153, "loss": 4.277, "step": 9596 }, { "epoch": 0.9950942589220331, "grad_norm": 0.859375, "learning_rate": 0.0001505039911270838, "loss": 4.2381, "step": 9597 }, { "epoch": 0.9951979469765211, "grad_norm": 0.79296875, "learning_rate": 0.00015049461562681232, "loss": 4.2435, "step": 9598 }, { "epoch": 0.9953016350310092, "grad_norm": 0.8046875, "learning_rate": 0.00015048523953075778, "loss": 4.2282, "step": 9599 }, { "epoch": 0.9954053230854972, "grad_norm": 0.80859375, "learning_rate": 0.00015047586283903076, "loss": 4.2414, "step": 9600 }, { "epoch": 0.9955090111399854, "grad_norm": 0.75, "learning_rate": 0.00015046648555174194, "loss": 4.3018, "step": 9601 }, { "epoch": 0.9956126991944734, "grad_norm": 0.765625, "learning_rate": 0.00015045710766900194, "loss": 4.2932, "step": 9602 }, { "epoch": 0.9957163872489615, "grad_norm": 0.765625, "learning_rate": 0.00015044772919092138, "loss": 4.266, "step": 9603 }, { "epoch": 0.9958200753034496, "grad_norm": 0.828125, "learning_rate": 0.00015043835011761095, "loss": 4.3192, "step": 9604 }, { "epoch": 0.9959237633579376, "grad_norm": 0.80859375, "learning_rate": 0.0001504289704491813, "loss": 4.316, "step": 9605 }, { "epoch": 0.9960274514124258, "grad_norm": 0.84375, "learning_rate": 0.00015041959018574313, "loss": 4.2825, "step": 9606 }, { "epoch": 0.9961311394669138, "grad_norm": 0.71484375, "learning_rate": 0.00015041020932740707, "loss": 4.2815, "step": 9607 }, { "epoch": 0.9962348275214019, "grad_norm": 0.84375, "learning_rate": 0.0001504008278742838, "loss": 4.261, "step": 9608 }, { "epoch": 0.9963385155758899, "grad_norm": 0.75390625, "learning_rate": 0.0001503914458264841, "loss": 4.2402, "step": 9609 }, { "epoch": 0.996442203630378, "grad_norm": 0.84765625, "learning_rate": 0.00015038206318411854, "loss": 4.2206, "step": 9610 }, { "epoch": 0.9965458916848661, "grad_norm": 0.7890625, "learning_rate": 0.00015037267994729795, "loss": 4.295, "step": 9611 }, { "epoch": 0.9966495797393542, "grad_norm": 0.7890625, "learning_rate": 0.0001503632961161329, "loss": 4.2348, "step": 9612 }, { "epoch": 0.9967532677938422, "grad_norm": 0.75390625, "learning_rate": 0.00015035391169073426, "loss": 4.2728, "step": 9613 }, { "epoch": 0.9968569558483303, "grad_norm": 0.89453125, "learning_rate": 0.00015034452667121267, "loss": 4.2732, "step": 9614 }, { "epoch": 0.9969606439028184, "grad_norm": 0.7265625, "learning_rate": 0.0001503351410576789, "loss": 4.2078, "step": 9615 }, { "epoch": 0.9970643319573065, "grad_norm": 0.78515625, "learning_rate": 0.00015032575485024365, "loss": 4.2402, "step": 9616 }, { "epoch": 0.9971680200117945, "grad_norm": 0.68359375, "learning_rate": 0.0001503163680490177, "loss": 4.2576, "step": 9617 }, { "epoch": 0.9972717080662826, "grad_norm": 0.75390625, "learning_rate": 0.00015030698065411177, "loss": 4.2684, "step": 9618 }, { "epoch": 0.9973753961207706, "grad_norm": 0.6875, "learning_rate": 0.00015029759266563667, "loss": 4.2699, "step": 9619 }, { "epoch": 0.9974790841752588, "grad_norm": 0.75, "learning_rate": 0.00015028820408370314, "loss": 4.2519, "step": 9620 }, { "epoch": 0.9975827722297468, "grad_norm": 0.69140625, "learning_rate": 0.00015027881490842194, "loss": 4.2527, "step": 9621 }, { "epoch": 0.9976864602842349, "grad_norm": 0.70703125, "learning_rate": 0.0001502694251399039, "loss": 4.2643, "step": 9622 }, { "epoch": 0.9977901483387229, "grad_norm": 0.671875, "learning_rate": 0.00015026003477825977, "loss": 4.2781, "step": 9623 }, { "epoch": 0.997893836393211, "grad_norm": 0.703125, "learning_rate": 0.00015025064382360036, "loss": 4.2317, "step": 9624 }, { "epoch": 0.9979975244476991, "grad_norm": 0.71484375, "learning_rate": 0.00015024125227603645, "loss": 4.2581, "step": 9625 }, { "epoch": 0.9981012125021872, "grad_norm": 0.71875, "learning_rate": 0.00015023186013567887, "loss": 4.2255, "step": 9626 }, { "epoch": 0.9982049005566752, "grad_norm": 0.69921875, "learning_rate": 0.00015022246740263844, "loss": 4.225, "step": 9627 }, { "epoch": 0.9983085886111633, "grad_norm": 0.66796875, "learning_rate": 0.00015021307407702604, "loss": 4.2266, "step": 9628 }, { "epoch": 0.9984122766656514, "grad_norm": 0.69140625, "learning_rate": 0.0001502036801589524, "loss": 4.2333, "step": 9629 }, { "epoch": 0.9985159647201395, "grad_norm": 0.6328125, "learning_rate": 0.00015019428564852838, "loss": 4.248, "step": 9630 }, { "epoch": 0.9986196527746275, "grad_norm": 0.71484375, "learning_rate": 0.0001501848905458649, "loss": 4.2952, "step": 9631 }, { "epoch": 0.9987233408291156, "grad_norm": 0.72265625, "learning_rate": 0.0001501754948510727, "loss": 4.2362, "step": 9632 }, { "epoch": 0.9988270288836036, "grad_norm": 0.72265625, "learning_rate": 0.0001501660985642627, "loss": 4.2598, "step": 9633 }, { "epoch": 0.9989307169380918, "grad_norm": 0.76953125, "learning_rate": 0.0001501567016855458, "loss": 4.2798, "step": 9634 }, { "epoch": 0.9990344049925798, "grad_norm": 0.65625, "learning_rate": 0.00015014730421503286, "loss": 4.2741, "step": 9635 }, { "epoch": 0.9991380930470679, "grad_norm": 0.73828125, "learning_rate": 0.00015013790615283468, "loss": 4.2794, "step": 9636 }, { "epoch": 0.9992417811015559, "grad_norm": 0.68359375, "learning_rate": 0.00015012850749906222, "loss": 4.2997, "step": 9637 }, { "epoch": 0.999345469156044, "grad_norm": 0.8125, "learning_rate": 0.00015011910825382638, "loss": 4.2481, "step": 9638 }, { "epoch": 0.9994491572105321, "grad_norm": 0.75390625, "learning_rate": 0.00015010970841723803, "loss": 4.2476, "step": 9639 }, { "epoch": 0.9995528452650202, "grad_norm": 0.77734375, "learning_rate": 0.00015010030798940808, "loss": 4.2513, "step": 9640 }, { "epoch": 0.9996565333195082, "grad_norm": 0.72265625, "learning_rate": 0.00015009090697044747, "loss": 4.2688, "step": 9641 }, { "epoch": 0.9997602213739963, "grad_norm": 0.78125, "learning_rate": 0.0001500815053604671, "loss": 4.2505, "step": 9642 }, { "epoch": 0.9998639094284844, "grad_norm": 0.72265625, "learning_rate": 0.00015007210315957792, "loss": 4.2664, "step": 9643 }, { "epoch": 0.9999675974829725, "grad_norm": 0.71484375, "learning_rate": 0.00015006270036789084, "loss": 4.2492, "step": 9644 }, { "epoch": 0.9999675974829725, "eval_loss": 4.279594898223877, "eval_runtime": 0.4479, "eval_samples_per_second": 332.645, "eval_steps_per_second": 15.628, "step": 9644 } ], "logging_steps": 1, "max_steps": 28932, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 2411, "total_flos": 1.3911233195677843e+19, "train_batch_size": 3, "trial_name": null, "trial_params": null }