{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999863865008543, "eval_steps": 500, "global_step": 36728, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.4453996583011714e-05, "grad_norm": 0.6514295021273755, "learning_rate": 1.8148820326678765e-07, "loss": 12.2835, "step": 1 }, { "epoch": 0.00010890799316602343, "grad_norm": 0.6445339767412759, "learning_rate": 3.629764065335753e-07, "loss": 12.0613, "step": 2 }, { "epoch": 0.00016336198974903514, "grad_norm": 0.7426420695460909, "learning_rate": 5.44464609800363e-07, "loss": 12.4091, "step": 3 }, { "epoch": 0.00021781598633204685, "grad_norm": 0.7481904256847658, "learning_rate": 7.259528130671506e-07, "loss": 12.3499, "step": 4 }, { "epoch": 0.0002722699829150586, "grad_norm": 0.6345118387930209, "learning_rate": 9.074410163339384e-07, "loss": 12.1005, "step": 5 }, { "epoch": 0.0003267239794980703, "grad_norm": 0.7830858567681372, "learning_rate": 1.088929219600726e-06, "loss": 12.3444, "step": 6 }, { "epoch": 0.000381177976081082, "grad_norm": 0.6552438665550809, "learning_rate": 1.2704174228675138e-06, "loss": 12.0518, "step": 7 }, { "epoch": 0.0004356319726640937, "grad_norm": 1.0230629177664952, "learning_rate": 1.4519056261343012e-06, "loss": 12.2645, "step": 8 }, { "epoch": 0.0004900859692471054, "grad_norm": 0.736508733398066, "learning_rate": 1.6333938294010888e-06, "loss": 12.4657, "step": 9 }, { "epoch": 0.0005445399658301172, "grad_norm": 0.7832204882876573, "learning_rate": 1.8148820326678768e-06, "loss": 12.2456, "step": 10 }, { "epoch": 0.0005989939624131288, "grad_norm": 0.6406598213024922, "learning_rate": 1.996370235934664e-06, "loss": 12.2987, "step": 11 }, { "epoch": 0.0006534479589961406, "grad_norm": 0.6616531858585198, "learning_rate": 2.177858439201452e-06, "loss": 12.2704, "step": 12 }, { "epoch": 0.0007079019555791523, "grad_norm": 0.9041079613798669, "learning_rate": 2.35934664246824e-06, "loss": 12.219, "step": 13 }, { "epoch": 0.000762355952162164, "grad_norm": 0.7350143336611246, "learning_rate": 2.5408348457350276e-06, "loss": 12.3781, "step": 14 }, { "epoch": 0.0008168099487451757, "grad_norm": 0.650775455014501, "learning_rate": 2.722323049001815e-06, "loss": 12.3572, "step": 15 }, { "epoch": 0.0008712639453281874, "grad_norm": 0.7993019309118847, "learning_rate": 2.9038112522686024e-06, "loss": 12.2421, "step": 16 }, { "epoch": 0.0009257179419111992, "grad_norm": 0.7507798789960347, "learning_rate": 3.0852994555353906e-06, "loss": 12.3405, "step": 17 }, { "epoch": 0.000980171938494211, "grad_norm": 0.7232033569227868, "learning_rate": 3.2667876588021776e-06, "loss": 12.2115, "step": 18 }, { "epoch": 0.0010346259350772226, "grad_norm": 0.6730149594636531, "learning_rate": 3.448275862068966e-06, "loss": 12.3185, "step": 19 }, { "epoch": 0.0010890799316602344, "grad_norm": 0.7695953679018918, "learning_rate": 3.6297640653357536e-06, "loss": 12.3159, "step": 20 }, { "epoch": 0.001143533928243246, "grad_norm": 0.711349662965617, "learning_rate": 3.8112522686025406e-06, "loss": 12.2963, "step": 21 }, { "epoch": 0.0011979879248262576, "grad_norm": 0.8016325530794184, "learning_rate": 3.992740471869328e-06, "loss": 12.3665, "step": 22 }, { "epoch": 0.0012524419214092694, "grad_norm": 0.5886320970744596, "learning_rate": 4.174228675136116e-06, "loss": 12.1546, "step": 23 }, { "epoch": 0.0013068959179922811, "grad_norm": 0.6537757775197935, "learning_rate": 4.355716878402904e-06, "loss": 12.4958, "step": 24 }, { "epoch": 0.0013613499145752929, "grad_norm": 0.6751351224388435, "learning_rate": 4.537205081669692e-06, "loss": 12.3862, "step": 25 }, { "epoch": 0.0014158039111583046, "grad_norm": 0.6901807715714254, "learning_rate": 4.71869328493648e-06, "loss": 12.1915, "step": 26 }, { "epoch": 0.0014702579077413163, "grad_norm": 0.7439712686389042, "learning_rate": 4.900181488203267e-06, "loss": 12.3018, "step": 27 }, { "epoch": 0.001524711904324328, "grad_norm": 0.6966653361776312, "learning_rate": 5.081669691470055e-06, "loss": 12.2465, "step": 28 }, { "epoch": 0.0015791659009073396, "grad_norm": 0.6787479289536181, "learning_rate": 5.263157894736842e-06, "loss": 12.3024, "step": 29 }, { "epoch": 0.0016336198974903514, "grad_norm": 0.7046649743230384, "learning_rate": 5.44464609800363e-06, "loss": 12.3373, "step": 30 }, { "epoch": 0.001688073894073363, "grad_norm": 0.6384618515156677, "learning_rate": 5.626134301270418e-06, "loss": 12.2291, "step": 31 }, { "epoch": 0.0017425278906563748, "grad_norm": 0.7312209897280082, "learning_rate": 5.807622504537205e-06, "loss": 12.2218, "step": 32 }, { "epoch": 0.0017969818872393866, "grad_norm": 0.6929587260508587, "learning_rate": 5.9891107078039935e-06, "loss": 12.2801, "step": 33 }, { "epoch": 0.0018514358838223983, "grad_norm": 0.6811907398958092, "learning_rate": 6.170598911070781e-06, "loss": 12.2846, "step": 34 }, { "epoch": 0.00190588988040541, "grad_norm": 0.7114030300924737, "learning_rate": 6.352087114337568e-06, "loss": 12.2761, "step": 35 }, { "epoch": 0.001960343876988422, "grad_norm": 0.7208253804183478, "learning_rate": 6.533575317604355e-06, "loss": 12.2223, "step": 36 }, { "epoch": 0.0020147978735714335, "grad_norm": 0.7173103462219833, "learning_rate": 6.715063520871144e-06, "loss": 12.2833, "step": 37 }, { "epoch": 0.0020692518701544453, "grad_norm": 0.7199801679873419, "learning_rate": 6.896551724137932e-06, "loss": 12.4004, "step": 38 }, { "epoch": 0.002123705866737457, "grad_norm": 0.6283121742102948, "learning_rate": 7.078039927404719e-06, "loss": 12.2422, "step": 39 }, { "epoch": 0.0021781598633204688, "grad_norm": 0.6438610301627259, "learning_rate": 7.259528130671507e-06, "loss": 12.2721, "step": 40 }, { "epoch": 0.0022326138599034805, "grad_norm": 0.673710537418366, "learning_rate": 7.441016333938294e-06, "loss": 12.2338, "step": 41 }, { "epoch": 0.002287067856486492, "grad_norm": 0.7046804607495716, "learning_rate": 7.622504537205081e-06, "loss": 12.3124, "step": 42 }, { "epoch": 0.0023415218530695035, "grad_norm": 0.7040526020482769, "learning_rate": 7.80399274047187e-06, "loss": 12.2752, "step": 43 }, { "epoch": 0.0023959758496525153, "grad_norm": 0.7306043247419787, "learning_rate": 7.985480943738657e-06, "loss": 12.4015, "step": 44 }, { "epoch": 0.002450429846235527, "grad_norm": 0.664490461781453, "learning_rate": 8.166969147005445e-06, "loss": 12.3169, "step": 45 }, { "epoch": 0.0025048838428185388, "grad_norm": 0.6548692389195887, "learning_rate": 8.348457350272232e-06, "loss": 12.2979, "step": 46 }, { "epoch": 0.0025593378394015505, "grad_norm": 0.7790840513869893, "learning_rate": 8.52994555353902e-06, "loss": 12.3873, "step": 47 }, { "epoch": 0.0026137918359845622, "grad_norm": 0.700770109715519, "learning_rate": 8.711433756805808e-06, "loss": 12.2902, "step": 48 }, { "epoch": 0.002668245832567574, "grad_norm": 0.7284900184817878, "learning_rate": 8.892921960072596e-06, "loss": 12.2719, "step": 49 }, { "epoch": 0.0027226998291505857, "grad_norm": 0.8332734018054926, "learning_rate": 9.074410163339384e-06, "loss": 12.2799, "step": 50 }, { "epoch": 0.0027771538257335975, "grad_norm": 0.6965636980670914, "learning_rate": 9.255898366606171e-06, "loss": 12.2629, "step": 51 }, { "epoch": 0.002831607822316609, "grad_norm": 0.6783391272444385, "learning_rate": 9.43738656987296e-06, "loss": 12.1928, "step": 52 }, { "epoch": 0.002886061818899621, "grad_norm": 0.6768160560169009, "learning_rate": 9.618874773139747e-06, "loss": 12.2738, "step": 53 }, { "epoch": 0.0029405158154826327, "grad_norm": 0.7038244532929006, "learning_rate": 9.800362976406533e-06, "loss": 12.2378, "step": 54 }, { "epoch": 0.0029949698120656444, "grad_norm": 0.6932385380100631, "learning_rate": 9.981851179673321e-06, "loss": 12.3725, "step": 55 }, { "epoch": 0.003049423808648656, "grad_norm": 0.6915084120243854, "learning_rate": 1.016333938294011e-05, "loss": 12.2021, "step": 56 }, { "epoch": 0.003103877805231668, "grad_norm": 0.7148909867057305, "learning_rate": 1.0344827586206897e-05, "loss": 12.3215, "step": 57 }, { "epoch": 0.0031583318018146792, "grad_norm": 0.6991958091345991, "learning_rate": 1.0526315789473684e-05, "loss": 12.2382, "step": 58 }, { "epoch": 0.003212785798397691, "grad_norm": 0.6919246914121425, "learning_rate": 1.0707803992740472e-05, "loss": 12.183, "step": 59 }, { "epoch": 0.0032672397949807027, "grad_norm": 0.8025321181544712, "learning_rate": 1.088929219600726e-05, "loss": 12.4441, "step": 60 }, { "epoch": 0.0033216937915637144, "grad_norm": 0.6698039631860974, "learning_rate": 1.1070780399274048e-05, "loss": 12.2611, "step": 61 }, { "epoch": 0.003376147788146726, "grad_norm": 0.7407394943261039, "learning_rate": 1.1252268602540836e-05, "loss": 12.2053, "step": 62 }, { "epoch": 0.003430601784729738, "grad_norm": 0.6865613288316597, "learning_rate": 1.1433756805807623e-05, "loss": 12.3175, "step": 63 }, { "epoch": 0.0034850557813127497, "grad_norm": 0.7041064521893132, "learning_rate": 1.161524500907441e-05, "loss": 12.2321, "step": 64 }, { "epoch": 0.0035395097778957614, "grad_norm": 0.6723425810734814, "learning_rate": 1.1796733212341199e-05, "loss": 12.2558, "step": 65 }, { "epoch": 0.003593963774478773, "grad_norm": 0.661956982839324, "learning_rate": 1.1978221415607987e-05, "loss": 12.2064, "step": 66 }, { "epoch": 0.003648417771061785, "grad_norm": 0.7483218714408623, "learning_rate": 1.2159709618874773e-05, "loss": 12.3637, "step": 67 }, { "epoch": 0.0037028717676447966, "grad_norm": 0.6719289161767829, "learning_rate": 1.2341197822141563e-05, "loss": 12.2866, "step": 68 }, { "epoch": 0.0037573257642278084, "grad_norm": 0.7353482196298523, "learning_rate": 1.2522686025408347e-05, "loss": 12.3733, "step": 69 }, { "epoch": 0.00381177976081082, "grad_norm": 0.7121839832467598, "learning_rate": 1.2704174228675136e-05, "loss": 12.3107, "step": 70 }, { "epoch": 0.003866233757393832, "grad_norm": 0.7298666561899594, "learning_rate": 1.2885662431941924e-05, "loss": 12.2409, "step": 71 }, { "epoch": 0.003920687753976844, "grad_norm": 0.673187183618948, "learning_rate": 1.306715063520871e-05, "loss": 12.2624, "step": 72 }, { "epoch": 0.003975141750559855, "grad_norm": 0.6647579362296864, "learning_rate": 1.32486388384755e-05, "loss": 12.2228, "step": 73 }, { "epoch": 0.004029595747142867, "grad_norm": 0.7126085308286554, "learning_rate": 1.3430127041742288e-05, "loss": 12.1887, "step": 74 }, { "epoch": 0.004084049743725879, "grad_norm": 0.6508629608031615, "learning_rate": 1.3611615245009074e-05, "loss": 12.2574, "step": 75 }, { "epoch": 0.0041385037403088906, "grad_norm": 0.7217793347228008, "learning_rate": 1.3793103448275863e-05, "loss": 12.3596, "step": 76 }, { "epoch": 0.004192957736891902, "grad_norm": 0.7277886621255288, "learning_rate": 1.3974591651542651e-05, "loss": 12.1593, "step": 77 }, { "epoch": 0.004247411733474914, "grad_norm": 0.7476740623784802, "learning_rate": 1.4156079854809437e-05, "loss": 12.2851, "step": 78 }, { "epoch": 0.004301865730057926, "grad_norm": 0.670232339117414, "learning_rate": 1.4337568058076225e-05, "loss": 12.2627, "step": 79 }, { "epoch": 0.0043563197266409375, "grad_norm": 0.6467570917998483, "learning_rate": 1.4519056261343015e-05, "loss": 12.2336, "step": 80 }, { "epoch": 0.004410773723223949, "grad_norm": 0.7120208830515039, "learning_rate": 1.47005444646098e-05, "loss": 12.357, "step": 81 }, { "epoch": 0.004465227719806961, "grad_norm": 0.7432640467149172, "learning_rate": 1.4882032667876588e-05, "loss": 12.3145, "step": 82 }, { "epoch": 0.004519681716389972, "grad_norm": 0.8293675536060945, "learning_rate": 1.5063520871143378e-05, "loss": 12.6006, "step": 83 }, { "epoch": 0.004574135712972984, "grad_norm": 0.7120304671169275, "learning_rate": 1.5245009074410162e-05, "loss": 12.3627, "step": 84 }, { "epoch": 0.004628589709555995, "grad_norm": 0.6673914704270645, "learning_rate": 1.542649727767695e-05, "loss": 12.2417, "step": 85 }, { "epoch": 0.004683043706139007, "grad_norm": 0.6868912483040882, "learning_rate": 1.560798548094374e-05, "loss": 12.2832, "step": 86 }, { "epoch": 0.004737497702722019, "grad_norm": 0.6950953167328401, "learning_rate": 1.5789473684210526e-05, "loss": 12.2372, "step": 87 }, { "epoch": 0.004791951699305031, "grad_norm": 0.7383575421215864, "learning_rate": 1.5970961887477314e-05, "loss": 12.2887, "step": 88 }, { "epoch": 0.004846405695888042, "grad_norm": 0.703029378935624, "learning_rate": 1.6152450090744105e-05, "loss": 12.3149, "step": 89 }, { "epoch": 0.004900859692471054, "grad_norm": 0.7021279833401868, "learning_rate": 1.633393829401089e-05, "loss": 12.2678, "step": 90 }, { "epoch": 0.004955313689054066, "grad_norm": 0.7251509526578306, "learning_rate": 1.6515426497277677e-05, "loss": 12.3562, "step": 91 }, { "epoch": 0.0050097676856370775, "grad_norm": 0.7797560038561253, "learning_rate": 1.6696914700544465e-05, "loss": 12.276, "step": 92 }, { "epoch": 0.005064221682220089, "grad_norm": 0.7064592922388186, "learning_rate": 1.6878402903811253e-05, "loss": 12.3852, "step": 93 }, { "epoch": 0.005118675678803101, "grad_norm": 0.7053365174441039, "learning_rate": 1.705989110707804e-05, "loss": 12.3483, "step": 94 }, { "epoch": 0.005173129675386113, "grad_norm": 0.6738625360373299, "learning_rate": 1.7241379310344828e-05, "loss": 12.1678, "step": 95 }, { "epoch": 0.0052275836719691245, "grad_norm": 0.5972838096238351, "learning_rate": 1.7422867513611616e-05, "loss": 12.1698, "step": 96 }, { "epoch": 0.005282037668552136, "grad_norm": 0.694605357226162, "learning_rate": 1.7604355716878404e-05, "loss": 12.2028, "step": 97 }, { "epoch": 0.005336491665135148, "grad_norm": 0.7235709308856128, "learning_rate": 1.7785843920145192e-05, "loss": 12.3249, "step": 98 }, { "epoch": 0.00539094566171816, "grad_norm": 0.7116272658253867, "learning_rate": 1.796733212341198e-05, "loss": 12.1328, "step": 99 }, { "epoch": 0.0054453996583011715, "grad_norm": 0.7960311740307418, "learning_rate": 1.8148820326678767e-05, "loss": 12.4241, "step": 100 }, { "epoch": 0.005499853654884183, "grad_norm": 0.7607101112707406, "learning_rate": 1.8330308529945555e-05, "loss": 12.1386, "step": 101 }, { "epoch": 0.005554307651467195, "grad_norm": 0.6989823266122064, "learning_rate": 1.8511796733212343e-05, "loss": 12.3272, "step": 102 }, { "epoch": 0.005608761648050207, "grad_norm": 0.6771018070968361, "learning_rate": 1.869328493647913e-05, "loss": 12.2864, "step": 103 }, { "epoch": 0.005663215644633218, "grad_norm": 0.6776164869457959, "learning_rate": 1.887477313974592e-05, "loss": 12.2483, "step": 104 }, { "epoch": 0.00571766964121623, "grad_norm": 0.7161127268232864, "learning_rate": 1.9056261343012703e-05, "loss": 12.1731, "step": 105 }, { "epoch": 0.005772123637799242, "grad_norm": 0.7248124927863123, "learning_rate": 1.9237749546279494e-05, "loss": 12.4591, "step": 106 }, { "epoch": 0.005826577634382254, "grad_norm": 0.7591292383102465, "learning_rate": 1.941923774954628e-05, "loss": 12.1536, "step": 107 }, { "epoch": 0.005881031630965265, "grad_norm": 0.6567533407138826, "learning_rate": 1.9600725952813066e-05, "loss": 12.2191, "step": 108 }, { "epoch": 0.005935485627548277, "grad_norm": 0.7190643868028403, "learning_rate": 1.9782214156079858e-05, "loss": 12.2527, "step": 109 }, { "epoch": 0.005989939624131289, "grad_norm": 0.6898991311631406, "learning_rate": 1.9963702359346642e-05, "loss": 12.2642, "step": 110 }, { "epoch": 0.006044393620714301, "grad_norm": 0.7495187164171233, "learning_rate": 2.014519056261343e-05, "loss": 12.2646, "step": 111 }, { "epoch": 0.006098847617297312, "grad_norm": 0.7170076277469752, "learning_rate": 2.032667876588022e-05, "loss": 12.3398, "step": 112 }, { "epoch": 0.006153301613880324, "grad_norm": 0.731108164640672, "learning_rate": 2.0508166969147005e-05, "loss": 12.3531, "step": 113 }, { "epoch": 0.006207755610463336, "grad_norm": 0.7825773788282819, "learning_rate": 2.0689655172413793e-05, "loss": 12.2599, "step": 114 }, { "epoch": 0.006262209607046348, "grad_norm": 0.6717004811664388, "learning_rate": 2.087114337568058e-05, "loss": 12.3298, "step": 115 }, { "epoch": 0.0063166636036293584, "grad_norm": 0.6969969828201115, "learning_rate": 2.105263157894737e-05, "loss": 12.2737, "step": 116 }, { "epoch": 0.00637111760021237, "grad_norm": 0.7242188258016712, "learning_rate": 2.1234119782214157e-05, "loss": 12.3695, "step": 117 }, { "epoch": 0.006425571596795382, "grad_norm": 0.8811248563175815, "learning_rate": 2.1415607985480945e-05, "loss": 12.3923, "step": 118 }, { "epoch": 0.006480025593378394, "grad_norm": 0.6943426432171022, "learning_rate": 2.1597096188747732e-05, "loss": 12.2724, "step": 119 }, { "epoch": 0.006534479589961405, "grad_norm": 0.8367672147901168, "learning_rate": 2.177858439201452e-05, "loss": 12.2846, "step": 120 }, { "epoch": 0.006588933586544417, "grad_norm": 0.7169323963392524, "learning_rate": 2.1960072595281308e-05, "loss": 12.3054, "step": 121 }, { "epoch": 0.006643387583127429, "grad_norm": 0.7328339351641384, "learning_rate": 2.2141560798548096e-05, "loss": 12.3401, "step": 122 }, { "epoch": 0.006697841579710441, "grad_norm": 0.6950351401076931, "learning_rate": 2.2323049001814884e-05, "loss": 12.2651, "step": 123 }, { "epoch": 0.006752295576293452, "grad_norm": 0.7135461876331347, "learning_rate": 2.250453720508167e-05, "loss": 12.3066, "step": 124 }, { "epoch": 0.006806749572876464, "grad_norm": 0.6740158526405283, "learning_rate": 2.2686025408348456e-05, "loss": 12.2421, "step": 125 }, { "epoch": 0.006861203569459476, "grad_norm": 0.7511554883846614, "learning_rate": 2.2867513611615247e-05, "loss": 12.2741, "step": 126 }, { "epoch": 0.006915657566042488, "grad_norm": 0.6990531199482589, "learning_rate": 2.3049001814882035e-05, "loss": 12.384, "step": 127 }, { "epoch": 0.006970111562625499, "grad_norm": 0.7135501305238853, "learning_rate": 2.323049001814882e-05, "loss": 12.3623, "step": 128 }, { "epoch": 0.007024565559208511, "grad_norm": 0.7041623355500041, "learning_rate": 2.341197822141561e-05, "loss": 12.3004, "step": 129 }, { "epoch": 0.007079019555791523, "grad_norm": 0.6661526549812437, "learning_rate": 2.3593466424682398e-05, "loss": 12.1402, "step": 130 }, { "epoch": 0.0071334735523745346, "grad_norm": 0.7361476840420841, "learning_rate": 2.3774954627949183e-05, "loss": 12.1733, "step": 131 }, { "epoch": 0.007187927548957546, "grad_norm": 0.7967543207030372, "learning_rate": 2.3956442831215974e-05, "loss": 12.4345, "step": 132 }, { "epoch": 0.007242381545540558, "grad_norm": 0.77469629112749, "learning_rate": 2.413793103448276e-05, "loss": 12.3164, "step": 133 }, { "epoch": 0.00729683554212357, "grad_norm": 0.8176680285748424, "learning_rate": 2.4319419237749546e-05, "loss": 12.3651, "step": 134 }, { "epoch": 0.0073512895387065815, "grad_norm": 0.6969513054321705, "learning_rate": 2.4500907441016334e-05, "loss": 12.3023, "step": 135 }, { "epoch": 0.007405743535289593, "grad_norm": 0.6874259890342583, "learning_rate": 2.4682395644283125e-05, "loss": 12.3488, "step": 136 }, { "epoch": 0.007460197531872605, "grad_norm": 0.7848881195123376, "learning_rate": 2.486388384754991e-05, "loss": 12.3248, "step": 137 }, { "epoch": 0.007514651528455617, "grad_norm": 0.7536572549155603, "learning_rate": 2.5045372050816694e-05, "loss": 12.3101, "step": 138 }, { "epoch": 0.0075691055250386285, "grad_norm": 0.6779931950396582, "learning_rate": 2.5226860254083485e-05, "loss": 12.21, "step": 139 }, { "epoch": 0.00762355952162164, "grad_norm": 0.7551152796581928, "learning_rate": 2.5408348457350273e-05, "loss": 12.4042, "step": 140 }, { "epoch": 0.007678013518204652, "grad_norm": 0.9069979314667508, "learning_rate": 2.558983666061706e-05, "loss": 12.2847, "step": 141 }, { "epoch": 0.007732467514787664, "grad_norm": 0.7202987526526302, "learning_rate": 2.577132486388385e-05, "loss": 12.363, "step": 142 }, { "epoch": 0.0077869215113706754, "grad_norm": 0.7524333057239485, "learning_rate": 2.595281306715064e-05, "loss": 12.3406, "step": 143 }, { "epoch": 0.007841375507953687, "grad_norm": 0.7164888079373964, "learning_rate": 2.613430127041742e-05, "loss": 12.3726, "step": 144 }, { "epoch": 0.007895829504536699, "grad_norm": 0.7311551787417221, "learning_rate": 2.6315789473684212e-05, "loss": 12.2858, "step": 145 }, { "epoch": 0.00795028350111971, "grad_norm": 0.6773134174743295, "learning_rate": 2.6497277676951e-05, "loss": 12.3468, "step": 146 }, { "epoch": 0.008004737497702722, "grad_norm": 0.7174401575961286, "learning_rate": 2.6678765880217788e-05, "loss": 12.2457, "step": 147 }, { "epoch": 0.008059191494285734, "grad_norm": 0.6863666889751157, "learning_rate": 2.6860254083484575e-05, "loss": 12.2749, "step": 148 }, { "epoch": 0.008113645490868746, "grad_norm": 0.7262950182241283, "learning_rate": 2.7041742286751363e-05, "loss": 12.2609, "step": 149 }, { "epoch": 0.008168099487451758, "grad_norm": 0.8015975463544986, "learning_rate": 2.7223230490018148e-05, "loss": 12.2737, "step": 150 }, { "epoch": 0.00822255348403477, "grad_norm": 0.6974688735570929, "learning_rate": 2.7404718693284935e-05, "loss": 12.2764, "step": 151 }, { "epoch": 0.008277007480617781, "grad_norm": 0.7612859811519558, "learning_rate": 2.7586206896551727e-05, "loss": 12.3173, "step": 152 }, { "epoch": 0.008331461477200793, "grad_norm": 0.7535272542092525, "learning_rate": 2.7767695099818514e-05, "loss": 12.2599, "step": 153 }, { "epoch": 0.008385915473783805, "grad_norm": 0.8024979560171636, "learning_rate": 2.7949183303085302e-05, "loss": 12.3722, "step": 154 }, { "epoch": 0.008440369470366816, "grad_norm": 0.7531576898161162, "learning_rate": 2.813067150635209e-05, "loss": 12.3065, "step": 155 }, { "epoch": 0.008494823466949828, "grad_norm": 0.7575320428987756, "learning_rate": 2.8312159709618874e-05, "loss": 12.356, "step": 156 }, { "epoch": 0.00854927746353284, "grad_norm": 0.6921716231479544, "learning_rate": 2.8493647912885662e-05, "loss": 12.2568, "step": 157 }, { "epoch": 0.008603731460115852, "grad_norm": 0.6867771987616564, "learning_rate": 2.867513611615245e-05, "loss": 12.2139, "step": 158 }, { "epoch": 0.008658185456698863, "grad_norm": 0.7022464961805333, "learning_rate": 2.885662431941924e-05, "loss": 12.3242, "step": 159 }, { "epoch": 0.008712639453281875, "grad_norm": 0.7220153309753276, "learning_rate": 2.903811252268603e-05, "loss": 12.3137, "step": 160 }, { "epoch": 0.008767093449864887, "grad_norm": 0.7116207468224576, "learning_rate": 2.9219600725952817e-05, "loss": 12.4212, "step": 161 }, { "epoch": 0.008821547446447899, "grad_norm": 0.7606595881201347, "learning_rate": 2.94010889292196e-05, "loss": 12.3533, "step": 162 }, { "epoch": 0.00887600144303091, "grad_norm": 0.8010136363377759, "learning_rate": 2.958257713248639e-05, "loss": 12.384, "step": 163 }, { "epoch": 0.008930455439613922, "grad_norm": 0.7641948223788549, "learning_rate": 2.9764065335753177e-05, "loss": 12.2878, "step": 164 }, { "epoch": 0.008984909436196934, "grad_norm": 0.7313238653805536, "learning_rate": 2.9945553539019965e-05, "loss": 12.2797, "step": 165 }, { "epoch": 0.009039363432779944, "grad_norm": 0.7280246174606982, "learning_rate": 3.0127041742286756e-05, "loss": 12.2048, "step": 166 }, { "epoch": 0.009093817429362955, "grad_norm": 0.6735111329183364, "learning_rate": 3.0308529945553544e-05, "loss": 12.1298, "step": 167 }, { "epoch": 0.009148271425945967, "grad_norm": 0.662515268105397, "learning_rate": 3.0490018148820325e-05, "loss": 12.2694, "step": 168 }, { "epoch": 0.009202725422528979, "grad_norm": 0.6814067150138639, "learning_rate": 3.0671506352087116e-05, "loss": 12.3329, "step": 169 }, { "epoch": 0.00925717941911199, "grad_norm": 0.6299645547657693, "learning_rate": 3.08529945553539e-05, "loss": 12.2323, "step": 170 }, { "epoch": 0.009311633415695002, "grad_norm": 0.7046415195233939, "learning_rate": 3.103448275862069e-05, "loss": 12.1888, "step": 171 }, { "epoch": 0.009366087412278014, "grad_norm": 0.6635786550238101, "learning_rate": 3.121597096188748e-05, "loss": 12.3145, "step": 172 }, { "epoch": 0.009420541408861026, "grad_norm": 0.8405662525571916, "learning_rate": 3.139745916515426e-05, "loss": 12.3302, "step": 173 }, { "epoch": 0.009474995405444038, "grad_norm": 0.7053223200658312, "learning_rate": 3.157894736842105e-05, "loss": 12.2905, "step": 174 }, { "epoch": 0.00952944940202705, "grad_norm": 0.7179732734291909, "learning_rate": 3.176043557168784e-05, "loss": 12.3143, "step": 175 }, { "epoch": 0.009583903398610061, "grad_norm": 0.6713806463041468, "learning_rate": 3.194192377495463e-05, "loss": 12.2889, "step": 176 }, { "epoch": 0.009638357395193073, "grad_norm": 0.8537429522165395, "learning_rate": 3.212341197822142e-05, "loss": 12.3003, "step": 177 }, { "epoch": 0.009692811391776085, "grad_norm": 0.7936643802651663, "learning_rate": 3.230490018148821e-05, "loss": 12.4024, "step": 178 }, { "epoch": 0.009747265388359096, "grad_norm": 0.6673211953219038, "learning_rate": 3.248638838475499e-05, "loss": 12.3591, "step": 179 }, { "epoch": 0.009801719384942108, "grad_norm": 0.7664690630438143, "learning_rate": 3.266787658802178e-05, "loss": 12.2, "step": 180 }, { "epoch": 0.00985617338152512, "grad_norm": 0.7602742645809355, "learning_rate": 3.284936479128857e-05, "loss": 12.2861, "step": 181 }, { "epoch": 0.009910627378108132, "grad_norm": 0.7223425709471121, "learning_rate": 3.3030852994555354e-05, "loss": 12.3773, "step": 182 }, { "epoch": 0.009965081374691143, "grad_norm": 0.7272094124639393, "learning_rate": 3.3212341197822145e-05, "loss": 12.2859, "step": 183 }, { "epoch": 0.010019535371274155, "grad_norm": 0.7226902937341132, "learning_rate": 3.339382940108893e-05, "loss": 12.3335, "step": 184 }, { "epoch": 0.010073989367857167, "grad_norm": 0.7069144564518154, "learning_rate": 3.3575317604355714e-05, "loss": 12.3361, "step": 185 }, { "epoch": 0.010128443364440179, "grad_norm": 0.7351369937399913, "learning_rate": 3.3756805807622505e-05, "loss": 12.3304, "step": 186 }, { "epoch": 0.01018289736102319, "grad_norm": 0.7562449223296611, "learning_rate": 3.3938294010889297e-05, "loss": 12.4463, "step": 187 }, { "epoch": 0.010237351357606202, "grad_norm": 0.7380133543278365, "learning_rate": 3.411978221415608e-05, "loss": 12.3487, "step": 188 }, { "epoch": 0.010291805354189214, "grad_norm": 0.8018559952427972, "learning_rate": 3.430127041742287e-05, "loss": 12.4735, "step": 189 }, { "epoch": 0.010346259350772226, "grad_norm": 0.6992645273661477, "learning_rate": 3.4482758620689657e-05, "loss": 12.2485, "step": 190 }, { "epoch": 0.010400713347355237, "grad_norm": 0.7190265436442772, "learning_rate": 3.466424682395644e-05, "loss": 12.2231, "step": 191 }, { "epoch": 0.010455167343938249, "grad_norm": 0.7728396372356257, "learning_rate": 3.484573502722323e-05, "loss": 12.4133, "step": 192 }, { "epoch": 0.01050962134052126, "grad_norm": 0.8039774543145355, "learning_rate": 3.502722323049002e-05, "loss": 12.4807, "step": 193 }, { "epoch": 0.010564075337104272, "grad_norm": 0.7275544168870578, "learning_rate": 3.520871143375681e-05, "loss": 12.3294, "step": 194 }, { "epoch": 0.010618529333687284, "grad_norm": 0.8566875819319059, "learning_rate": 3.53901996370236e-05, "loss": 12.4185, "step": 195 }, { "epoch": 0.010672983330270296, "grad_norm": 0.8055650218978196, "learning_rate": 3.5571687840290383e-05, "loss": 12.5118, "step": 196 }, { "epoch": 0.010727437326853308, "grad_norm": 0.7349187170227696, "learning_rate": 3.575317604355717e-05, "loss": 12.2468, "step": 197 }, { "epoch": 0.01078189132343632, "grad_norm": 0.7134840505631237, "learning_rate": 3.593466424682396e-05, "loss": 12.388, "step": 198 }, { "epoch": 0.010836345320019331, "grad_norm": 0.6774397050532335, "learning_rate": 3.6116152450090743e-05, "loss": 12.2235, "step": 199 }, { "epoch": 0.010890799316602343, "grad_norm": 0.7744176715025664, "learning_rate": 3.6297640653357535e-05, "loss": 12.293, "step": 200 }, { "epoch": 0.010945253313185355, "grad_norm": 0.7348144637670879, "learning_rate": 3.6479128856624326e-05, "loss": 12.3172, "step": 201 }, { "epoch": 0.010999707309768366, "grad_norm": 0.6968931217183741, "learning_rate": 3.666061705989111e-05, "loss": 12.1999, "step": 202 }, { "epoch": 0.011054161306351378, "grad_norm": 0.847067144995256, "learning_rate": 3.6842105263157895e-05, "loss": 12.2706, "step": 203 }, { "epoch": 0.01110861530293439, "grad_norm": 0.7530363546527266, "learning_rate": 3.7023593466424686e-05, "loss": 12.2301, "step": 204 }, { "epoch": 0.011163069299517402, "grad_norm": 0.7564652633064927, "learning_rate": 3.720508166969147e-05, "loss": 12.3639, "step": 205 }, { "epoch": 0.011217523296100413, "grad_norm": 0.7135173235708773, "learning_rate": 3.738656987295826e-05, "loss": 12.2338, "step": 206 }, { "epoch": 0.011271977292683425, "grad_norm": 0.6858025792749569, "learning_rate": 3.7568058076225046e-05, "loss": 12.3793, "step": 207 }, { "epoch": 0.011326431289266437, "grad_norm": 0.7824021027165805, "learning_rate": 3.774954627949184e-05, "loss": 12.339, "step": 208 }, { "epoch": 0.011380885285849449, "grad_norm": 0.7250247555230146, "learning_rate": 3.793103448275862e-05, "loss": 12.2351, "step": 209 }, { "epoch": 0.01143533928243246, "grad_norm": 0.6809803088998201, "learning_rate": 3.8112522686025406e-05, "loss": 12.4248, "step": 210 }, { "epoch": 0.011489793279015472, "grad_norm": 0.7032058028279533, "learning_rate": 3.82940108892922e-05, "loss": 12.3732, "step": 211 }, { "epoch": 0.011544247275598484, "grad_norm": 0.754428880383836, "learning_rate": 3.847549909255899e-05, "loss": 12.3197, "step": 212 }, { "epoch": 0.011598701272181496, "grad_norm": 0.7504424399849499, "learning_rate": 3.865698729582577e-05, "loss": 12.3846, "step": 213 }, { "epoch": 0.011653155268764507, "grad_norm": 0.8098844516869852, "learning_rate": 3.883847549909256e-05, "loss": 12.4009, "step": 214 }, { "epoch": 0.011707609265347519, "grad_norm": 0.7434306698270551, "learning_rate": 3.901996370235935e-05, "loss": 12.3121, "step": 215 }, { "epoch": 0.01176206326193053, "grad_norm": 0.835756221038369, "learning_rate": 3.920145190562613e-05, "loss": 12.3561, "step": 216 }, { "epoch": 0.011816517258513543, "grad_norm": 0.7441859041127364, "learning_rate": 3.9382940108892924e-05, "loss": 12.3837, "step": 217 }, { "epoch": 0.011870971255096554, "grad_norm": 0.6774272997258979, "learning_rate": 3.9564428312159715e-05, "loss": 12.321, "step": 218 }, { "epoch": 0.011925425251679566, "grad_norm": 0.740858626909941, "learning_rate": 3.97459165154265e-05, "loss": 12.3377, "step": 219 }, { "epoch": 0.011979879248262578, "grad_norm": 0.8029304956373081, "learning_rate": 3.9927404718693284e-05, "loss": 12.2668, "step": 220 }, { "epoch": 0.01203433324484559, "grad_norm": 0.7255710443809265, "learning_rate": 4.0108892921960075e-05, "loss": 12.2052, "step": 221 }, { "epoch": 0.012088787241428601, "grad_norm": 0.7467564299377789, "learning_rate": 4.029038112522686e-05, "loss": 12.3052, "step": 222 }, { "epoch": 0.012143241238011613, "grad_norm": 0.7757543921998024, "learning_rate": 4.047186932849365e-05, "loss": 12.3313, "step": 223 }, { "epoch": 0.012197695234594625, "grad_norm": 0.7663879408559824, "learning_rate": 4.065335753176044e-05, "loss": 12.3567, "step": 224 }, { "epoch": 0.012252149231177636, "grad_norm": 0.830893424480459, "learning_rate": 4.0834845735027227e-05, "loss": 12.2888, "step": 225 }, { "epoch": 0.012306603227760648, "grad_norm": 0.7659576414170449, "learning_rate": 4.101633393829401e-05, "loss": 12.2534, "step": 226 }, { "epoch": 0.01236105722434366, "grad_norm": 0.8240503874061232, "learning_rate": 4.11978221415608e-05, "loss": 12.2796, "step": 227 }, { "epoch": 0.012415511220926672, "grad_norm": 0.8055779272980373, "learning_rate": 4.1379310344827587e-05, "loss": 12.3734, "step": 228 }, { "epoch": 0.012469965217509683, "grad_norm": 0.6990309940422604, "learning_rate": 4.156079854809438e-05, "loss": 12.2448, "step": 229 }, { "epoch": 0.012524419214092695, "grad_norm": 0.7814896272818583, "learning_rate": 4.174228675136116e-05, "loss": 12.3718, "step": 230 }, { "epoch": 0.012578873210675707, "grad_norm": 0.6456843025285693, "learning_rate": 4.192377495462795e-05, "loss": 12.2105, "step": 231 }, { "epoch": 0.012633327207258717, "grad_norm": 0.7374802126161342, "learning_rate": 4.210526315789474e-05, "loss": 12.3398, "step": 232 }, { "epoch": 0.012687781203841729, "grad_norm": 0.720484469540933, "learning_rate": 4.228675136116152e-05, "loss": 12.3939, "step": 233 }, { "epoch": 0.01274223520042474, "grad_norm": 0.7966943666753264, "learning_rate": 4.2468239564428313e-05, "loss": 12.41, "step": 234 }, { "epoch": 0.012796689197007752, "grad_norm": 0.6898793732291435, "learning_rate": 4.2649727767695105e-05, "loss": 12.1404, "step": 235 }, { "epoch": 0.012851143193590764, "grad_norm": 0.7640340826855201, "learning_rate": 4.283121597096189e-05, "loss": 12.2975, "step": 236 }, { "epoch": 0.012905597190173776, "grad_norm": 0.7512000166482308, "learning_rate": 4.301270417422868e-05, "loss": 12.483, "step": 237 }, { "epoch": 0.012960051186756787, "grad_norm": 0.7378596174200351, "learning_rate": 4.3194192377495465e-05, "loss": 12.3202, "step": 238 }, { "epoch": 0.013014505183339799, "grad_norm": 0.813840093552659, "learning_rate": 4.337568058076225e-05, "loss": 12.3155, "step": 239 }, { "epoch": 0.01306895917992281, "grad_norm": 0.7077202129525905, "learning_rate": 4.355716878402904e-05, "loss": 12.331, "step": 240 }, { "epoch": 0.013123413176505823, "grad_norm": 0.7432924329815834, "learning_rate": 4.373865698729583e-05, "loss": 12.4413, "step": 241 }, { "epoch": 0.013177867173088834, "grad_norm": 0.8087370274823864, "learning_rate": 4.3920145190562616e-05, "loss": 12.352, "step": 242 }, { "epoch": 0.013232321169671846, "grad_norm": 0.787315390919198, "learning_rate": 4.410163339382941e-05, "loss": 12.3767, "step": 243 }, { "epoch": 0.013286775166254858, "grad_norm": 0.7186308136487785, "learning_rate": 4.428312159709619e-05, "loss": 12.3972, "step": 244 }, { "epoch": 0.01334122916283787, "grad_norm": 0.6962881361865607, "learning_rate": 4.4464609800362976e-05, "loss": 12.2565, "step": 245 }, { "epoch": 0.013395683159420881, "grad_norm": 0.6962482325758493, "learning_rate": 4.464609800362977e-05, "loss": 12.2827, "step": 246 }, { "epoch": 0.013450137156003893, "grad_norm": 0.8134645016161036, "learning_rate": 4.482758620689655e-05, "loss": 12.3889, "step": 247 }, { "epoch": 0.013504591152586905, "grad_norm": 0.7305915380958484, "learning_rate": 4.500907441016334e-05, "loss": 12.3915, "step": 248 }, { "epoch": 0.013559045149169916, "grad_norm": 0.7618719998357277, "learning_rate": 4.5190562613430134e-05, "loss": 12.3237, "step": 249 }, { "epoch": 0.013613499145752928, "grad_norm": 0.7662374563616653, "learning_rate": 4.537205081669691e-05, "loss": 12.3536, "step": 250 }, { "epoch": 0.01366795314233594, "grad_norm": 0.7878655053271634, "learning_rate": 4.55535390199637e-05, "loss": 12.2811, "step": 251 }, { "epoch": 0.013722407138918952, "grad_norm": 0.7351407959136845, "learning_rate": 4.5735027223230494e-05, "loss": 12.3271, "step": 252 }, { "epoch": 0.013776861135501963, "grad_norm": 0.738532069890918, "learning_rate": 4.591651542649728e-05, "loss": 12.3281, "step": 253 }, { "epoch": 0.013831315132084975, "grad_norm": 0.7684920721548087, "learning_rate": 4.609800362976407e-05, "loss": 12.174, "step": 254 }, { "epoch": 0.013885769128667987, "grad_norm": 0.85709477932742, "learning_rate": 4.6279491833030854e-05, "loss": 12.3123, "step": 255 }, { "epoch": 0.013940223125250999, "grad_norm": 0.7699276923675464, "learning_rate": 4.646098003629764e-05, "loss": 12.3149, "step": 256 }, { "epoch": 0.01399467712183401, "grad_norm": 0.7252363551142195, "learning_rate": 4.664246823956443e-05, "loss": 12.2997, "step": 257 }, { "epoch": 0.014049131118417022, "grad_norm": 0.7307344287538694, "learning_rate": 4.682395644283122e-05, "loss": 12.3743, "step": 258 }, { "epoch": 0.014103585115000034, "grad_norm": 0.7688745278021739, "learning_rate": 4.7005444646098005e-05, "loss": 12.2888, "step": 259 }, { "epoch": 0.014158039111583046, "grad_norm": 0.7519190643126366, "learning_rate": 4.7186932849364796e-05, "loss": 12.2947, "step": 260 }, { "epoch": 0.014212493108166057, "grad_norm": 0.7811339132345416, "learning_rate": 4.736842105263158e-05, "loss": 12.462, "step": 261 }, { "epoch": 0.014266947104749069, "grad_norm": 0.7115737251169268, "learning_rate": 4.7549909255898365e-05, "loss": 12.3242, "step": 262 }, { "epoch": 0.01432140110133208, "grad_norm": 0.7507844491490189, "learning_rate": 4.7731397459165156e-05, "loss": 12.2234, "step": 263 }, { "epoch": 0.014375855097915093, "grad_norm": 0.7610821598373387, "learning_rate": 4.791288566243195e-05, "loss": 12.4286, "step": 264 }, { "epoch": 0.014430309094498104, "grad_norm": 0.8770692142481441, "learning_rate": 4.809437386569873e-05, "loss": 12.5179, "step": 265 }, { "epoch": 0.014484763091081116, "grad_norm": 0.7994207367776012, "learning_rate": 4.827586206896552e-05, "loss": 12.3988, "step": 266 }, { "epoch": 0.014539217087664128, "grad_norm": 0.7317523430466678, "learning_rate": 4.845735027223231e-05, "loss": 12.3898, "step": 267 }, { "epoch": 0.01459367108424714, "grad_norm": 0.7656122484229141, "learning_rate": 4.863883847549909e-05, "loss": 12.0056, "step": 268 }, { "epoch": 0.014648125080830151, "grad_norm": 0.7986586703788588, "learning_rate": 4.882032667876588e-05, "loss": 12.3948, "step": 269 }, { "epoch": 0.014702579077413163, "grad_norm": 0.7657932505480529, "learning_rate": 4.900181488203267e-05, "loss": 12.3687, "step": 270 }, { "epoch": 0.014757033073996175, "grad_norm": 0.7099680506665171, "learning_rate": 4.918330308529946e-05, "loss": 12.2355, "step": 271 }, { "epoch": 0.014811487070579187, "grad_norm": 0.6931982785581737, "learning_rate": 4.936479128856625e-05, "loss": 12.3633, "step": 272 }, { "epoch": 0.014865941067162198, "grad_norm": 0.7474303399881781, "learning_rate": 4.954627949183303e-05, "loss": 12.3746, "step": 273 }, { "epoch": 0.01492039506374521, "grad_norm": 0.862481964480327, "learning_rate": 4.972776769509982e-05, "loss": 12.4141, "step": 274 }, { "epoch": 0.014974849060328222, "grad_norm": 0.7318364097265114, "learning_rate": 4.990925589836661e-05, "loss": 12.2247, "step": 275 }, { "epoch": 0.015029303056911233, "grad_norm": 0.734011719238498, "learning_rate": 5.009074410163339e-05, "loss": 12.3864, "step": 276 }, { "epoch": 0.015083757053494245, "grad_norm": 0.7736991802041899, "learning_rate": 5.027223230490018e-05, "loss": 12.3802, "step": 277 }, { "epoch": 0.015138211050077257, "grad_norm": 0.7148902208446675, "learning_rate": 5.045372050816697e-05, "loss": 12.3205, "step": 278 }, { "epoch": 0.015192665046660269, "grad_norm": 0.7319645221512004, "learning_rate": 5.0635208711433755e-05, "loss": 12.3342, "step": 279 }, { "epoch": 0.01524711904324328, "grad_norm": 0.7299722315856292, "learning_rate": 5.0816696914700546e-05, "loss": 12.3347, "step": 280 }, { "epoch": 0.015301573039826292, "grad_norm": 0.7296940630372005, "learning_rate": 5.099818511796734e-05, "loss": 12.3668, "step": 281 }, { "epoch": 0.015356027036409304, "grad_norm": 0.7073166461717706, "learning_rate": 5.117967332123412e-05, "loss": 12.3703, "step": 282 }, { "epoch": 0.015410481032992316, "grad_norm": 0.7546793817999312, "learning_rate": 5.136116152450091e-05, "loss": 12.3491, "step": 283 }, { "epoch": 0.015464935029575327, "grad_norm": 0.685017972082848, "learning_rate": 5.15426497277677e-05, "loss": 12.2707, "step": 284 }, { "epoch": 0.01551938902615834, "grad_norm": 0.7562946426920689, "learning_rate": 5.172413793103449e-05, "loss": 12.3115, "step": 285 }, { "epoch": 0.015573843022741351, "grad_norm": 0.7216084563358899, "learning_rate": 5.190562613430128e-05, "loss": 12.3921, "step": 286 }, { "epoch": 0.01562829701932436, "grad_norm": 0.6939190861763674, "learning_rate": 5.2087114337568064e-05, "loss": 12.2032, "step": 287 }, { "epoch": 0.015682751015907374, "grad_norm": 0.7239626701676075, "learning_rate": 5.226860254083484e-05, "loss": 12.2799, "step": 288 }, { "epoch": 0.015737205012490384, "grad_norm": 0.7753868516243451, "learning_rate": 5.245009074410163e-05, "loss": 12.4619, "step": 289 }, { "epoch": 0.015791659009073398, "grad_norm": 0.7514557376174034, "learning_rate": 5.2631578947368424e-05, "loss": 12.4112, "step": 290 }, { "epoch": 0.015846113005656408, "grad_norm": 0.7377806576725301, "learning_rate": 5.281306715063521e-05, "loss": 12.3133, "step": 291 }, { "epoch": 0.01590056700223942, "grad_norm": 0.7454703909220026, "learning_rate": 5.2994555353902e-05, "loss": 12.2604, "step": 292 }, { "epoch": 0.01595502099882243, "grad_norm": 0.7425978328340237, "learning_rate": 5.3176043557168784e-05, "loss": 12.3337, "step": 293 }, { "epoch": 0.016009474995405445, "grad_norm": 0.8121816159096134, "learning_rate": 5.3357531760435575e-05, "loss": 12.3189, "step": 294 }, { "epoch": 0.016063928991988455, "grad_norm": 0.874090569506292, "learning_rate": 5.3539019963702366e-05, "loss": 12.4024, "step": 295 }, { "epoch": 0.01611838298857147, "grad_norm": 0.766723150191294, "learning_rate": 5.372050816696915e-05, "loss": 12.3427, "step": 296 }, { "epoch": 0.01617283698515448, "grad_norm": 0.7533549696517355, "learning_rate": 5.390199637023594e-05, "loss": 12.1945, "step": 297 }, { "epoch": 0.016227290981737492, "grad_norm": 0.82295747879306, "learning_rate": 5.4083484573502726e-05, "loss": 12.3395, "step": 298 }, { "epoch": 0.016281744978320502, "grad_norm": 0.7763144627979807, "learning_rate": 5.4264972776769504e-05, "loss": 12.4658, "step": 299 }, { "epoch": 0.016336198974903515, "grad_norm": 0.7803622453108773, "learning_rate": 5.4446460980036295e-05, "loss": 12.1774, "step": 300 }, { "epoch": 0.016390652971486525, "grad_norm": 0.7711036795355979, "learning_rate": 5.4627949183303086e-05, "loss": 12.3421, "step": 301 }, { "epoch": 0.01644510696806954, "grad_norm": 0.7862864570197541, "learning_rate": 5.480943738656987e-05, "loss": 12.44, "step": 302 }, { "epoch": 0.01649956096465255, "grad_norm": 0.7174141772095207, "learning_rate": 5.499092558983666e-05, "loss": 12.3887, "step": 303 }, { "epoch": 0.016554014961235562, "grad_norm": 0.8079655442331966, "learning_rate": 5.517241379310345e-05, "loss": 12.3736, "step": 304 }, { "epoch": 0.016608468957818572, "grad_norm": 0.7882892914456249, "learning_rate": 5.535390199637024e-05, "loss": 12.3937, "step": 305 }, { "epoch": 0.016662922954401586, "grad_norm": 0.7739803428465745, "learning_rate": 5.553539019963703e-05, "loss": 12.3119, "step": 306 }, { "epoch": 0.016717376950984596, "grad_norm": 0.8202440006814529, "learning_rate": 5.571687840290381e-05, "loss": 12.469, "step": 307 }, { "epoch": 0.01677183094756761, "grad_norm": 0.6948421754884823, "learning_rate": 5.5898366606170604e-05, "loss": 12.38, "step": 308 }, { "epoch": 0.01682628494415062, "grad_norm": 0.7278638745473617, "learning_rate": 5.6079854809437396e-05, "loss": 12.3829, "step": 309 }, { "epoch": 0.016880738940733633, "grad_norm": 0.8078662521778953, "learning_rate": 5.626134301270418e-05, "loss": 12.4609, "step": 310 }, { "epoch": 0.016935192937316643, "grad_norm": 0.8163381416660692, "learning_rate": 5.644283121597096e-05, "loss": 12.296, "step": 311 }, { "epoch": 0.016989646933899656, "grad_norm": 0.7499116355114528, "learning_rate": 5.662431941923775e-05, "loss": 12.2917, "step": 312 }, { "epoch": 0.017044100930482666, "grad_norm": 0.8559513637122415, "learning_rate": 5.680580762250453e-05, "loss": 12.3777, "step": 313 }, { "epoch": 0.01709855492706568, "grad_norm": 0.8186258240775369, "learning_rate": 5.6987295825771325e-05, "loss": 12.449, "step": 314 }, { "epoch": 0.01715300892364869, "grad_norm": 0.7314385570608495, "learning_rate": 5.7168784029038116e-05, "loss": 12.2357, "step": 315 }, { "epoch": 0.017207462920231703, "grad_norm": 0.8568191729204323, "learning_rate": 5.73502722323049e-05, "loss": 12.5074, "step": 316 }, { "epoch": 0.017261916916814713, "grad_norm": 0.8062065360577912, "learning_rate": 5.753176043557169e-05, "loss": 12.357, "step": 317 }, { "epoch": 0.017316370913397727, "grad_norm": 0.7803665817022063, "learning_rate": 5.771324863883848e-05, "loss": 12.4644, "step": 318 }, { "epoch": 0.017370824909980737, "grad_norm": 0.742566393860498, "learning_rate": 5.789473684210527e-05, "loss": 12.3906, "step": 319 }, { "epoch": 0.01742527890656375, "grad_norm": 0.7289345098896403, "learning_rate": 5.807622504537206e-05, "loss": 12.3417, "step": 320 }, { "epoch": 0.01747973290314676, "grad_norm": 0.7808326632555322, "learning_rate": 5.825771324863884e-05, "loss": 12.3817, "step": 321 }, { "epoch": 0.017534186899729774, "grad_norm": 0.8249973000669343, "learning_rate": 5.8439201451905634e-05, "loss": 12.2332, "step": 322 }, { "epoch": 0.017588640896312784, "grad_norm": 0.7574058668425114, "learning_rate": 5.862068965517241e-05, "loss": 12.2956, "step": 323 }, { "epoch": 0.017643094892895797, "grad_norm": 0.8298624360645479, "learning_rate": 5.88021778584392e-05, "loss": 12.36, "step": 324 }, { "epoch": 0.017697548889478807, "grad_norm": 0.8547904402189499, "learning_rate": 5.898366606170599e-05, "loss": 12.3879, "step": 325 }, { "epoch": 0.01775200288606182, "grad_norm": 0.7968314498228707, "learning_rate": 5.916515426497278e-05, "loss": 12.46, "step": 326 }, { "epoch": 0.01780645688264483, "grad_norm": 0.8736437654956223, "learning_rate": 5.934664246823956e-05, "loss": 12.4407, "step": 327 }, { "epoch": 0.017860910879227844, "grad_norm": 0.8668907429370268, "learning_rate": 5.9528130671506354e-05, "loss": 12.4397, "step": 328 }, { "epoch": 0.017915364875810854, "grad_norm": 0.7818621779531549, "learning_rate": 5.9709618874773145e-05, "loss": 12.4018, "step": 329 }, { "epoch": 0.017969818872393867, "grad_norm": 0.9103192243585209, "learning_rate": 5.989110707803993e-05, "loss": 12.5196, "step": 330 }, { "epoch": 0.018024272868976877, "grad_norm": 0.7862412367698574, "learning_rate": 6.007259528130672e-05, "loss": 12.3264, "step": 331 }, { "epoch": 0.018078726865559887, "grad_norm": 0.7143113953678407, "learning_rate": 6.025408348457351e-05, "loss": 12.4047, "step": 332 }, { "epoch": 0.0181331808621429, "grad_norm": 0.9052912116449027, "learning_rate": 6.0435571687840296e-05, "loss": 12.3172, "step": 333 }, { "epoch": 0.01818763485872591, "grad_norm": 0.7947826092807085, "learning_rate": 6.061705989110709e-05, "loss": 12.3744, "step": 334 }, { "epoch": 0.018242088855308924, "grad_norm": 0.7545460389607728, "learning_rate": 6.0798548094373865e-05, "loss": 12.1713, "step": 335 }, { "epoch": 0.018296542851891934, "grad_norm": 0.7152244757749424, "learning_rate": 6.098003629764065e-05, "loss": 12.3317, "step": 336 }, { "epoch": 0.018350996848474948, "grad_norm": 0.7996572062771121, "learning_rate": 6.116152450090745e-05, "loss": 12.4122, "step": 337 }, { "epoch": 0.018405450845057958, "grad_norm": 0.8411178606002937, "learning_rate": 6.134301270417423e-05, "loss": 12.3052, "step": 338 }, { "epoch": 0.01845990484164097, "grad_norm": 0.7975036289012045, "learning_rate": 6.152450090744102e-05, "loss": 12.4601, "step": 339 }, { "epoch": 0.01851435883822398, "grad_norm": 0.9137429350223257, "learning_rate": 6.17059891107078e-05, "loss": 12.3111, "step": 340 }, { "epoch": 0.018568812834806995, "grad_norm": 0.7148462961473729, "learning_rate": 6.18874773139746e-05, "loss": 12.4212, "step": 341 }, { "epoch": 0.018623266831390005, "grad_norm": 0.6913217865933418, "learning_rate": 6.206896551724138e-05, "loss": 12.3248, "step": 342 }, { "epoch": 0.01867772082797302, "grad_norm": 0.8838781205805315, "learning_rate": 6.225045372050817e-05, "loss": 12.3943, "step": 343 }, { "epoch": 0.01873217482455603, "grad_norm": 0.7873429833167643, "learning_rate": 6.243194192377497e-05, "loss": 12.3052, "step": 344 }, { "epoch": 0.018786628821139042, "grad_norm": 0.6852179867320145, "learning_rate": 6.261343012704175e-05, "loss": 12.3556, "step": 345 }, { "epoch": 0.018841082817722052, "grad_norm": 0.8621452861175046, "learning_rate": 6.279491833030852e-05, "loss": 12.5934, "step": 346 }, { "epoch": 0.018895536814305065, "grad_norm": 0.9120176242511238, "learning_rate": 6.297640653357532e-05, "loss": 12.4076, "step": 347 }, { "epoch": 0.018949990810888075, "grad_norm": 0.7843427231761305, "learning_rate": 6.31578947368421e-05, "loss": 12.3899, "step": 348 }, { "epoch": 0.01900444480747109, "grad_norm": 0.7893170140012902, "learning_rate": 6.333938294010889e-05, "loss": 12.2412, "step": 349 }, { "epoch": 0.0190588988040541, "grad_norm": 0.8916646848287791, "learning_rate": 6.352087114337569e-05, "loss": 12.4694, "step": 350 }, { "epoch": 0.019113352800637112, "grad_norm": 0.7258088717745435, "learning_rate": 6.370235934664247e-05, "loss": 12.2489, "step": 351 }, { "epoch": 0.019167806797220122, "grad_norm": 0.8650617630444332, "learning_rate": 6.388384754990925e-05, "loss": 12.2905, "step": 352 }, { "epoch": 0.019222260793803136, "grad_norm": 0.7667753049211509, "learning_rate": 6.406533575317605e-05, "loss": 12.3042, "step": 353 }, { "epoch": 0.019276714790386146, "grad_norm": 0.7511282035657841, "learning_rate": 6.424682395644284e-05, "loss": 12.2579, "step": 354 }, { "epoch": 0.01933116878696916, "grad_norm": 0.7735404530594452, "learning_rate": 6.442831215970962e-05, "loss": 12.3486, "step": 355 }, { "epoch": 0.01938562278355217, "grad_norm": 0.8412619305074472, "learning_rate": 6.460980036297642e-05, "loss": 12.4623, "step": 356 }, { "epoch": 0.019440076780135183, "grad_norm": 0.7301192343330496, "learning_rate": 6.47912885662432e-05, "loss": 12.3276, "step": 357 }, { "epoch": 0.019494530776718193, "grad_norm": 0.7804652434665313, "learning_rate": 6.497277676950997e-05, "loss": 12.2476, "step": 358 }, { "epoch": 0.019548984773301206, "grad_norm": 0.8556088812883522, "learning_rate": 6.515426497277677e-05, "loss": 12.529, "step": 359 }, { "epoch": 0.019603438769884216, "grad_norm": 0.8768877684878126, "learning_rate": 6.533575317604356e-05, "loss": 12.4842, "step": 360 }, { "epoch": 0.01965789276646723, "grad_norm": 0.7343479317743452, "learning_rate": 6.551724137931034e-05, "loss": 12.3482, "step": 361 }, { "epoch": 0.01971234676305024, "grad_norm": 0.8436152095043282, "learning_rate": 6.569872958257714e-05, "loss": 12.422, "step": 362 }, { "epoch": 0.019766800759633253, "grad_norm": 0.8293501645016546, "learning_rate": 6.588021778584392e-05, "loss": 12.3398, "step": 363 }, { "epoch": 0.019821254756216263, "grad_norm": 0.757977291896263, "learning_rate": 6.606170598911071e-05, "loss": 12.3762, "step": 364 }, { "epoch": 0.019875708752799277, "grad_norm": 0.9108534009229488, "learning_rate": 6.62431941923775e-05, "loss": 12.5203, "step": 365 }, { "epoch": 0.019930162749382287, "grad_norm": 0.861404684253492, "learning_rate": 6.642468239564429e-05, "loss": 12.3693, "step": 366 }, { "epoch": 0.0199846167459653, "grad_norm": 0.7442435951270349, "learning_rate": 6.660617059891108e-05, "loss": 12.3365, "step": 367 }, { "epoch": 0.02003907074254831, "grad_norm": 0.7481686167086928, "learning_rate": 6.678765880217786e-05, "loss": 12.3823, "step": 368 }, { "epoch": 0.020093524739131324, "grad_norm": 0.8866860856512322, "learning_rate": 6.696914700544466e-05, "loss": 12.3893, "step": 369 }, { "epoch": 0.020147978735714334, "grad_norm": 0.7480340530756074, "learning_rate": 6.715063520871143e-05, "loss": 12.3733, "step": 370 }, { "epoch": 0.020202432732297347, "grad_norm": 0.8429189686136943, "learning_rate": 6.733212341197823e-05, "loss": 12.3659, "step": 371 }, { "epoch": 0.020256886728880357, "grad_norm": 0.927639442740603, "learning_rate": 6.751361161524501e-05, "loss": 12.287, "step": 372 }, { "epoch": 0.02031134072546337, "grad_norm": 0.7857422926853126, "learning_rate": 6.76950998185118e-05, "loss": 12.412, "step": 373 }, { "epoch": 0.02036579472204638, "grad_norm": 0.806848985078179, "learning_rate": 6.787658802177859e-05, "loss": 12.3987, "step": 374 }, { "epoch": 0.020420248718629394, "grad_norm": 0.9004884915515702, "learning_rate": 6.805807622504538e-05, "loss": 12.3248, "step": 375 }, { "epoch": 0.020474702715212404, "grad_norm": 0.8136907332825543, "learning_rate": 6.823956442831216e-05, "loss": 12.5514, "step": 376 }, { "epoch": 0.020529156711795418, "grad_norm": 0.7947680138594297, "learning_rate": 6.842105263157895e-05, "loss": 12.3048, "step": 377 }, { "epoch": 0.020583610708378428, "grad_norm": 0.787939095178154, "learning_rate": 6.860254083484574e-05, "loss": 12.5616, "step": 378 }, { "epoch": 0.02063806470496144, "grad_norm": 0.8053644554897705, "learning_rate": 6.878402903811253e-05, "loss": 12.342, "step": 379 }, { "epoch": 0.02069251870154445, "grad_norm": 0.7333377537038525, "learning_rate": 6.896551724137931e-05, "loss": 12.346, "step": 380 }, { "epoch": 0.020746972698127465, "grad_norm": 0.8115592545530859, "learning_rate": 6.914700544464611e-05, "loss": 12.5671, "step": 381 }, { "epoch": 0.020801426694710475, "grad_norm": 0.9025245777577485, "learning_rate": 6.932849364791288e-05, "loss": 12.2945, "step": 382 }, { "epoch": 0.020855880691293488, "grad_norm": 0.7272441731724826, "learning_rate": 6.950998185117967e-05, "loss": 12.2068, "step": 383 }, { "epoch": 0.020910334687876498, "grad_norm": 0.8508505713825312, "learning_rate": 6.969147005444646e-05, "loss": 12.4984, "step": 384 }, { "epoch": 0.02096478868445951, "grad_norm": 0.7415051803154171, "learning_rate": 6.987295825771325e-05, "loss": 12.3752, "step": 385 }, { "epoch": 0.02101924268104252, "grad_norm": 0.759163767114583, "learning_rate": 7.005444646098003e-05, "loss": 12.3624, "step": 386 }, { "epoch": 0.021073696677625535, "grad_norm": 0.8489341011156437, "learning_rate": 7.023593466424683e-05, "loss": 12.3969, "step": 387 }, { "epoch": 0.021128150674208545, "grad_norm": 0.8462665677785168, "learning_rate": 7.041742286751362e-05, "loss": 12.2932, "step": 388 }, { "epoch": 0.02118260467079156, "grad_norm": 0.75552639338092, "learning_rate": 7.05989110707804e-05, "loss": 12.3267, "step": 389 }, { "epoch": 0.02123705866737457, "grad_norm": 0.8161265444898805, "learning_rate": 7.07803992740472e-05, "loss": 12.5891, "step": 390 }, { "epoch": 0.021291512663957582, "grad_norm": 0.8856078253587745, "learning_rate": 7.096188747731398e-05, "loss": 12.2956, "step": 391 }, { "epoch": 0.021345966660540592, "grad_norm": 0.8152626351928155, "learning_rate": 7.114337568058077e-05, "loss": 12.3987, "step": 392 }, { "epoch": 0.021400420657123605, "grad_norm": 0.8211638500815782, "learning_rate": 7.132486388384755e-05, "loss": 12.3112, "step": 393 }, { "epoch": 0.021454874653706615, "grad_norm": 0.8404912916628079, "learning_rate": 7.150635208711434e-05, "loss": 12.2929, "step": 394 }, { "epoch": 0.02150932865028963, "grad_norm": 0.7378950463318673, "learning_rate": 7.168784029038112e-05, "loss": 12.4709, "step": 395 }, { "epoch": 0.02156378264687264, "grad_norm": 0.7621638941069262, "learning_rate": 7.186932849364792e-05, "loss": 12.4729, "step": 396 }, { "epoch": 0.021618236643455652, "grad_norm": 0.8226698918779496, "learning_rate": 7.20508166969147e-05, "loss": 12.4705, "step": 397 }, { "epoch": 0.021672690640038662, "grad_norm": 0.7052288911047748, "learning_rate": 7.223230490018149e-05, "loss": 12.4461, "step": 398 }, { "epoch": 0.021727144636621672, "grad_norm": 0.7609734401203072, "learning_rate": 7.241379310344828e-05, "loss": 12.3506, "step": 399 }, { "epoch": 0.021781598633204686, "grad_norm": 0.770636318894737, "learning_rate": 7.259528130671507e-05, "loss": 12.248, "step": 400 }, { "epoch": 0.021836052629787696, "grad_norm": 0.7161390931014325, "learning_rate": 7.277676950998185e-05, "loss": 12.2974, "step": 401 }, { "epoch": 0.02189050662637071, "grad_norm": 0.7093602343926348, "learning_rate": 7.295825771324865e-05, "loss": 12.3475, "step": 402 }, { "epoch": 0.02194496062295372, "grad_norm": 0.7846838901586377, "learning_rate": 7.313974591651544e-05, "loss": 12.4812, "step": 403 }, { "epoch": 0.021999414619536733, "grad_norm": 0.7610570252567146, "learning_rate": 7.332123411978222e-05, "loss": 12.3179, "step": 404 }, { "epoch": 0.022053868616119743, "grad_norm": 0.7678646935604143, "learning_rate": 7.3502722323049e-05, "loss": 12.4395, "step": 405 }, { "epoch": 0.022108322612702756, "grad_norm": 0.788227147067854, "learning_rate": 7.368421052631579e-05, "loss": 12.3643, "step": 406 }, { "epoch": 0.022162776609285766, "grad_norm": 0.8555008491336252, "learning_rate": 7.386569872958257e-05, "loss": 12.5442, "step": 407 }, { "epoch": 0.02221723060586878, "grad_norm": 0.7848374239784748, "learning_rate": 7.404718693284937e-05, "loss": 12.4897, "step": 408 }, { "epoch": 0.02227168460245179, "grad_norm": 0.7986911623537468, "learning_rate": 7.422867513611616e-05, "loss": 12.4207, "step": 409 }, { "epoch": 0.022326138599034803, "grad_norm": 0.8578821767689867, "learning_rate": 7.441016333938294e-05, "loss": 12.406, "step": 410 }, { "epoch": 0.022380592595617813, "grad_norm": 0.8579401771922557, "learning_rate": 7.459165154264974e-05, "loss": 12.4753, "step": 411 }, { "epoch": 0.022435046592200827, "grad_norm": 0.7942776463738789, "learning_rate": 7.477313974591652e-05, "loss": 12.3652, "step": 412 }, { "epoch": 0.022489500588783837, "grad_norm": 0.7606530813783086, "learning_rate": 7.495462794918331e-05, "loss": 12.3301, "step": 413 }, { "epoch": 0.02254395458536685, "grad_norm": 0.7820187282722689, "learning_rate": 7.513611615245009e-05, "loss": 12.2943, "step": 414 }, { "epoch": 0.02259840858194986, "grad_norm": 0.8133279352459926, "learning_rate": 7.531760435571689e-05, "loss": 12.3255, "step": 415 }, { "epoch": 0.022652862578532874, "grad_norm": 0.7202675473587253, "learning_rate": 7.549909255898367e-05, "loss": 12.4437, "step": 416 }, { "epoch": 0.022707316575115884, "grad_norm": 0.7250526735890024, "learning_rate": 7.568058076225046e-05, "loss": 12.4672, "step": 417 }, { "epoch": 0.022761770571698897, "grad_norm": 0.7829881797724314, "learning_rate": 7.586206896551724e-05, "loss": 12.4441, "step": 418 }, { "epoch": 0.022816224568281907, "grad_norm": 0.8157466754769012, "learning_rate": 7.604355716878403e-05, "loss": 12.3637, "step": 419 }, { "epoch": 0.02287067856486492, "grad_norm": 0.6878509475121516, "learning_rate": 7.622504537205081e-05, "loss": 12.265, "step": 420 }, { "epoch": 0.02292513256144793, "grad_norm": 0.8133681369767054, "learning_rate": 7.640653357531761e-05, "loss": 12.4073, "step": 421 }, { "epoch": 0.022979586558030944, "grad_norm": 0.7970282881764229, "learning_rate": 7.65880217785844e-05, "loss": 12.4782, "step": 422 }, { "epoch": 0.023034040554613954, "grad_norm": 0.7306816228265234, "learning_rate": 7.676950998185118e-05, "loss": 12.2968, "step": 423 }, { "epoch": 0.023088494551196968, "grad_norm": 0.6634878489767484, "learning_rate": 7.695099818511798e-05, "loss": 12.43, "step": 424 }, { "epoch": 0.023142948547779978, "grad_norm": 0.7106521845447317, "learning_rate": 7.713248638838476e-05, "loss": 12.2943, "step": 425 }, { "epoch": 0.02319740254436299, "grad_norm": 0.6994237071240959, "learning_rate": 7.731397459165155e-05, "loss": 12.3155, "step": 426 }, { "epoch": 0.023251856540946, "grad_norm": 0.8721037875029398, "learning_rate": 7.749546279491834e-05, "loss": 12.3339, "step": 427 }, { "epoch": 0.023306310537529015, "grad_norm": 0.7274699367575759, "learning_rate": 7.767695099818511e-05, "loss": 12.4143, "step": 428 }, { "epoch": 0.023360764534112025, "grad_norm": 0.7122374265862785, "learning_rate": 7.78584392014519e-05, "loss": 12.1451, "step": 429 }, { "epoch": 0.023415218530695038, "grad_norm": 0.8254803830668477, "learning_rate": 7.80399274047187e-05, "loss": 12.5343, "step": 430 }, { "epoch": 0.023469672527278048, "grad_norm": 0.7387526625998517, "learning_rate": 7.822141560798548e-05, "loss": 12.351, "step": 431 }, { "epoch": 0.02352412652386106, "grad_norm": 0.7816935354004675, "learning_rate": 7.840290381125227e-05, "loss": 12.3752, "step": 432 }, { "epoch": 0.02357858052044407, "grad_norm": 0.8477449845456101, "learning_rate": 7.858439201451906e-05, "loss": 12.4905, "step": 433 }, { "epoch": 0.023633034517027085, "grad_norm": 0.7892505674089381, "learning_rate": 7.876588021778585e-05, "loss": 12.4304, "step": 434 }, { "epoch": 0.023687488513610095, "grad_norm": 0.7935738185972664, "learning_rate": 7.894736842105263e-05, "loss": 12.295, "step": 435 }, { "epoch": 0.02374194251019311, "grad_norm": 0.7129426510545228, "learning_rate": 7.912885662431943e-05, "loss": 12.4046, "step": 436 }, { "epoch": 0.02379639650677612, "grad_norm": 0.836938154929551, "learning_rate": 7.931034482758621e-05, "loss": 12.3877, "step": 437 }, { "epoch": 0.023850850503359132, "grad_norm": 1.0221663146021225, "learning_rate": 7.9491833030853e-05, "loss": 12.3748, "step": 438 }, { "epoch": 0.023905304499942142, "grad_norm": 0.9267503989787906, "learning_rate": 7.96733212341198e-05, "loss": 12.4954, "step": 439 }, { "epoch": 0.023959758496525155, "grad_norm": 0.8434026048076346, "learning_rate": 7.985480943738657e-05, "loss": 12.4728, "step": 440 }, { "epoch": 0.024014212493108165, "grad_norm": 0.7878062858231188, "learning_rate": 8.003629764065335e-05, "loss": 12.3983, "step": 441 }, { "epoch": 0.02406866648969118, "grad_norm": 0.710973960203588, "learning_rate": 8.021778584392015e-05, "loss": 12.3385, "step": 442 }, { "epoch": 0.02412312048627419, "grad_norm": 0.7938168455755067, "learning_rate": 8.039927404718693e-05, "loss": 12.3286, "step": 443 }, { "epoch": 0.024177574482857202, "grad_norm": 0.7947193309206884, "learning_rate": 8.058076225045372e-05, "loss": 12.5106, "step": 444 }, { "epoch": 0.024232028479440212, "grad_norm": 0.7584161983962521, "learning_rate": 8.076225045372052e-05, "loss": 12.3826, "step": 445 }, { "epoch": 0.024286482476023226, "grad_norm": 0.8202312029520822, "learning_rate": 8.09437386569873e-05, "loss": 12.345, "step": 446 }, { "epoch": 0.024340936472606236, "grad_norm": 0.7645543641740715, "learning_rate": 8.112522686025409e-05, "loss": 12.4588, "step": 447 }, { "epoch": 0.02439539046918925, "grad_norm": 0.7730517169532368, "learning_rate": 8.130671506352088e-05, "loss": 12.421, "step": 448 }, { "epoch": 0.02444984446577226, "grad_norm": 0.814708665964133, "learning_rate": 8.148820326678767e-05, "loss": 12.3183, "step": 449 }, { "epoch": 0.024504298462355273, "grad_norm": 0.7711615040975552, "learning_rate": 8.166969147005445e-05, "loss": 12.4889, "step": 450 }, { "epoch": 0.024558752458938283, "grad_norm": 0.7528070171512011, "learning_rate": 8.185117967332124e-05, "loss": 12.4343, "step": 451 }, { "epoch": 0.024613206455521296, "grad_norm": 0.8249958258020976, "learning_rate": 8.203266787658802e-05, "loss": 12.4099, "step": 452 }, { "epoch": 0.024667660452104306, "grad_norm": 0.8813431530918817, "learning_rate": 8.22141560798548e-05, "loss": 12.4739, "step": 453 }, { "epoch": 0.02472211444868732, "grad_norm": 0.9494001566035871, "learning_rate": 8.23956442831216e-05, "loss": 12.4154, "step": 454 }, { "epoch": 0.02477656844527033, "grad_norm": 0.7154527838559613, "learning_rate": 8.257713248638839e-05, "loss": 12.3046, "step": 455 }, { "epoch": 0.024831022441853343, "grad_norm": 0.8426756172886806, "learning_rate": 8.275862068965517e-05, "loss": 12.3878, "step": 456 }, { "epoch": 0.024885476438436353, "grad_norm": 0.7868340051489234, "learning_rate": 8.294010889292196e-05, "loss": 12.4081, "step": 457 }, { "epoch": 0.024939930435019367, "grad_norm": 0.7614062874800365, "learning_rate": 8.312159709618876e-05, "loss": 12.3421, "step": 458 }, { "epoch": 0.024994384431602377, "grad_norm": 0.7831978878652606, "learning_rate": 8.330308529945554e-05, "loss": 12.4271, "step": 459 }, { "epoch": 0.02504883842818539, "grad_norm": 0.7742389342308593, "learning_rate": 8.348457350272232e-05, "loss": 12.6071, "step": 460 }, { "epoch": 0.0251032924247684, "grad_norm": 0.7667575505932676, "learning_rate": 8.366606170598912e-05, "loss": 12.4484, "step": 461 }, { "epoch": 0.025157746421351414, "grad_norm": 0.7949954459613233, "learning_rate": 8.38475499092559e-05, "loss": 12.3982, "step": 462 }, { "epoch": 0.025212200417934424, "grad_norm": 0.8839761700709547, "learning_rate": 8.402903811252269e-05, "loss": 12.3902, "step": 463 }, { "epoch": 0.025266654414517434, "grad_norm": 0.7436720063586588, "learning_rate": 8.421052631578948e-05, "loss": 12.3808, "step": 464 }, { "epoch": 0.025321108411100447, "grad_norm": 0.8086165049759212, "learning_rate": 8.439201451905626e-05, "loss": 12.3742, "step": 465 }, { "epoch": 0.025375562407683457, "grad_norm": 0.8217729326448585, "learning_rate": 8.457350272232304e-05, "loss": 12.3675, "step": 466 }, { "epoch": 0.02543001640426647, "grad_norm": 0.8804919476658869, "learning_rate": 8.475499092558984e-05, "loss": 12.4588, "step": 467 }, { "epoch": 0.02548447040084948, "grad_norm": 0.8021891332568722, "learning_rate": 8.493647912885663e-05, "loss": 12.443, "step": 468 }, { "epoch": 0.025538924397432494, "grad_norm": 0.7325722424197364, "learning_rate": 8.511796733212341e-05, "loss": 12.4545, "step": 469 }, { "epoch": 0.025593378394015504, "grad_norm": 0.8306668524452858, "learning_rate": 8.529945553539021e-05, "loss": 12.5469, "step": 470 }, { "epoch": 0.025647832390598518, "grad_norm": 0.7943932754142018, "learning_rate": 8.5480943738657e-05, "loss": 12.2246, "step": 471 }, { "epoch": 0.025702286387181528, "grad_norm": 0.7407462425158048, "learning_rate": 8.566243194192378e-05, "loss": 12.3846, "step": 472 }, { "epoch": 0.02575674038376454, "grad_norm": 0.8711005010892444, "learning_rate": 8.584392014519058e-05, "loss": 12.5484, "step": 473 }, { "epoch": 0.02581119438034755, "grad_norm": 0.7937737358003336, "learning_rate": 8.602540834845736e-05, "loss": 12.3675, "step": 474 }, { "epoch": 0.025865648376930565, "grad_norm": 0.739494251610042, "learning_rate": 8.620689655172413e-05, "loss": 12.4181, "step": 475 }, { "epoch": 0.025920102373513575, "grad_norm": 0.7494105764676788, "learning_rate": 8.638838475499093e-05, "loss": 12.4294, "step": 476 }, { "epoch": 0.025974556370096588, "grad_norm": 0.8112658612116864, "learning_rate": 8.656987295825771e-05, "loss": 12.3863, "step": 477 }, { "epoch": 0.026029010366679598, "grad_norm": 0.7951356603711836, "learning_rate": 8.67513611615245e-05, "loss": 12.509, "step": 478 }, { "epoch": 0.02608346436326261, "grad_norm": 0.8050809097921263, "learning_rate": 8.69328493647913e-05, "loss": 12.3642, "step": 479 }, { "epoch": 0.02613791835984562, "grad_norm": 0.8245382505558528, "learning_rate": 8.711433756805808e-05, "loss": 12.3835, "step": 480 }, { "epoch": 0.026192372356428635, "grad_norm": 0.742494597149955, "learning_rate": 8.729582577132486e-05, "loss": 12.4518, "step": 481 }, { "epoch": 0.026246826353011645, "grad_norm": 0.9027867580588818, "learning_rate": 8.747731397459166e-05, "loss": 12.3726, "step": 482 }, { "epoch": 0.02630128034959466, "grad_norm": 0.7595129443596266, "learning_rate": 8.765880217785845e-05, "loss": 12.6085, "step": 483 }, { "epoch": 0.02635573434617767, "grad_norm": 0.8600852902167964, "learning_rate": 8.784029038112523e-05, "loss": 12.5357, "step": 484 }, { "epoch": 0.026410188342760682, "grad_norm": 0.9430561851745869, "learning_rate": 8.802177858439202e-05, "loss": 12.5052, "step": 485 }, { "epoch": 0.026464642339343692, "grad_norm": 0.7608697999488, "learning_rate": 8.820326678765881e-05, "loss": 12.3988, "step": 486 }, { "epoch": 0.026519096335926706, "grad_norm": 0.9102688809761854, "learning_rate": 8.838475499092559e-05, "loss": 12.4724, "step": 487 }, { "epoch": 0.026573550332509716, "grad_norm": 0.7791000410634494, "learning_rate": 8.856624319419238e-05, "loss": 12.5351, "step": 488 }, { "epoch": 0.02662800432909273, "grad_norm": 0.7489844670021688, "learning_rate": 8.874773139745917e-05, "loss": 12.2126, "step": 489 }, { "epoch": 0.02668245832567574, "grad_norm": 0.8085753500287401, "learning_rate": 8.892921960072595e-05, "loss": 12.5421, "step": 490 }, { "epoch": 0.026736912322258753, "grad_norm": 0.9563889035257984, "learning_rate": 8.911070780399275e-05, "loss": 12.6741, "step": 491 }, { "epoch": 0.026791366318841763, "grad_norm": 0.7063448574074058, "learning_rate": 8.929219600725953e-05, "loss": 12.2971, "step": 492 }, { "epoch": 0.026845820315424776, "grad_norm": 0.7813369753167644, "learning_rate": 8.947368421052632e-05, "loss": 12.2513, "step": 493 }, { "epoch": 0.026900274312007786, "grad_norm": 0.8629014027216114, "learning_rate": 8.96551724137931e-05, "loss": 12.4148, "step": 494 }, { "epoch": 0.0269547283085908, "grad_norm": 0.7481663513315681, "learning_rate": 8.98366606170599e-05, "loss": 12.5463, "step": 495 }, { "epoch": 0.02700918230517381, "grad_norm": 0.8758771305014487, "learning_rate": 9.001814882032669e-05, "loss": 12.4045, "step": 496 }, { "epoch": 0.027063636301756823, "grad_norm": 0.7480406054883233, "learning_rate": 9.019963702359347e-05, "loss": 12.4557, "step": 497 }, { "epoch": 0.027118090298339833, "grad_norm": 0.8152346713462907, "learning_rate": 9.038112522686027e-05, "loss": 12.3973, "step": 498 }, { "epoch": 0.027172544294922846, "grad_norm": 0.8415389004370808, "learning_rate": 9.056261343012704e-05, "loss": 12.6063, "step": 499 }, { "epoch": 0.027226998291505856, "grad_norm": 0.8011366657793219, "learning_rate": 9.074410163339382e-05, "loss": 12.4955, "step": 500 }, { "epoch": 0.02728145228808887, "grad_norm": 0.7380805437115405, "learning_rate": 9.092558983666062e-05, "loss": 12.3469, "step": 501 }, { "epoch": 0.02733590628467188, "grad_norm": 0.7133968415768672, "learning_rate": 9.11070780399274e-05, "loss": 12.4881, "step": 502 }, { "epoch": 0.027390360281254893, "grad_norm": 0.8125970947519109, "learning_rate": 9.128856624319419e-05, "loss": 12.3151, "step": 503 }, { "epoch": 0.027444814277837903, "grad_norm": 0.7077249733189649, "learning_rate": 9.147005444646099e-05, "loss": 12.3738, "step": 504 }, { "epoch": 0.027499268274420917, "grad_norm": 0.7809924557333555, "learning_rate": 9.165154264972777e-05, "loss": 12.4415, "step": 505 }, { "epoch": 0.027553722271003927, "grad_norm": 0.7543662314141218, "learning_rate": 9.183303085299456e-05, "loss": 12.425, "step": 506 }, { "epoch": 0.02760817626758694, "grad_norm": 0.740968620199914, "learning_rate": 9.201451905626135e-05, "loss": 12.4001, "step": 507 }, { "epoch": 0.02766263026416995, "grad_norm": 0.715948782602618, "learning_rate": 9.219600725952814e-05, "loss": 12.4193, "step": 508 }, { "epoch": 0.027717084260752964, "grad_norm": 0.7555285381975604, "learning_rate": 9.237749546279492e-05, "loss": 12.3583, "step": 509 }, { "epoch": 0.027771538257335974, "grad_norm": 0.7926894903352032, "learning_rate": 9.255898366606171e-05, "loss": 12.5034, "step": 510 }, { "epoch": 0.027825992253918987, "grad_norm": 0.722277348946715, "learning_rate": 9.274047186932849e-05, "loss": 12.4026, "step": 511 }, { "epoch": 0.027880446250501997, "grad_norm": 0.8485310235578939, "learning_rate": 9.292196007259528e-05, "loss": 12.4843, "step": 512 }, { "epoch": 0.02793490024708501, "grad_norm": 0.8246964012075222, "learning_rate": 9.310344827586207e-05, "loss": 12.3967, "step": 513 }, { "epoch": 0.02798935424366802, "grad_norm": 0.8152543979538756, "learning_rate": 9.328493647912886e-05, "loss": 12.26, "step": 514 }, { "epoch": 0.028043808240251034, "grad_norm": 0.8555264952532589, "learning_rate": 9.346642468239564e-05, "loss": 12.3481, "step": 515 }, { "epoch": 0.028098262236834044, "grad_norm": 0.7301430730163417, "learning_rate": 9.364791288566244e-05, "loss": 12.3859, "step": 516 }, { "epoch": 0.028152716233417058, "grad_norm": 0.8502343380693529, "learning_rate": 9.382940108892923e-05, "loss": 12.3946, "step": 517 }, { "epoch": 0.028207170230000068, "grad_norm": 0.7616908996355757, "learning_rate": 9.401088929219601e-05, "loss": 12.4984, "step": 518 }, { "epoch": 0.02826162422658308, "grad_norm": 0.848187178061048, "learning_rate": 9.419237749546281e-05, "loss": 12.5009, "step": 519 }, { "epoch": 0.02831607822316609, "grad_norm": 0.7605162192554722, "learning_rate": 9.437386569872959e-05, "loss": 12.3757, "step": 520 }, { "epoch": 0.028370532219749105, "grad_norm": 0.6941160676897854, "learning_rate": 9.455535390199638e-05, "loss": 12.497, "step": 521 }, { "epoch": 0.028424986216332115, "grad_norm": 0.7856867296190392, "learning_rate": 9.473684210526316e-05, "loss": 12.4029, "step": 522 }, { "epoch": 0.028479440212915128, "grad_norm": 0.7048400049802264, "learning_rate": 9.491833030852995e-05, "loss": 12.4511, "step": 523 }, { "epoch": 0.028533894209498138, "grad_norm": 0.8013294724513513, "learning_rate": 9.509981851179673e-05, "loss": 12.4689, "step": 524 }, { "epoch": 0.02858834820608115, "grad_norm": 0.7878969559755142, "learning_rate": 9.528130671506353e-05, "loss": 12.5229, "step": 525 }, { "epoch": 0.02864280220266416, "grad_norm": 0.7987625743494264, "learning_rate": 9.546279491833031e-05, "loss": 12.3626, "step": 526 }, { "epoch": 0.028697256199247175, "grad_norm": 0.8079525903340861, "learning_rate": 9.56442831215971e-05, "loss": 12.4762, "step": 527 }, { "epoch": 0.028751710195830185, "grad_norm": 0.6640448779297942, "learning_rate": 9.58257713248639e-05, "loss": 12.3718, "step": 528 }, { "epoch": 0.0288061641924132, "grad_norm": 0.8399913617521838, "learning_rate": 9.600725952813068e-05, "loss": 12.342, "step": 529 }, { "epoch": 0.02886061818899621, "grad_norm": 0.8352947312719899, "learning_rate": 9.618874773139746e-05, "loss": 12.4117, "step": 530 }, { "epoch": 0.02891507218557922, "grad_norm": 0.7357612574343283, "learning_rate": 9.637023593466425e-05, "loss": 12.3374, "step": 531 }, { "epoch": 0.028969526182162232, "grad_norm": 0.8621291940802033, "learning_rate": 9.655172413793105e-05, "loss": 12.4102, "step": 532 }, { "epoch": 0.029023980178745242, "grad_norm": 0.7496563648684155, "learning_rate": 9.673321234119783e-05, "loss": 12.553, "step": 533 }, { "epoch": 0.029078434175328256, "grad_norm": 0.7178040502697846, "learning_rate": 9.691470054446462e-05, "loss": 12.3036, "step": 534 }, { "epoch": 0.029132888171911266, "grad_norm": 0.7748952398348568, "learning_rate": 9.70961887477314e-05, "loss": 12.493, "step": 535 }, { "epoch": 0.02918734216849428, "grad_norm": 0.7713180455385662, "learning_rate": 9.727767695099818e-05, "loss": 12.4102, "step": 536 }, { "epoch": 0.02924179616507729, "grad_norm": 0.7740687521356002, "learning_rate": 9.745916515426497e-05, "loss": 12.5748, "step": 537 }, { "epoch": 0.029296250161660303, "grad_norm": 0.7865595720640453, "learning_rate": 9.764065335753177e-05, "loss": 12.4367, "step": 538 }, { "epoch": 0.029350704158243313, "grad_norm": 0.7560847556377283, "learning_rate": 9.782214156079855e-05, "loss": 12.3583, "step": 539 }, { "epoch": 0.029405158154826326, "grad_norm": 0.7978359099601086, "learning_rate": 9.800362976406534e-05, "loss": 12.3999, "step": 540 }, { "epoch": 0.029459612151409336, "grad_norm": 0.7535509446044629, "learning_rate": 9.818511796733213e-05, "loss": 12.3052, "step": 541 }, { "epoch": 0.02951406614799235, "grad_norm": 0.7775521738235894, "learning_rate": 9.836660617059892e-05, "loss": 12.4467, "step": 542 }, { "epoch": 0.02956852014457536, "grad_norm": 0.7469699238521446, "learning_rate": 9.85480943738657e-05, "loss": 12.456, "step": 543 }, { "epoch": 0.029622974141158373, "grad_norm": 1.0563298749968926, "learning_rate": 9.87295825771325e-05, "loss": 12.4247, "step": 544 }, { "epoch": 0.029677428137741383, "grad_norm": 0.8361142060905258, "learning_rate": 9.891107078039928e-05, "loss": 12.5043, "step": 545 }, { "epoch": 0.029731882134324396, "grad_norm": 1.0096705276772675, "learning_rate": 9.909255898366606e-05, "loss": 12.3808, "step": 546 }, { "epoch": 0.029786336130907407, "grad_norm": 0.7864328089464631, "learning_rate": 9.927404718693285e-05, "loss": 12.538, "step": 547 }, { "epoch": 0.02984079012749042, "grad_norm": 0.8451585533599, "learning_rate": 9.945553539019964e-05, "loss": 12.5222, "step": 548 }, { "epoch": 0.02989524412407343, "grad_norm": 0.8653338116417337, "learning_rate": 9.963702359346642e-05, "loss": 12.4985, "step": 549 }, { "epoch": 0.029949698120656443, "grad_norm": 0.7662298878370504, "learning_rate": 9.981851179673322e-05, "loss": 12.4266, "step": 550 }, { "epoch": 0.030004152117239453, "grad_norm": 0.7970444594455892, "learning_rate": 0.0001, "loss": 12.3258, "step": 551 }, { "epoch": 0.030058606113822467, "grad_norm": 0.7349918099412, "learning_rate": 0.00010018148820326678, "loss": 12.4242, "step": 552 }, { "epoch": 0.030113060110405477, "grad_norm": 0.854292237492196, "learning_rate": 0.00010036297640653359, "loss": 12.3636, "step": 553 }, { "epoch": 0.03016751410698849, "grad_norm": 0.7395022781358609, "learning_rate": 0.00010054446460980036, "loss": 12.5779, "step": 554 }, { "epoch": 0.0302219681035715, "grad_norm": 0.768148807115022, "learning_rate": 0.00010072595281306716, "loss": 12.5132, "step": 555 }, { "epoch": 0.030276422100154514, "grad_norm": 0.8159993812941795, "learning_rate": 0.00010090744101633394, "loss": 12.4995, "step": 556 }, { "epoch": 0.030330876096737524, "grad_norm": 0.8351323851458299, "learning_rate": 0.00010108892921960074, "loss": 12.5193, "step": 557 }, { "epoch": 0.030385330093320537, "grad_norm": 0.8416946553162289, "learning_rate": 0.00010127041742286751, "loss": 12.5428, "step": 558 }, { "epoch": 0.030439784089903547, "grad_norm": 0.9752655054331537, "learning_rate": 0.00010145190562613431, "loss": 12.6036, "step": 559 }, { "epoch": 0.03049423808648656, "grad_norm": 0.7197724811301942, "learning_rate": 0.00010163339382940109, "loss": 12.4491, "step": 560 }, { "epoch": 0.03054869208306957, "grad_norm": 0.7503723774413532, "learning_rate": 0.00010181488203266789, "loss": 12.3547, "step": 561 }, { "epoch": 0.030603146079652584, "grad_norm": 0.8452417124749888, "learning_rate": 0.00010199637023593467, "loss": 12.5108, "step": 562 }, { "epoch": 0.030657600076235594, "grad_norm": 1.0895902134284718, "learning_rate": 0.00010217785843920144, "loss": 12.5596, "step": 563 }, { "epoch": 0.030712054072818608, "grad_norm": 0.7045960377212881, "learning_rate": 0.00010235934664246824, "loss": 12.5114, "step": 564 }, { "epoch": 0.030766508069401618, "grad_norm": 0.7817959879503318, "learning_rate": 0.00010254083484573503, "loss": 12.4439, "step": 565 }, { "epoch": 0.03082096206598463, "grad_norm": 0.7924652130755838, "learning_rate": 0.00010272232304900183, "loss": 12.5138, "step": 566 }, { "epoch": 0.03087541606256764, "grad_norm": 0.8559510622634047, "learning_rate": 0.0001029038112522686, "loss": 12.4195, "step": 567 }, { "epoch": 0.030929870059150655, "grad_norm": 0.7675767381139006, "learning_rate": 0.0001030852994555354, "loss": 12.4978, "step": 568 }, { "epoch": 0.030984324055733665, "grad_norm": 0.7741050473466831, "learning_rate": 0.00010326678765880218, "loss": 12.2737, "step": 569 }, { "epoch": 0.03103877805231668, "grad_norm": 0.7415387084328533, "learning_rate": 0.00010344827586206898, "loss": 12.4745, "step": 570 }, { "epoch": 0.03109323204889969, "grad_norm": 0.7841681248843312, "learning_rate": 0.00010362976406533576, "loss": 12.4871, "step": 571 }, { "epoch": 0.031147686045482702, "grad_norm": 0.7936691292245915, "learning_rate": 0.00010381125226860256, "loss": 12.652, "step": 572 }, { "epoch": 0.031202140042065712, "grad_norm": 0.8113102552857533, "learning_rate": 0.00010399274047186933, "loss": 12.5519, "step": 573 }, { "epoch": 0.03125659403864872, "grad_norm": 0.7347337505529872, "learning_rate": 0.00010417422867513613, "loss": 12.3806, "step": 574 }, { "epoch": 0.031311048035231735, "grad_norm": 0.746487006322028, "learning_rate": 0.00010435571687840291, "loss": 12.4832, "step": 575 }, { "epoch": 0.03136550203181475, "grad_norm": 0.8489077584446184, "learning_rate": 0.00010453720508166968, "loss": 12.2913, "step": 576 }, { "epoch": 0.03141995602839776, "grad_norm": 0.7185282981457465, "learning_rate": 0.00010471869328493648, "loss": 12.3301, "step": 577 }, { "epoch": 0.03147441002498077, "grad_norm": 0.8659215057338194, "learning_rate": 0.00010490018148820327, "loss": 12.601, "step": 578 }, { "epoch": 0.03152886402156378, "grad_norm": 0.76054791141889, "learning_rate": 0.00010508166969147006, "loss": 12.3515, "step": 579 }, { "epoch": 0.031583318018146796, "grad_norm": 0.8109048061455886, "learning_rate": 0.00010526315789473685, "loss": 12.5157, "step": 580 }, { "epoch": 0.03163777201472981, "grad_norm": 0.7397461059924253, "learning_rate": 0.00010544464609800365, "loss": 12.4316, "step": 581 }, { "epoch": 0.031692226011312816, "grad_norm": 0.7946491784910253, "learning_rate": 0.00010562613430127042, "loss": 12.3995, "step": 582 }, { "epoch": 0.03174668000789583, "grad_norm": 0.8084263112882792, "learning_rate": 0.00010580762250453721, "loss": 12.4758, "step": 583 }, { "epoch": 0.03180113400447884, "grad_norm": 0.7806289943327588, "learning_rate": 0.000105989110707804, "loss": 12.3552, "step": 584 }, { "epoch": 0.031855588001061856, "grad_norm": 0.782911514302089, "learning_rate": 0.0001061705989110708, "loss": 12.5056, "step": 585 }, { "epoch": 0.03191004199764486, "grad_norm": 0.8014304453370548, "learning_rate": 0.00010635208711433757, "loss": 12.5809, "step": 586 }, { "epoch": 0.031964495994227876, "grad_norm": 0.8775607468840907, "learning_rate": 0.00010653357531760435, "loss": 12.5721, "step": 587 }, { "epoch": 0.03201894999081089, "grad_norm": 0.7172825755544577, "learning_rate": 0.00010671506352087115, "loss": 12.4076, "step": 588 }, { "epoch": 0.0320734039873939, "grad_norm": 0.7180909578238679, "learning_rate": 0.00010689655172413792, "loss": 12.4168, "step": 589 }, { "epoch": 0.03212785798397691, "grad_norm": 0.8139633699732863, "learning_rate": 0.00010707803992740473, "loss": 12.4554, "step": 590 }, { "epoch": 0.03218231198055992, "grad_norm": 0.8201333378367557, "learning_rate": 0.0001072595281306715, "loss": 12.5802, "step": 591 }, { "epoch": 0.03223676597714294, "grad_norm": 0.7468982443082229, "learning_rate": 0.0001074410163339383, "loss": 12.4596, "step": 592 }, { "epoch": 0.03229121997372595, "grad_norm": 0.8776024210025977, "learning_rate": 0.00010762250453720509, "loss": 12.4242, "step": 593 }, { "epoch": 0.03234567397030896, "grad_norm": 0.7475469170008233, "learning_rate": 0.00010780399274047188, "loss": 12.2832, "step": 594 }, { "epoch": 0.03240012796689197, "grad_norm": 0.7498393845005602, "learning_rate": 0.00010798548094373865, "loss": 12.4668, "step": 595 }, { "epoch": 0.032454581963474984, "grad_norm": 0.8085189966961024, "learning_rate": 0.00010816696914700545, "loss": 12.5477, "step": 596 }, { "epoch": 0.03250903596005799, "grad_norm": 0.781876240427016, "learning_rate": 0.00010834845735027224, "loss": 12.3383, "step": 597 }, { "epoch": 0.032563489956641004, "grad_norm": 0.7894170574953352, "learning_rate": 0.00010852994555353901, "loss": 12.505, "step": 598 }, { "epoch": 0.03261794395322402, "grad_norm": 0.7576096050152189, "learning_rate": 0.00010871143375680582, "loss": 12.3167, "step": 599 }, { "epoch": 0.03267239794980703, "grad_norm": 0.8250448715677351, "learning_rate": 0.00010889292196007259, "loss": 12.4931, "step": 600 }, { "epoch": 0.03272685194639004, "grad_norm": 0.7191331124925939, "learning_rate": 0.00010907441016333939, "loss": 12.343, "step": 601 }, { "epoch": 0.03278130594297305, "grad_norm": 0.7096756844673696, "learning_rate": 0.00010925589836660617, "loss": 12.3706, "step": 602 }, { "epoch": 0.032835759939556064, "grad_norm": 0.8855413287507164, "learning_rate": 0.00010943738656987297, "loss": 12.5008, "step": 603 }, { "epoch": 0.03289021393613908, "grad_norm": 0.702816878198603, "learning_rate": 0.00010961887477313974, "loss": 12.4181, "step": 604 }, { "epoch": 0.032944667932722084, "grad_norm": 0.7511863541614481, "learning_rate": 0.00010980036297640654, "loss": 12.4619, "step": 605 }, { "epoch": 0.0329991219293051, "grad_norm": 0.7440166154853498, "learning_rate": 0.00010998185117967332, "loss": 12.5371, "step": 606 }, { "epoch": 0.03305357592588811, "grad_norm": 0.7329152168316372, "learning_rate": 0.00011016333938294012, "loss": 12.4742, "step": 607 }, { "epoch": 0.033108029922471124, "grad_norm": 0.736280532598944, "learning_rate": 0.0001103448275862069, "loss": 12.3711, "step": 608 }, { "epoch": 0.03316248391905413, "grad_norm": 0.7413393159584099, "learning_rate": 0.0001105263157894737, "loss": 12.4609, "step": 609 }, { "epoch": 0.033216937915637144, "grad_norm": 0.789987821650849, "learning_rate": 0.00011070780399274048, "loss": 12.5343, "step": 610 }, { "epoch": 0.03327139191222016, "grad_norm": 0.7557116687034697, "learning_rate": 0.00011088929219600726, "loss": 12.5283, "step": 611 }, { "epoch": 0.03332584590880317, "grad_norm": 0.7854327140332928, "learning_rate": 0.00011107078039927406, "loss": 12.4899, "step": 612 }, { "epoch": 0.03338029990538618, "grad_norm": 0.8594935168696665, "learning_rate": 0.00011125226860254083, "loss": 12.4901, "step": 613 }, { "epoch": 0.03343475390196919, "grad_norm": 0.880417761612294, "learning_rate": 0.00011143375680580763, "loss": 12.5552, "step": 614 }, { "epoch": 0.033489207898552205, "grad_norm": 0.7806942770954527, "learning_rate": 0.00011161524500907441, "loss": 12.6158, "step": 615 }, { "epoch": 0.03354366189513522, "grad_norm": 0.8841970711057281, "learning_rate": 0.00011179673321234121, "loss": 12.4989, "step": 616 }, { "epoch": 0.033598115891718225, "grad_norm": 0.7713157169726781, "learning_rate": 0.000111978221415608, "loss": 12.4892, "step": 617 }, { "epoch": 0.03365256988830124, "grad_norm": 0.8324243289561584, "learning_rate": 0.00011215970961887479, "loss": 12.4635, "step": 618 }, { "epoch": 0.03370702388488425, "grad_norm": 0.7807991754630468, "learning_rate": 0.00011234119782214156, "loss": 12.5345, "step": 619 }, { "epoch": 0.033761477881467265, "grad_norm": 0.7865945001071623, "learning_rate": 0.00011252268602540836, "loss": 12.3791, "step": 620 }, { "epoch": 0.03381593187805027, "grad_norm": 0.7872398487857712, "learning_rate": 0.00011270417422867514, "loss": 12.5145, "step": 621 }, { "epoch": 0.033870385874633285, "grad_norm": 0.7885667646810096, "learning_rate": 0.00011288566243194192, "loss": 12.5641, "step": 622 }, { "epoch": 0.0339248398712163, "grad_norm": 0.7741959357450423, "learning_rate": 0.00011306715063520871, "loss": 12.4703, "step": 623 }, { "epoch": 0.03397929386779931, "grad_norm": 0.7564638066325946, "learning_rate": 0.0001132486388384755, "loss": 12.3965, "step": 624 }, { "epoch": 0.03403374786438232, "grad_norm": 0.9519171311859516, "learning_rate": 0.0001134301270417423, "loss": 12.4289, "step": 625 }, { "epoch": 0.03408820186096533, "grad_norm": 0.8107340278897369, "learning_rate": 0.00011361161524500907, "loss": 12.3782, "step": 626 }, { "epoch": 0.034142655857548346, "grad_norm": 0.8481212877491462, "learning_rate": 0.00011379310344827588, "loss": 12.4214, "step": 627 }, { "epoch": 0.03419710985413136, "grad_norm": 0.8650098885796367, "learning_rate": 0.00011397459165154265, "loss": 12.4615, "step": 628 }, { "epoch": 0.034251563850714366, "grad_norm": 0.7457653007753933, "learning_rate": 0.00011415607985480945, "loss": 12.596, "step": 629 }, { "epoch": 0.03430601784729738, "grad_norm": 0.9104614202638347, "learning_rate": 0.00011433756805807623, "loss": 12.4861, "step": 630 }, { "epoch": 0.03436047184388039, "grad_norm": 0.7999052962726083, "learning_rate": 0.00011451905626134303, "loss": 12.4831, "step": 631 }, { "epoch": 0.034414925840463406, "grad_norm": 0.8776766817589621, "learning_rate": 0.0001147005444646098, "loss": 12.4767, "step": 632 }, { "epoch": 0.03446937983704641, "grad_norm": 0.757055231958654, "learning_rate": 0.0001148820326678766, "loss": 12.4682, "step": 633 }, { "epoch": 0.034523833833629426, "grad_norm": 0.7452271311596549, "learning_rate": 0.00011506352087114338, "loss": 12.4019, "step": 634 }, { "epoch": 0.03457828783021244, "grad_norm": 0.8914212821035306, "learning_rate": 0.00011524500907441015, "loss": 12.5484, "step": 635 }, { "epoch": 0.03463274182679545, "grad_norm": 0.8426837477394973, "learning_rate": 0.00011542649727767697, "loss": 12.6615, "step": 636 }, { "epoch": 0.03468719582337846, "grad_norm": 0.8084210678468613, "learning_rate": 0.00011560798548094374, "loss": 12.3847, "step": 637 }, { "epoch": 0.03474164981996147, "grad_norm": 0.8653995572984597, "learning_rate": 0.00011578947368421053, "loss": 12.5286, "step": 638 }, { "epoch": 0.03479610381654449, "grad_norm": 0.7890053357866484, "learning_rate": 0.00011597096188747732, "loss": 12.4752, "step": 639 }, { "epoch": 0.0348505578131275, "grad_norm": 0.8857773461034695, "learning_rate": 0.00011615245009074412, "loss": 12.537, "step": 640 }, { "epoch": 0.03490501180971051, "grad_norm": 0.9416668002877222, "learning_rate": 0.00011633393829401089, "loss": 12.5413, "step": 641 }, { "epoch": 0.03495946580629352, "grad_norm": 0.779793688730529, "learning_rate": 0.00011651542649727769, "loss": 12.5147, "step": 642 }, { "epoch": 0.035013919802876534, "grad_norm": 0.951732854095737, "learning_rate": 0.00011669691470054447, "loss": 12.684, "step": 643 }, { "epoch": 0.03506837379945955, "grad_norm": 0.7967477855943389, "learning_rate": 0.00011687840290381127, "loss": 12.5166, "step": 644 }, { "epoch": 0.035122827796042554, "grad_norm": 0.9520128117212568, "learning_rate": 0.00011705989110707805, "loss": 12.4273, "step": 645 }, { "epoch": 0.03517728179262557, "grad_norm": 0.7204273522718219, "learning_rate": 0.00011724137931034482, "loss": 12.452, "step": 646 }, { "epoch": 0.03523173578920858, "grad_norm": 0.8316091332522132, "learning_rate": 0.00011742286751361162, "loss": 12.5175, "step": 647 }, { "epoch": 0.035286189785791594, "grad_norm": 0.8106043550269486, "learning_rate": 0.0001176043557168784, "loss": 12.6276, "step": 648 }, { "epoch": 0.0353406437823746, "grad_norm": 0.907645004050805, "learning_rate": 0.0001177858439201452, "loss": 12.5116, "step": 649 }, { "epoch": 0.035395097778957614, "grad_norm": 0.8421133812031315, "learning_rate": 0.00011796733212341197, "loss": 12.5068, "step": 650 }, { "epoch": 0.03544955177554063, "grad_norm": 0.8263559879788932, "learning_rate": 0.00011814882032667877, "loss": 12.5064, "step": 651 }, { "epoch": 0.03550400577212364, "grad_norm": 0.8368348477203709, "learning_rate": 0.00011833030852994556, "loss": 12.6344, "step": 652 }, { "epoch": 0.03555845976870665, "grad_norm": 0.8435898015434021, "learning_rate": 0.00011851179673321235, "loss": 12.5091, "step": 653 }, { "epoch": 0.03561291376528966, "grad_norm": 0.8127838512211614, "learning_rate": 0.00011869328493647913, "loss": 12.5358, "step": 654 }, { "epoch": 0.035667367761872674, "grad_norm": 0.844221855096733, "learning_rate": 0.00011887477313974594, "loss": 12.5023, "step": 655 }, { "epoch": 0.03572182175845569, "grad_norm": 0.830609883523681, "learning_rate": 0.00011905626134301271, "loss": 12.5483, "step": 656 }, { "epoch": 0.035776275755038695, "grad_norm": 0.7751682709968774, "learning_rate": 0.00011923774954627949, "loss": 12.5475, "step": 657 }, { "epoch": 0.03583072975162171, "grad_norm": 0.7594651681846976, "learning_rate": 0.00011941923774954629, "loss": 12.4376, "step": 658 }, { "epoch": 0.03588518374820472, "grad_norm": 0.8105747332842143, "learning_rate": 0.00011960072595281306, "loss": 12.5585, "step": 659 }, { "epoch": 0.035939637744787735, "grad_norm": 0.7793088239529253, "learning_rate": 0.00011978221415607986, "loss": 12.5071, "step": 660 }, { "epoch": 0.03599409174137074, "grad_norm": 0.8091456420082649, "learning_rate": 0.00011996370235934664, "loss": 12.4204, "step": 661 }, { "epoch": 0.036048545737953755, "grad_norm": 0.7951004347492463, "learning_rate": 0.00012014519056261344, "loss": 12.4689, "step": 662 }, { "epoch": 0.03610299973453677, "grad_norm": 0.8203582718986129, "learning_rate": 0.00012032667876588021, "loss": 12.4161, "step": 663 }, { "epoch": 0.036157453731119775, "grad_norm": 0.9104629783176926, "learning_rate": 0.00012050816696914702, "loss": 12.4031, "step": 664 }, { "epoch": 0.03621190772770279, "grad_norm": 0.8353893787785487, "learning_rate": 0.0001206896551724138, "loss": 12.4805, "step": 665 }, { "epoch": 0.0362663617242858, "grad_norm": 0.8025972229636333, "learning_rate": 0.00012087114337568059, "loss": 12.5601, "step": 666 }, { "epoch": 0.036320815720868815, "grad_norm": 0.7563712264723594, "learning_rate": 0.00012105263157894738, "loss": 12.4652, "step": 667 }, { "epoch": 0.03637526971745182, "grad_norm": 0.8122372235020665, "learning_rate": 0.00012123411978221418, "loss": 12.4633, "step": 668 }, { "epoch": 0.036429723714034835, "grad_norm": 0.8880903012742153, "learning_rate": 0.00012141560798548095, "loss": 12.4917, "step": 669 }, { "epoch": 0.03648417771061785, "grad_norm": 0.7902079118875632, "learning_rate": 0.00012159709618874773, "loss": 12.5041, "step": 670 }, { "epoch": 0.03653863170720086, "grad_norm": 0.7919532434646256, "learning_rate": 0.00012177858439201453, "loss": 12.4096, "step": 671 }, { "epoch": 0.03659308570378387, "grad_norm": 0.786591506755876, "learning_rate": 0.0001219600725952813, "loss": 12.5606, "step": 672 }, { "epoch": 0.03664753970036688, "grad_norm": 0.8038365184195617, "learning_rate": 0.0001221415607985481, "loss": 12.4756, "step": 673 }, { "epoch": 0.036701993696949896, "grad_norm": 0.770757422573286, "learning_rate": 0.0001223230490018149, "loss": 12.4893, "step": 674 }, { "epoch": 0.03675644769353291, "grad_norm": 0.761612989544494, "learning_rate": 0.0001225045372050817, "loss": 12.3772, "step": 675 }, { "epoch": 0.036810901690115916, "grad_norm": 0.8670862349575074, "learning_rate": 0.00012268602540834846, "loss": 12.3902, "step": 676 }, { "epoch": 0.03686535568669893, "grad_norm": 0.7716718708781327, "learning_rate": 0.00012286751361161526, "loss": 12.5174, "step": 677 }, { "epoch": 0.03691980968328194, "grad_norm": 0.7626179805058609, "learning_rate": 0.00012304900181488203, "loss": 12.5441, "step": 678 }, { "epoch": 0.036974263679864956, "grad_norm": 0.8312158992798632, "learning_rate": 0.00012323049001814883, "loss": 12.3531, "step": 679 }, { "epoch": 0.03702871767644796, "grad_norm": 0.8564858249915114, "learning_rate": 0.0001234119782214156, "loss": 12.5319, "step": 680 }, { "epoch": 0.037083171673030976, "grad_norm": 0.8648959668269681, "learning_rate": 0.0001235934664246824, "loss": 12.5591, "step": 681 }, { "epoch": 0.03713762566961399, "grad_norm": 0.8916475653141226, "learning_rate": 0.0001237749546279492, "loss": 12.6703, "step": 682 }, { "epoch": 0.037192079666197, "grad_norm": 0.9084031257077833, "learning_rate": 0.00012395644283121597, "loss": 12.5349, "step": 683 }, { "epoch": 0.03724653366278001, "grad_norm": 0.8797221532380368, "learning_rate": 0.00012413793103448277, "loss": 12.4662, "step": 684 }, { "epoch": 0.03730098765936302, "grad_norm": 0.8043936491667408, "learning_rate": 0.00012431941923774954, "loss": 12.5472, "step": 685 }, { "epoch": 0.03735544165594604, "grad_norm": 0.817146472456262, "learning_rate": 0.00012450090744101634, "loss": 12.4383, "step": 686 }, { "epoch": 0.03740989565252905, "grad_norm": 0.8114469583343945, "learning_rate": 0.00012468239564428313, "loss": 12.5172, "step": 687 }, { "epoch": 0.03746434964911206, "grad_norm": 0.8326000918570917, "learning_rate": 0.00012486388384754993, "loss": 12.5178, "step": 688 }, { "epoch": 0.03751880364569507, "grad_norm": 0.7723478809577488, "learning_rate": 0.0001250453720508167, "loss": 12.5362, "step": 689 }, { "epoch": 0.037573257642278084, "grad_norm": 0.9410693950400154, "learning_rate": 0.0001252268602540835, "loss": 12.5571, "step": 690 }, { "epoch": 0.0376277116388611, "grad_norm": 0.7561947345119391, "learning_rate": 0.00012540834845735027, "loss": 12.4258, "step": 691 }, { "epoch": 0.037682165635444104, "grad_norm": 0.8418495240004382, "learning_rate": 0.00012558983666061704, "loss": 12.5777, "step": 692 }, { "epoch": 0.03773661963202712, "grad_norm": 0.8746311873777267, "learning_rate": 0.00012577132486388387, "loss": 12.5951, "step": 693 }, { "epoch": 0.03779107362861013, "grad_norm": 0.7625921236694155, "learning_rate": 0.00012595281306715064, "loss": 12.4046, "step": 694 }, { "epoch": 0.037845527625193144, "grad_norm": 0.734823565054105, "learning_rate": 0.00012613430127041744, "loss": 12.5139, "step": 695 }, { "epoch": 0.03789998162177615, "grad_norm": 0.7621770331854553, "learning_rate": 0.0001263157894736842, "loss": 12.6277, "step": 696 }, { "epoch": 0.037954435618359164, "grad_norm": 0.7166086175672672, "learning_rate": 0.000126497277676951, "loss": 12.4117, "step": 697 }, { "epoch": 0.03800888961494218, "grad_norm": 0.7971843612908569, "learning_rate": 0.00012667876588021778, "loss": 12.4814, "step": 698 }, { "epoch": 0.03806334361152519, "grad_norm": 0.7481586297499027, "learning_rate": 0.00012686025408348457, "loss": 12.4657, "step": 699 }, { "epoch": 0.0381177976081082, "grad_norm": 0.7143053642727076, "learning_rate": 0.00012704174228675137, "loss": 12.3673, "step": 700 }, { "epoch": 0.03817225160469121, "grad_norm": 0.8593540767983657, "learning_rate": 0.00012722323049001817, "loss": 12.5246, "step": 701 }, { "epoch": 0.038226705601274225, "grad_norm": 0.7833857321998503, "learning_rate": 0.00012740471869328494, "loss": 12.5754, "step": 702 }, { "epoch": 0.03828115959785724, "grad_norm": 0.7740278489930515, "learning_rate": 0.00012758620689655174, "loss": 12.6015, "step": 703 }, { "epoch": 0.038335613594440245, "grad_norm": 0.8410629541952023, "learning_rate": 0.0001277676950998185, "loss": 12.6456, "step": 704 }, { "epoch": 0.03839006759102326, "grad_norm": 0.8093859296985529, "learning_rate": 0.0001279491833030853, "loss": 12.4783, "step": 705 }, { "epoch": 0.03844452158760627, "grad_norm": 0.7619677944718481, "learning_rate": 0.0001281306715063521, "loss": 12.4919, "step": 706 }, { "epoch": 0.038498975584189285, "grad_norm": 0.7627275451844259, "learning_rate": 0.00012831215970961888, "loss": 12.4902, "step": 707 }, { "epoch": 0.03855342958077229, "grad_norm": 0.7874103603037563, "learning_rate": 0.00012849364791288567, "loss": 12.4671, "step": 708 }, { "epoch": 0.038607883577355305, "grad_norm": 0.8572838287744005, "learning_rate": 0.00012867513611615244, "loss": 12.5543, "step": 709 }, { "epoch": 0.03866233757393832, "grad_norm": 0.8810561414897864, "learning_rate": 0.00012885662431941924, "loss": 12.51, "step": 710 }, { "epoch": 0.03871679157052133, "grad_norm": 0.7541640787422855, "learning_rate": 0.00012903811252268604, "loss": 12.5667, "step": 711 }, { "epoch": 0.03877124556710434, "grad_norm": 0.773591066392057, "learning_rate": 0.00012921960072595284, "loss": 12.3739, "step": 712 }, { "epoch": 0.03882569956368735, "grad_norm": 0.7308375378339267, "learning_rate": 0.0001294010889292196, "loss": 12.5431, "step": 713 }, { "epoch": 0.038880153560270365, "grad_norm": 0.8362238065065555, "learning_rate": 0.0001295825771324864, "loss": 12.5286, "step": 714 }, { "epoch": 0.03893460755685338, "grad_norm": 0.8560896444701315, "learning_rate": 0.00012976406533575318, "loss": 12.6039, "step": 715 }, { "epoch": 0.038989061553436385, "grad_norm": 0.7718918195967964, "learning_rate": 0.00012994555353901995, "loss": 12.6776, "step": 716 }, { "epoch": 0.0390435155500194, "grad_norm": 0.8479818318382927, "learning_rate": 0.00013012704174228675, "loss": 12.4291, "step": 717 }, { "epoch": 0.03909796954660241, "grad_norm": 0.8189985279402068, "learning_rate": 0.00013030852994555355, "loss": 12.4984, "step": 718 }, { "epoch": 0.039152423543185426, "grad_norm": 0.8252447601960348, "learning_rate": 0.00013049001814882034, "loss": 12.5111, "step": 719 }, { "epoch": 0.03920687753976843, "grad_norm": 0.8136491572382893, "learning_rate": 0.00013067150635208711, "loss": 12.5477, "step": 720 }, { "epoch": 0.039261331536351446, "grad_norm": 0.741260225544626, "learning_rate": 0.0001308529945553539, "loss": 12.5397, "step": 721 }, { "epoch": 0.03931578553293446, "grad_norm": 1.0188705229204873, "learning_rate": 0.00013103448275862068, "loss": 12.5885, "step": 722 }, { "epoch": 0.03937023952951747, "grad_norm": 0.8344084817037877, "learning_rate": 0.00013121597096188748, "loss": 12.6934, "step": 723 }, { "epoch": 0.03942469352610048, "grad_norm": 0.8182160497160557, "learning_rate": 0.00013139745916515428, "loss": 12.5394, "step": 724 }, { "epoch": 0.03947914752268349, "grad_norm": 0.754364334548919, "learning_rate": 0.00013157894736842108, "loss": 12.5191, "step": 725 }, { "epoch": 0.039533601519266506, "grad_norm": 0.7740832117265269, "learning_rate": 0.00013176043557168785, "loss": 12.5467, "step": 726 }, { "epoch": 0.03958805551584952, "grad_norm": 0.8335032571621467, "learning_rate": 0.00013194192377495462, "loss": 12.4449, "step": 727 }, { "epoch": 0.039642509512432526, "grad_norm": 0.8051614632134781, "learning_rate": 0.00013212341197822142, "loss": 12.6346, "step": 728 }, { "epoch": 0.03969696350901554, "grad_norm": 0.7995481068509528, "learning_rate": 0.0001323049001814882, "loss": 12.6264, "step": 729 }, { "epoch": 0.03975141750559855, "grad_norm": 0.8623273622651532, "learning_rate": 0.000132486388384755, "loss": 12.5711, "step": 730 }, { "epoch": 0.03980587150218156, "grad_norm": 0.8437033455684732, "learning_rate": 0.00013266787658802178, "loss": 12.5684, "step": 731 }, { "epoch": 0.03986032549876457, "grad_norm": 0.9133141357437732, "learning_rate": 0.00013284936479128858, "loss": 12.602, "step": 732 }, { "epoch": 0.03991477949534759, "grad_norm": 0.8189575723503095, "learning_rate": 0.00013303085299455535, "loss": 12.4223, "step": 733 }, { "epoch": 0.0399692334919306, "grad_norm": 0.7768915209961624, "learning_rate": 0.00013321234119782215, "loss": 12.5371, "step": 734 }, { "epoch": 0.04002368748851361, "grad_norm": 0.8949770926241271, "learning_rate": 0.00013339382940108892, "loss": 12.5584, "step": 735 }, { "epoch": 0.04007814148509662, "grad_norm": 0.8251457768920586, "learning_rate": 0.00013357531760435572, "loss": 12.5211, "step": 736 }, { "epoch": 0.040132595481679634, "grad_norm": 1.057308901725991, "learning_rate": 0.00013375680580762252, "loss": 12.7114, "step": 737 }, { "epoch": 0.04018704947826265, "grad_norm": 0.7555355053742839, "learning_rate": 0.00013393829401088931, "loss": 12.4727, "step": 738 }, { "epoch": 0.040241503474845654, "grad_norm": 0.9506351004654765, "learning_rate": 0.00013411978221415609, "loss": 12.5368, "step": 739 }, { "epoch": 0.04029595747142867, "grad_norm": 0.8585509422496327, "learning_rate": 0.00013430127041742286, "loss": 12.59, "step": 740 }, { "epoch": 0.04035041146801168, "grad_norm": 0.8650805842588604, "learning_rate": 0.00013448275862068965, "loss": 12.4794, "step": 741 }, { "epoch": 0.040404865464594694, "grad_norm": 0.8002843337946893, "learning_rate": 0.00013466424682395645, "loss": 12.3748, "step": 742 }, { "epoch": 0.0404593194611777, "grad_norm": 0.7811881804163763, "learning_rate": 0.00013484573502722325, "loss": 12.5016, "step": 743 }, { "epoch": 0.040513773457760714, "grad_norm": 0.8033795374596253, "learning_rate": 0.00013502722323049002, "loss": 12.5511, "step": 744 }, { "epoch": 0.04056822745434373, "grad_norm": 0.7797716186956701, "learning_rate": 0.00013520871143375682, "loss": 12.6652, "step": 745 }, { "epoch": 0.04062268145092674, "grad_norm": 0.8900245517652725, "learning_rate": 0.0001353901996370236, "loss": 12.5599, "step": 746 }, { "epoch": 0.04067713544750975, "grad_norm": 0.942216054700293, "learning_rate": 0.0001355716878402904, "loss": 12.5546, "step": 747 }, { "epoch": 0.04073158944409276, "grad_norm": 1.0359805567146756, "learning_rate": 0.00013575317604355719, "loss": 12.5748, "step": 748 }, { "epoch": 0.040786043440675775, "grad_norm": 1.0075629890341031, "learning_rate": 0.00013593466424682398, "loss": 12.5473, "step": 749 }, { "epoch": 0.04084049743725879, "grad_norm": 0.7558468233619073, "learning_rate": 0.00013611615245009076, "loss": 12.4132, "step": 750 }, { "epoch": 0.040894951433841795, "grad_norm": 0.778620606134502, "learning_rate": 0.00013629764065335753, "loss": 12.5153, "step": 751 }, { "epoch": 0.04094940543042481, "grad_norm": 0.8510205117821609, "learning_rate": 0.00013647912885662432, "loss": 12.6411, "step": 752 }, { "epoch": 0.04100385942700782, "grad_norm": 0.9177016995670307, "learning_rate": 0.0001366606170598911, "loss": 12.5692, "step": 753 }, { "epoch": 0.041058313423590835, "grad_norm": 0.7745160379530536, "learning_rate": 0.0001368421052631579, "loss": 12.5341, "step": 754 }, { "epoch": 0.04111276742017384, "grad_norm": 0.8140311412247214, "learning_rate": 0.0001370235934664247, "loss": 12.5958, "step": 755 }, { "epoch": 0.041167221416756855, "grad_norm": 0.8551770393253671, "learning_rate": 0.0001372050816696915, "loss": 12.5419, "step": 756 }, { "epoch": 0.04122167541333987, "grad_norm": 0.7937937119864299, "learning_rate": 0.00013738656987295826, "loss": 12.5787, "step": 757 }, { "epoch": 0.04127612940992288, "grad_norm": 0.8020893372298973, "learning_rate": 0.00013756805807622506, "loss": 12.5617, "step": 758 }, { "epoch": 0.04133058340650589, "grad_norm": 0.903708509231395, "learning_rate": 0.00013774954627949183, "loss": 12.5472, "step": 759 }, { "epoch": 0.0413850374030889, "grad_norm": 0.8577396403644533, "learning_rate": 0.00013793103448275863, "loss": 12.5458, "step": 760 }, { "epoch": 0.041439491399671916, "grad_norm": 1.091639804694237, "learning_rate": 0.00013811252268602542, "loss": 12.5178, "step": 761 }, { "epoch": 0.04149394539625493, "grad_norm": 0.842781245898835, "learning_rate": 0.00013829401088929222, "loss": 12.662, "step": 762 }, { "epoch": 0.041548399392837936, "grad_norm": 0.9767804515104263, "learning_rate": 0.000138475499092559, "loss": 12.5239, "step": 763 }, { "epoch": 0.04160285338942095, "grad_norm": 0.8484900134788992, "learning_rate": 0.00013865698729582576, "loss": 12.5559, "step": 764 }, { "epoch": 0.04165730738600396, "grad_norm": 0.8415112219861786, "learning_rate": 0.00013883847549909256, "loss": 12.527, "step": 765 }, { "epoch": 0.041711761382586976, "grad_norm": 1.0104772032943936, "learning_rate": 0.00013901996370235933, "loss": 12.5081, "step": 766 }, { "epoch": 0.04176621537916998, "grad_norm": 0.7789779394160212, "learning_rate": 0.00013920145190562616, "loss": 12.5802, "step": 767 }, { "epoch": 0.041820669375752996, "grad_norm": 0.8828806284149939, "learning_rate": 0.00013938294010889293, "loss": 12.5638, "step": 768 }, { "epoch": 0.04187512337233601, "grad_norm": 0.9095358751899403, "learning_rate": 0.00013956442831215973, "loss": 12.6153, "step": 769 }, { "epoch": 0.04192957736891902, "grad_norm": 0.7915235702796843, "learning_rate": 0.0001397459165154265, "loss": 12.575, "step": 770 }, { "epoch": 0.04198403136550203, "grad_norm": 0.9294936050905224, "learning_rate": 0.0001399274047186933, "loss": 12.4846, "step": 771 }, { "epoch": 0.04203848536208504, "grad_norm": 0.749715657221197, "learning_rate": 0.00014010889292196007, "loss": 12.5267, "step": 772 }, { "epoch": 0.042092939358668056, "grad_norm": 0.8036323955706655, "learning_rate": 0.00014029038112522686, "loss": 12.602, "step": 773 }, { "epoch": 0.04214739335525107, "grad_norm": 0.8151381024538288, "learning_rate": 0.00014047186932849366, "loss": 12.5653, "step": 774 }, { "epoch": 0.042201847351834076, "grad_norm": 0.9971274493801566, "learning_rate": 0.00014065335753176043, "loss": 12.6751, "step": 775 }, { "epoch": 0.04225630134841709, "grad_norm": 0.8513572095537125, "learning_rate": 0.00014083484573502723, "loss": 12.3767, "step": 776 }, { "epoch": 0.0423107553450001, "grad_norm": 1.0023920639584978, "learning_rate": 0.000141016333938294, "loss": 12.7133, "step": 777 }, { "epoch": 0.04236520934158312, "grad_norm": 0.8628820897885917, "learning_rate": 0.0001411978221415608, "loss": 12.5328, "step": 778 }, { "epoch": 0.04241966333816612, "grad_norm": 0.7938920881523879, "learning_rate": 0.0001413793103448276, "loss": 12.3853, "step": 779 }, { "epoch": 0.04247411733474914, "grad_norm": 0.8646632555026025, "learning_rate": 0.0001415607985480944, "loss": 12.6662, "step": 780 }, { "epoch": 0.04252857133133215, "grad_norm": 0.8439048939595939, "learning_rate": 0.00014174228675136117, "loss": 12.3081, "step": 781 }, { "epoch": 0.042583025327915164, "grad_norm": 0.7599687354406947, "learning_rate": 0.00014192377495462796, "loss": 12.4467, "step": 782 }, { "epoch": 0.04263747932449817, "grad_norm": 0.8070920519030709, "learning_rate": 0.00014210526315789474, "loss": 12.531, "step": 783 }, { "epoch": 0.042691933321081184, "grad_norm": 0.873705703606423, "learning_rate": 0.00014228675136116153, "loss": 12.4829, "step": 784 }, { "epoch": 0.0427463873176642, "grad_norm": 0.8746134110818439, "learning_rate": 0.00014246823956442833, "loss": 12.5633, "step": 785 }, { "epoch": 0.04280084131424721, "grad_norm": 0.8003175609249429, "learning_rate": 0.0001426497277676951, "loss": 12.6093, "step": 786 }, { "epoch": 0.04285529531083022, "grad_norm": 0.9226227234251844, "learning_rate": 0.0001428312159709619, "loss": 12.7582, "step": 787 }, { "epoch": 0.04290974930741323, "grad_norm": 0.774081515279731, "learning_rate": 0.00014301270417422867, "loss": 12.4773, "step": 788 }, { "epoch": 0.042964203303996244, "grad_norm": 0.7770717492469534, "learning_rate": 0.00014319419237749547, "loss": 12.5703, "step": 789 }, { "epoch": 0.04301865730057926, "grad_norm": 0.8342543205146827, "learning_rate": 0.00014337568058076224, "loss": 12.596, "step": 790 }, { "epoch": 0.043073111297162264, "grad_norm": 0.8516389169196312, "learning_rate": 0.00014355716878402904, "loss": 12.5668, "step": 791 }, { "epoch": 0.04312756529374528, "grad_norm": 0.7896009308409137, "learning_rate": 0.00014373865698729584, "loss": 12.519, "step": 792 }, { "epoch": 0.04318201929032829, "grad_norm": 0.8052339672016775, "learning_rate": 0.00014392014519056263, "loss": 12.5517, "step": 793 }, { "epoch": 0.043236473286911305, "grad_norm": 0.7530572458315676, "learning_rate": 0.0001441016333938294, "loss": 12.541, "step": 794 }, { "epoch": 0.04329092728349431, "grad_norm": 0.8662017245842865, "learning_rate": 0.0001442831215970962, "loss": 12.5754, "step": 795 }, { "epoch": 0.043345381280077325, "grad_norm": 0.7645406815099872, "learning_rate": 0.00014446460980036297, "loss": 12.4066, "step": 796 }, { "epoch": 0.04339983527666034, "grad_norm": 0.8330667873972226, "learning_rate": 0.00014464609800362977, "loss": 12.4053, "step": 797 }, { "epoch": 0.043454289273243345, "grad_norm": 0.8205108353659064, "learning_rate": 0.00014482758620689657, "loss": 12.5986, "step": 798 }, { "epoch": 0.04350874326982636, "grad_norm": 0.8520375890357685, "learning_rate": 0.00014500907441016334, "loss": 12.5274, "step": 799 }, { "epoch": 0.04356319726640937, "grad_norm": 0.8080865183543499, "learning_rate": 0.00014519056261343014, "loss": 12.703, "step": 800 }, { "epoch": 0.043617651262992385, "grad_norm": 0.8028752755990547, "learning_rate": 0.0001453720508166969, "loss": 12.4813, "step": 801 }, { "epoch": 0.04367210525957539, "grad_norm": 0.7991778930135419, "learning_rate": 0.0001455535390199637, "loss": 12.4828, "step": 802 }, { "epoch": 0.043726559256158405, "grad_norm": 0.7638276877678696, "learning_rate": 0.00014573502722323048, "loss": 12.4735, "step": 803 }, { "epoch": 0.04378101325274142, "grad_norm": 0.818424687275988, "learning_rate": 0.0001459165154264973, "loss": 12.6986, "step": 804 }, { "epoch": 0.04383546724932443, "grad_norm": 1.082495586362405, "learning_rate": 0.00014609800362976407, "loss": 12.5908, "step": 805 }, { "epoch": 0.04388992124590744, "grad_norm": 1.325984013203311, "learning_rate": 0.00014627949183303087, "loss": 12.6857, "step": 806 }, { "epoch": 0.04394437524249045, "grad_norm": 0.8686683284879365, "learning_rate": 0.00014646098003629764, "loss": 12.4566, "step": 807 }, { "epoch": 0.043998829239073466, "grad_norm": 0.8177369162606946, "learning_rate": 0.00014664246823956444, "loss": 12.5558, "step": 808 }, { "epoch": 0.04405328323565648, "grad_norm": 0.7760195423893618, "learning_rate": 0.0001468239564428312, "loss": 12.4437, "step": 809 }, { "epoch": 0.044107737232239486, "grad_norm": 0.8298412976489291, "learning_rate": 0.000147005444646098, "loss": 12.4522, "step": 810 }, { "epoch": 0.0441621912288225, "grad_norm": 0.7607087893488296, "learning_rate": 0.0001471869328493648, "loss": 12.5662, "step": 811 }, { "epoch": 0.04421664522540551, "grad_norm": 0.8810991381778492, "learning_rate": 0.00014736842105263158, "loss": 12.6585, "step": 812 }, { "epoch": 0.044271099221988526, "grad_norm": 0.8024073570168333, "learning_rate": 0.00014754990925589838, "loss": 12.5763, "step": 813 }, { "epoch": 0.04432555321857153, "grad_norm": 0.7715357348992065, "learning_rate": 0.00014773139745916515, "loss": 12.6189, "step": 814 }, { "epoch": 0.044380007215154546, "grad_norm": 0.8995056229545437, "learning_rate": 0.00014791288566243195, "loss": 12.6572, "step": 815 }, { "epoch": 0.04443446121173756, "grad_norm": 0.8248284303171265, "learning_rate": 0.00014809437386569874, "loss": 12.5962, "step": 816 }, { "epoch": 0.04448891520832057, "grad_norm": 0.866524307728336, "learning_rate": 0.00014827586206896554, "loss": 12.6484, "step": 817 }, { "epoch": 0.04454336920490358, "grad_norm": 0.8879113215826534, "learning_rate": 0.0001484573502722323, "loss": 12.6412, "step": 818 }, { "epoch": 0.04459782320148659, "grad_norm": 0.8758200396877941, "learning_rate": 0.0001486388384754991, "loss": 12.4537, "step": 819 }, { "epoch": 0.044652277198069606, "grad_norm": 0.8436526664202744, "learning_rate": 0.00014882032667876588, "loss": 12.5899, "step": 820 }, { "epoch": 0.04470673119465262, "grad_norm": 0.8545141478541453, "learning_rate": 0.00014900181488203265, "loss": 12.3574, "step": 821 }, { "epoch": 0.044761185191235627, "grad_norm": 1.0267167465528357, "learning_rate": 0.00014918330308529948, "loss": 12.5645, "step": 822 }, { "epoch": 0.04481563918781864, "grad_norm": 0.784780075284951, "learning_rate": 0.00014936479128856625, "loss": 12.4824, "step": 823 }, { "epoch": 0.04487009318440165, "grad_norm": 0.9789745719784654, "learning_rate": 0.00014954627949183305, "loss": 12.8387, "step": 824 }, { "epoch": 0.04492454718098467, "grad_norm": 0.8341195518544883, "learning_rate": 0.00014972776769509982, "loss": 12.5543, "step": 825 }, { "epoch": 0.04497900117756767, "grad_norm": 0.869629747247344, "learning_rate": 0.00014990925589836661, "loss": 12.6488, "step": 826 }, { "epoch": 0.04503345517415069, "grad_norm": 0.826578908620228, "learning_rate": 0.00015009074410163339, "loss": 12.7158, "step": 827 }, { "epoch": 0.0450879091707337, "grad_norm": 0.8756522771620907, "learning_rate": 0.00015027223230490018, "loss": 12.6569, "step": 828 }, { "epoch": 0.045142363167316714, "grad_norm": 0.9101434033316181, "learning_rate": 0.00015045372050816698, "loss": 12.6332, "step": 829 }, { "epoch": 0.04519681716389972, "grad_norm": 0.8952132643866115, "learning_rate": 0.00015063520871143378, "loss": 12.5787, "step": 830 }, { "epoch": 0.045251271160482734, "grad_norm": 0.8011479463268877, "learning_rate": 0.00015081669691470055, "loss": 12.608, "step": 831 }, { "epoch": 0.04530572515706575, "grad_norm": 0.8206162307714958, "learning_rate": 0.00015099818511796735, "loss": 12.4505, "step": 832 }, { "epoch": 0.04536017915364876, "grad_norm": 0.832128042031696, "learning_rate": 0.00015117967332123412, "loss": 12.6066, "step": 833 }, { "epoch": 0.04541463315023177, "grad_norm": 0.8683483549303054, "learning_rate": 0.00015136116152450092, "loss": 12.606, "step": 834 }, { "epoch": 0.04546908714681478, "grad_norm": 0.8436085523266798, "learning_rate": 0.00015154264972776772, "loss": 12.6469, "step": 835 }, { "epoch": 0.045523541143397794, "grad_norm": 0.8528405540241153, "learning_rate": 0.00015172413793103449, "loss": 12.6791, "step": 836 }, { "epoch": 0.04557799513998081, "grad_norm": 0.8141272474203417, "learning_rate": 0.00015190562613430128, "loss": 12.6382, "step": 837 }, { "epoch": 0.045632449136563814, "grad_norm": 0.74720840520126, "learning_rate": 0.00015208711433756806, "loss": 12.5676, "step": 838 }, { "epoch": 0.04568690313314683, "grad_norm": 0.7927722643407407, "learning_rate": 0.00015226860254083485, "loss": 12.5094, "step": 839 }, { "epoch": 0.04574135712972984, "grad_norm": 0.8636739467475668, "learning_rate": 0.00015245009074410162, "loss": 12.6322, "step": 840 }, { "epoch": 0.045795811126312855, "grad_norm": 0.7951696899986485, "learning_rate": 0.00015263157894736845, "loss": 12.5994, "step": 841 }, { "epoch": 0.04585026512289586, "grad_norm": 0.8781451899122102, "learning_rate": 0.00015281306715063522, "loss": 12.7485, "step": 842 }, { "epoch": 0.045904719119478875, "grad_norm": 0.7856930125310769, "learning_rate": 0.00015299455535390202, "loss": 12.6924, "step": 843 }, { "epoch": 0.04595917311606189, "grad_norm": 0.8669106113156154, "learning_rate": 0.0001531760435571688, "loss": 12.756, "step": 844 }, { "epoch": 0.0460136271126449, "grad_norm": 0.7645949228985675, "learning_rate": 0.00015335753176043556, "loss": 12.5389, "step": 845 }, { "epoch": 0.04606808110922791, "grad_norm": 0.8798600529095105, "learning_rate": 0.00015353901996370236, "loss": 12.728, "step": 846 }, { "epoch": 0.04612253510581092, "grad_norm": 0.741923863402535, "learning_rate": 0.00015372050816696916, "loss": 12.5727, "step": 847 }, { "epoch": 0.046176989102393935, "grad_norm": 0.8260769631339705, "learning_rate": 0.00015390199637023595, "loss": 12.6141, "step": 848 }, { "epoch": 0.04623144309897695, "grad_norm": 0.8194036460957305, "learning_rate": 0.00015408348457350272, "loss": 12.5255, "step": 849 }, { "epoch": 0.046285897095559955, "grad_norm": 0.8691842016409564, "learning_rate": 0.00015426497277676952, "loss": 12.665, "step": 850 }, { "epoch": 0.04634035109214297, "grad_norm": 0.8265824746983291, "learning_rate": 0.0001544464609800363, "loss": 12.6078, "step": 851 }, { "epoch": 0.04639480508872598, "grad_norm": 0.7402054058609698, "learning_rate": 0.0001546279491833031, "loss": 12.6317, "step": 852 }, { "epoch": 0.046449259085308996, "grad_norm": 0.8385811912548654, "learning_rate": 0.0001548094373865699, "loss": 12.5836, "step": 853 }, { "epoch": 0.046503713081892, "grad_norm": 0.8237170640137944, "learning_rate": 0.0001549909255898367, "loss": 12.6291, "step": 854 }, { "epoch": 0.046558167078475016, "grad_norm": 0.823899899114155, "learning_rate": 0.00015517241379310346, "loss": 12.4743, "step": 855 }, { "epoch": 0.04661262107505803, "grad_norm": 0.7988258901543164, "learning_rate": 0.00015535390199637023, "loss": 12.4996, "step": 856 }, { "epoch": 0.04666707507164104, "grad_norm": 0.809244770017492, "learning_rate": 0.00015553539019963703, "loss": 12.575, "step": 857 }, { "epoch": 0.04672152906822405, "grad_norm": 0.7959350249409557, "learning_rate": 0.0001557168784029038, "loss": 12.5737, "step": 858 }, { "epoch": 0.04677598306480706, "grad_norm": 1.0155065239576782, "learning_rate": 0.00015589836660617062, "loss": 12.55, "step": 859 }, { "epoch": 0.046830437061390076, "grad_norm": 0.9194464079420726, "learning_rate": 0.0001560798548094374, "loss": 12.551, "step": 860 }, { "epoch": 0.04688489105797308, "grad_norm": 0.9893867712993335, "learning_rate": 0.0001562613430127042, "loss": 12.5736, "step": 861 }, { "epoch": 0.046939345054556096, "grad_norm": 0.862259978923311, "learning_rate": 0.00015644283121597096, "loss": 12.5943, "step": 862 }, { "epoch": 0.04699379905113911, "grad_norm": 0.7750438451575176, "learning_rate": 0.00015662431941923776, "loss": 12.5714, "step": 863 }, { "epoch": 0.04704825304772212, "grad_norm": 0.8220468097414089, "learning_rate": 0.00015680580762250453, "loss": 12.6296, "step": 864 }, { "epoch": 0.04710270704430513, "grad_norm": 0.8713481011755143, "learning_rate": 0.00015698729582577133, "loss": 12.5893, "step": 865 }, { "epoch": 0.04715716104088814, "grad_norm": 0.9801443084095905, "learning_rate": 0.00015716878402903813, "loss": 12.7519, "step": 866 }, { "epoch": 0.04721161503747116, "grad_norm": 0.9026000187524962, "learning_rate": 0.00015735027223230493, "loss": 12.7312, "step": 867 }, { "epoch": 0.04726606903405417, "grad_norm": 0.895500696590104, "learning_rate": 0.0001575317604355717, "loss": 12.5189, "step": 868 }, { "epoch": 0.04732052303063718, "grad_norm": 0.8867758320775264, "learning_rate": 0.00015771324863883847, "loss": 12.5417, "step": 869 }, { "epoch": 0.04737497702722019, "grad_norm": 0.8124655980552189, "learning_rate": 0.00015789473684210527, "loss": 12.6201, "step": 870 }, { "epoch": 0.047429431023803204, "grad_norm": 0.9722011505333534, "learning_rate": 0.00015807622504537206, "loss": 12.5834, "step": 871 }, { "epoch": 0.04748388502038622, "grad_norm": 0.982984567178049, "learning_rate": 0.00015825771324863886, "loss": 12.6461, "step": 872 }, { "epoch": 0.047538339016969224, "grad_norm": 0.8381062655513668, "learning_rate": 0.00015843920145190563, "loss": 12.5809, "step": 873 }, { "epoch": 0.04759279301355224, "grad_norm": 0.9650603897304773, "learning_rate": 0.00015862068965517243, "loss": 12.7033, "step": 874 }, { "epoch": 0.04764724701013525, "grad_norm": 0.8854713870148178, "learning_rate": 0.0001588021778584392, "loss": 12.5962, "step": 875 }, { "epoch": 0.047701701006718264, "grad_norm": 0.7761059432684236, "learning_rate": 0.000158983666061706, "loss": 12.6441, "step": 876 }, { "epoch": 0.04775615500330127, "grad_norm": 0.8718350296220381, "learning_rate": 0.00015916515426497277, "loss": 12.6047, "step": 877 }, { "epoch": 0.047810608999884284, "grad_norm": 0.82164764356862, "learning_rate": 0.0001593466424682396, "loss": 12.5409, "step": 878 }, { "epoch": 0.0478650629964673, "grad_norm": 0.8196215669312907, "learning_rate": 0.00015952813067150637, "loss": 12.5776, "step": 879 }, { "epoch": 0.04791951699305031, "grad_norm": 1.014476253091219, "learning_rate": 0.00015970961887477314, "loss": 12.6802, "step": 880 }, { "epoch": 0.04797397098963332, "grad_norm": 0.8195727306565402, "learning_rate": 0.00015989110707803993, "loss": 12.5736, "step": 881 }, { "epoch": 0.04802842498621633, "grad_norm": 0.862536103470985, "learning_rate": 0.0001600725952813067, "loss": 12.4775, "step": 882 }, { "epoch": 0.048082878982799344, "grad_norm": 0.7498353482743876, "learning_rate": 0.0001602540834845735, "loss": 12.6313, "step": 883 }, { "epoch": 0.04813733297938236, "grad_norm": 0.8062614637177298, "learning_rate": 0.0001604355716878403, "loss": 12.4909, "step": 884 }, { "epoch": 0.048191786975965364, "grad_norm": 0.8415146193366887, "learning_rate": 0.0001606170598911071, "loss": 12.6701, "step": 885 }, { "epoch": 0.04824624097254838, "grad_norm": 0.8429914823102769, "learning_rate": 0.00016079854809437387, "loss": 12.6284, "step": 886 }, { "epoch": 0.04830069496913139, "grad_norm": 0.8222857362939741, "learning_rate": 0.00016098003629764067, "loss": 12.5522, "step": 887 }, { "epoch": 0.048355148965714405, "grad_norm": 0.7890962523234009, "learning_rate": 0.00016116152450090744, "loss": 12.5168, "step": 888 }, { "epoch": 0.04840960296229741, "grad_norm": 0.8912584208255049, "learning_rate": 0.00016134301270417424, "loss": 12.6498, "step": 889 }, { "epoch": 0.048464056958880425, "grad_norm": 0.9530476347867869, "learning_rate": 0.00016152450090744103, "loss": 12.6872, "step": 890 }, { "epoch": 0.04851851095546344, "grad_norm": 0.7724850359717667, "learning_rate": 0.0001617059891107078, "loss": 12.5858, "step": 891 }, { "epoch": 0.04857296495204645, "grad_norm": 1.0184330473902963, "learning_rate": 0.0001618874773139746, "loss": 12.6251, "step": 892 }, { "epoch": 0.04862741894862946, "grad_norm": 0.9254790514667174, "learning_rate": 0.00016206896551724137, "loss": 12.6039, "step": 893 }, { "epoch": 0.04868187294521247, "grad_norm": 0.758029348988625, "learning_rate": 0.00016225045372050817, "loss": 12.4594, "step": 894 }, { "epoch": 0.048736326941795485, "grad_norm": 0.7274927678606498, "learning_rate": 0.00016243194192377494, "loss": 12.5977, "step": 895 }, { "epoch": 0.0487907809383785, "grad_norm": 0.8384075646167376, "learning_rate": 0.00016261343012704177, "loss": 12.6449, "step": 896 }, { "epoch": 0.048845234934961505, "grad_norm": 0.7486479322661992, "learning_rate": 0.00016279491833030854, "loss": 12.6152, "step": 897 }, { "epoch": 0.04889968893154452, "grad_norm": 0.9106636506573582, "learning_rate": 0.00016297640653357534, "loss": 12.6102, "step": 898 }, { "epoch": 0.04895414292812753, "grad_norm": 0.8113137306900519, "learning_rate": 0.0001631578947368421, "loss": 12.5951, "step": 899 }, { "epoch": 0.049008596924710546, "grad_norm": 0.8959399175330679, "learning_rate": 0.0001633393829401089, "loss": 12.7228, "step": 900 }, { "epoch": 0.04906305092129355, "grad_norm": 1.0275683355568288, "learning_rate": 0.00016352087114337568, "loss": 12.7082, "step": 901 }, { "epoch": 0.049117504917876566, "grad_norm": 0.889496269768351, "learning_rate": 0.00016370235934664247, "loss": 12.6843, "step": 902 }, { "epoch": 0.04917195891445958, "grad_norm": 0.7581300503636256, "learning_rate": 0.00016388384754990927, "loss": 12.5467, "step": 903 }, { "epoch": 0.04922641291104259, "grad_norm": 0.8974381502103214, "learning_rate": 0.00016406533575317604, "loss": 12.6412, "step": 904 }, { "epoch": 0.0492808669076256, "grad_norm": 0.9315736025199278, "learning_rate": 0.00016424682395644284, "loss": 12.6383, "step": 905 }, { "epoch": 0.04933532090420861, "grad_norm": 0.8746410644890605, "learning_rate": 0.0001644283121597096, "loss": 12.689, "step": 906 }, { "epoch": 0.049389774900791626, "grad_norm": 0.8032178195748642, "learning_rate": 0.0001646098003629764, "loss": 12.5981, "step": 907 }, { "epoch": 0.04944422889737464, "grad_norm": 1.055609092234511, "learning_rate": 0.0001647912885662432, "loss": 12.6674, "step": 908 }, { "epoch": 0.049498682893957646, "grad_norm": 0.9329468352906664, "learning_rate": 0.00016497277676951, "loss": 12.6298, "step": 909 }, { "epoch": 0.04955313689054066, "grad_norm": 0.9100919688447834, "learning_rate": 0.00016515426497277678, "loss": 12.6856, "step": 910 }, { "epoch": 0.04960759088712367, "grad_norm": 0.8157553859700658, "learning_rate": 0.00016533575317604358, "loss": 12.6711, "step": 911 }, { "epoch": 0.04966204488370669, "grad_norm": 0.8407602222280705, "learning_rate": 0.00016551724137931035, "loss": 12.5952, "step": 912 }, { "epoch": 0.04971649888028969, "grad_norm": 0.8536838879477414, "learning_rate": 0.00016569872958257714, "loss": 12.4985, "step": 913 }, { "epoch": 0.04977095287687271, "grad_norm": 0.818610133507912, "learning_rate": 0.00016588021778584392, "loss": 12.5662, "step": 914 }, { "epoch": 0.04982540687345572, "grad_norm": 0.8852001994986728, "learning_rate": 0.0001660617059891107, "loss": 12.5936, "step": 915 }, { "epoch": 0.049879860870038734, "grad_norm": 0.9487098832759733, "learning_rate": 0.0001662431941923775, "loss": 12.7971, "step": 916 }, { "epoch": 0.04993431486662174, "grad_norm": 0.872066612504273, "learning_rate": 0.00016642468239564428, "loss": 12.7543, "step": 917 }, { "epoch": 0.049988768863204754, "grad_norm": 0.7908053266666842, "learning_rate": 0.00016660617059891108, "loss": 12.6731, "step": 918 }, { "epoch": 0.05004322285978777, "grad_norm": 0.8909361958279155, "learning_rate": 0.00016678765880217785, "loss": 12.6195, "step": 919 }, { "epoch": 0.05009767685637078, "grad_norm": 0.8839930655370404, "learning_rate": 0.00016696914700544465, "loss": 12.6823, "step": 920 }, { "epoch": 0.05015213085295379, "grad_norm": 0.8593282266531492, "learning_rate": 0.00016715063520871145, "loss": 12.5395, "step": 921 }, { "epoch": 0.0502065848495368, "grad_norm": 0.7522025735405317, "learning_rate": 0.00016733212341197824, "loss": 12.5567, "step": 922 }, { "epoch": 0.050261038846119814, "grad_norm": 1.0190661738060314, "learning_rate": 0.00016751361161524502, "loss": 12.6318, "step": 923 }, { "epoch": 0.05031549284270283, "grad_norm": 0.7614223532031728, "learning_rate": 0.0001676950998185118, "loss": 12.6078, "step": 924 }, { "epoch": 0.050369946839285834, "grad_norm": 0.9321891712221156, "learning_rate": 0.00016787658802177858, "loss": 12.6678, "step": 925 }, { "epoch": 0.05042440083586885, "grad_norm": 0.8467437632932115, "learning_rate": 0.00016805807622504538, "loss": 12.6101, "step": 926 }, { "epoch": 0.05047885483245186, "grad_norm": 1.0609254909084949, "learning_rate": 0.00016823956442831218, "loss": 12.7275, "step": 927 }, { "epoch": 0.05053330882903487, "grad_norm": 0.9175270724116192, "learning_rate": 0.00016842105263157895, "loss": 12.7194, "step": 928 }, { "epoch": 0.05058776282561788, "grad_norm": 0.9166307896802638, "learning_rate": 0.00016860254083484575, "loss": 12.6248, "step": 929 }, { "epoch": 0.050642216822200894, "grad_norm": 0.8315736647865203, "learning_rate": 0.00016878402903811252, "loss": 12.6652, "step": 930 }, { "epoch": 0.05069667081878391, "grad_norm": 0.779456315691902, "learning_rate": 0.00016896551724137932, "loss": 12.6193, "step": 931 }, { "epoch": 0.050751124815366915, "grad_norm": 0.9332276655363869, "learning_rate": 0.0001691470054446461, "loss": 12.4871, "step": 932 }, { "epoch": 0.05080557881194993, "grad_norm": 0.7483055003595003, "learning_rate": 0.0001693284936479129, "loss": 12.3438, "step": 933 }, { "epoch": 0.05086003280853294, "grad_norm": 1.043133034199254, "learning_rate": 0.00016950998185117968, "loss": 12.5724, "step": 934 }, { "epoch": 0.050914486805115955, "grad_norm": 0.9885656883730269, "learning_rate": 0.00016969147005444648, "loss": 12.5337, "step": 935 }, { "epoch": 0.05096894080169896, "grad_norm": 0.8284948943642783, "learning_rate": 0.00016987295825771325, "loss": 12.7829, "step": 936 }, { "epoch": 0.051023394798281975, "grad_norm": 0.8429753478015432, "learning_rate": 0.00017005444646098005, "loss": 12.671, "step": 937 }, { "epoch": 0.05107784879486499, "grad_norm": 0.8150872100250276, "learning_rate": 0.00017023593466424682, "loss": 12.5752, "step": 938 }, { "epoch": 0.051132302791448, "grad_norm": 0.975085573256272, "learning_rate": 0.00017041742286751362, "loss": 12.7308, "step": 939 }, { "epoch": 0.05118675678803101, "grad_norm": 0.8226356283220444, "learning_rate": 0.00017059891107078042, "loss": 12.7762, "step": 940 }, { "epoch": 0.05124121078461402, "grad_norm": 0.9192672341554673, "learning_rate": 0.0001707803992740472, "loss": 12.6354, "step": 941 }, { "epoch": 0.051295664781197035, "grad_norm": 0.8457327315713598, "learning_rate": 0.000170961887477314, "loss": 12.665, "step": 942 }, { "epoch": 0.05135011877778005, "grad_norm": 0.8549173394249183, "learning_rate": 0.00017114337568058076, "loss": 12.546, "step": 943 }, { "epoch": 0.051404572774363055, "grad_norm": 0.9118763020226056, "learning_rate": 0.00017132486388384756, "loss": 12.6458, "step": 944 }, { "epoch": 0.05145902677094607, "grad_norm": 0.8463997651394616, "learning_rate": 0.00017150635208711435, "loss": 12.7347, "step": 945 }, { "epoch": 0.05151348076752908, "grad_norm": 0.8627001066806589, "learning_rate": 0.00017168784029038115, "loss": 12.5186, "step": 946 }, { "epoch": 0.051567934764112096, "grad_norm": 0.8447051949506521, "learning_rate": 0.00017186932849364792, "loss": 12.6107, "step": 947 }, { "epoch": 0.0516223887606951, "grad_norm": 0.8246470203738206, "learning_rate": 0.00017205081669691472, "loss": 12.6495, "step": 948 }, { "epoch": 0.051676842757278116, "grad_norm": 0.8653752959166282, "learning_rate": 0.0001722323049001815, "loss": 12.5561, "step": 949 }, { "epoch": 0.05173129675386113, "grad_norm": 0.8007262251384583, "learning_rate": 0.00017241379310344826, "loss": 12.6482, "step": 950 }, { "epoch": 0.05178575075044414, "grad_norm": 0.8102077511779113, "learning_rate": 0.00017259528130671506, "loss": 12.6646, "step": 951 }, { "epoch": 0.05184020474702715, "grad_norm": 0.904787409745801, "learning_rate": 0.00017277676950998186, "loss": 12.613, "step": 952 }, { "epoch": 0.05189465874361016, "grad_norm": 0.8622064273999556, "learning_rate": 0.00017295825771324866, "loss": 12.6379, "step": 953 }, { "epoch": 0.051949112740193176, "grad_norm": 0.9227253225257225, "learning_rate": 0.00017313974591651543, "loss": 12.4684, "step": 954 }, { "epoch": 0.05200356673677619, "grad_norm": 0.9007687103506802, "learning_rate": 0.00017332123411978223, "loss": 12.6496, "step": 955 }, { "epoch": 0.052058020733359196, "grad_norm": 0.9285057014615055, "learning_rate": 0.000173502722323049, "loss": 12.5537, "step": 956 }, { "epoch": 0.05211247472994221, "grad_norm": 0.789205719752802, "learning_rate": 0.0001736842105263158, "loss": 12.5936, "step": 957 }, { "epoch": 0.05216692872652522, "grad_norm": 0.8941984741380373, "learning_rate": 0.0001738656987295826, "loss": 12.5961, "step": 958 }, { "epoch": 0.05222138272310824, "grad_norm": 0.9388036268594567, "learning_rate": 0.0001740471869328494, "loss": 12.609, "step": 959 }, { "epoch": 0.05227583671969124, "grad_norm": 0.9588542625267311, "learning_rate": 0.00017422867513611616, "loss": 12.662, "step": 960 }, { "epoch": 0.05233029071627426, "grad_norm": 0.9761067195333801, "learning_rate": 0.00017441016333938296, "loss": 12.7561, "step": 961 }, { "epoch": 0.05238474471285727, "grad_norm": 0.7943628439855379, "learning_rate": 0.00017459165154264973, "loss": 12.661, "step": 962 }, { "epoch": 0.052439198709440284, "grad_norm": 0.7609648264074853, "learning_rate": 0.0001747731397459165, "loss": 12.6289, "step": 963 }, { "epoch": 0.05249365270602329, "grad_norm": 0.9609887685299381, "learning_rate": 0.00017495462794918333, "loss": 12.7005, "step": 964 }, { "epoch": 0.052548106702606304, "grad_norm": 0.7698262991659199, "learning_rate": 0.0001751361161524501, "loss": 12.5711, "step": 965 }, { "epoch": 0.05260256069918932, "grad_norm": 0.8387736314013369, "learning_rate": 0.0001753176043557169, "loss": 12.6618, "step": 966 }, { "epoch": 0.05265701469577233, "grad_norm": 0.8919145099367959, "learning_rate": 0.00017549909255898367, "loss": 12.6663, "step": 967 }, { "epoch": 0.05271146869235534, "grad_norm": 0.9217171184647402, "learning_rate": 0.00017568058076225046, "loss": 12.6593, "step": 968 }, { "epoch": 0.05276592268893835, "grad_norm": 0.8225240714439287, "learning_rate": 0.00017586206896551723, "loss": 12.7085, "step": 969 }, { "epoch": 0.052820376685521364, "grad_norm": 0.8413886223855485, "learning_rate": 0.00017604355716878403, "loss": 12.5573, "step": 970 }, { "epoch": 0.05287483068210438, "grad_norm": 0.7944122346053981, "learning_rate": 0.00017622504537205083, "loss": 12.649, "step": 971 }, { "epoch": 0.052929284678687384, "grad_norm": 0.9027497161747975, "learning_rate": 0.00017640653357531763, "loss": 12.6321, "step": 972 }, { "epoch": 0.0529837386752704, "grad_norm": 0.7864322574182031, "learning_rate": 0.0001765880217785844, "loss": 12.7023, "step": 973 }, { "epoch": 0.05303819267185341, "grad_norm": 0.8749044448504391, "learning_rate": 0.00017676950998185117, "loss": 12.6465, "step": 974 }, { "epoch": 0.053092646668436425, "grad_norm": 0.733046163574283, "learning_rate": 0.00017695099818511797, "loss": 12.5625, "step": 975 }, { "epoch": 0.05314710066501943, "grad_norm": 0.850086651338118, "learning_rate": 0.00017713248638838477, "loss": 12.6172, "step": 976 }, { "epoch": 0.053201554661602445, "grad_norm": 0.9315575438714128, "learning_rate": 0.00017731397459165156, "loss": 12.7023, "step": 977 }, { "epoch": 0.05325600865818546, "grad_norm": 0.8522769717057842, "learning_rate": 0.00017749546279491833, "loss": 12.7316, "step": 978 }, { "epoch": 0.05331046265476847, "grad_norm": 0.7952996316417338, "learning_rate": 0.00017767695099818513, "loss": 12.5978, "step": 979 }, { "epoch": 0.05336491665135148, "grad_norm": 0.8405135630987488, "learning_rate": 0.0001778584392014519, "loss": 12.8137, "step": 980 }, { "epoch": 0.05341937064793449, "grad_norm": 0.7651645706970682, "learning_rate": 0.0001780399274047187, "loss": 12.5507, "step": 981 }, { "epoch": 0.053473824644517505, "grad_norm": 0.8459505872252346, "learning_rate": 0.0001782214156079855, "loss": 12.5907, "step": 982 }, { "epoch": 0.05352827864110052, "grad_norm": 0.7574463269859852, "learning_rate": 0.0001784029038112523, "loss": 12.4691, "step": 983 }, { "epoch": 0.053582732637683525, "grad_norm": 0.8922042819317508, "learning_rate": 0.00017858439201451907, "loss": 12.7275, "step": 984 }, { "epoch": 0.05363718663426654, "grad_norm": 0.7755357628949647, "learning_rate": 0.00017876588021778584, "loss": 12.6564, "step": 985 }, { "epoch": 0.05369164063084955, "grad_norm": 0.888720018295191, "learning_rate": 0.00017894736842105264, "loss": 12.4645, "step": 986 }, { "epoch": 0.053746094627432565, "grad_norm": 0.8258444684364085, "learning_rate": 0.0001791288566243194, "loss": 12.7308, "step": 987 }, { "epoch": 0.05380054862401557, "grad_norm": 0.8517525708787187, "learning_rate": 0.0001793103448275862, "loss": 12.6665, "step": 988 }, { "epoch": 0.053855002620598585, "grad_norm": 0.9308572420303274, "learning_rate": 0.000179491833030853, "loss": 12.6683, "step": 989 }, { "epoch": 0.0539094566171816, "grad_norm": 0.947701272403253, "learning_rate": 0.0001796733212341198, "loss": 12.7302, "step": 990 }, { "epoch": 0.05396391061376461, "grad_norm": 0.8412669138813667, "learning_rate": 0.00017985480943738657, "loss": 12.4787, "step": 991 }, { "epoch": 0.05401836461034762, "grad_norm": 0.9849026278127962, "learning_rate": 0.00018003629764065337, "loss": 12.5891, "step": 992 }, { "epoch": 0.05407281860693063, "grad_norm": 0.9591250834751449, "learning_rate": 0.00018021778584392014, "loss": 12.6623, "step": 993 }, { "epoch": 0.054127272603513646, "grad_norm": 0.9657335153684073, "learning_rate": 0.00018039927404718694, "loss": 12.6658, "step": 994 }, { "epoch": 0.05418172660009665, "grad_norm": 0.7862727136981313, "learning_rate": 0.00018058076225045374, "loss": 12.6252, "step": 995 }, { "epoch": 0.054236180596679666, "grad_norm": 0.8994392012305145, "learning_rate": 0.00018076225045372054, "loss": 12.6117, "step": 996 }, { "epoch": 0.05429063459326268, "grad_norm": 0.9888859609422734, "learning_rate": 0.0001809437386569873, "loss": 12.542, "step": 997 }, { "epoch": 0.05434508858984569, "grad_norm": 0.8085407049387496, "learning_rate": 0.00018112522686025408, "loss": 12.511, "step": 998 }, { "epoch": 0.0543995425864287, "grad_norm": 0.8706037750909749, "learning_rate": 0.00018130671506352088, "loss": 12.6018, "step": 999 }, { "epoch": 0.05445399658301171, "grad_norm": 0.9259418524451194, "learning_rate": 0.00018148820326678765, "loss": 12.8483, "step": 1000 }, { "epoch": 0.054508450579594726, "grad_norm": 0.7746764072383874, "learning_rate": 0.00018166969147005447, "loss": 12.5888, "step": 1001 }, { "epoch": 0.05456290457617774, "grad_norm": 0.9766058753944424, "learning_rate": 0.00018185117967332124, "loss": 12.5827, "step": 1002 }, { "epoch": 0.054617358572760746, "grad_norm": 0.8856003767869254, "learning_rate": 0.00018203266787658804, "loss": 12.4993, "step": 1003 }, { "epoch": 0.05467181256934376, "grad_norm": 0.9485411740557078, "learning_rate": 0.0001822141560798548, "loss": 12.677, "step": 1004 }, { "epoch": 0.05472626656592677, "grad_norm": 0.7672312949692742, "learning_rate": 0.0001823956442831216, "loss": 12.6134, "step": 1005 }, { "epoch": 0.05478072056250979, "grad_norm": 0.9172643751252387, "learning_rate": 0.00018257713248638838, "loss": 12.7224, "step": 1006 }, { "epoch": 0.05483517455909279, "grad_norm": 0.8420022646409505, "learning_rate": 0.00018275862068965518, "loss": 12.6461, "step": 1007 }, { "epoch": 0.05488962855567581, "grad_norm": 0.8333960383700091, "learning_rate": 0.00018294010889292198, "loss": 12.6246, "step": 1008 }, { "epoch": 0.05494408255225882, "grad_norm": 1.0096244098506704, "learning_rate": 0.00018312159709618875, "loss": 12.7259, "step": 1009 }, { "epoch": 0.054998536548841834, "grad_norm": 0.7871373395387745, "learning_rate": 0.00018330308529945554, "loss": 12.7275, "step": 1010 }, { "epoch": 0.05505299054542484, "grad_norm": 1.032343747821382, "learning_rate": 0.00018348457350272232, "loss": 12.6628, "step": 1011 }, { "epoch": 0.055107444542007854, "grad_norm": 0.7083079315534552, "learning_rate": 0.00018366606170598911, "loss": 12.4748, "step": 1012 }, { "epoch": 0.05516189853859087, "grad_norm": 1.1907392275444992, "learning_rate": 0.0001838475499092559, "loss": 12.5132, "step": 1013 }, { "epoch": 0.05521635253517388, "grad_norm": 0.8442442447866033, "learning_rate": 0.0001840290381125227, "loss": 12.6859, "step": 1014 }, { "epoch": 0.05527080653175689, "grad_norm": 0.9375890211793874, "learning_rate": 0.00018421052631578948, "loss": 12.6139, "step": 1015 }, { "epoch": 0.0553252605283399, "grad_norm": 0.7957705162002837, "learning_rate": 0.00018439201451905628, "loss": 12.6227, "step": 1016 }, { "epoch": 0.055379714524922914, "grad_norm": 0.8572364025943078, "learning_rate": 0.00018457350272232305, "loss": 12.4793, "step": 1017 }, { "epoch": 0.05543416852150593, "grad_norm": 0.8594165745763462, "learning_rate": 0.00018475499092558985, "loss": 12.7353, "step": 1018 }, { "epoch": 0.055488622518088934, "grad_norm": 0.8947901476734842, "learning_rate": 0.00018493647912885665, "loss": 12.5524, "step": 1019 }, { "epoch": 0.05554307651467195, "grad_norm": 0.8317346016577231, "learning_rate": 0.00018511796733212342, "loss": 12.5814, "step": 1020 }, { "epoch": 0.05559753051125496, "grad_norm": 0.7741287564610906, "learning_rate": 0.00018529945553539021, "loss": 12.7118, "step": 1021 }, { "epoch": 0.055651984507837975, "grad_norm": 0.9229904453397703, "learning_rate": 0.00018548094373865698, "loss": 12.7203, "step": 1022 }, { "epoch": 0.05570643850442098, "grad_norm": 0.7189120710008146, "learning_rate": 0.00018566243194192378, "loss": 12.7316, "step": 1023 }, { "epoch": 0.055760892501003995, "grad_norm": 0.7996729763717355, "learning_rate": 0.00018584392014519055, "loss": 12.6907, "step": 1024 }, { "epoch": 0.05581534649758701, "grad_norm": 0.8282088203606525, "learning_rate": 0.00018602540834845735, "loss": 12.6942, "step": 1025 }, { "epoch": 0.05586980049417002, "grad_norm": 0.8101927238311741, "learning_rate": 0.00018620689655172415, "loss": 12.6429, "step": 1026 }, { "epoch": 0.05592425449075303, "grad_norm": 0.8723302771571373, "learning_rate": 0.00018638838475499095, "loss": 12.715, "step": 1027 }, { "epoch": 0.05597870848733604, "grad_norm": 0.8993457263793324, "learning_rate": 0.00018656987295825772, "loss": 12.7494, "step": 1028 }, { "epoch": 0.056033162483919055, "grad_norm": 0.8956846970014478, "learning_rate": 0.00018675136116152452, "loss": 12.7523, "step": 1029 }, { "epoch": 0.05608761648050207, "grad_norm": 0.848471231997586, "learning_rate": 0.0001869328493647913, "loss": 12.7121, "step": 1030 }, { "epoch": 0.056142070477085075, "grad_norm": 0.7681001667107862, "learning_rate": 0.00018711433756805809, "loss": 12.6024, "step": 1031 }, { "epoch": 0.05619652447366809, "grad_norm": 0.7924794070425718, "learning_rate": 0.00018729582577132488, "loss": 12.7012, "step": 1032 }, { "epoch": 0.0562509784702511, "grad_norm": 0.8349770749508899, "learning_rate": 0.00018747731397459165, "loss": 12.6386, "step": 1033 }, { "epoch": 0.056305432466834116, "grad_norm": 0.8191879420778659, "learning_rate": 0.00018765880217785845, "loss": 12.6633, "step": 1034 }, { "epoch": 0.05635988646341712, "grad_norm": 0.9108979371089774, "learning_rate": 0.00018784029038112522, "loss": 12.7074, "step": 1035 }, { "epoch": 0.056414340460000136, "grad_norm": 0.9995188221538889, "learning_rate": 0.00018802177858439202, "loss": 12.7757, "step": 1036 }, { "epoch": 0.05646879445658315, "grad_norm": 0.8331304603254887, "learning_rate": 0.0001882032667876588, "loss": 12.7268, "step": 1037 }, { "epoch": 0.05652324845316616, "grad_norm": 0.8421314134400244, "learning_rate": 0.00018838475499092562, "loss": 12.6786, "step": 1038 }, { "epoch": 0.05657770244974917, "grad_norm": 0.9249062144746272, "learning_rate": 0.0001885662431941924, "loss": 12.6439, "step": 1039 }, { "epoch": 0.05663215644633218, "grad_norm": 1.0740033540400613, "learning_rate": 0.00018874773139745919, "loss": 12.6613, "step": 1040 }, { "epoch": 0.056686610442915196, "grad_norm": 0.9515879897654931, "learning_rate": 0.00018892921960072596, "loss": 12.7961, "step": 1041 }, { "epoch": 0.05674106443949821, "grad_norm": 1.0687652941035266, "learning_rate": 0.00018911070780399275, "loss": 12.7123, "step": 1042 }, { "epoch": 0.056795518436081216, "grad_norm": 0.9446694097624536, "learning_rate": 0.00018929219600725953, "loss": 12.7562, "step": 1043 }, { "epoch": 0.05684997243266423, "grad_norm": 0.9237974447352099, "learning_rate": 0.00018947368421052632, "loss": 12.8357, "step": 1044 }, { "epoch": 0.05690442642924724, "grad_norm": 0.8936942172764373, "learning_rate": 0.00018965517241379312, "loss": 12.7792, "step": 1045 }, { "epoch": 0.056958880425830256, "grad_norm": 0.9961818489725659, "learning_rate": 0.0001898366606170599, "loss": 12.6665, "step": 1046 }, { "epoch": 0.05701333442241326, "grad_norm": 0.8390675260109299, "learning_rate": 0.0001900181488203267, "loss": 12.6463, "step": 1047 }, { "epoch": 0.057067788418996276, "grad_norm": 0.9127513873491976, "learning_rate": 0.00019019963702359346, "loss": 12.6908, "step": 1048 }, { "epoch": 0.05712224241557929, "grad_norm": 0.7822521272987107, "learning_rate": 0.00019038112522686026, "loss": 12.6916, "step": 1049 }, { "epoch": 0.0571766964121623, "grad_norm": 0.9316417618579463, "learning_rate": 0.00019056261343012706, "loss": 12.6148, "step": 1050 }, { "epoch": 0.05723115040874531, "grad_norm": 0.8267893855283287, "learning_rate": 0.00019074410163339386, "loss": 12.6444, "step": 1051 }, { "epoch": 0.05728560440532832, "grad_norm": 0.8756184417618663, "learning_rate": 0.00019092558983666063, "loss": 12.6479, "step": 1052 }, { "epoch": 0.05734005840191134, "grad_norm": 0.8537715754578956, "learning_rate": 0.00019110707803992742, "loss": 12.8705, "step": 1053 }, { "epoch": 0.05739451239849435, "grad_norm": 0.8793465083226172, "learning_rate": 0.0001912885662431942, "loss": 12.6592, "step": 1054 }, { "epoch": 0.05744896639507736, "grad_norm": 0.914347190314965, "learning_rate": 0.000191470054446461, "loss": 12.6625, "step": 1055 }, { "epoch": 0.05750342039166037, "grad_norm": 0.8856692561805453, "learning_rate": 0.0001916515426497278, "loss": 12.692, "step": 1056 }, { "epoch": 0.057557874388243384, "grad_norm": 0.8458952197724661, "learning_rate": 0.00019183303085299456, "loss": 12.7096, "step": 1057 }, { "epoch": 0.0576123283848264, "grad_norm": 0.7757824745642351, "learning_rate": 0.00019201451905626136, "loss": 12.7721, "step": 1058 }, { "epoch": 0.057666782381409404, "grad_norm": 0.975994260166484, "learning_rate": 0.00019219600725952813, "loss": 12.719, "step": 1059 }, { "epoch": 0.05772123637799242, "grad_norm": 0.8585148684375852, "learning_rate": 0.00019237749546279493, "loss": 12.7498, "step": 1060 }, { "epoch": 0.05777569037457543, "grad_norm": 0.8545141793726162, "learning_rate": 0.0001925589836660617, "loss": 12.7521, "step": 1061 }, { "epoch": 0.05783014437115844, "grad_norm": 1.0351696982338359, "learning_rate": 0.0001927404718693285, "loss": 12.6858, "step": 1062 }, { "epoch": 0.05788459836774145, "grad_norm": 0.8932909538412702, "learning_rate": 0.0001929219600725953, "loss": 12.6172, "step": 1063 }, { "epoch": 0.057939052364324464, "grad_norm": 0.8033540231795703, "learning_rate": 0.0001931034482758621, "loss": 12.7224, "step": 1064 }, { "epoch": 0.05799350636090748, "grad_norm": 0.9647008699499642, "learning_rate": 0.00019328493647912886, "loss": 12.8202, "step": 1065 }, { "epoch": 0.058047960357490484, "grad_norm": 0.8107858754859842, "learning_rate": 0.00019346642468239566, "loss": 12.6884, "step": 1066 }, { "epoch": 0.0581024143540735, "grad_norm": 0.8763919745645723, "learning_rate": 0.00019364791288566243, "loss": 12.5539, "step": 1067 }, { "epoch": 0.05815686835065651, "grad_norm": 1.0992738520488685, "learning_rate": 0.00019382940108892923, "loss": 12.6674, "step": 1068 }, { "epoch": 0.058211322347239525, "grad_norm": 0.7850194207297764, "learning_rate": 0.00019401088929219603, "loss": 12.597, "step": 1069 }, { "epoch": 0.05826577634382253, "grad_norm": 0.9852227560575982, "learning_rate": 0.0001941923774954628, "loss": 12.6947, "step": 1070 }, { "epoch": 0.058320230340405545, "grad_norm": 1.0687907738165234, "learning_rate": 0.0001943738656987296, "loss": 12.8078, "step": 1071 }, { "epoch": 0.05837468433698856, "grad_norm": 0.8874665780436435, "learning_rate": 0.00019455535390199637, "loss": 12.6525, "step": 1072 }, { "epoch": 0.05842913833357157, "grad_norm": 0.8938001131329727, "learning_rate": 0.00019473684210526317, "loss": 12.7372, "step": 1073 }, { "epoch": 0.05848359233015458, "grad_norm": 0.8296677243268067, "learning_rate": 0.00019491833030852994, "loss": 12.6129, "step": 1074 }, { "epoch": 0.05853804632673759, "grad_norm": 0.9888068104357823, "learning_rate": 0.00019509981851179676, "loss": 12.8547, "step": 1075 }, { "epoch": 0.058592500323320605, "grad_norm": 1.028019721476723, "learning_rate": 0.00019528130671506353, "loss": 12.7056, "step": 1076 }, { "epoch": 0.05864695431990362, "grad_norm": 0.8361357407910752, "learning_rate": 0.00019546279491833033, "loss": 12.6857, "step": 1077 }, { "epoch": 0.058701408316486625, "grad_norm": 0.8973490125437626, "learning_rate": 0.0001956442831215971, "loss": 12.6222, "step": 1078 }, { "epoch": 0.05875586231306964, "grad_norm": 0.7877561882710619, "learning_rate": 0.00019582577132486387, "loss": 12.7484, "step": 1079 }, { "epoch": 0.05881031630965265, "grad_norm": 1.346493036428632, "learning_rate": 0.00019600725952813067, "loss": 12.7032, "step": 1080 }, { "epoch": 0.058864770306235666, "grad_norm": 0.9387631520562696, "learning_rate": 0.00019618874773139747, "loss": 12.6168, "step": 1081 }, { "epoch": 0.05891922430281867, "grad_norm": 0.8426429211134219, "learning_rate": 0.00019637023593466427, "loss": 12.7582, "step": 1082 }, { "epoch": 0.058973678299401686, "grad_norm": 0.8821282555993558, "learning_rate": 0.00019655172413793104, "loss": 12.6952, "step": 1083 }, { "epoch": 0.0590281322959847, "grad_norm": 0.9413833624250499, "learning_rate": 0.00019673321234119784, "loss": 12.8895, "step": 1084 }, { "epoch": 0.05908258629256771, "grad_norm": 1.0551890250213953, "learning_rate": 0.0001969147005444646, "loss": 12.7802, "step": 1085 }, { "epoch": 0.05913704028915072, "grad_norm": 0.826483127495897, "learning_rate": 0.0001970961887477314, "loss": 12.7205, "step": 1086 }, { "epoch": 0.05919149428573373, "grad_norm": 0.953767015147213, "learning_rate": 0.0001972776769509982, "loss": 12.6614, "step": 1087 }, { "epoch": 0.059245948282316746, "grad_norm": 0.8755683812872677, "learning_rate": 0.000197459165154265, "loss": 12.7046, "step": 1088 }, { "epoch": 0.05930040227889976, "grad_norm": 0.8242367546212285, "learning_rate": 0.00019764065335753177, "loss": 12.5865, "step": 1089 }, { "epoch": 0.059354856275482766, "grad_norm": 0.8118383627116624, "learning_rate": 0.00019782214156079857, "loss": 12.7108, "step": 1090 }, { "epoch": 0.05940931027206578, "grad_norm": 0.8309785056550677, "learning_rate": 0.00019800362976406534, "loss": 12.7555, "step": 1091 }, { "epoch": 0.05946376426864879, "grad_norm": 0.8702493286280903, "learning_rate": 0.0001981851179673321, "loss": 12.7094, "step": 1092 }, { "epoch": 0.059518218265231806, "grad_norm": 0.8935609387918308, "learning_rate": 0.00019836660617059894, "loss": 12.8853, "step": 1093 }, { "epoch": 0.05957267226181481, "grad_norm": 0.8176063965684935, "learning_rate": 0.0001985480943738657, "loss": 12.7473, "step": 1094 }, { "epoch": 0.059627126258397826, "grad_norm": 0.9382886609351457, "learning_rate": 0.0001987295825771325, "loss": 12.6793, "step": 1095 }, { "epoch": 0.05968158025498084, "grad_norm": 0.8780758861923207, "learning_rate": 0.00019891107078039928, "loss": 12.6795, "step": 1096 }, { "epoch": 0.05973603425156385, "grad_norm": 0.9060522534957417, "learning_rate": 0.00019909255898366607, "loss": 12.6617, "step": 1097 }, { "epoch": 0.05979048824814686, "grad_norm": 0.8515080058550354, "learning_rate": 0.00019927404718693284, "loss": 12.6893, "step": 1098 }, { "epoch": 0.05984494224472987, "grad_norm": 0.8872822863676731, "learning_rate": 0.00019945553539019964, "loss": 12.7036, "step": 1099 }, { "epoch": 0.05989939624131289, "grad_norm": 0.838829157388531, "learning_rate": 0.00019963702359346644, "loss": 12.6507, "step": 1100 }, { "epoch": 0.0599538502378959, "grad_norm": 0.8884415432987369, "learning_rate": 0.00019981851179673324, "loss": 12.8487, "step": 1101 }, { "epoch": 0.06000830423447891, "grad_norm": 0.9740412298051592, "learning_rate": 0.0002, "loss": 12.6881, "step": 1102 }, { "epoch": 0.06006275823106192, "grad_norm": 0.9241866593303006, "learning_rate": 0.0001999999996111916, "loss": 12.7269, "step": 1103 }, { "epoch": 0.060117212227644934, "grad_norm": 0.9278070088616288, "learning_rate": 0.00019999999844476647, "loss": 12.7711, "step": 1104 }, { "epoch": 0.06017166622422795, "grad_norm": 0.8756109875155865, "learning_rate": 0.00019999999650072457, "loss": 12.6504, "step": 1105 }, { "epoch": 0.060226120220810954, "grad_norm": 0.9555653657587291, "learning_rate": 0.0001999999937790659, "loss": 12.5121, "step": 1106 }, { "epoch": 0.06028057421739397, "grad_norm": 0.8096454821951842, "learning_rate": 0.00019999999027979054, "loss": 12.6582, "step": 1107 }, { "epoch": 0.06033502821397698, "grad_norm": 0.9296910383062582, "learning_rate": 0.00019999998600289846, "loss": 12.6657, "step": 1108 }, { "epoch": 0.060389482210559994, "grad_norm": 0.8466965321383215, "learning_rate": 0.00019999998094838973, "loss": 12.6374, "step": 1109 }, { "epoch": 0.060443936207143, "grad_norm": 0.8264775366654458, "learning_rate": 0.0001999999751162644, "loss": 12.7052, "step": 1110 }, { "epoch": 0.060498390203726014, "grad_norm": 0.9400289147748783, "learning_rate": 0.00019999996850652245, "loss": 12.7985, "step": 1111 }, { "epoch": 0.06055284420030903, "grad_norm": 0.940019638160756, "learning_rate": 0.00019999996111916399, "loss": 12.7924, "step": 1112 }, { "epoch": 0.06060729819689204, "grad_norm": 0.7509979058254492, "learning_rate": 0.00019999995295418908, "loss": 12.6273, "step": 1113 }, { "epoch": 0.06066175219347505, "grad_norm": 0.8144515839177705, "learning_rate": 0.00019999994401159775, "loss": 12.7125, "step": 1114 }, { "epoch": 0.06071620619005806, "grad_norm": 0.7871352320666705, "learning_rate": 0.0001999999342913901, "loss": 12.6371, "step": 1115 }, { "epoch": 0.060770660186641075, "grad_norm": 0.8128165695381447, "learning_rate": 0.0001999999237935662, "loss": 12.7798, "step": 1116 }, { "epoch": 0.06082511418322409, "grad_norm": 0.9145599160586337, "learning_rate": 0.00019999991251812608, "loss": 12.7207, "step": 1117 }, { "epoch": 0.060879568179807095, "grad_norm": 0.8061894716041041, "learning_rate": 0.00019999990046506988, "loss": 12.6761, "step": 1118 }, { "epoch": 0.06093402217639011, "grad_norm": 0.84955895633201, "learning_rate": 0.00019999988763439773, "loss": 12.6494, "step": 1119 }, { "epoch": 0.06098847617297312, "grad_norm": 0.8119447540792254, "learning_rate": 0.00019999987402610962, "loss": 12.7347, "step": 1120 }, { "epoch": 0.061042930169556135, "grad_norm": 0.8194282221432447, "learning_rate": 0.00019999985964020577, "loss": 12.7614, "step": 1121 }, { "epoch": 0.06109738416613914, "grad_norm": 0.8014853175758311, "learning_rate": 0.00019999984447668622, "loss": 12.7345, "step": 1122 }, { "epoch": 0.061151838162722155, "grad_norm": 0.9225218876342778, "learning_rate": 0.00019999982853555111, "loss": 12.751, "step": 1123 }, { "epoch": 0.06120629215930517, "grad_norm": 0.8304127287116028, "learning_rate": 0.00019999981181680057, "loss": 12.7568, "step": 1124 }, { "epoch": 0.06126074615588818, "grad_norm": 0.8598278997579997, "learning_rate": 0.00019999979432043472, "loss": 12.5819, "step": 1125 }, { "epoch": 0.06131520015247119, "grad_norm": 0.8826576027775591, "learning_rate": 0.00019999977604645368, "loss": 12.7199, "step": 1126 }, { "epoch": 0.0613696541490542, "grad_norm": 0.804636809658474, "learning_rate": 0.00019999975699485763, "loss": 12.5365, "step": 1127 }, { "epoch": 0.061424108145637216, "grad_norm": 0.8346707911072948, "learning_rate": 0.00019999973716564672, "loss": 12.7627, "step": 1128 }, { "epoch": 0.06147856214222022, "grad_norm": 0.8549089778900794, "learning_rate": 0.00019999971655882106, "loss": 12.6977, "step": 1129 }, { "epoch": 0.061533016138803236, "grad_norm": 0.8472158811477527, "learning_rate": 0.0001999996951743808, "loss": 12.785, "step": 1130 }, { "epoch": 0.06158747013538625, "grad_norm": 0.7968932169209947, "learning_rate": 0.00019999967301232623, "loss": 12.6148, "step": 1131 }, { "epoch": 0.06164192413196926, "grad_norm": 0.8026579849549752, "learning_rate": 0.00019999965007265735, "loss": 12.6353, "step": 1132 }, { "epoch": 0.06169637812855227, "grad_norm": 0.793885187224609, "learning_rate": 0.00019999962635537446, "loss": 12.6965, "step": 1133 }, { "epoch": 0.06175083212513528, "grad_norm": 0.8813812746509808, "learning_rate": 0.0001999996018604777, "loss": 12.6435, "step": 1134 }, { "epoch": 0.061805286121718296, "grad_norm": 0.7731513844126909, "learning_rate": 0.00019999957658796725, "loss": 12.6346, "step": 1135 }, { "epoch": 0.06185974011830131, "grad_norm": 0.8927790695605196, "learning_rate": 0.00019999955053784336, "loss": 12.8121, "step": 1136 }, { "epoch": 0.061914194114884316, "grad_norm": 0.8686464281487228, "learning_rate": 0.00019999952371010617, "loss": 12.7914, "step": 1137 }, { "epoch": 0.06196864811146733, "grad_norm": 0.8148901326895414, "learning_rate": 0.0001999994961047559, "loss": 12.7512, "step": 1138 }, { "epoch": 0.06202310210805034, "grad_norm": 0.8212055968907241, "learning_rate": 0.00019999946772179282, "loss": 12.7898, "step": 1139 }, { "epoch": 0.06207755610463336, "grad_norm": 0.859935862765019, "learning_rate": 0.00019999943856121707, "loss": 12.7415, "step": 1140 }, { "epoch": 0.06213201010121636, "grad_norm": 0.8271274292210274, "learning_rate": 0.00019999940862302893, "loss": 12.5209, "step": 1141 }, { "epoch": 0.06218646409779938, "grad_norm": 0.9194182466076581, "learning_rate": 0.0001999993779072286, "loss": 12.8127, "step": 1142 }, { "epoch": 0.06224091809438239, "grad_norm": 0.7947629670968179, "learning_rate": 0.00019999934641381635, "loss": 12.5794, "step": 1143 }, { "epoch": 0.062295372090965404, "grad_norm": 0.8589934847949596, "learning_rate": 0.0001999993141427924, "loss": 12.862, "step": 1144 }, { "epoch": 0.06234982608754841, "grad_norm": 0.9370285835777666, "learning_rate": 0.00019999928109415706, "loss": 12.6531, "step": 1145 }, { "epoch": 0.062404280084131424, "grad_norm": 0.8650870724910286, "learning_rate": 0.00019999924726791051, "loss": 12.7084, "step": 1146 }, { "epoch": 0.06245873408071444, "grad_norm": 0.8105626060240043, "learning_rate": 0.00019999921266405303, "loss": 12.6377, "step": 1147 }, { "epoch": 0.06251318807729744, "grad_norm": 0.8396580432219618, "learning_rate": 0.00019999917728258493, "loss": 12.626, "step": 1148 }, { "epoch": 0.06256764207388046, "grad_norm": 0.8616677580171016, "learning_rate": 0.00019999914112350643, "loss": 12.7841, "step": 1149 }, { "epoch": 0.06262209607046347, "grad_norm": 0.8225606805167363, "learning_rate": 0.00019999910418681783, "loss": 12.6528, "step": 1150 }, { "epoch": 0.06267655006704648, "grad_norm": 0.8853599634712473, "learning_rate": 0.00019999906647251946, "loss": 12.9739, "step": 1151 }, { "epoch": 0.0627310040636295, "grad_norm": 0.8499949719325622, "learning_rate": 0.00019999902798061156, "loss": 12.7154, "step": 1152 }, { "epoch": 0.0627854580602125, "grad_norm": 0.7984338515402579, "learning_rate": 0.00019999898871109445, "loss": 12.7525, "step": 1153 }, { "epoch": 0.06283991205679552, "grad_norm": 0.8270221917898859, "learning_rate": 0.00019999894866396846, "loss": 12.6632, "step": 1154 }, { "epoch": 0.06289436605337853, "grad_norm": 0.8417958776114451, "learning_rate": 0.00019999890783923386, "loss": 12.6455, "step": 1155 }, { "epoch": 0.06294882004996154, "grad_norm": 0.9328871763734922, "learning_rate": 0.00019999886623689098, "loss": 12.8024, "step": 1156 }, { "epoch": 0.06300327404654456, "grad_norm": 1.0856841110950344, "learning_rate": 0.00019999882385694014, "loss": 12.6466, "step": 1157 }, { "epoch": 0.06305772804312756, "grad_norm": 0.7988820949419523, "learning_rate": 0.00019999878069938167, "loss": 12.8665, "step": 1158 }, { "epoch": 0.06311218203971057, "grad_norm": 0.906338199416811, "learning_rate": 0.00019999873676421594, "loss": 12.826, "step": 1159 }, { "epoch": 0.06316663603629359, "grad_norm": 0.9190725587084829, "learning_rate": 0.00019999869205144323, "loss": 12.8269, "step": 1160 }, { "epoch": 0.0632210900328766, "grad_norm": 0.8270321207992188, "learning_rate": 0.00019999864656106392, "loss": 12.7298, "step": 1161 }, { "epoch": 0.06327554402945962, "grad_norm": 0.7746905689590335, "learning_rate": 0.0001999986002930784, "loss": 12.559, "step": 1162 }, { "epoch": 0.06332999802604262, "grad_norm": 0.8378946907958874, "learning_rate": 0.00019999855324748697, "loss": 12.8177, "step": 1163 }, { "epoch": 0.06338445202262563, "grad_norm": 0.7859625331062614, "learning_rate": 0.00019999850542429002, "loss": 12.7153, "step": 1164 }, { "epoch": 0.06343890601920865, "grad_norm": 0.8331880794666584, "learning_rate": 0.00019999845682348792, "loss": 12.6974, "step": 1165 }, { "epoch": 0.06349336001579166, "grad_norm": 0.962658613381509, "learning_rate": 0.00019999840744508107, "loss": 12.6871, "step": 1166 }, { "epoch": 0.06354781401237466, "grad_norm": 0.819743012172273, "learning_rate": 0.00019999835728906984, "loss": 12.5648, "step": 1167 }, { "epoch": 0.06360226800895769, "grad_norm": 0.8145608023687774, "learning_rate": 0.00019999830635545457, "loss": 12.7575, "step": 1168 }, { "epoch": 0.06365672200554069, "grad_norm": 0.9500062161145167, "learning_rate": 0.00019999825464423574, "loss": 12.7535, "step": 1169 }, { "epoch": 0.06371117600212371, "grad_norm": 0.8339180428418321, "learning_rate": 0.0001999982021554137, "loss": 12.7517, "step": 1170 }, { "epoch": 0.06376562999870672, "grad_norm": 0.8859042989078137, "learning_rate": 0.00019999814888898887, "loss": 12.7061, "step": 1171 }, { "epoch": 0.06382008399528973, "grad_norm": 0.9593859826719203, "learning_rate": 0.00019999809484496167, "loss": 12.769, "step": 1172 }, { "epoch": 0.06387453799187275, "grad_norm": 0.9254926975424156, "learning_rate": 0.0001999980400233325, "loss": 12.7988, "step": 1173 }, { "epoch": 0.06392899198845575, "grad_norm": 0.9218656660623168, "learning_rate": 0.00019999798442410177, "loss": 12.6074, "step": 1174 }, { "epoch": 0.06398344598503876, "grad_norm": 0.9195323497831199, "learning_rate": 0.00019999792804727, "loss": 12.7693, "step": 1175 }, { "epoch": 0.06403789998162178, "grad_norm": 0.8757789350337974, "learning_rate": 0.00019999787089283757, "loss": 12.666, "step": 1176 }, { "epoch": 0.06409235397820479, "grad_norm": 0.896203375036291, "learning_rate": 0.0001999978129608049, "loss": 12.8525, "step": 1177 }, { "epoch": 0.0641468079747878, "grad_norm": 0.9466387119592451, "learning_rate": 0.0001999977542511725, "loss": 12.7746, "step": 1178 }, { "epoch": 0.06420126197137081, "grad_norm": 0.9107062802083105, "learning_rate": 0.00019999769476394076, "loss": 12.7753, "step": 1179 }, { "epoch": 0.06425571596795382, "grad_norm": 0.8633923015901338, "learning_rate": 0.00019999763449911017, "loss": 12.7459, "step": 1180 }, { "epoch": 0.06431016996453684, "grad_norm": 0.8523060925255115, "learning_rate": 0.00019999757345668122, "loss": 12.7009, "step": 1181 }, { "epoch": 0.06436462396111985, "grad_norm": 0.8732343539755694, "learning_rate": 0.00019999751163665437, "loss": 12.8505, "step": 1182 }, { "epoch": 0.06441907795770285, "grad_norm": 0.8955887659856802, "learning_rate": 0.00019999744903903007, "loss": 12.6876, "step": 1183 }, { "epoch": 0.06447353195428587, "grad_norm": 0.8869982370172741, "learning_rate": 0.00019999738566380887, "loss": 12.6886, "step": 1184 }, { "epoch": 0.06452798595086888, "grad_norm": 0.8861061504796613, "learning_rate": 0.00019999732151099124, "loss": 12.892, "step": 1185 }, { "epoch": 0.0645824399474519, "grad_norm": 0.8650163616204314, "learning_rate": 0.00019999725658057766, "loss": 12.6152, "step": 1186 }, { "epoch": 0.0646368939440349, "grad_norm": 0.7317105713251505, "learning_rate": 0.00019999719087256864, "loss": 12.6659, "step": 1187 }, { "epoch": 0.06469134794061791, "grad_norm": 0.9458481135838617, "learning_rate": 0.00019999712438696467, "loss": 12.9147, "step": 1188 }, { "epoch": 0.06474580193720093, "grad_norm": 0.7615024396322205, "learning_rate": 0.00019999705712376632, "loss": 12.7225, "step": 1189 }, { "epoch": 0.06480025593378394, "grad_norm": 0.8568408849038813, "learning_rate": 0.00019999698908297408, "loss": 12.5842, "step": 1190 }, { "epoch": 0.06485470993036695, "grad_norm": 0.814394862279894, "learning_rate": 0.00019999692026458847, "loss": 12.8761, "step": 1191 }, { "epoch": 0.06490916392694997, "grad_norm": 0.8714145177343299, "learning_rate": 0.00019999685066861007, "loss": 12.8574, "step": 1192 }, { "epoch": 0.06496361792353297, "grad_norm": 0.8264656686816955, "learning_rate": 0.00019999678029503936, "loss": 12.7122, "step": 1193 }, { "epoch": 0.06501807192011598, "grad_norm": 0.8104604521918491, "learning_rate": 0.00019999670914387695, "loss": 12.6977, "step": 1194 }, { "epoch": 0.065072525916699, "grad_norm": 0.8354849449147481, "learning_rate": 0.0001999966372151233, "loss": 12.7495, "step": 1195 }, { "epoch": 0.06512697991328201, "grad_norm": 0.8670770016037759, "learning_rate": 0.00019999656450877908, "loss": 12.8445, "step": 1196 }, { "epoch": 0.06518143390986503, "grad_norm": 0.761947793544306, "learning_rate": 0.00019999649102484475, "loss": 12.7165, "step": 1197 }, { "epoch": 0.06523588790644803, "grad_norm": 0.8575494256497836, "learning_rate": 0.00019999641676332098, "loss": 12.7475, "step": 1198 }, { "epoch": 0.06529034190303104, "grad_norm": 0.9446326318271256, "learning_rate": 0.00019999634172420834, "loss": 12.7995, "step": 1199 }, { "epoch": 0.06534479589961406, "grad_norm": 0.8926293577591496, "learning_rate": 0.00019999626590750733, "loss": 12.8856, "step": 1200 }, { "epoch": 0.06539924989619707, "grad_norm": 0.8785564167937711, "learning_rate": 0.00019999618931321859, "loss": 12.4816, "step": 1201 }, { "epoch": 0.06545370389278007, "grad_norm": 0.8769465556308361, "learning_rate": 0.00019999611194134272, "loss": 12.7773, "step": 1202 }, { "epoch": 0.0655081578893631, "grad_norm": 0.8078239982110514, "learning_rate": 0.0001999960337918803, "loss": 12.6893, "step": 1203 }, { "epoch": 0.0655626118859461, "grad_norm": 0.8764038524011952, "learning_rate": 0.000199995954864832, "loss": 12.6881, "step": 1204 }, { "epoch": 0.06561706588252912, "grad_norm": 1.0046378226101262, "learning_rate": 0.00019999587516019834, "loss": 12.6412, "step": 1205 }, { "epoch": 0.06567151987911213, "grad_norm": 0.8499068641220393, "learning_rate": 0.00019999579467797998, "loss": 12.8251, "step": 1206 }, { "epoch": 0.06572597387569513, "grad_norm": 0.8114198595859603, "learning_rate": 0.00019999571341817755, "loss": 12.7518, "step": 1207 }, { "epoch": 0.06578042787227815, "grad_norm": 0.7826828314807258, "learning_rate": 0.0001999956313807917, "loss": 12.773, "step": 1208 }, { "epoch": 0.06583488186886116, "grad_norm": 0.9134842134989745, "learning_rate": 0.00019999554856582304, "loss": 12.6953, "step": 1209 }, { "epoch": 0.06588933586544417, "grad_norm": 0.8648511371370636, "learning_rate": 0.00019999546497327227, "loss": 12.6783, "step": 1210 }, { "epoch": 0.06594378986202719, "grad_norm": 0.937972103939476, "learning_rate": 0.00019999538060313995, "loss": 12.7148, "step": 1211 }, { "epoch": 0.0659982438586102, "grad_norm": 0.7882262998747774, "learning_rate": 0.00019999529545542677, "loss": 12.5461, "step": 1212 }, { "epoch": 0.06605269785519322, "grad_norm": 0.8935868580453479, "learning_rate": 0.00019999520953013344, "loss": 12.8204, "step": 1213 }, { "epoch": 0.06610715185177622, "grad_norm": 0.8836712629861396, "learning_rate": 0.00019999512282726055, "loss": 12.6649, "step": 1214 }, { "epoch": 0.06616160584835923, "grad_norm": 0.7804057926443057, "learning_rate": 0.00019999503534680888, "loss": 12.7262, "step": 1215 }, { "epoch": 0.06621605984494225, "grad_norm": 0.8700122862674238, "learning_rate": 0.000199994947088779, "loss": 12.8055, "step": 1216 }, { "epoch": 0.06627051384152526, "grad_norm": 0.8834004841790215, "learning_rate": 0.00019999485805317164, "loss": 12.762, "step": 1217 }, { "epoch": 0.06632496783810826, "grad_norm": 0.8343108775202628, "learning_rate": 0.00019999476823998752, "loss": 12.6515, "step": 1218 }, { "epoch": 0.06637942183469128, "grad_norm": 0.8579726093320554, "learning_rate": 0.00019999467764922728, "loss": 12.7467, "step": 1219 }, { "epoch": 0.06643387583127429, "grad_norm": 0.8654133283698366, "learning_rate": 0.00019999458628089167, "loss": 12.7794, "step": 1220 }, { "epoch": 0.06648832982785731, "grad_norm": 0.8325988034952059, "learning_rate": 0.00019999449413498138, "loss": 12.6766, "step": 1221 }, { "epoch": 0.06654278382444032, "grad_norm": 0.7726895332022075, "learning_rate": 0.00019999440121149715, "loss": 12.7095, "step": 1222 }, { "epoch": 0.06659723782102332, "grad_norm": 0.8422111747797787, "learning_rate": 0.00019999430751043972, "loss": 12.7101, "step": 1223 }, { "epoch": 0.06665169181760634, "grad_norm": 0.8713740577866859, "learning_rate": 0.00019999421303180972, "loss": 12.7866, "step": 1224 }, { "epoch": 0.06670614581418935, "grad_norm": 0.7861309399978694, "learning_rate": 0.000199994117775608, "loss": 12.5477, "step": 1225 }, { "epoch": 0.06676059981077236, "grad_norm": 0.7909438744345703, "learning_rate": 0.00019999402174183524, "loss": 12.6011, "step": 1226 }, { "epoch": 0.06681505380735538, "grad_norm": 0.805144263404179, "learning_rate": 0.00019999392493049215, "loss": 12.6455, "step": 1227 }, { "epoch": 0.06686950780393838, "grad_norm": 0.9161247444164494, "learning_rate": 0.0001999938273415796, "loss": 12.9511, "step": 1228 }, { "epoch": 0.0669239618005214, "grad_norm": 0.8007339932273023, "learning_rate": 0.00019999372897509826, "loss": 12.4325, "step": 1229 }, { "epoch": 0.06697841579710441, "grad_norm": 0.8580605244250328, "learning_rate": 0.00019999362983104887, "loss": 12.7317, "step": 1230 }, { "epoch": 0.06703286979368742, "grad_norm": 0.9518365532593344, "learning_rate": 0.0001999935299094323, "loss": 12.8734, "step": 1231 }, { "epoch": 0.06708732379027044, "grad_norm": 0.8239272304844115, "learning_rate": 0.00019999342921024927, "loss": 12.7499, "step": 1232 }, { "epoch": 0.06714177778685344, "grad_norm": 0.8968558651975471, "learning_rate": 0.00019999332773350053, "loss": 12.8772, "step": 1233 }, { "epoch": 0.06719623178343645, "grad_norm": 0.9650739082159776, "learning_rate": 0.00019999322547918692, "loss": 12.864, "step": 1234 }, { "epoch": 0.06725068578001947, "grad_norm": 0.8170253300214879, "learning_rate": 0.00019999312244730924, "loss": 12.8578, "step": 1235 }, { "epoch": 0.06730513977660248, "grad_norm": 0.8107661672269623, "learning_rate": 0.00019999301863786825, "loss": 12.6231, "step": 1236 }, { "epoch": 0.0673595937731855, "grad_norm": 0.8584076187816582, "learning_rate": 0.00019999291405086477, "loss": 12.8479, "step": 1237 }, { "epoch": 0.0674140477697685, "grad_norm": 0.8387218124859667, "learning_rate": 0.00019999280868629964, "loss": 12.8035, "step": 1238 }, { "epoch": 0.06746850176635151, "grad_norm": 0.833310340002492, "learning_rate": 0.00019999270254417363, "loss": 12.813, "step": 1239 }, { "epoch": 0.06752295576293453, "grad_norm": 0.7848080365904714, "learning_rate": 0.00019999259562448766, "loss": 12.5327, "step": 1240 }, { "epoch": 0.06757740975951754, "grad_norm": 0.8079709147478482, "learning_rate": 0.00019999248792724244, "loss": 12.7529, "step": 1241 }, { "epoch": 0.06763186375610054, "grad_norm": 0.797562748653487, "learning_rate": 0.0001999923794524389, "loss": 12.8353, "step": 1242 }, { "epoch": 0.06768631775268356, "grad_norm": 0.7568940240260569, "learning_rate": 0.00019999227020007783, "loss": 12.5846, "step": 1243 }, { "epoch": 0.06774077174926657, "grad_norm": 0.9021563734495636, "learning_rate": 0.00019999216017016006, "loss": 12.7845, "step": 1244 }, { "epoch": 0.06779522574584959, "grad_norm": 0.7172017086809768, "learning_rate": 0.00019999204936268656, "loss": 12.3948, "step": 1245 }, { "epoch": 0.0678496797424326, "grad_norm": 0.899680702607057, "learning_rate": 0.00019999193777765805, "loss": 12.6401, "step": 1246 }, { "epoch": 0.0679041337390156, "grad_norm": 0.8631946648104156, "learning_rate": 0.0001999918254150755, "loss": 12.7803, "step": 1247 }, { "epoch": 0.06795858773559862, "grad_norm": 0.8901750091644829, "learning_rate": 0.00019999171227493974, "loss": 12.7478, "step": 1248 }, { "epoch": 0.06801304173218163, "grad_norm": 0.8298302381975877, "learning_rate": 0.00019999159835725166, "loss": 12.6882, "step": 1249 }, { "epoch": 0.06806749572876464, "grad_norm": 0.8391719325869385, "learning_rate": 0.00019999148366201214, "loss": 12.7615, "step": 1250 }, { "epoch": 0.06812194972534766, "grad_norm": 0.9542640810854296, "learning_rate": 0.0001999913681892221, "loss": 12.8767, "step": 1251 }, { "epoch": 0.06817640372193066, "grad_norm": 0.7886309533998018, "learning_rate": 0.00019999125193888238, "loss": 12.6535, "step": 1252 }, { "epoch": 0.06823085771851368, "grad_norm": 0.8938502848779079, "learning_rate": 0.0001999911349109939, "loss": 12.7479, "step": 1253 }, { "epoch": 0.06828531171509669, "grad_norm": 1.01894846212262, "learning_rate": 0.00019999101710555762, "loss": 12.7659, "step": 1254 }, { "epoch": 0.0683397657116797, "grad_norm": 0.8200912021011945, "learning_rate": 0.0001999908985225744, "loss": 12.7416, "step": 1255 }, { "epoch": 0.06839421970826272, "grad_norm": 0.7572550774775388, "learning_rate": 0.00019999077916204517, "loss": 12.7203, "step": 1256 }, { "epoch": 0.06844867370484572, "grad_norm": 0.9595690781436814, "learning_rate": 0.00019999065902397093, "loss": 12.9055, "step": 1257 }, { "epoch": 0.06850312770142873, "grad_norm": 0.9216699050913904, "learning_rate": 0.00019999053810835254, "loss": 12.9216, "step": 1258 }, { "epoch": 0.06855758169801175, "grad_norm": 0.7783220350551281, "learning_rate": 0.00019999041641519095, "loss": 12.7104, "step": 1259 }, { "epoch": 0.06861203569459476, "grad_norm": 0.7964697170367746, "learning_rate": 0.0001999902939444871, "loss": 12.5829, "step": 1260 }, { "epoch": 0.06866648969117777, "grad_norm": 0.947797767672639, "learning_rate": 0.00019999017069624193, "loss": 12.8261, "step": 1261 }, { "epoch": 0.06872094368776079, "grad_norm": 0.9990216177536839, "learning_rate": 0.00019999004667045647, "loss": 12.6765, "step": 1262 }, { "epoch": 0.06877539768434379, "grad_norm": 0.9003760583623563, "learning_rate": 0.00019998992186713165, "loss": 12.7849, "step": 1263 }, { "epoch": 0.06882985168092681, "grad_norm": 0.8353313742840284, "learning_rate": 0.00019998979628626837, "loss": 12.6698, "step": 1264 }, { "epoch": 0.06888430567750982, "grad_norm": 0.8651149020580153, "learning_rate": 0.00019998966992786768, "loss": 12.6586, "step": 1265 }, { "epoch": 0.06893875967409283, "grad_norm": 0.9382816369907496, "learning_rate": 0.0001999895427919306, "loss": 12.9221, "step": 1266 }, { "epoch": 0.06899321367067585, "grad_norm": 0.7998177833076818, "learning_rate": 0.00019998941487845803, "loss": 12.6578, "step": 1267 }, { "epoch": 0.06904766766725885, "grad_norm": 0.8184372156049693, "learning_rate": 0.00019998928618745102, "loss": 12.7793, "step": 1268 }, { "epoch": 0.06910212166384186, "grad_norm": 0.8071861459286777, "learning_rate": 0.00019998915671891055, "loss": 12.3847, "step": 1269 }, { "epoch": 0.06915657566042488, "grad_norm": 0.8132926016895157, "learning_rate": 0.0001999890264728376, "loss": 12.8209, "step": 1270 }, { "epoch": 0.06921102965700789, "grad_norm": 0.8625809229455872, "learning_rate": 0.00019998889544923322, "loss": 12.6077, "step": 1271 }, { "epoch": 0.0692654836535909, "grad_norm": 0.877250661583724, "learning_rate": 0.00019998876364809843, "loss": 12.888, "step": 1272 }, { "epoch": 0.06931993765017391, "grad_norm": 0.9665899385261744, "learning_rate": 0.00019998863106943427, "loss": 12.8492, "step": 1273 }, { "epoch": 0.06937439164675692, "grad_norm": 0.8873863859176445, "learning_rate": 0.0001999884977132417, "loss": 12.7165, "step": 1274 }, { "epoch": 0.06942884564333994, "grad_norm": 0.8998207159482029, "learning_rate": 0.00019998836357952183, "loss": 12.7232, "step": 1275 }, { "epoch": 0.06948329963992295, "grad_norm": 0.9357481029675431, "learning_rate": 0.00019998822866827568, "loss": 12.7706, "step": 1276 }, { "epoch": 0.06953775363650595, "grad_norm": 0.9390340910364658, "learning_rate": 0.0001999880929795043, "loss": 12.8162, "step": 1277 }, { "epoch": 0.06959220763308897, "grad_norm": 0.8788392373945151, "learning_rate": 0.00019998795651320875, "loss": 12.7985, "step": 1278 }, { "epoch": 0.06964666162967198, "grad_norm": 0.9910757288059426, "learning_rate": 0.00019998781926939004, "loss": 12.7973, "step": 1279 }, { "epoch": 0.069701115626255, "grad_norm": 0.7825317381332015, "learning_rate": 0.00019998768124804931, "loss": 12.7743, "step": 1280 }, { "epoch": 0.069755569622838, "grad_norm": 0.9466682022844718, "learning_rate": 0.0001999875424491876, "loss": 12.8456, "step": 1281 }, { "epoch": 0.06981002361942101, "grad_norm": 0.8392171551646117, "learning_rate": 0.00019998740287280597, "loss": 12.7803, "step": 1282 }, { "epoch": 0.06986447761600403, "grad_norm": 0.9682561669853998, "learning_rate": 0.00019998726251890556, "loss": 12.7737, "step": 1283 }, { "epoch": 0.06991893161258704, "grad_norm": 1.0730646542580322, "learning_rate": 0.0001999871213874874, "loss": 12.6476, "step": 1284 }, { "epoch": 0.06997338560917005, "grad_norm": 0.8438483865520613, "learning_rate": 0.00019998697947855263, "loss": 12.8169, "step": 1285 }, { "epoch": 0.07002783960575307, "grad_norm": 0.883713442906129, "learning_rate": 0.00019998683679210236, "loss": 12.7374, "step": 1286 }, { "epoch": 0.07008229360233607, "grad_norm": 0.940886253494083, "learning_rate": 0.00019998669332813764, "loss": 12.7748, "step": 1287 }, { "epoch": 0.0701367475989191, "grad_norm": 0.7718253307822253, "learning_rate": 0.00019998654908665966, "loss": 12.665, "step": 1288 }, { "epoch": 0.0701912015955021, "grad_norm": 0.9335549581515296, "learning_rate": 0.00019998640406766947, "loss": 12.728, "step": 1289 }, { "epoch": 0.07024565559208511, "grad_norm": 0.7689425613584284, "learning_rate": 0.00019998625827116827, "loss": 12.7429, "step": 1290 }, { "epoch": 0.07030010958866813, "grad_norm": 0.8506528158595922, "learning_rate": 0.00019998611169715712, "loss": 12.8497, "step": 1291 }, { "epoch": 0.07035456358525113, "grad_norm": 0.7830197778249123, "learning_rate": 0.00019998596434563724, "loss": 12.8308, "step": 1292 }, { "epoch": 0.07040901758183414, "grad_norm": 0.7674751880153996, "learning_rate": 0.00019998581621660973, "loss": 12.6265, "step": 1293 }, { "epoch": 0.07046347157841716, "grad_norm": 0.8792718237656185, "learning_rate": 0.0001999856673100757, "loss": 12.6221, "step": 1294 }, { "epoch": 0.07051792557500017, "grad_norm": 0.888901523985518, "learning_rate": 0.0001999855176260364, "loss": 12.6924, "step": 1295 }, { "epoch": 0.07057237957158319, "grad_norm": 0.8764202643621799, "learning_rate": 0.00019998536716449292, "loss": 12.7861, "step": 1296 }, { "epoch": 0.0706268335681662, "grad_norm": 1.0514054139949702, "learning_rate": 0.00019998521592544646, "loss": 12.738, "step": 1297 }, { "epoch": 0.0706812875647492, "grad_norm": 0.9078005384364782, "learning_rate": 0.0001999850639088982, "loss": 12.6706, "step": 1298 }, { "epoch": 0.07073574156133222, "grad_norm": 1.0033928702575605, "learning_rate": 0.00019998491111484934, "loss": 12.7786, "step": 1299 }, { "epoch": 0.07079019555791523, "grad_norm": 0.9070619957929708, "learning_rate": 0.000199984757543301, "loss": 12.7996, "step": 1300 }, { "epoch": 0.07084464955449823, "grad_norm": 0.8637899760457025, "learning_rate": 0.00019998460319425445, "loss": 12.6295, "step": 1301 }, { "epoch": 0.07089910355108126, "grad_norm": 0.9270227663127247, "learning_rate": 0.00019998444806771084, "loss": 12.693, "step": 1302 }, { "epoch": 0.07095355754766426, "grad_norm": 0.8678327197234903, "learning_rate": 0.0001999842921636714, "loss": 12.6845, "step": 1303 }, { "epoch": 0.07100801154424728, "grad_norm": 0.9123627237275738, "learning_rate": 0.00019998413548213734, "loss": 12.738, "step": 1304 }, { "epoch": 0.07106246554083029, "grad_norm": 0.7763699733385914, "learning_rate": 0.00019998397802310986, "loss": 12.6883, "step": 1305 }, { "epoch": 0.0711169195374133, "grad_norm": 1.0035685910073238, "learning_rate": 0.00019998381978659024, "loss": 12.889, "step": 1306 }, { "epoch": 0.07117137353399632, "grad_norm": 0.890221905170563, "learning_rate": 0.00019998366077257962, "loss": 12.8601, "step": 1307 }, { "epoch": 0.07122582753057932, "grad_norm": 0.8082291689304159, "learning_rate": 0.0001999835009810793, "loss": 12.7015, "step": 1308 }, { "epoch": 0.07128028152716233, "grad_norm": 0.8360757752561405, "learning_rate": 0.00019998334041209054, "loss": 12.7729, "step": 1309 }, { "epoch": 0.07133473552374535, "grad_norm": 0.8218382034287375, "learning_rate": 0.00019998317906561454, "loss": 12.8634, "step": 1310 }, { "epoch": 0.07138918952032836, "grad_norm": 0.924703496644343, "learning_rate": 0.00019998301694165255, "loss": 12.7538, "step": 1311 }, { "epoch": 0.07144364351691138, "grad_norm": 0.8849010458774731, "learning_rate": 0.00019998285404020588, "loss": 12.7549, "step": 1312 }, { "epoch": 0.07149809751349438, "grad_norm": 0.7943815027792276, "learning_rate": 0.00019998269036127577, "loss": 12.6778, "step": 1313 }, { "epoch": 0.07155255151007739, "grad_norm": 0.7558448698202527, "learning_rate": 0.00019998252590486346, "loss": 12.7548, "step": 1314 }, { "epoch": 0.07160700550666041, "grad_norm": 0.8911125068611316, "learning_rate": 0.00019998236067097033, "loss": 12.7131, "step": 1315 }, { "epoch": 0.07166145950324342, "grad_norm": 0.8561803781699435, "learning_rate": 0.00019998219465959752, "loss": 12.8283, "step": 1316 }, { "epoch": 0.07171591349982642, "grad_norm": 0.8473732850401972, "learning_rate": 0.00019998202787074645, "loss": 12.8933, "step": 1317 }, { "epoch": 0.07177036749640944, "grad_norm": 0.727468497024035, "learning_rate": 0.00019998186030441832, "loss": 12.6992, "step": 1318 }, { "epoch": 0.07182482149299245, "grad_norm": 0.8180382041248009, "learning_rate": 0.00019998169196061452, "loss": 12.6889, "step": 1319 }, { "epoch": 0.07187927548957547, "grad_norm": 0.7852274604537989, "learning_rate": 0.0001999815228393363, "loss": 12.7212, "step": 1320 }, { "epoch": 0.07193372948615848, "grad_norm": 0.9769106293559628, "learning_rate": 0.00019998135294058497, "loss": 12.8875, "step": 1321 }, { "epoch": 0.07198818348274148, "grad_norm": 0.8503420912576485, "learning_rate": 0.0001999811822643619, "loss": 12.7531, "step": 1322 }, { "epoch": 0.0720426374793245, "grad_norm": 0.7948220930979146, "learning_rate": 0.00019998101081066837, "loss": 12.5656, "step": 1323 }, { "epoch": 0.07209709147590751, "grad_norm": 0.8488679894413691, "learning_rate": 0.00019998083857950577, "loss": 12.8234, "step": 1324 }, { "epoch": 0.07215154547249052, "grad_norm": 0.9413227053637365, "learning_rate": 0.00019998066557087537, "loss": 12.7785, "step": 1325 }, { "epoch": 0.07220599946907354, "grad_norm": 0.7645388464604446, "learning_rate": 0.00019998049178477853, "loss": 12.6192, "step": 1326 }, { "epoch": 0.07226045346565654, "grad_norm": 0.9032030787263513, "learning_rate": 0.00019998031722121663, "loss": 12.8004, "step": 1327 }, { "epoch": 0.07231490746223955, "grad_norm": 0.9060112901526959, "learning_rate": 0.00019998014188019105, "loss": 12.8844, "step": 1328 }, { "epoch": 0.07236936145882257, "grad_norm": 0.9041037289993008, "learning_rate": 0.00019997996576170312, "loss": 12.6895, "step": 1329 }, { "epoch": 0.07242381545540558, "grad_norm": 0.8757460620636227, "learning_rate": 0.00019997978886575416, "loss": 12.7804, "step": 1330 }, { "epoch": 0.0724782694519886, "grad_norm": 0.8629108827418805, "learning_rate": 0.00019997961119234563, "loss": 12.6409, "step": 1331 }, { "epoch": 0.0725327234485716, "grad_norm": 0.8312445852962548, "learning_rate": 0.00019997943274147889, "loss": 12.7261, "step": 1332 }, { "epoch": 0.07258717744515461, "grad_norm": 0.8520995556695088, "learning_rate": 0.00019997925351315527, "loss": 12.6756, "step": 1333 }, { "epoch": 0.07264163144173763, "grad_norm": 0.9686838517002854, "learning_rate": 0.00019997907350737624, "loss": 12.8004, "step": 1334 }, { "epoch": 0.07269608543832064, "grad_norm": 0.9052860715073252, "learning_rate": 0.0001999788927241432, "loss": 12.8299, "step": 1335 }, { "epoch": 0.07275053943490364, "grad_norm": 0.7694932364124429, "learning_rate": 0.00019997871116345746, "loss": 12.5894, "step": 1336 }, { "epoch": 0.07280499343148666, "grad_norm": 0.8697208727797224, "learning_rate": 0.00019997852882532052, "loss": 12.7327, "step": 1337 }, { "epoch": 0.07285944742806967, "grad_norm": 0.7950392651172854, "learning_rate": 0.00019997834570973378, "loss": 12.6369, "step": 1338 }, { "epoch": 0.07291390142465269, "grad_norm": 0.9149058956676394, "learning_rate": 0.00019997816181669865, "loss": 12.6927, "step": 1339 }, { "epoch": 0.0729683554212357, "grad_norm": 0.7917477900076075, "learning_rate": 0.00019997797714621656, "loss": 12.6357, "step": 1340 }, { "epoch": 0.0730228094178187, "grad_norm": 0.8974376761470795, "learning_rate": 0.00019997779169828896, "loss": 12.5907, "step": 1341 }, { "epoch": 0.07307726341440172, "grad_norm": 0.9046560938278421, "learning_rate": 0.0001999776054729173, "loss": 12.63, "step": 1342 }, { "epoch": 0.07313171741098473, "grad_norm": 0.8687122090517582, "learning_rate": 0.000199977418470103, "loss": 12.6174, "step": 1343 }, { "epoch": 0.07318617140756774, "grad_norm": 0.8219971025694719, "learning_rate": 0.00019997723068984754, "loss": 12.7169, "step": 1344 }, { "epoch": 0.07324062540415076, "grad_norm": 0.9499603674891286, "learning_rate": 0.00019997704213215234, "loss": 12.7951, "step": 1345 }, { "epoch": 0.07329507940073376, "grad_norm": 0.8403131431498408, "learning_rate": 0.00019997685279701889, "loss": 12.6047, "step": 1346 }, { "epoch": 0.07334953339731679, "grad_norm": 1.0116847450199355, "learning_rate": 0.00019997666268444872, "loss": 12.7429, "step": 1347 }, { "epoch": 0.07340398739389979, "grad_norm": 0.8687286331334413, "learning_rate": 0.00019997647179444323, "loss": 12.7573, "step": 1348 }, { "epoch": 0.0734584413904828, "grad_norm": 0.9707217570791576, "learning_rate": 0.0001999762801270039, "loss": 12.6194, "step": 1349 }, { "epoch": 0.07351289538706582, "grad_norm": 0.913308078894722, "learning_rate": 0.0001999760876821323, "loss": 12.791, "step": 1350 }, { "epoch": 0.07356734938364883, "grad_norm": 0.870495234403029, "learning_rate": 0.00019997589445982982, "loss": 12.6617, "step": 1351 }, { "epoch": 0.07362180338023183, "grad_norm": 0.9077511397743955, "learning_rate": 0.00019997570046009807, "loss": 12.7826, "step": 1352 }, { "epoch": 0.07367625737681485, "grad_norm": 0.865216146586963, "learning_rate": 0.00019997550568293847, "loss": 12.6991, "step": 1353 }, { "epoch": 0.07373071137339786, "grad_norm": 0.8039294697903542, "learning_rate": 0.00019997531012835257, "loss": 12.718, "step": 1354 }, { "epoch": 0.07378516536998088, "grad_norm": 0.8036977410464696, "learning_rate": 0.00019997511379634192, "loss": 12.6545, "step": 1355 }, { "epoch": 0.07383961936656389, "grad_norm": 0.9204505253050527, "learning_rate": 0.00019997491668690803, "loss": 12.6631, "step": 1356 }, { "epoch": 0.07389407336314689, "grad_norm": 0.8244381847188533, "learning_rate": 0.0001999747188000524, "loss": 12.7989, "step": 1357 }, { "epoch": 0.07394852735972991, "grad_norm": 0.794070214572969, "learning_rate": 0.00019997452013577658, "loss": 12.6709, "step": 1358 }, { "epoch": 0.07400298135631292, "grad_norm": 0.8115791022284864, "learning_rate": 0.00019997432069408214, "loss": 12.7089, "step": 1359 }, { "epoch": 0.07405743535289593, "grad_norm": 0.8152891881701941, "learning_rate": 0.00019997412047497058, "loss": 12.6603, "step": 1360 }, { "epoch": 0.07411188934947895, "grad_norm": 0.8932045405545638, "learning_rate": 0.00019997391947844354, "loss": 12.6571, "step": 1361 }, { "epoch": 0.07416634334606195, "grad_norm": 0.8189990385705692, "learning_rate": 0.00019997371770450256, "loss": 12.7136, "step": 1362 }, { "epoch": 0.07422079734264497, "grad_norm": 0.8729171047097313, "learning_rate": 0.00019997351515314913, "loss": 12.744, "step": 1363 }, { "epoch": 0.07427525133922798, "grad_norm": 0.9197472633047581, "learning_rate": 0.0001999733118243849, "loss": 12.6353, "step": 1364 }, { "epoch": 0.07432970533581099, "grad_norm": 0.8243862529909959, "learning_rate": 0.00019997310771821143, "loss": 12.8734, "step": 1365 }, { "epoch": 0.074384159332394, "grad_norm": 0.7922404837557487, "learning_rate": 0.0001999729028346303, "loss": 12.7866, "step": 1366 }, { "epoch": 0.07443861332897701, "grad_norm": 0.8674908828154206, "learning_rate": 0.00019997269717364312, "loss": 12.632, "step": 1367 }, { "epoch": 0.07449306732556002, "grad_norm": 0.8979652665766763, "learning_rate": 0.0001999724907352515, "loss": 12.6652, "step": 1368 }, { "epoch": 0.07454752132214304, "grad_norm": 0.8543331532248469, "learning_rate": 0.000199972283519457, "loss": 12.7693, "step": 1369 }, { "epoch": 0.07460197531872605, "grad_norm": 0.8961519185426239, "learning_rate": 0.00019997207552626127, "loss": 12.7032, "step": 1370 }, { "epoch": 0.07465642931530907, "grad_norm": 0.8094646158907232, "learning_rate": 0.0001999718667556659, "loss": 12.7456, "step": 1371 }, { "epoch": 0.07471088331189207, "grad_norm": 1.108898840885292, "learning_rate": 0.00019997165720767255, "loss": 12.7149, "step": 1372 }, { "epoch": 0.07476533730847508, "grad_norm": 0.8660662469532117, "learning_rate": 0.00019997144688228282, "loss": 12.765, "step": 1373 }, { "epoch": 0.0748197913050581, "grad_norm": 0.8121742729568635, "learning_rate": 0.00019997123577949837, "loss": 12.7631, "step": 1374 }, { "epoch": 0.0748742453016411, "grad_norm": 0.9143360210967595, "learning_rate": 0.0001999710238993208, "loss": 12.7528, "step": 1375 }, { "epoch": 0.07492869929822411, "grad_norm": 0.8538749301886916, "learning_rate": 0.0001999708112417518, "loss": 12.7464, "step": 1376 }, { "epoch": 0.07498315329480713, "grad_norm": 0.8345594107229769, "learning_rate": 0.000199970597806793, "loss": 12.7875, "step": 1377 }, { "epoch": 0.07503760729139014, "grad_norm": 0.799878598540185, "learning_rate": 0.00019997038359444605, "loss": 12.6608, "step": 1378 }, { "epoch": 0.07509206128797316, "grad_norm": 0.7981199107212789, "learning_rate": 0.00019997016860471268, "loss": 12.7673, "step": 1379 }, { "epoch": 0.07514651528455617, "grad_norm": 0.8155962023244945, "learning_rate": 0.00019996995283759445, "loss": 12.6999, "step": 1380 }, { "epoch": 0.07520096928113917, "grad_norm": 0.8283187517960794, "learning_rate": 0.00019996973629309316, "loss": 12.7195, "step": 1381 }, { "epoch": 0.0752554232777222, "grad_norm": 0.8160714565733528, "learning_rate": 0.0001999695189712104, "loss": 12.5985, "step": 1382 }, { "epoch": 0.0753098772743052, "grad_norm": 0.7522090236557449, "learning_rate": 0.0001999693008719479, "loss": 12.6562, "step": 1383 }, { "epoch": 0.07536433127088821, "grad_norm": 0.8246447349504396, "learning_rate": 0.00019996908199530736, "loss": 12.7008, "step": 1384 }, { "epoch": 0.07541878526747123, "grad_norm": 0.7906471474347675, "learning_rate": 0.00019996886234129046, "loss": 12.6111, "step": 1385 }, { "epoch": 0.07547323926405423, "grad_norm": 0.8896552341485157, "learning_rate": 0.00019996864190989895, "loss": 12.6309, "step": 1386 }, { "epoch": 0.07552769326063725, "grad_norm": 0.8374448428066728, "learning_rate": 0.00019996842070113449, "loss": 12.8523, "step": 1387 }, { "epoch": 0.07558214725722026, "grad_norm": 0.8911290392829221, "learning_rate": 0.00019996819871499882, "loss": 12.7662, "step": 1388 }, { "epoch": 0.07563660125380327, "grad_norm": 0.8776686874771998, "learning_rate": 0.00019996797595149367, "loss": 12.6586, "step": 1389 }, { "epoch": 0.07569105525038629, "grad_norm": 0.7757694249013276, "learning_rate": 0.0001999677524106208, "loss": 12.7784, "step": 1390 }, { "epoch": 0.0757455092469693, "grad_norm": 0.8463963270041872, "learning_rate": 0.00019996752809238192, "loss": 12.7496, "step": 1391 }, { "epoch": 0.0757999632435523, "grad_norm": 0.9666247978951433, "learning_rate": 0.0001999673029967788, "loss": 12.6967, "step": 1392 }, { "epoch": 0.07585441724013532, "grad_norm": 0.911715433375072, "learning_rate": 0.00019996707712381312, "loss": 12.7605, "step": 1393 }, { "epoch": 0.07590887123671833, "grad_norm": 0.896211013201681, "learning_rate": 0.0001999668504734867, "loss": 12.7567, "step": 1394 }, { "epoch": 0.07596332523330133, "grad_norm": 0.7875639968187476, "learning_rate": 0.00019996662304580127, "loss": 12.7262, "step": 1395 }, { "epoch": 0.07601777922988436, "grad_norm": 0.8028651920579518, "learning_rate": 0.00019996639484075863, "loss": 12.6632, "step": 1396 }, { "epoch": 0.07607223322646736, "grad_norm": 0.8290344892272751, "learning_rate": 0.00019996616585836056, "loss": 12.7777, "step": 1397 }, { "epoch": 0.07612668722305038, "grad_norm": 0.808369979827406, "learning_rate": 0.0001999659360986088, "loss": 12.6464, "step": 1398 }, { "epoch": 0.07618114121963339, "grad_norm": 0.8593156464362707, "learning_rate": 0.00019996570556150516, "loss": 12.8318, "step": 1399 }, { "epoch": 0.0762355952162164, "grad_norm": 0.8476622102536071, "learning_rate": 0.0001999654742470514, "loss": 12.5885, "step": 1400 }, { "epoch": 0.07629004921279942, "grad_norm": 1.0197744142411682, "learning_rate": 0.0001999652421552494, "loss": 12.8219, "step": 1401 }, { "epoch": 0.07634450320938242, "grad_norm": 0.8796520538054026, "learning_rate": 0.0001999650092861009, "loss": 12.7627, "step": 1402 }, { "epoch": 0.07639895720596543, "grad_norm": 0.8767347301844162, "learning_rate": 0.0001999647756396077, "loss": 12.6461, "step": 1403 }, { "epoch": 0.07645341120254845, "grad_norm": 0.8968474287614663, "learning_rate": 0.00019996454121577167, "loss": 12.7267, "step": 1404 }, { "epoch": 0.07650786519913146, "grad_norm": 0.8363621234719137, "learning_rate": 0.00019996430601459454, "loss": 12.627, "step": 1405 }, { "epoch": 0.07656231919571448, "grad_norm": 0.8442098672535624, "learning_rate": 0.00019996407003607827, "loss": 12.7895, "step": 1406 }, { "epoch": 0.07661677319229748, "grad_norm": 0.895294258933168, "learning_rate": 0.0001999638332802246, "loss": 12.8037, "step": 1407 }, { "epoch": 0.07667122718888049, "grad_norm": 0.8150125735904853, "learning_rate": 0.0001999635957470354, "loss": 12.8318, "step": 1408 }, { "epoch": 0.07672568118546351, "grad_norm": 0.8107636017840302, "learning_rate": 0.00019996335743651254, "loss": 12.7149, "step": 1409 }, { "epoch": 0.07678013518204652, "grad_norm": 0.8164062918816573, "learning_rate": 0.00019996311834865783, "loss": 12.7795, "step": 1410 }, { "epoch": 0.07683458917862952, "grad_norm": 0.7278267164603288, "learning_rate": 0.00019996287848347315, "loss": 12.7514, "step": 1411 }, { "epoch": 0.07688904317521254, "grad_norm": 0.8448076956301601, "learning_rate": 0.00019996263784096034, "loss": 12.8286, "step": 1412 }, { "epoch": 0.07694349717179555, "grad_norm": 0.8097564652529006, "learning_rate": 0.00019996239642112133, "loss": 12.5989, "step": 1413 }, { "epoch": 0.07699795116837857, "grad_norm": 0.8107001306743004, "learning_rate": 0.00019996215422395794, "loss": 12.7783, "step": 1414 }, { "epoch": 0.07705240516496158, "grad_norm": 0.8086448265638508, "learning_rate": 0.00019996191124947208, "loss": 12.6879, "step": 1415 }, { "epoch": 0.07710685916154458, "grad_norm": 0.7665013421105268, "learning_rate": 0.00019996166749766564, "loss": 12.6297, "step": 1416 }, { "epoch": 0.0771613131581276, "grad_norm": 0.7943762132967175, "learning_rate": 0.00019996142296854047, "loss": 12.7468, "step": 1417 }, { "epoch": 0.07721576715471061, "grad_norm": 0.8981478243989491, "learning_rate": 0.00019996117766209857, "loss": 12.8986, "step": 1418 }, { "epoch": 0.07727022115129362, "grad_norm": 0.8143901857096051, "learning_rate": 0.00019996093157834176, "loss": 12.78, "step": 1419 }, { "epoch": 0.07732467514787664, "grad_norm": 0.8756714785828305, "learning_rate": 0.000199960684717272, "loss": 12.785, "step": 1420 }, { "epoch": 0.07737912914445964, "grad_norm": 0.8496567372331549, "learning_rate": 0.00019996043707889118, "loss": 12.7625, "step": 1421 }, { "epoch": 0.07743358314104266, "grad_norm": 0.8563016596814594, "learning_rate": 0.00019996018866320122, "loss": 12.818, "step": 1422 }, { "epoch": 0.07748803713762567, "grad_norm": 0.7546198210158892, "learning_rate": 0.0001999599394702041, "loss": 12.6427, "step": 1423 }, { "epoch": 0.07754249113420868, "grad_norm": 1.0243034781564102, "learning_rate": 0.00019995968949990171, "loss": 12.7954, "step": 1424 }, { "epoch": 0.0775969451307917, "grad_norm": 0.8101715088607566, "learning_rate": 0.000199959438752296, "loss": 12.7001, "step": 1425 }, { "epoch": 0.0776513991273747, "grad_norm": 0.8761233776508695, "learning_rate": 0.000199959187227389, "loss": 12.8675, "step": 1426 }, { "epoch": 0.07770585312395771, "grad_norm": 0.9545124675812537, "learning_rate": 0.00019995893492518252, "loss": 12.7311, "step": 1427 }, { "epoch": 0.07776030712054073, "grad_norm": 1.0653521779212813, "learning_rate": 0.00019995868184567863, "loss": 12.837, "step": 1428 }, { "epoch": 0.07781476111712374, "grad_norm": 0.7823863372608583, "learning_rate": 0.00019995842798887925, "loss": 12.646, "step": 1429 }, { "epoch": 0.07786921511370676, "grad_norm": 0.9468592289955438, "learning_rate": 0.0001999581733547864, "loss": 12.5773, "step": 1430 }, { "epoch": 0.07792366911028976, "grad_norm": 0.8859844301333014, "learning_rate": 0.000199957917943402, "loss": 12.6881, "step": 1431 }, { "epoch": 0.07797812310687277, "grad_norm": 0.7958834591179493, "learning_rate": 0.00019995766175472807, "loss": 12.7938, "step": 1432 }, { "epoch": 0.07803257710345579, "grad_norm": 0.9045806271425504, "learning_rate": 0.00019995740478876662, "loss": 12.8938, "step": 1433 }, { "epoch": 0.0780870311000388, "grad_norm": 0.9066374152311184, "learning_rate": 0.0001999571470455196, "loss": 12.8734, "step": 1434 }, { "epoch": 0.0781414850966218, "grad_norm": 1.105244040197418, "learning_rate": 0.00019995688852498907, "loss": 12.8091, "step": 1435 }, { "epoch": 0.07819593909320482, "grad_norm": 0.9163991440480068, "learning_rate": 0.000199956629227177, "loss": 12.8646, "step": 1436 }, { "epoch": 0.07825039308978783, "grad_norm": 0.9264755380848951, "learning_rate": 0.0001999563691520854, "loss": 12.6942, "step": 1437 }, { "epoch": 0.07830484708637085, "grad_norm": 0.8982000129129306, "learning_rate": 0.00019995610829971633, "loss": 12.7016, "step": 1438 }, { "epoch": 0.07835930108295386, "grad_norm": 0.8429653568543607, "learning_rate": 0.0001999558466700718, "loss": 12.7855, "step": 1439 }, { "epoch": 0.07841375507953686, "grad_norm": 1.02653089378891, "learning_rate": 0.00019995558426315384, "loss": 12.699, "step": 1440 }, { "epoch": 0.07846820907611989, "grad_norm": 0.8931709995895065, "learning_rate": 0.0001999553210789645, "loss": 12.8778, "step": 1441 }, { "epoch": 0.07852266307270289, "grad_norm": 1.077265824329578, "learning_rate": 0.00019995505711750583, "loss": 12.7602, "step": 1442 }, { "epoch": 0.0785771170692859, "grad_norm": 0.8471373686576927, "learning_rate": 0.00019995479237877985, "loss": 12.7904, "step": 1443 }, { "epoch": 0.07863157106586892, "grad_norm": 1.0498061251024426, "learning_rate": 0.00019995452686278866, "loss": 12.9102, "step": 1444 }, { "epoch": 0.07868602506245193, "grad_norm": 0.9447132721604138, "learning_rate": 0.00019995426056953428, "loss": 12.7107, "step": 1445 }, { "epoch": 0.07874047905903495, "grad_norm": 0.963176706304791, "learning_rate": 0.00019995399349901884, "loss": 12.9, "step": 1446 }, { "epoch": 0.07879493305561795, "grad_norm": 0.9043190032368775, "learning_rate": 0.00019995372565124436, "loss": 12.8733, "step": 1447 }, { "epoch": 0.07884938705220096, "grad_norm": 0.895109525929898, "learning_rate": 0.00019995345702621296, "loss": 12.7444, "step": 1448 }, { "epoch": 0.07890384104878398, "grad_norm": 1.0157615390283048, "learning_rate": 0.00019995318762392673, "loss": 12.7979, "step": 1449 }, { "epoch": 0.07895829504536699, "grad_norm": 0.9177562381958061, "learning_rate": 0.0001999529174443877, "loss": 12.7878, "step": 1450 }, { "epoch": 0.07901274904194999, "grad_norm": 0.8671856971809547, "learning_rate": 0.0001999526464875981, "loss": 12.8859, "step": 1451 }, { "epoch": 0.07906720303853301, "grad_norm": 0.7667322554377042, "learning_rate": 0.0001999523747535599, "loss": 12.7204, "step": 1452 }, { "epoch": 0.07912165703511602, "grad_norm": 0.8422632651938327, "learning_rate": 0.0001999521022422753, "loss": 12.7557, "step": 1453 }, { "epoch": 0.07917611103169904, "grad_norm": 0.9579270469578615, "learning_rate": 0.00019995182895374635, "loss": 12.6818, "step": 1454 }, { "epoch": 0.07923056502828205, "grad_norm": 0.8821151364436076, "learning_rate": 0.00019995155488797525, "loss": 12.6819, "step": 1455 }, { "epoch": 0.07928501902486505, "grad_norm": 0.8051216332043438, "learning_rate": 0.0001999512800449641, "loss": 12.7483, "step": 1456 }, { "epoch": 0.07933947302144807, "grad_norm": 0.8706203100578653, "learning_rate": 0.00019995100442471504, "loss": 12.7462, "step": 1457 }, { "epoch": 0.07939392701803108, "grad_norm": 0.8392511128264428, "learning_rate": 0.00019995072802723017, "loss": 12.7625, "step": 1458 }, { "epoch": 0.07944838101461409, "grad_norm": 0.7159408022254062, "learning_rate": 0.00019995045085251172, "loss": 12.6584, "step": 1459 }, { "epoch": 0.0795028350111971, "grad_norm": 0.8182234292300353, "learning_rate": 0.00019995017290056177, "loss": 12.7386, "step": 1460 }, { "epoch": 0.07955728900778011, "grad_norm": 0.8776394827159079, "learning_rate": 0.00019994989417138252, "loss": 12.84, "step": 1461 }, { "epoch": 0.07961174300436312, "grad_norm": 0.7774015631160571, "learning_rate": 0.00019994961466497614, "loss": 12.6897, "step": 1462 }, { "epoch": 0.07966619700094614, "grad_norm": 0.7153588530373372, "learning_rate": 0.0001999493343813448, "loss": 12.696, "step": 1463 }, { "epoch": 0.07972065099752915, "grad_norm": 0.8526001778470991, "learning_rate": 0.00019994905332049067, "loss": 12.6799, "step": 1464 }, { "epoch": 0.07977510499411217, "grad_norm": 0.8395065430273043, "learning_rate": 0.00019994877148241593, "loss": 12.7107, "step": 1465 }, { "epoch": 0.07982955899069517, "grad_norm": 0.7910115963722115, "learning_rate": 0.0001999484888671228, "loss": 12.7074, "step": 1466 }, { "epoch": 0.07988401298727818, "grad_norm": 0.8736867046566489, "learning_rate": 0.00019994820547461343, "loss": 12.7821, "step": 1467 }, { "epoch": 0.0799384669838612, "grad_norm": 0.7909169429327706, "learning_rate": 0.0001999479213048901, "loss": 12.7732, "step": 1468 }, { "epoch": 0.07999292098044421, "grad_norm": 0.712764262632445, "learning_rate": 0.00019994763635795493, "loss": 12.5218, "step": 1469 }, { "epoch": 0.08004737497702721, "grad_norm": 0.7489196228051317, "learning_rate": 0.00019994735063381017, "loss": 12.7551, "step": 1470 }, { "epoch": 0.08010182897361023, "grad_norm": 0.8125103235341982, "learning_rate": 0.0001999470641324581, "loss": 12.7572, "step": 1471 }, { "epoch": 0.08015628297019324, "grad_norm": 0.8649553566871733, "learning_rate": 0.00019994677685390087, "loss": 12.7905, "step": 1472 }, { "epoch": 0.08021073696677626, "grad_norm": 0.8974811724427579, "learning_rate": 0.00019994648879814074, "loss": 12.7806, "step": 1473 }, { "epoch": 0.08026519096335927, "grad_norm": 0.7657442868661224, "learning_rate": 0.00019994619996517997, "loss": 12.7352, "step": 1474 }, { "epoch": 0.08031964495994227, "grad_norm": 0.8360366489275451, "learning_rate": 0.00019994591035502076, "loss": 12.7474, "step": 1475 }, { "epoch": 0.0803740989565253, "grad_norm": 0.7867683365999412, "learning_rate": 0.0001999456199676654, "loss": 12.5452, "step": 1476 }, { "epoch": 0.0804285529531083, "grad_norm": 0.8163665133622418, "learning_rate": 0.00019994532880311617, "loss": 12.6865, "step": 1477 }, { "epoch": 0.08048300694969131, "grad_norm": 0.8767810719540375, "learning_rate": 0.00019994503686137524, "loss": 12.7188, "step": 1478 }, { "epoch": 0.08053746094627433, "grad_norm": 0.7954817734841462, "learning_rate": 0.000199944744142445, "loss": 12.7262, "step": 1479 }, { "epoch": 0.08059191494285733, "grad_norm": 0.7956972360870302, "learning_rate": 0.00019994445064632762, "loss": 12.81, "step": 1480 }, { "epoch": 0.08064636893944035, "grad_norm": 0.9637155793432769, "learning_rate": 0.00019994415637302547, "loss": 12.6966, "step": 1481 }, { "epoch": 0.08070082293602336, "grad_norm": 0.8875279185921682, "learning_rate": 0.0001999438613225408, "loss": 12.6457, "step": 1482 }, { "epoch": 0.08075527693260637, "grad_norm": 0.8889887759230662, "learning_rate": 0.00019994356549487587, "loss": 12.7212, "step": 1483 }, { "epoch": 0.08080973092918939, "grad_norm": 0.8712764841724392, "learning_rate": 0.00019994326889003302, "loss": 12.709, "step": 1484 }, { "epoch": 0.0808641849257724, "grad_norm": 0.8618471723921115, "learning_rate": 0.0001999429715080146, "loss": 12.6181, "step": 1485 }, { "epoch": 0.0809186389223554, "grad_norm": 0.9185534941177032, "learning_rate": 0.00019994267334882282, "loss": 12.7084, "step": 1486 }, { "epoch": 0.08097309291893842, "grad_norm": 0.8517384145727733, "learning_rate": 0.0001999423744124601, "loss": 12.7456, "step": 1487 }, { "epoch": 0.08102754691552143, "grad_norm": 0.7427396610603257, "learning_rate": 0.00019994207469892867, "loss": 12.6107, "step": 1488 }, { "epoch": 0.08108200091210445, "grad_norm": 0.80926578166257, "learning_rate": 0.00019994177420823092, "loss": 12.7398, "step": 1489 }, { "epoch": 0.08113645490868746, "grad_norm": 0.9206143365338525, "learning_rate": 0.00019994147294036916, "loss": 12.7456, "step": 1490 }, { "epoch": 0.08119090890527046, "grad_norm": 0.8103141575787716, "learning_rate": 0.00019994117089534576, "loss": 12.7612, "step": 1491 }, { "epoch": 0.08124536290185348, "grad_norm": 0.8039408920478894, "learning_rate": 0.00019994086807316306, "loss": 12.6768, "step": 1492 }, { "epoch": 0.08129981689843649, "grad_norm": 0.9364347767261306, "learning_rate": 0.0001999405644738234, "loss": 12.7028, "step": 1493 }, { "epoch": 0.0813542708950195, "grad_norm": 0.7758284076712086, "learning_rate": 0.00019994026009732916, "loss": 12.7189, "step": 1494 }, { "epoch": 0.08140872489160252, "grad_norm": 0.7628202869652245, "learning_rate": 0.00019993995494368272, "loss": 12.7589, "step": 1495 }, { "epoch": 0.08146317888818552, "grad_norm": 0.8970131980378315, "learning_rate": 0.00019993964901288637, "loss": 12.7318, "step": 1496 }, { "epoch": 0.08151763288476854, "grad_norm": 0.9069018572400294, "learning_rate": 0.0001999393423049426, "loss": 12.8936, "step": 1497 }, { "epoch": 0.08157208688135155, "grad_norm": 0.876220223123507, "learning_rate": 0.00019993903481985373, "loss": 12.812, "step": 1498 }, { "epoch": 0.08162654087793456, "grad_norm": 0.8535583445255587, "learning_rate": 0.00019993872655762215, "loss": 12.698, "step": 1499 }, { "epoch": 0.08168099487451758, "grad_norm": 0.8631114608236895, "learning_rate": 0.00019993841751825032, "loss": 12.8378, "step": 1500 }, { "epoch": 0.08173544887110058, "grad_norm": 0.9638776193483425, "learning_rate": 0.00019993810770174055, "loss": 12.7563, "step": 1501 }, { "epoch": 0.08178990286768359, "grad_norm": 1.1874397730440935, "learning_rate": 0.00019993779710809532, "loss": 12.4875, "step": 1502 }, { "epoch": 0.08184435686426661, "grad_norm": 0.7936581239575987, "learning_rate": 0.00019993748573731698, "loss": 12.714, "step": 1503 }, { "epoch": 0.08189881086084962, "grad_norm": 0.7976386685488491, "learning_rate": 0.00019993717358940803, "loss": 12.7046, "step": 1504 }, { "epoch": 0.08195326485743264, "grad_norm": 0.9118200058009364, "learning_rate": 0.00019993686066437086, "loss": 12.7126, "step": 1505 }, { "epoch": 0.08200771885401564, "grad_norm": 0.8982301897740976, "learning_rate": 0.00019993654696220787, "loss": 12.7474, "step": 1506 }, { "epoch": 0.08206217285059865, "grad_norm": 0.8340940733070505, "learning_rate": 0.00019993623248292156, "loss": 12.6143, "step": 1507 }, { "epoch": 0.08211662684718167, "grad_norm": 0.7856628911786119, "learning_rate": 0.00019993591722651432, "loss": 12.6957, "step": 1508 }, { "epoch": 0.08217108084376468, "grad_norm": 0.8563602762901024, "learning_rate": 0.00019993560119298866, "loss": 12.7298, "step": 1509 }, { "epoch": 0.08222553484034768, "grad_norm": 0.9064468196397422, "learning_rate": 0.00019993528438234698, "loss": 12.7308, "step": 1510 }, { "epoch": 0.0822799888369307, "grad_norm": 0.7929295665041554, "learning_rate": 0.0001999349667945918, "loss": 12.6453, "step": 1511 }, { "epoch": 0.08233444283351371, "grad_norm": 0.7409195103316495, "learning_rate": 0.00019993464842972552, "loss": 12.7021, "step": 1512 }, { "epoch": 0.08238889683009673, "grad_norm": 1.0174626901426223, "learning_rate": 0.00019993432928775069, "loss": 12.654, "step": 1513 }, { "epoch": 0.08244335082667974, "grad_norm": 0.8385420276571517, "learning_rate": 0.00019993400936866974, "loss": 12.7606, "step": 1514 }, { "epoch": 0.08249780482326274, "grad_norm": 0.7773434141608896, "learning_rate": 0.00019993368867248518, "loss": 12.6926, "step": 1515 }, { "epoch": 0.08255225881984576, "grad_norm": 0.948935487487568, "learning_rate": 0.0001999333671991995, "loss": 12.7262, "step": 1516 }, { "epoch": 0.08260671281642877, "grad_norm": 0.7845942917925295, "learning_rate": 0.0001999330449488152, "loss": 12.7695, "step": 1517 }, { "epoch": 0.08266116681301178, "grad_norm": 0.796696184682058, "learning_rate": 0.00019993272192133477, "loss": 12.7048, "step": 1518 }, { "epoch": 0.0827156208095948, "grad_norm": 0.8115038277668638, "learning_rate": 0.00019993239811676075, "loss": 12.7077, "step": 1519 }, { "epoch": 0.0827700748061778, "grad_norm": 0.8598970799174874, "learning_rate": 0.00019993207353509562, "loss": 12.7663, "step": 1520 }, { "epoch": 0.08282452880276082, "grad_norm": 0.8329175862926992, "learning_rate": 0.00019993174817634196, "loss": 12.6002, "step": 1521 }, { "epoch": 0.08287898279934383, "grad_norm": 0.9493548901124595, "learning_rate": 0.00019993142204050224, "loss": 12.8797, "step": 1522 }, { "epoch": 0.08293343679592684, "grad_norm": 0.7844916289661439, "learning_rate": 0.00019993109512757903, "loss": 12.7075, "step": 1523 }, { "epoch": 0.08298789079250986, "grad_norm": 0.7485570224627376, "learning_rate": 0.00019993076743757485, "loss": 12.7142, "step": 1524 }, { "epoch": 0.08304234478909286, "grad_norm": 0.8230611251453327, "learning_rate": 0.0001999304389704923, "loss": 12.6775, "step": 1525 }, { "epoch": 0.08309679878567587, "grad_norm": 0.7802141642317684, "learning_rate": 0.00019993010972633389, "loss": 12.7862, "step": 1526 }, { "epoch": 0.08315125278225889, "grad_norm": 0.804109574654945, "learning_rate": 0.0001999297797051022, "loss": 12.7384, "step": 1527 }, { "epoch": 0.0832057067788419, "grad_norm": 0.7549845829476725, "learning_rate": 0.00019992944890679976, "loss": 12.6077, "step": 1528 }, { "epoch": 0.0832601607754249, "grad_norm": 0.8158889763260669, "learning_rate": 0.00019992911733142916, "loss": 12.7293, "step": 1529 }, { "epoch": 0.08331461477200792, "grad_norm": 0.8322818323951467, "learning_rate": 0.00019992878497899298, "loss": 12.7941, "step": 1530 }, { "epoch": 0.08336906876859093, "grad_norm": 0.7974712778159135, "learning_rate": 0.00019992845184949384, "loss": 12.6936, "step": 1531 }, { "epoch": 0.08342352276517395, "grad_norm": 0.789866577373669, "learning_rate": 0.0001999281179429343, "loss": 12.684, "step": 1532 }, { "epoch": 0.08347797676175696, "grad_norm": 0.8534707399357608, "learning_rate": 0.00019992778325931694, "loss": 12.6751, "step": 1533 }, { "epoch": 0.08353243075833997, "grad_norm": 0.9367458502912297, "learning_rate": 0.00019992744779864438, "loss": 12.7332, "step": 1534 }, { "epoch": 0.08358688475492299, "grad_norm": 0.9117832539518956, "learning_rate": 0.00019992711156091925, "loss": 12.6615, "step": 1535 }, { "epoch": 0.08364133875150599, "grad_norm": 0.8034947883057498, "learning_rate": 0.00019992677454614414, "loss": 12.6091, "step": 1536 }, { "epoch": 0.083695792748089, "grad_norm": 0.8610815668935233, "learning_rate": 0.00019992643675432163, "loss": 12.6785, "step": 1537 }, { "epoch": 0.08375024674467202, "grad_norm": 0.7634725773013094, "learning_rate": 0.00019992609818545443, "loss": 12.6252, "step": 1538 }, { "epoch": 0.08380470074125503, "grad_norm": 0.773853435934736, "learning_rate": 0.00019992575883954512, "loss": 12.6299, "step": 1539 }, { "epoch": 0.08385915473783805, "grad_norm": 0.83449596965377, "learning_rate": 0.00019992541871659636, "loss": 12.7319, "step": 1540 }, { "epoch": 0.08391360873442105, "grad_norm": 0.7817968068976223, "learning_rate": 0.00019992507781661076, "loss": 12.6707, "step": 1541 }, { "epoch": 0.08396806273100406, "grad_norm": 0.8437153661982023, "learning_rate": 0.00019992473613959102, "loss": 12.7417, "step": 1542 }, { "epoch": 0.08402251672758708, "grad_norm": 0.7850470107731357, "learning_rate": 0.00019992439368553977, "loss": 12.7926, "step": 1543 }, { "epoch": 0.08407697072417009, "grad_norm": 0.8146054061971123, "learning_rate": 0.0001999240504544597, "loss": 12.8017, "step": 1544 }, { "epoch": 0.08413142472075309, "grad_norm": 0.871613332041884, "learning_rate": 0.00019992370644635342, "loss": 12.8044, "step": 1545 }, { "epoch": 0.08418587871733611, "grad_norm": 0.8555636063343115, "learning_rate": 0.00019992336166122366, "loss": 12.7711, "step": 1546 }, { "epoch": 0.08424033271391912, "grad_norm": 0.9492028973039249, "learning_rate": 0.0001999230160990731, "loss": 12.8954, "step": 1547 }, { "epoch": 0.08429478671050214, "grad_norm": 0.8221100770793226, "learning_rate": 0.00019992266975990436, "loss": 12.8115, "step": 1548 }, { "epoch": 0.08434924070708515, "grad_norm": 0.8717001137503292, "learning_rate": 0.00019992232264372023, "loss": 12.7496, "step": 1549 }, { "epoch": 0.08440369470366815, "grad_norm": 0.8772885860945219, "learning_rate": 0.00019992197475052334, "loss": 12.6613, "step": 1550 }, { "epoch": 0.08445814870025117, "grad_norm": 0.7647801603416208, "learning_rate": 0.00019992162608031643, "loss": 12.605, "step": 1551 }, { "epoch": 0.08451260269683418, "grad_norm": 0.8249989839894225, "learning_rate": 0.00019992127663310218, "loss": 12.713, "step": 1552 }, { "epoch": 0.08456705669341719, "grad_norm": 0.8004163246461077, "learning_rate": 0.00019992092640888336, "loss": 12.8538, "step": 1553 }, { "epoch": 0.0846215106900002, "grad_norm": 0.7652912812630724, "learning_rate": 0.00019992057540766262, "loss": 12.6518, "step": 1554 }, { "epoch": 0.08467596468658321, "grad_norm": 0.7636164105422508, "learning_rate": 0.00019992022362944276, "loss": 12.7801, "step": 1555 }, { "epoch": 0.08473041868316623, "grad_norm": 0.7770954475005418, "learning_rate": 0.00019991987107422646, "loss": 12.7198, "step": 1556 }, { "epoch": 0.08478487267974924, "grad_norm": 0.7876480663521178, "learning_rate": 0.0001999195177420165, "loss": 12.7493, "step": 1557 }, { "epoch": 0.08483932667633225, "grad_norm": 0.7950431925010902, "learning_rate": 0.0001999191636328156, "loss": 12.7189, "step": 1558 }, { "epoch": 0.08489378067291527, "grad_norm": 0.8317608535674442, "learning_rate": 0.00019991880874662655, "loss": 12.8068, "step": 1559 }, { "epoch": 0.08494823466949827, "grad_norm": 0.9077379398287838, "learning_rate": 0.00019991845308345204, "loss": 12.9445, "step": 1560 }, { "epoch": 0.08500268866608128, "grad_norm": 1.2586595675046723, "learning_rate": 0.00019991809664329492, "loss": 12.7941, "step": 1561 }, { "epoch": 0.0850571426626643, "grad_norm": 0.934969280480126, "learning_rate": 0.00019991773942615795, "loss": 12.7453, "step": 1562 }, { "epoch": 0.08511159665924731, "grad_norm": 0.8689759469261831, "learning_rate": 0.00019991738143204383, "loss": 12.716, "step": 1563 }, { "epoch": 0.08516605065583033, "grad_norm": 0.9048636278651105, "learning_rate": 0.00019991702266095542, "loss": 12.6813, "step": 1564 }, { "epoch": 0.08522050465241333, "grad_norm": 0.9309895881664149, "learning_rate": 0.0001999166631128955, "loss": 12.8226, "step": 1565 }, { "epoch": 0.08527495864899634, "grad_norm": 1.085437499269038, "learning_rate": 0.00019991630278786682, "loss": 12.7647, "step": 1566 }, { "epoch": 0.08532941264557936, "grad_norm": 0.9087661866473474, "learning_rate": 0.00019991594168587224, "loss": 12.8076, "step": 1567 }, { "epoch": 0.08538386664216237, "grad_norm": 0.9896982322788401, "learning_rate": 0.00019991557980691453, "loss": 12.7972, "step": 1568 }, { "epoch": 0.08543832063874537, "grad_norm": 0.832692895973604, "learning_rate": 0.0001999152171509965, "loss": 12.7677, "step": 1569 }, { "epoch": 0.0854927746353284, "grad_norm": 0.9248339271879307, "learning_rate": 0.00019991485371812103, "loss": 12.789, "step": 1570 }, { "epoch": 0.0855472286319114, "grad_norm": 0.767994607022334, "learning_rate": 0.00019991448950829085, "loss": 12.6611, "step": 1571 }, { "epoch": 0.08560168262849442, "grad_norm": 0.8421196864236498, "learning_rate": 0.0001999141245215089, "loss": 12.7529, "step": 1572 }, { "epoch": 0.08565613662507743, "grad_norm": 0.9014060216445708, "learning_rate": 0.0001999137587577779, "loss": 12.7877, "step": 1573 }, { "epoch": 0.08571059062166043, "grad_norm": 0.866376171224473, "learning_rate": 0.00019991339221710078, "loss": 12.7103, "step": 1574 }, { "epoch": 0.08576504461824346, "grad_norm": 0.721762025104229, "learning_rate": 0.0001999130248994804, "loss": 12.7141, "step": 1575 }, { "epoch": 0.08581949861482646, "grad_norm": 0.7766649078647998, "learning_rate": 0.00019991265680491954, "loss": 12.6779, "step": 1576 }, { "epoch": 0.08587395261140947, "grad_norm": 0.809840598959927, "learning_rate": 0.00019991228793342112, "loss": 12.8861, "step": 1577 }, { "epoch": 0.08592840660799249, "grad_norm": 0.8275739487902802, "learning_rate": 0.000199911918284988, "loss": 12.5711, "step": 1578 }, { "epoch": 0.0859828606045755, "grad_norm": 0.8132028689679895, "learning_rate": 0.00019991154785962306, "loss": 12.8249, "step": 1579 }, { "epoch": 0.08603731460115852, "grad_norm": 1.3463942717961974, "learning_rate": 0.00019991117665732914, "loss": 12.8725, "step": 1580 }, { "epoch": 0.08609176859774152, "grad_norm": 0.7744003920615815, "learning_rate": 0.00019991080467810917, "loss": 12.733, "step": 1581 }, { "epoch": 0.08614622259432453, "grad_norm": 0.8797671865198825, "learning_rate": 0.00019991043192196602, "loss": 12.7362, "step": 1582 }, { "epoch": 0.08620067659090755, "grad_norm": 0.8685316217744672, "learning_rate": 0.0001999100583889026, "loss": 12.8417, "step": 1583 }, { "epoch": 0.08625513058749056, "grad_norm": 0.8634938681153937, "learning_rate": 0.0001999096840789218, "loss": 12.8137, "step": 1584 }, { "epoch": 0.08630958458407356, "grad_norm": 0.9845889721614345, "learning_rate": 0.00019990930899202656, "loss": 12.8088, "step": 1585 }, { "epoch": 0.08636403858065658, "grad_norm": 0.9203403556282245, "learning_rate": 0.00019990893312821976, "loss": 12.8132, "step": 1586 }, { "epoch": 0.08641849257723959, "grad_norm": 0.8059258371430177, "learning_rate": 0.00019990855648750438, "loss": 12.6877, "step": 1587 }, { "epoch": 0.08647294657382261, "grad_norm": 0.9359421414283161, "learning_rate": 0.00019990817906988327, "loss": 12.7852, "step": 1588 }, { "epoch": 0.08652740057040562, "grad_norm": 0.828140212206367, "learning_rate": 0.00019990780087535942, "loss": 12.7481, "step": 1589 }, { "epoch": 0.08658185456698862, "grad_norm": 0.8451668669812522, "learning_rate": 0.00019990742190393573, "loss": 12.7679, "step": 1590 }, { "epoch": 0.08663630856357164, "grad_norm": 0.9171546488032268, "learning_rate": 0.0001999070421556152, "loss": 12.7237, "step": 1591 }, { "epoch": 0.08669076256015465, "grad_norm": 0.7928322850405436, "learning_rate": 0.00019990666163040077, "loss": 12.831, "step": 1592 }, { "epoch": 0.08674521655673766, "grad_norm": 0.7971408312985833, "learning_rate": 0.00019990628032829537, "loss": 12.6751, "step": 1593 }, { "epoch": 0.08679967055332068, "grad_norm": 0.7932785961720318, "learning_rate": 0.00019990589824930198, "loss": 12.732, "step": 1594 }, { "epoch": 0.08685412454990368, "grad_norm": 0.8749998597118366, "learning_rate": 0.00019990551539342355, "loss": 12.7074, "step": 1595 }, { "epoch": 0.08690857854648669, "grad_norm": 0.81543285620202, "learning_rate": 0.0001999051317606631, "loss": 12.7376, "step": 1596 }, { "epoch": 0.08696303254306971, "grad_norm": 0.9261825740964793, "learning_rate": 0.0001999047473510236, "loss": 12.8214, "step": 1597 }, { "epoch": 0.08701748653965272, "grad_norm": 0.8622490108458055, "learning_rate": 0.00019990436216450803, "loss": 12.7227, "step": 1598 }, { "epoch": 0.08707194053623574, "grad_norm": 0.8550542644779822, "learning_rate": 0.00019990397620111937, "loss": 12.7084, "step": 1599 }, { "epoch": 0.08712639453281874, "grad_norm": 0.8381618824955894, "learning_rate": 0.00019990358946086063, "loss": 12.6713, "step": 1600 }, { "epoch": 0.08718084852940175, "grad_norm": 0.9796927090666376, "learning_rate": 0.00019990320194373485, "loss": 12.7857, "step": 1601 }, { "epoch": 0.08723530252598477, "grad_norm": 0.9539240134811797, "learning_rate": 0.000199902813649745, "loss": 12.7795, "step": 1602 }, { "epoch": 0.08728975652256778, "grad_norm": 0.8105784092854924, "learning_rate": 0.0001999024245788941, "loss": 12.6445, "step": 1603 }, { "epoch": 0.08734421051915078, "grad_norm": 0.7883626144517288, "learning_rate": 0.00019990203473118522, "loss": 12.6751, "step": 1604 }, { "epoch": 0.0873986645157338, "grad_norm": 0.8767347617569384, "learning_rate": 0.00019990164410662136, "loss": 12.6106, "step": 1605 }, { "epoch": 0.08745311851231681, "grad_norm": 0.9084313776833122, "learning_rate": 0.00019990125270520558, "loss": 12.7858, "step": 1606 }, { "epoch": 0.08750757250889983, "grad_norm": 0.8747776366189766, "learning_rate": 0.0001999008605269409, "loss": 12.9065, "step": 1607 }, { "epoch": 0.08756202650548284, "grad_norm": 0.8489601291243307, "learning_rate": 0.00019990046757183033, "loss": 12.8027, "step": 1608 }, { "epoch": 0.08761648050206584, "grad_norm": 0.9162027477048691, "learning_rate": 0.00019990007383987698, "loss": 12.7236, "step": 1609 }, { "epoch": 0.08767093449864886, "grad_norm": 0.8386437850394985, "learning_rate": 0.00019989967933108394, "loss": 12.7247, "step": 1610 }, { "epoch": 0.08772538849523187, "grad_norm": 0.8829733589907728, "learning_rate": 0.00019989928404545425, "loss": 12.7478, "step": 1611 }, { "epoch": 0.08777984249181488, "grad_norm": 0.8150772135220602, "learning_rate": 0.00019989888798299093, "loss": 12.6919, "step": 1612 }, { "epoch": 0.0878342964883979, "grad_norm": 0.7878375599715177, "learning_rate": 0.0001998984911436971, "loss": 12.6992, "step": 1613 }, { "epoch": 0.0878887504849809, "grad_norm": 0.8416899312712404, "learning_rate": 0.0001998980935275759, "loss": 12.7049, "step": 1614 }, { "epoch": 0.08794320448156392, "grad_norm": 0.9133401631169028, "learning_rate": 0.00019989769513463035, "loss": 12.8479, "step": 1615 }, { "epoch": 0.08799765847814693, "grad_norm": 0.8952845893190265, "learning_rate": 0.00019989729596486355, "loss": 12.7254, "step": 1616 }, { "epoch": 0.08805211247472994, "grad_norm": 0.8407850339653142, "learning_rate": 0.00019989689601827864, "loss": 12.7772, "step": 1617 }, { "epoch": 0.08810656647131296, "grad_norm": 0.8027908080124432, "learning_rate": 0.0001998964952948787, "loss": 12.793, "step": 1618 }, { "epoch": 0.08816102046789596, "grad_norm": 0.8720305737368109, "learning_rate": 0.00019989609379466688, "loss": 12.8101, "step": 1619 }, { "epoch": 0.08821547446447897, "grad_norm": 0.8423254972283053, "learning_rate": 0.0001998956915176463, "loss": 12.7649, "step": 1620 }, { "epoch": 0.08826992846106199, "grad_norm": 0.8906736340443412, "learning_rate": 0.00019989528846382, "loss": 12.6544, "step": 1621 }, { "epoch": 0.088324382457645, "grad_norm": 0.8685979598804452, "learning_rate": 0.00019989488463319127, "loss": 12.7049, "step": 1622 }, { "epoch": 0.08837883645422802, "grad_norm": 0.9201523573659224, "learning_rate": 0.0001998944800257631, "loss": 12.7724, "step": 1623 }, { "epoch": 0.08843329045081103, "grad_norm": 0.7950617489837553, "learning_rate": 0.00019989407464153874, "loss": 12.6128, "step": 1624 }, { "epoch": 0.08848774444739403, "grad_norm": 0.8431903484712208, "learning_rate": 0.00019989366848052127, "loss": 12.6943, "step": 1625 }, { "epoch": 0.08854219844397705, "grad_norm": 0.792854903121601, "learning_rate": 0.0001998932615427139, "loss": 12.7372, "step": 1626 }, { "epoch": 0.08859665244056006, "grad_norm": 0.919581588175274, "learning_rate": 0.00019989285382811977, "loss": 12.8211, "step": 1627 }, { "epoch": 0.08865110643714307, "grad_norm": 1.0225144687152612, "learning_rate": 0.00019989244533674208, "loss": 12.9169, "step": 1628 }, { "epoch": 0.08870556043372609, "grad_norm": 0.9628652507615445, "learning_rate": 0.00019989203606858395, "loss": 12.7378, "step": 1629 }, { "epoch": 0.08876001443030909, "grad_norm": 0.9482872431610143, "learning_rate": 0.0001998916260236486, "loss": 12.7291, "step": 1630 }, { "epoch": 0.08881446842689211, "grad_norm": 0.8991062958487321, "learning_rate": 0.00019989121520193925, "loss": 12.817, "step": 1631 }, { "epoch": 0.08886892242347512, "grad_norm": 0.9023899125767259, "learning_rate": 0.00019989080360345902, "loss": 12.7704, "step": 1632 }, { "epoch": 0.08892337642005813, "grad_norm": 0.7729477313833728, "learning_rate": 0.00019989039122821116, "loss": 12.6245, "step": 1633 }, { "epoch": 0.08897783041664115, "grad_norm": 1.0417855932195912, "learning_rate": 0.00019988997807619886, "loss": 12.7979, "step": 1634 }, { "epoch": 0.08903228441322415, "grad_norm": 0.7937034858967675, "learning_rate": 0.00019988956414742536, "loss": 12.7046, "step": 1635 }, { "epoch": 0.08908673840980716, "grad_norm": 0.8001933910250998, "learning_rate": 0.00019988914944189386, "loss": 12.7312, "step": 1636 }, { "epoch": 0.08914119240639018, "grad_norm": 0.8558583761877688, "learning_rate": 0.00019988873395960756, "loss": 12.7965, "step": 1637 }, { "epoch": 0.08919564640297319, "grad_norm": 0.7599770895603871, "learning_rate": 0.00019988831770056972, "loss": 12.7082, "step": 1638 }, { "epoch": 0.0892501003995562, "grad_norm": 0.7960868786708084, "learning_rate": 0.00019988790066478358, "loss": 12.6888, "step": 1639 }, { "epoch": 0.08930455439613921, "grad_norm": 0.7261475247206844, "learning_rate": 0.00019988748285225237, "loss": 12.5913, "step": 1640 }, { "epoch": 0.08935900839272222, "grad_norm": 0.7993605836884233, "learning_rate": 0.00019988706426297932, "loss": 12.717, "step": 1641 }, { "epoch": 0.08941346238930524, "grad_norm": 0.748685997004754, "learning_rate": 0.00019988664489696773, "loss": 12.794, "step": 1642 }, { "epoch": 0.08946791638588825, "grad_norm": 0.8745510045470801, "learning_rate": 0.00019988622475422085, "loss": 12.7614, "step": 1643 }, { "epoch": 0.08952237038247125, "grad_norm": 0.8887894206010839, "learning_rate": 0.00019988580383474192, "loss": 12.8313, "step": 1644 }, { "epoch": 0.08957682437905427, "grad_norm": 0.7790629439842283, "learning_rate": 0.0001998853821385342, "loss": 12.6754, "step": 1645 }, { "epoch": 0.08963127837563728, "grad_norm": 0.7548331983129816, "learning_rate": 0.00019988495966560103, "loss": 12.6716, "step": 1646 }, { "epoch": 0.0896857323722203, "grad_norm": 0.8683759239019688, "learning_rate": 0.00019988453641594568, "loss": 12.7828, "step": 1647 }, { "epoch": 0.0897401863688033, "grad_norm": 0.8333977638249082, "learning_rate": 0.0001998841123895714, "loss": 12.7488, "step": 1648 }, { "epoch": 0.08979464036538631, "grad_norm": 0.7821755083844757, "learning_rate": 0.0001998836875864815, "loss": 12.803, "step": 1649 }, { "epoch": 0.08984909436196933, "grad_norm": 0.8495799064371253, "learning_rate": 0.00019988326200667933, "loss": 12.614, "step": 1650 }, { "epoch": 0.08990354835855234, "grad_norm": 0.8922029842895346, "learning_rate": 0.00019988283565016812, "loss": 12.7251, "step": 1651 }, { "epoch": 0.08995800235513535, "grad_norm": 0.8249423440125538, "learning_rate": 0.00019988240851695125, "loss": 12.7249, "step": 1652 }, { "epoch": 0.09001245635171837, "grad_norm": 0.7715976194103749, "learning_rate": 0.00019988198060703205, "loss": 12.7746, "step": 1653 }, { "epoch": 0.09006691034830137, "grad_norm": 0.8493084075161741, "learning_rate": 0.00019988155192041378, "loss": 12.6983, "step": 1654 }, { "epoch": 0.0901213643448844, "grad_norm": 0.8833809799334171, "learning_rate": 0.00019988112245709983, "loss": 12.6754, "step": 1655 }, { "epoch": 0.0901758183414674, "grad_norm": 0.813960208688227, "learning_rate": 0.00019988069221709348, "loss": 12.7352, "step": 1656 }, { "epoch": 0.09023027233805041, "grad_norm": 0.7960213057660804, "learning_rate": 0.0001998802612003982, "loss": 12.8401, "step": 1657 }, { "epoch": 0.09028472633463343, "grad_norm": 0.7846256038589693, "learning_rate": 0.0001998798294070172, "loss": 12.8505, "step": 1658 }, { "epoch": 0.09033918033121643, "grad_norm": 0.911526256365673, "learning_rate": 0.0001998793968369539, "loss": 12.8778, "step": 1659 }, { "epoch": 0.09039363432779944, "grad_norm": 0.8647590123578177, "learning_rate": 0.00019987896349021167, "loss": 12.6736, "step": 1660 }, { "epoch": 0.09044808832438246, "grad_norm": 0.9390747596810028, "learning_rate": 0.00019987852936679388, "loss": 12.7272, "step": 1661 }, { "epoch": 0.09050254232096547, "grad_norm": 0.9456846322227296, "learning_rate": 0.00019987809446670387, "loss": 12.7014, "step": 1662 }, { "epoch": 0.09055699631754847, "grad_norm": 0.9041210520828457, "learning_rate": 0.00019987765878994507, "loss": 12.6473, "step": 1663 }, { "epoch": 0.0906114503141315, "grad_norm": 0.7426745800172005, "learning_rate": 0.00019987722233652086, "loss": 12.7287, "step": 1664 }, { "epoch": 0.0906659043107145, "grad_norm": 1.0350350328989921, "learning_rate": 0.00019987678510643457, "loss": 12.7622, "step": 1665 }, { "epoch": 0.09072035830729752, "grad_norm": 0.7795116162583914, "learning_rate": 0.0001998763470996897, "loss": 12.644, "step": 1666 }, { "epoch": 0.09077481230388053, "grad_norm": 1.0945356128978212, "learning_rate": 0.00019987590831628955, "loss": 12.7748, "step": 1667 }, { "epoch": 0.09082926630046353, "grad_norm": 0.9341997157336556, "learning_rate": 0.00019987546875623765, "loss": 12.6985, "step": 1668 }, { "epoch": 0.09088372029704656, "grad_norm": 1.0754647338306391, "learning_rate": 0.0001998750284195373, "loss": 12.743, "step": 1669 }, { "epoch": 0.09093817429362956, "grad_norm": 0.8777951178194842, "learning_rate": 0.00019987458730619202, "loss": 12.6949, "step": 1670 }, { "epoch": 0.09099262829021257, "grad_norm": 0.7755947654045108, "learning_rate": 0.0001998741454162052, "loss": 12.7449, "step": 1671 }, { "epoch": 0.09104708228679559, "grad_norm": 0.9579356976820738, "learning_rate": 0.00019987370274958025, "loss": 12.6444, "step": 1672 }, { "epoch": 0.0911015362833786, "grad_norm": 0.8891914081952949, "learning_rate": 0.00019987325930632065, "loss": 12.878, "step": 1673 }, { "epoch": 0.09115599027996162, "grad_norm": 0.9225125348962756, "learning_rate": 0.00019987281508642983, "loss": 12.5637, "step": 1674 }, { "epoch": 0.09121044427654462, "grad_norm": 0.7929658741370371, "learning_rate": 0.00019987237008991127, "loss": 12.5481, "step": 1675 }, { "epoch": 0.09126489827312763, "grad_norm": 0.8286918511585734, "learning_rate": 0.00019987192431676843, "loss": 12.7087, "step": 1676 }, { "epoch": 0.09131935226971065, "grad_norm": 0.8387876060507591, "learning_rate": 0.00019987147776700473, "loss": 12.6977, "step": 1677 }, { "epoch": 0.09137380626629366, "grad_norm": 0.8037519330448577, "learning_rate": 0.00019987103044062364, "loss": 12.7798, "step": 1678 }, { "epoch": 0.09142826026287666, "grad_norm": 0.8990989280319842, "learning_rate": 0.0001998705823376287, "loss": 12.7042, "step": 1679 }, { "epoch": 0.09148271425945968, "grad_norm": 0.801847772818663, "learning_rate": 0.00019987013345802336, "loss": 12.8197, "step": 1680 }, { "epoch": 0.09153716825604269, "grad_norm": 0.8304429597862878, "learning_rate": 0.00019986968380181113, "loss": 12.682, "step": 1681 }, { "epoch": 0.09159162225262571, "grad_norm": 0.886711334594854, "learning_rate": 0.00019986923336899547, "loss": 12.5952, "step": 1682 }, { "epoch": 0.09164607624920872, "grad_norm": 0.7732843373918673, "learning_rate": 0.0001998687821595799, "loss": 12.6595, "step": 1683 }, { "epoch": 0.09170053024579172, "grad_norm": 0.9707310684742881, "learning_rate": 0.00019986833017356797, "loss": 12.7915, "step": 1684 }, { "epoch": 0.09175498424237474, "grad_norm": 0.7450978514787958, "learning_rate": 0.00019986787741096311, "loss": 12.6324, "step": 1685 }, { "epoch": 0.09180943823895775, "grad_norm": 0.8637568515593369, "learning_rate": 0.0001998674238717689, "loss": 12.729, "step": 1686 }, { "epoch": 0.09186389223554076, "grad_norm": 0.7476349343697685, "learning_rate": 0.0001998669695559889, "loss": 12.6907, "step": 1687 }, { "epoch": 0.09191834623212378, "grad_norm": 0.7931163580976857, "learning_rate": 0.00019986651446362653, "loss": 12.8222, "step": 1688 }, { "epoch": 0.09197280022870678, "grad_norm": 0.7635941349745159, "learning_rate": 0.00019986605859468543, "loss": 12.6869, "step": 1689 }, { "epoch": 0.0920272542252898, "grad_norm": 0.7402233735864691, "learning_rate": 0.0001998656019491691, "loss": 12.6336, "step": 1690 }, { "epoch": 0.09208170822187281, "grad_norm": 0.7765142940286349, "learning_rate": 0.0001998651445270811, "loss": 12.6721, "step": 1691 }, { "epoch": 0.09213616221845582, "grad_norm": 0.8834448067628252, "learning_rate": 0.000199864686328425, "loss": 12.763, "step": 1692 }, { "epoch": 0.09219061621503884, "grad_norm": 0.7297738087025569, "learning_rate": 0.00019986422735320436, "loss": 12.7451, "step": 1693 }, { "epoch": 0.09224507021162184, "grad_norm": 0.7648076695333648, "learning_rate": 0.00019986376760142274, "loss": 12.8448, "step": 1694 }, { "epoch": 0.09229952420820485, "grad_norm": 0.7793752591738599, "learning_rate": 0.00019986330707308367, "loss": 12.9426, "step": 1695 }, { "epoch": 0.09235397820478787, "grad_norm": 0.7778053571595327, "learning_rate": 0.00019986284576819084, "loss": 12.6704, "step": 1696 }, { "epoch": 0.09240843220137088, "grad_norm": 0.7202035826372535, "learning_rate": 0.00019986238368674774, "loss": 12.7045, "step": 1697 }, { "epoch": 0.0924628861979539, "grad_norm": 0.7528090876710528, "learning_rate": 0.000199861920828758, "loss": 12.7146, "step": 1698 }, { "epoch": 0.0925173401945369, "grad_norm": 0.7248727230127, "learning_rate": 0.00019986145719422523, "loss": 12.6796, "step": 1699 }, { "epoch": 0.09257179419111991, "grad_norm": 0.7771517513852315, "learning_rate": 0.000199860992783153, "loss": 12.7017, "step": 1700 }, { "epoch": 0.09262624818770293, "grad_norm": 0.7915660755684254, "learning_rate": 0.00019986052759554497, "loss": 12.6844, "step": 1701 }, { "epoch": 0.09268070218428594, "grad_norm": 0.8352445974040995, "learning_rate": 0.0001998600616314047, "loss": 12.7781, "step": 1702 }, { "epoch": 0.09273515618086894, "grad_norm": 0.7784112929413094, "learning_rate": 0.00019985959489073586, "loss": 12.6843, "step": 1703 }, { "epoch": 0.09278961017745196, "grad_norm": 0.9289410263110156, "learning_rate": 0.00019985912737354206, "loss": 12.7159, "step": 1704 }, { "epoch": 0.09284406417403497, "grad_norm": 0.7633534931473822, "learning_rate": 0.00019985865907982695, "loss": 12.6632, "step": 1705 }, { "epoch": 0.09289851817061799, "grad_norm": 0.7276428028327624, "learning_rate": 0.00019985819000959416, "loss": 12.7251, "step": 1706 }, { "epoch": 0.092952972167201, "grad_norm": 0.819345051368717, "learning_rate": 0.0001998577201628473, "loss": 12.667, "step": 1707 }, { "epoch": 0.093007426163784, "grad_norm": 0.7978315345932591, "learning_rate": 0.00019985724953959012, "loss": 12.7466, "step": 1708 }, { "epoch": 0.09306188016036702, "grad_norm": 0.7802810303182017, "learning_rate": 0.0001998567781398262, "loss": 12.6264, "step": 1709 }, { "epoch": 0.09311633415695003, "grad_norm": 0.8275515696845572, "learning_rate": 0.0001998563059635592, "loss": 12.6937, "step": 1710 }, { "epoch": 0.09317078815353304, "grad_norm": 0.8539871682918575, "learning_rate": 0.00019985583301079286, "loss": 12.7544, "step": 1711 }, { "epoch": 0.09322524215011606, "grad_norm": 0.7965901249303493, "learning_rate": 0.00019985535928153077, "loss": 12.743, "step": 1712 }, { "epoch": 0.09327969614669906, "grad_norm": 1.0469523307408424, "learning_rate": 0.00019985488477577672, "loss": 12.835, "step": 1713 }, { "epoch": 0.09333415014328209, "grad_norm": 1.0200281444576251, "learning_rate": 0.0001998544094935343, "loss": 12.7051, "step": 1714 }, { "epoch": 0.09338860413986509, "grad_norm": 0.9224592123697221, "learning_rate": 0.00019985393343480726, "loss": 12.7287, "step": 1715 }, { "epoch": 0.0934430581364481, "grad_norm": 0.8094401652320615, "learning_rate": 0.00019985345659959927, "loss": 12.7504, "step": 1716 }, { "epoch": 0.09349751213303112, "grad_norm": 0.8806549796355795, "learning_rate": 0.00019985297898791407, "loss": 12.7822, "step": 1717 }, { "epoch": 0.09355196612961413, "grad_norm": 0.8017319253189918, "learning_rate": 0.00019985250059975534, "loss": 12.7851, "step": 1718 }, { "epoch": 0.09360642012619713, "grad_norm": 0.7970534334621997, "learning_rate": 0.00019985202143512688, "loss": 12.727, "step": 1719 }, { "epoch": 0.09366087412278015, "grad_norm": 0.8444063092172269, "learning_rate": 0.00019985154149403228, "loss": 12.6435, "step": 1720 }, { "epoch": 0.09371532811936316, "grad_norm": 0.7854418054238925, "learning_rate": 0.00019985106077647543, "loss": 12.575, "step": 1721 }, { "epoch": 0.09376978211594617, "grad_norm": 0.9488296508732591, "learning_rate": 0.00019985057928245992, "loss": 12.689, "step": 1722 }, { "epoch": 0.09382423611252919, "grad_norm": 0.9478793448789539, "learning_rate": 0.00019985009701198957, "loss": 12.8562, "step": 1723 }, { "epoch": 0.09387869010911219, "grad_norm": 1.3421603271758058, "learning_rate": 0.00019984961396506815, "loss": 12.7497, "step": 1724 }, { "epoch": 0.09393314410569521, "grad_norm": 1.0297410530163407, "learning_rate": 0.00019984913014169938, "loss": 12.7798, "step": 1725 }, { "epoch": 0.09398759810227822, "grad_norm": 1.1291195574382247, "learning_rate": 0.000199848645541887, "loss": 12.648, "step": 1726 }, { "epoch": 0.09404205209886123, "grad_norm": 0.9849925329312679, "learning_rate": 0.00019984816016563483, "loss": 12.5487, "step": 1727 }, { "epoch": 0.09409650609544425, "grad_norm": 0.8181122519141973, "learning_rate": 0.0001998476740129466, "loss": 12.6672, "step": 1728 }, { "epoch": 0.09415096009202725, "grad_norm": 0.9015974890306336, "learning_rate": 0.00019984718708382615, "loss": 12.771, "step": 1729 }, { "epoch": 0.09420541408861026, "grad_norm": 0.8528008739132253, "learning_rate": 0.00019984669937827719, "loss": 12.7735, "step": 1730 }, { "epoch": 0.09425986808519328, "grad_norm": 0.8831817396018113, "learning_rate": 0.00019984621089630356, "loss": 12.696, "step": 1731 }, { "epoch": 0.09431432208177629, "grad_norm": 0.834736105594587, "learning_rate": 0.00019984572163790908, "loss": 12.7276, "step": 1732 }, { "epoch": 0.0943687760783593, "grad_norm": 0.9257863305824491, "learning_rate": 0.00019984523160309752, "loss": 12.8682, "step": 1733 }, { "epoch": 0.09442323007494231, "grad_norm": 0.8519996446656369, "learning_rate": 0.00019984474079187266, "loss": 12.7941, "step": 1734 }, { "epoch": 0.09447768407152532, "grad_norm": 0.8464983573387057, "learning_rate": 0.00019984424920423837, "loss": 12.7706, "step": 1735 }, { "epoch": 0.09453213806810834, "grad_norm": 0.8616173053640237, "learning_rate": 0.00019984375684019848, "loss": 12.7734, "step": 1736 }, { "epoch": 0.09458659206469135, "grad_norm": 0.8584513670065499, "learning_rate": 0.00019984326369975675, "loss": 12.8588, "step": 1737 }, { "epoch": 0.09464104606127435, "grad_norm": 0.7275419976228897, "learning_rate": 0.00019984276978291709, "loss": 12.6037, "step": 1738 }, { "epoch": 0.09469550005785737, "grad_norm": 0.90704252581912, "learning_rate": 0.00019984227508968328, "loss": 12.8174, "step": 1739 }, { "epoch": 0.09474995405444038, "grad_norm": 0.7890718965669796, "learning_rate": 0.0001998417796200592, "loss": 12.7361, "step": 1740 }, { "epoch": 0.0948044080510234, "grad_norm": 0.8700908686746459, "learning_rate": 0.0001998412833740487, "loss": 12.7187, "step": 1741 }, { "epoch": 0.09485886204760641, "grad_norm": 0.8820330592465839, "learning_rate": 0.00019984078635165565, "loss": 12.7653, "step": 1742 }, { "epoch": 0.09491331604418941, "grad_norm": 0.8480096022522114, "learning_rate": 0.0001998402885528839, "loss": 12.7305, "step": 1743 }, { "epoch": 0.09496777004077243, "grad_norm": 0.844964622166045, "learning_rate": 0.00019983978997773733, "loss": 12.6504, "step": 1744 }, { "epoch": 0.09502222403735544, "grad_norm": 0.7337818119185138, "learning_rate": 0.0001998392906262198, "loss": 12.6787, "step": 1745 }, { "epoch": 0.09507667803393845, "grad_norm": 0.8951156428654814, "learning_rate": 0.0001998387904983352, "loss": 12.629, "step": 1746 }, { "epoch": 0.09513113203052147, "grad_norm": 0.7417498529985256, "learning_rate": 0.00019983828959408743, "loss": 12.6818, "step": 1747 }, { "epoch": 0.09518558602710447, "grad_norm": 0.8106578184852027, "learning_rate": 0.00019983778791348038, "loss": 12.6571, "step": 1748 }, { "epoch": 0.0952400400236875, "grad_norm": 0.8339705961484438, "learning_rate": 0.00019983728545651795, "loss": 12.7501, "step": 1749 }, { "epoch": 0.0952944940202705, "grad_norm": 0.7656032487484683, "learning_rate": 0.00019983678222320402, "loss": 12.6431, "step": 1750 }, { "epoch": 0.09534894801685351, "grad_norm": 0.7678729913466037, "learning_rate": 0.00019983627821354254, "loss": 12.7806, "step": 1751 }, { "epoch": 0.09540340201343653, "grad_norm": 0.7412100270850847, "learning_rate": 0.00019983577342753744, "loss": 12.8495, "step": 1752 }, { "epoch": 0.09545785601001953, "grad_norm": 0.8329923403455379, "learning_rate": 0.0001998352678651926, "loss": 12.6991, "step": 1753 }, { "epoch": 0.09551231000660254, "grad_norm": 0.6852399910270895, "learning_rate": 0.00019983476152651196, "loss": 12.6196, "step": 1754 }, { "epoch": 0.09556676400318556, "grad_norm": 0.7473720505363164, "learning_rate": 0.0001998342544114995, "loss": 12.641, "step": 1755 }, { "epoch": 0.09562121799976857, "grad_norm": 0.8072951108918488, "learning_rate": 0.00019983374652015915, "loss": 12.6861, "step": 1756 }, { "epoch": 0.09567567199635159, "grad_norm": 0.8901914678671833, "learning_rate": 0.0001998332378524948, "loss": 12.7401, "step": 1757 }, { "epoch": 0.0957301259929346, "grad_norm": 0.7381858416182421, "learning_rate": 0.00019983272840851048, "loss": 12.6173, "step": 1758 }, { "epoch": 0.0957845799895176, "grad_norm": 0.7780328757663166, "learning_rate": 0.00019983221818821011, "loss": 12.6119, "step": 1759 }, { "epoch": 0.09583903398610062, "grad_norm": 0.8312609985976824, "learning_rate": 0.00019983170719159769, "loss": 12.7331, "step": 1760 }, { "epoch": 0.09589348798268363, "grad_norm": 0.8830377568322108, "learning_rate": 0.00019983119541867718, "loss": 12.9335, "step": 1761 }, { "epoch": 0.09594794197926663, "grad_norm": 1.0031691115579495, "learning_rate": 0.0001998306828694525, "loss": 12.6282, "step": 1762 }, { "epoch": 0.09600239597584966, "grad_norm": 0.8776848123359052, "learning_rate": 0.00019983016954392771, "loss": 12.5339, "step": 1763 }, { "epoch": 0.09605684997243266, "grad_norm": 0.9444493402268572, "learning_rate": 0.0001998296554421068, "loss": 12.797, "step": 1764 }, { "epoch": 0.09611130396901568, "grad_norm": 0.8453358988930049, "learning_rate": 0.00019982914056399374, "loss": 12.7444, "step": 1765 }, { "epoch": 0.09616575796559869, "grad_norm": 1.0057800902831977, "learning_rate": 0.00019982862490959256, "loss": 12.615, "step": 1766 }, { "epoch": 0.0962202119621817, "grad_norm": 0.8227321761404245, "learning_rate": 0.0001998281084789072, "loss": 12.7163, "step": 1767 }, { "epoch": 0.09627466595876472, "grad_norm": 0.9078437574448277, "learning_rate": 0.00019982759127194178, "loss": 12.8353, "step": 1768 }, { "epoch": 0.09632911995534772, "grad_norm": 0.8607841260012521, "learning_rate": 0.00019982707328870025, "loss": 12.7217, "step": 1769 }, { "epoch": 0.09638357395193073, "grad_norm": 0.806857734594643, "learning_rate": 0.00019982655452918663, "loss": 12.8509, "step": 1770 }, { "epoch": 0.09643802794851375, "grad_norm": 0.9016798428985953, "learning_rate": 0.00019982603499340502, "loss": 12.7054, "step": 1771 }, { "epoch": 0.09649248194509676, "grad_norm": 0.8270136349810251, "learning_rate": 0.00019982551468135943, "loss": 12.7688, "step": 1772 }, { "epoch": 0.09654693594167978, "grad_norm": 0.7586339597744727, "learning_rate": 0.00019982499359305384, "loss": 12.6027, "step": 1773 }, { "epoch": 0.09660138993826278, "grad_norm": 0.8039689280170982, "learning_rate": 0.00019982447172849243, "loss": 12.6849, "step": 1774 }, { "epoch": 0.09665584393484579, "grad_norm": 0.864875110019144, "learning_rate": 0.00019982394908767912, "loss": 12.8059, "step": 1775 }, { "epoch": 0.09671029793142881, "grad_norm": 0.7660481506729493, "learning_rate": 0.00019982342567061807, "loss": 12.6182, "step": 1776 }, { "epoch": 0.09676475192801182, "grad_norm": 0.8262935586777914, "learning_rate": 0.00019982290147731334, "loss": 12.6749, "step": 1777 }, { "epoch": 0.09681920592459482, "grad_norm": 0.8597501934936786, "learning_rate": 0.00019982237650776897, "loss": 12.8307, "step": 1778 }, { "epoch": 0.09687365992117784, "grad_norm": 0.9670302026180937, "learning_rate": 0.00019982185076198905, "loss": 12.6684, "step": 1779 }, { "epoch": 0.09692811391776085, "grad_norm": 1.0795139067045474, "learning_rate": 0.0001998213242399777, "loss": 12.7065, "step": 1780 }, { "epoch": 0.09698256791434387, "grad_norm": 0.8816692193509686, "learning_rate": 0.00019982079694173897, "loss": 12.6734, "step": 1781 }, { "epoch": 0.09703702191092688, "grad_norm": 0.7591381912463186, "learning_rate": 0.00019982026886727702, "loss": 12.5892, "step": 1782 }, { "epoch": 0.09709147590750988, "grad_norm": 0.831267387648959, "learning_rate": 0.00019981974001659586, "loss": 12.687, "step": 1783 }, { "epoch": 0.0971459299040929, "grad_norm": 0.8723663496857355, "learning_rate": 0.0001998192103896997, "loss": 12.7374, "step": 1784 }, { "epoch": 0.09720038390067591, "grad_norm": 0.7613084972321603, "learning_rate": 0.0001998186799865926, "loss": 12.8568, "step": 1785 }, { "epoch": 0.09725483789725892, "grad_norm": 0.9693531135750679, "learning_rate": 0.00019981814880727875, "loss": 12.7736, "step": 1786 }, { "epoch": 0.09730929189384194, "grad_norm": 0.8070502979590324, "learning_rate": 0.00019981761685176222, "loss": 12.7267, "step": 1787 }, { "epoch": 0.09736374589042494, "grad_norm": 0.7585035282744843, "learning_rate": 0.0001998170841200471, "loss": 12.6695, "step": 1788 }, { "epoch": 0.09741819988700795, "grad_norm": 0.8562878599847409, "learning_rate": 0.00019981655061213766, "loss": 12.7517, "step": 1789 }, { "epoch": 0.09747265388359097, "grad_norm": 0.8076468198038834, "learning_rate": 0.000199816016328038, "loss": 12.6599, "step": 1790 }, { "epoch": 0.09752710788017398, "grad_norm": 0.8624308020640535, "learning_rate": 0.0001998154812677522, "loss": 12.8433, "step": 1791 }, { "epoch": 0.097581561876757, "grad_norm": 0.8124216890531085, "learning_rate": 0.00019981494543128448, "loss": 12.7618, "step": 1792 }, { "epoch": 0.09763601587334, "grad_norm": 0.9145054786820154, "learning_rate": 0.00019981440881863905, "loss": 12.6682, "step": 1793 }, { "epoch": 0.09769046986992301, "grad_norm": 0.8483562639490578, "learning_rate": 0.00019981387142982003, "loss": 12.8717, "step": 1794 }, { "epoch": 0.09774492386650603, "grad_norm": 0.8409069976588321, "learning_rate": 0.00019981333326483158, "loss": 12.7535, "step": 1795 }, { "epoch": 0.09779937786308904, "grad_norm": 0.8193328017494412, "learning_rate": 0.0001998127943236779, "loss": 12.8387, "step": 1796 }, { "epoch": 0.09785383185967204, "grad_norm": 0.7893282197935113, "learning_rate": 0.00019981225460636326, "loss": 12.6631, "step": 1797 }, { "epoch": 0.09790828585625506, "grad_norm": 0.9814262516729859, "learning_rate": 0.00019981171411289172, "loss": 12.7136, "step": 1798 }, { "epoch": 0.09796273985283807, "grad_norm": 0.8465352965252302, "learning_rate": 0.00019981117284326757, "loss": 12.7594, "step": 1799 }, { "epoch": 0.09801719384942109, "grad_norm": 0.8189406451797356, "learning_rate": 0.00019981063079749505, "loss": 12.6765, "step": 1800 }, { "epoch": 0.0980716478460041, "grad_norm": 0.8344960848336206, "learning_rate": 0.00019981008797557827, "loss": 12.7143, "step": 1801 }, { "epoch": 0.0981261018425871, "grad_norm": 0.793799228357131, "learning_rate": 0.00019980954437752153, "loss": 12.6669, "step": 1802 }, { "epoch": 0.09818055583917012, "grad_norm": 0.8231053066247628, "learning_rate": 0.00019980900000332903, "loss": 12.7411, "step": 1803 }, { "epoch": 0.09823500983575313, "grad_norm": 0.8060131383467297, "learning_rate": 0.000199808454853005, "loss": 12.7213, "step": 1804 }, { "epoch": 0.09828946383233614, "grad_norm": 0.7542976640094788, "learning_rate": 0.0001998079089265537, "loss": 12.7119, "step": 1805 }, { "epoch": 0.09834391782891916, "grad_norm": 0.8811074779603901, "learning_rate": 0.0001998073622239794, "loss": 12.7143, "step": 1806 }, { "epoch": 0.09839837182550216, "grad_norm": 0.7443022336030499, "learning_rate": 0.00019980681474528623, "loss": 12.5875, "step": 1807 }, { "epoch": 0.09845282582208519, "grad_norm": 0.8255299157055661, "learning_rate": 0.0001998062664904786, "loss": 12.777, "step": 1808 }, { "epoch": 0.09850727981866819, "grad_norm": 0.7914682742250475, "learning_rate": 0.00019980571745956068, "loss": 12.6854, "step": 1809 }, { "epoch": 0.0985617338152512, "grad_norm": 0.9831540550159134, "learning_rate": 0.00019980516765253674, "loss": 12.8013, "step": 1810 }, { "epoch": 0.09861618781183422, "grad_norm": 0.7181653532335807, "learning_rate": 0.0001998046170694111, "loss": 12.6832, "step": 1811 }, { "epoch": 0.09867064180841723, "grad_norm": 0.8613942793337783, "learning_rate": 0.000199804065710188, "loss": 12.7333, "step": 1812 }, { "epoch": 0.09872509580500023, "grad_norm": 0.7765773016663301, "learning_rate": 0.00019980351357487178, "loss": 12.7315, "step": 1813 }, { "epoch": 0.09877954980158325, "grad_norm": 0.6808776096578668, "learning_rate": 0.0001998029606634667, "loss": 12.7699, "step": 1814 }, { "epoch": 0.09883400379816626, "grad_norm": 0.8799736462156778, "learning_rate": 0.00019980240697597704, "loss": 12.7738, "step": 1815 }, { "epoch": 0.09888845779474928, "grad_norm": 0.8627772359823945, "learning_rate": 0.00019980185251240715, "loss": 12.7422, "step": 1816 }, { "epoch": 0.09894291179133229, "grad_norm": 0.9523158797483452, "learning_rate": 0.00019980129727276128, "loss": 12.6161, "step": 1817 }, { "epoch": 0.09899736578791529, "grad_norm": 0.8316339425431188, "learning_rate": 0.00019980074125704381, "loss": 12.7533, "step": 1818 }, { "epoch": 0.09905181978449831, "grad_norm": 0.7555606839824176, "learning_rate": 0.00019980018446525904, "loss": 12.8045, "step": 1819 }, { "epoch": 0.09910627378108132, "grad_norm": 0.8736839244670254, "learning_rate": 0.00019979962689741133, "loss": 12.8051, "step": 1820 }, { "epoch": 0.09916072777766433, "grad_norm": 0.8298289371961125, "learning_rate": 0.00019979906855350493, "loss": 12.7442, "step": 1821 }, { "epoch": 0.09921518177424735, "grad_norm": 0.7359470330154858, "learning_rate": 0.00019979850943354429, "loss": 12.6789, "step": 1822 }, { "epoch": 0.09926963577083035, "grad_norm": 0.7984255387074274, "learning_rate": 0.00019979794953753368, "loss": 12.6387, "step": 1823 }, { "epoch": 0.09932408976741337, "grad_norm": 0.7948787479597286, "learning_rate": 0.00019979738886547748, "loss": 12.7372, "step": 1824 }, { "epoch": 0.09937854376399638, "grad_norm": 0.7525955093049125, "learning_rate": 0.00019979682741738005, "loss": 12.5815, "step": 1825 }, { "epoch": 0.09943299776057939, "grad_norm": 0.8045976161147929, "learning_rate": 0.00019979626519324572, "loss": 12.6558, "step": 1826 }, { "epoch": 0.0994874517571624, "grad_norm": 0.868763583150912, "learning_rate": 0.00019979570219307892, "loss": 12.9435, "step": 1827 }, { "epoch": 0.09954190575374541, "grad_norm": 0.8254280429266189, "learning_rate": 0.000199795138416884, "loss": 12.7005, "step": 1828 }, { "epoch": 0.09959635975032842, "grad_norm": 0.8215715474865103, "learning_rate": 0.00019979457386466536, "loss": 12.6894, "step": 1829 }, { "epoch": 0.09965081374691144, "grad_norm": 0.8617949069115154, "learning_rate": 0.0001997940085364274, "loss": 12.8104, "step": 1830 }, { "epoch": 0.09970526774349445, "grad_norm": 0.7084497799870297, "learning_rate": 0.00019979344243217445, "loss": 12.5418, "step": 1831 }, { "epoch": 0.09975972174007747, "grad_norm": 0.7914428213251324, "learning_rate": 0.00019979287555191096, "loss": 12.7525, "step": 1832 }, { "epoch": 0.09981417573666047, "grad_norm": 0.8036534649424923, "learning_rate": 0.00019979230789564137, "loss": 12.8148, "step": 1833 }, { "epoch": 0.09986862973324348, "grad_norm": 0.783771502898549, "learning_rate": 0.00019979173946337, "loss": 12.6985, "step": 1834 }, { "epoch": 0.0999230837298265, "grad_norm": 0.8433229115010147, "learning_rate": 0.00019979117025510136, "loss": 12.8037, "step": 1835 }, { "epoch": 0.09997753772640951, "grad_norm": 0.7781085291475797, "learning_rate": 0.00019979060027083988, "loss": 12.6502, "step": 1836 }, { "epoch": 0.10003199172299251, "grad_norm": 0.7396520761371812, "learning_rate": 0.00019979002951058992, "loss": 12.6156, "step": 1837 }, { "epoch": 0.10008644571957553, "grad_norm": 0.7502516601071137, "learning_rate": 0.00019978945797435594, "loss": 12.6214, "step": 1838 }, { "epoch": 0.10014089971615854, "grad_norm": 0.7945760298938975, "learning_rate": 0.00019978888566214245, "loss": 12.7648, "step": 1839 }, { "epoch": 0.10019535371274156, "grad_norm": 0.805994217809907, "learning_rate": 0.00019978831257395384, "loss": 12.6749, "step": 1840 }, { "epoch": 0.10024980770932457, "grad_norm": 0.7585193198751154, "learning_rate": 0.00019978773870979452, "loss": 12.6268, "step": 1841 }, { "epoch": 0.10030426170590757, "grad_norm": 0.8305633847794065, "learning_rate": 0.00019978716406966905, "loss": 12.7269, "step": 1842 }, { "epoch": 0.1003587157024906, "grad_norm": 0.7422976190993655, "learning_rate": 0.00019978658865358185, "loss": 12.7719, "step": 1843 }, { "epoch": 0.1004131696990736, "grad_norm": 0.740572357468796, "learning_rate": 0.00019978601246153742, "loss": 12.7352, "step": 1844 }, { "epoch": 0.10046762369565661, "grad_norm": 0.7707640168766531, "learning_rate": 0.00019978543549354022, "loss": 12.4927, "step": 1845 }, { "epoch": 0.10052207769223963, "grad_norm": 0.815737711358069, "learning_rate": 0.00019978485774959474, "loss": 12.6872, "step": 1846 }, { "epoch": 0.10057653168882263, "grad_norm": 0.7948585836889104, "learning_rate": 0.00019978427922970546, "loss": 12.6934, "step": 1847 }, { "epoch": 0.10063098568540566, "grad_norm": 0.7752338166617883, "learning_rate": 0.0001997836999338769, "loss": 12.7227, "step": 1848 }, { "epoch": 0.10068543968198866, "grad_norm": 0.7549758786488137, "learning_rate": 0.00019978311986211354, "loss": 12.7107, "step": 1849 }, { "epoch": 0.10073989367857167, "grad_norm": 0.7228918115206341, "learning_rate": 0.00019978253901441992, "loss": 12.7228, "step": 1850 }, { "epoch": 0.10079434767515469, "grad_norm": 0.9869084616467323, "learning_rate": 0.00019978195739080054, "loss": 12.691, "step": 1851 }, { "epoch": 0.1008488016717377, "grad_norm": 0.7541298740075157, "learning_rate": 0.00019978137499125994, "loss": 12.6926, "step": 1852 }, { "epoch": 0.1009032556683207, "grad_norm": 0.8353392921949634, "learning_rate": 0.0001997807918158026, "loss": 12.693, "step": 1853 }, { "epoch": 0.10095770966490372, "grad_norm": 0.9022504957175957, "learning_rate": 0.00019978020786443312, "loss": 12.8224, "step": 1854 }, { "epoch": 0.10101216366148673, "grad_norm": 0.7754694247261438, "learning_rate": 0.00019977962313715602, "loss": 12.6533, "step": 1855 }, { "epoch": 0.10106661765806974, "grad_norm": 0.9496107528055869, "learning_rate": 0.0001997790376339758, "loss": 12.6253, "step": 1856 }, { "epoch": 0.10112107165465276, "grad_norm": 0.8295457108745374, "learning_rate": 0.00019977845135489707, "loss": 12.759, "step": 1857 }, { "epoch": 0.10117552565123576, "grad_norm": 0.9132320161804102, "learning_rate": 0.00019977786429992438, "loss": 12.7149, "step": 1858 }, { "epoch": 0.10122997964781878, "grad_norm": 0.9099955640502901, "learning_rate": 0.0001997772764690623, "loss": 12.7591, "step": 1859 }, { "epoch": 0.10128443364440179, "grad_norm": 0.9136455396020913, "learning_rate": 0.00019977668786231534, "loss": 12.6479, "step": 1860 }, { "epoch": 0.1013388876409848, "grad_norm": 1.0443368465604608, "learning_rate": 0.00019977609847968812, "loss": 12.7831, "step": 1861 }, { "epoch": 0.10139334163756782, "grad_norm": 0.8544793584331826, "learning_rate": 0.00019977550832118526, "loss": 12.8603, "step": 1862 }, { "epoch": 0.10144779563415082, "grad_norm": 0.9758123126638216, "learning_rate": 0.00019977491738681132, "loss": 12.8229, "step": 1863 }, { "epoch": 0.10150224963073383, "grad_norm": 1.0450927875618519, "learning_rate": 0.00019977432567657086, "loss": 12.785, "step": 1864 }, { "epoch": 0.10155670362731685, "grad_norm": 0.8321896909389046, "learning_rate": 0.0001997737331904685, "loss": 12.7621, "step": 1865 }, { "epoch": 0.10161115762389986, "grad_norm": 1.086697442673203, "learning_rate": 0.0001997731399285089, "loss": 12.7405, "step": 1866 }, { "epoch": 0.10166561162048288, "grad_norm": 0.8795583094614052, "learning_rate": 0.0001997725458906966, "loss": 12.6583, "step": 1867 }, { "epoch": 0.10172006561706588, "grad_norm": 0.9093661938885378, "learning_rate": 0.00019977195107703625, "loss": 12.6382, "step": 1868 }, { "epoch": 0.10177451961364889, "grad_norm": 0.8206637853045706, "learning_rate": 0.0001997713554875325, "loss": 12.7288, "step": 1869 }, { "epoch": 0.10182897361023191, "grad_norm": 0.7959548053380612, "learning_rate": 0.00019977075912218996, "loss": 12.5389, "step": 1870 }, { "epoch": 0.10188342760681492, "grad_norm": 0.8940054567632745, "learning_rate": 0.00019977016198101326, "loss": 12.7477, "step": 1871 }, { "epoch": 0.10193788160339792, "grad_norm": 0.7983708631762738, "learning_rate": 0.00019976956406400704, "loss": 12.6809, "step": 1872 }, { "epoch": 0.10199233559998094, "grad_norm": 0.8947777592247796, "learning_rate": 0.00019976896537117597, "loss": 12.7298, "step": 1873 }, { "epoch": 0.10204678959656395, "grad_norm": 0.7639695051807167, "learning_rate": 0.00019976836590252469, "loss": 12.8679, "step": 1874 }, { "epoch": 0.10210124359314697, "grad_norm": 0.8503436782329791, "learning_rate": 0.00019976776565805787, "loss": 12.7481, "step": 1875 }, { "epoch": 0.10215569758972998, "grad_norm": 0.7299504612088192, "learning_rate": 0.00019976716463778016, "loss": 12.6675, "step": 1876 }, { "epoch": 0.10221015158631298, "grad_norm": 0.8034290287601704, "learning_rate": 0.00019976656284169625, "loss": 12.7463, "step": 1877 }, { "epoch": 0.102264605582896, "grad_norm": 0.9378384856057707, "learning_rate": 0.0001997659602698108, "loss": 12.7667, "step": 1878 }, { "epoch": 0.10231905957947901, "grad_norm": 0.8112566177222267, "learning_rate": 0.00019976535692212854, "loss": 12.6914, "step": 1879 }, { "epoch": 0.10237351357606202, "grad_norm": 0.8836096715216687, "learning_rate": 0.00019976475279865415, "loss": 12.5431, "step": 1880 }, { "epoch": 0.10242796757264504, "grad_norm": 0.7678332184149458, "learning_rate": 0.00019976414789939226, "loss": 12.68, "step": 1881 }, { "epoch": 0.10248242156922804, "grad_norm": 0.9820205706063538, "learning_rate": 0.00019976354222434766, "loss": 12.5327, "step": 1882 }, { "epoch": 0.10253687556581106, "grad_norm": 0.839689545120463, "learning_rate": 0.00019976293577352502, "loss": 12.7281, "step": 1883 }, { "epoch": 0.10259132956239407, "grad_norm": 0.7763360763117715, "learning_rate": 0.00019976232854692903, "loss": 12.7423, "step": 1884 }, { "epoch": 0.10264578355897708, "grad_norm": 0.8263614999513452, "learning_rate": 0.0001997617205445645, "loss": 12.7349, "step": 1885 }, { "epoch": 0.1027002375555601, "grad_norm": 0.8544881902889455, "learning_rate": 0.00019976111176643607, "loss": 12.8311, "step": 1886 }, { "epoch": 0.1027546915521431, "grad_norm": 0.803396672866949, "learning_rate": 0.0001997605022125485, "loss": 12.75, "step": 1887 }, { "epoch": 0.10280914554872611, "grad_norm": 0.7346963496646222, "learning_rate": 0.00019975989188290654, "loss": 12.6567, "step": 1888 }, { "epoch": 0.10286359954530913, "grad_norm": 0.8449617676754794, "learning_rate": 0.00019975928077751496, "loss": 12.6853, "step": 1889 }, { "epoch": 0.10291805354189214, "grad_norm": 0.7721976538792544, "learning_rate": 0.00019975866889637844, "loss": 12.657, "step": 1890 }, { "epoch": 0.10297250753847516, "grad_norm": 0.8370477843159757, "learning_rate": 0.0001997580562395018, "loss": 12.7145, "step": 1891 }, { "epoch": 0.10302696153505816, "grad_norm": 0.786273334397099, "learning_rate": 0.0001997574428068898, "loss": 12.7155, "step": 1892 }, { "epoch": 0.10308141553164117, "grad_norm": 0.7773834981716732, "learning_rate": 0.00019975682859854716, "loss": 12.6723, "step": 1893 }, { "epoch": 0.10313586952822419, "grad_norm": 0.7131770678235348, "learning_rate": 0.0001997562136144787, "loss": 12.7067, "step": 1894 }, { "epoch": 0.1031903235248072, "grad_norm": 0.751203222047551, "learning_rate": 0.00019975559785468923, "loss": 12.7865, "step": 1895 }, { "epoch": 0.1032447775213902, "grad_norm": 0.8029767491515284, "learning_rate": 0.00019975498131918348, "loss": 12.8447, "step": 1896 }, { "epoch": 0.10329923151797323, "grad_norm": 0.8498280896911178, "learning_rate": 0.00019975436400796625, "loss": 12.6351, "step": 1897 }, { "epoch": 0.10335368551455623, "grad_norm": 0.8046931953621171, "learning_rate": 0.00019975374592104235, "loss": 12.814, "step": 1898 }, { "epoch": 0.10340813951113925, "grad_norm": 0.8911829559360334, "learning_rate": 0.00019975312705841663, "loss": 12.833, "step": 1899 }, { "epoch": 0.10346259350772226, "grad_norm": 0.9755131176201265, "learning_rate": 0.00019975250742009382, "loss": 12.7744, "step": 1900 }, { "epoch": 0.10351704750430527, "grad_norm": 0.7652603388028444, "learning_rate": 0.00019975188700607882, "loss": 12.6047, "step": 1901 }, { "epoch": 0.10357150150088829, "grad_norm": 0.7308159389641743, "learning_rate": 0.00019975126581637642, "loss": 12.54, "step": 1902 }, { "epoch": 0.10362595549747129, "grad_norm": 0.8841537311014933, "learning_rate": 0.00019975064385099143, "loss": 12.8176, "step": 1903 }, { "epoch": 0.1036804094940543, "grad_norm": 0.7959266490780836, "learning_rate": 0.0001997500211099287, "loss": 12.8648, "step": 1904 }, { "epoch": 0.10373486349063732, "grad_norm": 0.7932715396835438, "learning_rate": 0.0001997493975931931, "loss": 12.7503, "step": 1905 }, { "epoch": 0.10378931748722033, "grad_norm": 0.7059892133233987, "learning_rate": 0.00019974877330078945, "loss": 12.6694, "step": 1906 }, { "epoch": 0.10384377148380335, "grad_norm": 0.7647967486749511, "learning_rate": 0.00019974814823272265, "loss": 12.7022, "step": 1907 }, { "epoch": 0.10389822548038635, "grad_norm": 0.7149325329573871, "learning_rate": 0.00019974752238899744, "loss": 12.7661, "step": 1908 }, { "epoch": 0.10395267947696936, "grad_norm": 0.8302278048238623, "learning_rate": 0.00019974689576961882, "loss": 12.7787, "step": 1909 }, { "epoch": 0.10400713347355238, "grad_norm": 0.7650845662765747, "learning_rate": 0.00019974626837459161, "loss": 12.7433, "step": 1910 }, { "epoch": 0.10406158747013539, "grad_norm": 0.7263652929619784, "learning_rate": 0.00019974564020392067, "loss": 12.8098, "step": 1911 }, { "epoch": 0.10411604146671839, "grad_norm": 0.75783854526348, "learning_rate": 0.00019974501125761092, "loss": 12.6675, "step": 1912 }, { "epoch": 0.10417049546330141, "grad_norm": 0.9010729595464053, "learning_rate": 0.00019974438153566723, "loss": 12.7539, "step": 1913 }, { "epoch": 0.10422494945988442, "grad_norm": 0.8126529024951368, "learning_rate": 0.00019974375103809448, "loss": 12.8125, "step": 1914 }, { "epoch": 0.10427940345646744, "grad_norm": 0.941997554515698, "learning_rate": 0.0001997431197648976, "loss": 12.7994, "step": 1915 }, { "epoch": 0.10433385745305045, "grad_norm": 0.9094933655259666, "learning_rate": 0.00019974248771608154, "loss": 12.6271, "step": 1916 }, { "epoch": 0.10438831144963345, "grad_norm": 0.8626037953851565, "learning_rate": 0.00019974185489165112, "loss": 12.6484, "step": 1917 }, { "epoch": 0.10444276544621647, "grad_norm": 0.8310424783833855, "learning_rate": 0.00019974122129161133, "loss": 12.8424, "step": 1918 }, { "epoch": 0.10449721944279948, "grad_norm": 0.8262241343181942, "learning_rate": 0.00019974058691596706, "loss": 12.7997, "step": 1919 }, { "epoch": 0.10455167343938249, "grad_norm": 0.8746503937477267, "learning_rate": 0.0001997399517647233, "loss": 12.8625, "step": 1920 }, { "epoch": 0.1046061274359655, "grad_norm": 0.7630310464515302, "learning_rate": 0.0001997393158378849, "loss": 12.7733, "step": 1921 }, { "epoch": 0.10466058143254851, "grad_norm": 0.9309510895312205, "learning_rate": 0.0001997386791354569, "loss": 12.6717, "step": 1922 }, { "epoch": 0.10471503542913152, "grad_norm": 0.7652281673953589, "learning_rate": 0.00019973804165744418, "loss": 12.6441, "step": 1923 }, { "epoch": 0.10476948942571454, "grad_norm": 0.9094132289341906, "learning_rate": 0.0001997374034038517, "loss": 12.8794, "step": 1924 }, { "epoch": 0.10482394342229755, "grad_norm": 0.9693633912024958, "learning_rate": 0.0001997367643746845, "loss": 12.6089, "step": 1925 }, { "epoch": 0.10487839741888057, "grad_norm": 0.7918707360267294, "learning_rate": 0.00019973612456994743, "loss": 12.7389, "step": 1926 }, { "epoch": 0.10493285141546357, "grad_norm": 0.8084363062606581, "learning_rate": 0.00019973548398964557, "loss": 12.7046, "step": 1927 }, { "epoch": 0.10498730541204658, "grad_norm": 0.9597969610620458, "learning_rate": 0.00019973484263378387, "loss": 12.865, "step": 1928 }, { "epoch": 0.1050417594086296, "grad_norm": 0.8646966847427582, "learning_rate": 0.00019973420050236728, "loss": 12.7711, "step": 1929 }, { "epoch": 0.10509621340521261, "grad_norm": 0.8524972206331253, "learning_rate": 0.00019973355759540082, "loss": 12.7456, "step": 1930 }, { "epoch": 0.10515066740179561, "grad_norm": 0.7654825536901725, "learning_rate": 0.00019973291391288953, "loss": 12.7896, "step": 1931 }, { "epoch": 0.10520512139837863, "grad_norm": 0.8152675935793777, "learning_rate": 0.00019973226945483834, "loss": 12.7541, "step": 1932 }, { "epoch": 0.10525957539496164, "grad_norm": 0.8117049500801464, "learning_rate": 0.0001997316242212523, "loss": 12.8113, "step": 1933 }, { "epoch": 0.10531402939154466, "grad_norm": 0.8842097075775519, "learning_rate": 0.00019973097821213642, "loss": 12.7517, "step": 1934 }, { "epoch": 0.10536848338812767, "grad_norm": 0.7566468144569976, "learning_rate": 0.00019973033142749576, "loss": 12.7313, "step": 1935 }, { "epoch": 0.10542293738471067, "grad_norm": 0.7788525110569708, "learning_rate": 0.00019972968386733532, "loss": 12.8033, "step": 1936 }, { "epoch": 0.1054773913812937, "grad_norm": 0.8621093718166254, "learning_rate": 0.0001997290355316601, "loss": 12.9192, "step": 1937 }, { "epoch": 0.1055318453778767, "grad_norm": 0.732165906619715, "learning_rate": 0.0001997283864204752, "loss": 12.6428, "step": 1938 }, { "epoch": 0.10558629937445971, "grad_norm": 0.7521490290685846, "learning_rate": 0.00019972773653378562, "loss": 12.6545, "step": 1939 }, { "epoch": 0.10564075337104273, "grad_norm": 0.9478641791415195, "learning_rate": 0.00019972708587159642, "loss": 12.7364, "step": 1940 }, { "epoch": 0.10569520736762573, "grad_norm": 0.7784312137825782, "learning_rate": 0.0001997264344339127, "loss": 12.664, "step": 1941 }, { "epoch": 0.10574966136420876, "grad_norm": 0.7947087795298028, "learning_rate": 0.00019972578222073953, "loss": 12.7374, "step": 1942 }, { "epoch": 0.10580411536079176, "grad_norm": 0.7660446958449624, "learning_rate": 0.00019972512923208192, "loss": 12.6068, "step": 1943 }, { "epoch": 0.10585856935737477, "grad_norm": 0.9478451868901601, "learning_rate": 0.000199724475467945, "loss": 12.7313, "step": 1944 }, { "epoch": 0.10591302335395779, "grad_norm": 0.8443286080664616, "learning_rate": 0.00019972382092833381, "loss": 12.7797, "step": 1945 }, { "epoch": 0.1059674773505408, "grad_norm": 0.7757027982670757, "learning_rate": 0.00019972316561325348, "loss": 12.7802, "step": 1946 }, { "epoch": 0.1060219313471238, "grad_norm": 0.7323537505609665, "learning_rate": 0.0001997225095227091, "loss": 12.7715, "step": 1947 }, { "epoch": 0.10607638534370682, "grad_norm": 0.7258173389061959, "learning_rate": 0.00019972185265670572, "loss": 12.7192, "step": 1948 }, { "epoch": 0.10613083934028983, "grad_norm": 0.9886967263255692, "learning_rate": 0.00019972119501524853, "loss": 12.948, "step": 1949 }, { "epoch": 0.10618529333687285, "grad_norm": 0.779434641578507, "learning_rate": 0.0001997205365983426, "loss": 12.6265, "step": 1950 }, { "epoch": 0.10623974733345586, "grad_norm": 0.7378281448418083, "learning_rate": 0.00019971987740599305, "loss": 12.6904, "step": 1951 }, { "epoch": 0.10629420133003886, "grad_norm": 0.8763514796537106, "learning_rate": 0.00019971921743820503, "loss": 12.7581, "step": 1952 }, { "epoch": 0.10634865532662188, "grad_norm": 0.7598026179643976, "learning_rate": 0.00019971855669498364, "loss": 12.6264, "step": 1953 }, { "epoch": 0.10640310932320489, "grad_norm": 0.7677533412606428, "learning_rate": 0.00019971789517633402, "loss": 12.5165, "step": 1954 }, { "epoch": 0.1064575633197879, "grad_norm": 0.8014143428907597, "learning_rate": 0.00019971723288226133, "loss": 12.8213, "step": 1955 }, { "epoch": 0.10651201731637092, "grad_norm": 1.0166532682991876, "learning_rate": 0.0001997165698127707, "loss": 12.6291, "step": 1956 }, { "epoch": 0.10656647131295392, "grad_norm": 0.7397763869887779, "learning_rate": 0.00019971590596786732, "loss": 12.7036, "step": 1957 }, { "epoch": 0.10662092530953694, "grad_norm": 0.8243836863860697, "learning_rate": 0.0001997152413475563, "loss": 12.7095, "step": 1958 }, { "epoch": 0.10667537930611995, "grad_norm": 0.7607529958960203, "learning_rate": 0.0001997145759518429, "loss": 12.7681, "step": 1959 }, { "epoch": 0.10672983330270296, "grad_norm": 0.8500255738770411, "learning_rate": 0.00019971390978073219, "loss": 12.8752, "step": 1960 }, { "epoch": 0.10678428729928598, "grad_norm": 0.8429022005007072, "learning_rate": 0.0001997132428342294, "loss": 12.7818, "step": 1961 }, { "epoch": 0.10683874129586898, "grad_norm": 0.8973653852038352, "learning_rate": 0.00019971257511233975, "loss": 12.829, "step": 1962 }, { "epoch": 0.10689319529245199, "grad_norm": 0.9450106747699376, "learning_rate": 0.00019971190661506832, "loss": 12.7089, "step": 1963 }, { "epoch": 0.10694764928903501, "grad_norm": 0.8972410673730398, "learning_rate": 0.00019971123734242044, "loss": 12.469, "step": 1964 }, { "epoch": 0.10700210328561802, "grad_norm": 0.781966142046788, "learning_rate": 0.00019971056729440126, "loss": 12.5841, "step": 1965 }, { "epoch": 0.10705655728220104, "grad_norm": 0.8277637906824876, "learning_rate": 0.00019970989647101597, "loss": 12.6589, "step": 1966 }, { "epoch": 0.10711101127878404, "grad_norm": 0.8526458345610508, "learning_rate": 0.0001997092248722698, "loss": 12.7865, "step": 1967 }, { "epoch": 0.10716546527536705, "grad_norm": 0.7697247304481947, "learning_rate": 0.00019970855249816798, "loss": 12.7212, "step": 1968 }, { "epoch": 0.10721991927195007, "grad_norm": 0.8853362692299411, "learning_rate": 0.00019970787934871573, "loss": 12.7994, "step": 1969 }, { "epoch": 0.10727437326853308, "grad_norm": 0.7867428966469842, "learning_rate": 0.0001997072054239183, "loss": 12.5694, "step": 1970 }, { "epoch": 0.10732882726511608, "grad_norm": 0.7358861967218404, "learning_rate": 0.0001997065307237809, "loss": 12.5993, "step": 1971 }, { "epoch": 0.1073832812616991, "grad_norm": 0.7950033939733909, "learning_rate": 0.00019970585524830883, "loss": 12.6651, "step": 1972 }, { "epoch": 0.10743773525828211, "grad_norm": 0.8758062462747864, "learning_rate": 0.0001997051789975073, "loss": 12.7358, "step": 1973 }, { "epoch": 0.10749218925486513, "grad_norm": 0.8597195443725842, "learning_rate": 0.00019970450197138155, "loss": 12.7349, "step": 1974 }, { "epoch": 0.10754664325144814, "grad_norm": 0.790691546135162, "learning_rate": 0.00019970382416993688, "loss": 12.8114, "step": 1975 }, { "epoch": 0.10760109724803114, "grad_norm": 0.9024575737104155, "learning_rate": 0.00019970314559317854, "loss": 12.8282, "step": 1976 }, { "epoch": 0.10765555124461416, "grad_norm": 0.7525079128703486, "learning_rate": 0.00019970246624111186, "loss": 12.7042, "step": 1977 }, { "epoch": 0.10771000524119717, "grad_norm": 0.7830885538690692, "learning_rate": 0.00019970178611374207, "loss": 12.7363, "step": 1978 }, { "epoch": 0.10776445923778018, "grad_norm": 0.7758410027452511, "learning_rate": 0.00019970110521107446, "loss": 12.711, "step": 1979 }, { "epoch": 0.1078189132343632, "grad_norm": 0.786509695907984, "learning_rate": 0.00019970042353311434, "loss": 12.6893, "step": 1980 }, { "epoch": 0.1078733672309462, "grad_norm": 0.7910151871874543, "learning_rate": 0.00019969974107986703, "loss": 12.6859, "step": 1981 }, { "epoch": 0.10792782122752922, "grad_norm": 0.7356011892291389, "learning_rate": 0.00019969905785133775, "loss": 12.6665, "step": 1982 }, { "epoch": 0.10798227522411223, "grad_norm": 0.791857100119298, "learning_rate": 0.00019969837384753195, "loss": 12.6574, "step": 1983 }, { "epoch": 0.10803672922069524, "grad_norm": 0.9453268512645497, "learning_rate": 0.00019969768906845484, "loss": 12.9026, "step": 1984 }, { "epoch": 0.10809118321727826, "grad_norm": 0.7369807898916599, "learning_rate": 0.00019969700351411178, "loss": 12.4894, "step": 1985 }, { "epoch": 0.10814563721386126, "grad_norm": 0.7844522057126512, "learning_rate": 0.0001996963171845081, "loss": 12.6189, "step": 1986 }, { "epoch": 0.10820009121044427, "grad_norm": 0.8845416386732657, "learning_rate": 0.00019969563007964913, "loss": 12.8518, "step": 1987 }, { "epoch": 0.10825454520702729, "grad_norm": 0.8037598314151181, "learning_rate": 0.00019969494219954025, "loss": 12.7211, "step": 1988 }, { "epoch": 0.1083089992036103, "grad_norm": 0.9389458047150615, "learning_rate": 0.00019969425354418675, "loss": 12.7243, "step": 1989 }, { "epoch": 0.1083634532001933, "grad_norm": 0.7320127631185521, "learning_rate": 0.00019969356411359405, "loss": 12.6032, "step": 1990 }, { "epoch": 0.10841790719677633, "grad_norm": 0.8657140221983711, "learning_rate": 0.00019969287390776748, "loss": 12.7469, "step": 1991 }, { "epoch": 0.10847236119335933, "grad_norm": 0.7649625558778893, "learning_rate": 0.00019969218292671234, "loss": 12.7505, "step": 1992 }, { "epoch": 0.10852681518994235, "grad_norm": 0.8960064777973007, "learning_rate": 0.00019969149117043413, "loss": 12.6676, "step": 1993 }, { "epoch": 0.10858126918652536, "grad_norm": 1.017277538626505, "learning_rate": 0.00019969079863893817, "loss": 12.7488, "step": 1994 }, { "epoch": 0.10863572318310837, "grad_norm": 0.8590189883083232, "learning_rate": 0.00019969010533222982, "loss": 12.742, "step": 1995 }, { "epoch": 0.10869017717969139, "grad_norm": 0.8725955485441322, "learning_rate": 0.00019968941125031447, "loss": 12.7131, "step": 1996 }, { "epoch": 0.10874463117627439, "grad_norm": 0.9533012684001186, "learning_rate": 0.00019968871639319756, "loss": 12.8064, "step": 1997 }, { "epoch": 0.1087990851728574, "grad_norm": 0.8515093684993654, "learning_rate": 0.0001996880207608845, "loss": 12.7675, "step": 1998 }, { "epoch": 0.10885353916944042, "grad_norm": 0.9449399025658869, "learning_rate": 0.00019968732435338062, "loss": 12.7264, "step": 1999 }, { "epoch": 0.10890799316602343, "grad_norm": 0.8716562621301317, "learning_rate": 0.0001996866271706914, "loss": 12.7379, "step": 2000 }, { "epoch": 0.10896244716260645, "grad_norm": 0.7605747741023537, "learning_rate": 0.00019968592921282228, "loss": 12.6486, "step": 2001 }, { "epoch": 0.10901690115918945, "grad_norm": 0.8690279429757708, "learning_rate": 0.00019968523047977864, "loss": 12.727, "step": 2002 }, { "epoch": 0.10907135515577246, "grad_norm": 0.7740230628936833, "learning_rate": 0.00019968453097156594, "loss": 12.7432, "step": 2003 }, { "epoch": 0.10912580915235548, "grad_norm": 0.8301607370104841, "learning_rate": 0.0001996838306881896, "loss": 12.7203, "step": 2004 }, { "epoch": 0.10918026314893849, "grad_norm": 0.7939352424523266, "learning_rate": 0.00019968312962965508, "loss": 12.695, "step": 2005 }, { "epoch": 0.10923471714552149, "grad_norm": 0.880346087195917, "learning_rate": 0.00019968242779596783, "loss": 12.8456, "step": 2006 }, { "epoch": 0.10928917114210451, "grad_norm": 0.7813320479992395, "learning_rate": 0.00019968172518713327, "loss": 12.7121, "step": 2007 }, { "epoch": 0.10934362513868752, "grad_norm": 0.8484785526716306, "learning_rate": 0.00019968102180315696, "loss": 12.7572, "step": 2008 }, { "epoch": 0.10939807913527054, "grad_norm": 0.898153371419266, "learning_rate": 0.00019968031764404427, "loss": 12.7857, "step": 2009 }, { "epoch": 0.10945253313185355, "grad_norm": 0.741570262515584, "learning_rate": 0.0001996796127098007, "loss": 12.8034, "step": 2010 }, { "epoch": 0.10950698712843655, "grad_norm": 0.9657599315611324, "learning_rate": 0.00019967890700043177, "loss": 12.5358, "step": 2011 }, { "epoch": 0.10956144112501957, "grad_norm": 0.7940396053085513, "learning_rate": 0.00019967820051594294, "loss": 12.7363, "step": 2012 }, { "epoch": 0.10961589512160258, "grad_norm": 1.0561796034970556, "learning_rate": 0.0001996774932563397, "loss": 12.7496, "step": 2013 }, { "epoch": 0.10967034911818559, "grad_norm": 0.8155241579906948, "learning_rate": 0.00019967678522162758, "loss": 12.8103, "step": 2014 }, { "epoch": 0.10972480311476861, "grad_norm": 0.7664867207293443, "learning_rate": 0.00019967607641181205, "loss": 12.7256, "step": 2015 }, { "epoch": 0.10977925711135161, "grad_norm": 0.7413458849382605, "learning_rate": 0.00019967536682689862, "loss": 12.6486, "step": 2016 }, { "epoch": 0.10983371110793463, "grad_norm": 0.7641870409209854, "learning_rate": 0.00019967465646689284, "loss": 12.6495, "step": 2017 }, { "epoch": 0.10988816510451764, "grad_norm": 0.7952154969764736, "learning_rate": 0.0001996739453318002, "loss": 12.7041, "step": 2018 }, { "epoch": 0.10994261910110065, "grad_norm": 0.8621410230361722, "learning_rate": 0.00019967323342162625, "loss": 12.7359, "step": 2019 }, { "epoch": 0.10999707309768367, "grad_norm": 0.7941226482770086, "learning_rate": 0.0001996725207363765, "loss": 12.7605, "step": 2020 }, { "epoch": 0.11005152709426667, "grad_norm": 0.8875273838164063, "learning_rate": 0.00019967180727605656, "loss": 12.8796, "step": 2021 }, { "epoch": 0.11010598109084968, "grad_norm": 0.9317449387044903, "learning_rate": 0.0001996710930406719, "loss": 12.7256, "step": 2022 }, { "epoch": 0.1101604350874327, "grad_norm": 0.7733791956195418, "learning_rate": 0.00019967037803022812, "loss": 12.7287, "step": 2023 }, { "epoch": 0.11021488908401571, "grad_norm": 0.839253425039756, "learning_rate": 0.00019966966224473076, "loss": 12.8589, "step": 2024 }, { "epoch": 0.11026934308059873, "grad_norm": 0.8383895898304654, "learning_rate": 0.0001996689456841854, "loss": 12.6426, "step": 2025 }, { "epoch": 0.11032379707718173, "grad_norm": 0.80800769816932, "learning_rate": 0.00019966822834859759, "loss": 12.7299, "step": 2026 }, { "epoch": 0.11037825107376474, "grad_norm": 0.9158105841488882, "learning_rate": 0.00019966751023797294, "loss": 12.8265, "step": 2027 }, { "epoch": 0.11043270507034776, "grad_norm": 0.869986832787026, "learning_rate": 0.00019966679135231702, "loss": 12.7817, "step": 2028 }, { "epoch": 0.11048715906693077, "grad_norm": 0.8128647516680227, "learning_rate": 0.00019966607169163538, "loss": 12.7022, "step": 2029 }, { "epoch": 0.11054161306351377, "grad_norm": 0.8051905540730242, "learning_rate": 0.00019966535125593368, "loss": 12.8388, "step": 2030 }, { "epoch": 0.1105960670600968, "grad_norm": 0.7719518429212509, "learning_rate": 0.0001996646300452175, "loss": 12.7637, "step": 2031 }, { "epoch": 0.1106505210566798, "grad_norm": 0.8971123617842752, "learning_rate": 0.00019966390805949242, "loss": 12.8074, "step": 2032 }, { "epoch": 0.11070497505326282, "grad_norm": 0.8091233612763397, "learning_rate": 0.0001996631852987641, "loss": 12.6037, "step": 2033 }, { "epoch": 0.11075942904984583, "grad_norm": 0.8335833189649199, "learning_rate": 0.0001996624617630381, "loss": 12.5976, "step": 2034 }, { "epoch": 0.11081388304642883, "grad_norm": 0.7532461021786356, "learning_rate": 0.00019966173745232011, "loss": 12.7016, "step": 2035 }, { "epoch": 0.11086833704301186, "grad_norm": 0.9038619999472759, "learning_rate": 0.00019966101236661575, "loss": 12.5796, "step": 2036 }, { "epoch": 0.11092279103959486, "grad_norm": 0.9020584325889739, "learning_rate": 0.00019966028650593063, "loss": 12.8904, "step": 2037 }, { "epoch": 0.11097724503617787, "grad_norm": 0.8719156748425685, "learning_rate": 0.0001996595598702704, "loss": 12.8904, "step": 2038 }, { "epoch": 0.11103169903276089, "grad_norm": 0.8031254838524301, "learning_rate": 0.0001996588324596407, "loss": 12.7358, "step": 2039 }, { "epoch": 0.1110861530293439, "grad_norm": 0.8312455018258538, "learning_rate": 0.00019965810427404726, "loss": 12.7067, "step": 2040 }, { "epoch": 0.11114060702592692, "grad_norm": 0.6682438162781495, "learning_rate": 0.00019965737531349567, "loss": 12.5912, "step": 2041 }, { "epoch": 0.11119506102250992, "grad_norm": 0.9103969344133255, "learning_rate": 0.00019965664557799163, "loss": 12.9311, "step": 2042 }, { "epoch": 0.11124951501909293, "grad_norm": 0.7586090916444304, "learning_rate": 0.00019965591506754076, "loss": 12.7437, "step": 2043 }, { "epoch": 0.11130396901567595, "grad_norm": 0.7919965882278415, "learning_rate": 0.0001996551837821488, "loss": 12.7225, "step": 2044 }, { "epoch": 0.11135842301225896, "grad_norm": 0.7519166731231169, "learning_rate": 0.00019965445172182142, "loss": 12.729, "step": 2045 }, { "epoch": 0.11141287700884196, "grad_norm": 0.8124237368088452, "learning_rate": 0.0001996537188865643, "loss": 12.6507, "step": 2046 }, { "epoch": 0.11146733100542498, "grad_norm": 0.8276721559395761, "learning_rate": 0.0001996529852763832, "loss": 12.849, "step": 2047 }, { "epoch": 0.11152178500200799, "grad_norm": 0.7338358319650686, "learning_rate": 0.00019965225089128372, "loss": 12.7346, "step": 2048 }, { "epoch": 0.11157623899859101, "grad_norm": 0.7879128849080722, "learning_rate": 0.00019965151573127164, "loss": 12.7471, "step": 2049 }, { "epoch": 0.11163069299517402, "grad_norm": 0.7036905561446233, "learning_rate": 0.00019965077979635268, "loss": 12.691, "step": 2050 }, { "epoch": 0.11168514699175702, "grad_norm": 0.7659057759800266, "learning_rate": 0.00019965004308653253, "loss": 12.7796, "step": 2051 }, { "epoch": 0.11173960098834004, "grad_norm": 0.9112024972953926, "learning_rate": 0.00019964930560181695, "loss": 12.6461, "step": 2052 }, { "epoch": 0.11179405498492305, "grad_norm": 0.815708476482424, "learning_rate": 0.00019964856734221162, "loss": 12.662, "step": 2053 }, { "epoch": 0.11184850898150606, "grad_norm": 0.7861656973503325, "learning_rate": 0.00019964782830772236, "loss": 12.6978, "step": 2054 }, { "epoch": 0.11190296297808908, "grad_norm": 0.7568981781640535, "learning_rate": 0.00019964708849835484, "loss": 12.8035, "step": 2055 }, { "epoch": 0.11195741697467208, "grad_norm": 0.7885340729467678, "learning_rate": 0.00019964634791411488, "loss": 12.8172, "step": 2056 }, { "epoch": 0.11201187097125509, "grad_norm": 0.7049502508081198, "learning_rate": 0.00019964560655500818, "loss": 12.6953, "step": 2057 }, { "epoch": 0.11206632496783811, "grad_norm": 0.7515432089172787, "learning_rate": 0.00019964486442104057, "loss": 12.6345, "step": 2058 }, { "epoch": 0.11212077896442112, "grad_norm": 0.810079398805017, "learning_rate": 0.00019964412151221773, "loss": 12.8406, "step": 2059 }, { "epoch": 0.11217523296100414, "grad_norm": 0.8140262106072949, "learning_rate": 0.00019964337782854555, "loss": 12.5698, "step": 2060 }, { "epoch": 0.11222968695758714, "grad_norm": 0.7161724218226323, "learning_rate": 0.00019964263337002972, "loss": 12.6271, "step": 2061 }, { "epoch": 0.11228414095417015, "grad_norm": 0.753009588216702, "learning_rate": 0.00019964188813667607, "loss": 12.6349, "step": 2062 }, { "epoch": 0.11233859495075317, "grad_norm": 0.7505899607318048, "learning_rate": 0.0001996411421284904, "loss": 12.5799, "step": 2063 }, { "epoch": 0.11239304894733618, "grad_norm": 0.8278026206975396, "learning_rate": 0.00019964039534547847, "loss": 12.7407, "step": 2064 }, { "epoch": 0.11244750294391918, "grad_norm": 0.8331622863627176, "learning_rate": 0.00019963964778764613, "loss": 12.6814, "step": 2065 }, { "epoch": 0.1125019569405022, "grad_norm": 0.7699158187386606, "learning_rate": 0.00019963889945499917, "loss": 12.6718, "step": 2066 }, { "epoch": 0.11255641093708521, "grad_norm": 0.7747270710988973, "learning_rate": 0.00019963815034754344, "loss": 12.7746, "step": 2067 }, { "epoch": 0.11261086493366823, "grad_norm": 0.7494007434705581, "learning_rate": 0.00019963740046528475, "loss": 12.6735, "step": 2068 }, { "epoch": 0.11266531893025124, "grad_norm": 0.746165912951711, "learning_rate": 0.0001996366498082289, "loss": 12.766, "step": 2069 }, { "epoch": 0.11271977292683424, "grad_norm": 0.8449008773597039, "learning_rate": 0.00019963589837638176, "loss": 12.7827, "step": 2070 }, { "epoch": 0.11277422692341726, "grad_norm": 0.7303376725361176, "learning_rate": 0.00019963514616974916, "loss": 12.738, "step": 2071 }, { "epoch": 0.11282868092000027, "grad_norm": 0.7473221651838649, "learning_rate": 0.00019963439318833697, "loss": 12.7155, "step": 2072 }, { "epoch": 0.11288313491658328, "grad_norm": 0.825100614677872, "learning_rate": 0.000199633639432151, "loss": 12.7331, "step": 2073 }, { "epoch": 0.1129375889131663, "grad_norm": 0.7861781030471263, "learning_rate": 0.00019963288490119717, "loss": 12.7108, "step": 2074 }, { "epoch": 0.1129920429097493, "grad_norm": 0.7526941015138744, "learning_rate": 0.0001996321295954813, "loss": 12.7691, "step": 2075 }, { "epoch": 0.11304649690633232, "grad_norm": 0.8979029163830556, "learning_rate": 0.00019963137351500931, "loss": 12.6463, "step": 2076 }, { "epoch": 0.11310095090291533, "grad_norm": 0.905851466359616, "learning_rate": 0.00019963061665978705, "loss": 12.8106, "step": 2077 }, { "epoch": 0.11315540489949834, "grad_norm": 0.8874248846001883, "learning_rate": 0.00019962985902982036, "loss": 12.8702, "step": 2078 }, { "epoch": 0.11320985889608136, "grad_norm": 0.7718457677154574, "learning_rate": 0.00019962910062511525, "loss": 12.7201, "step": 2079 }, { "epoch": 0.11326431289266436, "grad_norm": 0.7637357437072458, "learning_rate": 0.00019962834144567748, "loss": 12.7155, "step": 2080 }, { "epoch": 0.11331876688924737, "grad_norm": 0.8570310071456682, "learning_rate": 0.00019962758149151303, "loss": 12.8059, "step": 2081 }, { "epoch": 0.11337322088583039, "grad_norm": 0.8012256975464873, "learning_rate": 0.00019962682076262781, "loss": 12.7262, "step": 2082 }, { "epoch": 0.1134276748824134, "grad_norm": 0.7009570353759675, "learning_rate": 0.00019962605925902775, "loss": 12.6513, "step": 2083 }, { "epoch": 0.11348212887899642, "grad_norm": 0.8049734064740511, "learning_rate": 0.00019962529698071873, "loss": 12.8019, "step": 2084 }, { "epoch": 0.11353658287557943, "grad_norm": 0.7857397204885804, "learning_rate": 0.00019962453392770668, "loss": 12.7982, "step": 2085 }, { "epoch": 0.11359103687216243, "grad_norm": 0.7731529643017948, "learning_rate": 0.00019962377009999756, "loss": 12.7888, "step": 2086 }, { "epoch": 0.11364549086874545, "grad_norm": 0.741120737210802, "learning_rate": 0.0001996230054975973, "loss": 12.5751, "step": 2087 }, { "epoch": 0.11369994486532846, "grad_norm": 0.7620596420969351, "learning_rate": 0.0001996222401205118, "loss": 12.7663, "step": 2088 }, { "epoch": 0.11375439886191147, "grad_norm": 0.7492434504656282, "learning_rate": 0.0001996214739687471, "loss": 12.7384, "step": 2089 }, { "epoch": 0.11380885285849449, "grad_norm": 0.7840620448864547, "learning_rate": 0.0001996207070423091, "loss": 12.6857, "step": 2090 }, { "epoch": 0.11386330685507749, "grad_norm": 0.8200841690305539, "learning_rate": 0.00019961993934120378, "loss": 12.7391, "step": 2091 }, { "epoch": 0.11391776085166051, "grad_norm": 0.8040405029865603, "learning_rate": 0.0001996191708654371, "loss": 12.7379, "step": 2092 }, { "epoch": 0.11397221484824352, "grad_norm": 0.7587519143034004, "learning_rate": 0.00019961840161501505, "loss": 12.6308, "step": 2093 }, { "epoch": 0.11402666884482653, "grad_norm": 0.784489810754187, "learning_rate": 0.0001996176315899436, "loss": 12.7887, "step": 2094 }, { "epoch": 0.11408112284140955, "grad_norm": 0.8164944082645404, "learning_rate": 0.0001996168607902287, "loss": 12.7018, "step": 2095 }, { "epoch": 0.11413557683799255, "grad_norm": 0.835692331915501, "learning_rate": 0.00019961608921587645, "loss": 12.7757, "step": 2096 }, { "epoch": 0.11419003083457556, "grad_norm": 0.7558616368210048, "learning_rate": 0.00019961531686689274, "loss": 12.5786, "step": 2097 }, { "epoch": 0.11424448483115858, "grad_norm": 0.7079213163165838, "learning_rate": 0.00019961454374328364, "loss": 12.6923, "step": 2098 }, { "epoch": 0.11429893882774159, "grad_norm": 0.7888926831701923, "learning_rate": 0.00019961376984505512, "loss": 12.825, "step": 2099 }, { "epoch": 0.1143533928243246, "grad_norm": 0.774072981035808, "learning_rate": 0.00019961299517221324, "loss": 12.7196, "step": 2100 }, { "epoch": 0.11440784682090761, "grad_norm": 0.6690168651460398, "learning_rate": 0.00019961221972476402, "loss": 12.5567, "step": 2101 }, { "epoch": 0.11446230081749062, "grad_norm": 0.8531511807742205, "learning_rate": 0.00019961144350271343, "loss": 12.7074, "step": 2102 }, { "epoch": 0.11451675481407364, "grad_norm": 0.7090196312756721, "learning_rate": 0.0001996106665060676, "loss": 12.6886, "step": 2103 }, { "epoch": 0.11457120881065665, "grad_norm": 0.7685376179371318, "learning_rate": 0.0001996098887348325, "loss": 12.7699, "step": 2104 }, { "epoch": 0.11462566280723965, "grad_norm": 0.740224080424988, "learning_rate": 0.0001996091101890142, "loss": 12.6349, "step": 2105 }, { "epoch": 0.11468011680382267, "grad_norm": 0.9454100900329399, "learning_rate": 0.00019960833086861873, "loss": 12.8987, "step": 2106 }, { "epoch": 0.11473457080040568, "grad_norm": 0.7763313314121196, "learning_rate": 0.00019960755077365222, "loss": 12.6243, "step": 2107 }, { "epoch": 0.1147890247969887, "grad_norm": 0.7103990252977854, "learning_rate": 0.0001996067699041207, "loss": 12.6774, "step": 2108 }, { "epoch": 0.11484347879357171, "grad_norm": 0.7405021657321451, "learning_rate": 0.00019960598826003018, "loss": 12.6477, "step": 2109 }, { "epoch": 0.11489793279015471, "grad_norm": 0.8219760829652516, "learning_rate": 0.00019960520584138682, "loss": 12.7941, "step": 2110 }, { "epoch": 0.11495238678673773, "grad_norm": 0.9959298108093985, "learning_rate": 0.00019960442264819664, "loss": 12.7902, "step": 2111 }, { "epoch": 0.11500684078332074, "grad_norm": 0.6913064619941739, "learning_rate": 0.00019960363868046582, "loss": 12.5739, "step": 2112 }, { "epoch": 0.11506129477990375, "grad_norm": 0.7620375931662848, "learning_rate": 0.00019960285393820037, "loss": 12.653, "step": 2113 }, { "epoch": 0.11511574877648677, "grad_norm": 0.8280792459478163, "learning_rate": 0.00019960206842140643, "loss": 12.6963, "step": 2114 }, { "epoch": 0.11517020277306977, "grad_norm": 0.7494683299858886, "learning_rate": 0.0001996012821300901, "loss": 12.6244, "step": 2115 }, { "epoch": 0.1152246567696528, "grad_norm": 0.706202371763292, "learning_rate": 0.00019960049506425752, "loss": 12.6273, "step": 2116 }, { "epoch": 0.1152791107662358, "grad_norm": 0.8221177533629022, "learning_rate": 0.00019959970722391478, "loss": 12.8283, "step": 2117 }, { "epoch": 0.11533356476281881, "grad_norm": 0.831064960973766, "learning_rate": 0.00019959891860906799, "loss": 12.6878, "step": 2118 }, { "epoch": 0.11538801875940183, "grad_norm": 0.881850146847089, "learning_rate": 0.00019959812921972332, "loss": 12.8005, "step": 2119 }, { "epoch": 0.11544247275598483, "grad_norm": 0.8511223229165487, "learning_rate": 0.0001995973390558869, "loss": 12.7423, "step": 2120 }, { "epoch": 0.11549692675256784, "grad_norm": 0.8376433573079606, "learning_rate": 0.00019959654811756486, "loss": 12.8614, "step": 2121 }, { "epoch": 0.11555138074915086, "grad_norm": 0.8510792016650545, "learning_rate": 0.00019959575640476334, "loss": 12.6251, "step": 2122 }, { "epoch": 0.11560583474573387, "grad_norm": 0.8033614865041665, "learning_rate": 0.00019959496391748857, "loss": 12.7687, "step": 2123 }, { "epoch": 0.11566028874231687, "grad_norm": 0.8177322004397564, "learning_rate": 0.00019959417065574663, "loss": 12.5712, "step": 2124 }, { "epoch": 0.1157147427388999, "grad_norm": 0.8300869860612424, "learning_rate": 0.00019959337661954368, "loss": 12.8096, "step": 2125 }, { "epoch": 0.1157691967354829, "grad_norm": 0.7413698447803599, "learning_rate": 0.000199592581808886, "loss": 12.6082, "step": 2126 }, { "epoch": 0.11582365073206592, "grad_norm": 0.7823550575389198, "learning_rate": 0.00019959178622377965, "loss": 12.6772, "step": 2127 }, { "epoch": 0.11587810472864893, "grad_norm": 0.8882803396640111, "learning_rate": 0.00019959098986423087, "loss": 12.8569, "step": 2128 }, { "epoch": 0.11593255872523194, "grad_norm": 0.826671747324549, "learning_rate": 0.00019959019273024588, "loss": 12.7348, "step": 2129 }, { "epoch": 0.11598701272181496, "grad_norm": 0.7841105201498944, "learning_rate": 0.00019958939482183084, "loss": 12.7031, "step": 2130 }, { "epoch": 0.11604146671839796, "grad_norm": 0.7900489427215688, "learning_rate": 0.00019958859613899196, "loss": 12.5509, "step": 2131 }, { "epoch": 0.11609592071498097, "grad_norm": 0.723579613398403, "learning_rate": 0.0001995877966817355, "loss": 12.6316, "step": 2132 }, { "epoch": 0.11615037471156399, "grad_norm": 1.030053378973563, "learning_rate": 0.00019958699645006758, "loss": 12.9159, "step": 2133 }, { "epoch": 0.116204828708147, "grad_norm": 0.857899856222025, "learning_rate": 0.00019958619544399445, "loss": 12.744, "step": 2134 }, { "epoch": 0.11625928270473002, "grad_norm": 0.7480106693179064, "learning_rate": 0.00019958539366352241, "loss": 12.8448, "step": 2135 }, { "epoch": 0.11631373670131302, "grad_norm": 0.7751551904107694, "learning_rate": 0.00019958459110865765, "loss": 12.6368, "step": 2136 }, { "epoch": 0.11636819069789603, "grad_norm": 0.7603853151602749, "learning_rate": 0.0001995837877794064, "loss": 12.7677, "step": 2137 }, { "epoch": 0.11642264469447905, "grad_norm": 0.881158263097917, "learning_rate": 0.0001995829836757749, "loss": 12.7606, "step": 2138 }, { "epoch": 0.11647709869106206, "grad_norm": 0.7802242944553751, "learning_rate": 0.00019958217879776944, "loss": 12.6844, "step": 2139 }, { "epoch": 0.11653155268764506, "grad_norm": 0.9848652595670689, "learning_rate": 0.00019958137314539625, "loss": 12.5426, "step": 2140 }, { "epoch": 0.11658600668422808, "grad_norm": 0.7941566177687074, "learning_rate": 0.00019958056671866162, "loss": 12.8575, "step": 2141 }, { "epoch": 0.11664046068081109, "grad_norm": 0.7449880770493477, "learning_rate": 0.00019957975951757177, "loss": 12.6769, "step": 2142 }, { "epoch": 0.11669491467739411, "grad_norm": 0.7723677676886868, "learning_rate": 0.00019957895154213302, "loss": 12.6147, "step": 2143 }, { "epoch": 0.11674936867397712, "grad_norm": 0.7912735436279764, "learning_rate": 0.00019957814279235165, "loss": 12.6037, "step": 2144 }, { "epoch": 0.11680382267056012, "grad_norm": 0.7972391102056795, "learning_rate": 0.00019957733326823394, "loss": 12.7607, "step": 2145 }, { "epoch": 0.11685827666714314, "grad_norm": 0.8313684183370367, "learning_rate": 0.00019957652296978618, "loss": 12.6734, "step": 2146 }, { "epoch": 0.11691273066372615, "grad_norm": 0.792697041691428, "learning_rate": 0.00019957571189701469, "loss": 12.5648, "step": 2147 }, { "epoch": 0.11696718466030916, "grad_norm": 0.9063581429753217, "learning_rate": 0.00019957490004992575, "loss": 12.7155, "step": 2148 }, { "epoch": 0.11702163865689218, "grad_norm": 0.8434538546620092, "learning_rate": 0.00019957408742852573, "loss": 12.6305, "step": 2149 }, { "epoch": 0.11707609265347518, "grad_norm": 0.87158769555442, "learning_rate": 0.00019957327403282088, "loss": 12.6689, "step": 2150 }, { "epoch": 0.1171305466500582, "grad_norm": 0.8126919819323308, "learning_rate": 0.00019957245986281755, "loss": 12.6945, "step": 2151 }, { "epoch": 0.11718500064664121, "grad_norm": 0.8233288581078374, "learning_rate": 0.00019957164491852207, "loss": 12.7854, "step": 2152 }, { "epoch": 0.11723945464322422, "grad_norm": 0.7885447903071905, "learning_rate": 0.0001995708291999408, "loss": 12.5977, "step": 2153 }, { "epoch": 0.11729390863980724, "grad_norm": 0.8239947426294945, "learning_rate": 0.00019957001270708003, "loss": 12.6609, "step": 2154 }, { "epoch": 0.11734836263639024, "grad_norm": 0.7073707699017527, "learning_rate": 0.00019956919543994615, "loss": 12.6269, "step": 2155 }, { "epoch": 0.11740281663297325, "grad_norm": 0.8425301365131279, "learning_rate": 0.00019956837739854556, "loss": 12.8125, "step": 2156 }, { "epoch": 0.11745727062955627, "grad_norm": 0.7271759383651856, "learning_rate": 0.0001995675585828845, "loss": 12.6695, "step": 2157 }, { "epoch": 0.11751172462613928, "grad_norm": 0.8336817554060225, "learning_rate": 0.00019956673899296944, "loss": 12.7262, "step": 2158 }, { "epoch": 0.1175661786227223, "grad_norm": 0.8076930572191388, "learning_rate": 0.00019956591862880675, "loss": 12.7619, "step": 2159 }, { "epoch": 0.1176206326193053, "grad_norm": 0.7989577147925232, "learning_rate": 0.00019956509749040273, "loss": 12.6479, "step": 2160 }, { "epoch": 0.11767508661588831, "grad_norm": 0.8037586122289786, "learning_rate": 0.00019956427557776384, "loss": 12.6592, "step": 2161 }, { "epoch": 0.11772954061247133, "grad_norm": 0.7611174663838151, "learning_rate": 0.00019956345289089643, "loss": 12.6262, "step": 2162 }, { "epoch": 0.11778399460905434, "grad_norm": 0.8435221901079686, "learning_rate": 0.0001995626294298069, "loss": 12.72, "step": 2163 }, { "epoch": 0.11783844860563734, "grad_norm": 0.8431094578143535, "learning_rate": 0.00019956180519450171, "loss": 12.7281, "step": 2164 }, { "epoch": 0.11789290260222036, "grad_norm": 1.0147865888423293, "learning_rate": 0.00019956098018498723, "loss": 12.7111, "step": 2165 }, { "epoch": 0.11794735659880337, "grad_norm": 0.7481049960190732, "learning_rate": 0.00019956015440126984, "loss": 12.751, "step": 2166 }, { "epoch": 0.11800181059538639, "grad_norm": 1.0240643900963289, "learning_rate": 0.000199559327843356, "loss": 12.8251, "step": 2167 }, { "epoch": 0.1180562645919694, "grad_norm": 0.8048856773837297, "learning_rate": 0.00019955850051125214, "loss": 12.7436, "step": 2168 }, { "epoch": 0.1181107185885524, "grad_norm": 0.7799355673998167, "learning_rate": 0.0001995576724049647, "loss": 12.7746, "step": 2169 }, { "epoch": 0.11816517258513543, "grad_norm": 0.898073428935087, "learning_rate": 0.00019955684352450007, "loss": 12.6445, "step": 2170 }, { "epoch": 0.11821962658171843, "grad_norm": 0.8455278039776454, "learning_rate": 0.00019955601386986471, "loss": 12.9123, "step": 2171 }, { "epoch": 0.11827408057830144, "grad_norm": 0.8917322796775545, "learning_rate": 0.00019955518344106512, "loss": 12.63, "step": 2172 }, { "epoch": 0.11832853457488446, "grad_norm": 0.8617661704418199, "learning_rate": 0.00019955435223810772, "loss": 12.8358, "step": 2173 }, { "epoch": 0.11838298857146747, "grad_norm": 0.915808254151994, "learning_rate": 0.00019955352026099901, "loss": 12.5354, "step": 2174 }, { "epoch": 0.11843744256805049, "grad_norm": 1.055264887510652, "learning_rate": 0.0001995526875097454, "loss": 12.6227, "step": 2175 }, { "epoch": 0.11849189656463349, "grad_norm": 0.9186264289083459, "learning_rate": 0.0001995518539843534, "loss": 12.617, "step": 2176 }, { "epoch": 0.1185463505612165, "grad_norm": 1.0774450319609858, "learning_rate": 0.0001995510196848295, "loss": 12.8788, "step": 2177 }, { "epoch": 0.11860080455779952, "grad_norm": 0.7694483552357013, "learning_rate": 0.00019955018461118018, "loss": 12.7367, "step": 2178 }, { "epoch": 0.11865525855438253, "grad_norm": 1.2952253320732638, "learning_rate": 0.0001995493487634119, "loss": 12.7683, "step": 2179 }, { "epoch": 0.11870971255096553, "grad_norm": 0.8391668405024848, "learning_rate": 0.00019954851214153124, "loss": 12.5749, "step": 2180 }, { "epoch": 0.11876416654754855, "grad_norm": 0.9121798107531427, "learning_rate": 0.0001995476747455446, "loss": 12.7191, "step": 2181 }, { "epoch": 0.11881862054413156, "grad_norm": 0.8153326608698361, "learning_rate": 0.0001995468365754586, "loss": 12.6872, "step": 2182 }, { "epoch": 0.11887307454071458, "grad_norm": 0.7900303177250834, "learning_rate": 0.00019954599763127967, "loss": 12.6046, "step": 2183 }, { "epoch": 0.11892752853729759, "grad_norm": 0.7569958139616827, "learning_rate": 0.00019954515791301437, "loss": 12.6619, "step": 2184 }, { "epoch": 0.11898198253388059, "grad_norm": 0.8371377500237001, "learning_rate": 0.00019954431742066927, "loss": 12.6206, "step": 2185 }, { "epoch": 0.11903643653046361, "grad_norm": 0.853149233833002, "learning_rate": 0.00019954347615425082, "loss": 12.6786, "step": 2186 }, { "epoch": 0.11909089052704662, "grad_norm": 0.7817696062905213, "learning_rate": 0.0001995426341137656, "loss": 12.7055, "step": 2187 }, { "epoch": 0.11914534452362963, "grad_norm": 0.7871792577118936, "learning_rate": 0.00019954179129922018, "loss": 12.6681, "step": 2188 }, { "epoch": 0.11919979852021265, "grad_norm": 0.8416226746173138, "learning_rate": 0.0001995409477106211, "loss": 12.8002, "step": 2189 }, { "epoch": 0.11925425251679565, "grad_norm": 0.7951692811202323, "learning_rate": 0.0001995401033479749, "loss": 12.8236, "step": 2190 }, { "epoch": 0.11930870651337866, "grad_norm": 0.8553946427982531, "learning_rate": 0.0001995392582112882, "loss": 12.6695, "step": 2191 }, { "epoch": 0.11936316050996168, "grad_norm": 0.7860369946268659, "learning_rate": 0.00019953841230056752, "loss": 12.5614, "step": 2192 }, { "epoch": 0.11941761450654469, "grad_norm": 0.7248201454741638, "learning_rate": 0.0001995375656158194, "loss": 12.5566, "step": 2193 }, { "epoch": 0.1194720685031277, "grad_norm": 0.7820765623167737, "learning_rate": 0.00019953671815705056, "loss": 12.7546, "step": 2194 }, { "epoch": 0.11952652249971071, "grad_norm": 0.7850126988810134, "learning_rate": 0.00019953586992426747, "loss": 12.589, "step": 2195 }, { "epoch": 0.11958097649629372, "grad_norm": 0.7990527946487643, "learning_rate": 0.00019953502091747677, "loss": 12.6696, "step": 2196 }, { "epoch": 0.11963543049287674, "grad_norm": 0.8866117370502644, "learning_rate": 0.00019953417113668505, "loss": 12.8036, "step": 2197 }, { "epoch": 0.11968988448945975, "grad_norm": 0.7082177203292337, "learning_rate": 0.00019953332058189892, "loss": 12.6838, "step": 2198 }, { "epoch": 0.11974433848604275, "grad_norm": 0.8070197457949011, "learning_rate": 0.000199532469253125, "loss": 12.7203, "step": 2199 }, { "epoch": 0.11979879248262577, "grad_norm": 0.8369868055876668, "learning_rate": 0.00019953161715036992, "loss": 12.6899, "step": 2200 }, { "epoch": 0.11985324647920878, "grad_norm": 0.8999597921541851, "learning_rate": 0.0001995307642736403, "loss": 12.6945, "step": 2201 }, { "epoch": 0.1199077004757918, "grad_norm": 0.7692650720140853, "learning_rate": 0.00019952991062294274, "loss": 12.7636, "step": 2202 }, { "epoch": 0.11996215447237481, "grad_norm": 0.8510153607320751, "learning_rate": 0.0001995290561982839, "loss": 12.7078, "step": 2203 }, { "epoch": 0.12001660846895781, "grad_norm": 0.783743892081945, "learning_rate": 0.00019952820099967044, "loss": 12.7044, "step": 2204 }, { "epoch": 0.12007106246554083, "grad_norm": 0.7653316210747961, "learning_rate": 0.000199527345027109, "loss": 12.7126, "step": 2205 }, { "epoch": 0.12012551646212384, "grad_norm": 0.8536028075609401, "learning_rate": 0.00019952648828060622, "loss": 12.7369, "step": 2206 }, { "epoch": 0.12017997045870685, "grad_norm": 0.7582973421226827, "learning_rate": 0.0001995256307601688, "loss": 12.7263, "step": 2207 }, { "epoch": 0.12023442445528987, "grad_norm": 0.9344165946344044, "learning_rate": 0.00019952477246580337, "loss": 12.6734, "step": 2208 }, { "epoch": 0.12028887845187287, "grad_norm": 0.7814808828459261, "learning_rate": 0.00019952391339751665, "loss": 12.7027, "step": 2209 }, { "epoch": 0.1203433324484559, "grad_norm": 0.8403333966762496, "learning_rate": 0.00019952305355531525, "loss": 12.7408, "step": 2210 }, { "epoch": 0.1203977864450389, "grad_norm": 0.8319704410053225, "learning_rate": 0.0001995221929392059, "loss": 12.7889, "step": 2211 }, { "epoch": 0.12045224044162191, "grad_norm": 0.7947473187321004, "learning_rate": 0.00019952133154919527, "loss": 12.7497, "step": 2212 }, { "epoch": 0.12050669443820493, "grad_norm": 0.7839426718385665, "learning_rate": 0.00019952046938529012, "loss": 12.7165, "step": 2213 }, { "epoch": 0.12056114843478793, "grad_norm": 0.7638925777090524, "learning_rate": 0.00019951960644749706, "loss": 12.6444, "step": 2214 }, { "epoch": 0.12061560243137094, "grad_norm": 0.7897163234635378, "learning_rate": 0.00019951874273582291, "loss": 12.6192, "step": 2215 }, { "epoch": 0.12067005642795396, "grad_norm": 0.7563778420238759, "learning_rate": 0.0001995178782502743, "loss": 12.6769, "step": 2216 }, { "epoch": 0.12072451042453697, "grad_norm": 0.9390704289738171, "learning_rate": 0.00019951701299085798, "loss": 12.8826, "step": 2217 }, { "epoch": 0.12077896442111999, "grad_norm": 0.8678848428558147, "learning_rate": 0.00019951614695758069, "loss": 12.8051, "step": 2218 }, { "epoch": 0.120833418417703, "grad_norm": 0.9110099563247891, "learning_rate": 0.00019951528015044913, "loss": 12.7489, "step": 2219 }, { "epoch": 0.120887872414286, "grad_norm": 0.8804716108031068, "learning_rate": 0.00019951441256947007, "loss": 12.6187, "step": 2220 }, { "epoch": 0.12094232641086902, "grad_norm": 0.7909037529558258, "learning_rate": 0.00019951354421465024, "loss": 12.688, "step": 2221 }, { "epoch": 0.12099678040745203, "grad_norm": 0.8543656619867279, "learning_rate": 0.00019951267508599643, "loss": 12.7867, "step": 2222 }, { "epoch": 0.12105123440403504, "grad_norm": 0.6982517176985668, "learning_rate": 0.00019951180518351536, "loss": 12.6393, "step": 2223 }, { "epoch": 0.12110568840061806, "grad_norm": 0.9337327935082366, "learning_rate": 0.0001995109345072138, "loss": 12.8072, "step": 2224 }, { "epoch": 0.12116014239720106, "grad_norm": 0.9208777629968543, "learning_rate": 0.00019951006305709852, "loss": 12.6814, "step": 2225 }, { "epoch": 0.12121459639378408, "grad_norm": 0.7661250813181665, "learning_rate": 0.0001995091908331763, "loss": 12.7313, "step": 2226 }, { "epoch": 0.12126905039036709, "grad_norm": 0.8286799047391251, "learning_rate": 0.00019950831783545393, "loss": 12.6088, "step": 2227 }, { "epoch": 0.1213235043869501, "grad_norm": 0.9007022814790493, "learning_rate": 0.00019950744406393818, "loss": 12.8683, "step": 2228 }, { "epoch": 0.12137795838353312, "grad_norm": 0.7584611212657733, "learning_rate": 0.00019950656951863588, "loss": 12.6202, "step": 2229 }, { "epoch": 0.12143241238011612, "grad_norm": 0.852208382943685, "learning_rate": 0.0001995056941995538, "loss": 12.8656, "step": 2230 }, { "epoch": 0.12148686637669913, "grad_norm": 0.6967002768684686, "learning_rate": 0.00019950481810669874, "loss": 12.6892, "step": 2231 }, { "epoch": 0.12154132037328215, "grad_norm": 0.8661162169371703, "learning_rate": 0.00019950394124007757, "loss": 12.5622, "step": 2232 }, { "epoch": 0.12159577436986516, "grad_norm": 0.7339221495289832, "learning_rate": 0.000199503063599697, "loss": 12.6426, "step": 2233 }, { "epoch": 0.12165022836644818, "grad_norm": 0.8185295105254345, "learning_rate": 0.000199502185185564, "loss": 12.76, "step": 2234 }, { "epoch": 0.12170468236303118, "grad_norm": 0.7841512755708057, "learning_rate": 0.00019950130599768527, "loss": 12.8623, "step": 2235 }, { "epoch": 0.12175913635961419, "grad_norm": 0.8584145810028976, "learning_rate": 0.0001995004260360677, "loss": 12.7982, "step": 2236 }, { "epoch": 0.12181359035619721, "grad_norm": 0.8331097929496447, "learning_rate": 0.00019949954530071814, "loss": 12.7162, "step": 2237 }, { "epoch": 0.12186804435278022, "grad_norm": 0.7656528692633603, "learning_rate": 0.00019949866379164344, "loss": 12.6292, "step": 2238 }, { "epoch": 0.12192249834936322, "grad_norm": 0.8247940047932069, "learning_rate": 0.00019949778150885042, "loss": 12.5262, "step": 2239 }, { "epoch": 0.12197695234594624, "grad_norm": 0.7842828299270601, "learning_rate": 0.00019949689845234598, "loss": 12.7852, "step": 2240 }, { "epoch": 0.12203140634252925, "grad_norm": 0.8133244862462937, "learning_rate": 0.00019949601462213696, "loss": 12.7031, "step": 2241 }, { "epoch": 0.12208586033911227, "grad_norm": 0.713675822257346, "learning_rate": 0.00019949513001823026, "loss": 12.592, "step": 2242 }, { "epoch": 0.12214031433569528, "grad_norm": 0.8027291483722292, "learning_rate": 0.00019949424464063274, "loss": 12.7108, "step": 2243 }, { "epoch": 0.12219476833227828, "grad_norm": 0.8099594121442285, "learning_rate": 0.0001994933584893513, "loss": 12.5928, "step": 2244 }, { "epoch": 0.1222492223288613, "grad_norm": 0.772771167685069, "learning_rate": 0.0001994924715643928, "loss": 12.5073, "step": 2245 }, { "epoch": 0.12230367632544431, "grad_norm": 0.7813372535261173, "learning_rate": 0.00019949158386576413, "loss": 12.4561, "step": 2246 }, { "epoch": 0.12235813032202732, "grad_norm": 0.789388708629161, "learning_rate": 0.00019949069539347227, "loss": 12.7137, "step": 2247 }, { "epoch": 0.12241258431861034, "grad_norm": 0.7318944286969588, "learning_rate": 0.00019948980614752405, "loss": 12.5061, "step": 2248 }, { "epoch": 0.12246703831519334, "grad_norm": 0.8059878366303711, "learning_rate": 0.00019948891612792645, "loss": 12.6272, "step": 2249 }, { "epoch": 0.12252149231177636, "grad_norm": 0.8177049921666123, "learning_rate": 0.0001994880253346863, "loss": 12.6948, "step": 2250 }, { "epoch": 0.12257594630835937, "grad_norm": 0.760296658253682, "learning_rate": 0.00019948713376781064, "loss": 12.6752, "step": 2251 }, { "epoch": 0.12263040030494238, "grad_norm": 0.781897864488266, "learning_rate": 0.0001994862414273063, "loss": 12.6803, "step": 2252 }, { "epoch": 0.1226848543015254, "grad_norm": 0.7677497443346096, "learning_rate": 0.00019948534831318025, "loss": 12.7446, "step": 2253 }, { "epoch": 0.1227393082981084, "grad_norm": 0.7855649193744076, "learning_rate": 0.0001994844544254395, "loss": 12.7643, "step": 2254 }, { "epoch": 0.12279376229469141, "grad_norm": 0.8211053950537586, "learning_rate": 0.0001994835597640909, "loss": 12.7967, "step": 2255 }, { "epoch": 0.12284821629127443, "grad_norm": 0.8089345545600491, "learning_rate": 0.00019948266432914146, "loss": 12.8287, "step": 2256 }, { "epoch": 0.12290267028785744, "grad_norm": 0.9045745536594514, "learning_rate": 0.00019948176812059818, "loss": 12.7589, "step": 2257 }, { "epoch": 0.12295712428444044, "grad_norm": 0.8362285356153368, "learning_rate": 0.00019948087113846796, "loss": 12.9202, "step": 2258 }, { "epoch": 0.12301157828102346, "grad_norm": 0.7167029837106066, "learning_rate": 0.0001994799733827578, "loss": 12.6848, "step": 2259 }, { "epoch": 0.12306603227760647, "grad_norm": 0.9069713997609858, "learning_rate": 0.0001994790748534747, "loss": 12.6965, "step": 2260 }, { "epoch": 0.12312048627418949, "grad_norm": 0.8544585569223123, "learning_rate": 0.00019947817555062563, "loss": 12.774, "step": 2261 }, { "epoch": 0.1231749402707725, "grad_norm": 0.7448066164304611, "learning_rate": 0.00019947727547421756, "loss": 12.6796, "step": 2262 }, { "epoch": 0.1232293942673555, "grad_norm": 0.7350628811540768, "learning_rate": 0.00019947637462425753, "loss": 12.7487, "step": 2263 }, { "epoch": 0.12328384826393853, "grad_norm": 0.8324612782436918, "learning_rate": 0.00019947547300075254, "loss": 12.6417, "step": 2264 }, { "epoch": 0.12333830226052153, "grad_norm": 0.8118064750379741, "learning_rate": 0.00019947457060370957, "loss": 12.6985, "step": 2265 }, { "epoch": 0.12339275625710454, "grad_norm": 0.7497361623227577, "learning_rate": 0.00019947366743313568, "loss": 12.6324, "step": 2266 }, { "epoch": 0.12344721025368756, "grad_norm": 0.8414180110506619, "learning_rate": 0.00019947276348903782, "loss": 12.6279, "step": 2267 }, { "epoch": 0.12350166425027057, "grad_norm": 0.7324293159778035, "learning_rate": 0.00019947185877142314, "loss": 12.7224, "step": 2268 }, { "epoch": 0.12355611824685359, "grad_norm": 0.8854429184534627, "learning_rate": 0.00019947095328029857, "loss": 12.7163, "step": 2269 }, { "epoch": 0.12361057224343659, "grad_norm": 0.9119324234099492, "learning_rate": 0.00019947004701567118, "loss": 12.8091, "step": 2270 }, { "epoch": 0.1236650262400196, "grad_norm": 0.8905496427432454, "learning_rate": 0.00019946913997754802, "loss": 12.7258, "step": 2271 }, { "epoch": 0.12371948023660262, "grad_norm": 0.9118610556427518, "learning_rate": 0.00019946823216593614, "loss": 12.7396, "step": 2272 }, { "epoch": 0.12377393423318563, "grad_norm": 0.7735477988456516, "learning_rate": 0.00019946732358084264, "loss": 12.8204, "step": 2273 }, { "epoch": 0.12382838822976863, "grad_norm": 0.9140441735532921, "learning_rate": 0.00019946641422227454, "loss": 12.6388, "step": 2274 }, { "epoch": 0.12388284222635165, "grad_norm": 0.7906245809149918, "learning_rate": 0.00019946550409023889, "loss": 12.6582, "step": 2275 }, { "epoch": 0.12393729622293466, "grad_norm": 0.9227199554541721, "learning_rate": 0.00019946459318474283, "loss": 12.7565, "step": 2276 }, { "epoch": 0.12399175021951768, "grad_norm": 0.8634134768192193, "learning_rate": 0.00019946368150579343, "loss": 12.7553, "step": 2277 }, { "epoch": 0.12404620421610069, "grad_norm": 0.8388803922129331, "learning_rate": 0.00019946276905339773, "loss": 12.872, "step": 2278 }, { "epoch": 0.12410065821268369, "grad_norm": 0.8528527510163847, "learning_rate": 0.00019946185582756288, "loss": 12.8716, "step": 2279 }, { "epoch": 0.12415511220926671, "grad_norm": 0.9367156268725985, "learning_rate": 0.00019946094182829595, "loss": 12.7734, "step": 2280 }, { "epoch": 0.12420956620584972, "grad_norm": 0.7778224462800217, "learning_rate": 0.00019946002705560406, "loss": 12.6889, "step": 2281 }, { "epoch": 0.12426402020243273, "grad_norm": 0.7491322077833296, "learning_rate": 0.0001994591115094943, "loss": 12.6262, "step": 2282 }, { "epoch": 0.12431847419901575, "grad_norm": 0.8402926697141994, "learning_rate": 0.00019945819518997384, "loss": 12.7234, "step": 2283 }, { "epoch": 0.12437292819559875, "grad_norm": 0.8636335512953711, "learning_rate": 0.00019945727809704975, "loss": 12.608, "step": 2284 }, { "epoch": 0.12442738219218177, "grad_norm": 0.7371257107997498, "learning_rate": 0.0001994563602307292, "loss": 12.5674, "step": 2285 }, { "epoch": 0.12448183618876478, "grad_norm": 0.7708748365298516, "learning_rate": 0.0001994554415910193, "loss": 12.6949, "step": 2286 }, { "epoch": 0.12453629018534779, "grad_norm": 0.8447973867023547, "learning_rate": 0.0001994545221779272, "loss": 12.688, "step": 2287 }, { "epoch": 0.12459074418193081, "grad_norm": 1.108122665821199, "learning_rate": 0.0001994536019914601, "loss": 12.9816, "step": 2288 }, { "epoch": 0.12464519817851381, "grad_norm": 0.8541583693169076, "learning_rate": 0.00019945268103162506, "loss": 12.6994, "step": 2289 }, { "epoch": 0.12469965217509682, "grad_norm": 0.8429082152082202, "learning_rate": 0.00019945175929842935, "loss": 12.7596, "step": 2290 }, { "epoch": 0.12475410617167984, "grad_norm": 1.0515128515445635, "learning_rate": 0.00019945083679188006, "loss": 12.8312, "step": 2291 }, { "epoch": 0.12480856016826285, "grad_norm": 0.8531126268343717, "learning_rate": 0.00019944991351198435, "loss": 12.7377, "step": 2292 }, { "epoch": 0.12486301416484587, "grad_norm": 0.7856715624294648, "learning_rate": 0.0001994489894587495, "loss": 12.6877, "step": 2293 }, { "epoch": 0.12491746816142887, "grad_norm": 1.0596739665009962, "learning_rate": 0.00019944806463218257, "loss": 12.6832, "step": 2294 }, { "epoch": 0.12497192215801188, "grad_norm": 0.8027737513539224, "learning_rate": 0.0001994471390322908, "loss": 12.7159, "step": 2295 }, { "epoch": 0.1250263761545949, "grad_norm": 0.9906726771062633, "learning_rate": 0.00019944621265908147, "loss": 12.6231, "step": 2296 }, { "epoch": 0.1250808301511779, "grad_norm": 0.8841377858178393, "learning_rate": 0.00019944528551256167, "loss": 12.8105, "step": 2297 }, { "epoch": 0.12513528414776093, "grad_norm": 0.9171916869214579, "learning_rate": 0.00019944435759273866, "loss": 12.7818, "step": 2298 }, { "epoch": 0.12518973814434392, "grad_norm": 0.8517112317085067, "learning_rate": 0.00019944342889961964, "loss": 12.6482, "step": 2299 }, { "epoch": 0.12524419214092694, "grad_norm": 0.8005258077612063, "learning_rate": 0.00019944249943321186, "loss": 12.7086, "step": 2300 }, { "epoch": 0.12529864613750996, "grad_norm": 0.7865738100260907, "learning_rate": 0.00019944156919352252, "loss": 12.6759, "step": 2301 }, { "epoch": 0.12535310013409295, "grad_norm": 0.8039482381347336, "learning_rate": 0.00019944063818055888, "loss": 12.6076, "step": 2302 }, { "epoch": 0.12540755413067597, "grad_norm": 0.7764854255852931, "learning_rate": 0.0001994397063943281, "loss": 12.7396, "step": 2303 }, { "epoch": 0.125462008127259, "grad_norm": 0.837158284917924, "learning_rate": 0.00019943877383483757, "loss": 12.5878, "step": 2304 }, { "epoch": 0.12551646212384202, "grad_norm": 0.8371200249719656, "learning_rate": 0.0001994378405020944, "loss": 12.7985, "step": 2305 }, { "epoch": 0.125570916120425, "grad_norm": 0.8137839369614454, "learning_rate": 0.00019943690639610592, "loss": 12.8366, "step": 2306 }, { "epoch": 0.12562537011700803, "grad_norm": 0.8556346327405181, "learning_rate": 0.00019943597151687937, "loss": 12.7881, "step": 2307 }, { "epoch": 0.12567982411359105, "grad_norm": 0.7709814149035997, "learning_rate": 0.00019943503586442206, "loss": 12.7591, "step": 2308 }, { "epoch": 0.12573427811017404, "grad_norm": 0.7898640756861948, "learning_rate": 0.00019943409943874122, "loss": 12.6926, "step": 2309 }, { "epoch": 0.12578873210675706, "grad_norm": 0.7367569366625164, "learning_rate": 0.00019943316223984414, "loss": 12.7746, "step": 2310 }, { "epoch": 0.12584318610334008, "grad_norm": 0.7454982746348482, "learning_rate": 0.00019943222426773813, "loss": 12.7185, "step": 2311 }, { "epoch": 0.12589764009992307, "grad_norm": 0.7967554594677826, "learning_rate": 0.00019943128552243046, "loss": 12.6746, "step": 2312 }, { "epoch": 0.1259520940965061, "grad_norm": 0.7758457456666698, "learning_rate": 0.00019943034600392845, "loss": 12.783, "step": 2313 }, { "epoch": 0.12600654809308912, "grad_norm": 0.8175081193982631, "learning_rate": 0.00019942940571223935, "loss": 12.7546, "step": 2314 }, { "epoch": 0.1260610020896721, "grad_norm": 0.7223717731557533, "learning_rate": 0.00019942846464737058, "loss": 12.6316, "step": 2315 }, { "epoch": 0.12611545608625513, "grad_norm": 0.7201639049114413, "learning_rate": 0.00019942752280932937, "loss": 12.6579, "step": 2316 }, { "epoch": 0.12616991008283815, "grad_norm": 0.6723939651791366, "learning_rate": 0.00019942658019812305, "loss": 12.7038, "step": 2317 }, { "epoch": 0.12622436407942114, "grad_norm": 0.756418238434827, "learning_rate": 0.000199425636813759, "loss": 12.7699, "step": 2318 }, { "epoch": 0.12627881807600416, "grad_norm": 0.7184296523805659, "learning_rate": 0.0001994246926562445, "loss": 12.6331, "step": 2319 }, { "epoch": 0.12633327207258718, "grad_norm": 0.8591570290913682, "learning_rate": 0.0001994237477255869, "loss": 12.7931, "step": 2320 }, { "epoch": 0.12638772606917018, "grad_norm": 0.7893364107713335, "learning_rate": 0.0001994228020217936, "loss": 12.7506, "step": 2321 }, { "epoch": 0.1264421800657532, "grad_norm": 0.8951456925670717, "learning_rate": 0.00019942185554487193, "loss": 12.7711, "step": 2322 }, { "epoch": 0.12649663406233622, "grad_norm": 0.9598649738838202, "learning_rate": 0.0001994209082948292, "loss": 12.616, "step": 2323 }, { "epoch": 0.12655108805891924, "grad_norm": 0.7527902561752199, "learning_rate": 0.00019941996027167286, "loss": 12.5971, "step": 2324 }, { "epoch": 0.12660554205550223, "grad_norm": 0.768501031065377, "learning_rate": 0.0001994190114754102, "loss": 12.6549, "step": 2325 }, { "epoch": 0.12665999605208525, "grad_norm": 0.817273213089173, "learning_rate": 0.00019941806190604863, "loss": 12.5479, "step": 2326 }, { "epoch": 0.12671445004866827, "grad_norm": 0.8645238416141345, "learning_rate": 0.00019941711156359554, "loss": 12.8375, "step": 2327 }, { "epoch": 0.12676890404525126, "grad_norm": 1.278825422993769, "learning_rate": 0.00019941616044805833, "loss": 12.6886, "step": 2328 }, { "epoch": 0.12682335804183428, "grad_norm": 0.7926814342834742, "learning_rate": 0.0001994152085594444, "loss": 12.8379, "step": 2329 }, { "epoch": 0.1268778120384173, "grad_norm": 0.8287504474355063, "learning_rate": 0.0001994142558977611, "loss": 12.6508, "step": 2330 }, { "epoch": 0.1269322660350003, "grad_norm": 1.0534286243495066, "learning_rate": 0.0001994133024630159, "loss": 12.7352, "step": 2331 }, { "epoch": 0.12698672003158332, "grad_norm": 0.7493620548408583, "learning_rate": 0.00019941234825521616, "loss": 12.7401, "step": 2332 }, { "epoch": 0.12704117402816634, "grad_norm": 0.8318357837689145, "learning_rate": 0.00019941139327436935, "loss": 12.652, "step": 2333 }, { "epoch": 0.12709562802474933, "grad_norm": 0.7715913785784512, "learning_rate": 0.0001994104375204829, "loss": 12.7926, "step": 2334 }, { "epoch": 0.12715008202133235, "grad_norm": 0.7542855191671053, "learning_rate": 0.00019940948099356418, "loss": 12.6063, "step": 2335 }, { "epoch": 0.12720453601791537, "grad_norm": 0.9658985909567381, "learning_rate": 0.00019940852369362068, "loss": 12.6083, "step": 2336 }, { "epoch": 0.12725899001449836, "grad_norm": 0.6851974060541999, "learning_rate": 0.00019940756562065982, "loss": 12.5249, "step": 2337 }, { "epoch": 0.12731344401108138, "grad_norm": 0.7785530678240814, "learning_rate": 0.00019940660677468904, "loss": 12.7989, "step": 2338 }, { "epoch": 0.1273678980076644, "grad_norm": 0.7379846325689866, "learning_rate": 0.00019940564715571586, "loss": 12.6929, "step": 2339 }, { "epoch": 0.12742235200424742, "grad_norm": 0.792901212371482, "learning_rate": 0.00019940468676374765, "loss": 12.779, "step": 2340 }, { "epoch": 0.12747680600083042, "grad_norm": 0.7337004708678737, "learning_rate": 0.000199403725598792, "loss": 12.6837, "step": 2341 }, { "epoch": 0.12753125999741344, "grad_norm": 0.7368460634198486, "learning_rate": 0.00019940276366085623, "loss": 12.6762, "step": 2342 }, { "epoch": 0.12758571399399646, "grad_norm": 0.7485113299926779, "learning_rate": 0.00019940180094994792, "loss": 12.6945, "step": 2343 }, { "epoch": 0.12764016799057945, "grad_norm": 0.7121957433105658, "learning_rate": 0.00019940083746607456, "loss": 12.6726, "step": 2344 }, { "epoch": 0.12769462198716247, "grad_norm": 1.5233087180492444, "learning_rate": 0.0001993998732092436, "loss": 12.6615, "step": 2345 }, { "epoch": 0.1277490759837455, "grad_norm": 0.7962138637784169, "learning_rate": 0.00019939890817946259, "loss": 12.6639, "step": 2346 }, { "epoch": 0.12780352998032848, "grad_norm": 0.7149436169520963, "learning_rate": 0.00019939794237673896, "loss": 12.7055, "step": 2347 }, { "epoch": 0.1278579839769115, "grad_norm": 0.7602346657839104, "learning_rate": 0.00019939697580108025, "loss": 12.7092, "step": 2348 }, { "epoch": 0.12791243797349452, "grad_norm": 0.751533657680629, "learning_rate": 0.00019939600845249403, "loss": 12.6796, "step": 2349 }, { "epoch": 0.12796689197007752, "grad_norm": 0.7980077399207712, "learning_rate": 0.00019939504033098776, "loss": 12.6877, "step": 2350 }, { "epoch": 0.12802134596666054, "grad_norm": 0.7532150415596107, "learning_rate": 0.000199394071436569, "loss": 12.7105, "step": 2351 }, { "epoch": 0.12807579996324356, "grad_norm": 0.7315030759058961, "learning_rate": 0.00019939310176924523, "loss": 12.646, "step": 2352 }, { "epoch": 0.12813025395982655, "grad_norm": 0.7864302503318793, "learning_rate": 0.00019939213132902408, "loss": 12.6704, "step": 2353 }, { "epoch": 0.12818470795640957, "grad_norm": 0.7913586628644312, "learning_rate": 0.00019939116011591303, "loss": 12.7471, "step": 2354 }, { "epoch": 0.1282391619529926, "grad_norm": 0.8119181526158408, "learning_rate": 0.00019939018812991966, "loss": 12.7335, "step": 2355 }, { "epoch": 0.1282936159495756, "grad_norm": 0.7735205615622864, "learning_rate": 0.00019938921537105152, "loss": 12.7511, "step": 2356 }, { "epoch": 0.1283480699461586, "grad_norm": 0.7968665962275414, "learning_rate": 0.00019938824183931617, "loss": 12.7516, "step": 2357 }, { "epoch": 0.12840252394274163, "grad_norm": 0.7338392610553128, "learning_rate": 0.00019938726753472116, "loss": 12.6123, "step": 2358 }, { "epoch": 0.12845697793932465, "grad_norm": 0.7827847883010737, "learning_rate": 0.00019938629245727413, "loss": 12.7633, "step": 2359 }, { "epoch": 0.12851143193590764, "grad_norm": 0.7450380140451616, "learning_rate": 0.00019938531660698258, "loss": 12.773, "step": 2360 }, { "epoch": 0.12856588593249066, "grad_norm": 0.7989051885239961, "learning_rate": 0.00019938433998385418, "loss": 12.6847, "step": 2361 }, { "epoch": 0.12862033992907368, "grad_norm": 0.7356763210194389, "learning_rate": 0.00019938336258789647, "loss": 12.655, "step": 2362 }, { "epoch": 0.12867479392565667, "grad_norm": 0.8986725123346193, "learning_rate": 0.00019938238441911705, "loss": 12.9576, "step": 2363 }, { "epoch": 0.1287292479222397, "grad_norm": 0.7481316704368125, "learning_rate": 0.00019938140547752354, "loss": 12.737, "step": 2364 }, { "epoch": 0.1287837019188227, "grad_norm": 0.7894228636204875, "learning_rate": 0.0001993804257631236, "loss": 12.7876, "step": 2365 }, { "epoch": 0.1288381559154057, "grad_norm": 0.8122936643074704, "learning_rate": 0.0001993794452759248, "loss": 12.7443, "step": 2366 }, { "epoch": 0.12889260991198873, "grad_norm": 0.7765834068020498, "learning_rate": 0.00019937846401593473, "loss": 12.8403, "step": 2367 }, { "epoch": 0.12894706390857175, "grad_norm": 0.6921187212435841, "learning_rate": 0.00019937748198316105, "loss": 12.6311, "step": 2368 }, { "epoch": 0.12900151790515474, "grad_norm": 0.7502571830149722, "learning_rate": 0.00019937649917761143, "loss": 12.6613, "step": 2369 }, { "epoch": 0.12905597190173776, "grad_norm": 0.7821551845777908, "learning_rate": 0.0001993755155992935, "loss": 12.8988, "step": 2370 }, { "epoch": 0.12911042589832078, "grad_norm": 0.7418220682205031, "learning_rate": 0.00019937453124821487, "loss": 12.6488, "step": 2371 }, { "epoch": 0.1291648798949038, "grad_norm": 0.8117136365721119, "learning_rate": 0.00019937354612438321, "loss": 12.6928, "step": 2372 }, { "epoch": 0.1292193338914868, "grad_norm": 0.7981778766751755, "learning_rate": 0.00019937256022780622, "loss": 12.8224, "step": 2373 }, { "epoch": 0.1292737878880698, "grad_norm": 0.7479712328362667, "learning_rate": 0.00019937157355849153, "loss": 12.6721, "step": 2374 }, { "epoch": 0.12932824188465283, "grad_norm": 0.7884826639891351, "learning_rate": 0.0001993705861164468, "loss": 12.8831, "step": 2375 }, { "epoch": 0.12938269588123583, "grad_norm": 0.7255531485760002, "learning_rate": 0.00019936959790167974, "loss": 12.6682, "step": 2376 }, { "epoch": 0.12943714987781885, "grad_norm": 0.7100455346830856, "learning_rate": 0.00019936860891419804, "loss": 12.669, "step": 2377 }, { "epoch": 0.12949160387440187, "grad_norm": 0.7806212789623882, "learning_rate": 0.00019936761915400936, "loss": 12.532, "step": 2378 }, { "epoch": 0.12954605787098486, "grad_norm": 0.7431760599900052, "learning_rate": 0.0001993666286211214, "loss": 12.6516, "step": 2379 }, { "epoch": 0.12960051186756788, "grad_norm": 0.7213877200770814, "learning_rate": 0.00019936563731554188, "loss": 12.6298, "step": 2380 }, { "epoch": 0.1296549658641509, "grad_norm": 0.8736262624845132, "learning_rate": 0.0001993646452372785, "loss": 12.8446, "step": 2381 }, { "epoch": 0.1297094198607339, "grad_norm": 0.7855509988563504, "learning_rate": 0.000199363652386339, "loss": 12.7075, "step": 2382 }, { "epoch": 0.1297638738573169, "grad_norm": 0.7299958322594098, "learning_rate": 0.00019936265876273104, "loss": 12.6899, "step": 2383 }, { "epoch": 0.12981832785389993, "grad_norm": 0.8168217259134454, "learning_rate": 0.00019936166436646237, "loss": 12.7358, "step": 2384 }, { "epoch": 0.12987278185048293, "grad_norm": 0.7768691061368659, "learning_rate": 0.00019936066919754077, "loss": 12.705, "step": 2385 }, { "epoch": 0.12992723584706595, "grad_norm": 0.8847332828074436, "learning_rate": 0.00019935967325597392, "loss": 12.7719, "step": 2386 }, { "epoch": 0.12998168984364897, "grad_norm": 0.7083908366634801, "learning_rate": 0.0001993586765417696, "loss": 12.7458, "step": 2387 }, { "epoch": 0.13003614384023196, "grad_norm": 0.7875102036153997, "learning_rate": 0.00019935767905493557, "loss": 12.6281, "step": 2388 }, { "epoch": 0.13009059783681498, "grad_norm": 0.7395792011876929, "learning_rate": 0.00019935668079547957, "loss": 12.6863, "step": 2389 }, { "epoch": 0.130145051833398, "grad_norm": 0.7621670186428621, "learning_rate": 0.00019935568176340928, "loss": 12.7617, "step": 2390 }, { "epoch": 0.13019950582998102, "grad_norm": 0.7847645774595151, "learning_rate": 0.00019935468195873262, "loss": 12.7119, "step": 2391 }, { "epoch": 0.13025395982656401, "grad_norm": 0.7375661269533667, "learning_rate": 0.00019935368138145727, "loss": 12.7158, "step": 2392 }, { "epoch": 0.13030841382314703, "grad_norm": 0.8126269955672756, "learning_rate": 0.00019935268003159101, "loss": 12.6302, "step": 2393 }, { "epoch": 0.13036286781973005, "grad_norm": 0.8268543789843175, "learning_rate": 0.0001993516779091417, "loss": 12.7678, "step": 2394 }, { "epoch": 0.13041732181631305, "grad_norm": 0.8205187707857627, "learning_rate": 0.00019935067501411705, "loss": 12.6628, "step": 2395 }, { "epoch": 0.13047177581289607, "grad_norm": 0.7378866568993926, "learning_rate": 0.0001993496713465249, "loss": 12.7025, "step": 2396 }, { "epoch": 0.1305262298094791, "grad_norm": 0.877940758716135, "learning_rate": 0.00019934866690637302, "loss": 12.72, "step": 2397 }, { "epoch": 0.13058068380606208, "grad_norm": 0.7267513693465012, "learning_rate": 0.00019934766169366929, "loss": 12.6185, "step": 2398 }, { "epoch": 0.1306351378026451, "grad_norm": 0.6846617254702555, "learning_rate": 0.0001993466557084214, "loss": 12.6491, "step": 2399 }, { "epoch": 0.13068959179922812, "grad_norm": 0.7491981620181035, "learning_rate": 0.00019934564895063734, "loss": 12.6363, "step": 2400 }, { "epoch": 0.13074404579581111, "grad_norm": 0.7751118812572666, "learning_rate": 0.00019934464142032482, "loss": 12.7001, "step": 2401 }, { "epoch": 0.13079849979239414, "grad_norm": 0.8264496185916214, "learning_rate": 0.00019934363311749172, "loss": 12.73, "step": 2402 }, { "epoch": 0.13085295378897716, "grad_norm": 0.8321637617252079, "learning_rate": 0.00019934262404214584, "loss": 12.6028, "step": 2403 }, { "epoch": 0.13090740778556015, "grad_norm": 0.7848027006827053, "learning_rate": 0.0001993416141942951, "loss": 12.9119, "step": 2404 }, { "epoch": 0.13096186178214317, "grad_norm": 0.857679708710815, "learning_rate": 0.00019934060357394725, "loss": 12.6421, "step": 2405 }, { "epoch": 0.1310163157787262, "grad_norm": 0.8055289289746956, "learning_rate": 0.00019933959218111026, "loss": 12.6846, "step": 2406 }, { "epoch": 0.1310707697753092, "grad_norm": 0.7747666351779556, "learning_rate": 0.0001993385800157919, "loss": 12.7859, "step": 2407 }, { "epoch": 0.1311252237718922, "grad_norm": 0.756568540020912, "learning_rate": 0.00019933756707800012, "loss": 12.8728, "step": 2408 }, { "epoch": 0.13117967776847522, "grad_norm": 0.7997659508246298, "learning_rate": 0.00019933655336774276, "loss": 12.7844, "step": 2409 }, { "epoch": 0.13123413176505824, "grad_norm": 0.8394640526431785, "learning_rate": 0.00019933553888502767, "loss": 12.7839, "step": 2410 }, { "epoch": 0.13128858576164124, "grad_norm": 0.8828481353034261, "learning_rate": 0.0001993345236298628, "loss": 12.6463, "step": 2411 }, { "epoch": 0.13134303975822426, "grad_norm": 0.7343617366920848, "learning_rate": 0.000199333507602256, "loss": 12.7517, "step": 2412 }, { "epoch": 0.13139749375480728, "grad_norm": 0.8459936383316501, "learning_rate": 0.0001993324908022152, "loss": 12.7808, "step": 2413 }, { "epoch": 0.13145194775139027, "grad_norm": 0.7608363034537843, "learning_rate": 0.00019933147322974827, "loss": 12.7196, "step": 2414 }, { "epoch": 0.1315064017479733, "grad_norm": 0.6668086521669773, "learning_rate": 0.00019933045488486318, "loss": 12.5531, "step": 2415 }, { "epoch": 0.1315608557445563, "grad_norm": 0.7002068468454027, "learning_rate": 0.00019932943576756777, "loss": 12.7032, "step": 2416 }, { "epoch": 0.1316153097411393, "grad_norm": 0.9000736480641097, "learning_rate": 0.00019932841587787006, "loss": 12.6522, "step": 2417 }, { "epoch": 0.13166976373772232, "grad_norm": 0.7721331574493356, "learning_rate": 0.0001993273952157779, "loss": 12.6671, "step": 2418 }, { "epoch": 0.13172421773430534, "grad_norm": 0.8308437806311205, "learning_rate": 0.00019932637378129926, "loss": 12.5842, "step": 2419 }, { "epoch": 0.13177867173088834, "grad_norm": 0.8219447869064825, "learning_rate": 0.00019932535157444206, "loss": 12.9223, "step": 2420 }, { "epoch": 0.13183312572747136, "grad_norm": 0.8560251642378786, "learning_rate": 0.00019932432859521432, "loss": 12.7095, "step": 2421 }, { "epoch": 0.13188757972405438, "grad_norm": 0.7308984604514989, "learning_rate": 0.00019932330484362392, "loss": 12.7873, "step": 2422 }, { "epoch": 0.1319420337206374, "grad_norm": 0.810039047049506, "learning_rate": 0.00019932228031967886, "loss": 12.6626, "step": 2423 }, { "epoch": 0.1319964877172204, "grad_norm": 0.7508769945880449, "learning_rate": 0.00019932125502338706, "loss": 12.7308, "step": 2424 }, { "epoch": 0.1320509417138034, "grad_norm": 0.756496840443002, "learning_rate": 0.00019932022895475653, "loss": 12.7622, "step": 2425 }, { "epoch": 0.13210539571038643, "grad_norm": 0.8579954679459876, "learning_rate": 0.00019931920211379526, "loss": 12.6203, "step": 2426 }, { "epoch": 0.13215984970696942, "grad_norm": 0.8188439639621717, "learning_rate": 0.00019931817450051121, "loss": 12.777, "step": 2427 }, { "epoch": 0.13221430370355244, "grad_norm": 0.7532868906855189, "learning_rate": 0.00019931714611491237, "loss": 12.8348, "step": 2428 }, { "epoch": 0.13226875770013546, "grad_norm": 0.7958028147926751, "learning_rate": 0.00019931611695700677, "loss": 12.694, "step": 2429 }, { "epoch": 0.13232321169671846, "grad_norm": 0.7814894757386551, "learning_rate": 0.00019931508702680236, "loss": 12.6925, "step": 2430 }, { "epoch": 0.13237766569330148, "grad_norm": 0.8324479778200059, "learning_rate": 0.00019931405632430722, "loss": 12.6425, "step": 2431 }, { "epoch": 0.1324321196898845, "grad_norm": 0.8092206373047455, "learning_rate": 0.00019931302484952926, "loss": 12.8163, "step": 2432 }, { "epoch": 0.1324865736864675, "grad_norm": 0.7377978092615135, "learning_rate": 0.00019931199260247664, "loss": 12.5553, "step": 2433 }, { "epoch": 0.1325410276830505, "grad_norm": 0.755796514648496, "learning_rate": 0.00019931095958315725, "loss": 12.7638, "step": 2434 }, { "epoch": 0.13259548167963353, "grad_norm": 0.8040857355178929, "learning_rate": 0.00019930992579157922, "loss": 12.7484, "step": 2435 }, { "epoch": 0.13264993567621652, "grad_norm": 0.7939845278937419, "learning_rate": 0.00019930889122775054, "loss": 12.5594, "step": 2436 }, { "epoch": 0.13270438967279954, "grad_norm": 0.7699514168539291, "learning_rate": 0.00019930785589167927, "loss": 12.7626, "step": 2437 }, { "epoch": 0.13275884366938256, "grad_norm": 1.033416481849656, "learning_rate": 0.00019930681978337343, "loss": 12.8065, "step": 2438 }, { "epoch": 0.13281329766596559, "grad_norm": 0.7417687328317938, "learning_rate": 0.00019930578290284115, "loss": 12.6928, "step": 2439 }, { "epoch": 0.13286775166254858, "grad_norm": 0.8562815513817471, "learning_rate": 0.00019930474525009043, "loss": 12.8132, "step": 2440 }, { "epoch": 0.1329222056591316, "grad_norm": 0.8832737109439536, "learning_rate": 0.00019930370682512936, "loss": 12.7486, "step": 2441 }, { "epoch": 0.13297665965571462, "grad_norm": 0.8655967771917551, "learning_rate": 0.000199302667627966, "loss": 12.7677, "step": 2442 }, { "epoch": 0.1330311136522976, "grad_norm": 0.8352025276927129, "learning_rate": 0.00019930162765860847, "loss": 12.6883, "step": 2443 }, { "epoch": 0.13308556764888063, "grad_norm": 0.7657269519474376, "learning_rate": 0.0001993005869170648, "loss": 12.639, "step": 2444 }, { "epoch": 0.13314002164546365, "grad_norm": 1.0200413938057031, "learning_rate": 0.00019929954540334315, "loss": 12.6656, "step": 2445 }, { "epoch": 0.13319447564204664, "grad_norm": 0.7208134883027584, "learning_rate": 0.00019929850311745155, "loss": 12.7271, "step": 2446 }, { "epoch": 0.13324892963862967, "grad_norm": 0.8273881026402172, "learning_rate": 0.00019929746005939813, "loss": 12.7771, "step": 2447 }, { "epoch": 0.13330338363521269, "grad_norm": 0.7493445587390899, "learning_rate": 0.00019929641622919104, "loss": 12.6896, "step": 2448 }, { "epoch": 0.13335783763179568, "grad_norm": 0.742375521459718, "learning_rate": 0.00019929537162683835, "loss": 12.6528, "step": 2449 }, { "epoch": 0.1334122916283787, "grad_norm": 0.9208561020185162, "learning_rate": 0.0001992943262523482, "loss": 12.7464, "step": 2450 }, { "epoch": 0.13346674562496172, "grad_norm": 0.7293788209577327, "learning_rate": 0.0001992932801057287, "loss": 12.6756, "step": 2451 }, { "epoch": 0.1335211996215447, "grad_norm": 0.9920197065465052, "learning_rate": 0.00019929223318698803, "loss": 12.7579, "step": 2452 }, { "epoch": 0.13357565361812773, "grad_norm": 0.9922516429306217, "learning_rate": 0.0001992911854961343, "loss": 12.6291, "step": 2453 }, { "epoch": 0.13363010761471075, "grad_norm": 0.8123075917723392, "learning_rate": 0.00019929013703317563, "loss": 12.7533, "step": 2454 }, { "epoch": 0.13368456161129375, "grad_norm": 0.8599656764223896, "learning_rate": 0.00019928908779812026, "loss": 12.6901, "step": 2455 }, { "epoch": 0.13373901560787677, "grad_norm": 0.8907052736792533, "learning_rate": 0.00019928803779097623, "loss": 12.7852, "step": 2456 }, { "epoch": 0.13379346960445979, "grad_norm": 0.8338265561480035, "learning_rate": 0.00019928698701175178, "loss": 12.6099, "step": 2457 }, { "epoch": 0.1338479236010428, "grad_norm": 0.9079405685140904, "learning_rate": 0.0001992859354604551, "loss": 12.6389, "step": 2458 }, { "epoch": 0.1339023775976258, "grad_norm": 0.8278803604932522, "learning_rate": 0.00019928488313709434, "loss": 12.7932, "step": 2459 }, { "epoch": 0.13395683159420882, "grad_norm": 0.8003952204297358, "learning_rate": 0.00019928383004167764, "loss": 12.7772, "step": 2460 }, { "epoch": 0.13401128559079184, "grad_norm": 0.672183505197408, "learning_rate": 0.00019928277617421326, "loss": 12.5482, "step": 2461 }, { "epoch": 0.13406573958737483, "grad_norm": 0.7396340985238629, "learning_rate": 0.00019928172153470933, "loss": 12.7081, "step": 2462 }, { "epoch": 0.13412019358395785, "grad_norm": 0.735938888362427, "learning_rate": 0.00019928066612317412, "loss": 12.5721, "step": 2463 }, { "epoch": 0.13417464758054087, "grad_norm": 0.7331259242018381, "learning_rate": 0.00019927960993961578, "loss": 12.5909, "step": 2464 }, { "epoch": 0.13422910157712387, "grad_norm": 0.8850945952602003, "learning_rate": 0.00019927855298404253, "loss": 12.7318, "step": 2465 }, { "epoch": 0.1342835555737069, "grad_norm": 0.7471688438849723, "learning_rate": 0.00019927749525646264, "loss": 12.6139, "step": 2466 }, { "epoch": 0.1343380095702899, "grad_norm": 0.8094440813264298, "learning_rate": 0.0001992764367568843, "loss": 12.8425, "step": 2467 }, { "epoch": 0.1343924635668729, "grad_norm": 0.8310365902118073, "learning_rate": 0.0001992753774853157, "loss": 12.652, "step": 2468 }, { "epoch": 0.13444691756345592, "grad_norm": 0.7309480519652607, "learning_rate": 0.00019927431744176514, "loss": 12.6415, "step": 2469 }, { "epoch": 0.13450137156003894, "grad_norm": 0.8196147699360571, "learning_rate": 0.00019927325662624082, "loss": 12.7008, "step": 2470 }, { "epoch": 0.13455582555662193, "grad_norm": 0.8298741408117133, "learning_rate": 0.00019927219503875103, "loss": 12.7277, "step": 2471 }, { "epoch": 0.13461027955320495, "grad_norm": 0.6554252989687059, "learning_rate": 0.00019927113267930398, "loss": 12.6143, "step": 2472 }, { "epoch": 0.13466473354978797, "grad_norm": 0.7473824587961554, "learning_rate": 0.000199270069547908, "loss": 12.6095, "step": 2473 }, { "epoch": 0.134719187546371, "grad_norm": 0.8160647428862018, "learning_rate": 0.00019926900564457128, "loss": 12.692, "step": 2474 }, { "epoch": 0.134773641542954, "grad_norm": 0.7492014235360235, "learning_rate": 0.0001992679409693021, "loss": 12.7103, "step": 2475 }, { "epoch": 0.134828095539537, "grad_norm": 0.839607875095279, "learning_rate": 0.0001992668755221088, "loss": 12.8071, "step": 2476 }, { "epoch": 0.13488254953612003, "grad_norm": 0.801927344699619, "learning_rate": 0.00019926580930299963, "loss": 12.7339, "step": 2477 }, { "epoch": 0.13493700353270302, "grad_norm": 0.8960165647196202, "learning_rate": 0.00019926474231198285, "loss": 12.6735, "step": 2478 }, { "epoch": 0.13499145752928604, "grad_norm": 0.7933722921821766, "learning_rate": 0.0001992636745490668, "loss": 12.7974, "step": 2479 }, { "epoch": 0.13504591152586906, "grad_norm": 0.9451088853717208, "learning_rate": 0.0001992626060142598, "loss": 12.5382, "step": 2480 }, { "epoch": 0.13510036552245205, "grad_norm": 0.8345545262101434, "learning_rate": 0.00019926153670757007, "loss": 12.7088, "step": 2481 }, { "epoch": 0.13515481951903507, "grad_norm": 0.7211833903776345, "learning_rate": 0.00019926046662900606, "loss": 12.6001, "step": 2482 }, { "epoch": 0.1352092735156181, "grad_norm": 0.8170867246642093, "learning_rate": 0.00019925939577857595, "loss": 12.7586, "step": 2483 }, { "epoch": 0.1352637275122011, "grad_norm": 0.8766455026502107, "learning_rate": 0.00019925832415628817, "loss": 12.8397, "step": 2484 }, { "epoch": 0.1353181815087841, "grad_norm": 0.8304795223103192, "learning_rate": 0.000199257251762151, "loss": 12.8194, "step": 2485 }, { "epoch": 0.13537263550536713, "grad_norm": 0.8258332528487458, "learning_rate": 0.0001992561785961728, "loss": 12.8221, "step": 2486 }, { "epoch": 0.13542708950195012, "grad_norm": 0.7722041721631009, "learning_rate": 0.0001992551046583619, "loss": 12.6855, "step": 2487 }, { "epoch": 0.13548154349853314, "grad_norm": 0.8196913899421888, "learning_rate": 0.00019925402994872666, "loss": 12.6272, "step": 2488 }, { "epoch": 0.13553599749511616, "grad_norm": 0.8220844770061915, "learning_rate": 0.00019925295446727548, "loss": 12.7254, "step": 2489 }, { "epoch": 0.13559045149169918, "grad_norm": 0.7441266743639062, "learning_rate": 0.00019925187821401663, "loss": 12.6433, "step": 2490 }, { "epoch": 0.13564490548828217, "grad_norm": 0.7922384453675508, "learning_rate": 0.00019925080118895857, "loss": 12.7242, "step": 2491 }, { "epoch": 0.1356993594848652, "grad_norm": 0.7615577137990498, "learning_rate": 0.00019924972339210962, "loss": 12.6124, "step": 2492 }, { "epoch": 0.13575381348144822, "grad_norm": 0.8763841639212409, "learning_rate": 0.00019924864482347813, "loss": 12.7149, "step": 2493 }, { "epoch": 0.1358082674780312, "grad_norm": 0.755861381321944, "learning_rate": 0.00019924756548307258, "loss": 12.7621, "step": 2494 }, { "epoch": 0.13586272147461423, "grad_norm": 0.7151226118739447, "learning_rate": 0.00019924648537090133, "loss": 12.5913, "step": 2495 }, { "epoch": 0.13591717547119725, "grad_norm": 0.7134477879040643, "learning_rate": 0.00019924540448697275, "loss": 12.7356, "step": 2496 }, { "epoch": 0.13597162946778024, "grad_norm": 0.7416597457191845, "learning_rate": 0.00019924432283129526, "loss": 12.6654, "step": 2497 }, { "epoch": 0.13602608346436326, "grad_norm": 0.8960485887635284, "learning_rate": 0.00019924324040387727, "loss": 12.6783, "step": 2498 }, { "epoch": 0.13608053746094628, "grad_norm": 0.8043344559108634, "learning_rate": 0.0001992421572047272, "loss": 12.6638, "step": 2499 }, { "epoch": 0.13613499145752928, "grad_norm": 0.8477672467383391, "learning_rate": 0.00019924107323385348, "loss": 12.6786, "step": 2500 }, { "epoch": 0.1361894454541123, "grad_norm": 0.8350506299141978, "learning_rate": 0.00019923998849126452, "loss": 12.7616, "step": 2501 }, { "epoch": 0.13624389945069532, "grad_norm": 0.755115387664609, "learning_rate": 0.0001992389029769688, "loss": 12.5811, "step": 2502 }, { "epoch": 0.1362983534472783, "grad_norm": 0.8296141248216201, "learning_rate": 0.00019923781669097467, "loss": 12.697, "step": 2503 }, { "epoch": 0.13635280744386133, "grad_norm": 0.777487444662502, "learning_rate": 0.00019923672963329068, "loss": 12.9226, "step": 2504 }, { "epoch": 0.13640726144044435, "grad_norm": 0.8009633102281982, "learning_rate": 0.00019923564180392522, "loss": 12.7052, "step": 2505 }, { "epoch": 0.13646171543702737, "grad_norm": 0.8660245138597285, "learning_rate": 0.00019923455320288678, "loss": 12.6371, "step": 2506 }, { "epoch": 0.13651616943361036, "grad_norm": 0.799333483782979, "learning_rate": 0.0001992334638301838, "loss": 12.5216, "step": 2507 }, { "epoch": 0.13657062343019338, "grad_norm": 0.7890530468567644, "learning_rate": 0.00019923237368582478, "loss": 12.7124, "step": 2508 }, { "epoch": 0.1366250774267764, "grad_norm": 0.8051322273979854, "learning_rate": 0.00019923128276981816, "loss": 12.7529, "step": 2509 }, { "epoch": 0.1366795314233594, "grad_norm": 0.7613188323047255, "learning_rate": 0.00019923019108217244, "loss": 12.6688, "step": 2510 }, { "epoch": 0.13673398541994242, "grad_norm": 0.7175554055582198, "learning_rate": 0.00019922909862289613, "loss": 12.6997, "step": 2511 }, { "epoch": 0.13678843941652544, "grad_norm": 0.7617772690976709, "learning_rate": 0.00019922800539199772, "loss": 12.7001, "step": 2512 }, { "epoch": 0.13684289341310843, "grad_norm": 0.8815313899645151, "learning_rate": 0.00019922691138948566, "loss": 12.7137, "step": 2513 }, { "epoch": 0.13689734740969145, "grad_norm": 0.6953988947363619, "learning_rate": 0.0001992258166153685, "loss": 12.6242, "step": 2514 }, { "epoch": 0.13695180140627447, "grad_norm": 0.7211808446872298, "learning_rate": 0.00019922472106965482, "loss": 12.6297, "step": 2515 }, { "epoch": 0.13700625540285746, "grad_norm": 0.7805289346970223, "learning_rate": 0.00019922362475235301, "loss": 12.7185, "step": 2516 }, { "epoch": 0.13706070939944048, "grad_norm": 0.7435082825109165, "learning_rate": 0.00019922252766347166, "loss": 12.6931, "step": 2517 }, { "epoch": 0.1371151633960235, "grad_norm": 0.7593964045770272, "learning_rate": 0.0001992214298030193, "loss": 12.7587, "step": 2518 }, { "epoch": 0.1371696173926065, "grad_norm": 1.0711067900247304, "learning_rate": 0.00019922033117100447, "loss": 12.7403, "step": 2519 }, { "epoch": 0.13722407138918952, "grad_norm": 0.8442713964578787, "learning_rate": 0.0001992192317674357, "loss": 12.6896, "step": 2520 }, { "epoch": 0.13727852538577254, "grad_norm": 0.7379985680047709, "learning_rate": 0.00019921813159232151, "loss": 12.5902, "step": 2521 }, { "epoch": 0.13733297938235553, "grad_norm": 0.641792785472628, "learning_rate": 0.00019921703064567056, "loss": 12.5831, "step": 2522 }, { "epoch": 0.13738743337893855, "grad_norm": 0.7085982767459995, "learning_rate": 0.0001992159289274913, "loss": 12.5308, "step": 2523 }, { "epoch": 0.13744188737552157, "grad_norm": 0.8689263472913269, "learning_rate": 0.00019921482643779235, "loss": 12.6573, "step": 2524 }, { "epoch": 0.1374963413721046, "grad_norm": 0.8102736269735316, "learning_rate": 0.00019921372317658224, "loss": 12.7661, "step": 2525 }, { "epoch": 0.13755079536868758, "grad_norm": 0.9204667558381068, "learning_rate": 0.00019921261914386963, "loss": 12.6912, "step": 2526 }, { "epoch": 0.1376052493652706, "grad_norm": 0.7654727932975922, "learning_rate": 0.000199211514339663, "loss": 12.7644, "step": 2527 }, { "epoch": 0.13765970336185362, "grad_norm": 1.0129557668524476, "learning_rate": 0.00019921040876397103, "loss": 12.7797, "step": 2528 }, { "epoch": 0.13771415735843662, "grad_norm": 0.7456783582541698, "learning_rate": 0.0001992093024168023, "loss": 12.6864, "step": 2529 }, { "epoch": 0.13776861135501964, "grad_norm": 0.7895332665858877, "learning_rate": 0.00019920819529816534, "loss": 12.686, "step": 2530 }, { "epoch": 0.13782306535160266, "grad_norm": 0.7372087027065335, "learning_rate": 0.0001992070874080689, "loss": 12.7634, "step": 2531 }, { "epoch": 0.13787751934818565, "grad_norm": 0.8003564961950486, "learning_rate": 0.00019920597874652143, "loss": 12.6631, "step": 2532 }, { "epoch": 0.13793197334476867, "grad_norm": 0.7813288190540522, "learning_rate": 0.0001992048693135317, "loss": 12.6787, "step": 2533 }, { "epoch": 0.1379864273413517, "grad_norm": 0.7680793879946148, "learning_rate": 0.0001992037591091082, "loss": 12.772, "step": 2534 }, { "epoch": 0.13804088133793468, "grad_norm": 0.7710086521748657, "learning_rate": 0.00019920264813325965, "loss": 12.6736, "step": 2535 }, { "epoch": 0.1380953353345177, "grad_norm": 0.7414826352875737, "learning_rate": 0.0001992015363859947, "loss": 12.7004, "step": 2536 }, { "epoch": 0.13814978933110073, "grad_norm": 0.8210043929211484, "learning_rate": 0.00019920042386732196, "loss": 12.6852, "step": 2537 }, { "epoch": 0.13820424332768372, "grad_norm": 0.7534293641843519, "learning_rate": 0.0001991993105772501, "loss": 12.6567, "step": 2538 }, { "epoch": 0.13825869732426674, "grad_norm": 0.8809000111534994, "learning_rate": 0.00019919819651578776, "loss": 12.7414, "step": 2539 }, { "epoch": 0.13831315132084976, "grad_norm": 0.7685448480984839, "learning_rate": 0.0001991970816829436, "loss": 12.686, "step": 2540 }, { "epoch": 0.13836760531743278, "grad_norm": 0.7667664978318831, "learning_rate": 0.0001991959660787263, "loss": 12.6296, "step": 2541 }, { "epoch": 0.13842205931401577, "grad_norm": 0.810234524969098, "learning_rate": 0.00019919484970314452, "loss": 12.7798, "step": 2542 }, { "epoch": 0.1384765133105988, "grad_norm": 0.7663707870937774, "learning_rate": 0.00019919373255620695, "loss": 12.6499, "step": 2543 }, { "epoch": 0.1385309673071818, "grad_norm": 0.7614050036552538, "learning_rate": 0.0001991926146379223, "loss": 12.7253, "step": 2544 }, { "epoch": 0.1385854213037648, "grad_norm": 0.7614062286590654, "learning_rate": 0.00019919149594829926, "loss": 12.6519, "step": 2545 }, { "epoch": 0.13863987530034783, "grad_norm": 0.7781884061851773, "learning_rate": 0.00019919037648734647, "loss": 12.7314, "step": 2546 }, { "epoch": 0.13869432929693085, "grad_norm": 0.7678996328527208, "learning_rate": 0.0001991892562550727, "loss": 12.628, "step": 2547 }, { "epoch": 0.13874878329351384, "grad_norm": 0.8359242351768541, "learning_rate": 0.00019918813525148665, "loss": 12.6445, "step": 2548 }, { "epoch": 0.13880323729009686, "grad_norm": 0.772133255022356, "learning_rate": 0.000199187013476597, "loss": 12.7278, "step": 2549 }, { "epoch": 0.13885769128667988, "grad_norm": 0.9285555529661113, "learning_rate": 0.0001991858909304125, "loss": 12.6475, "step": 2550 }, { "epoch": 0.13891214528326287, "grad_norm": 0.748614685498673, "learning_rate": 0.00019918476761294191, "loss": 12.6584, "step": 2551 }, { "epoch": 0.1389665992798459, "grad_norm": 0.9228432707306571, "learning_rate": 0.00019918364352419391, "loss": 12.6811, "step": 2552 }, { "epoch": 0.1390210532764289, "grad_norm": 0.8046804415097579, "learning_rate": 0.00019918251866417729, "loss": 12.5896, "step": 2553 }, { "epoch": 0.1390755072730119, "grad_norm": 0.841468945990471, "learning_rate": 0.00019918139303290073, "loss": 12.5972, "step": 2554 }, { "epoch": 0.13912996126959493, "grad_norm": 1.0219679946081544, "learning_rate": 0.00019918026663037305, "loss": 12.4939, "step": 2555 }, { "epoch": 0.13918441526617795, "grad_norm": 0.7614948596107192, "learning_rate": 0.000199179139456603, "loss": 12.7957, "step": 2556 }, { "epoch": 0.13923886926276097, "grad_norm": 1.125763541075716, "learning_rate": 0.00019917801151159931, "loss": 12.6848, "step": 2557 }, { "epoch": 0.13929332325934396, "grad_norm": 0.7167558919471144, "learning_rate": 0.00019917688279537076, "loss": 12.6508, "step": 2558 }, { "epoch": 0.13934777725592698, "grad_norm": 1.0221540450131401, "learning_rate": 0.00019917575330792616, "loss": 12.6141, "step": 2559 }, { "epoch": 0.13940223125251, "grad_norm": 0.863351781333219, "learning_rate": 0.00019917462304927424, "loss": 12.7706, "step": 2560 }, { "epoch": 0.139456685249093, "grad_norm": 0.845744534563339, "learning_rate": 0.00019917349201942385, "loss": 12.8127, "step": 2561 }, { "epoch": 0.139511139245676, "grad_norm": 0.8801687590672354, "learning_rate": 0.00019917236021838375, "loss": 12.7274, "step": 2562 }, { "epoch": 0.13956559324225903, "grad_norm": 0.7298449140222769, "learning_rate": 0.00019917122764616274, "loss": 12.779, "step": 2563 }, { "epoch": 0.13962004723884203, "grad_norm": 0.7832122595156031, "learning_rate": 0.00019917009430276962, "loss": 12.5244, "step": 2564 }, { "epoch": 0.13967450123542505, "grad_norm": 0.7527806598249291, "learning_rate": 0.00019916896018821323, "loss": 12.5743, "step": 2565 }, { "epoch": 0.13972895523200807, "grad_norm": 0.7631874231541864, "learning_rate": 0.0001991678253025024, "loss": 12.6855, "step": 2566 }, { "epoch": 0.13978340922859106, "grad_norm": 0.7998953837645839, "learning_rate": 0.0001991666896456459, "loss": 12.6305, "step": 2567 }, { "epoch": 0.13983786322517408, "grad_norm": 0.7844095551718671, "learning_rate": 0.00019916555321765258, "loss": 12.7029, "step": 2568 }, { "epoch": 0.1398923172217571, "grad_norm": 0.761532138143132, "learning_rate": 0.0001991644160185313, "loss": 12.6633, "step": 2569 }, { "epoch": 0.1399467712183401, "grad_norm": 0.7509558404118452, "learning_rate": 0.0001991632780482909, "loss": 12.7437, "step": 2570 }, { "epoch": 0.14000122521492311, "grad_norm": 0.7299977982300054, "learning_rate": 0.0001991621393069402, "loss": 12.5211, "step": 2571 }, { "epoch": 0.14005567921150613, "grad_norm": 0.8211312042072629, "learning_rate": 0.0001991609997944881, "loss": 12.7196, "step": 2572 }, { "epoch": 0.14011013320808915, "grad_norm": 0.735678027919518, "learning_rate": 0.00019915985951094342, "loss": 12.6854, "step": 2573 }, { "epoch": 0.14016458720467215, "grad_norm": 0.7765696874719373, "learning_rate": 0.00019915871845631506, "loss": 12.6538, "step": 2574 }, { "epoch": 0.14021904120125517, "grad_norm": 0.7117218844333242, "learning_rate": 0.00019915757663061188, "loss": 12.6685, "step": 2575 }, { "epoch": 0.1402734951978382, "grad_norm": 0.8627660601894691, "learning_rate": 0.00019915643403384272, "loss": 12.7439, "step": 2576 }, { "epoch": 0.14032794919442118, "grad_norm": 0.7356120518935028, "learning_rate": 0.00019915529066601652, "loss": 12.8082, "step": 2577 }, { "epoch": 0.1403824031910042, "grad_norm": 0.7130187369102607, "learning_rate": 0.00019915414652714217, "loss": 12.7221, "step": 2578 }, { "epoch": 0.14043685718758722, "grad_norm": 0.7332688257439224, "learning_rate": 0.00019915300161722852, "loss": 12.7258, "step": 2579 }, { "epoch": 0.14049131118417021, "grad_norm": 0.7630906052367811, "learning_rate": 0.00019915185593628453, "loss": 12.6905, "step": 2580 }, { "epoch": 0.14054576518075323, "grad_norm": 0.9138139283846225, "learning_rate": 0.00019915070948431905, "loss": 12.6803, "step": 2581 }, { "epoch": 0.14060021917733626, "grad_norm": 0.7234398462032869, "learning_rate": 0.00019914956226134103, "loss": 12.669, "step": 2582 }, { "epoch": 0.14065467317391925, "grad_norm": 0.7252327764689702, "learning_rate": 0.0001991484142673594, "loss": 12.6976, "step": 2583 }, { "epoch": 0.14070912717050227, "grad_norm": 0.7898203683406123, "learning_rate": 0.00019914726550238307, "loss": 12.8086, "step": 2584 }, { "epoch": 0.1407635811670853, "grad_norm": 0.752966658793422, "learning_rate": 0.00019914611596642096, "loss": 12.6919, "step": 2585 }, { "epoch": 0.14081803516366828, "grad_norm": 0.9067939979456567, "learning_rate": 0.00019914496565948207, "loss": 12.6859, "step": 2586 }, { "epoch": 0.1408724891602513, "grad_norm": 0.715271796852438, "learning_rate": 0.00019914381458157525, "loss": 12.6533, "step": 2587 }, { "epoch": 0.14092694315683432, "grad_norm": 0.763482898726477, "learning_rate": 0.00019914266273270953, "loss": 12.5664, "step": 2588 }, { "epoch": 0.14098139715341731, "grad_norm": 0.7376861678712301, "learning_rate": 0.0001991415101128938, "loss": 12.6123, "step": 2589 }, { "epoch": 0.14103585115000034, "grad_norm": 0.8084686482048893, "learning_rate": 0.00019914035672213712, "loss": 12.6054, "step": 2590 }, { "epoch": 0.14109030514658336, "grad_norm": 0.8438341417341487, "learning_rate": 0.00019913920256044837, "loss": 12.7978, "step": 2591 }, { "epoch": 0.14114475914316638, "grad_norm": 0.7656913542268167, "learning_rate": 0.00019913804762783656, "loss": 12.6378, "step": 2592 }, { "epoch": 0.14119921313974937, "grad_norm": 0.7226525503659902, "learning_rate": 0.00019913689192431065, "loss": 12.6956, "step": 2593 }, { "epoch": 0.1412536671363324, "grad_norm": 0.7566159616987096, "learning_rate": 0.00019913573544987968, "loss": 12.7507, "step": 2594 }, { "epoch": 0.1413081211329154, "grad_norm": 0.9359513791113954, "learning_rate": 0.00019913457820455258, "loss": 12.7758, "step": 2595 }, { "epoch": 0.1413625751294984, "grad_norm": 0.7716699789726437, "learning_rate": 0.00019913342018833835, "loss": 12.5928, "step": 2596 }, { "epoch": 0.14141702912608142, "grad_norm": 0.855705424725422, "learning_rate": 0.00019913226140124608, "loss": 12.6545, "step": 2597 }, { "epoch": 0.14147148312266444, "grad_norm": 0.7477286757754348, "learning_rate": 0.00019913110184328466, "loss": 12.6477, "step": 2598 }, { "epoch": 0.14152593711924744, "grad_norm": 0.7656128177871533, "learning_rate": 0.0001991299415144632, "loss": 12.6694, "step": 2599 }, { "epoch": 0.14158039111583046, "grad_norm": 0.8201103526357508, "learning_rate": 0.00019912878041479067, "loss": 12.7311, "step": 2600 }, { "epoch": 0.14163484511241348, "grad_norm": 0.7757724450627341, "learning_rate": 0.00019912761854427616, "loss": 12.7812, "step": 2601 }, { "epoch": 0.14168929910899647, "grad_norm": 0.7652702562645756, "learning_rate": 0.00019912645590292865, "loss": 12.8233, "step": 2602 }, { "epoch": 0.1417437531055795, "grad_norm": 0.8294807319246501, "learning_rate": 0.00019912529249075718, "loss": 12.606, "step": 2603 }, { "epoch": 0.1417982071021625, "grad_norm": 0.7085809011050793, "learning_rate": 0.0001991241283077708, "loss": 12.6265, "step": 2604 }, { "epoch": 0.1418526610987455, "grad_norm": 0.7706834778729315, "learning_rate": 0.00019912296335397863, "loss": 12.7192, "step": 2605 }, { "epoch": 0.14190711509532852, "grad_norm": 0.6831567962305096, "learning_rate": 0.00019912179762938964, "loss": 12.5742, "step": 2606 }, { "epoch": 0.14196156909191154, "grad_norm": 0.7245976056716881, "learning_rate": 0.0001991206311340129, "loss": 12.7203, "step": 2607 }, { "epoch": 0.14201602308849456, "grad_norm": 0.7092967526585179, "learning_rate": 0.00019911946386785755, "loss": 12.7227, "step": 2608 }, { "epoch": 0.14207047708507756, "grad_norm": 0.8299215978940133, "learning_rate": 0.0001991182958309326, "loss": 12.6424, "step": 2609 }, { "epoch": 0.14212493108166058, "grad_norm": 0.7427920778837738, "learning_rate": 0.00019911712702324716, "loss": 12.7091, "step": 2610 }, { "epoch": 0.1421793850782436, "grad_norm": 0.8184591293143213, "learning_rate": 0.00019911595744481034, "loss": 12.7721, "step": 2611 }, { "epoch": 0.1422338390748266, "grad_norm": 0.8594294714337313, "learning_rate": 0.00019911478709563123, "loss": 12.7675, "step": 2612 }, { "epoch": 0.1422882930714096, "grad_norm": 0.7905177187484469, "learning_rate": 0.00019911361597571887, "loss": 12.7313, "step": 2613 }, { "epoch": 0.14234274706799263, "grad_norm": 0.6542339698773333, "learning_rate": 0.00019911244408508241, "loss": 12.6825, "step": 2614 }, { "epoch": 0.14239720106457562, "grad_norm": 0.6864872742719939, "learning_rate": 0.000199111271423731, "loss": 12.7059, "step": 2615 }, { "epoch": 0.14245165506115864, "grad_norm": 0.8509600880961752, "learning_rate": 0.0001991100979916737, "loss": 12.825, "step": 2616 }, { "epoch": 0.14250610905774166, "grad_norm": 0.8027983418211375, "learning_rate": 0.00019910892378891966, "loss": 12.7105, "step": 2617 }, { "epoch": 0.14256056305432466, "grad_norm": 0.7234317331840034, "learning_rate": 0.000199107748815478, "loss": 12.6505, "step": 2618 }, { "epoch": 0.14261501705090768, "grad_norm": 0.7664867974063245, "learning_rate": 0.0001991065730713579, "loss": 12.7388, "step": 2619 }, { "epoch": 0.1426694710474907, "grad_norm": 0.7636742822664422, "learning_rate": 0.00019910539655656844, "loss": 12.6586, "step": 2620 }, { "epoch": 0.1427239250440737, "grad_norm": 0.785599042442663, "learning_rate": 0.0001991042192711188, "loss": 12.6612, "step": 2621 }, { "epoch": 0.1427783790406567, "grad_norm": 0.6841416305877701, "learning_rate": 0.00019910304121501811, "loss": 12.5579, "step": 2622 }, { "epoch": 0.14283283303723973, "grad_norm": 0.7405822791815138, "learning_rate": 0.00019910186238827557, "loss": 12.7123, "step": 2623 }, { "epoch": 0.14288728703382275, "grad_norm": 0.7209279711717004, "learning_rate": 0.00019910068279090036, "loss": 12.6044, "step": 2624 }, { "epoch": 0.14294174103040574, "grad_norm": 0.8261997294453759, "learning_rate": 0.0001990995024229016, "loss": 12.6075, "step": 2625 }, { "epoch": 0.14299619502698876, "grad_norm": 0.7613534265460365, "learning_rate": 0.00019909832128428846, "loss": 12.6447, "step": 2626 }, { "epoch": 0.14305064902357179, "grad_norm": 0.7563704893104031, "learning_rate": 0.0001990971393750702, "loss": 12.5855, "step": 2627 }, { "epoch": 0.14310510302015478, "grad_norm": 0.7399588497910308, "learning_rate": 0.00019909595669525594, "loss": 12.6363, "step": 2628 }, { "epoch": 0.1431595570167378, "grad_norm": 0.7423730889180716, "learning_rate": 0.0001990947732448549, "loss": 12.623, "step": 2629 }, { "epoch": 0.14321401101332082, "grad_norm": 0.7780993727686799, "learning_rate": 0.00019909358902387626, "loss": 12.6195, "step": 2630 }, { "epoch": 0.1432684650099038, "grad_norm": 0.695759998723745, "learning_rate": 0.0001990924040323293, "loss": 12.6837, "step": 2631 }, { "epoch": 0.14332291900648683, "grad_norm": 0.7818291616707426, "learning_rate": 0.0001990912182702232, "loss": 12.7589, "step": 2632 }, { "epoch": 0.14337737300306985, "grad_norm": 0.728177780364699, "learning_rate": 0.0001990900317375671, "loss": 12.6438, "step": 2633 }, { "epoch": 0.14343182699965284, "grad_norm": 0.7192789143055213, "learning_rate": 0.00019908884443437037, "loss": 12.7923, "step": 2634 }, { "epoch": 0.14348628099623587, "grad_norm": 0.7560552245070992, "learning_rate": 0.00019908765636064213, "loss": 12.5752, "step": 2635 }, { "epoch": 0.14354073499281889, "grad_norm": 0.7917742140949682, "learning_rate": 0.00019908646751639166, "loss": 12.7673, "step": 2636 }, { "epoch": 0.14359518898940188, "grad_norm": 0.7889192817451162, "learning_rate": 0.00019908527790162822, "loss": 12.7232, "step": 2637 }, { "epoch": 0.1436496429859849, "grad_norm": 0.7590576115205909, "learning_rate": 0.000199084087516361, "loss": 12.629, "step": 2638 }, { "epoch": 0.14370409698256792, "grad_norm": 0.7968942636474549, "learning_rate": 0.00019908289636059933, "loss": 12.7596, "step": 2639 }, { "epoch": 0.14375855097915094, "grad_norm": 0.7118388249750108, "learning_rate": 0.00019908170443435244, "loss": 12.6103, "step": 2640 }, { "epoch": 0.14381300497573393, "grad_norm": 0.7676927176741307, "learning_rate": 0.0001990805117376296, "loss": 12.659, "step": 2641 }, { "epoch": 0.14386745897231695, "grad_norm": 0.74628843718605, "learning_rate": 0.0001990793182704401, "loss": 12.6684, "step": 2642 }, { "epoch": 0.14392191296889997, "grad_norm": 0.765142973884477, "learning_rate": 0.00019907812403279314, "loss": 12.6766, "step": 2643 }, { "epoch": 0.14397636696548297, "grad_norm": 1.120038400913752, "learning_rate": 0.00019907692902469814, "loss": 12.7644, "step": 2644 }, { "epoch": 0.144030820962066, "grad_norm": 0.8613421582516949, "learning_rate": 0.0001990757332461643, "loss": 12.7654, "step": 2645 }, { "epoch": 0.144085274958649, "grad_norm": 0.7597908331512231, "learning_rate": 0.00019907453669720096, "loss": 12.732, "step": 2646 }, { "epoch": 0.144139728955232, "grad_norm": 0.9086438951971259, "learning_rate": 0.0001990733393778174, "loss": 12.6952, "step": 2647 }, { "epoch": 0.14419418295181502, "grad_norm": 0.735107309446148, "learning_rate": 0.00019907214128802293, "loss": 12.6305, "step": 2648 }, { "epoch": 0.14424863694839804, "grad_norm": 0.8510749009659634, "learning_rate": 0.0001990709424278269, "loss": 12.6497, "step": 2649 }, { "epoch": 0.14430309094498103, "grad_norm": 2.553487336315287, "learning_rate": 0.00019906974279723858, "loss": 12.859, "step": 2650 }, { "epoch": 0.14435754494156405, "grad_norm": 0.8497281216272348, "learning_rate": 0.00019906854239626733, "loss": 12.6034, "step": 2651 }, { "epoch": 0.14441199893814707, "grad_norm": 0.7741178450896294, "learning_rate": 0.0001990673412249225, "loss": 12.7448, "step": 2652 }, { "epoch": 0.14446645293473007, "grad_norm": 0.8050047338057631, "learning_rate": 0.00019906613928321338, "loss": 12.5269, "step": 2653 }, { "epoch": 0.1445209069313131, "grad_norm": 0.764721188085812, "learning_rate": 0.00019906493657114935, "loss": 12.8101, "step": 2654 }, { "epoch": 0.1445753609278961, "grad_norm": 0.9205425051456191, "learning_rate": 0.0001990637330887398, "loss": 12.6569, "step": 2655 }, { "epoch": 0.1446298149244791, "grad_norm": 0.7721467979457826, "learning_rate": 0.00019906252883599402, "loss": 12.7271, "step": 2656 }, { "epoch": 0.14468426892106212, "grad_norm": 0.7858920669134861, "learning_rate": 0.0001990613238129214, "loss": 12.8252, "step": 2657 }, { "epoch": 0.14473872291764514, "grad_norm": 0.765720003106791, "learning_rate": 0.0001990601180195313, "loss": 12.7792, "step": 2658 }, { "epoch": 0.14479317691422816, "grad_norm": 0.7591642681701061, "learning_rate": 0.00019905891145583312, "loss": 12.6342, "step": 2659 }, { "epoch": 0.14484763091081115, "grad_norm": 0.7738866144071866, "learning_rate": 0.00019905770412183626, "loss": 12.7038, "step": 2660 }, { "epoch": 0.14490208490739417, "grad_norm": 0.7751819170403272, "learning_rate": 0.00019905649601755006, "loss": 12.7029, "step": 2661 }, { "epoch": 0.1449565389039772, "grad_norm": 1.037253868229172, "learning_rate": 0.00019905528714298393, "loss": 12.6876, "step": 2662 }, { "epoch": 0.1450109929005602, "grad_norm": 0.8228831140879636, "learning_rate": 0.00019905407749814727, "loss": 12.6693, "step": 2663 }, { "epoch": 0.1450654468971432, "grad_norm": 0.7472181487251265, "learning_rate": 0.0001990528670830495, "loss": 12.5575, "step": 2664 }, { "epoch": 0.14511990089372623, "grad_norm": 0.8676633248694123, "learning_rate": 0.00019905165589770003, "loss": 12.6504, "step": 2665 }, { "epoch": 0.14517435489030922, "grad_norm": 0.833166831577227, "learning_rate": 0.00019905044394210827, "loss": 12.8559, "step": 2666 }, { "epoch": 0.14522880888689224, "grad_norm": 0.7431818143738881, "learning_rate": 0.00019904923121628367, "loss": 12.599, "step": 2667 }, { "epoch": 0.14528326288347526, "grad_norm": 0.9159990154597104, "learning_rate": 0.0001990480177202356, "loss": 12.7133, "step": 2668 }, { "epoch": 0.14533771688005825, "grad_norm": 0.7690112766314912, "learning_rate": 0.00019904680345397355, "loss": 12.7107, "step": 2669 }, { "epoch": 0.14539217087664127, "grad_norm": 0.7763570974348, "learning_rate": 0.00019904558841750696, "loss": 12.797, "step": 2670 }, { "epoch": 0.1454466248732243, "grad_norm": 0.885475603482203, "learning_rate": 0.00019904437261084526, "loss": 12.648, "step": 2671 }, { "epoch": 0.1455010788698073, "grad_norm": 0.7985814589081149, "learning_rate": 0.00019904315603399788, "loss": 12.9738, "step": 2672 }, { "epoch": 0.1455555328663903, "grad_norm": 0.937349046219652, "learning_rate": 0.00019904193868697432, "loss": 12.7084, "step": 2673 }, { "epoch": 0.14560998686297333, "grad_norm": 0.7784287617162705, "learning_rate": 0.00019904072056978404, "loss": 12.6306, "step": 2674 }, { "epoch": 0.14566444085955635, "grad_norm": 0.6986676816712994, "learning_rate": 0.00019903950168243654, "loss": 12.6791, "step": 2675 }, { "epoch": 0.14571889485613934, "grad_norm": 0.7982549576873136, "learning_rate": 0.0001990382820249412, "loss": 12.5907, "step": 2676 }, { "epoch": 0.14577334885272236, "grad_norm": 0.7865765018957946, "learning_rate": 0.00019903706159730763, "loss": 12.683, "step": 2677 }, { "epoch": 0.14582780284930538, "grad_norm": 0.7727965652112779, "learning_rate": 0.00019903584039954525, "loss": 12.73, "step": 2678 }, { "epoch": 0.14588225684588838, "grad_norm": 0.7201995605130986, "learning_rate": 0.00019903461843166352, "loss": 12.5938, "step": 2679 }, { "epoch": 0.1459367108424714, "grad_norm": 0.776229206051119, "learning_rate": 0.00019903339569367202, "loss": 12.6367, "step": 2680 }, { "epoch": 0.14599116483905442, "grad_norm": 0.7445473575011761, "learning_rate": 0.00019903217218558025, "loss": 12.7577, "step": 2681 }, { "epoch": 0.1460456188356374, "grad_norm": 0.7552656467499095, "learning_rate": 0.0001990309479073977, "loss": 12.7018, "step": 2682 }, { "epoch": 0.14610007283222043, "grad_norm": 0.7021799060374931, "learning_rate": 0.00019902972285913386, "loss": 12.5492, "step": 2683 }, { "epoch": 0.14615452682880345, "grad_norm": 0.7101796742359953, "learning_rate": 0.0001990284970407983, "loss": 12.6654, "step": 2684 }, { "epoch": 0.14620898082538644, "grad_norm": 0.7770953225119109, "learning_rate": 0.00019902727045240055, "loss": 12.7807, "step": 2685 }, { "epoch": 0.14626343482196946, "grad_norm": 0.8205011594444994, "learning_rate": 0.0001990260430939501, "loss": 12.7381, "step": 2686 }, { "epoch": 0.14631788881855248, "grad_norm": 0.8011777697339707, "learning_rate": 0.00019902481496545657, "loss": 12.6545, "step": 2687 }, { "epoch": 0.14637234281513548, "grad_norm": 0.7561686468407363, "learning_rate": 0.0001990235860669295, "loss": 12.7167, "step": 2688 }, { "epoch": 0.1464267968117185, "grad_norm": 0.7239528910960851, "learning_rate": 0.00019902235639837837, "loss": 12.7289, "step": 2689 }, { "epoch": 0.14648125080830152, "grad_norm": 0.7739801417470364, "learning_rate": 0.0001990211259598128, "loss": 12.7053, "step": 2690 }, { "epoch": 0.14653570480488454, "grad_norm": 0.8893441290926933, "learning_rate": 0.00019901989475124235, "loss": 12.7693, "step": 2691 }, { "epoch": 0.14659015880146753, "grad_norm": 0.7881993519858137, "learning_rate": 0.00019901866277267658, "loss": 12.7999, "step": 2692 }, { "epoch": 0.14664461279805055, "grad_norm": 0.7960471770020839, "learning_rate": 0.0001990174300241251, "loss": 12.7491, "step": 2693 }, { "epoch": 0.14669906679463357, "grad_norm": 0.7753083056275317, "learning_rate": 0.00019901619650559749, "loss": 12.7001, "step": 2694 }, { "epoch": 0.14675352079121656, "grad_norm": 0.7696722499154163, "learning_rate": 0.00019901496221710332, "loss": 12.6874, "step": 2695 }, { "epoch": 0.14680797478779958, "grad_norm": 0.767633402938913, "learning_rate": 0.00019901372715865217, "loss": 12.6353, "step": 2696 }, { "epoch": 0.1468624287843826, "grad_norm": 0.7438423585527448, "learning_rate": 0.0001990124913302537, "loss": 12.7425, "step": 2697 }, { "epoch": 0.1469168827809656, "grad_norm": 0.7757341838499866, "learning_rate": 0.0001990112547319175, "loss": 12.8279, "step": 2698 }, { "epoch": 0.14697133677754862, "grad_norm": 0.8258565883469542, "learning_rate": 0.00019901001736365317, "loss": 12.6837, "step": 2699 }, { "epoch": 0.14702579077413164, "grad_norm": 0.8004203175040449, "learning_rate": 0.00019900877922547034, "loss": 12.7775, "step": 2700 }, { "epoch": 0.14708024477071463, "grad_norm": 0.7947067824549463, "learning_rate": 0.00019900754031737866, "loss": 12.7518, "step": 2701 }, { "epoch": 0.14713469876729765, "grad_norm": 0.9069636423116467, "learning_rate": 0.00019900630063938773, "loss": 12.7515, "step": 2702 }, { "epoch": 0.14718915276388067, "grad_norm": 0.7338969251876433, "learning_rate": 0.00019900506019150717, "loss": 12.6569, "step": 2703 }, { "epoch": 0.14724360676046366, "grad_norm": 0.747056261107894, "learning_rate": 0.00019900381897374668, "loss": 12.6114, "step": 2704 }, { "epoch": 0.14729806075704668, "grad_norm": 0.792429734575613, "learning_rate": 0.0001990025769861159, "loss": 12.7098, "step": 2705 }, { "epoch": 0.1473525147536297, "grad_norm": 0.7958552148528896, "learning_rate": 0.0001990013342286245, "loss": 12.6546, "step": 2706 }, { "epoch": 0.14740696875021272, "grad_norm": 0.7635541867716035, "learning_rate": 0.00019900009070128208, "loss": 12.6584, "step": 2707 }, { "epoch": 0.14746142274679572, "grad_norm": 0.820738536195993, "learning_rate": 0.00019899884640409837, "loss": 12.8891, "step": 2708 }, { "epoch": 0.14751587674337874, "grad_norm": 0.678436148866974, "learning_rate": 0.00019899760133708304, "loss": 12.7063, "step": 2709 }, { "epoch": 0.14757033073996176, "grad_norm": 0.7338891818564695, "learning_rate": 0.00019899635550024573, "loss": 12.6804, "step": 2710 }, { "epoch": 0.14762478473654475, "grad_norm": 0.7677957679961459, "learning_rate": 0.0001989951088935962, "loss": 12.5545, "step": 2711 }, { "epoch": 0.14767923873312777, "grad_norm": 0.7455124550308035, "learning_rate": 0.00019899386151714407, "loss": 12.7571, "step": 2712 }, { "epoch": 0.1477336927297108, "grad_norm": 0.7554064266083648, "learning_rate": 0.00019899261337089907, "loss": 12.7199, "step": 2713 }, { "epoch": 0.14778814672629378, "grad_norm": 0.7000666033089813, "learning_rate": 0.0001989913644548709, "loss": 12.7881, "step": 2714 }, { "epoch": 0.1478426007228768, "grad_norm": 0.7409220000487262, "learning_rate": 0.00019899011476906932, "loss": 12.6374, "step": 2715 }, { "epoch": 0.14789705471945983, "grad_norm": 0.7127590442514861, "learning_rate": 0.00019898886431350397, "loss": 12.6687, "step": 2716 }, { "epoch": 0.14795150871604282, "grad_norm": 0.8103966832252935, "learning_rate": 0.0001989876130881846, "loss": 12.6737, "step": 2717 }, { "epoch": 0.14800596271262584, "grad_norm": 0.7294411546682413, "learning_rate": 0.000198986361093121, "loss": 12.6861, "step": 2718 }, { "epoch": 0.14806041670920886, "grad_norm": 0.7964709794829142, "learning_rate": 0.0001989851083283228, "loss": 12.5385, "step": 2719 }, { "epoch": 0.14811487070579185, "grad_norm": 0.7113450423677704, "learning_rate": 0.00019898385479379986, "loss": 12.601, "step": 2720 }, { "epoch": 0.14816932470237487, "grad_norm": 0.7822473870313964, "learning_rate": 0.00019898260048956183, "loss": 12.603, "step": 2721 }, { "epoch": 0.1482237786989579, "grad_norm": 0.8135540599153726, "learning_rate": 0.0001989813454156185, "loss": 12.6545, "step": 2722 }, { "epoch": 0.14827823269554088, "grad_norm": 0.8233043530388209, "learning_rate": 0.00019898008957197966, "loss": 12.7226, "step": 2723 }, { "epoch": 0.1483326866921239, "grad_norm": 0.9156143678477184, "learning_rate": 0.000198978832958655, "loss": 12.7056, "step": 2724 }, { "epoch": 0.14838714068870693, "grad_norm": 0.810162685105311, "learning_rate": 0.00019897757557565436, "loss": 12.682, "step": 2725 }, { "epoch": 0.14844159468528995, "grad_norm": 0.8342449403585397, "learning_rate": 0.00019897631742298746, "loss": 12.7605, "step": 2726 }, { "epoch": 0.14849604868187294, "grad_norm": 0.7489492357339741, "learning_rate": 0.00019897505850066414, "loss": 12.6675, "step": 2727 }, { "epoch": 0.14855050267845596, "grad_norm": 0.8945862280358247, "learning_rate": 0.00019897379880869418, "loss": 12.7221, "step": 2728 }, { "epoch": 0.14860495667503898, "grad_norm": 0.8578394375204317, "learning_rate": 0.00019897253834708735, "loss": 12.6227, "step": 2729 }, { "epoch": 0.14865941067162197, "grad_norm": 0.8320179117170514, "learning_rate": 0.00019897127711585347, "loss": 12.7534, "step": 2730 }, { "epoch": 0.148713864668205, "grad_norm": 0.913571833439365, "learning_rate": 0.00019897001511500232, "loss": 12.6558, "step": 2731 }, { "epoch": 0.148768318664788, "grad_norm": 0.7032303064815117, "learning_rate": 0.00019896875234454378, "loss": 12.6742, "step": 2732 }, { "epoch": 0.148822772661371, "grad_norm": 0.863979360450013, "learning_rate": 0.0001989674888044876, "loss": 12.6833, "step": 2733 }, { "epoch": 0.14887722665795403, "grad_norm": 0.8477605359647903, "learning_rate": 0.00019896622449484363, "loss": 12.724, "step": 2734 }, { "epoch": 0.14893168065453705, "grad_norm": 0.7009812417053154, "learning_rate": 0.00019896495941562167, "loss": 12.477, "step": 2735 }, { "epoch": 0.14898613465112004, "grad_norm": 1.1028258025088578, "learning_rate": 0.00019896369356683165, "loss": 12.7799, "step": 2736 }, { "epoch": 0.14904058864770306, "grad_norm": 0.823652454643868, "learning_rate": 0.00019896242694848333, "loss": 12.7377, "step": 2737 }, { "epoch": 0.14909504264428608, "grad_norm": 0.9733281948709586, "learning_rate": 0.00019896115956058655, "loss": 12.7747, "step": 2738 }, { "epoch": 0.14914949664086907, "grad_norm": 0.7976004474814257, "learning_rate": 0.00019895989140315123, "loss": 12.8456, "step": 2739 }, { "epoch": 0.1492039506374521, "grad_norm": 0.9644150577357924, "learning_rate": 0.0001989586224761872, "loss": 12.7945, "step": 2740 }, { "epoch": 0.1492584046340351, "grad_norm": 0.8759892203051738, "learning_rate": 0.0001989573527797043, "loss": 12.7189, "step": 2741 }, { "epoch": 0.14931285863061813, "grad_norm": 0.8336760294714186, "learning_rate": 0.00019895608231371246, "loss": 12.6849, "step": 2742 }, { "epoch": 0.14936731262720113, "grad_norm": 0.8626112894874401, "learning_rate": 0.0001989548110782215, "loss": 12.733, "step": 2743 }, { "epoch": 0.14942176662378415, "grad_norm": 0.7318897808235327, "learning_rate": 0.00019895353907324137, "loss": 12.6482, "step": 2744 }, { "epoch": 0.14947622062036717, "grad_norm": 0.9435418862621813, "learning_rate": 0.0001989522662987819, "loss": 12.788, "step": 2745 }, { "epoch": 0.14953067461695016, "grad_norm": 0.8022696319211658, "learning_rate": 0.000198950992754853, "loss": 12.438, "step": 2746 }, { "epoch": 0.14958512861353318, "grad_norm": 0.7575661421408524, "learning_rate": 0.00019894971844146463, "loss": 12.7393, "step": 2747 }, { "epoch": 0.1496395826101162, "grad_norm": 0.7805792001603048, "learning_rate": 0.00019894844335862662, "loss": 12.657, "step": 2748 }, { "epoch": 0.1496940366066992, "grad_norm": 0.8126920101810677, "learning_rate": 0.00019894716750634892, "loss": 12.8064, "step": 2749 }, { "epoch": 0.1497484906032822, "grad_norm": 0.8474201711084006, "learning_rate": 0.00019894589088464146, "loss": 12.754, "step": 2750 }, { "epoch": 0.14980294459986523, "grad_norm": 0.7755472736544625, "learning_rate": 0.00019894461349351415, "loss": 12.822, "step": 2751 }, { "epoch": 0.14985739859644823, "grad_norm": 0.7593805950832129, "learning_rate": 0.00019894333533297694, "loss": 12.6108, "step": 2752 }, { "epoch": 0.14991185259303125, "grad_norm": 0.7520301260416404, "learning_rate": 0.0001989420564030398, "loss": 12.6183, "step": 2753 }, { "epoch": 0.14996630658961427, "grad_norm": 0.749234270126392, "learning_rate": 0.0001989407767037126, "loss": 12.6935, "step": 2754 }, { "epoch": 0.15002076058619726, "grad_norm": 0.8756663060744522, "learning_rate": 0.00019893949623500534, "loss": 12.7225, "step": 2755 }, { "epoch": 0.15007521458278028, "grad_norm": 0.7676286733455003, "learning_rate": 0.00019893821499692793, "loss": 12.7744, "step": 2756 }, { "epoch": 0.1501296685793633, "grad_norm": 0.7605119624902635, "learning_rate": 0.0001989369329894904, "loss": 12.6357, "step": 2757 }, { "epoch": 0.15018412257594632, "grad_norm": 0.8580930249417181, "learning_rate": 0.00019893565021270268, "loss": 12.6745, "step": 2758 }, { "epoch": 0.15023857657252931, "grad_norm": 0.7583813012738259, "learning_rate": 0.00019893436666657474, "loss": 12.7093, "step": 2759 }, { "epoch": 0.15029303056911233, "grad_norm": 0.8563488809263123, "learning_rate": 0.0001989330823511166, "loss": 12.7741, "step": 2760 }, { "epoch": 0.15034748456569536, "grad_norm": 0.7543344960375925, "learning_rate": 0.00019893179726633822, "loss": 12.6322, "step": 2761 }, { "epoch": 0.15040193856227835, "grad_norm": 0.8365295252292132, "learning_rate": 0.0001989305114122496, "loss": 12.6755, "step": 2762 }, { "epoch": 0.15045639255886137, "grad_norm": 0.717356381989756, "learning_rate": 0.00019892922478886068, "loss": 12.6843, "step": 2763 }, { "epoch": 0.1505108465554444, "grad_norm": 0.7970472395126367, "learning_rate": 0.00019892793739618157, "loss": 12.7954, "step": 2764 }, { "epoch": 0.15056530055202738, "grad_norm": 0.8229387020294725, "learning_rate": 0.0001989266492342222, "loss": 12.6806, "step": 2765 }, { "epoch": 0.1506197545486104, "grad_norm": 0.7299889919023793, "learning_rate": 0.00019892536030299262, "loss": 12.7584, "step": 2766 }, { "epoch": 0.15067420854519342, "grad_norm": 0.8202907454446939, "learning_rate": 0.00019892407060250286, "loss": 12.8171, "step": 2767 }, { "epoch": 0.15072866254177641, "grad_norm": 0.7692958776460064, "learning_rate": 0.00019892278013276292, "loss": 12.7063, "step": 2768 }, { "epoch": 0.15078311653835944, "grad_norm": 0.7407039841520368, "learning_rate": 0.0001989214888937829, "loss": 12.6678, "step": 2769 }, { "epoch": 0.15083757053494246, "grad_norm": 0.6944323470913601, "learning_rate": 0.00019892019688557273, "loss": 12.6697, "step": 2770 }, { "epoch": 0.15089202453152545, "grad_norm": 0.9503595753704032, "learning_rate": 0.00019891890410814257, "loss": 12.6659, "step": 2771 }, { "epoch": 0.15094647852810847, "grad_norm": 0.7002113714765726, "learning_rate": 0.0001989176105615024, "loss": 12.5938, "step": 2772 }, { "epoch": 0.1510009325246915, "grad_norm": 0.7207990649669148, "learning_rate": 0.0001989163162456623, "loss": 12.6238, "step": 2773 }, { "epoch": 0.1510553865212745, "grad_norm": 0.7609110684132062, "learning_rate": 0.00019891502116063233, "loss": 12.5131, "step": 2774 }, { "epoch": 0.1511098405178575, "grad_norm": 0.7841808488718762, "learning_rate": 0.00019891372530642256, "loss": 12.7415, "step": 2775 }, { "epoch": 0.15116429451444052, "grad_norm": 0.803554084916481, "learning_rate": 0.0001989124286830431, "loss": 12.848, "step": 2776 }, { "epoch": 0.15121874851102354, "grad_norm": 0.7090519416935699, "learning_rate": 0.00019891113129050402, "loss": 12.6934, "step": 2777 }, { "epoch": 0.15127320250760654, "grad_norm": 0.9183418134038173, "learning_rate": 0.00019890983312881538, "loss": 12.6013, "step": 2778 }, { "epoch": 0.15132765650418956, "grad_norm": 0.6772283912015566, "learning_rate": 0.00019890853419798728, "loss": 12.6931, "step": 2779 }, { "epoch": 0.15138211050077258, "grad_norm": 0.7876816429288338, "learning_rate": 0.0001989072344980298, "loss": 12.753, "step": 2780 }, { "epoch": 0.15143656449735557, "grad_norm": 0.6960728512524404, "learning_rate": 0.00019890593402895312, "loss": 12.606, "step": 2781 }, { "epoch": 0.1514910184939386, "grad_norm": 0.7555357427749649, "learning_rate": 0.00019890463279076731, "loss": 12.5668, "step": 2782 }, { "epoch": 0.1515454724905216, "grad_norm": 0.7010977415108224, "learning_rate": 0.00019890333078348248, "loss": 12.6256, "step": 2783 }, { "epoch": 0.1515999264871046, "grad_norm": 0.7232246106573289, "learning_rate": 0.00019890202800710877, "loss": 12.7359, "step": 2784 }, { "epoch": 0.15165438048368762, "grad_norm": 0.809163964335571, "learning_rate": 0.0001989007244616563, "loss": 12.8173, "step": 2785 }, { "epoch": 0.15170883448027064, "grad_norm": 0.790540688648389, "learning_rate": 0.0001988994201471352, "loss": 12.5647, "step": 2786 }, { "epoch": 0.15176328847685364, "grad_norm": 0.6886477145769496, "learning_rate": 0.00019889811506355564, "loss": 12.5209, "step": 2787 }, { "epoch": 0.15181774247343666, "grad_norm": 0.6892518048460101, "learning_rate": 0.00019889680921092776, "loss": 12.5624, "step": 2788 }, { "epoch": 0.15187219647001968, "grad_norm": 0.7063700559267442, "learning_rate": 0.0001988955025892617, "loss": 12.6218, "step": 2789 }, { "epoch": 0.15192665046660267, "grad_norm": 0.7893300858011191, "learning_rate": 0.0001988941951985676, "loss": 12.641, "step": 2790 }, { "epoch": 0.1519811044631857, "grad_norm": 0.7314835383366215, "learning_rate": 0.0001988928870388557, "loss": 12.5973, "step": 2791 }, { "epoch": 0.1520355584597687, "grad_norm": 0.7727702514239536, "learning_rate": 0.00019889157811013607, "loss": 12.6172, "step": 2792 }, { "epoch": 0.15209001245635173, "grad_norm": 0.7357199420721806, "learning_rate": 0.000198890268412419, "loss": 12.6283, "step": 2793 }, { "epoch": 0.15214446645293472, "grad_norm": 0.7029293740093456, "learning_rate": 0.00019888895794571457, "loss": 12.7138, "step": 2794 }, { "epoch": 0.15219892044951774, "grad_norm": 0.8140800206572754, "learning_rate": 0.00019888764671003304, "loss": 12.7915, "step": 2795 }, { "epoch": 0.15225337444610076, "grad_norm": 0.7189172652761978, "learning_rate": 0.0001988863347053846, "loss": 12.6233, "step": 2796 }, { "epoch": 0.15230782844268376, "grad_norm": 0.6867836990626499, "learning_rate": 0.00019888502193177944, "loss": 12.671, "step": 2797 }, { "epoch": 0.15236228243926678, "grad_norm": 0.7207526699098658, "learning_rate": 0.00019888370838922774, "loss": 12.6913, "step": 2798 }, { "epoch": 0.1524167364358498, "grad_norm": 0.834167090031831, "learning_rate": 0.00019888239407773973, "loss": 12.5075, "step": 2799 }, { "epoch": 0.1524711904324328, "grad_norm": 0.7527501201976892, "learning_rate": 0.00019888107899732567, "loss": 12.5776, "step": 2800 }, { "epoch": 0.1525256444290158, "grad_norm": 0.705797037985442, "learning_rate": 0.00019887976314799576, "loss": 12.5589, "step": 2801 }, { "epoch": 0.15258009842559883, "grad_norm": 0.8131126845625408, "learning_rate": 0.00019887844652976023, "loss": 12.9504, "step": 2802 }, { "epoch": 0.15263455242218182, "grad_norm": 0.7197483465561019, "learning_rate": 0.00019887712914262932, "loss": 12.6358, "step": 2803 }, { "epoch": 0.15268900641876484, "grad_norm": 0.7211535406910872, "learning_rate": 0.00019887581098661326, "loss": 12.6082, "step": 2804 }, { "epoch": 0.15274346041534786, "grad_norm": 0.7153783931890983, "learning_rate": 0.0001988744920617223, "loss": 12.6324, "step": 2805 }, { "epoch": 0.15279791441193086, "grad_norm": 0.8625198065469974, "learning_rate": 0.00019887317236796673, "loss": 12.6345, "step": 2806 }, { "epoch": 0.15285236840851388, "grad_norm": 0.7268672211214469, "learning_rate": 0.00019887185190535676, "loss": 12.5933, "step": 2807 }, { "epoch": 0.1529068224050969, "grad_norm": 0.7191003964132267, "learning_rate": 0.00019887053067390271, "loss": 12.718, "step": 2808 }, { "epoch": 0.15296127640167992, "grad_norm": 0.9156558752399968, "learning_rate": 0.00019886920867361486, "loss": 12.5851, "step": 2809 }, { "epoch": 0.1530157303982629, "grad_norm": 0.7778164323577543, "learning_rate": 0.00019886788590450343, "loss": 12.5485, "step": 2810 }, { "epoch": 0.15307018439484593, "grad_norm": 0.7567328985770456, "learning_rate": 0.00019886656236657875, "loss": 12.7041, "step": 2811 }, { "epoch": 0.15312463839142895, "grad_norm": 0.7678411182063787, "learning_rate": 0.00019886523805985108, "loss": 12.6737, "step": 2812 }, { "epoch": 0.15317909238801194, "grad_norm": 0.8627798690144588, "learning_rate": 0.0001988639129843308, "loss": 12.6411, "step": 2813 }, { "epoch": 0.15323354638459497, "grad_norm": 0.735304833237844, "learning_rate": 0.00019886258714002807, "loss": 12.6742, "step": 2814 }, { "epoch": 0.15328800038117799, "grad_norm": 0.7593330629316116, "learning_rate": 0.00019886126052695333, "loss": 12.6313, "step": 2815 }, { "epoch": 0.15334245437776098, "grad_norm": 0.6571157408790251, "learning_rate": 0.00019885993314511686, "loss": 12.6673, "step": 2816 }, { "epoch": 0.153396908374344, "grad_norm": 0.7836367975743452, "learning_rate": 0.00019885860499452895, "loss": 12.6894, "step": 2817 }, { "epoch": 0.15345136237092702, "grad_norm": 0.7174222449230672, "learning_rate": 0.00019885727607519993, "loss": 12.5857, "step": 2818 }, { "epoch": 0.15350581636751, "grad_norm": 0.8054844096099334, "learning_rate": 0.00019885594638714018, "loss": 12.7942, "step": 2819 }, { "epoch": 0.15356027036409303, "grad_norm": 0.7531539053210494, "learning_rate": 0.00019885461593036, "loss": 12.7152, "step": 2820 }, { "epoch": 0.15361472436067605, "grad_norm": 0.8023736412652699, "learning_rate": 0.00019885328470486976, "loss": 12.8768, "step": 2821 }, { "epoch": 0.15366917835725905, "grad_norm": 0.7543875214548945, "learning_rate": 0.0001988519527106798, "loss": 12.5622, "step": 2822 }, { "epoch": 0.15372363235384207, "grad_norm": 0.7248015081840921, "learning_rate": 0.0001988506199478005, "loss": 12.6281, "step": 2823 }, { "epoch": 0.15377808635042509, "grad_norm": 0.7369565113377408, "learning_rate": 0.00019884928641624217, "loss": 12.5889, "step": 2824 }, { "epoch": 0.1538325403470081, "grad_norm": 0.755269816391, "learning_rate": 0.00019884795211601522, "loss": 12.659, "step": 2825 }, { "epoch": 0.1538869943435911, "grad_norm": 0.8702353886072459, "learning_rate": 0.00019884661704713003, "loss": 12.6518, "step": 2826 }, { "epoch": 0.15394144834017412, "grad_norm": 0.8471764604812616, "learning_rate": 0.00019884528120959693, "loss": 12.6379, "step": 2827 }, { "epoch": 0.15399590233675714, "grad_norm": 0.7872512739321531, "learning_rate": 0.00019884394460342636, "loss": 12.509, "step": 2828 }, { "epoch": 0.15405035633334013, "grad_norm": 0.8235514382992309, "learning_rate": 0.00019884260722862873, "loss": 12.6816, "step": 2829 }, { "epoch": 0.15410481032992315, "grad_norm": 1.414682065801912, "learning_rate": 0.0001988412690852144, "loss": 12.7264, "step": 2830 }, { "epoch": 0.15415926432650617, "grad_norm": 0.7508374862892704, "learning_rate": 0.00019883993017319376, "loss": 12.7554, "step": 2831 }, { "epoch": 0.15421371832308917, "grad_norm": 0.7707106426936741, "learning_rate": 0.00019883859049257726, "loss": 12.6894, "step": 2832 }, { "epoch": 0.1542681723196722, "grad_norm": 0.7810345921420442, "learning_rate": 0.00019883725004337532, "loss": 12.6557, "step": 2833 }, { "epoch": 0.1543226263162552, "grad_norm": 0.6986731637448959, "learning_rate": 0.00019883590882559834, "loss": 12.7629, "step": 2834 }, { "epoch": 0.1543770803128382, "grad_norm": 0.8182701064880522, "learning_rate": 0.0001988345668392568, "loss": 12.7564, "step": 2835 }, { "epoch": 0.15443153430942122, "grad_norm": 0.7583269846678126, "learning_rate": 0.00019883322408436102, "loss": 12.5635, "step": 2836 }, { "epoch": 0.15448598830600424, "grad_norm": 0.721085359058006, "learning_rate": 0.00019883188056092155, "loss": 12.5803, "step": 2837 }, { "epoch": 0.15454044230258723, "grad_norm": 0.8972990116057311, "learning_rate": 0.00019883053626894878, "loss": 12.721, "step": 2838 }, { "epoch": 0.15459489629917025, "grad_norm": 0.7696583704912467, "learning_rate": 0.00019882919120845324, "loss": 12.6906, "step": 2839 }, { "epoch": 0.15464935029575327, "grad_norm": 0.8683192989979782, "learning_rate": 0.0001988278453794453, "loss": 12.7031, "step": 2840 }, { "epoch": 0.1547038042923363, "grad_norm": 0.7580129590742768, "learning_rate": 0.00019882649878193544, "loss": 12.66, "step": 2841 }, { "epoch": 0.1547582582889193, "grad_norm": 0.8619060801915377, "learning_rate": 0.00019882515141593417, "loss": 12.7066, "step": 2842 }, { "epoch": 0.1548127122855023, "grad_norm": 0.8092195760188854, "learning_rate": 0.00019882380328145195, "loss": 12.7587, "step": 2843 }, { "epoch": 0.15486716628208533, "grad_norm": 0.7464848175934536, "learning_rate": 0.0001988224543784993, "loss": 12.6538, "step": 2844 }, { "epoch": 0.15492162027866832, "grad_norm": 0.8128263278093952, "learning_rate": 0.0001988211047070866, "loss": 12.6386, "step": 2845 }, { "epoch": 0.15497607427525134, "grad_norm": 0.7961419281399771, "learning_rate": 0.0001988197542672245, "loss": 12.6517, "step": 2846 }, { "epoch": 0.15503052827183436, "grad_norm": 0.8514430955903212, "learning_rate": 0.00019881840305892336, "loss": 12.6183, "step": 2847 }, { "epoch": 0.15508498226841735, "grad_norm": 0.7891271901770917, "learning_rate": 0.00019881705108219376, "loss": 12.6508, "step": 2848 }, { "epoch": 0.15513943626500037, "grad_norm": 0.8303345806312178, "learning_rate": 0.0001988156983370462, "loss": 12.604, "step": 2849 }, { "epoch": 0.1551938902615834, "grad_norm": 0.8482866449361063, "learning_rate": 0.0001988143448234912, "loss": 12.6734, "step": 2850 }, { "epoch": 0.1552483442581664, "grad_norm": 0.7796472708220716, "learning_rate": 0.0001988129905415393, "loss": 12.4922, "step": 2851 }, { "epoch": 0.1553027982547494, "grad_norm": 0.9465182394861125, "learning_rate": 0.000198811635491201, "loss": 12.7001, "step": 2852 }, { "epoch": 0.15535725225133243, "grad_norm": 0.7281216127951547, "learning_rate": 0.00019881027967248683, "loss": 12.6122, "step": 2853 }, { "epoch": 0.15541170624791542, "grad_norm": 0.9286753088079683, "learning_rate": 0.00019880892308540737, "loss": 12.8227, "step": 2854 }, { "epoch": 0.15546616024449844, "grad_norm": 0.7763893618414947, "learning_rate": 0.00019880756572997316, "loss": 12.7216, "step": 2855 }, { "epoch": 0.15552061424108146, "grad_norm": 0.7916498937728372, "learning_rate": 0.00019880620760619476, "loss": 12.6203, "step": 2856 }, { "epoch": 0.15557506823766445, "grad_norm": 0.767571823535825, "learning_rate": 0.0001988048487140827, "loss": 12.5859, "step": 2857 }, { "epoch": 0.15562952223424747, "grad_norm": 0.7645270956833853, "learning_rate": 0.00019880348905364757, "loss": 12.7418, "step": 2858 }, { "epoch": 0.1556839762308305, "grad_norm": 0.9638854534597893, "learning_rate": 0.00019880212862489994, "loss": 12.6037, "step": 2859 }, { "epoch": 0.15573843022741352, "grad_norm": 0.7436913641362939, "learning_rate": 0.0001988007674278504, "loss": 12.6679, "step": 2860 }, { "epoch": 0.1557928842239965, "grad_norm": 0.8370044615080934, "learning_rate": 0.00019879940546250953, "loss": 12.623, "step": 2861 }, { "epoch": 0.15584733822057953, "grad_norm": 0.6821081371810797, "learning_rate": 0.0001987980427288879, "loss": 12.5769, "step": 2862 }, { "epoch": 0.15590179221716255, "grad_norm": 0.7644536575584995, "learning_rate": 0.0001987966792269961, "loss": 12.503, "step": 2863 }, { "epoch": 0.15595624621374554, "grad_norm": 0.7734674709924113, "learning_rate": 0.00019879531495684477, "loss": 12.6612, "step": 2864 }, { "epoch": 0.15601070021032856, "grad_norm": 0.7339254537144414, "learning_rate": 0.00019879394991844453, "loss": 12.5314, "step": 2865 }, { "epoch": 0.15606515420691158, "grad_norm": 0.754806928649199, "learning_rate": 0.00019879258411180595, "loss": 12.6488, "step": 2866 }, { "epoch": 0.15611960820349458, "grad_norm": 0.7838025302628356, "learning_rate": 0.00019879121753693966, "loss": 12.624, "step": 2867 }, { "epoch": 0.1561740622000776, "grad_norm": 0.9476835270509917, "learning_rate": 0.00019878985019385629, "loss": 12.7108, "step": 2868 }, { "epoch": 0.15622851619666062, "grad_norm": 0.7778603460412541, "learning_rate": 0.0001987884820825665, "loss": 12.7608, "step": 2869 }, { "epoch": 0.1562829701932436, "grad_norm": 0.8146019433611156, "learning_rate": 0.00019878711320308088, "loss": 12.5931, "step": 2870 }, { "epoch": 0.15633742418982663, "grad_norm": 0.8538501969506916, "learning_rate": 0.00019878574355541013, "loss": 12.7687, "step": 2871 }, { "epoch": 0.15639187818640965, "grad_norm": 0.7748395850487926, "learning_rate": 0.00019878437313956485, "loss": 12.5725, "step": 2872 }, { "epoch": 0.15644633218299264, "grad_norm": 0.9468791007489141, "learning_rate": 0.00019878300195555574, "loss": 12.7611, "step": 2873 }, { "epoch": 0.15650078617957566, "grad_norm": 0.8541710954520909, "learning_rate": 0.0001987816300033934, "loss": 12.757, "step": 2874 }, { "epoch": 0.15655524017615868, "grad_norm": 0.8001891904172195, "learning_rate": 0.00019878025728308857, "loss": 12.5571, "step": 2875 }, { "epoch": 0.1566096941727417, "grad_norm": 0.7420531715125143, "learning_rate": 0.0001987788837946519, "loss": 12.8099, "step": 2876 }, { "epoch": 0.1566641481693247, "grad_norm": 0.8668620926535715, "learning_rate": 0.00019877750953809403, "loss": 12.8152, "step": 2877 }, { "epoch": 0.15671860216590772, "grad_norm": 0.7066024735778503, "learning_rate": 0.00019877613451342572, "loss": 12.6544, "step": 2878 }, { "epoch": 0.15677305616249074, "grad_norm": 0.7301587483127819, "learning_rate": 0.0001987747587206576, "loss": 12.6587, "step": 2879 }, { "epoch": 0.15682751015907373, "grad_norm": 0.8221851904995162, "learning_rate": 0.0001987733821598004, "loss": 12.614, "step": 2880 }, { "epoch": 0.15688196415565675, "grad_norm": 0.7800050428325034, "learning_rate": 0.00019877200483086482, "loss": 12.7346, "step": 2881 }, { "epoch": 0.15693641815223977, "grad_norm": 0.7538371294006235, "learning_rate": 0.00019877062673386155, "loss": 12.7771, "step": 2882 }, { "epoch": 0.15699087214882276, "grad_norm": 0.7737338005678903, "learning_rate": 0.00019876924786880136, "loss": 12.6477, "step": 2883 }, { "epoch": 0.15704532614540578, "grad_norm": 0.7794075495170949, "learning_rate": 0.0001987678682356949, "loss": 12.8022, "step": 2884 }, { "epoch": 0.1570997801419888, "grad_norm": 0.8710909802578073, "learning_rate": 0.00019876648783455293, "loss": 12.659, "step": 2885 }, { "epoch": 0.1571542341385718, "grad_norm": 0.668760916103844, "learning_rate": 0.0001987651066653862, "loss": 12.686, "step": 2886 }, { "epoch": 0.15720868813515482, "grad_norm": 0.7562285864065703, "learning_rate": 0.00019876372472820545, "loss": 12.6519, "step": 2887 }, { "epoch": 0.15726314213173784, "grad_norm": 0.7185663162783882, "learning_rate": 0.0001987623420230214, "loss": 12.6415, "step": 2888 }, { "epoch": 0.15731759612832083, "grad_norm": 0.6729839607124669, "learning_rate": 0.00019876095854984483, "loss": 12.6349, "step": 2889 }, { "epoch": 0.15737205012490385, "grad_norm": 0.7545275225102389, "learning_rate": 0.00019875957430868645, "loss": 12.7718, "step": 2890 }, { "epoch": 0.15742650412148687, "grad_norm": 0.7892422343726105, "learning_rate": 0.0001987581892995571, "loss": 12.566, "step": 2891 }, { "epoch": 0.1574809581180699, "grad_norm": 0.800924414122386, "learning_rate": 0.0001987568035224675, "loss": 12.7672, "step": 2892 }, { "epoch": 0.15753541211465288, "grad_norm": 0.7405628682209897, "learning_rate": 0.0001987554169774284, "loss": 12.6177, "step": 2893 }, { "epoch": 0.1575898661112359, "grad_norm": 0.8188446538091821, "learning_rate": 0.00019875402966445065, "loss": 12.8018, "step": 2894 }, { "epoch": 0.15764432010781892, "grad_norm": 0.6775815545337175, "learning_rate": 0.00019875264158354498, "loss": 12.9058, "step": 2895 }, { "epoch": 0.15769877410440192, "grad_norm": 0.7745018906486485, "learning_rate": 0.00019875125273472222, "loss": 12.6926, "step": 2896 }, { "epoch": 0.15775322810098494, "grad_norm": 0.6856967013924328, "learning_rate": 0.00019874986311799316, "loss": 12.6651, "step": 2897 }, { "epoch": 0.15780768209756796, "grad_norm": 0.8311969803551889, "learning_rate": 0.00019874847273336862, "loss": 12.8152, "step": 2898 }, { "epoch": 0.15786213609415095, "grad_norm": 0.7484518446515533, "learning_rate": 0.00019874708158085938, "loss": 12.7422, "step": 2899 }, { "epoch": 0.15791659009073397, "grad_norm": 0.9253746140131502, "learning_rate": 0.00019874568966047625, "loss": 12.8505, "step": 2900 }, { "epoch": 0.157971044087317, "grad_norm": 0.7642243120959947, "learning_rate": 0.0001987442969722301, "loss": 12.6175, "step": 2901 }, { "epoch": 0.15802549808389998, "grad_norm": 0.7379359930600351, "learning_rate": 0.00019874290351613177, "loss": 12.7439, "step": 2902 }, { "epoch": 0.158079952080483, "grad_norm": 0.7361247160464598, "learning_rate": 0.000198741509292192, "loss": 12.5702, "step": 2903 }, { "epoch": 0.15813440607706603, "grad_norm": 0.7632560096569945, "learning_rate": 0.00019874011430042173, "loss": 12.5885, "step": 2904 }, { "epoch": 0.15818886007364902, "grad_norm": 0.7121434411200048, "learning_rate": 0.00019873871854083177, "loss": 12.6405, "step": 2905 }, { "epoch": 0.15824331407023204, "grad_norm": 0.6974010357137779, "learning_rate": 0.00019873732201343297, "loss": 12.6232, "step": 2906 }, { "epoch": 0.15829776806681506, "grad_norm": 0.6934937196933434, "learning_rate": 0.00019873592471823622, "loss": 12.5748, "step": 2907 }, { "epoch": 0.15835222206339808, "grad_norm": 0.8136084193231012, "learning_rate": 0.0001987345266552523, "loss": 12.705, "step": 2908 }, { "epoch": 0.15840667605998107, "grad_norm": 0.8153925561329763, "learning_rate": 0.0001987331278244922, "loss": 12.6548, "step": 2909 }, { "epoch": 0.1584611300565641, "grad_norm": 0.7461307062300498, "learning_rate": 0.00019873172822596673, "loss": 12.6724, "step": 2910 }, { "epoch": 0.1585155840531471, "grad_norm": 0.7591005159059452, "learning_rate": 0.0001987303278596868, "loss": 12.7604, "step": 2911 }, { "epoch": 0.1585700380497301, "grad_norm": 0.7719825542635549, "learning_rate": 0.00019872892672566326, "loss": 12.755, "step": 2912 }, { "epoch": 0.15862449204631313, "grad_norm": 0.7409777070497231, "learning_rate": 0.000198727524823907, "loss": 12.7466, "step": 2913 }, { "epoch": 0.15867894604289615, "grad_norm": 0.8675143707913091, "learning_rate": 0.000198726122154429, "loss": 12.642, "step": 2914 }, { "epoch": 0.15873340003947914, "grad_norm": 0.8154491006459972, "learning_rate": 0.0001987247187172401, "loss": 12.749, "step": 2915 }, { "epoch": 0.15878785403606216, "grad_norm": 0.8774062749892383, "learning_rate": 0.0001987233145123512, "loss": 12.744, "step": 2916 }, { "epoch": 0.15884230803264518, "grad_norm": 0.7171864769206286, "learning_rate": 0.00019872190953977331, "loss": 12.6279, "step": 2917 }, { "epoch": 0.15889676202922817, "grad_norm": 0.8983152653112244, "learning_rate": 0.0001987205037995173, "loss": 12.6355, "step": 2918 }, { "epoch": 0.1589512160258112, "grad_norm": 0.8848065508819202, "learning_rate": 0.00019871909729159403, "loss": 12.8295, "step": 2919 }, { "epoch": 0.1590056700223942, "grad_norm": 0.8467832737921628, "learning_rate": 0.00019871769001601454, "loss": 12.5814, "step": 2920 }, { "epoch": 0.1590601240189772, "grad_norm": 0.84887487191556, "learning_rate": 0.00019871628197278972, "loss": 12.8861, "step": 2921 }, { "epoch": 0.15911457801556023, "grad_norm": 0.8061164552631324, "learning_rate": 0.00019871487316193057, "loss": 12.7834, "step": 2922 }, { "epoch": 0.15916903201214325, "grad_norm": 0.7107138305932041, "learning_rate": 0.000198713463583448, "loss": 12.5469, "step": 2923 }, { "epoch": 0.15922348600872624, "grad_norm": 0.6743243016861461, "learning_rate": 0.00019871205323735298, "loss": 12.6979, "step": 2924 }, { "epoch": 0.15927794000530926, "grad_norm": 0.7720761548238595, "learning_rate": 0.00019871064212365647, "loss": 12.5449, "step": 2925 }, { "epoch": 0.15933239400189228, "grad_norm": 0.7922355008399666, "learning_rate": 0.00019870923024236948, "loss": 12.6402, "step": 2926 }, { "epoch": 0.1593868479984753, "grad_norm": 0.8070662326895096, "learning_rate": 0.00019870781759350292, "loss": 12.6296, "step": 2927 }, { "epoch": 0.1594413019950583, "grad_norm": 0.6803895819196606, "learning_rate": 0.00019870640417706784, "loss": 12.5325, "step": 2928 }, { "epoch": 0.1594957559916413, "grad_norm": 0.7126599815491631, "learning_rate": 0.00019870498999307522, "loss": 12.6576, "step": 2929 }, { "epoch": 0.15955020998822433, "grad_norm": 0.8448286288702788, "learning_rate": 0.00019870357504153603, "loss": 12.7188, "step": 2930 }, { "epoch": 0.15960466398480733, "grad_norm": 0.7214711269702817, "learning_rate": 0.00019870215932246127, "loss": 12.6394, "step": 2931 }, { "epoch": 0.15965911798139035, "grad_norm": 0.829531978726736, "learning_rate": 0.000198700742835862, "loss": 12.7261, "step": 2932 }, { "epoch": 0.15971357197797337, "grad_norm": 0.7058088612808299, "learning_rate": 0.00019869932558174919, "loss": 12.678, "step": 2933 }, { "epoch": 0.15976802597455636, "grad_norm": 0.7317555508306628, "learning_rate": 0.00019869790756013385, "loss": 12.5755, "step": 2934 }, { "epoch": 0.15982247997113938, "grad_norm": 0.6647603557904068, "learning_rate": 0.00019869648877102707, "loss": 12.6979, "step": 2935 }, { "epoch": 0.1598769339677224, "grad_norm": 0.8407777414091054, "learning_rate": 0.00019869506921443982, "loss": 12.7168, "step": 2936 }, { "epoch": 0.1599313879643054, "grad_norm": 0.743608301092564, "learning_rate": 0.0001986936488903832, "loss": 12.756, "step": 2937 }, { "epoch": 0.15998584196088841, "grad_norm": 0.8049838269368632, "learning_rate": 0.00019869222779886814, "loss": 12.6823, "step": 2938 }, { "epoch": 0.16004029595747143, "grad_norm": 0.7606150998743767, "learning_rate": 0.00019869080593990578, "loss": 12.6691, "step": 2939 }, { "epoch": 0.16009474995405443, "grad_norm": 0.6970909985961943, "learning_rate": 0.0001986893833135072, "loss": 12.7718, "step": 2940 }, { "epoch": 0.16014920395063745, "grad_norm": 0.7769916732202724, "learning_rate": 0.00019868795991968342, "loss": 12.6826, "step": 2941 }, { "epoch": 0.16020365794722047, "grad_norm": 0.7659872792765248, "learning_rate": 0.0001986865357584455, "loss": 12.7583, "step": 2942 }, { "epoch": 0.1602581119438035, "grad_norm": 0.8445126991147179, "learning_rate": 0.00019868511082980455, "loss": 12.8178, "step": 2943 }, { "epoch": 0.16031256594038648, "grad_norm": 0.763277351074957, "learning_rate": 0.0001986836851337716, "loss": 12.6646, "step": 2944 }, { "epoch": 0.1603670199369695, "grad_norm": 1.0276730264139622, "learning_rate": 0.00019868225867035778, "loss": 12.837, "step": 2945 }, { "epoch": 0.16042147393355252, "grad_norm": 1.0233540700530248, "learning_rate": 0.00019868083143957416, "loss": 12.7812, "step": 2946 }, { "epoch": 0.16047592793013551, "grad_norm": 0.7844462560812835, "learning_rate": 0.00019867940344143185, "loss": 12.6027, "step": 2947 }, { "epoch": 0.16053038192671854, "grad_norm": 0.8081790004592988, "learning_rate": 0.00019867797467594195, "loss": 12.5724, "step": 2948 }, { "epoch": 0.16058483592330156, "grad_norm": 0.7615334540876233, "learning_rate": 0.00019867654514311558, "loss": 12.6042, "step": 2949 }, { "epoch": 0.16063928991988455, "grad_norm": 0.7693417927056728, "learning_rate": 0.00019867511484296385, "loss": 12.7132, "step": 2950 }, { "epoch": 0.16069374391646757, "grad_norm": 0.7432777721279483, "learning_rate": 0.00019867368377549785, "loss": 12.6545, "step": 2951 }, { "epoch": 0.1607481979130506, "grad_norm": 0.7256121338548468, "learning_rate": 0.00019867225194072875, "loss": 12.6685, "step": 2952 }, { "epoch": 0.16080265190963358, "grad_norm": 0.7448417158991302, "learning_rate": 0.00019867081933866768, "loss": 12.711, "step": 2953 }, { "epoch": 0.1608571059062166, "grad_norm": 0.8333907442536371, "learning_rate": 0.0001986693859693258, "loss": 12.6638, "step": 2954 }, { "epoch": 0.16091155990279962, "grad_norm": 0.813839754003139, "learning_rate": 0.00019866795183271418, "loss": 12.7326, "step": 2955 }, { "epoch": 0.16096601389938262, "grad_norm": 0.8254595639514176, "learning_rate": 0.000198666516928844, "loss": 12.8003, "step": 2956 }, { "epoch": 0.16102046789596564, "grad_norm": 0.7082473755766474, "learning_rate": 0.0001986650812577265, "loss": 12.7089, "step": 2957 }, { "epoch": 0.16107492189254866, "grad_norm": 0.8686973365777078, "learning_rate": 0.00019866364481937275, "loss": 12.8251, "step": 2958 }, { "epoch": 0.16112937588913168, "grad_norm": 0.8270284101134693, "learning_rate": 0.00019866220761379397, "loss": 12.8409, "step": 2959 }, { "epoch": 0.16118382988571467, "grad_norm": 0.7209478339761264, "learning_rate": 0.00019866076964100132, "loss": 12.7463, "step": 2960 }, { "epoch": 0.1612382838822977, "grad_norm": 0.9011747411779443, "learning_rate": 0.00019865933090100593, "loss": 12.7234, "step": 2961 }, { "epoch": 0.1612927378788807, "grad_norm": 0.7302090317140539, "learning_rate": 0.00019865789139381906, "loss": 12.6734, "step": 2962 }, { "epoch": 0.1613471918754637, "grad_norm": 0.8439839869720785, "learning_rate": 0.00019865645111945192, "loss": 12.7653, "step": 2963 }, { "epoch": 0.16140164587204672, "grad_norm": 0.7831387332858022, "learning_rate": 0.00019865501007791564, "loss": 12.6617, "step": 2964 }, { "epoch": 0.16145609986862974, "grad_norm": 0.751759533220739, "learning_rate": 0.00019865356826922147, "loss": 12.6347, "step": 2965 }, { "epoch": 0.16151055386521274, "grad_norm": 0.784713141210689, "learning_rate": 0.0001986521256933806, "loss": 12.6752, "step": 2966 }, { "epoch": 0.16156500786179576, "grad_norm": 0.9349312628518618, "learning_rate": 0.00019865068235040427, "loss": 12.8224, "step": 2967 }, { "epoch": 0.16161946185837878, "grad_norm": 0.796775119153196, "learning_rate": 0.00019864923824030367, "loss": 12.6793, "step": 2968 }, { "epoch": 0.16167391585496177, "grad_norm": 0.9158652239242536, "learning_rate": 0.00019864779336309005, "loss": 12.752, "step": 2969 }, { "epoch": 0.1617283698515448, "grad_norm": 0.7597183903240505, "learning_rate": 0.0001986463477187747, "loss": 12.7695, "step": 2970 }, { "epoch": 0.1617828238481278, "grad_norm": 0.9777933769595428, "learning_rate": 0.00019864490130736874, "loss": 12.7579, "step": 2971 }, { "epoch": 0.1618372778447108, "grad_norm": 0.6985247110538119, "learning_rate": 0.0001986434541288835, "loss": 12.6177, "step": 2972 }, { "epoch": 0.16189173184129382, "grad_norm": 0.7980997965207286, "learning_rate": 0.00019864200618333023, "loss": 12.7239, "step": 2973 }, { "epoch": 0.16194618583787684, "grad_norm": 0.8623726372473786, "learning_rate": 0.00019864055747072018, "loss": 12.7306, "step": 2974 }, { "epoch": 0.16200063983445986, "grad_norm": 0.7770705515852193, "learning_rate": 0.0001986391079910646, "loss": 12.8426, "step": 2975 }, { "epoch": 0.16205509383104286, "grad_norm": 0.7478972825442104, "learning_rate": 0.00019863765774437477, "loss": 12.6541, "step": 2976 }, { "epoch": 0.16210954782762588, "grad_norm": 0.7143585535799926, "learning_rate": 0.000198636206730662, "loss": 12.7268, "step": 2977 }, { "epoch": 0.1621640018242089, "grad_norm": 0.8776883368027423, "learning_rate": 0.0001986347549499375, "loss": 12.831, "step": 2978 }, { "epoch": 0.1622184558207919, "grad_norm": 0.7508476394175179, "learning_rate": 0.00019863330240221263, "loss": 12.833, "step": 2979 }, { "epoch": 0.1622729098173749, "grad_norm": 0.8346959291607994, "learning_rate": 0.00019863184908749866, "loss": 12.6451, "step": 2980 }, { "epoch": 0.16232736381395793, "grad_norm": 0.7329729295070289, "learning_rate": 0.0001986303950058069, "loss": 12.743, "step": 2981 }, { "epoch": 0.16238181781054092, "grad_norm": 0.7357999171068264, "learning_rate": 0.00019862894015714865, "loss": 12.6297, "step": 2982 }, { "epoch": 0.16243627180712394, "grad_norm": 0.7464982470254528, "learning_rate": 0.00019862748454153523, "loss": 12.7155, "step": 2983 }, { "epoch": 0.16249072580370696, "grad_norm": 0.7276796561806051, "learning_rate": 0.00019862602815897792, "loss": 12.7921, "step": 2984 }, { "epoch": 0.16254517980028996, "grad_norm": 0.7468609325498567, "learning_rate": 0.0001986245710094881, "loss": 12.7216, "step": 2985 }, { "epoch": 0.16259963379687298, "grad_norm": 0.7581697225369602, "learning_rate": 0.00019862311309307704, "loss": 12.6114, "step": 2986 }, { "epoch": 0.162654087793456, "grad_norm": 0.7915931346131438, "learning_rate": 0.00019862165440975616, "loss": 12.745, "step": 2987 }, { "epoch": 0.162708541790039, "grad_norm": 0.775253020930781, "learning_rate": 0.00019862019495953673, "loss": 12.6253, "step": 2988 }, { "epoch": 0.162762995786622, "grad_norm": 0.7540019789596962, "learning_rate": 0.0001986187347424301, "loss": 12.6637, "step": 2989 }, { "epoch": 0.16281744978320503, "grad_norm": 0.7955802651375394, "learning_rate": 0.00019861727375844769, "loss": 12.6314, "step": 2990 }, { "epoch": 0.16287190377978802, "grad_norm": 0.704076718526564, "learning_rate": 0.00019861581200760083, "loss": 12.654, "step": 2991 }, { "epoch": 0.16292635777637104, "grad_norm": 0.752020939332643, "learning_rate": 0.00019861434948990084, "loss": 12.7554, "step": 2992 }, { "epoch": 0.16298081177295407, "grad_norm": 0.8500661663949628, "learning_rate": 0.00019861288620535915, "loss": 12.7574, "step": 2993 }, { "epoch": 0.16303526576953709, "grad_norm": 0.7151514384071872, "learning_rate": 0.00019861142215398713, "loss": 12.6612, "step": 2994 }, { "epoch": 0.16308971976612008, "grad_norm": 0.8014697789286036, "learning_rate": 0.00019860995733579615, "loss": 12.6775, "step": 2995 }, { "epoch": 0.1631441737627031, "grad_norm": 0.7902169978055278, "learning_rate": 0.0001986084917507976, "loss": 12.8139, "step": 2996 }, { "epoch": 0.16319862775928612, "grad_norm": 0.7667894415875761, "learning_rate": 0.00019860702539900287, "loss": 12.5831, "step": 2997 }, { "epoch": 0.1632530817558691, "grad_norm": 0.7606419870046596, "learning_rate": 0.00019860555828042338, "loss": 12.5801, "step": 2998 }, { "epoch": 0.16330753575245213, "grad_norm": 0.7268333122039216, "learning_rate": 0.00019860409039507054, "loss": 12.6711, "step": 2999 }, { "epoch": 0.16336198974903515, "grad_norm": 0.8293781927525129, "learning_rate": 0.00019860262174295574, "loss": 12.667, "step": 3000 }, { "epoch": 0.16341644374561815, "grad_norm": 0.6821855115896598, "learning_rate": 0.00019860115232409045, "loss": 12.646, "step": 3001 }, { "epoch": 0.16347089774220117, "grad_norm": 0.8003391933674313, "learning_rate": 0.00019859968213848604, "loss": 12.5705, "step": 3002 }, { "epoch": 0.16352535173878419, "grad_norm": 0.8315845013677727, "learning_rate": 0.00019859821118615396, "loss": 12.7818, "step": 3003 }, { "epoch": 0.16357980573536718, "grad_norm": 0.8041144709489726, "learning_rate": 0.00019859673946710568, "loss": 12.6929, "step": 3004 }, { "epoch": 0.1636342597319502, "grad_norm": 0.7491663856126463, "learning_rate": 0.0001985952669813526, "loss": 12.6536, "step": 3005 }, { "epoch": 0.16368871372853322, "grad_norm": 0.7213642535047672, "learning_rate": 0.0001985937937289062, "loss": 12.6771, "step": 3006 }, { "epoch": 0.1637431677251162, "grad_norm": 0.7064321339419524, "learning_rate": 0.00019859231970977792, "loss": 12.7313, "step": 3007 }, { "epoch": 0.16379762172169923, "grad_norm": 0.7118779395124208, "learning_rate": 0.00019859084492397923, "loss": 12.6198, "step": 3008 }, { "epoch": 0.16385207571828225, "grad_norm": 0.776431535884499, "learning_rate": 0.0001985893693715216, "loss": 12.6581, "step": 3009 }, { "epoch": 0.16390652971486527, "grad_norm": 0.7060943938035943, "learning_rate": 0.00019858789305241648, "loss": 12.6482, "step": 3010 }, { "epoch": 0.16396098371144827, "grad_norm": 0.7462608617503164, "learning_rate": 0.0001985864159666754, "loss": 12.7181, "step": 3011 }, { "epoch": 0.1640154377080313, "grad_norm": 0.7949141644882773, "learning_rate": 0.0001985849381143098, "loss": 12.7098, "step": 3012 }, { "epoch": 0.1640698917046143, "grad_norm": 0.7700097507545192, "learning_rate": 0.00019858345949533117, "loss": 12.6542, "step": 3013 }, { "epoch": 0.1641243457011973, "grad_norm": 0.7223619675646541, "learning_rate": 0.00019858198010975106, "loss": 12.7549, "step": 3014 }, { "epoch": 0.16417879969778032, "grad_norm": 0.6751448073396646, "learning_rate": 0.00019858049995758094, "loss": 12.6315, "step": 3015 }, { "epoch": 0.16423325369436334, "grad_norm": 0.7771226973080849, "learning_rate": 0.0001985790190388323, "loss": 12.6497, "step": 3016 }, { "epoch": 0.16428770769094633, "grad_norm": 0.6711637065491515, "learning_rate": 0.00019857753735351668, "loss": 12.5573, "step": 3017 }, { "epoch": 0.16434216168752935, "grad_norm": 0.712214129858097, "learning_rate": 0.0001985760549016456, "loss": 12.7238, "step": 3018 }, { "epoch": 0.16439661568411237, "grad_norm": 0.7331697045163766, "learning_rate": 0.0001985745716832306, "loss": 12.5742, "step": 3019 }, { "epoch": 0.16445106968069537, "grad_norm": 0.7119826674527502, "learning_rate": 0.0001985730876982832, "loss": 12.6483, "step": 3020 }, { "epoch": 0.1645055236772784, "grad_norm": 0.6974600839247409, "learning_rate": 0.0001985716029468149, "loss": 12.6442, "step": 3021 }, { "epoch": 0.1645599776738614, "grad_norm": 0.8670722829202118, "learning_rate": 0.00019857011742883734, "loss": 12.8303, "step": 3022 }, { "epoch": 0.1646144316704444, "grad_norm": 0.7635478127989597, "learning_rate": 0.00019856863114436197, "loss": 12.5336, "step": 3023 }, { "epoch": 0.16466888566702742, "grad_norm": 0.7542722269262729, "learning_rate": 0.0001985671440934004, "loss": 12.6527, "step": 3024 }, { "epoch": 0.16472333966361044, "grad_norm": 0.8416074307555329, "learning_rate": 0.00019856565627596423, "loss": 12.7418, "step": 3025 }, { "epoch": 0.16477779366019346, "grad_norm": 0.7287356370803173, "learning_rate": 0.00019856416769206496, "loss": 12.7479, "step": 3026 }, { "epoch": 0.16483224765677645, "grad_norm": 0.8005722489828518, "learning_rate": 0.00019856267834171418, "loss": 12.6369, "step": 3027 }, { "epoch": 0.16488670165335947, "grad_norm": 0.8305008235817432, "learning_rate": 0.00019856118822492348, "loss": 12.6217, "step": 3028 }, { "epoch": 0.1649411556499425, "grad_norm": 0.7033320894025007, "learning_rate": 0.0001985596973417045, "loss": 12.6356, "step": 3029 }, { "epoch": 0.1649956096465255, "grad_norm": 0.7793475423768594, "learning_rate": 0.00019855820569206873, "loss": 12.5976, "step": 3030 }, { "epoch": 0.1650500636431085, "grad_norm": 0.7107485974300436, "learning_rate": 0.00019855671327602786, "loss": 12.6629, "step": 3031 }, { "epoch": 0.16510451763969153, "grad_norm": 0.8246162928773088, "learning_rate": 0.00019855522009359344, "loss": 12.7947, "step": 3032 }, { "epoch": 0.16515897163627452, "grad_norm": 0.8131775899827965, "learning_rate": 0.0001985537261447771, "loss": 12.7613, "step": 3033 }, { "epoch": 0.16521342563285754, "grad_norm": 0.7587550524533624, "learning_rate": 0.00019855223142959045, "loss": 12.6487, "step": 3034 }, { "epoch": 0.16526787962944056, "grad_norm": 0.9463315526005036, "learning_rate": 0.00019855073594804513, "loss": 12.8261, "step": 3035 }, { "epoch": 0.16532233362602355, "grad_norm": 0.6858694256651935, "learning_rate": 0.0001985492397001528, "loss": 12.6521, "step": 3036 }, { "epoch": 0.16537678762260657, "grad_norm": 0.9290574372859011, "learning_rate": 0.00019854774268592503, "loss": 12.7764, "step": 3037 }, { "epoch": 0.1654312416191896, "grad_norm": 0.7226513185149467, "learning_rate": 0.00019854624490537345, "loss": 12.5919, "step": 3038 }, { "epoch": 0.1654856956157726, "grad_norm": 0.8268556528120298, "learning_rate": 0.00019854474635850976, "loss": 12.5752, "step": 3039 }, { "epoch": 0.1655401496123556, "grad_norm": 0.6935520582092968, "learning_rate": 0.0001985432470453456, "loss": 12.624, "step": 3040 }, { "epoch": 0.16559460360893863, "grad_norm": 0.7905359262554176, "learning_rate": 0.00019854174696589265, "loss": 12.6458, "step": 3041 }, { "epoch": 0.16564905760552165, "grad_norm": 0.8584511171853852, "learning_rate": 0.00019854024612016256, "loss": 12.787, "step": 3042 }, { "epoch": 0.16570351160210464, "grad_norm": 0.7234888567257534, "learning_rate": 0.00019853874450816695, "loss": 12.7128, "step": 3043 }, { "epoch": 0.16575796559868766, "grad_norm": 0.8364271048465408, "learning_rate": 0.00019853724212991756, "loss": 12.7128, "step": 3044 }, { "epoch": 0.16581241959527068, "grad_norm": 0.7495999449297123, "learning_rate": 0.00019853573898542603, "loss": 12.7308, "step": 3045 }, { "epoch": 0.16586687359185368, "grad_norm": 0.7474271252052067, "learning_rate": 0.0001985342350747041, "loss": 12.5932, "step": 3046 }, { "epoch": 0.1659213275884367, "grad_norm": 0.7774008075239626, "learning_rate": 0.00019853273039776342, "loss": 12.7495, "step": 3047 }, { "epoch": 0.16597578158501972, "grad_norm": 0.7127847304079973, "learning_rate": 0.0001985312249546157, "loss": 12.8242, "step": 3048 }, { "epoch": 0.1660302355816027, "grad_norm": 0.8142909144845354, "learning_rate": 0.00019852971874527263, "loss": 12.7933, "step": 3049 }, { "epoch": 0.16608468957818573, "grad_norm": 0.8571399956290684, "learning_rate": 0.00019852821176974595, "loss": 12.7599, "step": 3050 }, { "epoch": 0.16613914357476875, "grad_norm": 0.7255126471391168, "learning_rate": 0.0001985267040280474, "loss": 12.6355, "step": 3051 }, { "epoch": 0.16619359757135174, "grad_norm": 0.8548735300335262, "learning_rate": 0.00019852519552018868, "loss": 12.7333, "step": 3052 }, { "epoch": 0.16624805156793476, "grad_norm": 0.9585000229029373, "learning_rate": 0.0001985236862461815, "loss": 12.6209, "step": 3053 }, { "epoch": 0.16630250556451778, "grad_norm": 0.8346856967389, "learning_rate": 0.0001985221762060376, "loss": 12.8314, "step": 3054 }, { "epoch": 0.16635695956110078, "grad_norm": 0.781151732816868, "learning_rate": 0.00019852066539976875, "loss": 12.6076, "step": 3055 }, { "epoch": 0.1664114135576838, "grad_norm": 0.8753576736840428, "learning_rate": 0.00019851915382738668, "loss": 12.7246, "step": 3056 }, { "epoch": 0.16646586755426682, "grad_norm": 0.7551967445969222, "learning_rate": 0.00019851764148890317, "loss": 12.672, "step": 3057 }, { "epoch": 0.1665203215508498, "grad_norm": 0.7104804983466242, "learning_rate": 0.00019851612838432992, "loss": 12.5429, "step": 3058 }, { "epoch": 0.16657477554743283, "grad_norm": 0.810574169985711, "learning_rate": 0.00019851461451367877, "loss": 12.6264, "step": 3059 }, { "epoch": 0.16662922954401585, "grad_norm": 0.7584177948039127, "learning_rate": 0.00019851309987696145, "loss": 12.7277, "step": 3060 }, { "epoch": 0.16668368354059887, "grad_norm": 0.7381274284204892, "learning_rate": 0.00019851158447418973, "loss": 12.7544, "step": 3061 }, { "epoch": 0.16673813753718186, "grad_norm": 0.7600336760368321, "learning_rate": 0.00019851006830537543, "loss": 12.6901, "step": 3062 }, { "epoch": 0.16679259153376488, "grad_norm": 0.8158717737436204, "learning_rate": 0.00019850855137053028, "loss": 12.7702, "step": 3063 }, { "epoch": 0.1668470455303479, "grad_norm": 0.74919777778861, "learning_rate": 0.00019850703366966615, "loss": 12.605, "step": 3064 }, { "epoch": 0.1669014995269309, "grad_norm": 0.7882382250155103, "learning_rate": 0.00019850551520279478, "loss": 12.806, "step": 3065 }, { "epoch": 0.16695595352351392, "grad_norm": 0.7955354153451767, "learning_rate": 0.00019850399596992802, "loss": 12.6832, "step": 3066 }, { "epoch": 0.16701040752009694, "grad_norm": 0.7322099613336235, "learning_rate": 0.00019850247597107766, "loss": 12.6294, "step": 3067 }, { "epoch": 0.16706486151667993, "grad_norm": 0.7805140231750468, "learning_rate": 0.00019850095520625556, "loss": 12.7636, "step": 3068 }, { "epoch": 0.16711931551326295, "grad_norm": 0.7115740455768171, "learning_rate": 0.00019849943367547347, "loss": 12.5487, "step": 3069 }, { "epoch": 0.16717376950984597, "grad_norm": 0.7621711099837365, "learning_rate": 0.0001984979113787433, "loss": 12.7246, "step": 3070 }, { "epoch": 0.16722822350642896, "grad_norm": 0.7449652831037106, "learning_rate": 0.00019849638831607682, "loss": 12.6964, "step": 3071 }, { "epoch": 0.16728267750301198, "grad_norm": 0.7552484504379975, "learning_rate": 0.00019849486448748592, "loss": 12.73, "step": 3072 }, { "epoch": 0.167337131499595, "grad_norm": 0.7649404312476997, "learning_rate": 0.00019849333989298246, "loss": 12.6314, "step": 3073 }, { "epoch": 0.167391585496178, "grad_norm": 0.8408831313771824, "learning_rate": 0.00019849181453257826, "loss": 12.7805, "step": 3074 }, { "epoch": 0.16744603949276102, "grad_norm": 0.7302841277403656, "learning_rate": 0.00019849028840628516, "loss": 12.5063, "step": 3075 }, { "epoch": 0.16750049348934404, "grad_norm": 0.7544122102561781, "learning_rate": 0.00019848876151411511, "loss": 12.7941, "step": 3076 }, { "epoch": 0.16755494748592706, "grad_norm": 0.7918574476711356, "learning_rate": 0.00019848723385607989, "loss": 12.6725, "step": 3077 }, { "epoch": 0.16760940148251005, "grad_norm": 0.751101721987298, "learning_rate": 0.00019848570543219146, "loss": 12.6924, "step": 3078 }, { "epoch": 0.16766385547909307, "grad_norm": 0.6838336506900237, "learning_rate": 0.00019848417624246162, "loss": 12.6729, "step": 3079 }, { "epoch": 0.1677183094756761, "grad_norm": 0.7408676965449393, "learning_rate": 0.00019848264628690237, "loss": 12.7387, "step": 3080 }, { "epoch": 0.16777276347225908, "grad_norm": 0.7696366140174506, "learning_rate": 0.00019848111556552552, "loss": 12.7856, "step": 3081 }, { "epoch": 0.1678272174688421, "grad_norm": 0.8200316785096359, "learning_rate": 0.00019847958407834298, "loss": 12.6265, "step": 3082 }, { "epoch": 0.16788167146542513, "grad_norm": 0.681234990809023, "learning_rate": 0.0001984780518253667, "loss": 12.6042, "step": 3083 }, { "epoch": 0.16793612546200812, "grad_norm": 0.8458155782816993, "learning_rate": 0.00019847651880660857, "loss": 12.6334, "step": 3084 }, { "epoch": 0.16799057945859114, "grad_norm": 0.8541159959566602, "learning_rate": 0.00019847498502208053, "loss": 12.6321, "step": 3085 }, { "epoch": 0.16804503345517416, "grad_norm": 0.7817962520050384, "learning_rate": 0.00019847345047179445, "loss": 12.6858, "step": 3086 }, { "epoch": 0.16809948745175715, "grad_norm": 0.740149125300525, "learning_rate": 0.00019847191515576235, "loss": 12.6561, "step": 3087 }, { "epoch": 0.16815394144834017, "grad_norm": 0.7569091163765901, "learning_rate": 0.0001984703790739961, "loss": 12.6237, "step": 3088 }, { "epoch": 0.1682083954449232, "grad_norm": 0.8628887769130403, "learning_rate": 0.00019846884222650768, "loss": 12.7761, "step": 3089 }, { "epoch": 0.16826284944150618, "grad_norm": 0.8704712641782385, "learning_rate": 0.00019846730461330902, "loss": 12.76, "step": 3090 }, { "epoch": 0.1683173034380892, "grad_norm": 0.7408081447934876, "learning_rate": 0.0001984657662344121, "loss": 12.6615, "step": 3091 }, { "epoch": 0.16837175743467223, "grad_norm": 0.710881601350767, "learning_rate": 0.00019846422708982885, "loss": 12.5017, "step": 3092 }, { "epoch": 0.16842621143125525, "grad_norm": 0.8098661766532628, "learning_rate": 0.00019846268717957127, "loss": 12.6211, "step": 3093 }, { "epoch": 0.16848066542783824, "grad_norm": 0.775012426784901, "learning_rate": 0.00019846114650365134, "loss": 12.6198, "step": 3094 }, { "epoch": 0.16853511942442126, "grad_norm": 0.80326601496314, "learning_rate": 0.000198459605062081, "loss": 12.6613, "step": 3095 }, { "epoch": 0.16858957342100428, "grad_norm": 0.7354652502997332, "learning_rate": 0.00019845806285487228, "loss": 12.615, "step": 3096 }, { "epoch": 0.16864402741758727, "grad_norm": 0.7450886072732027, "learning_rate": 0.00019845651988203712, "loss": 12.54, "step": 3097 }, { "epoch": 0.1686984814141703, "grad_norm": 0.7933552614013386, "learning_rate": 0.00019845497614358757, "loss": 12.5981, "step": 3098 }, { "epoch": 0.1687529354107533, "grad_norm": 0.7968877726596918, "learning_rate": 0.0001984534316395356, "loss": 12.6495, "step": 3099 }, { "epoch": 0.1688073894073363, "grad_norm": 0.7612335336482233, "learning_rate": 0.00019845188636989324, "loss": 12.7011, "step": 3100 }, { "epoch": 0.16886184340391933, "grad_norm": 0.7379465764326891, "learning_rate": 0.00019845034033467253, "loss": 12.647, "step": 3101 }, { "epoch": 0.16891629740050235, "grad_norm": 0.6342380056369946, "learning_rate": 0.0001984487935338854, "loss": 12.3162, "step": 3102 }, { "epoch": 0.16897075139708534, "grad_norm": 0.7992548336863188, "learning_rate": 0.000198447245967544, "loss": 12.6207, "step": 3103 }, { "epoch": 0.16902520539366836, "grad_norm": 0.7347690982602108, "learning_rate": 0.0001984456976356603, "loss": 12.6189, "step": 3104 }, { "epoch": 0.16907965939025138, "grad_norm": 0.7411609138616736, "learning_rate": 0.0001984441485382463, "loss": 12.6495, "step": 3105 }, { "epoch": 0.16913411338683437, "grad_norm": 0.8311106556317931, "learning_rate": 0.00019844259867531414, "loss": 12.6338, "step": 3106 }, { "epoch": 0.1691885673834174, "grad_norm": 0.766495636804695, "learning_rate": 0.00019844104804687582, "loss": 12.7428, "step": 3107 }, { "epoch": 0.1692430213800004, "grad_norm": 0.8369928669689569, "learning_rate": 0.0001984394966529434, "loss": 12.7464, "step": 3108 }, { "epoch": 0.16929747537658343, "grad_norm": 0.8005274422552616, "learning_rate": 0.00019843794449352892, "loss": 12.5939, "step": 3109 }, { "epoch": 0.16935192937316643, "grad_norm": 0.6739084256004799, "learning_rate": 0.0001984363915686445, "loss": 12.4955, "step": 3110 }, { "epoch": 0.16940638336974945, "grad_norm": 0.8504633667447858, "learning_rate": 0.00019843483787830216, "loss": 12.6238, "step": 3111 }, { "epoch": 0.16946083736633247, "grad_norm": 0.8152522438802968, "learning_rate": 0.00019843328342251406, "loss": 12.8807, "step": 3112 }, { "epoch": 0.16951529136291546, "grad_norm": 0.7834123281067574, "learning_rate": 0.0001984317282012922, "loss": 12.7353, "step": 3113 }, { "epoch": 0.16956974535949848, "grad_norm": 0.7587210169588774, "learning_rate": 0.00019843017221464875, "loss": 12.6709, "step": 3114 }, { "epoch": 0.1696241993560815, "grad_norm": 0.8104141569775231, "learning_rate": 0.00019842861546259574, "loss": 12.6332, "step": 3115 }, { "epoch": 0.1696786533526645, "grad_norm": 0.959464087999072, "learning_rate": 0.00019842705794514533, "loss": 12.6767, "step": 3116 }, { "epoch": 0.16973310734924751, "grad_norm": 0.7914692125288751, "learning_rate": 0.00019842549966230961, "loss": 12.8013, "step": 3117 }, { "epoch": 0.16978756134583053, "grad_norm": 0.6492714647380093, "learning_rate": 0.0001984239406141007, "loss": 12.5668, "step": 3118 }, { "epoch": 0.16984201534241353, "grad_norm": 0.9351858633515623, "learning_rate": 0.00019842238080053072, "loss": 12.64, "step": 3119 }, { "epoch": 0.16989646933899655, "grad_norm": 0.8610702832263224, "learning_rate": 0.00019842082022161178, "loss": 12.7856, "step": 3120 }, { "epoch": 0.16995092333557957, "grad_norm": 0.7780332218014057, "learning_rate": 0.00019841925887735607, "loss": 12.6922, "step": 3121 }, { "epoch": 0.17000537733216256, "grad_norm": 0.8377695759243816, "learning_rate": 0.0001984176967677757, "loss": 12.7686, "step": 3122 }, { "epoch": 0.17005983132874558, "grad_norm": 0.8259157927171062, "learning_rate": 0.0001984161338928828, "loss": 12.6289, "step": 3123 }, { "epoch": 0.1701142853253286, "grad_norm": 0.797915984011378, "learning_rate": 0.00019841457025268953, "loss": 12.6509, "step": 3124 }, { "epoch": 0.1701687393219116, "grad_norm": 0.7578127442759067, "learning_rate": 0.00019841300584720807, "loss": 12.6557, "step": 3125 }, { "epoch": 0.17022319331849461, "grad_norm": 0.8260447831694434, "learning_rate": 0.00019841144067645058, "loss": 12.6095, "step": 3126 }, { "epoch": 0.17027764731507763, "grad_norm": 0.9170401923117737, "learning_rate": 0.00019840987474042918, "loss": 12.6943, "step": 3127 }, { "epoch": 0.17033210131166066, "grad_norm": 0.6838717466562656, "learning_rate": 0.0001984083080391561, "loss": 12.5913, "step": 3128 }, { "epoch": 0.17038655530824365, "grad_norm": 0.6734878857682747, "learning_rate": 0.00019840674057264356, "loss": 12.5914, "step": 3129 }, { "epoch": 0.17044100930482667, "grad_norm": 0.8851280009559688, "learning_rate": 0.00019840517234090367, "loss": 12.7171, "step": 3130 }, { "epoch": 0.1704954633014097, "grad_norm": 0.8336628423928786, "learning_rate": 0.00019840360334394869, "loss": 12.5769, "step": 3131 }, { "epoch": 0.17054991729799268, "grad_norm": 0.8233460415229792, "learning_rate": 0.00019840203358179075, "loss": 12.6803, "step": 3132 }, { "epoch": 0.1706043712945757, "grad_norm": 0.8575100075433871, "learning_rate": 0.00019840046305444212, "loss": 12.6334, "step": 3133 }, { "epoch": 0.17065882529115872, "grad_norm": 0.8402671498996254, "learning_rate": 0.00019839889176191498, "loss": 12.7051, "step": 3134 }, { "epoch": 0.17071327928774171, "grad_norm": 0.8442756476235956, "learning_rate": 0.00019839731970422152, "loss": 12.598, "step": 3135 }, { "epoch": 0.17076773328432474, "grad_norm": 0.7790665163489613, "learning_rate": 0.00019839574688137405, "loss": 12.673, "step": 3136 }, { "epoch": 0.17082218728090776, "grad_norm": 0.6776444996263663, "learning_rate": 0.00019839417329338473, "loss": 12.5547, "step": 3137 }, { "epoch": 0.17087664127749075, "grad_norm": 0.7170712222884784, "learning_rate": 0.00019839259894026583, "loss": 12.6168, "step": 3138 }, { "epoch": 0.17093109527407377, "grad_norm": 0.8250292301702511, "learning_rate": 0.00019839102382202957, "loss": 12.6966, "step": 3139 }, { "epoch": 0.1709855492706568, "grad_norm": 0.776378688447562, "learning_rate": 0.00019838944793868822, "loss": 12.6753, "step": 3140 }, { "epoch": 0.17104000326723978, "grad_norm": 0.7560489984821958, "learning_rate": 0.000198387871290254, "loss": 12.5989, "step": 3141 }, { "epoch": 0.1710944572638228, "grad_norm": 0.7974557424591885, "learning_rate": 0.0001983862938767392, "loss": 12.5405, "step": 3142 }, { "epoch": 0.17114891126040582, "grad_norm": 0.7014349054792098, "learning_rate": 0.0001983847156981561, "loss": 12.5353, "step": 3143 }, { "epoch": 0.17120336525698884, "grad_norm": 0.723315929267737, "learning_rate": 0.00019838313675451695, "loss": 12.6474, "step": 3144 }, { "epoch": 0.17125781925357184, "grad_norm": 0.9779302802477249, "learning_rate": 0.00019838155704583402, "loss": 12.8977, "step": 3145 }, { "epoch": 0.17131227325015486, "grad_norm": 0.7365367954267793, "learning_rate": 0.00019837997657211958, "loss": 12.68, "step": 3146 }, { "epoch": 0.17136672724673788, "grad_norm": 0.7836697915049639, "learning_rate": 0.00019837839533338599, "loss": 12.5582, "step": 3147 }, { "epoch": 0.17142118124332087, "grad_norm": 0.694905046743744, "learning_rate": 0.00019837681332964544, "loss": 12.6245, "step": 3148 }, { "epoch": 0.1714756352399039, "grad_norm": 0.7345872077956427, "learning_rate": 0.00019837523056091035, "loss": 12.6434, "step": 3149 }, { "epoch": 0.1715300892364869, "grad_norm": 0.8676001382683689, "learning_rate": 0.00019837364702719295, "loss": 12.7774, "step": 3150 }, { "epoch": 0.1715845432330699, "grad_norm": 0.749859401437326, "learning_rate": 0.00019837206272850554, "loss": 12.7502, "step": 3151 }, { "epoch": 0.17163899722965292, "grad_norm": 0.7496417092437363, "learning_rate": 0.00019837047766486053, "loss": 12.7435, "step": 3152 }, { "epoch": 0.17169345122623594, "grad_norm": 0.7099237668025427, "learning_rate": 0.00019836889183627015, "loss": 12.495, "step": 3153 }, { "epoch": 0.17174790522281894, "grad_norm": 0.7285641308612487, "learning_rate": 0.0001983673052427468, "loss": 12.5386, "step": 3154 }, { "epoch": 0.17180235921940196, "grad_norm": 0.7009655388375987, "learning_rate": 0.00019836571788430275, "loss": 12.7553, "step": 3155 }, { "epoch": 0.17185681321598498, "grad_norm": 0.7463493574611223, "learning_rate": 0.0001983641297609504, "loss": 12.636, "step": 3156 }, { "epoch": 0.17191126721256797, "grad_norm": 0.787048181874437, "learning_rate": 0.00019836254087270206, "loss": 12.7262, "step": 3157 }, { "epoch": 0.171965721209151, "grad_norm": 0.814045454316862, "learning_rate": 0.00019836095121957014, "loss": 12.6681, "step": 3158 }, { "epoch": 0.172020175205734, "grad_norm": 0.8867722235844003, "learning_rate": 0.00019835936080156698, "loss": 12.5865, "step": 3159 }, { "epoch": 0.17207462920231703, "grad_norm": 0.7674062863483792, "learning_rate": 0.00019835776961870492, "loss": 12.5926, "step": 3160 }, { "epoch": 0.17212908319890002, "grad_norm": 0.877774539505825, "learning_rate": 0.00019835617767099633, "loss": 12.604, "step": 3161 }, { "epoch": 0.17218353719548304, "grad_norm": 0.7515196326795087, "learning_rate": 0.00019835458495845362, "loss": 12.683, "step": 3162 }, { "epoch": 0.17223799119206606, "grad_norm": 0.7810814803217557, "learning_rate": 0.00019835299148108918, "loss": 12.5438, "step": 3163 }, { "epoch": 0.17229244518864906, "grad_norm": 0.8523834949136776, "learning_rate": 0.00019835139723891536, "loss": 12.7393, "step": 3164 }, { "epoch": 0.17234689918523208, "grad_norm": 0.7607407245315153, "learning_rate": 0.0001983498022319446, "loss": 12.7176, "step": 3165 }, { "epoch": 0.1724013531818151, "grad_norm": 0.76191705067529, "learning_rate": 0.00019834820646018927, "loss": 12.5316, "step": 3166 }, { "epoch": 0.1724558071783981, "grad_norm": 0.7260989726107212, "learning_rate": 0.0001983466099236618, "loss": 12.7281, "step": 3167 }, { "epoch": 0.1725102611749811, "grad_norm": 0.7763558291867316, "learning_rate": 0.00019834501262237458, "loss": 12.6782, "step": 3168 }, { "epoch": 0.17256471517156413, "grad_norm": 0.7983010911998738, "learning_rate": 0.0001983434145563401, "loss": 12.7117, "step": 3169 }, { "epoch": 0.17261916916814712, "grad_norm": 0.7605819883968827, "learning_rate": 0.00019834181572557066, "loss": 12.7948, "step": 3170 }, { "epoch": 0.17267362316473014, "grad_norm": 0.8374294116547367, "learning_rate": 0.00019834021613007882, "loss": 12.7513, "step": 3171 }, { "epoch": 0.17272807716131316, "grad_norm": 0.6585356583533877, "learning_rate": 0.00019833861576987698, "loss": 12.5781, "step": 3172 }, { "epoch": 0.17278253115789616, "grad_norm": 0.9425149602084731, "learning_rate": 0.00019833701464497754, "loss": 12.7727, "step": 3173 }, { "epoch": 0.17283698515447918, "grad_norm": 0.7464082223439957, "learning_rate": 0.000198335412755393, "loss": 12.7494, "step": 3174 }, { "epoch": 0.1728914391510622, "grad_norm": 0.8587155233360285, "learning_rate": 0.00019833381010113578, "loss": 12.5224, "step": 3175 }, { "epoch": 0.17294589314764522, "grad_norm": 0.747952703741679, "learning_rate": 0.0001983322066822184, "loss": 12.4944, "step": 3176 }, { "epoch": 0.1730003471442282, "grad_norm": 0.7827107540027283, "learning_rate": 0.00019833060249865327, "loss": 12.6914, "step": 3177 }, { "epoch": 0.17305480114081123, "grad_norm": 0.7433316310571197, "learning_rate": 0.0001983289975504529, "loss": 12.4551, "step": 3178 }, { "epoch": 0.17310925513739425, "grad_norm": 0.8347754411825807, "learning_rate": 0.00019832739183762969, "loss": 12.7181, "step": 3179 }, { "epoch": 0.17316370913397724, "grad_norm": 0.7751754459911796, "learning_rate": 0.00019832578536019626, "loss": 12.6883, "step": 3180 }, { "epoch": 0.17321816313056027, "grad_norm": 0.8741734159288691, "learning_rate": 0.00019832417811816501, "loss": 12.7347, "step": 3181 }, { "epoch": 0.17327261712714329, "grad_norm": 0.8318468746826131, "learning_rate": 0.00019832257011154845, "loss": 12.6819, "step": 3182 }, { "epoch": 0.17332707112372628, "grad_norm": 0.7039626157093785, "learning_rate": 0.0001983209613403591, "loss": 12.674, "step": 3183 }, { "epoch": 0.1733815251203093, "grad_norm": 0.9025010506269673, "learning_rate": 0.0001983193518046095, "loss": 12.5735, "step": 3184 }, { "epoch": 0.17343597911689232, "grad_norm": 0.7546511643836435, "learning_rate": 0.00019831774150431213, "loss": 12.6793, "step": 3185 }, { "epoch": 0.1734904331134753, "grad_norm": 0.8394354751748568, "learning_rate": 0.00019831613043947948, "loss": 12.6723, "step": 3186 }, { "epoch": 0.17354488711005833, "grad_norm": 0.7574481668785278, "learning_rate": 0.0001983145186101241, "loss": 12.5683, "step": 3187 }, { "epoch": 0.17359934110664135, "grad_norm": 0.9026138172851492, "learning_rate": 0.0001983129060162586, "loss": 12.638, "step": 3188 }, { "epoch": 0.17365379510322435, "grad_norm": 0.6765043031700043, "learning_rate": 0.00019831129265789537, "loss": 12.6092, "step": 3189 }, { "epoch": 0.17370824909980737, "grad_norm": 0.7886541026462125, "learning_rate": 0.0001983096785350471, "loss": 12.7237, "step": 3190 }, { "epoch": 0.1737627030963904, "grad_norm": 0.7844971851503274, "learning_rate": 0.0001983080636477263, "loss": 12.7892, "step": 3191 }, { "epoch": 0.17381715709297338, "grad_norm": 0.6930078606339654, "learning_rate": 0.00019830644799594547, "loss": 12.5887, "step": 3192 }, { "epoch": 0.1738716110895564, "grad_norm": 0.7923600954734632, "learning_rate": 0.00019830483157971723, "loss": 12.6886, "step": 3193 }, { "epoch": 0.17392606508613942, "grad_norm": 0.8338987623626151, "learning_rate": 0.00019830321439905414, "loss": 12.7361, "step": 3194 }, { "epoch": 0.17398051908272244, "grad_norm": 0.7749506167348895, "learning_rate": 0.00019830159645396875, "loss": 12.849, "step": 3195 }, { "epoch": 0.17403497307930543, "grad_norm": 0.7390801603555475, "learning_rate": 0.00019829997774447366, "loss": 12.5025, "step": 3196 }, { "epoch": 0.17408942707588845, "grad_norm": 0.6948756460703287, "learning_rate": 0.0001982983582705815, "loss": 12.6153, "step": 3197 }, { "epoch": 0.17414388107247147, "grad_norm": 0.7011689581732615, "learning_rate": 0.00019829673803230477, "loss": 12.7191, "step": 3198 }, { "epoch": 0.17419833506905447, "grad_norm": 0.6850588411098556, "learning_rate": 0.00019829511702965612, "loss": 12.5695, "step": 3199 }, { "epoch": 0.1742527890656375, "grad_norm": 0.7668874363449892, "learning_rate": 0.0001982934952626482, "loss": 12.7662, "step": 3200 }, { "epoch": 0.1743072430622205, "grad_norm": 0.7504905218030471, "learning_rate": 0.00019829187273129355, "loss": 12.6012, "step": 3201 }, { "epoch": 0.1743616970588035, "grad_norm": 0.7171816702004552, "learning_rate": 0.00019829024943560485, "loss": 12.6831, "step": 3202 }, { "epoch": 0.17441615105538652, "grad_norm": 0.698158237637559, "learning_rate": 0.00019828862537559462, "loss": 12.5953, "step": 3203 }, { "epoch": 0.17447060505196954, "grad_norm": 0.8732362510339932, "learning_rate": 0.0001982870005512756, "loss": 12.6716, "step": 3204 }, { "epoch": 0.17452505904855253, "grad_norm": 0.8154029138565516, "learning_rate": 0.00019828537496266037, "loss": 12.6764, "step": 3205 }, { "epoch": 0.17457951304513555, "grad_norm": 0.7699923056388698, "learning_rate": 0.0001982837486097616, "loss": 12.6628, "step": 3206 }, { "epoch": 0.17463396704171857, "grad_norm": 0.7861215643164291, "learning_rate": 0.00019828212149259188, "loss": 12.6717, "step": 3207 }, { "epoch": 0.17468842103830157, "grad_norm": 0.6983338883252923, "learning_rate": 0.0001982804936111639, "loss": 12.6699, "step": 3208 }, { "epoch": 0.1747428750348846, "grad_norm": 0.8298596274818021, "learning_rate": 0.00019827886496549037, "loss": 12.5887, "step": 3209 }, { "epoch": 0.1747973290314676, "grad_norm": 0.7776100334544354, "learning_rate": 0.0001982772355555839, "loss": 12.6827, "step": 3210 }, { "epoch": 0.17485178302805063, "grad_norm": 0.7710869955834241, "learning_rate": 0.00019827560538145713, "loss": 12.6449, "step": 3211 }, { "epoch": 0.17490623702463362, "grad_norm": 0.7444032286300318, "learning_rate": 0.0001982739744431228, "loss": 12.6241, "step": 3212 }, { "epoch": 0.17496069102121664, "grad_norm": 0.8584217284851458, "learning_rate": 0.00019827234274059352, "loss": 12.6233, "step": 3213 }, { "epoch": 0.17501514501779966, "grad_norm": 0.7472171044808118, "learning_rate": 0.00019827071027388207, "loss": 12.7727, "step": 3214 }, { "epoch": 0.17506959901438265, "grad_norm": 0.7241449526372566, "learning_rate": 0.00019826907704300107, "loss": 12.6059, "step": 3215 }, { "epoch": 0.17512405301096567, "grad_norm": 0.6725069804670222, "learning_rate": 0.00019826744304796327, "loss": 12.5751, "step": 3216 }, { "epoch": 0.1751785070075487, "grad_norm": 0.6894594253663173, "learning_rate": 0.00019826580828878135, "loss": 12.6274, "step": 3217 }, { "epoch": 0.1752329610041317, "grad_norm": 0.7304834406583183, "learning_rate": 0.000198264172765468, "loss": 12.7152, "step": 3218 }, { "epoch": 0.1752874150007147, "grad_norm": 0.8386256205995041, "learning_rate": 0.00019826253647803598, "loss": 12.6445, "step": 3219 }, { "epoch": 0.17534186899729773, "grad_norm": 1.270172642023828, "learning_rate": 0.000198260899426498, "loss": 12.5894, "step": 3220 }, { "epoch": 0.17539632299388072, "grad_norm": 0.7344319767853419, "learning_rate": 0.00019825926161086679, "loss": 12.7958, "step": 3221 }, { "epoch": 0.17545077699046374, "grad_norm": 0.734680231052794, "learning_rate": 0.00019825762303115506, "loss": 12.6942, "step": 3222 }, { "epoch": 0.17550523098704676, "grad_norm": 0.8336006775340267, "learning_rate": 0.00019825598368737558, "loss": 12.8308, "step": 3223 }, { "epoch": 0.17555968498362975, "grad_norm": 0.7262882153581018, "learning_rate": 0.00019825434357954112, "loss": 12.6866, "step": 3224 }, { "epoch": 0.17561413898021278, "grad_norm": 0.7392613877216176, "learning_rate": 0.0001982527027076644, "loss": 12.6137, "step": 3225 }, { "epoch": 0.1756685929767958, "grad_norm": 0.6975694053545508, "learning_rate": 0.0001982510610717582, "loss": 12.6543, "step": 3226 }, { "epoch": 0.17572304697337882, "grad_norm": 0.7534491680096834, "learning_rate": 0.00019824941867183524, "loss": 12.6165, "step": 3227 }, { "epoch": 0.1757775009699618, "grad_norm": 0.7788563975747334, "learning_rate": 0.00019824777550790835, "loss": 12.5636, "step": 3228 }, { "epoch": 0.17583195496654483, "grad_norm": 0.7506448121279704, "learning_rate": 0.00019824613157999027, "loss": 12.5972, "step": 3229 }, { "epoch": 0.17588640896312785, "grad_norm": 0.6771530701488644, "learning_rate": 0.0001982444868880938, "loss": 12.6503, "step": 3230 }, { "epoch": 0.17594086295971084, "grad_norm": 0.8799280336028142, "learning_rate": 0.0001982428414322317, "loss": 12.7639, "step": 3231 }, { "epoch": 0.17599531695629386, "grad_norm": 0.7902749117766416, "learning_rate": 0.0001982411952124168, "loss": 12.768, "step": 3232 }, { "epoch": 0.17604977095287688, "grad_norm": 0.6973865579783748, "learning_rate": 0.00019823954822866191, "loss": 12.5541, "step": 3233 }, { "epoch": 0.17610422494945988, "grad_norm": 0.6975249553608402, "learning_rate": 0.0001982379004809798, "loss": 12.6062, "step": 3234 }, { "epoch": 0.1761586789460429, "grad_norm": 0.7168173724680111, "learning_rate": 0.0001982362519693833, "loss": 12.7842, "step": 3235 }, { "epoch": 0.17621313294262592, "grad_norm": 0.7565153526890861, "learning_rate": 0.00019823460269388526, "loss": 12.7059, "step": 3236 }, { "epoch": 0.1762675869392089, "grad_norm": 0.8209036578975398, "learning_rate": 0.00019823295265449843, "loss": 12.6638, "step": 3237 }, { "epoch": 0.17632204093579193, "grad_norm": 0.7981299201729861, "learning_rate": 0.00019823130185123573, "loss": 12.7187, "step": 3238 }, { "epoch": 0.17637649493237495, "grad_norm": 0.7747231650804178, "learning_rate": 0.00019822965028410994, "loss": 12.6225, "step": 3239 }, { "epoch": 0.17643094892895794, "grad_norm": 0.7333857540750321, "learning_rate": 0.0001982279979531339, "loss": 12.6742, "step": 3240 }, { "epoch": 0.17648540292554096, "grad_norm": 0.8397253043859492, "learning_rate": 0.0001982263448583205, "loss": 12.7436, "step": 3241 }, { "epoch": 0.17653985692212398, "grad_norm": 0.7815864103858401, "learning_rate": 0.00019822469099968254, "loss": 12.6333, "step": 3242 }, { "epoch": 0.176594310918707, "grad_norm": 0.8209808262548153, "learning_rate": 0.0001982230363772329, "loss": 12.7196, "step": 3243 }, { "epoch": 0.17664876491529, "grad_norm": 0.7262043789621151, "learning_rate": 0.0001982213809909845, "loss": 12.6364, "step": 3244 }, { "epoch": 0.17670321891187302, "grad_norm": 0.8607138924882243, "learning_rate": 0.00019821972484095017, "loss": 12.7051, "step": 3245 }, { "epoch": 0.17675767290845604, "grad_norm": 0.8057809652765885, "learning_rate": 0.00019821806792714276, "loss": 12.5776, "step": 3246 }, { "epoch": 0.17681212690503903, "grad_norm": 0.7042559054435703, "learning_rate": 0.0001982164102495752, "loss": 12.4916, "step": 3247 }, { "epoch": 0.17686658090162205, "grad_norm": 1.057714095909052, "learning_rate": 0.00019821475180826034, "loss": 12.668, "step": 3248 }, { "epoch": 0.17692103489820507, "grad_norm": 0.7899993707377523, "learning_rate": 0.00019821309260321113, "loss": 12.5721, "step": 3249 }, { "epoch": 0.17697548889478806, "grad_norm": 0.8471101192669516, "learning_rate": 0.00019821143263444043, "loss": 12.5475, "step": 3250 }, { "epoch": 0.17702994289137108, "grad_norm": 0.6735795320454601, "learning_rate": 0.00019820977190196116, "loss": 12.5648, "step": 3251 }, { "epoch": 0.1770843968879541, "grad_norm": 0.9204359196176464, "learning_rate": 0.00019820811040578625, "loss": 12.9584, "step": 3252 }, { "epoch": 0.1771388508845371, "grad_norm": 0.9138396791378408, "learning_rate": 0.00019820644814592858, "loss": 12.6978, "step": 3253 }, { "epoch": 0.17719330488112012, "grad_norm": 0.7301330238149067, "learning_rate": 0.0001982047851224011, "loss": 12.545, "step": 3254 }, { "epoch": 0.17724775887770314, "grad_norm": 0.9013668175356127, "learning_rate": 0.00019820312133521674, "loss": 12.6496, "step": 3255 }, { "epoch": 0.17730221287428613, "grad_norm": 0.9341817253221993, "learning_rate": 0.00019820145678438845, "loss": 12.6747, "step": 3256 }, { "epoch": 0.17735666687086915, "grad_norm": 0.7502328588779547, "learning_rate": 0.00019819979146992915, "loss": 12.6536, "step": 3257 }, { "epoch": 0.17741112086745217, "grad_norm": 0.7681904747639378, "learning_rate": 0.0001981981253918518, "loss": 12.7236, "step": 3258 }, { "epoch": 0.17746557486403516, "grad_norm": 0.8920610418799703, "learning_rate": 0.0001981964585501694, "loss": 12.6539, "step": 3259 }, { "epoch": 0.17752002886061818, "grad_norm": 0.7628111397992703, "learning_rate": 0.00019819479094489482, "loss": 12.714, "step": 3260 }, { "epoch": 0.1775744828572012, "grad_norm": 0.7071311753164595, "learning_rate": 0.00019819312257604113, "loss": 12.6685, "step": 3261 }, { "epoch": 0.17762893685378423, "grad_norm": 0.8942352428065906, "learning_rate": 0.0001981914534436212, "loss": 12.7463, "step": 3262 }, { "epoch": 0.17768339085036722, "grad_norm": 0.8637475451289109, "learning_rate": 0.0001981897835476481, "loss": 12.5307, "step": 3263 }, { "epoch": 0.17773784484695024, "grad_norm": 0.9470761933823619, "learning_rate": 0.00019818811288813476, "loss": 12.7053, "step": 3264 }, { "epoch": 0.17779229884353326, "grad_norm": 0.7384704398906923, "learning_rate": 0.0001981864414650942, "loss": 12.6304, "step": 3265 }, { "epoch": 0.17784675284011625, "grad_norm": 0.7974334541792593, "learning_rate": 0.00019818476927853937, "loss": 12.7698, "step": 3266 }, { "epoch": 0.17790120683669927, "grad_norm": 0.728317539288044, "learning_rate": 0.00019818309632848333, "loss": 12.6083, "step": 3267 }, { "epoch": 0.1779556608332823, "grad_norm": 0.6856666037687628, "learning_rate": 0.00019818142261493908, "loss": 12.6041, "step": 3268 }, { "epoch": 0.17801011482986528, "grad_norm": 0.8080880119981516, "learning_rate": 0.00019817974813791963, "loss": 12.7661, "step": 3269 }, { "epoch": 0.1780645688264483, "grad_norm": 0.7107996562679639, "learning_rate": 0.00019817807289743798, "loss": 12.6347, "step": 3270 }, { "epoch": 0.17811902282303133, "grad_norm": 0.790679300367646, "learning_rate": 0.00019817639689350715, "loss": 12.6127, "step": 3271 }, { "epoch": 0.17817347681961432, "grad_norm": 0.7567804862897068, "learning_rate": 0.00019817472012614021, "loss": 12.7102, "step": 3272 }, { "epoch": 0.17822793081619734, "grad_norm": 0.8069231009329779, "learning_rate": 0.00019817304259535018, "loss": 12.7045, "step": 3273 }, { "epoch": 0.17828238481278036, "grad_norm": 0.7595982136123148, "learning_rate": 0.00019817136430115012, "loss": 12.6997, "step": 3274 }, { "epoch": 0.17833683880936335, "grad_norm": 0.7309321844207629, "learning_rate": 0.00019816968524355306, "loss": 12.6139, "step": 3275 }, { "epoch": 0.17839129280594637, "grad_norm": 0.7192650353233917, "learning_rate": 0.0001981680054225721, "loss": 12.6906, "step": 3276 }, { "epoch": 0.1784457468025294, "grad_norm": 0.7683459462222892, "learning_rate": 0.0001981663248382202, "loss": 12.758, "step": 3277 }, { "epoch": 0.1785002007991124, "grad_norm": 0.7604309449895051, "learning_rate": 0.00019816464349051055, "loss": 12.708, "step": 3278 }, { "epoch": 0.1785546547956954, "grad_norm": 0.6915703638272156, "learning_rate": 0.00019816296137945612, "loss": 12.638, "step": 3279 }, { "epoch": 0.17860910879227843, "grad_norm": 1.1253518154266444, "learning_rate": 0.00019816127850507008, "loss": 12.7063, "step": 3280 }, { "epoch": 0.17866356278886145, "grad_norm": 0.7630225540272567, "learning_rate": 0.00019815959486736543, "loss": 12.627, "step": 3281 }, { "epoch": 0.17871801678544444, "grad_norm": 0.9935145198532191, "learning_rate": 0.00019815791046635538, "loss": 12.7206, "step": 3282 }, { "epoch": 0.17877247078202746, "grad_norm": 0.736334650149649, "learning_rate": 0.0001981562253020529, "loss": 12.8125, "step": 3283 }, { "epoch": 0.17882692477861048, "grad_norm": 0.7808149186506571, "learning_rate": 0.00019815453937447117, "loss": 12.6405, "step": 3284 }, { "epoch": 0.17888137877519347, "grad_norm": 0.8210946819500441, "learning_rate": 0.0001981528526836233, "loss": 12.6571, "step": 3285 }, { "epoch": 0.1789358327717765, "grad_norm": 0.86965004642108, "learning_rate": 0.00019815116522952235, "loss": 12.7401, "step": 3286 }, { "epoch": 0.1789902867683595, "grad_norm": 0.7868117017342933, "learning_rate": 0.0001981494770121815, "loss": 12.5413, "step": 3287 }, { "epoch": 0.1790447407649425, "grad_norm": 0.7415827071978902, "learning_rate": 0.00019814778803161387, "loss": 12.6923, "step": 3288 }, { "epoch": 0.17909919476152553, "grad_norm": 0.6851120650197461, "learning_rate": 0.00019814609828783258, "loss": 12.56, "step": 3289 }, { "epoch": 0.17915364875810855, "grad_norm": 0.8225961915115911, "learning_rate": 0.00019814440778085076, "loss": 12.6163, "step": 3290 }, { "epoch": 0.17920810275469154, "grad_norm": 0.7409751466179318, "learning_rate": 0.00019814271651068154, "loss": 12.6108, "step": 3291 }, { "epoch": 0.17926255675127456, "grad_norm": 0.7492258593142511, "learning_rate": 0.0001981410244773381, "loss": 12.5378, "step": 3292 }, { "epoch": 0.17931701074785758, "grad_norm": 0.7253161388755572, "learning_rate": 0.00019813933168083363, "loss": 12.7377, "step": 3293 }, { "epoch": 0.1793714647444406, "grad_norm": 0.8344633552358383, "learning_rate": 0.00019813763812118125, "loss": 12.7151, "step": 3294 }, { "epoch": 0.1794259187410236, "grad_norm": 0.7765629199089185, "learning_rate": 0.00019813594379839415, "loss": 12.5645, "step": 3295 }, { "epoch": 0.1794803727376066, "grad_norm": 0.8082931219937859, "learning_rate": 0.00019813424871248546, "loss": 12.7144, "step": 3296 }, { "epoch": 0.17953482673418963, "grad_norm": 0.7589009678676194, "learning_rate": 0.00019813255286346842, "loss": 12.7288, "step": 3297 }, { "epoch": 0.17958928073077263, "grad_norm": 0.790735241959672, "learning_rate": 0.0001981308562513562, "loss": 12.6141, "step": 3298 }, { "epoch": 0.17964373472735565, "grad_norm": 0.7073433938535153, "learning_rate": 0.00019812915887616194, "loss": 12.5315, "step": 3299 }, { "epoch": 0.17969818872393867, "grad_norm": 0.7446488567697064, "learning_rate": 0.00019812746073789893, "loss": 12.6907, "step": 3300 }, { "epoch": 0.17975264272052166, "grad_norm": 0.7491534452924706, "learning_rate": 0.0001981257618365803, "loss": 12.6864, "step": 3301 }, { "epoch": 0.17980709671710468, "grad_norm": 0.821475530739584, "learning_rate": 0.0001981240621722193, "loss": 12.6571, "step": 3302 }, { "epoch": 0.1798615507136877, "grad_norm": 0.8260413049475901, "learning_rate": 0.00019812236174482917, "loss": 12.7932, "step": 3303 }, { "epoch": 0.1799160047102707, "grad_norm": 0.8552027146811687, "learning_rate": 0.00019812066055442307, "loss": 12.7166, "step": 3304 }, { "epoch": 0.17997045870685371, "grad_norm": 0.691431305896408, "learning_rate": 0.00019811895860101424, "loss": 12.7565, "step": 3305 }, { "epoch": 0.18002491270343673, "grad_norm": 0.7541206039195266, "learning_rate": 0.00019811725588461595, "loss": 12.7832, "step": 3306 }, { "epoch": 0.18007936670001973, "grad_norm": 0.7329115364844615, "learning_rate": 0.00019811555240524143, "loss": 12.6535, "step": 3307 }, { "epoch": 0.18013382069660275, "grad_norm": 0.7848715954841441, "learning_rate": 0.00019811384816290393, "loss": 12.7121, "step": 3308 }, { "epoch": 0.18018827469318577, "grad_norm": 0.7401571327497739, "learning_rate": 0.0001981121431576167, "loss": 12.6728, "step": 3309 }, { "epoch": 0.1802427286897688, "grad_norm": 0.7385315373003121, "learning_rate": 0.00019811043738939295, "loss": 12.6642, "step": 3310 }, { "epoch": 0.18029718268635178, "grad_norm": 0.795431374670674, "learning_rate": 0.00019810873085824603, "loss": 12.5564, "step": 3311 }, { "epoch": 0.1803516366829348, "grad_norm": 0.7905420016747329, "learning_rate": 0.00019810702356418914, "loss": 12.7727, "step": 3312 }, { "epoch": 0.18040609067951782, "grad_norm": 0.856463722382322, "learning_rate": 0.00019810531550723556, "loss": 12.7771, "step": 3313 }, { "epoch": 0.18046054467610081, "grad_norm": 0.663682842099032, "learning_rate": 0.00019810360668739863, "loss": 12.5482, "step": 3314 }, { "epoch": 0.18051499867268384, "grad_norm": 0.7304808709709537, "learning_rate": 0.0001981018971046916, "loss": 12.6683, "step": 3315 }, { "epoch": 0.18056945266926686, "grad_norm": 0.8716973827013581, "learning_rate": 0.00019810018675912774, "loss": 12.6943, "step": 3316 }, { "epoch": 0.18062390666584985, "grad_norm": 0.8052038652161817, "learning_rate": 0.0001980984756507204, "loss": 12.7423, "step": 3317 }, { "epoch": 0.18067836066243287, "grad_norm": 0.6865640501576407, "learning_rate": 0.00019809676377948287, "loss": 12.5838, "step": 3318 }, { "epoch": 0.1807328146590159, "grad_norm": 0.7787199776120453, "learning_rate": 0.00019809505114542843, "loss": 12.7606, "step": 3319 }, { "epoch": 0.18078726865559888, "grad_norm": 0.6657871497753294, "learning_rate": 0.00019809333774857045, "loss": 12.619, "step": 3320 }, { "epoch": 0.1808417226521819, "grad_norm": 0.7472678299687542, "learning_rate": 0.00019809162358892218, "loss": 12.778, "step": 3321 }, { "epoch": 0.18089617664876492, "grad_norm": 0.7521286002919139, "learning_rate": 0.00019808990866649703, "loss": 12.5975, "step": 3322 }, { "epoch": 0.18095063064534792, "grad_norm": 0.7485712192091213, "learning_rate": 0.0001980881929813083, "loss": 12.5831, "step": 3323 }, { "epoch": 0.18100508464193094, "grad_norm": 0.7729968986430557, "learning_rate": 0.00019808647653336934, "loss": 12.6564, "step": 3324 }, { "epoch": 0.18105953863851396, "grad_norm": 0.7335308303636128, "learning_rate": 0.00019808475932269346, "loss": 12.628, "step": 3325 }, { "epoch": 0.18111399263509695, "grad_norm": 0.8883852418423221, "learning_rate": 0.00019808304134929405, "loss": 12.7038, "step": 3326 }, { "epoch": 0.18116844663167997, "grad_norm": 0.8086481974347323, "learning_rate": 0.00019808132261318447, "loss": 12.5788, "step": 3327 }, { "epoch": 0.181222900628263, "grad_norm": 0.7483757045666115, "learning_rate": 0.00019807960311437809, "loss": 12.7052, "step": 3328 }, { "epoch": 0.181277354624846, "grad_norm": 0.7724274921487599, "learning_rate": 0.00019807788285288824, "loss": 12.6693, "step": 3329 }, { "epoch": 0.181331808621429, "grad_norm": 0.741722193902972, "learning_rate": 0.00019807616182872833, "loss": 12.5813, "step": 3330 }, { "epoch": 0.18138626261801202, "grad_norm": 0.7272278492201305, "learning_rate": 0.00019807444004191175, "loss": 12.5707, "step": 3331 }, { "epoch": 0.18144071661459504, "grad_norm": 0.7854612846382387, "learning_rate": 0.00019807271749245188, "loss": 12.8055, "step": 3332 }, { "epoch": 0.18149517061117804, "grad_norm": 0.823783487606722, "learning_rate": 0.00019807099418036212, "loss": 12.6623, "step": 3333 }, { "epoch": 0.18154962460776106, "grad_norm": 0.7936635697631835, "learning_rate": 0.00019806927010565585, "loss": 12.5691, "step": 3334 }, { "epoch": 0.18160407860434408, "grad_norm": 0.7383972750382136, "learning_rate": 0.0001980675452683465, "loss": 12.4969, "step": 3335 }, { "epoch": 0.18165853260092707, "grad_norm": 0.8732299978656494, "learning_rate": 0.00019806581966844746, "loss": 12.6761, "step": 3336 }, { "epoch": 0.1817129865975101, "grad_norm": 0.8064179318291677, "learning_rate": 0.00019806409330597218, "loss": 12.7272, "step": 3337 }, { "epoch": 0.1817674405940931, "grad_norm": 0.7422343085071372, "learning_rate": 0.00019806236618093403, "loss": 12.6766, "step": 3338 }, { "epoch": 0.1818218945906761, "grad_norm": 0.8175942277868645, "learning_rate": 0.0001980606382933465, "loss": 12.6871, "step": 3339 }, { "epoch": 0.18187634858725912, "grad_norm": 0.7404435885300982, "learning_rate": 0.000198058909643223, "loss": 12.5923, "step": 3340 }, { "epoch": 0.18193080258384214, "grad_norm": 0.8017199061331057, "learning_rate": 0.00019805718023057695, "loss": 12.6781, "step": 3341 }, { "epoch": 0.18198525658042514, "grad_norm": 0.6885014995804388, "learning_rate": 0.00019805545005542184, "loss": 12.7684, "step": 3342 }, { "epoch": 0.18203971057700816, "grad_norm": 0.7907680166328611, "learning_rate": 0.00019805371911777112, "loss": 12.8546, "step": 3343 }, { "epoch": 0.18209416457359118, "grad_norm": 0.7060355670276862, "learning_rate": 0.00019805198741763822, "loss": 12.4949, "step": 3344 }, { "epoch": 0.1821486185701742, "grad_norm": 0.9516032000572783, "learning_rate": 0.00019805025495503662, "loss": 12.7103, "step": 3345 }, { "epoch": 0.1822030725667572, "grad_norm": 0.7449253439223767, "learning_rate": 0.00019804852172997981, "loss": 12.5891, "step": 3346 }, { "epoch": 0.1822575265633402, "grad_norm": 0.729946216620088, "learning_rate": 0.00019804678774248125, "loss": 12.8393, "step": 3347 }, { "epoch": 0.18231198055992323, "grad_norm": 0.9178332662915702, "learning_rate": 0.00019804505299255443, "loss": 12.6575, "step": 3348 }, { "epoch": 0.18236643455650622, "grad_norm": 0.7749283115288399, "learning_rate": 0.00019804331748021282, "loss": 12.6209, "step": 3349 }, { "epoch": 0.18242088855308924, "grad_norm": 0.9139733963471919, "learning_rate": 0.00019804158120546994, "loss": 12.7139, "step": 3350 }, { "epoch": 0.18247534254967226, "grad_norm": 0.7763906942557518, "learning_rate": 0.00019803984416833927, "loss": 12.5467, "step": 3351 }, { "epoch": 0.18252979654625526, "grad_norm": 0.7252198432139424, "learning_rate": 0.00019803810636883436, "loss": 12.6515, "step": 3352 }, { "epoch": 0.18258425054283828, "grad_norm": 0.9978346602920308, "learning_rate": 0.00019803636780696865, "loss": 12.6464, "step": 3353 }, { "epoch": 0.1826387045394213, "grad_norm": 0.7093383174056861, "learning_rate": 0.00019803462848275573, "loss": 12.7392, "step": 3354 }, { "epoch": 0.1826931585360043, "grad_norm": 0.9341940050524234, "learning_rate": 0.00019803288839620911, "loss": 12.7433, "step": 3355 }, { "epoch": 0.1827476125325873, "grad_norm": 0.7787445813299527, "learning_rate": 0.00019803114754734228, "loss": 12.6275, "step": 3356 }, { "epoch": 0.18280206652917033, "grad_norm": 0.7238973947276285, "learning_rate": 0.00019802940593616883, "loss": 12.5868, "step": 3357 }, { "epoch": 0.18285652052575332, "grad_norm": 0.8909256720217067, "learning_rate": 0.00019802766356270227, "loss": 12.6952, "step": 3358 }, { "epoch": 0.18291097452233634, "grad_norm": 0.9060266000302456, "learning_rate": 0.00019802592042695614, "loss": 12.6288, "step": 3359 }, { "epoch": 0.18296542851891937, "grad_norm": 0.8317096102878461, "learning_rate": 0.00019802417652894406, "loss": 12.8364, "step": 3360 }, { "epoch": 0.18301988251550239, "grad_norm": 0.7046624142474951, "learning_rate": 0.00019802243186867953, "loss": 12.6454, "step": 3361 }, { "epoch": 0.18307433651208538, "grad_norm": 0.7724845162821498, "learning_rate": 0.0001980206864461761, "loss": 12.5423, "step": 3362 }, { "epoch": 0.1831287905086684, "grad_norm": 0.7007103141816504, "learning_rate": 0.0001980189402614474, "loss": 12.5998, "step": 3363 }, { "epoch": 0.18318324450525142, "grad_norm": 0.7136727791052012, "learning_rate": 0.000198017193314507, "loss": 12.5667, "step": 3364 }, { "epoch": 0.1832376985018344, "grad_norm": 0.7296237009613211, "learning_rate": 0.00019801544560536845, "loss": 12.6287, "step": 3365 }, { "epoch": 0.18329215249841743, "grad_norm": 0.8388171730170142, "learning_rate": 0.00019801369713404535, "loss": 12.6726, "step": 3366 }, { "epoch": 0.18334660649500045, "grad_norm": 0.7969860000049219, "learning_rate": 0.0001980119479005513, "loss": 12.6525, "step": 3367 }, { "epoch": 0.18340106049158345, "grad_norm": 0.8607464188262216, "learning_rate": 0.00019801019790489992, "loss": 12.8035, "step": 3368 }, { "epoch": 0.18345551448816647, "grad_norm": 0.8417130114551891, "learning_rate": 0.00019800844714710478, "loss": 12.6438, "step": 3369 }, { "epoch": 0.18350996848474949, "grad_norm": 0.7454307447608005, "learning_rate": 0.00019800669562717956, "loss": 12.6587, "step": 3370 }, { "epoch": 0.18356442248133248, "grad_norm": 0.7564182643950726, "learning_rate": 0.00019800494334513781, "loss": 12.7091, "step": 3371 }, { "epoch": 0.1836188764779155, "grad_norm": 0.751046314032216, "learning_rate": 0.00019800319030099318, "loss": 12.5357, "step": 3372 }, { "epoch": 0.18367333047449852, "grad_norm": 0.6986462694900774, "learning_rate": 0.00019800143649475934, "loss": 12.6685, "step": 3373 }, { "epoch": 0.1837277844710815, "grad_norm": 0.7858772949677835, "learning_rate": 0.00019799968192644985, "loss": 12.7326, "step": 3374 }, { "epoch": 0.18378223846766453, "grad_norm": 0.6825472324506604, "learning_rate": 0.00019799792659607842, "loss": 12.6151, "step": 3375 }, { "epoch": 0.18383669246424755, "grad_norm": 0.7411115867200715, "learning_rate": 0.0001979961705036587, "loss": 12.5658, "step": 3376 }, { "epoch": 0.18389114646083055, "grad_norm": 0.8472704769040416, "learning_rate": 0.0001979944136492043, "loss": 12.7508, "step": 3377 }, { "epoch": 0.18394560045741357, "grad_norm": 0.756717561964962, "learning_rate": 0.0001979926560327289, "loss": 12.6634, "step": 3378 }, { "epoch": 0.1840000544539966, "grad_norm": 0.7606214364101355, "learning_rate": 0.00019799089765424618, "loss": 12.5667, "step": 3379 }, { "epoch": 0.1840545084505796, "grad_norm": 0.7270193995130935, "learning_rate": 0.00019798913851376982, "loss": 12.6351, "step": 3380 }, { "epoch": 0.1841089624471626, "grad_norm": 0.6805493559802995, "learning_rate": 0.00019798737861131347, "loss": 12.6746, "step": 3381 }, { "epoch": 0.18416341644374562, "grad_norm": 0.7056558366013929, "learning_rate": 0.00019798561794689086, "loss": 12.6943, "step": 3382 }, { "epoch": 0.18421787044032864, "grad_norm": 0.7657220496359567, "learning_rate": 0.0001979838565205156, "loss": 12.6128, "step": 3383 }, { "epoch": 0.18427232443691163, "grad_norm": 0.9039108815858633, "learning_rate": 0.0001979820943322015, "loss": 12.6562, "step": 3384 }, { "epoch": 0.18432677843349465, "grad_norm": 0.86946493008817, "learning_rate": 0.00019798033138196218, "loss": 12.5324, "step": 3385 }, { "epoch": 0.18438123243007767, "grad_norm": 0.7912167800969766, "learning_rate": 0.00019797856766981135, "loss": 12.6925, "step": 3386 }, { "epoch": 0.18443568642666067, "grad_norm": 0.7999489254168872, "learning_rate": 0.00019797680319576276, "loss": 12.6894, "step": 3387 }, { "epoch": 0.1844901404232437, "grad_norm": 0.7324214346710298, "learning_rate": 0.00019797503795983016, "loss": 12.6803, "step": 3388 }, { "epoch": 0.1845445944198267, "grad_norm": 0.756609872779989, "learning_rate": 0.00019797327196202717, "loss": 12.6434, "step": 3389 }, { "epoch": 0.1845990484164097, "grad_norm": 0.7015324007008875, "learning_rate": 0.00019797150520236762, "loss": 12.6552, "step": 3390 }, { "epoch": 0.18465350241299272, "grad_norm": 0.7122255001024863, "learning_rate": 0.0001979697376808652, "loss": 12.5259, "step": 3391 }, { "epoch": 0.18470795640957574, "grad_norm": 0.9522221558042216, "learning_rate": 0.0001979679693975337, "loss": 12.673, "step": 3392 }, { "epoch": 0.18476241040615873, "grad_norm": 0.7110517403519583, "learning_rate": 0.00019796620035238678, "loss": 12.6476, "step": 3393 }, { "epoch": 0.18481686440274175, "grad_norm": 0.7595909509678056, "learning_rate": 0.00019796443054543832, "loss": 12.6126, "step": 3394 }, { "epoch": 0.18487131839932477, "grad_norm": 0.687617268889062, "learning_rate": 0.000197962659976702, "loss": 12.5402, "step": 3395 }, { "epoch": 0.1849257723959078, "grad_norm": 0.7039148224733114, "learning_rate": 0.00019796088864619158, "loss": 12.7266, "step": 3396 }, { "epoch": 0.1849802263924908, "grad_norm": 0.7788472692865894, "learning_rate": 0.00019795911655392089, "loss": 12.5487, "step": 3397 }, { "epoch": 0.1850346803890738, "grad_norm": 0.7193934435877664, "learning_rate": 0.00019795734369990368, "loss": 12.6761, "step": 3398 }, { "epoch": 0.18508913438565683, "grad_norm": 0.7146683469628885, "learning_rate": 0.00019795557008415372, "loss": 12.4951, "step": 3399 }, { "epoch": 0.18514358838223982, "grad_norm": 0.781946581429394, "learning_rate": 0.00019795379570668487, "loss": 12.5245, "step": 3400 }, { "epoch": 0.18519804237882284, "grad_norm": 0.6925827278733167, "learning_rate": 0.00019795202056751082, "loss": 12.6528, "step": 3401 }, { "epoch": 0.18525249637540586, "grad_norm": 0.7960254736251621, "learning_rate": 0.00019795024466664546, "loss": 12.8152, "step": 3402 }, { "epoch": 0.18530695037198885, "grad_norm": 0.7310673158847482, "learning_rate": 0.00019794846800410255, "loss": 12.6397, "step": 3403 }, { "epoch": 0.18536140436857187, "grad_norm": 0.73383372208105, "learning_rate": 0.00019794669057989595, "loss": 12.5688, "step": 3404 }, { "epoch": 0.1854158583651549, "grad_norm": 0.836279865981678, "learning_rate": 0.00019794491239403944, "loss": 12.5942, "step": 3405 }, { "epoch": 0.1854703123617379, "grad_norm": 0.7712799032342063, "learning_rate": 0.0001979431334465469, "loss": 12.6425, "step": 3406 }, { "epoch": 0.1855247663583209, "grad_norm": 0.6909617201882076, "learning_rate": 0.0001979413537374321, "loss": 12.588, "step": 3407 }, { "epoch": 0.18557922035490393, "grad_norm": 0.7048457518660487, "learning_rate": 0.0001979395732667089, "loss": 12.6108, "step": 3408 }, { "epoch": 0.18563367435148692, "grad_norm": 0.7466368679226278, "learning_rate": 0.00019793779203439117, "loss": 12.6437, "step": 3409 }, { "epoch": 0.18568812834806994, "grad_norm": 0.7581910439112242, "learning_rate": 0.00019793601004049273, "loss": 12.6338, "step": 3410 }, { "epoch": 0.18574258234465296, "grad_norm": 0.6889479667097653, "learning_rate": 0.00019793422728502747, "loss": 12.6501, "step": 3411 }, { "epoch": 0.18579703634123598, "grad_norm": 0.7053095188967593, "learning_rate": 0.00019793244376800924, "loss": 12.6259, "step": 3412 }, { "epoch": 0.18585149033781898, "grad_norm": 0.9028178062423818, "learning_rate": 0.0001979306594894519, "loss": 12.6088, "step": 3413 }, { "epoch": 0.185905944334402, "grad_norm": 0.7099854745460383, "learning_rate": 0.00019792887444936933, "loss": 12.6394, "step": 3414 }, { "epoch": 0.18596039833098502, "grad_norm": 0.6928213414862964, "learning_rate": 0.0001979270886477754, "loss": 12.4603, "step": 3415 }, { "epoch": 0.186014852327568, "grad_norm": 0.6587384490980607, "learning_rate": 0.000197925302084684, "loss": 12.6143, "step": 3416 }, { "epoch": 0.18606930632415103, "grad_norm": 0.7064330914174174, "learning_rate": 0.00019792351476010905, "loss": 12.5708, "step": 3417 }, { "epoch": 0.18612376032073405, "grad_norm": 0.7396116906610933, "learning_rate": 0.00019792172667406442, "loss": 12.6673, "step": 3418 }, { "epoch": 0.18617821431731704, "grad_norm": 0.7228600413696578, "learning_rate": 0.000197919937826564, "loss": 12.5551, "step": 3419 }, { "epoch": 0.18623266831390006, "grad_norm": 0.8579167739390514, "learning_rate": 0.00019791814821762174, "loss": 12.6024, "step": 3420 }, { "epoch": 0.18628712231048308, "grad_norm": 0.8120542124616528, "learning_rate": 0.00019791635784725155, "loss": 12.8604, "step": 3421 }, { "epoch": 0.18634157630706608, "grad_norm": 0.7673698957399567, "learning_rate": 0.00019791456671546732, "loss": 12.6082, "step": 3422 }, { "epoch": 0.1863960303036491, "grad_norm": 0.785400566174054, "learning_rate": 0.000197912774822283, "loss": 12.6288, "step": 3423 }, { "epoch": 0.18645048430023212, "grad_norm": 0.7907865406949788, "learning_rate": 0.00019791098216771254, "loss": 12.6943, "step": 3424 }, { "epoch": 0.1865049382968151, "grad_norm": 0.8620753392475704, "learning_rate": 0.00019790918875176985, "loss": 12.8335, "step": 3425 }, { "epoch": 0.18655939229339813, "grad_norm": 0.7712543156805853, "learning_rate": 0.0001979073945744689, "loss": 12.6657, "step": 3426 }, { "epoch": 0.18661384628998115, "grad_norm": 0.8981380922682508, "learning_rate": 0.00019790559963582362, "loss": 12.7522, "step": 3427 }, { "epoch": 0.18666830028656417, "grad_norm": 0.7264599236766378, "learning_rate": 0.00019790380393584797, "loss": 12.6306, "step": 3428 }, { "epoch": 0.18672275428314716, "grad_norm": 0.7341988030503047, "learning_rate": 0.00019790200747455593, "loss": 12.6287, "step": 3429 }, { "epoch": 0.18677720827973018, "grad_norm": 0.6866438472063808, "learning_rate": 0.00019790021025196147, "loss": 12.6171, "step": 3430 }, { "epoch": 0.1868316622763132, "grad_norm": 0.8609424467053118, "learning_rate": 0.00019789841226807857, "loss": 12.6022, "step": 3431 }, { "epoch": 0.1868861162728962, "grad_norm": 0.735664009505058, "learning_rate": 0.00019789661352292116, "loss": 12.7115, "step": 3432 }, { "epoch": 0.18694057026947922, "grad_norm": 0.8061194136233919, "learning_rate": 0.0001978948140165033, "loss": 12.6571, "step": 3433 }, { "epoch": 0.18699502426606224, "grad_norm": 0.7001891909756394, "learning_rate": 0.00019789301374883894, "loss": 12.6699, "step": 3434 }, { "epoch": 0.18704947826264523, "grad_norm": 0.6471394560553497, "learning_rate": 0.00019789121271994206, "loss": 12.6167, "step": 3435 }, { "epoch": 0.18710393225922825, "grad_norm": 0.8114243930971807, "learning_rate": 0.00019788941092982674, "loss": 12.5519, "step": 3436 }, { "epoch": 0.18715838625581127, "grad_norm": 0.7556975256688439, "learning_rate": 0.00019788760837850694, "loss": 12.6453, "step": 3437 }, { "epoch": 0.18721284025239426, "grad_norm": 0.8111179653669358, "learning_rate": 0.00019788580506599664, "loss": 12.7268, "step": 3438 }, { "epoch": 0.18726729424897728, "grad_norm": 0.7792236041663045, "learning_rate": 0.00019788400099230992, "loss": 12.471, "step": 3439 }, { "epoch": 0.1873217482455603, "grad_norm": 0.7636202013337339, "learning_rate": 0.00019788219615746083, "loss": 12.7225, "step": 3440 }, { "epoch": 0.1873762022421433, "grad_norm": 0.7703390768900522, "learning_rate": 0.00019788039056146332, "loss": 12.77, "step": 3441 }, { "epoch": 0.18743065623872632, "grad_norm": 0.6989497989715391, "learning_rate": 0.0001978785842043315, "loss": 12.572, "step": 3442 }, { "epoch": 0.18748511023530934, "grad_norm": 0.7531763129520263, "learning_rate": 0.0001978767770860794, "loss": 12.6281, "step": 3443 }, { "epoch": 0.18753956423189233, "grad_norm": 0.7656064978206467, "learning_rate": 0.00019787496920672107, "loss": 12.683, "step": 3444 }, { "epoch": 0.18759401822847535, "grad_norm": 0.6959064866014217, "learning_rate": 0.00019787316056627053, "loss": 12.6926, "step": 3445 }, { "epoch": 0.18764847222505837, "grad_norm": 0.7310517195944778, "learning_rate": 0.00019787135116474191, "loss": 12.6944, "step": 3446 }, { "epoch": 0.1877029262216414, "grad_norm": 0.7050888293219245, "learning_rate": 0.00019786954100214926, "loss": 12.6899, "step": 3447 }, { "epoch": 0.18775738021822438, "grad_norm": 0.7331840748186392, "learning_rate": 0.00019786773007850664, "loss": 12.7043, "step": 3448 }, { "epoch": 0.1878118342148074, "grad_norm": 0.8760959354346283, "learning_rate": 0.0001978659183938281, "loss": 12.5749, "step": 3449 }, { "epoch": 0.18786628821139043, "grad_norm": 0.6531939284147806, "learning_rate": 0.00019786410594812784, "loss": 12.533, "step": 3450 }, { "epoch": 0.18792074220797342, "grad_norm": 0.7905262008052769, "learning_rate": 0.00019786229274141982, "loss": 12.6418, "step": 3451 }, { "epoch": 0.18797519620455644, "grad_norm": 0.7837407214029306, "learning_rate": 0.00019786047877371821, "loss": 12.4968, "step": 3452 }, { "epoch": 0.18802965020113946, "grad_norm": 0.6829892747844226, "learning_rate": 0.0001978586640450371, "loss": 12.5865, "step": 3453 }, { "epoch": 0.18808410419772245, "grad_norm": 0.7905919519547869, "learning_rate": 0.00019785684855539066, "loss": 12.7048, "step": 3454 }, { "epoch": 0.18813855819430547, "grad_norm": 0.7202467701468647, "learning_rate": 0.0001978550323047929, "loss": 12.6159, "step": 3455 }, { "epoch": 0.1881930121908885, "grad_norm": 0.7070151712650198, "learning_rate": 0.00019785321529325803, "loss": 12.654, "step": 3456 }, { "epoch": 0.18824746618747148, "grad_norm": 0.7513944704198623, "learning_rate": 0.0001978513975208001, "loss": 12.6776, "step": 3457 }, { "epoch": 0.1883019201840545, "grad_norm": 0.7102718478964493, "learning_rate": 0.00019784957898743335, "loss": 12.4791, "step": 3458 }, { "epoch": 0.18835637418063753, "grad_norm": 0.6849626063754578, "learning_rate": 0.00019784775969317183, "loss": 12.6471, "step": 3459 }, { "epoch": 0.18841082817722052, "grad_norm": 0.7217393791466998, "learning_rate": 0.00019784593963802975, "loss": 12.6078, "step": 3460 }, { "epoch": 0.18846528217380354, "grad_norm": 0.9547534304507546, "learning_rate": 0.0001978441188220212, "loss": 12.6762, "step": 3461 }, { "epoch": 0.18851973617038656, "grad_norm": 0.7400562576011662, "learning_rate": 0.0001978422972451604, "loss": 12.5686, "step": 3462 }, { "epoch": 0.18857419016696958, "grad_norm": 0.7819096937512829, "learning_rate": 0.00019784047490746146, "loss": 12.7465, "step": 3463 }, { "epoch": 0.18862864416355257, "grad_norm": 0.6978935409395687, "learning_rate": 0.00019783865180893862, "loss": 12.6414, "step": 3464 }, { "epoch": 0.1886830981601356, "grad_norm": 0.745264747694646, "learning_rate": 0.000197836827949606, "loss": 12.6252, "step": 3465 }, { "epoch": 0.1887375521567186, "grad_norm": 0.7120841832106395, "learning_rate": 0.0001978350033294778, "loss": 12.5873, "step": 3466 }, { "epoch": 0.1887920061533016, "grad_norm": 0.7915147314262276, "learning_rate": 0.00019783317794856817, "loss": 12.4969, "step": 3467 }, { "epoch": 0.18884646014988463, "grad_norm": 0.7766806085597008, "learning_rate": 0.00019783135180689138, "loss": 12.5835, "step": 3468 }, { "epoch": 0.18890091414646765, "grad_norm": 0.8489177098808214, "learning_rate": 0.0001978295249044616, "loss": 12.7021, "step": 3469 }, { "epoch": 0.18895536814305064, "grad_norm": 0.7755385665913826, "learning_rate": 0.000197827697241293, "loss": 12.656, "step": 3470 }, { "epoch": 0.18900982213963366, "grad_norm": 0.7276129651295314, "learning_rate": 0.00019782586881739983, "loss": 12.6165, "step": 3471 }, { "epoch": 0.18906427613621668, "grad_norm": 0.7669102297638349, "learning_rate": 0.0001978240396327963, "loss": 12.6595, "step": 3472 }, { "epoch": 0.18911873013279967, "grad_norm": 0.7420823944772779, "learning_rate": 0.00019782220968749665, "loss": 12.5876, "step": 3473 }, { "epoch": 0.1891731841293827, "grad_norm": 0.7468557020621868, "learning_rate": 0.0001978203789815151, "loss": 12.6916, "step": 3474 }, { "epoch": 0.1892276381259657, "grad_norm": 0.7106718007187686, "learning_rate": 0.00019781854751486582, "loss": 12.5696, "step": 3475 }, { "epoch": 0.1892820921225487, "grad_norm": 0.7332562200016313, "learning_rate": 0.00019781671528756314, "loss": 12.7741, "step": 3476 }, { "epoch": 0.18933654611913173, "grad_norm": 0.7833748917159779, "learning_rate": 0.00019781488229962132, "loss": 12.689, "step": 3477 }, { "epoch": 0.18939100011571475, "grad_norm": 0.7741583004705924, "learning_rate": 0.0001978130485510545, "loss": 12.7469, "step": 3478 }, { "epoch": 0.18944545411229777, "grad_norm": 0.7177523289604667, "learning_rate": 0.00019781121404187707, "loss": 12.4962, "step": 3479 }, { "epoch": 0.18949990810888076, "grad_norm": 0.809544019401128, "learning_rate": 0.0001978093787721032, "loss": 12.7359, "step": 3480 }, { "epoch": 0.18955436210546378, "grad_norm": 0.8553144004000792, "learning_rate": 0.00019780754274174723, "loss": 12.818, "step": 3481 }, { "epoch": 0.1896088161020468, "grad_norm": 0.7081591841393877, "learning_rate": 0.00019780570595082336, "loss": 12.5759, "step": 3482 }, { "epoch": 0.1896632700986298, "grad_norm": 0.8695116509656418, "learning_rate": 0.00019780386839934595, "loss": 12.6974, "step": 3483 }, { "epoch": 0.18971772409521281, "grad_norm": 0.8223589842834956, "learning_rate": 0.00019780203008732924, "loss": 12.6353, "step": 3484 }, { "epoch": 0.18977217809179583, "grad_norm": 0.7446285543581759, "learning_rate": 0.00019780019101478758, "loss": 12.6826, "step": 3485 }, { "epoch": 0.18982663208837883, "grad_norm": 0.6896485167003834, "learning_rate": 0.00019779835118173523, "loss": 12.4591, "step": 3486 }, { "epoch": 0.18988108608496185, "grad_norm": 0.7866481478000416, "learning_rate": 0.00019779651058818645, "loss": 12.7327, "step": 3487 }, { "epoch": 0.18993554008154487, "grad_norm": 0.7697027786000047, "learning_rate": 0.00019779466923415564, "loss": 12.5679, "step": 3488 }, { "epoch": 0.18998999407812786, "grad_norm": 0.779594290786733, "learning_rate": 0.00019779282711965705, "loss": 12.643, "step": 3489 }, { "epoch": 0.19004444807471088, "grad_norm": 0.8184831223384603, "learning_rate": 0.00019779098424470507, "loss": 12.6132, "step": 3490 }, { "epoch": 0.1900989020712939, "grad_norm": 0.7787519451121007, "learning_rate": 0.000197789140609314, "loss": 12.7536, "step": 3491 }, { "epoch": 0.1901533560678769, "grad_norm": 0.7319492566811413, "learning_rate": 0.00019778729621349817, "loss": 12.6267, "step": 3492 }, { "epoch": 0.19020781006445991, "grad_norm": 0.9438662989100811, "learning_rate": 0.0001977854510572719, "loss": 12.7885, "step": 3493 }, { "epoch": 0.19026226406104293, "grad_norm": 0.7472861900488181, "learning_rate": 0.0001977836051406496, "loss": 12.5697, "step": 3494 }, { "epoch": 0.19031671805762596, "grad_norm": 0.7530749349726638, "learning_rate": 0.00019778175846364558, "loss": 12.7655, "step": 3495 }, { "epoch": 0.19037117205420895, "grad_norm": 0.717289086680717, "learning_rate": 0.00019777991102627417, "loss": 12.7446, "step": 3496 }, { "epoch": 0.19042562605079197, "grad_norm": 0.7927481726756973, "learning_rate": 0.0001977780628285498, "loss": 12.6446, "step": 3497 }, { "epoch": 0.190480080047375, "grad_norm": 0.7975134354075496, "learning_rate": 0.00019777621387048684, "loss": 12.6087, "step": 3498 }, { "epoch": 0.19053453404395798, "grad_norm": 0.7714088806598007, "learning_rate": 0.0001977743641520996, "loss": 12.6812, "step": 3499 }, { "epoch": 0.190588988040541, "grad_norm": 0.7761793014499357, "learning_rate": 0.00019777251367340254, "loss": 12.4691, "step": 3500 }, { "epoch": 0.19064344203712402, "grad_norm": 0.7577645649898556, "learning_rate": 0.00019777066243441, "loss": 12.7787, "step": 3501 }, { "epoch": 0.19069789603370702, "grad_norm": 0.8089804177245533, "learning_rate": 0.0001977688104351364, "loss": 12.6915, "step": 3502 }, { "epoch": 0.19075235003029004, "grad_norm": 0.7639733935034829, "learning_rate": 0.00019776695767559615, "loss": 12.6868, "step": 3503 }, { "epoch": 0.19080680402687306, "grad_norm": 0.8485259089297601, "learning_rate": 0.0001977651041558036, "loss": 12.6876, "step": 3504 }, { "epoch": 0.19086125802345605, "grad_norm": 0.7641466460154173, "learning_rate": 0.00019776324987577323, "loss": 12.7128, "step": 3505 }, { "epoch": 0.19091571202003907, "grad_norm": 1.031112607794016, "learning_rate": 0.00019776139483551944, "loss": 12.7414, "step": 3506 }, { "epoch": 0.1909701660166221, "grad_norm": 0.7165868709934979, "learning_rate": 0.00019775953903505665, "loss": 12.5893, "step": 3507 }, { "epoch": 0.19102462001320508, "grad_norm": 0.6812113548886141, "learning_rate": 0.00019775768247439927, "loss": 12.5462, "step": 3508 }, { "epoch": 0.1910790740097881, "grad_norm": 0.8234924571777671, "learning_rate": 0.0001977558251535618, "loss": 12.5584, "step": 3509 }, { "epoch": 0.19113352800637112, "grad_norm": 0.7551867552780744, "learning_rate": 0.0001977539670725586, "loss": 12.651, "step": 3510 }, { "epoch": 0.19118798200295412, "grad_norm": 0.8335507001913836, "learning_rate": 0.00019775210823140416, "loss": 12.7434, "step": 3511 }, { "epoch": 0.19124243599953714, "grad_norm": 0.9366970779404721, "learning_rate": 0.00019775024863011293, "loss": 12.7378, "step": 3512 }, { "epoch": 0.19129688999612016, "grad_norm": 0.788139924247736, "learning_rate": 0.0001977483882686994, "loss": 12.6645, "step": 3513 }, { "epoch": 0.19135134399270318, "grad_norm": 1.008796680829356, "learning_rate": 0.000197746527147178, "loss": 12.79, "step": 3514 }, { "epoch": 0.19140579798928617, "grad_norm": 0.7957919244741967, "learning_rate": 0.0001977446652655632, "loss": 12.727, "step": 3515 }, { "epoch": 0.1914602519858692, "grad_norm": 0.7216544125020525, "learning_rate": 0.0001977428026238695, "loss": 12.7369, "step": 3516 }, { "epoch": 0.1915147059824522, "grad_norm": 0.7966202844772511, "learning_rate": 0.00019774093922211137, "loss": 12.6249, "step": 3517 }, { "epoch": 0.1915691599790352, "grad_norm": 0.761783076179875, "learning_rate": 0.00019773907506030332, "loss": 12.6354, "step": 3518 }, { "epoch": 0.19162361397561822, "grad_norm": 0.7296691536804387, "learning_rate": 0.0001977372101384598, "loss": 12.6756, "step": 3519 }, { "epoch": 0.19167806797220124, "grad_norm": 0.7538202468078059, "learning_rate": 0.00019773534445659537, "loss": 12.7087, "step": 3520 }, { "epoch": 0.19173252196878424, "grad_norm": 0.8182761392904533, "learning_rate": 0.00019773347801472452, "loss": 12.6986, "step": 3521 }, { "epoch": 0.19178697596536726, "grad_norm": 0.6941479890687625, "learning_rate": 0.00019773161081286172, "loss": 12.7346, "step": 3522 }, { "epoch": 0.19184142996195028, "grad_norm": 0.7328857273974302, "learning_rate": 0.00019772974285102156, "loss": 12.707, "step": 3523 }, { "epoch": 0.19189588395853327, "grad_norm": 0.8779759648629172, "learning_rate": 0.00019772787412921853, "loss": 12.6799, "step": 3524 }, { "epoch": 0.1919503379551163, "grad_norm": 0.8102918945732309, "learning_rate": 0.00019772600464746715, "loss": 12.7078, "step": 3525 }, { "epoch": 0.1920047919516993, "grad_norm": 0.7432385246336675, "learning_rate": 0.00019772413440578197, "loss": 12.576, "step": 3526 }, { "epoch": 0.1920592459482823, "grad_norm": 0.7526038693013306, "learning_rate": 0.00019772226340417754, "loss": 12.6807, "step": 3527 }, { "epoch": 0.19211369994486532, "grad_norm": 0.8225261794530042, "learning_rate": 0.00019772039164266838, "loss": 12.5654, "step": 3528 }, { "epoch": 0.19216815394144834, "grad_norm": 0.725848010653964, "learning_rate": 0.00019771851912126908, "loss": 12.6766, "step": 3529 }, { "epoch": 0.19222260793803136, "grad_norm": 0.7908395411886413, "learning_rate": 0.00019771664583999418, "loss": 12.5595, "step": 3530 }, { "epoch": 0.19227706193461436, "grad_norm": 0.745581408980533, "learning_rate": 0.00019771477179885826, "loss": 12.7509, "step": 3531 }, { "epoch": 0.19233151593119738, "grad_norm": 0.8018799413676838, "learning_rate": 0.00019771289699787589, "loss": 12.695, "step": 3532 }, { "epoch": 0.1923859699277804, "grad_norm": 0.810353107267892, "learning_rate": 0.00019771102143706167, "loss": 12.6875, "step": 3533 }, { "epoch": 0.1924404239243634, "grad_norm": 0.7348926531608867, "learning_rate": 0.00019770914511643012, "loss": 12.6932, "step": 3534 }, { "epoch": 0.1924948779209464, "grad_norm": 0.8973143644014755, "learning_rate": 0.0001977072680359959, "loss": 12.6792, "step": 3535 }, { "epoch": 0.19254933191752943, "grad_norm": 0.7632452197424912, "learning_rate": 0.00019770539019577357, "loss": 12.7185, "step": 3536 }, { "epoch": 0.19260378591411242, "grad_norm": 0.7927514340501035, "learning_rate": 0.00019770351159577773, "loss": 12.5981, "step": 3537 }, { "epoch": 0.19265823991069544, "grad_norm": 0.7068230142827054, "learning_rate": 0.000197701632236023, "loss": 12.8504, "step": 3538 }, { "epoch": 0.19271269390727847, "grad_norm": 0.7134631121518779, "learning_rate": 0.000197699752116524, "loss": 12.6706, "step": 3539 }, { "epoch": 0.19276714790386146, "grad_norm": 0.8083137902876478, "learning_rate": 0.00019769787123729535, "loss": 12.6535, "step": 3540 }, { "epoch": 0.19282160190044448, "grad_norm": 0.7201291945896695, "learning_rate": 0.00019769598959835168, "loss": 12.6102, "step": 3541 }, { "epoch": 0.1928760558970275, "grad_norm": 0.7248858192915315, "learning_rate": 0.00019769410719970757, "loss": 12.5687, "step": 3542 }, { "epoch": 0.1929305098936105, "grad_norm": 0.8362500648989917, "learning_rate": 0.00019769222404137773, "loss": 12.7465, "step": 3543 }, { "epoch": 0.1929849638901935, "grad_norm": 0.705968235830725, "learning_rate": 0.00019769034012337677, "loss": 12.5646, "step": 3544 }, { "epoch": 0.19303941788677653, "grad_norm": 0.7222014227029238, "learning_rate": 0.00019768845544571931, "loss": 12.6912, "step": 3545 }, { "epoch": 0.19309387188335955, "grad_norm": 0.8222074073217709, "learning_rate": 0.0001976865700084201, "loss": 12.558, "step": 3546 }, { "epoch": 0.19314832587994255, "grad_norm": 0.803715808297221, "learning_rate": 0.0001976846838114937, "loss": 12.765, "step": 3547 }, { "epoch": 0.19320277987652557, "grad_norm": 0.9054215650732284, "learning_rate": 0.00019768279685495482, "loss": 12.6645, "step": 3548 }, { "epoch": 0.19325723387310859, "grad_norm": 0.7785454014057418, "learning_rate": 0.00019768090913881815, "loss": 12.7655, "step": 3549 }, { "epoch": 0.19331168786969158, "grad_norm": 0.734109054191284, "learning_rate": 0.00019767902066309832, "loss": 12.6601, "step": 3550 }, { "epoch": 0.1933661418662746, "grad_norm": 0.838383336586159, "learning_rate": 0.00019767713142781007, "loss": 12.6776, "step": 3551 }, { "epoch": 0.19342059586285762, "grad_norm": 0.7860534851895626, "learning_rate": 0.00019767524143296804, "loss": 12.6996, "step": 3552 }, { "epoch": 0.1934750498594406, "grad_norm": 0.7393473966552592, "learning_rate": 0.00019767335067858696, "loss": 12.7366, "step": 3553 }, { "epoch": 0.19352950385602363, "grad_norm": 0.7676063068921294, "learning_rate": 0.00019767145916468155, "loss": 12.7784, "step": 3554 }, { "epoch": 0.19358395785260665, "grad_norm": 0.6940158010728265, "learning_rate": 0.00019766956689126647, "loss": 12.7075, "step": 3555 }, { "epoch": 0.19363841184918965, "grad_norm": 0.8176845403633383, "learning_rate": 0.00019766767385835646, "loss": 12.6988, "step": 3556 }, { "epoch": 0.19369286584577267, "grad_norm": 0.8256601427666457, "learning_rate": 0.00019766578006596625, "loss": 12.705, "step": 3557 }, { "epoch": 0.1937473198423557, "grad_norm": 0.7528657863935839, "learning_rate": 0.00019766388551411055, "loss": 12.5481, "step": 3558 }, { "epoch": 0.19380177383893868, "grad_norm": 0.7907231207947625, "learning_rate": 0.00019766199020280407, "loss": 12.5655, "step": 3559 }, { "epoch": 0.1938562278355217, "grad_norm": 0.8433905921188369, "learning_rate": 0.0001976600941320616, "loss": 12.6689, "step": 3560 }, { "epoch": 0.19391068183210472, "grad_norm": 0.7864133499683497, "learning_rate": 0.00019765819730189788, "loss": 12.5944, "step": 3561 }, { "epoch": 0.19396513582868774, "grad_norm": 0.7921089125035029, "learning_rate": 0.00019765629971232762, "loss": 12.7421, "step": 3562 }, { "epoch": 0.19401958982527073, "grad_norm": 0.769961242712185, "learning_rate": 0.00019765440136336563, "loss": 12.5976, "step": 3563 }, { "epoch": 0.19407404382185375, "grad_norm": 0.7854683420579901, "learning_rate": 0.0001976525022550266, "loss": 12.7104, "step": 3564 }, { "epoch": 0.19412849781843677, "grad_norm": 0.7055786570242942, "learning_rate": 0.00019765060238732533, "loss": 12.6875, "step": 3565 }, { "epoch": 0.19418295181501977, "grad_norm": 0.6676731987699449, "learning_rate": 0.0001976487017602766, "loss": 12.6477, "step": 3566 }, { "epoch": 0.1942374058116028, "grad_norm": 0.7538115788203865, "learning_rate": 0.0001976468003738952, "loss": 12.7183, "step": 3567 }, { "epoch": 0.1942918598081858, "grad_norm": 0.75439765158688, "learning_rate": 0.00019764489822819594, "loss": 12.6667, "step": 3568 }, { "epoch": 0.1943463138047688, "grad_norm": 0.7020514027822288, "learning_rate": 0.00019764299532319354, "loss": 12.6256, "step": 3569 }, { "epoch": 0.19440076780135182, "grad_norm": 0.7834423558108528, "learning_rate": 0.00019764109165890283, "loss": 12.6853, "step": 3570 }, { "epoch": 0.19445522179793484, "grad_norm": 0.8136321623655445, "learning_rate": 0.00019763918723533864, "loss": 12.6909, "step": 3571 }, { "epoch": 0.19450967579451783, "grad_norm": 0.8091687627500386, "learning_rate": 0.00019763728205251572, "loss": 12.8608, "step": 3572 }, { "epoch": 0.19456412979110085, "grad_norm": 0.8657728047541944, "learning_rate": 0.00019763537611044892, "loss": 12.6988, "step": 3573 }, { "epoch": 0.19461858378768387, "grad_norm": 0.7915471591130552, "learning_rate": 0.0001976334694091531, "loss": 12.491, "step": 3574 }, { "epoch": 0.19467303778426687, "grad_norm": 0.7494389684777423, "learning_rate": 0.00019763156194864306, "loss": 12.721, "step": 3575 }, { "epoch": 0.1947274917808499, "grad_norm": 0.73901543888061, "learning_rate": 0.0001976296537289336, "loss": 12.7059, "step": 3576 }, { "epoch": 0.1947819457774329, "grad_norm": 0.7546077616744777, "learning_rate": 0.00019762774475003955, "loss": 12.6357, "step": 3577 }, { "epoch": 0.1948363997740159, "grad_norm": 0.7994713205953073, "learning_rate": 0.00019762583501197582, "loss": 12.7197, "step": 3578 }, { "epoch": 0.19489085377059892, "grad_norm": 0.7869063678294197, "learning_rate": 0.00019762392451475722, "loss": 12.6343, "step": 3579 }, { "epoch": 0.19494530776718194, "grad_norm": 0.7443898321946537, "learning_rate": 0.0001976220132583986, "loss": 12.5181, "step": 3580 }, { "epoch": 0.19499976176376496, "grad_norm": 0.6598295189631749, "learning_rate": 0.00019762010124291484, "loss": 12.4168, "step": 3581 }, { "epoch": 0.19505421576034795, "grad_norm": 0.6901751080579652, "learning_rate": 0.0001976181884683208, "loss": 12.7246, "step": 3582 }, { "epoch": 0.19510866975693097, "grad_norm": 0.8222129196616311, "learning_rate": 0.00019761627493463136, "loss": 12.6252, "step": 3583 }, { "epoch": 0.195163123753514, "grad_norm": 0.694561116471691, "learning_rate": 0.00019761436064186138, "loss": 12.6051, "step": 3584 }, { "epoch": 0.195217577750097, "grad_norm": 0.7677200945317284, "learning_rate": 0.0001976124455900258, "loss": 12.7399, "step": 3585 }, { "epoch": 0.19527203174668, "grad_norm": 0.7603252493444852, "learning_rate": 0.00019761052977913942, "loss": 12.5219, "step": 3586 }, { "epoch": 0.19532648574326303, "grad_norm": 0.7413408765375056, "learning_rate": 0.00019760861320921723, "loss": 12.7147, "step": 3587 }, { "epoch": 0.19538093973984602, "grad_norm": 0.7076795749184328, "learning_rate": 0.00019760669588027408, "loss": 12.5845, "step": 3588 }, { "epoch": 0.19543539373642904, "grad_norm": 0.7097969871713746, "learning_rate": 0.0001976047777923249, "loss": 12.5254, "step": 3589 }, { "epoch": 0.19548984773301206, "grad_norm": 0.6822010258754557, "learning_rate": 0.0001976028589453846, "loss": 12.5417, "step": 3590 }, { "epoch": 0.19554430172959505, "grad_norm": 0.7519241744027907, "learning_rate": 0.00019760093933946809, "loss": 12.5946, "step": 3591 }, { "epoch": 0.19559875572617808, "grad_norm": 0.7397640484556057, "learning_rate": 0.00019759901897459033, "loss": 12.6998, "step": 3592 }, { "epoch": 0.1956532097227611, "grad_norm": 0.7444813318525035, "learning_rate": 0.0001975970978507662, "loss": 12.6904, "step": 3593 }, { "epoch": 0.1957076637193441, "grad_norm": 0.6485345517894925, "learning_rate": 0.0001975951759680107, "loss": 12.5317, "step": 3594 }, { "epoch": 0.1957621177159271, "grad_norm": 0.7359840794178918, "learning_rate": 0.00019759325332633872, "loss": 12.683, "step": 3595 }, { "epoch": 0.19581657171251013, "grad_norm": 0.7701808720342013, "learning_rate": 0.00019759132992576528, "loss": 12.7741, "step": 3596 }, { "epoch": 0.19587102570909315, "grad_norm": 0.7356083817207066, "learning_rate": 0.00019758940576630524, "loss": 12.445, "step": 3597 }, { "epoch": 0.19592547970567614, "grad_norm": 0.7055571693231458, "learning_rate": 0.00019758748084797363, "loss": 12.6499, "step": 3598 }, { "epoch": 0.19597993370225916, "grad_norm": 0.7139018855523392, "learning_rate": 0.00019758555517078544, "loss": 12.5885, "step": 3599 }, { "epoch": 0.19603438769884218, "grad_norm": 0.6686289752594662, "learning_rate": 0.00019758362873475557, "loss": 12.6985, "step": 3600 }, { "epoch": 0.19608884169542518, "grad_norm": 0.7238026367304898, "learning_rate": 0.00019758170153989904, "loss": 12.6118, "step": 3601 }, { "epoch": 0.1961432956920082, "grad_norm": 0.7231655713054286, "learning_rate": 0.00019757977358623083, "loss": 12.5382, "step": 3602 }, { "epoch": 0.19619774968859122, "grad_norm": 0.7240620390875896, "learning_rate": 0.00019757784487376597, "loss": 12.6104, "step": 3603 }, { "epoch": 0.1962522036851742, "grad_norm": 0.8309518403394776, "learning_rate": 0.00019757591540251937, "loss": 12.786, "step": 3604 }, { "epoch": 0.19630665768175723, "grad_norm": 0.7182033651055718, "learning_rate": 0.00019757398517250612, "loss": 12.6781, "step": 3605 }, { "epoch": 0.19636111167834025, "grad_norm": 0.6624124827710981, "learning_rate": 0.0001975720541837412, "loss": 12.6035, "step": 3606 }, { "epoch": 0.19641556567492324, "grad_norm": 0.8103886947473927, "learning_rate": 0.00019757012243623963, "loss": 12.5608, "step": 3607 }, { "epoch": 0.19647001967150626, "grad_norm": 0.6924633612516438, "learning_rate": 0.0001975681899300164, "loss": 12.6219, "step": 3608 }, { "epoch": 0.19652447366808928, "grad_norm": 0.7250157464819681, "learning_rate": 0.0001975662566650866, "loss": 12.6682, "step": 3609 }, { "epoch": 0.19657892766467228, "grad_norm": 0.8019069046132882, "learning_rate": 0.0001975643226414652, "loss": 12.5714, "step": 3610 }, { "epoch": 0.1966333816612553, "grad_norm": 0.8310541251566375, "learning_rate": 0.00019756238785916729, "loss": 12.6828, "step": 3611 }, { "epoch": 0.19668783565783832, "grad_norm": 0.6882459675921699, "learning_rate": 0.00019756045231820784, "loss": 12.6901, "step": 3612 }, { "epoch": 0.19674228965442134, "grad_norm": 0.8262341699163686, "learning_rate": 0.000197558516018602, "loss": 12.7507, "step": 3613 }, { "epoch": 0.19679674365100433, "grad_norm": 0.7657928247465275, "learning_rate": 0.00019755657896036475, "loss": 12.6165, "step": 3614 }, { "epoch": 0.19685119764758735, "grad_norm": 0.7631725744086381, "learning_rate": 0.0001975546411435112, "loss": 12.5987, "step": 3615 }, { "epoch": 0.19690565164417037, "grad_norm": 0.7345946704374849, "learning_rate": 0.0001975527025680564, "loss": 12.594, "step": 3616 }, { "epoch": 0.19696010564075336, "grad_norm": 0.7681731733906019, "learning_rate": 0.00019755076323401543, "loss": 12.6932, "step": 3617 }, { "epoch": 0.19701455963733638, "grad_norm": 0.8080127465641304, "learning_rate": 0.00019754882314140335, "loss": 12.7142, "step": 3618 }, { "epoch": 0.1970690136339194, "grad_norm": 0.8400502525702276, "learning_rate": 0.00019754688229023528, "loss": 12.6429, "step": 3619 }, { "epoch": 0.1971234676305024, "grad_norm": 0.7696932357860664, "learning_rate": 0.00019754494068052628, "loss": 12.6779, "step": 3620 }, { "epoch": 0.19717792162708542, "grad_norm": 0.7362750272307614, "learning_rate": 0.00019754299831229146, "loss": 12.6583, "step": 3621 }, { "epoch": 0.19723237562366844, "grad_norm": 0.9548179295323732, "learning_rate": 0.00019754105518554594, "loss": 12.7402, "step": 3622 }, { "epoch": 0.19728682962025143, "grad_norm": 0.7760525845636607, "learning_rate": 0.0001975391113003048, "loss": 12.7086, "step": 3623 }, { "epoch": 0.19734128361683445, "grad_norm": 0.9345824260359603, "learning_rate": 0.0001975371666565832, "loss": 12.653, "step": 3624 }, { "epoch": 0.19739573761341747, "grad_norm": 0.8025717083886728, "learning_rate": 0.00019753522125439622, "loss": 12.6398, "step": 3625 }, { "epoch": 0.19745019161000046, "grad_norm": 0.7705338143926113, "learning_rate": 0.00019753327509375898, "loss": 12.5353, "step": 3626 }, { "epoch": 0.19750464560658348, "grad_norm": 0.9081350449246679, "learning_rate": 0.00019753132817468667, "loss": 12.6306, "step": 3627 }, { "epoch": 0.1975590996031665, "grad_norm": 0.7150794542815331, "learning_rate": 0.00019752938049719438, "loss": 12.6129, "step": 3628 }, { "epoch": 0.19761355359974953, "grad_norm": 0.7866917662670726, "learning_rate": 0.0001975274320612973, "loss": 12.7477, "step": 3629 }, { "epoch": 0.19766800759633252, "grad_norm": 0.7704851762300791, "learning_rate": 0.00019752548286701053, "loss": 12.6923, "step": 3630 }, { "epoch": 0.19772246159291554, "grad_norm": 0.830753690438923, "learning_rate": 0.00019752353291434922, "loss": 12.7148, "step": 3631 }, { "epoch": 0.19777691558949856, "grad_norm": 0.9358040617724722, "learning_rate": 0.00019752158220332858, "loss": 12.8388, "step": 3632 }, { "epoch": 0.19783136958608155, "grad_norm": 0.7476298552772048, "learning_rate": 0.0001975196307339638, "loss": 12.6567, "step": 3633 }, { "epoch": 0.19788582358266457, "grad_norm": 0.7800103956608793, "learning_rate": 0.00019751767850627, "loss": 12.6094, "step": 3634 }, { "epoch": 0.1979402775792476, "grad_norm": 0.7566188148483881, "learning_rate": 0.00019751572552026235, "loss": 12.6221, "step": 3635 }, { "epoch": 0.19799473157583058, "grad_norm": 0.6787264592319261, "learning_rate": 0.0001975137717759561, "loss": 12.7796, "step": 3636 }, { "epoch": 0.1980491855724136, "grad_norm": 0.7822758510904512, "learning_rate": 0.00019751181727336637, "loss": 12.6914, "step": 3637 }, { "epoch": 0.19810363956899663, "grad_norm": 0.710249806995049, "learning_rate": 0.00019750986201250842, "loss": 12.7077, "step": 3638 }, { "epoch": 0.19815809356557962, "grad_norm": 0.7490688178996289, "learning_rate": 0.00019750790599339744, "loss": 12.7766, "step": 3639 }, { "epoch": 0.19821254756216264, "grad_norm": 0.7043863922849712, "learning_rate": 0.00019750594921604862, "loss": 12.6596, "step": 3640 }, { "epoch": 0.19826700155874566, "grad_norm": 0.8152263313748491, "learning_rate": 0.0001975039916804772, "loss": 12.7449, "step": 3641 }, { "epoch": 0.19832145555532865, "grad_norm": 0.7191860510140848, "learning_rate": 0.00019750203338669836, "loss": 12.7179, "step": 3642 }, { "epoch": 0.19837590955191167, "grad_norm": 0.7064986821688003, "learning_rate": 0.00019750007433472737, "loss": 12.6284, "step": 3643 }, { "epoch": 0.1984303635484947, "grad_norm": 1.002927774669751, "learning_rate": 0.00019749811452457946, "loss": 12.4528, "step": 3644 }, { "epoch": 0.19848481754507769, "grad_norm": 0.7787618250614149, "learning_rate": 0.00019749615395626985, "loss": 12.6582, "step": 3645 }, { "epoch": 0.1985392715416607, "grad_norm": 0.8308895758175826, "learning_rate": 0.0001974941926298138, "loss": 12.7564, "step": 3646 }, { "epoch": 0.19859372553824373, "grad_norm": 0.697413418831933, "learning_rate": 0.00019749223054522656, "loss": 12.6837, "step": 3647 }, { "epoch": 0.19864817953482675, "grad_norm": 0.6846133126736759, "learning_rate": 0.0001974902677025234, "loss": 12.4405, "step": 3648 }, { "epoch": 0.19870263353140974, "grad_norm": 0.7054346767013996, "learning_rate": 0.00019748830410171956, "loss": 12.6491, "step": 3649 }, { "epoch": 0.19875708752799276, "grad_norm": 0.8471597612637782, "learning_rate": 0.00019748633974283033, "loss": 12.7337, "step": 3650 }, { "epoch": 0.19881154152457578, "grad_norm": 0.6812288046264803, "learning_rate": 0.00019748437462587096, "loss": 12.5475, "step": 3651 }, { "epoch": 0.19886599552115877, "grad_norm": 0.8246124260906363, "learning_rate": 0.00019748240875085672, "loss": 12.6354, "step": 3652 }, { "epoch": 0.1989204495177418, "grad_norm": 0.7300175077201014, "learning_rate": 0.00019748044211780297, "loss": 12.5718, "step": 3653 }, { "epoch": 0.1989749035143248, "grad_norm": 0.7580912082168959, "learning_rate": 0.00019747847472672493, "loss": 12.6165, "step": 3654 }, { "epoch": 0.1990293575109078, "grad_norm": 0.6839688816562365, "learning_rate": 0.00019747650657763792, "loss": 12.6779, "step": 3655 }, { "epoch": 0.19908381150749083, "grad_norm": 0.7581402696591821, "learning_rate": 0.00019747453767055725, "loss": 12.5625, "step": 3656 }, { "epoch": 0.19913826550407385, "grad_norm": 0.7254368507070799, "learning_rate": 0.00019747256800549824, "loss": 12.529, "step": 3657 }, { "epoch": 0.19919271950065684, "grad_norm": 0.6925063557379147, "learning_rate": 0.00019747059758247617, "loss": 12.56, "step": 3658 }, { "epoch": 0.19924717349723986, "grad_norm": 0.7606285519649787, "learning_rate": 0.00019746862640150642, "loss": 12.5587, "step": 3659 }, { "epoch": 0.19930162749382288, "grad_norm": 0.726365152862436, "learning_rate": 0.00019746665446260426, "loss": 12.6198, "step": 3660 }, { "epoch": 0.19935608149040587, "grad_norm": 0.6861309571156384, "learning_rate": 0.00019746468176578503, "loss": 12.7161, "step": 3661 }, { "epoch": 0.1994105354869889, "grad_norm": 0.8451387517182866, "learning_rate": 0.00019746270831106415, "loss": 12.6925, "step": 3662 }, { "epoch": 0.19946498948357191, "grad_norm": 0.8460799924334934, "learning_rate": 0.00019746073409845685, "loss": 12.5613, "step": 3663 }, { "epoch": 0.19951944348015493, "grad_norm": 0.8082933138547622, "learning_rate": 0.00019745875912797857, "loss": 12.4263, "step": 3664 }, { "epoch": 0.19957389747673793, "grad_norm": 0.6649570550012347, "learning_rate": 0.00019745678339964462, "loss": 12.512, "step": 3665 }, { "epoch": 0.19962835147332095, "grad_norm": 0.7607425799101774, "learning_rate": 0.00019745480691347038, "loss": 12.7488, "step": 3666 }, { "epoch": 0.19968280546990397, "grad_norm": 0.7754492850947158, "learning_rate": 0.00019745282966947123, "loss": 12.7272, "step": 3667 }, { "epoch": 0.19973725946648696, "grad_norm": 0.7840978078878431, "learning_rate": 0.00019745085166766253, "loss": 12.6944, "step": 3668 }, { "epoch": 0.19979171346306998, "grad_norm": 0.6741926222868774, "learning_rate": 0.00019744887290805963, "loss": 12.5978, "step": 3669 }, { "epoch": 0.199846167459653, "grad_norm": 0.7342454477300046, "learning_rate": 0.000197446893390678, "loss": 12.6297, "step": 3670 }, { "epoch": 0.199900621456236, "grad_norm": 0.8151529838473607, "learning_rate": 0.00019744491311553296, "loss": 12.8349, "step": 3671 }, { "epoch": 0.19995507545281901, "grad_norm": 0.738421652122142, "learning_rate": 0.00019744293208263995, "loss": 12.7038, "step": 3672 }, { "epoch": 0.20000952944940203, "grad_norm": 0.7168652829178292, "learning_rate": 0.00019744095029201438, "loss": 12.6497, "step": 3673 }, { "epoch": 0.20006398344598503, "grad_norm": 0.8215008669818747, "learning_rate": 0.0001974389677436716, "loss": 12.6954, "step": 3674 }, { "epoch": 0.20011843744256805, "grad_norm": 0.8097171866747358, "learning_rate": 0.0001974369844376271, "loss": 12.5313, "step": 3675 }, { "epoch": 0.20017289143915107, "grad_norm": 0.7084186130888639, "learning_rate": 0.00019743500037389624, "loss": 12.6974, "step": 3676 }, { "epoch": 0.20022734543573406, "grad_norm": 0.7524209666082665, "learning_rate": 0.00019743301555249446, "loss": 12.5611, "step": 3677 }, { "epoch": 0.20028179943231708, "grad_norm": 0.7798370366657434, "learning_rate": 0.00019743102997343725, "loss": 12.5747, "step": 3678 }, { "epoch": 0.2003362534289001, "grad_norm": 0.8022141403799491, "learning_rate": 0.00019742904363674, "loss": 12.7231, "step": 3679 }, { "epoch": 0.20039070742548312, "grad_norm": 0.8906558553915508, "learning_rate": 0.00019742705654241815, "loss": 12.6655, "step": 3680 }, { "epoch": 0.20044516142206611, "grad_norm": 0.6849201171068462, "learning_rate": 0.00019742506869048718, "loss": 12.6287, "step": 3681 }, { "epoch": 0.20049961541864914, "grad_norm": 0.7380211492839521, "learning_rate": 0.00019742308008096254, "loss": 12.806, "step": 3682 }, { "epoch": 0.20055406941523216, "grad_norm": 0.7440520912943274, "learning_rate": 0.00019742109071385972, "loss": 12.6976, "step": 3683 }, { "epoch": 0.20060852341181515, "grad_norm": 0.8094904770848835, "learning_rate": 0.0001974191005891941, "loss": 12.7635, "step": 3684 }, { "epoch": 0.20066297740839817, "grad_norm": 1.0039098394519197, "learning_rate": 0.0001974171097069813, "loss": 12.8509, "step": 3685 }, { "epoch": 0.2007174314049812, "grad_norm": 0.720703183546859, "learning_rate": 0.00019741511806723664, "loss": 12.6221, "step": 3686 }, { "epoch": 0.20077188540156418, "grad_norm": 0.8402311327294334, "learning_rate": 0.00019741312566997572, "loss": 12.6864, "step": 3687 }, { "epoch": 0.2008263393981472, "grad_norm": 0.6559263382773919, "learning_rate": 0.00019741113251521398, "loss": 12.4228, "step": 3688 }, { "epoch": 0.20088079339473022, "grad_norm": 0.721615468741822, "learning_rate": 0.00019740913860296697, "loss": 12.6649, "step": 3689 }, { "epoch": 0.20093524739131322, "grad_norm": 0.7573016725666538, "learning_rate": 0.00019740714393325014, "loss": 12.5624, "step": 3690 }, { "epoch": 0.20098970138789624, "grad_norm": 0.8287294148461015, "learning_rate": 0.00019740514850607904, "loss": 12.7974, "step": 3691 }, { "epoch": 0.20104415538447926, "grad_norm": 0.7486278885824178, "learning_rate": 0.00019740315232146913, "loss": 12.6035, "step": 3692 }, { "epoch": 0.20109860938106225, "grad_norm": 0.7321991888679197, "learning_rate": 0.00019740115537943603, "loss": 12.7047, "step": 3693 }, { "epoch": 0.20115306337764527, "grad_norm": 0.7230726632935411, "learning_rate": 0.00019739915767999518, "loss": 12.6969, "step": 3694 }, { "epoch": 0.2012075173742283, "grad_norm": 0.8336807145808396, "learning_rate": 0.00019739715922316214, "loss": 12.7886, "step": 3695 }, { "epoch": 0.2012619713708113, "grad_norm": 0.8261360393578282, "learning_rate": 0.00019739516000895246, "loss": 12.7838, "step": 3696 }, { "epoch": 0.2013164253673943, "grad_norm": 0.7161373279083758, "learning_rate": 0.00019739316003738167, "loss": 12.6303, "step": 3697 }, { "epoch": 0.20137087936397732, "grad_norm": 0.6807051699186523, "learning_rate": 0.00019739115930846537, "loss": 12.626, "step": 3698 }, { "epoch": 0.20142533336056034, "grad_norm": 0.6295910457426575, "learning_rate": 0.00019738915782221907, "loss": 12.5255, "step": 3699 }, { "epoch": 0.20147978735714334, "grad_norm": 0.7683727689702888, "learning_rate": 0.00019738715557865834, "loss": 12.7087, "step": 3700 }, { "epoch": 0.20153424135372636, "grad_norm": 0.744167704136972, "learning_rate": 0.00019738515257779877, "loss": 12.5179, "step": 3701 }, { "epoch": 0.20158869535030938, "grad_norm": 0.69197717897407, "learning_rate": 0.0001973831488196559, "loss": 12.4385, "step": 3702 }, { "epoch": 0.20164314934689237, "grad_norm": 0.7643508370180074, "learning_rate": 0.00019738114430424534, "loss": 12.5915, "step": 3703 }, { "epoch": 0.2016976033434754, "grad_norm": 0.7776169332587081, "learning_rate": 0.00019737913903158268, "loss": 12.7894, "step": 3704 }, { "epoch": 0.2017520573400584, "grad_norm": 0.7447411020234989, "learning_rate": 0.0001973771330016835, "loss": 12.5682, "step": 3705 }, { "epoch": 0.2018065113366414, "grad_norm": 0.7246836967348779, "learning_rate": 0.0001973751262145634, "loss": 12.6121, "step": 3706 }, { "epoch": 0.20186096533322442, "grad_norm": 0.7535649752440313, "learning_rate": 0.00019737311867023798, "loss": 12.6519, "step": 3707 }, { "epoch": 0.20191541932980744, "grad_norm": 0.7390233694094689, "learning_rate": 0.0001973711103687229, "loss": 12.8212, "step": 3708 }, { "epoch": 0.20196987332639044, "grad_norm": 0.8148239607128785, "learning_rate": 0.00019736910131003369, "loss": 12.6742, "step": 3709 }, { "epoch": 0.20202432732297346, "grad_norm": 0.7451607895808458, "learning_rate": 0.00019736709149418603, "loss": 12.7395, "step": 3710 }, { "epoch": 0.20207878131955648, "grad_norm": 0.7093270748909235, "learning_rate": 0.00019736508092119554, "loss": 12.644, "step": 3711 }, { "epoch": 0.20213323531613947, "grad_norm": 0.8758348356705866, "learning_rate": 0.00019736306959107787, "loss": 12.8661, "step": 3712 }, { "epoch": 0.2021876893127225, "grad_norm": 0.6816408808300222, "learning_rate": 0.00019736105750384864, "loss": 12.6664, "step": 3713 }, { "epoch": 0.2022421433093055, "grad_norm": 0.7525269361120305, "learning_rate": 0.00019735904465952348, "loss": 12.6835, "step": 3714 }, { "epoch": 0.20229659730588853, "grad_norm": 0.8220374711376354, "learning_rate": 0.00019735703105811807, "loss": 12.6514, "step": 3715 }, { "epoch": 0.20235105130247152, "grad_norm": 0.6714431280177346, "learning_rate": 0.00019735501669964806, "loss": 12.6893, "step": 3716 }, { "epoch": 0.20240550529905454, "grad_norm": 0.9556940251485908, "learning_rate": 0.00019735300158412911, "loss": 12.7626, "step": 3717 }, { "epoch": 0.20245995929563756, "grad_norm": 0.7312910563842567, "learning_rate": 0.0001973509857115769, "loss": 12.5876, "step": 3718 }, { "epoch": 0.20251441329222056, "grad_norm": 0.7475186407951433, "learning_rate": 0.0001973489690820071, "loss": 12.6019, "step": 3719 }, { "epoch": 0.20256886728880358, "grad_norm": 0.7941381101364623, "learning_rate": 0.0001973469516954354, "loss": 12.648, "step": 3720 }, { "epoch": 0.2026233212853866, "grad_norm": 0.7493618173219636, "learning_rate": 0.00019734493355187747, "loss": 12.5324, "step": 3721 }, { "epoch": 0.2026777752819696, "grad_norm": 0.7908043493982647, "learning_rate": 0.00019734291465134903, "loss": 12.6918, "step": 3722 }, { "epoch": 0.2027322292785526, "grad_norm": 0.9627774794856256, "learning_rate": 0.00019734089499386573, "loss": 12.6197, "step": 3723 }, { "epoch": 0.20278668327513563, "grad_norm": 0.7098037134646387, "learning_rate": 0.0001973388745794433, "loss": 12.6087, "step": 3724 }, { "epoch": 0.20284113727171862, "grad_norm": 0.7636725436853691, "learning_rate": 0.0001973368534080975, "loss": 12.6153, "step": 3725 }, { "epoch": 0.20289559126830164, "grad_norm": 0.7626349542601414, "learning_rate": 0.00019733483147984395, "loss": 12.6811, "step": 3726 }, { "epoch": 0.20295004526488467, "grad_norm": 0.74736286062578, "learning_rate": 0.00019733280879469847, "loss": 12.6376, "step": 3727 }, { "epoch": 0.20300449926146766, "grad_norm": 0.7195779098222057, "learning_rate": 0.00019733078535267673, "loss": 12.6022, "step": 3728 }, { "epoch": 0.20305895325805068, "grad_norm": 0.7584375166225391, "learning_rate": 0.00019732876115379449, "loss": 12.6474, "step": 3729 }, { "epoch": 0.2031134072546337, "grad_norm": 0.6761342916077878, "learning_rate": 0.00019732673619806746, "loss": 12.5903, "step": 3730 }, { "epoch": 0.20316786125121672, "grad_norm": 0.7292770150961099, "learning_rate": 0.00019732471048551143, "loss": 12.5185, "step": 3731 }, { "epoch": 0.2032223152477997, "grad_norm": 0.6717013120758528, "learning_rate": 0.00019732268401614214, "loss": 12.7058, "step": 3732 }, { "epoch": 0.20327676924438273, "grad_norm": 0.7336554411162386, "learning_rate": 0.00019732065678997529, "loss": 12.6187, "step": 3733 }, { "epoch": 0.20333122324096575, "grad_norm": 0.6693441989255688, "learning_rate": 0.00019731862880702675, "loss": 12.5719, "step": 3734 }, { "epoch": 0.20338567723754875, "grad_norm": 0.7673519285327757, "learning_rate": 0.0001973166000673122, "loss": 12.6662, "step": 3735 }, { "epoch": 0.20344013123413177, "grad_norm": 0.6610446427454186, "learning_rate": 0.00019731457057084746, "loss": 12.6042, "step": 3736 }, { "epoch": 0.2034945852307148, "grad_norm": 0.7575110207153776, "learning_rate": 0.0001973125403176483, "loss": 12.6657, "step": 3737 }, { "epoch": 0.20354903922729778, "grad_norm": 0.7236274014841737, "learning_rate": 0.00019731050930773048, "loss": 12.665, "step": 3738 }, { "epoch": 0.2036034932238808, "grad_norm": 0.7145867625466173, "learning_rate": 0.00019730847754110983, "loss": 12.6669, "step": 3739 }, { "epoch": 0.20365794722046382, "grad_norm": 0.7723555587156568, "learning_rate": 0.00019730644501780216, "loss": 12.5307, "step": 3740 }, { "epoch": 0.2037124012170468, "grad_norm": 0.7367181865589261, "learning_rate": 0.00019730441173782323, "loss": 12.5939, "step": 3741 }, { "epoch": 0.20376685521362983, "grad_norm": 0.7185443582889142, "learning_rate": 0.0001973023777011889, "loss": 12.5633, "step": 3742 }, { "epoch": 0.20382130921021285, "grad_norm": 0.9380030962463133, "learning_rate": 0.00019730034290791495, "loss": 12.8148, "step": 3743 }, { "epoch": 0.20387576320679585, "grad_norm": 0.7097460165179456, "learning_rate": 0.00019729830735801723, "loss": 12.5936, "step": 3744 }, { "epoch": 0.20393021720337887, "grad_norm": 0.7630980977859622, "learning_rate": 0.00019729627105151157, "loss": 12.5326, "step": 3745 }, { "epoch": 0.2039846711999619, "grad_norm": 0.7874843041862235, "learning_rate": 0.00019729423398841375, "loss": 12.7463, "step": 3746 }, { "epoch": 0.2040391251965449, "grad_norm": 0.7706619050603203, "learning_rate": 0.00019729219616873965, "loss": 12.5942, "step": 3747 }, { "epoch": 0.2040935791931279, "grad_norm": 0.8042598954022532, "learning_rate": 0.00019729015759250516, "loss": 12.6013, "step": 3748 }, { "epoch": 0.20414803318971092, "grad_norm": 0.7745473764242377, "learning_rate": 0.00019728811825972604, "loss": 12.5237, "step": 3749 }, { "epoch": 0.20420248718629394, "grad_norm": 0.7852220734332034, "learning_rate": 0.0001972860781704182, "loss": 12.652, "step": 3750 }, { "epoch": 0.20425694118287693, "grad_norm": 0.7271940145806121, "learning_rate": 0.00019728403732459756, "loss": 12.6442, "step": 3751 }, { "epoch": 0.20431139517945995, "grad_norm": 0.796056460138645, "learning_rate": 0.00019728199572227988, "loss": 12.4352, "step": 3752 }, { "epoch": 0.20436584917604297, "grad_norm": 0.9419382278691907, "learning_rate": 0.0001972799533634811, "loss": 12.5889, "step": 3753 }, { "epoch": 0.20442030317262597, "grad_norm": 0.687316538090516, "learning_rate": 0.0001972779102482171, "loss": 12.5692, "step": 3754 }, { "epoch": 0.204474757169209, "grad_norm": 0.7396866618011716, "learning_rate": 0.00019727586637650373, "loss": 12.5719, "step": 3755 }, { "epoch": 0.204529211165792, "grad_norm": 0.8042947033928121, "learning_rate": 0.00019727382174835692, "loss": 12.6433, "step": 3756 }, { "epoch": 0.204583665162375, "grad_norm": 0.783706359978815, "learning_rate": 0.00019727177636379257, "loss": 12.6691, "step": 3757 }, { "epoch": 0.20463811915895802, "grad_norm": 0.8823304070658445, "learning_rate": 0.00019726973022282657, "loss": 12.6565, "step": 3758 }, { "epoch": 0.20469257315554104, "grad_norm": 0.7065231139049295, "learning_rate": 0.00019726768332547484, "loss": 12.642, "step": 3759 }, { "epoch": 0.20474702715212403, "grad_norm": 0.8411424177529961, "learning_rate": 0.00019726563567175326, "loss": 12.6264, "step": 3760 }, { "epoch": 0.20480148114870705, "grad_norm": 0.7193541612205515, "learning_rate": 0.00019726358726167783, "loss": 12.7119, "step": 3761 }, { "epoch": 0.20485593514529007, "grad_norm": 0.826046012175022, "learning_rate": 0.0001972615380952644, "loss": 12.6619, "step": 3762 }, { "epoch": 0.2049103891418731, "grad_norm": 0.8003066900229244, "learning_rate": 0.00019725948817252896, "loss": 12.6848, "step": 3763 }, { "epoch": 0.2049648431384561, "grad_norm": 0.707534024551744, "learning_rate": 0.00019725743749348743, "loss": 12.6277, "step": 3764 }, { "epoch": 0.2050192971350391, "grad_norm": 0.7530481312430138, "learning_rate": 0.00019725538605815573, "loss": 12.6478, "step": 3765 }, { "epoch": 0.20507375113162213, "grad_norm": 0.7858723007595092, "learning_rate": 0.00019725333386654987, "loss": 12.5266, "step": 3766 }, { "epoch": 0.20512820512820512, "grad_norm": 0.7056438702746797, "learning_rate": 0.00019725128091868576, "loss": 12.5997, "step": 3767 }, { "epoch": 0.20518265912478814, "grad_norm": 0.8041505336620463, "learning_rate": 0.00019724922721457938, "loss": 12.7159, "step": 3768 }, { "epoch": 0.20523711312137116, "grad_norm": 0.7398059661675201, "learning_rate": 0.00019724717275424673, "loss": 12.6444, "step": 3769 }, { "epoch": 0.20529156711795415, "grad_norm": 0.6868492068938551, "learning_rate": 0.00019724511753770374, "loss": 12.6264, "step": 3770 }, { "epoch": 0.20534602111453717, "grad_norm": 0.7634898216311271, "learning_rate": 0.0001972430615649664, "loss": 12.629, "step": 3771 }, { "epoch": 0.2054004751111202, "grad_norm": 0.8049756446724106, "learning_rate": 0.00019724100483605069, "loss": 12.7396, "step": 3772 }, { "epoch": 0.2054549291077032, "grad_norm": 0.7387584826388791, "learning_rate": 0.00019723894735097262, "loss": 12.5023, "step": 3773 }, { "epoch": 0.2055093831042862, "grad_norm": 0.6968592334321247, "learning_rate": 0.00019723688910974822, "loss": 12.6262, "step": 3774 }, { "epoch": 0.20556383710086923, "grad_norm": 0.7328486248615598, "learning_rate": 0.00019723483011239345, "loss": 12.6452, "step": 3775 }, { "epoch": 0.20561829109745222, "grad_norm": 0.8873669973092565, "learning_rate": 0.00019723277035892434, "loss": 12.4639, "step": 3776 }, { "epoch": 0.20567274509403524, "grad_norm": 0.800906708179299, "learning_rate": 0.00019723070984935687, "loss": 12.6349, "step": 3777 }, { "epoch": 0.20572719909061826, "grad_norm": 0.7353268544823438, "learning_rate": 0.00019722864858370714, "loss": 12.6264, "step": 3778 }, { "epoch": 0.20578165308720126, "grad_norm": 0.7185208506391326, "learning_rate": 0.00019722658656199112, "loss": 12.7086, "step": 3779 }, { "epoch": 0.20583610708378428, "grad_norm": 0.7791559699979477, "learning_rate": 0.00019722452378422484, "loss": 12.6312, "step": 3780 }, { "epoch": 0.2058905610803673, "grad_norm": 0.7679050894103957, "learning_rate": 0.00019722246025042438, "loss": 12.6771, "step": 3781 }, { "epoch": 0.20594501507695032, "grad_norm": 0.7874917981449171, "learning_rate": 0.00019722039596060573, "loss": 12.5139, "step": 3782 }, { "epoch": 0.2059994690735333, "grad_norm": 0.7991378424352726, "learning_rate": 0.00019721833091478498, "loss": 12.7016, "step": 3783 }, { "epoch": 0.20605392307011633, "grad_norm": 0.6959229181476858, "learning_rate": 0.0001972162651129782, "loss": 12.746, "step": 3784 }, { "epoch": 0.20610837706669935, "grad_norm": 0.8788473918123754, "learning_rate": 0.0001972141985552015, "loss": 12.6087, "step": 3785 }, { "epoch": 0.20616283106328234, "grad_norm": 0.7331671459525739, "learning_rate": 0.0001972121312414708, "loss": 12.3949, "step": 3786 }, { "epoch": 0.20621728505986536, "grad_norm": 0.7483671298906623, "learning_rate": 0.0001972100631718023, "loss": 12.6329, "step": 3787 }, { "epoch": 0.20627173905644838, "grad_norm": 0.7858065652580846, "learning_rate": 0.00019720799434621206, "loss": 12.512, "step": 3788 }, { "epoch": 0.20632619305303138, "grad_norm": 0.8633490504031776, "learning_rate": 0.00019720592476471613, "loss": 12.7427, "step": 3789 }, { "epoch": 0.2063806470496144, "grad_norm": 0.7954996022566327, "learning_rate": 0.00019720385442733063, "loss": 12.7359, "step": 3790 }, { "epoch": 0.20643510104619742, "grad_norm": 0.7359411612203388, "learning_rate": 0.00019720178333407166, "loss": 12.5897, "step": 3791 }, { "epoch": 0.2064895550427804, "grad_norm": 0.834858116614354, "learning_rate": 0.00019719971148495535, "loss": 12.7141, "step": 3792 }, { "epoch": 0.20654400903936343, "grad_norm": 0.8172693244285171, "learning_rate": 0.00019719763887999774, "loss": 12.6519, "step": 3793 }, { "epoch": 0.20659846303594645, "grad_norm": 0.7447324971926574, "learning_rate": 0.00019719556551921503, "loss": 12.7726, "step": 3794 }, { "epoch": 0.20665291703252944, "grad_norm": 0.790954552773982, "learning_rate": 0.00019719349140262326, "loss": 12.648, "step": 3795 }, { "epoch": 0.20670737102911246, "grad_norm": 0.7456368748812312, "learning_rate": 0.00019719141653023865, "loss": 12.5901, "step": 3796 }, { "epoch": 0.20676182502569548, "grad_norm": 0.7694652585477508, "learning_rate": 0.00019718934090207725, "loss": 12.5993, "step": 3797 }, { "epoch": 0.2068162790222785, "grad_norm": 0.6836342209484225, "learning_rate": 0.00019718726451815524, "loss": 12.6304, "step": 3798 }, { "epoch": 0.2068707330188615, "grad_norm": 0.7862179588431599, "learning_rate": 0.00019718518737848876, "loss": 12.5865, "step": 3799 }, { "epoch": 0.20692518701544452, "grad_norm": 0.7866118126659232, "learning_rate": 0.000197183109483094, "loss": 12.7151, "step": 3800 }, { "epoch": 0.20697964101202754, "grad_norm": 0.7387733279981449, "learning_rate": 0.00019718103083198705, "loss": 12.5917, "step": 3801 }, { "epoch": 0.20703409500861053, "grad_norm": 0.7673889903132298, "learning_rate": 0.0001971789514251841, "loss": 12.6231, "step": 3802 }, { "epoch": 0.20708854900519355, "grad_norm": 0.7998345043574764, "learning_rate": 0.00019717687126270133, "loss": 12.556, "step": 3803 }, { "epoch": 0.20714300300177657, "grad_norm": 0.6636026554154101, "learning_rate": 0.00019717479034455493, "loss": 12.5632, "step": 3804 }, { "epoch": 0.20719745699835956, "grad_norm": 0.7661527649226619, "learning_rate": 0.00019717270867076106, "loss": 12.5358, "step": 3805 }, { "epoch": 0.20725191099494258, "grad_norm": 0.8041290947239476, "learning_rate": 0.00019717062624133593, "loss": 12.779, "step": 3806 }, { "epoch": 0.2073063649915256, "grad_norm": 0.8016300510471224, "learning_rate": 0.00019716854305629569, "loss": 12.8196, "step": 3807 }, { "epoch": 0.2073608189881086, "grad_norm": 0.7844723455880096, "learning_rate": 0.00019716645911565657, "loss": 12.7492, "step": 3808 }, { "epoch": 0.20741527298469162, "grad_norm": 0.646050380941143, "learning_rate": 0.00019716437441943477, "loss": 12.6152, "step": 3809 }, { "epoch": 0.20746972698127464, "grad_norm": 0.7273011653324999, "learning_rate": 0.0001971622889676465, "loss": 12.5066, "step": 3810 }, { "epoch": 0.20752418097785763, "grad_norm": 0.6956035958581436, "learning_rate": 0.00019716020276030796, "loss": 12.5895, "step": 3811 }, { "epoch": 0.20757863497444065, "grad_norm": 0.68685569219118, "learning_rate": 0.00019715811579743543, "loss": 12.613, "step": 3812 }, { "epoch": 0.20763308897102367, "grad_norm": 0.6921888162595382, "learning_rate": 0.00019715602807904504, "loss": 12.6016, "step": 3813 }, { "epoch": 0.2076875429676067, "grad_norm": 0.7068906851597578, "learning_rate": 0.0001971539396051531, "loss": 12.5869, "step": 3814 }, { "epoch": 0.20774199696418968, "grad_norm": 0.6871538704273896, "learning_rate": 0.00019715185037577586, "loss": 12.5944, "step": 3815 }, { "epoch": 0.2077964509607727, "grad_norm": 0.7600040985296437, "learning_rate": 0.00019714976039092954, "loss": 12.5654, "step": 3816 }, { "epoch": 0.20785090495735573, "grad_norm": 0.6744308421520104, "learning_rate": 0.00019714766965063036, "loss": 12.5306, "step": 3817 }, { "epoch": 0.20790535895393872, "grad_norm": 0.7878816728043071, "learning_rate": 0.00019714557815489462, "loss": 12.793, "step": 3818 }, { "epoch": 0.20795981295052174, "grad_norm": 0.8099819105548215, "learning_rate": 0.0001971434859037386, "loss": 12.7286, "step": 3819 }, { "epoch": 0.20801426694710476, "grad_norm": 0.6456719660154859, "learning_rate": 0.0001971413928971785, "loss": 12.6326, "step": 3820 }, { "epoch": 0.20806872094368775, "grad_norm": 0.8364155602205413, "learning_rate": 0.00019713929913523068, "loss": 12.6638, "step": 3821 }, { "epoch": 0.20812317494027077, "grad_norm": 0.6785665463512941, "learning_rate": 0.00019713720461791135, "loss": 12.6289, "step": 3822 }, { "epoch": 0.2081776289368538, "grad_norm": 0.7327840517026947, "learning_rate": 0.00019713510934523683, "loss": 12.6189, "step": 3823 }, { "epoch": 0.20823208293343679, "grad_norm": 0.7269210860075299, "learning_rate": 0.00019713301331722343, "loss": 12.749, "step": 3824 }, { "epoch": 0.2082865369300198, "grad_norm": 0.7482373158985721, "learning_rate": 0.0001971309165338874, "loss": 12.6211, "step": 3825 }, { "epoch": 0.20834099092660283, "grad_norm": 0.6911904425702748, "learning_rate": 0.0001971288189952451, "loss": 12.5941, "step": 3826 }, { "epoch": 0.20839544492318582, "grad_norm": 0.8193143300411501, "learning_rate": 0.0001971267207013128, "loss": 12.6969, "step": 3827 }, { "epoch": 0.20844989891976884, "grad_norm": 0.711827074497607, "learning_rate": 0.00019712462165210684, "loss": 12.6972, "step": 3828 }, { "epoch": 0.20850435291635186, "grad_norm": 0.7307796661654593, "learning_rate": 0.00019712252184764354, "loss": 12.6634, "step": 3829 }, { "epoch": 0.20855880691293488, "grad_norm": 0.750460068624912, "learning_rate": 0.00019712042128793922, "loss": 12.6106, "step": 3830 }, { "epoch": 0.20861326090951787, "grad_norm": 0.7586157633791372, "learning_rate": 0.0001971183199730102, "loss": 12.8095, "step": 3831 }, { "epoch": 0.2086677149061009, "grad_norm": 0.7641058986350736, "learning_rate": 0.00019711621790287286, "loss": 12.6426, "step": 3832 }, { "epoch": 0.2087221689026839, "grad_norm": 0.7337260905000521, "learning_rate": 0.00019711411507754352, "loss": 12.4951, "step": 3833 }, { "epoch": 0.2087766228992669, "grad_norm": 0.7356825958053724, "learning_rate": 0.0001971120114970385, "loss": 12.4995, "step": 3834 }, { "epoch": 0.20883107689584993, "grad_norm": 0.6250276991757239, "learning_rate": 0.00019710990716137423, "loss": 12.5067, "step": 3835 }, { "epoch": 0.20888553089243295, "grad_norm": 0.7157164003208205, "learning_rate": 0.00019710780207056702, "loss": 12.5821, "step": 3836 }, { "epoch": 0.20893998488901594, "grad_norm": 0.781156813133304, "learning_rate": 0.00019710569622463327, "loss": 12.7614, "step": 3837 }, { "epoch": 0.20899443888559896, "grad_norm": 0.8273223237189452, "learning_rate": 0.00019710358962358933, "loss": 12.6423, "step": 3838 }, { "epoch": 0.20904889288218198, "grad_norm": 0.6768823229002183, "learning_rate": 0.0001971014822674516, "loss": 12.6379, "step": 3839 }, { "epoch": 0.20910334687876497, "grad_norm": 0.7061533212592964, "learning_rate": 0.00019709937415623646, "loss": 12.7136, "step": 3840 }, { "epoch": 0.209157800875348, "grad_norm": 0.686532646992562, "learning_rate": 0.00019709726528996027, "loss": 12.6175, "step": 3841 }, { "epoch": 0.209212254871931, "grad_norm": 0.7922752222688225, "learning_rate": 0.00019709515566863951, "loss": 12.7968, "step": 3842 }, { "epoch": 0.209266708868514, "grad_norm": 0.6801245682410086, "learning_rate": 0.00019709304529229053, "loss": 12.7147, "step": 3843 }, { "epoch": 0.20932116286509703, "grad_norm": 0.6555963750355286, "learning_rate": 0.0001970909341609297, "loss": 12.5695, "step": 3844 }, { "epoch": 0.20937561686168005, "grad_norm": 0.7913863987878447, "learning_rate": 0.00019708882227457354, "loss": 12.6226, "step": 3845 }, { "epoch": 0.20943007085826304, "grad_norm": 0.7239585315891942, "learning_rate": 0.00019708670963323842, "loss": 12.6389, "step": 3846 }, { "epoch": 0.20948452485484606, "grad_norm": 0.7520411193774028, "learning_rate": 0.00019708459623694072, "loss": 12.7562, "step": 3847 }, { "epoch": 0.20953897885142908, "grad_norm": 0.6390701288621873, "learning_rate": 0.00019708248208569695, "loss": 12.536, "step": 3848 }, { "epoch": 0.2095934328480121, "grad_norm": 0.7632552432870136, "learning_rate": 0.0001970803671795235, "loss": 12.7752, "step": 3849 }, { "epoch": 0.2096478868445951, "grad_norm": 0.6980795126129343, "learning_rate": 0.00019707825151843683, "loss": 12.6178, "step": 3850 }, { "epoch": 0.20970234084117811, "grad_norm": 0.6297619428404707, "learning_rate": 0.0001970761351024534, "loss": 12.6154, "step": 3851 }, { "epoch": 0.20975679483776113, "grad_norm": 0.716503403711023, "learning_rate": 0.0001970740179315897, "loss": 12.6761, "step": 3852 }, { "epoch": 0.20981124883434413, "grad_norm": 1.0613728402924163, "learning_rate": 0.0001970719000058621, "loss": 12.6331, "step": 3853 }, { "epoch": 0.20986570283092715, "grad_norm": 0.8591171392678476, "learning_rate": 0.00019706978132528718, "loss": 12.5784, "step": 3854 }, { "epoch": 0.20992015682751017, "grad_norm": 0.714119371116891, "learning_rate": 0.00019706766188988133, "loss": 12.6617, "step": 3855 }, { "epoch": 0.20997461082409316, "grad_norm": 0.7094014273501245, "learning_rate": 0.00019706554169966105, "loss": 12.6061, "step": 3856 }, { "epoch": 0.21002906482067618, "grad_norm": 0.9055915065824638, "learning_rate": 0.00019706342075464286, "loss": 12.688, "step": 3857 }, { "epoch": 0.2100835188172592, "grad_norm": 0.7266838551953319, "learning_rate": 0.00019706129905484323, "loss": 12.6648, "step": 3858 }, { "epoch": 0.2101379728138422, "grad_norm": 0.7897653004405011, "learning_rate": 0.00019705917660027867, "loss": 12.5621, "step": 3859 }, { "epoch": 0.21019242681042521, "grad_norm": 0.6577066357431925, "learning_rate": 0.00019705705339096566, "loss": 12.6753, "step": 3860 }, { "epoch": 0.21024688080700824, "grad_norm": 0.7631655878819147, "learning_rate": 0.00019705492942692074, "loss": 12.5231, "step": 3861 }, { "epoch": 0.21030133480359123, "grad_norm": 0.6823330943710011, "learning_rate": 0.00019705280470816043, "loss": 12.6195, "step": 3862 }, { "epoch": 0.21035578880017425, "grad_norm": 0.6802375107907133, "learning_rate": 0.0001970506792347012, "loss": 12.5155, "step": 3863 }, { "epoch": 0.21041024279675727, "grad_norm": 0.7288057731179113, "learning_rate": 0.00019704855300655964, "loss": 12.7704, "step": 3864 }, { "epoch": 0.2104646967933403, "grad_norm": 0.7067370312565961, "learning_rate": 0.00019704642602375223, "loss": 12.7041, "step": 3865 }, { "epoch": 0.21051915078992328, "grad_norm": 0.7328334988150504, "learning_rate": 0.00019704429828629554, "loss": 12.793, "step": 3866 }, { "epoch": 0.2105736047865063, "grad_norm": 0.743035582800159, "learning_rate": 0.00019704216979420612, "loss": 12.6079, "step": 3867 }, { "epoch": 0.21062805878308932, "grad_norm": 0.6952611456974587, "learning_rate": 0.00019704004054750055, "loss": 12.6263, "step": 3868 }, { "epoch": 0.21068251277967232, "grad_norm": 0.6967072401084503, "learning_rate": 0.0001970379105461953, "loss": 12.7435, "step": 3869 }, { "epoch": 0.21073696677625534, "grad_norm": 0.6810544243648045, "learning_rate": 0.00019703577979030698, "loss": 12.6692, "step": 3870 }, { "epoch": 0.21079142077283836, "grad_norm": 0.7173889510000903, "learning_rate": 0.0001970336482798522, "loss": 12.4846, "step": 3871 }, { "epoch": 0.21084587476942135, "grad_norm": 0.6766072230821139, "learning_rate": 0.0001970315160148475, "loss": 12.6103, "step": 3872 }, { "epoch": 0.21090032876600437, "grad_norm": 0.7340416998263765, "learning_rate": 0.00019702938299530942, "loss": 12.5992, "step": 3873 }, { "epoch": 0.2109547827625874, "grad_norm": 0.7250266860347498, "learning_rate": 0.00019702724922125462, "loss": 12.596, "step": 3874 }, { "epoch": 0.21100923675917038, "grad_norm": 0.7231440439405903, "learning_rate": 0.00019702511469269965, "loss": 12.6429, "step": 3875 }, { "epoch": 0.2110636907557534, "grad_norm": 0.7575607296218159, "learning_rate": 0.0001970229794096611, "loss": 12.637, "step": 3876 }, { "epoch": 0.21111814475233642, "grad_norm": 0.7560500121927132, "learning_rate": 0.0001970208433721556, "loss": 12.5314, "step": 3877 }, { "epoch": 0.21117259874891942, "grad_norm": 0.7222307087053694, "learning_rate": 0.00019701870658019976, "loss": 12.6902, "step": 3878 }, { "epoch": 0.21122705274550244, "grad_norm": 0.6647439342103886, "learning_rate": 0.0001970165690338102, "loss": 12.5535, "step": 3879 }, { "epoch": 0.21128150674208546, "grad_norm": 0.6745547574619641, "learning_rate": 0.00019701443073300349, "loss": 12.3277, "step": 3880 }, { "epoch": 0.21133596073866848, "grad_norm": 0.683311103597084, "learning_rate": 0.00019701229167779633, "loss": 12.6541, "step": 3881 }, { "epoch": 0.21139041473525147, "grad_norm": 0.7031825756420972, "learning_rate": 0.0001970101518682053, "loss": 12.3421, "step": 3882 }, { "epoch": 0.2114448687318345, "grad_norm": 0.6709281810865525, "learning_rate": 0.0001970080113042471, "loss": 12.6411, "step": 3883 }, { "epoch": 0.2114993227284175, "grad_norm": 0.7013708067172494, "learning_rate": 0.00019700586998593829, "loss": 12.622, "step": 3884 }, { "epoch": 0.2115537767250005, "grad_norm": 0.6970959113609223, "learning_rate": 0.0001970037279132956, "loss": 12.6072, "step": 3885 }, { "epoch": 0.21160823072158352, "grad_norm": 0.7172589159645827, "learning_rate": 0.00019700158508633564, "loss": 12.6771, "step": 3886 }, { "epoch": 0.21166268471816654, "grad_norm": 0.6968691606849642, "learning_rate": 0.00019699944150507507, "loss": 12.6998, "step": 3887 }, { "epoch": 0.21171713871474954, "grad_norm": 0.7427929804273878, "learning_rate": 0.0001969972971695306, "loss": 12.6571, "step": 3888 }, { "epoch": 0.21177159271133256, "grad_norm": 0.7081419720758875, "learning_rate": 0.00019699515207971885, "loss": 12.7094, "step": 3889 }, { "epoch": 0.21182604670791558, "grad_norm": 0.826287521696027, "learning_rate": 0.00019699300623565657, "loss": 12.5314, "step": 3890 }, { "epoch": 0.21188050070449857, "grad_norm": 0.7260506698265387, "learning_rate": 0.00019699085963736042, "loss": 12.6649, "step": 3891 }, { "epoch": 0.2119349547010816, "grad_norm": 0.8452203006842348, "learning_rate": 0.00019698871228484704, "loss": 12.6858, "step": 3892 }, { "epoch": 0.2119894086976646, "grad_norm": 0.7775963563602492, "learning_rate": 0.00019698656417813318, "loss": 12.5169, "step": 3893 }, { "epoch": 0.2120438626942476, "grad_norm": 0.7891925501823804, "learning_rate": 0.00019698441531723553, "loss": 12.7274, "step": 3894 }, { "epoch": 0.21209831669083062, "grad_norm": 0.6844535277171849, "learning_rate": 0.0001969822657021708, "loss": 12.494, "step": 3895 }, { "epoch": 0.21215277068741364, "grad_norm": 0.7594849342870383, "learning_rate": 0.0001969801153329557, "loss": 12.6393, "step": 3896 }, { "epoch": 0.21220722468399666, "grad_norm": 0.9077815788828331, "learning_rate": 0.000196977964209607, "loss": 12.6181, "step": 3897 }, { "epoch": 0.21226167868057966, "grad_norm": 0.7621298337666311, "learning_rate": 0.00019697581233214134, "loss": 12.6645, "step": 3898 }, { "epoch": 0.21231613267716268, "grad_norm": 0.8366099030579992, "learning_rate": 0.00019697365970057553, "loss": 12.6583, "step": 3899 }, { "epoch": 0.2123705866737457, "grad_norm": 0.7217507276712453, "learning_rate": 0.00019697150631492626, "loss": 12.6469, "step": 3900 }, { "epoch": 0.2124250406703287, "grad_norm": 0.8339395971536379, "learning_rate": 0.00019696935217521032, "loss": 12.7546, "step": 3901 }, { "epoch": 0.2124794946669117, "grad_norm": 0.7181593477883866, "learning_rate": 0.00019696719728144442, "loss": 12.5382, "step": 3902 }, { "epoch": 0.21253394866349473, "grad_norm": 0.7617599944487994, "learning_rate": 0.0001969650416336453, "loss": 12.4544, "step": 3903 }, { "epoch": 0.21258840266007772, "grad_norm": 0.7392836979080841, "learning_rate": 0.0001969628852318298, "loss": 12.5215, "step": 3904 }, { "epoch": 0.21264285665666074, "grad_norm": 0.7660976492049502, "learning_rate": 0.00019696072807601464, "loss": 12.5948, "step": 3905 }, { "epoch": 0.21269731065324377, "grad_norm": 0.7933437707513886, "learning_rate": 0.0001969585701662166, "loss": 12.7446, "step": 3906 }, { "epoch": 0.21275176464982676, "grad_norm": 0.7222756885902184, "learning_rate": 0.00019695641150245242, "loss": 12.5878, "step": 3907 }, { "epoch": 0.21280621864640978, "grad_norm": 0.7396778956759119, "learning_rate": 0.000196954252084739, "loss": 12.6364, "step": 3908 }, { "epoch": 0.2128606726429928, "grad_norm": 0.7406513542494016, "learning_rate": 0.000196952091913093, "loss": 12.4174, "step": 3909 }, { "epoch": 0.2129151266395758, "grad_norm": 0.801324775162043, "learning_rate": 0.00019694993098753126, "loss": 12.6281, "step": 3910 }, { "epoch": 0.2129695806361588, "grad_norm": 0.7922004514295725, "learning_rate": 0.0001969477693080706, "loss": 12.711, "step": 3911 }, { "epoch": 0.21302403463274183, "grad_norm": 0.7192166126667284, "learning_rate": 0.00019694560687472787, "loss": 12.713, "step": 3912 }, { "epoch": 0.21307848862932482, "grad_norm": 0.8280048325074828, "learning_rate": 0.00019694344368751984, "loss": 12.7116, "step": 3913 }, { "epoch": 0.21313294262590785, "grad_norm": 0.7244318785527158, "learning_rate": 0.00019694127974646334, "loss": 12.6594, "step": 3914 }, { "epoch": 0.21318739662249087, "grad_norm": 0.8168065098975937, "learning_rate": 0.00019693911505157515, "loss": 12.6092, "step": 3915 }, { "epoch": 0.21324185061907389, "grad_norm": 0.6701580431033446, "learning_rate": 0.00019693694960287218, "loss": 12.5596, "step": 3916 }, { "epoch": 0.21329630461565688, "grad_norm": 0.7177138044919541, "learning_rate": 0.0001969347834003712, "loss": 12.5675, "step": 3917 }, { "epoch": 0.2133507586122399, "grad_norm": 0.820418801242099, "learning_rate": 0.00019693261644408908, "loss": 12.5892, "step": 3918 }, { "epoch": 0.21340521260882292, "grad_norm": 0.8034108120593468, "learning_rate": 0.00019693044873404274, "loss": 12.7667, "step": 3919 }, { "epoch": 0.2134596666054059, "grad_norm": 0.7710641017405725, "learning_rate": 0.00019692828027024893, "loss": 12.7673, "step": 3920 }, { "epoch": 0.21351412060198893, "grad_norm": 0.6792728233039044, "learning_rate": 0.00019692611105272457, "loss": 12.6426, "step": 3921 }, { "epoch": 0.21356857459857195, "grad_norm": 0.8051947786505732, "learning_rate": 0.0001969239410814865, "loss": 12.6935, "step": 3922 }, { "epoch": 0.21362302859515495, "grad_norm": 0.8199349179966275, "learning_rate": 0.00019692177035655163, "loss": 12.5231, "step": 3923 }, { "epoch": 0.21367748259173797, "grad_norm": 0.6997119387345971, "learning_rate": 0.00019691959887793684, "loss": 12.6468, "step": 3924 }, { "epoch": 0.213731936588321, "grad_norm": 0.8372167910644972, "learning_rate": 0.00019691742664565895, "loss": 12.6043, "step": 3925 }, { "epoch": 0.21378639058490398, "grad_norm": 0.6962495433547871, "learning_rate": 0.0001969152536597349, "loss": 12.684, "step": 3926 }, { "epoch": 0.213840844581487, "grad_norm": 0.7266854442111711, "learning_rate": 0.00019691307992018161, "loss": 12.4952, "step": 3927 }, { "epoch": 0.21389529857807002, "grad_norm": 0.7655950665136835, "learning_rate": 0.00019691090542701595, "loss": 12.4292, "step": 3928 }, { "epoch": 0.213949752574653, "grad_norm": 0.7277789599591761, "learning_rate": 0.00019690873018025483, "loss": 12.6611, "step": 3929 }, { "epoch": 0.21400420657123603, "grad_norm": 0.8271872279766458, "learning_rate": 0.0001969065541799152, "loss": 12.6395, "step": 3930 }, { "epoch": 0.21405866056781905, "grad_norm": 0.661706779368455, "learning_rate": 0.00019690437742601394, "loss": 12.6383, "step": 3931 }, { "epoch": 0.21411311456440207, "grad_norm": 0.7064738109764515, "learning_rate": 0.00019690219991856797, "loss": 12.6467, "step": 3932 }, { "epoch": 0.21416756856098507, "grad_norm": 0.7982907102311182, "learning_rate": 0.00019690002165759424, "loss": 12.5564, "step": 3933 }, { "epoch": 0.2142220225575681, "grad_norm": 0.7074857690670541, "learning_rate": 0.00019689784264310972, "loss": 12.5441, "step": 3934 }, { "epoch": 0.2142764765541511, "grad_norm": 0.7952675669800652, "learning_rate": 0.00019689566287513132, "loss": 12.7703, "step": 3935 }, { "epoch": 0.2143309305507341, "grad_norm": 0.7824586864295231, "learning_rate": 0.00019689348235367598, "loss": 12.7611, "step": 3936 }, { "epoch": 0.21438538454731712, "grad_norm": 0.7943644437390582, "learning_rate": 0.00019689130107876067, "loss": 12.6341, "step": 3937 }, { "epoch": 0.21443983854390014, "grad_norm": 0.7623897601744621, "learning_rate": 0.00019688911905040238, "loss": 12.6317, "step": 3938 }, { "epoch": 0.21449429254048313, "grad_norm": 0.728279967862069, "learning_rate": 0.00019688693626861804, "loss": 12.5935, "step": 3939 }, { "epoch": 0.21454874653706615, "grad_norm": 0.7511841158265858, "learning_rate": 0.00019688475273342464, "loss": 12.505, "step": 3940 }, { "epoch": 0.21460320053364917, "grad_norm": 0.7418238611000724, "learning_rate": 0.00019688256844483914, "loss": 12.565, "step": 3941 }, { "epoch": 0.21465765453023217, "grad_norm": 0.718602048206933, "learning_rate": 0.00019688038340287856, "loss": 12.7313, "step": 3942 }, { "epoch": 0.2147121085268152, "grad_norm": 0.6239293268350437, "learning_rate": 0.00019687819760755987, "loss": 12.6087, "step": 3943 }, { "epoch": 0.2147665625233982, "grad_norm": 0.7081825399431875, "learning_rate": 0.00019687601105890004, "loss": 12.5982, "step": 3944 }, { "epoch": 0.2148210165199812, "grad_norm": 0.7000307810121966, "learning_rate": 0.0001968738237569161, "loss": 12.5143, "step": 3945 }, { "epoch": 0.21487547051656422, "grad_norm": 0.7241333911326127, "learning_rate": 0.0001968716357016251, "loss": 12.6107, "step": 3946 }, { "epoch": 0.21492992451314724, "grad_norm": 0.7073929167858216, "learning_rate": 0.00019686944689304402, "loss": 12.6784, "step": 3947 }, { "epoch": 0.21498437850973026, "grad_norm": 0.6963246525209683, "learning_rate": 0.00019686725733118982, "loss": 12.6063, "step": 3948 }, { "epoch": 0.21503883250631325, "grad_norm": 0.696928836554792, "learning_rate": 0.00019686506701607965, "loss": 12.5421, "step": 3949 }, { "epoch": 0.21509328650289627, "grad_norm": 0.7947151519515174, "learning_rate": 0.00019686287594773043, "loss": 12.7203, "step": 3950 }, { "epoch": 0.2151477404994793, "grad_norm": 0.6941464137226102, "learning_rate": 0.00019686068412615927, "loss": 12.651, "step": 3951 }, { "epoch": 0.2152021944960623, "grad_norm": 0.7422280709843154, "learning_rate": 0.00019685849155138315, "loss": 12.6142, "step": 3952 }, { "epoch": 0.2152566484926453, "grad_norm": 0.6568159068976495, "learning_rate": 0.00019685629822341919, "loss": 12.4753, "step": 3953 }, { "epoch": 0.21531110248922833, "grad_norm": 0.7266034209404038, "learning_rate": 0.0001968541041422844, "loss": 12.7158, "step": 3954 }, { "epoch": 0.21536555648581132, "grad_norm": 0.6945221783504558, "learning_rate": 0.00019685190930799585, "loss": 12.6453, "step": 3955 }, { "epoch": 0.21542001048239434, "grad_norm": 0.7526163319281612, "learning_rate": 0.00019684971372057063, "loss": 12.6438, "step": 3956 }, { "epoch": 0.21547446447897736, "grad_norm": 0.7752381893997414, "learning_rate": 0.00019684751738002575, "loss": 12.6173, "step": 3957 }, { "epoch": 0.21552891847556035, "grad_norm": 0.6891814177801274, "learning_rate": 0.00019684532028637836, "loss": 12.5664, "step": 3958 }, { "epoch": 0.21558337247214338, "grad_norm": 0.7040966702280157, "learning_rate": 0.0001968431224396455, "loss": 12.6211, "step": 3959 }, { "epoch": 0.2156378264687264, "grad_norm": 0.6086700373400811, "learning_rate": 0.00019684092383984433, "loss": 12.5274, "step": 3960 }, { "epoch": 0.2156922804653094, "grad_norm": 0.7380800816251231, "learning_rate": 0.00019683872448699184, "loss": 12.683, "step": 3961 }, { "epoch": 0.2157467344618924, "grad_norm": 0.7400584232257849, "learning_rate": 0.00019683652438110523, "loss": 12.6166, "step": 3962 }, { "epoch": 0.21580118845847543, "grad_norm": 0.756629350492766, "learning_rate": 0.00019683432352220158, "loss": 12.6753, "step": 3963 }, { "epoch": 0.21585564245505845, "grad_norm": 0.6501914484379949, "learning_rate": 0.00019683212191029794, "loss": 12.4404, "step": 3964 }, { "epoch": 0.21591009645164144, "grad_norm": 0.7284031309302922, "learning_rate": 0.0001968299195454115, "loss": 12.5702, "step": 3965 }, { "epoch": 0.21596455044822446, "grad_norm": 0.7840331234544401, "learning_rate": 0.00019682771642755937, "loss": 12.8293, "step": 3966 }, { "epoch": 0.21601900444480748, "grad_norm": 0.7101177614963934, "learning_rate": 0.00019682551255675867, "loss": 12.7148, "step": 3967 }, { "epoch": 0.21607345844139048, "grad_norm": 0.7470213722881334, "learning_rate": 0.00019682330793302657, "loss": 12.7036, "step": 3968 }, { "epoch": 0.2161279124379735, "grad_norm": 1.004377002918978, "learning_rate": 0.00019682110255638018, "loss": 12.6216, "step": 3969 }, { "epoch": 0.21618236643455652, "grad_norm": 0.7040489342355073, "learning_rate": 0.00019681889642683668, "loss": 12.648, "step": 3970 }, { "epoch": 0.2162368204311395, "grad_norm": 0.7142888356405561, "learning_rate": 0.00019681668954441315, "loss": 12.5634, "step": 3971 }, { "epoch": 0.21629127442772253, "grad_norm": 0.7280247286864808, "learning_rate": 0.00019681448190912682, "loss": 12.5871, "step": 3972 }, { "epoch": 0.21634572842430555, "grad_norm": 0.7804693598806084, "learning_rate": 0.00019681227352099487, "loss": 12.7291, "step": 3973 }, { "epoch": 0.21640018242088854, "grad_norm": 0.7894803374028877, "learning_rate": 0.00019681006438003443, "loss": 12.6476, "step": 3974 }, { "epoch": 0.21645463641747156, "grad_norm": 0.6976484929957576, "learning_rate": 0.00019680785448626274, "loss": 12.6018, "step": 3975 }, { "epoch": 0.21650909041405458, "grad_norm": 0.6978787462272699, "learning_rate": 0.00019680564383969687, "loss": 12.651, "step": 3976 }, { "epoch": 0.21656354441063758, "grad_norm": 0.7023747519995679, "learning_rate": 0.00019680343244035412, "loss": 12.7279, "step": 3977 }, { "epoch": 0.2166179984072206, "grad_norm": 0.7581605914443555, "learning_rate": 0.00019680122028825162, "loss": 12.5454, "step": 3978 }, { "epoch": 0.21667245240380362, "grad_norm": 0.8027276031510364, "learning_rate": 0.00019679900738340663, "loss": 12.6674, "step": 3979 }, { "epoch": 0.2167269064003866, "grad_norm": 0.7443417644929946, "learning_rate": 0.0001967967937258363, "loss": 12.694, "step": 3980 }, { "epoch": 0.21678136039696963, "grad_norm": 0.7614531973040706, "learning_rate": 0.00019679457931555787, "loss": 12.7878, "step": 3981 }, { "epoch": 0.21683581439355265, "grad_norm": 0.7739638001554833, "learning_rate": 0.00019679236415258856, "loss": 12.672, "step": 3982 }, { "epoch": 0.21689026839013567, "grad_norm": 0.7819656619703798, "learning_rate": 0.0001967901482369456, "loss": 12.5756, "step": 3983 }, { "epoch": 0.21694472238671866, "grad_norm": 0.7461819005859127, "learning_rate": 0.00019678793156864622, "loss": 12.7048, "step": 3984 }, { "epoch": 0.21699917638330168, "grad_norm": 0.7125263632988991, "learning_rate": 0.00019678571414770763, "loss": 12.5498, "step": 3985 }, { "epoch": 0.2170536303798847, "grad_norm": 0.7633302060511443, "learning_rate": 0.0001967834959741471, "loss": 12.5501, "step": 3986 }, { "epoch": 0.2171080843764677, "grad_norm": 0.6537109819979228, "learning_rate": 0.0001967812770479819, "loss": 12.5498, "step": 3987 }, { "epoch": 0.21716253837305072, "grad_norm": 0.6956757640626371, "learning_rate": 0.00019677905736922928, "loss": 12.6805, "step": 3988 }, { "epoch": 0.21721699236963374, "grad_norm": 0.9038073036106121, "learning_rate": 0.00019677683693790642, "loss": 12.5616, "step": 3989 }, { "epoch": 0.21727144636621673, "grad_norm": 0.7845108311106693, "learning_rate": 0.0001967746157540307, "loss": 12.6692, "step": 3990 }, { "epoch": 0.21732590036279975, "grad_norm": 0.865577954805613, "learning_rate": 0.00019677239381761928, "loss": 12.5767, "step": 3991 }, { "epoch": 0.21738035435938277, "grad_norm": 0.6931754652824569, "learning_rate": 0.00019677017112868956, "loss": 12.5629, "step": 3992 }, { "epoch": 0.21743480835596576, "grad_norm": 0.7771496190857541, "learning_rate": 0.00019676794768725873, "loss": 12.6854, "step": 3993 }, { "epoch": 0.21748926235254878, "grad_norm": 0.7092020476329501, "learning_rate": 0.0001967657234933441, "loss": 12.6091, "step": 3994 }, { "epoch": 0.2175437163491318, "grad_norm": 0.693179443968465, "learning_rate": 0.000196763498546963, "loss": 12.7026, "step": 3995 }, { "epoch": 0.2175981703457148, "grad_norm": 0.731434066275842, "learning_rate": 0.00019676127284813267, "loss": 12.6795, "step": 3996 }, { "epoch": 0.21765262434229782, "grad_norm": 0.7032078557389057, "learning_rate": 0.0001967590463968705, "loss": 12.6302, "step": 3997 }, { "epoch": 0.21770707833888084, "grad_norm": 0.635970585227238, "learning_rate": 0.00019675681919319372, "loss": 12.6628, "step": 3998 }, { "epoch": 0.21776153233546386, "grad_norm": 0.8211180272514582, "learning_rate": 0.00019675459123711972, "loss": 12.8123, "step": 3999 }, { "epoch": 0.21781598633204685, "grad_norm": 0.7603117513168859, "learning_rate": 0.00019675236252866577, "loss": 12.578, "step": 4000 }, { "epoch": 0.21787044032862987, "grad_norm": 0.7594642151567548, "learning_rate": 0.00019675013306784923, "loss": 12.5305, "step": 4001 }, { "epoch": 0.2179248943252129, "grad_norm": 0.7699914028970279, "learning_rate": 0.00019674790285468746, "loss": 12.7227, "step": 4002 }, { "epoch": 0.21797934832179588, "grad_norm": 0.7397614162642909, "learning_rate": 0.00019674567188919775, "loss": 12.678, "step": 4003 }, { "epoch": 0.2180338023183789, "grad_norm": 0.7175728299201021, "learning_rate": 0.00019674344017139744, "loss": 12.6919, "step": 4004 }, { "epoch": 0.21808825631496193, "grad_norm": 0.7697025967962892, "learning_rate": 0.00019674120770130394, "loss": 12.7325, "step": 4005 }, { "epoch": 0.21814271031154492, "grad_norm": 0.7006179379055654, "learning_rate": 0.00019673897447893462, "loss": 12.5337, "step": 4006 }, { "epoch": 0.21819716430812794, "grad_norm": 0.7105004628449897, "learning_rate": 0.00019673674050430678, "loss": 12.677, "step": 4007 }, { "epoch": 0.21825161830471096, "grad_norm": 0.6571087553060003, "learning_rate": 0.0001967345057774378, "loss": 12.6363, "step": 4008 }, { "epoch": 0.21830607230129395, "grad_norm": 0.7093641040329042, "learning_rate": 0.00019673227029834512, "loss": 12.5658, "step": 4009 }, { "epoch": 0.21836052629787697, "grad_norm": 0.6380887380541235, "learning_rate": 0.00019673003406704605, "loss": 12.5858, "step": 4010 }, { "epoch": 0.21841498029446, "grad_norm": 0.7916333527955144, "learning_rate": 0.00019672779708355804, "loss": 12.7207, "step": 4011 }, { "epoch": 0.21846943429104299, "grad_norm": 0.6850591486656107, "learning_rate": 0.00019672555934789845, "loss": 12.6482, "step": 4012 }, { "epoch": 0.218523888287626, "grad_norm": 0.7198343448962357, "learning_rate": 0.0001967233208600847, "loss": 12.6282, "step": 4013 }, { "epoch": 0.21857834228420903, "grad_norm": 0.7496108215300044, "learning_rate": 0.0001967210816201342, "loss": 12.6532, "step": 4014 }, { "epoch": 0.21863279628079205, "grad_norm": 0.7301845943741914, "learning_rate": 0.0001967188416280643, "loss": 12.5878, "step": 4015 }, { "epoch": 0.21868725027737504, "grad_norm": 0.7480537564589492, "learning_rate": 0.0001967166008838925, "loss": 12.7344, "step": 4016 }, { "epoch": 0.21874170427395806, "grad_norm": 0.7040530822679284, "learning_rate": 0.00019671435938763615, "loss": 12.6365, "step": 4017 }, { "epoch": 0.21879615827054108, "grad_norm": 0.729618764939438, "learning_rate": 0.0001967121171393128, "loss": 12.6541, "step": 4018 }, { "epoch": 0.21885061226712407, "grad_norm": 0.7578991783646115, "learning_rate": 0.00019670987413893974, "loss": 12.7008, "step": 4019 }, { "epoch": 0.2189050662637071, "grad_norm": 0.7139122569181902, "learning_rate": 0.00019670763038653452, "loss": 12.6024, "step": 4020 }, { "epoch": 0.2189595202602901, "grad_norm": 0.7190580028658345, "learning_rate": 0.00019670538588211455, "loss": 12.6473, "step": 4021 }, { "epoch": 0.2190139742568731, "grad_norm": 0.7332441857723779, "learning_rate": 0.00019670314062569726, "loss": 12.6767, "step": 4022 }, { "epoch": 0.21906842825345613, "grad_norm": 0.7084478878468786, "learning_rate": 0.00019670089461730012, "loss": 12.657, "step": 4023 }, { "epoch": 0.21912288225003915, "grad_norm": 0.7499768445803511, "learning_rate": 0.00019669864785694063, "loss": 12.5707, "step": 4024 }, { "epoch": 0.21917733624662214, "grad_norm": 0.8299453060159917, "learning_rate": 0.00019669640034463624, "loss": 12.8349, "step": 4025 }, { "epoch": 0.21923179024320516, "grad_norm": 0.7774346746945877, "learning_rate": 0.00019669415208040442, "loss": 12.5669, "step": 4026 }, { "epoch": 0.21928624423978818, "grad_norm": 0.7413977284793508, "learning_rate": 0.00019669190306426264, "loss": 12.407, "step": 4027 }, { "epoch": 0.21934069823637117, "grad_norm": 0.7179477776342924, "learning_rate": 0.00019668965329622845, "loss": 12.6059, "step": 4028 }, { "epoch": 0.2193951522329542, "grad_norm": 0.7544261333086814, "learning_rate": 0.00019668740277631926, "loss": 12.5511, "step": 4029 }, { "epoch": 0.21944960622953721, "grad_norm": 0.6788519137175846, "learning_rate": 0.0001966851515045526, "loss": 12.6608, "step": 4030 }, { "epoch": 0.21950406022612023, "grad_norm": 0.7230530691838619, "learning_rate": 0.00019668289948094602, "loss": 12.6335, "step": 4031 }, { "epoch": 0.21955851422270323, "grad_norm": 0.7056961235171023, "learning_rate": 0.000196680646705517, "loss": 12.673, "step": 4032 }, { "epoch": 0.21961296821928625, "grad_norm": 0.7378250013137789, "learning_rate": 0.00019667839317828305, "loss": 12.7235, "step": 4033 }, { "epoch": 0.21966742221586927, "grad_norm": 0.7101142061368517, "learning_rate": 0.00019667613889926168, "loss": 12.6265, "step": 4034 }, { "epoch": 0.21972187621245226, "grad_norm": 0.708434740030311, "learning_rate": 0.00019667388386847048, "loss": 12.6863, "step": 4035 }, { "epoch": 0.21977633020903528, "grad_norm": 0.7302184396771872, "learning_rate": 0.00019667162808592695, "loss": 12.4923, "step": 4036 }, { "epoch": 0.2198307842056183, "grad_norm": 0.9297099830656682, "learning_rate": 0.0001966693715516486, "loss": 12.5281, "step": 4037 }, { "epoch": 0.2198852382022013, "grad_norm": 0.7556286735752703, "learning_rate": 0.00019666711426565302, "loss": 12.6344, "step": 4038 }, { "epoch": 0.21993969219878431, "grad_norm": 0.7837233340630051, "learning_rate": 0.00019666485622795774, "loss": 12.5989, "step": 4039 }, { "epoch": 0.21999414619536733, "grad_norm": 0.7453415181216037, "learning_rate": 0.00019666259743858033, "loss": 12.6815, "step": 4040 }, { "epoch": 0.22004860019195033, "grad_norm": 0.800459321357, "learning_rate": 0.00019666033789753832, "loss": 12.6644, "step": 4041 }, { "epoch": 0.22010305418853335, "grad_norm": 0.7277918752524827, "learning_rate": 0.00019665807760484936, "loss": 12.7014, "step": 4042 }, { "epoch": 0.22015750818511637, "grad_norm": 0.7666164066093426, "learning_rate": 0.00019665581656053095, "loss": 12.5904, "step": 4043 }, { "epoch": 0.22021196218169936, "grad_norm": 0.8457032281116588, "learning_rate": 0.00019665355476460072, "loss": 12.6473, "step": 4044 }, { "epoch": 0.22026641617828238, "grad_norm": 0.8591039778091485, "learning_rate": 0.00019665129221707625, "loss": 12.6753, "step": 4045 }, { "epoch": 0.2203208701748654, "grad_norm": 0.8961209104006046, "learning_rate": 0.0001966490289179751, "loss": 12.3711, "step": 4046 }, { "epoch": 0.2203753241714484, "grad_norm": 0.703577480574146, "learning_rate": 0.0001966467648673149, "loss": 12.4597, "step": 4047 }, { "epoch": 0.22042977816803141, "grad_norm": 0.7176771691091773, "learning_rate": 0.00019664450006511323, "loss": 12.4849, "step": 4048 }, { "epoch": 0.22048423216461444, "grad_norm": 0.9445294769580541, "learning_rate": 0.00019664223451138776, "loss": 12.6456, "step": 4049 }, { "epoch": 0.22053868616119746, "grad_norm": 0.79355294350249, "learning_rate": 0.00019663996820615604, "loss": 12.5788, "step": 4050 }, { "epoch": 0.22059314015778045, "grad_norm": 0.7021683884325197, "learning_rate": 0.0001966377011494357, "loss": 12.628, "step": 4051 }, { "epoch": 0.22064759415436347, "grad_norm": 0.8065361759591646, "learning_rate": 0.00019663543334124444, "loss": 12.6926, "step": 4052 }, { "epoch": 0.2207020481509465, "grad_norm": 0.7289818176356541, "learning_rate": 0.00019663316478159984, "loss": 12.627, "step": 4053 }, { "epoch": 0.22075650214752948, "grad_norm": 0.6840374913718438, "learning_rate": 0.0001966308954705195, "loss": 12.639, "step": 4054 }, { "epoch": 0.2208109561441125, "grad_norm": 0.7007821582356654, "learning_rate": 0.00019662862540802115, "loss": 12.6939, "step": 4055 }, { "epoch": 0.22086541014069552, "grad_norm": 0.820489336660405, "learning_rate": 0.00019662635459412239, "loss": 12.6103, "step": 4056 }, { "epoch": 0.22091986413727852, "grad_norm": 0.6884190201784677, "learning_rate": 0.0001966240830288409, "loss": 12.6194, "step": 4057 }, { "epoch": 0.22097431813386154, "grad_norm": 0.7121741594391826, "learning_rate": 0.00019662181071219433, "loss": 12.6443, "step": 4058 }, { "epoch": 0.22102877213044456, "grad_norm": 0.8198708361122542, "learning_rate": 0.00019661953764420036, "loss": 12.7249, "step": 4059 }, { "epoch": 0.22108322612702755, "grad_norm": 0.7579769411330018, "learning_rate": 0.00019661726382487666, "loss": 12.5446, "step": 4060 }, { "epoch": 0.22113768012361057, "grad_norm": 0.7321724612096204, "learning_rate": 0.0001966149892542409, "loss": 12.6478, "step": 4061 }, { "epoch": 0.2211921341201936, "grad_norm": 0.7611930700464427, "learning_rate": 0.0001966127139323108, "loss": 12.6653, "step": 4062 }, { "epoch": 0.22124658811677658, "grad_norm": 0.7843978002683786, "learning_rate": 0.00019661043785910404, "loss": 12.6038, "step": 4063 }, { "epoch": 0.2213010421133596, "grad_norm": 0.6158107207239923, "learning_rate": 0.00019660816103463831, "loss": 12.5457, "step": 4064 }, { "epoch": 0.22135549610994262, "grad_norm": 0.7349160639773851, "learning_rate": 0.00019660588345893132, "loss": 12.7054, "step": 4065 }, { "epoch": 0.22140995010652564, "grad_norm": 0.8064390119251176, "learning_rate": 0.00019660360513200074, "loss": 12.7049, "step": 4066 }, { "epoch": 0.22146440410310864, "grad_norm": 0.8285649273192288, "learning_rate": 0.00019660132605386438, "loss": 12.7298, "step": 4067 }, { "epoch": 0.22151885809969166, "grad_norm": 0.7237315038079152, "learning_rate": 0.00019659904622453987, "loss": 12.6079, "step": 4068 }, { "epoch": 0.22157331209627468, "grad_norm": 0.6493306300297004, "learning_rate": 0.00019659676564404502, "loss": 12.6085, "step": 4069 }, { "epoch": 0.22162776609285767, "grad_norm": 0.6758583142842389, "learning_rate": 0.00019659448431239747, "loss": 12.6432, "step": 4070 }, { "epoch": 0.2216822200894407, "grad_norm": 0.7787709224078277, "learning_rate": 0.00019659220222961504, "loss": 12.7466, "step": 4071 }, { "epoch": 0.2217366740860237, "grad_norm": 0.7290007115312267, "learning_rate": 0.00019658991939571543, "loss": 12.8019, "step": 4072 }, { "epoch": 0.2217911280826067, "grad_norm": 0.8270168433996183, "learning_rate": 0.0001965876358107164, "loss": 12.5678, "step": 4073 }, { "epoch": 0.22184558207918972, "grad_norm": 0.679111176559926, "learning_rate": 0.00019658535147463576, "loss": 12.5322, "step": 4074 }, { "epoch": 0.22190003607577274, "grad_norm": 0.7672042597534164, "learning_rate": 0.0001965830663874912, "loss": 12.4992, "step": 4075 }, { "epoch": 0.22195449007235574, "grad_norm": 0.7822125277954906, "learning_rate": 0.00019658078054930053, "loss": 12.6805, "step": 4076 }, { "epoch": 0.22200894406893876, "grad_norm": 0.708356713729573, "learning_rate": 0.0001965784939600815, "loss": 12.7529, "step": 4077 }, { "epoch": 0.22206339806552178, "grad_norm": 0.9155739250602781, "learning_rate": 0.00019657620661985193, "loss": 12.8248, "step": 4078 }, { "epoch": 0.22211785206210477, "grad_norm": 0.7570284536971716, "learning_rate": 0.00019657391852862956, "loss": 12.7426, "step": 4079 }, { "epoch": 0.2221723060586878, "grad_norm": 0.7462136050334757, "learning_rate": 0.0001965716296864322, "loss": 12.7032, "step": 4080 }, { "epoch": 0.2222267600552708, "grad_norm": 0.7066401415807251, "learning_rate": 0.00019656934009327769, "loss": 12.587, "step": 4081 }, { "epoch": 0.22228121405185383, "grad_norm": 0.7253805256660538, "learning_rate": 0.00019656704974918375, "loss": 12.511, "step": 4082 }, { "epoch": 0.22233566804843682, "grad_norm": 0.7647726895218305, "learning_rate": 0.00019656475865416825, "loss": 12.6477, "step": 4083 }, { "epoch": 0.22239012204501984, "grad_norm": 0.7215912435196542, "learning_rate": 0.00019656246680824902, "loss": 12.623, "step": 4084 }, { "epoch": 0.22244457604160287, "grad_norm": 0.7155861894393772, "learning_rate": 0.00019656017421144383, "loss": 12.6334, "step": 4085 }, { "epoch": 0.22249903003818586, "grad_norm": 0.7067711985235319, "learning_rate": 0.00019655788086377052, "loss": 12.6266, "step": 4086 }, { "epoch": 0.22255348403476888, "grad_norm": 0.7096197888718686, "learning_rate": 0.00019655558676524696, "loss": 12.6845, "step": 4087 }, { "epoch": 0.2226079380313519, "grad_norm": 0.7265919662042608, "learning_rate": 0.00019655329191589096, "loss": 12.5453, "step": 4088 }, { "epoch": 0.2226623920279349, "grad_norm": 0.7522879143900745, "learning_rate": 0.00019655099631572036, "loss": 12.6943, "step": 4089 }, { "epoch": 0.2227168460245179, "grad_norm": 0.6771672916143178, "learning_rate": 0.00019654869996475302, "loss": 12.5641, "step": 4090 }, { "epoch": 0.22277130002110093, "grad_norm": 0.7851229816759847, "learning_rate": 0.00019654640286300681, "loss": 12.6618, "step": 4091 }, { "epoch": 0.22282575401768392, "grad_norm": 0.72448806730371, "learning_rate": 0.00019654410501049956, "loss": 12.5751, "step": 4092 }, { "epoch": 0.22288020801426695, "grad_norm": 0.7173123686937297, "learning_rate": 0.0001965418064072492, "loss": 12.6472, "step": 4093 }, { "epoch": 0.22293466201084997, "grad_norm": 0.6722797988171498, "learning_rate": 0.00019653950705327352, "loss": 12.6206, "step": 4094 }, { "epoch": 0.22298911600743296, "grad_norm": 0.7023256897730658, "learning_rate": 0.00019653720694859045, "loss": 12.5819, "step": 4095 }, { "epoch": 0.22304357000401598, "grad_norm": 0.6768174394192674, "learning_rate": 0.0001965349060932179, "loss": 12.6696, "step": 4096 }, { "epoch": 0.223098024000599, "grad_norm": 0.6975337948647515, "learning_rate": 0.0001965326044871737, "loss": 12.5153, "step": 4097 }, { "epoch": 0.22315247799718202, "grad_norm": 0.6670804020560877, "learning_rate": 0.00019653030213047582, "loss": 12.5818, "step": 4098 }, { "epoch": 0.223206931993765, "grad_norm": 0.7435266309532783, "learning_rate": 0.00019652799902314207, "loss": 12.6334, "step": 4099 }, { "epoch": 0.22326138599034803, "grad_norm": 0.6434074828767778, "learning_rate": 0.00019652569516519043, "loss": 12.7553, "step": 4100 }, { "epoch": 0.22331583998693105, "grad_norm": 0.7702468225912249, "learning_rate": 0.00019652339055663883, "loss": 12.6044, "step": 4101 }, { "epoch": 0.22337029398351405, "grad_norm": 0.6827954463588795, "learning_rate": 0.0001965210851975051, "loss": 12.6484, "step": 4102 }, { "epoch": 0.22342474798009707, "grad_norm": 0.6972485509404794, "learning_rate": 0.00019651877908780728, "loss": 12.6238, "step": 4103 }, { "epoch": 0.2234792019766801, "grad_norm": 0.6520662489966935, "learning_rate": 0.0001965164722275632, "loss": 12.5524, "step": 4104 }, { "epoch": 0.22353365597326308, "grad_norm": 0.7948014592544079, "learning_rate": 0.00019651416461679086, "loss": 12.7693, "step": 4105 }, { "epoch": 0.2235881099698461, "grad_norm": 0.6865331043079922, "learning_rate": 0.00019651185625550822, "loss": 12.6462, "step": 4106 }, { "epoch": 0.22364256396642912, "grad_norm": 0.678447503680815, "learning_rate": 0.00019650954714373316, "loss": 12.4801, "step": 4107 }, { "epoch": 0.2236970179630121, "grad_norm": 0.6861766778644109, "learning_rate": 0.0001965072372814837, "loss": 12.7116, "step": 4108 }, { "epoch": 0.22375147195959513, "grad_norm": 0.6552148787788998, "learning_rate": 0.00019650492666877778, "loss": 12.5291, "step": 4109 }, { "epoch": 0.22380592595617815, "grad_norm": 0.7820906246883227, "learning_rate": 0.00019650261530563336, "loss": 12.773, "step": 4110 }, { "epoch": 0.22386037995276115, "grad_norm": 0.7375117802220131, "learning_rate": 0.00019650030319206844, "loss": 12.698, "step": 4111 }, { "epoch": 0.22391483394934417, "grad_norm": 0.6853124627196624, "learning_rate": 0.00019649799032810095, "loss": 12.6565, "step": 4112 }, { "epoch": 0.2239692879459272, "grad_norm": 0.8280545884927758, "learning_rate": 0.0001964956767137489, "loss": 12.5425, "step": 4113 }, { "epoch": 0.22402374194251018, "grad_norm": 0.7010860592903173, "learning_rate": 0.00019649336234903031, "loss": 12.6033, "step": 4114 }, { "epoch": 0.2240781959390932, "grad_norm": 0.8662239711101437, "learning_rate": 0.00019649104723396313, "loss": 12.7076, "step": 4115 }, { "epoch": 0.22413264993567622, "grad_norm": 0.7016765695316105, "learning_rate": 0.0001964887313685654, "loss": 12.6596, "step": 4116 }, { "epoch": 0.22418710393225924, "grad_norm": 0.7540474476328786, "learning_rate": 0.0001964864147528551, "loss": 12.6279, "step": 4117 }, { "epoch": 0.22424155792884223, "grad_norm": 0.8586664519926759, "learning_rate": 0.0001964840973868503, "loss": 12.677, "step": 4118 }, { "epoch": 0.22429601192542525, "grad_norm": 0.7518180989190282, "learning_rate": 0.00019648177927056892, "loss": 12.652, "step": 4119 }, { "epoch": 0.22435046592200827, "grad_norm": 0.8042141580978586, "learning_rate": 0.00019647946040402908, "loss": 12.6802, "step": 4120 }, { "epoch": 0.22440491991859127, "grad_norm": 0.7799997298026489, "learning_rate": 0.00019647714078724877, "loss": 12.6783, "step": 4121 }, { "epoch": 0.2244593739151743, "grad_norm": 0.7540211261418177, "learning_rate": 0.00019647482042024607, "loss": 12.5943, "step": 4122 }, { "epoch": 0.2245138279117573, "grad_norm": 0.8132608128572707, "learning_rate": 0.00019647249930303894, "loss": 12.5836, "step": 4123 }, { "epoch": 0.2245682819083403, "grad_norm": 0.6984708197327121, "learning_rate": 0.0001964701774356455, "loss": 12.5904, "step": 4124 }, { "epoch": 0.22462273590492332, "grad_norm": 0.8561128483421145, "learning_rate": 0.00019646785481808375, "loss": 12.5673, "step": 4125 }, { "epoch": 0.22467718990150634, "grad_norm": 0.7102278937221332, "learning_rate": 0.0001964655314503718, "loss": 12.6511, "step": 4126 }, { "epoch": 0.22473164389808933, "grad_norm": 0.8040309226278436, "learning_rate": 0.0001964632073325277, "loss": 12.5945, "step": 4127 }, { "epoch": 0.22478609789467235, "grad_norm": 0.7324994720574526, "learning_rate": 0.00019646088246456952, "loss": 12.5847, "step": 4128 }, { "epoch": 0.22484055189125537, "grad_norm": 0.6946331463546693, "learning_rate": 0.00019645855684651535, "loss": 12.6176, "step": 4129 }, { "epoch": 0.22489500588783837, "grad_norm": 0.8127685703864836, "learning_rate": 0.00019645623047838325, "loss": 12.6553, "step": 4130 }, { "epoch": 0.2249494598844214, "grad_norm": 0.7716413856032072, "learning_rate": 0.00019645390336019136, "loss": 12.6135, "step": 4131 }, { "epoch": 0.2250039138810044, "grad_norm": 0.7148837586860729, "learning_rate": 0.0001964515754919577, "loss": 12.608, "step": 4132 }, { "epoch": 0.22505836787758743, "grad_norm": 0.7684066446503315, "learning_rate": 0.00019644924687370045, "loss": 12.5608, "step": 4133 }, { "epoch": 0.22511282187417042, "grad_norm": 0.7642611411607005, "learning_rate": 0.00019644691750543767, "loss": 12.5928, "step": 4134 }, { "epoch": 0.22516727587075344, "grad_norm": 0.8170139904918936, "learning_rate": 0.0001964445873871875, "loss": 12.588, "step": 4135 }, { "epoch": 0.22522172986733646, "grad_norm": 0.754685287492516, "learning_rate": 0.000196442256518968, "loss": 12.65, "step": 4136 }, { "epoch": 0.22527618386391945, "grad_norm": 0.7479712814785009, "learning_rate": 0.00019643992490079736, "loss": 12.509, "step": 4137 }, { "epoch": 0.22533063786050248, "grad_norm": 0.7957960856658557, "learning_rate": 0.00019643759253269372, "loss": 12.5958, "step": 4138 }, { "epoch": 0.2253850918570855, "grad_norm": 0.6433026845291914, "learning_rate": 0.00019643525941467516, "loss": 12.5226, "step": 4139 }, { "epoch": 0.2254395458536685, "grad_norm": 0.7612427732206075, "learning_rate": 0.00019643292554675986, "loss": 12.536, "step": 4140 }, { "epoch": 0.2254939998502515, "grad_norm": 0.7186653394613721, "learning_rate": 0.00019643059092896596, "loss": 12.5145, "step": 4141 }, { "epoch": 0.22554845384683453, "grad_norm": 0.8423148084191446, "learning_rate": 0.0001964282555613116, "loss": 12.6958, "step": 4142 }, { "epoch": 0.22560290784341752, "grad_norm": 0.7648405356031992, "learning_rate": 0.00019642591944381497, "loss": 12.5947, "step": 4143 }, { "epoch": 0.22565736184000054, "grad_norm": 0.7652229852198938, "learning_rate": 0.0001964235825764942, "loss": 12.6697, "step": 4144 }, { "epoch": 0.22571181583658356, "grad_norm": 0.7824181915244368, "learning_rate": 0.00019642124495936752, "loss": 12.581, "step": 4145 }, { "epoch": 0.22576626983316656, "grad_norm": 0.766258138818385, "learning_rate": 0.000196418906592453, "loss": 12.6982, "step": 4146 }, { "epoch": 0.22582072382974958, "grad_norm": 0.7751052091342521, "learning_rate": 0.00019641656747576897, "loss": 12.6076, "step": 4147 }, { "epoch": 0.2258751778263326, "grad_norm": 0.9516446815823854, "learning_rate": 0.00019641422760933351, "loss": 12.8024, "step": 4148 }, { "epoch": 0.22592963182291562, "grad_norm": 0.6470356345065555, "learning_rate": 0.00019641188699316483, "loss": 12.6156, "step": 4149 }, { "epoch": 0.2259840858194986, "grad_norm": 0.7168144094402956, "learning_rate": 0.00019640954562728119, "loss": 12.5586, "step": 4150 }, { "epoch": 0.22603853981608163, "grad_norm": 0.704367688027529, "learning_rate": 0.0001964072035117007, "loss": 12.7024, "step": 4151 }, { "epoch": 0.22609299381266465, "grad_norm": 0.7572302834035705, "learning_rate": 0.00019640486064644166, "loss": 12.6288, "step": 4152 }, { "epoch": 0.22614744780924764, "grad_norm": 0.8705790782861066, "learning_rate": 0.00019640251703152229, "loss": 12.5788, "step": 4153 }, { "epoch": 0.22620190180583066, "grad_norm": 0.719569601862707, "learning_rate": 0.00019640017266696073, "loss": 12.6585, "step": 4154 }, { "epoch": 0.22625635580241368, "grad_norm": 0.8192872285406801, "learning_rate": 0.00019639782755277526, "loss": 12.8433, "step": 4155 }, { "epoch": 0.22631080979899668, "grad_norm": 0.7591508591799239, "learning_rate": 0.00019639548168898414, "loss": 12.7183, "step": 4156 }, { "epoch": 0.2263652637955797, "grad_norm": 0.6542259611617403, "learning_rate": 0.0001963931350756056, "loss": 12.542, "step": 4157 }, { "epoch": 0.22641971779216272, "grad_norm": 0.7187092777667181, "learning_rate": 0.00019639078771265783, "loss": 12.6347, "step": 4158 }, { "epoch": 0.2264741717887457, "grad_norm": 0.7491587955339019, "learning_rate": 0.00019638843960015918, "loss": 12.6626, "step": 4159 }, { "epoch": 0.22652862578532873, "grad_norm": 0.6890269683428409, "learning_rate": 0.00019638609073812784, "loss": 12.6092, "step": 4160 }, { "epoch": 0.22658307978191175, "grad_norm": 0.7204526761660863, "learning_rate": 0.0001963837411265821, "loss": 12.5484, "step": 4161 }, { "epoch": 0.22663753377849474, "grad_norm": 0.6620763697964263, "learning_rate": 0.0001963813907655402, "loss": 12.5442, "step": 4162 }, { "epoch": 0.22669198777507776, "grad_norm": 0.6967383157647706, "learning_rate": 0.00019637903965502048, "loss": 12.6577, "step": 4163 }, { "epoch": 0.22674644177166078, "grad_norm": 0.7205232920046198, "learning_rate": 0.00019637668779504114, "loss": 12.7114, "step": 4164 }, { "epoch": 0.2268008957682438, "grad_norm": 0.8163517909426771, "learning_rate": 0.00019637433518562055, "loss": 12.673, "step": 4165 }, { "epoch": 0.2268553497648268, "grad_norm": 0.7954287675018735, "learning_rate": 0.00019637198182677695, "loss": 12.5768, "step": 4166 }, { "epoch": 0.22690980376140982, "grad_norm": 0.7460431744584577, "learning_rate": 0.00019636962771852866, "loss": 12.4162, "step": 4167 }, { "epoch": 0.22696425775799284, "grad_norm": 0.7643922709161617, "learning_rate": 0.00019636727286089398, "loss": 12.5648, "step": 4168 }, { "epoch": 0.22701871175457583, "grad_norm": 0.6811069165096117, "learning_rate": 0.00019636491725389123, "loss": 12.5059, "step": 4169 }, { "epoch": 0.22707316575115885, "grad_norm": 0.6880108265779822, "learning_rate": 0.00019636256089753876, "loss": 12.5661, "step": 4170 }, { "epoch": 0.22712761974774187, "grad_norm": 0.8314396438696734, "learning_rate": 0.0001963602037918548, "loss": 12.5678, "step": 4171 }, { "epoch": 0.22718207374432486, "grad_norm": 0.8376904101132442, "learning_rate": 0.00019635784593685776, "loss": 12.8749, "step": 4172 }, { "epoch": 0.22723652774090788, "grad_norm": 0.7170796749752048, "learning_rate": 0.00019635548733256596, "loss": 12.5654, "step": 4173 }, { "epoch": 0.2272909817374909, "grad_norm": 0.7785199073373608, "learning_rate": 0.00019635312797899773, "loss": 12.6354, "step": 4174 }, { "epoch": 0.2273454357340739, "grad_norm": 0.683307625656454, "learning_rate": 0.0001963507678761714, "loss": 12.516, "step": 4175 }, { "epoch": 0.22739988973065692, "grad_norm": 0.7206391649286215, "learning_rate": 0.00019634840702410534, "loss": 12.5217, "step": 4176 }, { "epoch": 0.22745434372723994, "grad_norm": 0.7691043392271019, "learning_rate": 0.00019634604542281793, "loss": 12.6352, "step": 4177 }, { "epoch": 0.22750879772382293, "grad_norm": 0.7698192268790165, "learning_rate": 0.0001963436830723275, "loss": 12.6516, "step": 4178 }, { "epoch": 0.22756325172040595, "grad_norm": 0.8346404720305081, "learning_rate": 0.00019634131997265243, "loss": 12.6, "step": 4179 }, { "epoch": 0.22761770571698897, "grad_norm": 0.6818073699154533, "learning_rate": 0.0001963389561238111, "loss": 12.6393, "step": 4180 }, { "epoch": 0.22767215971357196, "grad_norm": 0.8939568346328681, "learning_rate": 0.00019633659152582192, "loss": 12.6412, "step": 4181 }, { "epoch": 0.22772661371015498, "grad_norm": 0.8633836344610171, "learning_rate": 0.0001963342261787032, "loss": 12.5934, "step": 4182 }, { "epoch": 0.227781067706738, "grad_norm": 0.7662553649513152, "learning_rate": 0.00019633186008247342, "loss": 12.7304, "step": 4183 }, { "epoch": 0.22783552170332103, "grad_norm": 0.8173319496752366, "learning_rate": 0.00019632949323715093, "loss": 12.7181, "step": 4184 }, { "epoch": 0.22788997569990402, "grad_norm": 0.7528487750285134, "learning_rate": 0.00019632712564275414, "loss": 12.574, "step": 4185 }, { "epoch": 0.22794442969648704, "grad_norm": 0.9025976985810679, "learning_rate": 0.00019632475729930147, "loss": 12.3497, "step": 4186 }, { "epoch": 0.22799888369307006, "grad_norm": 0.7802567733274954, "learning_rate": 0.00019632238820681134, "loss": 12.7456, "step": 4187 }, { "epoch": 0.22805333768965305, "grad_norm": 0.8131014048200902, "learning_rate": 0.0001963200183653022, "loss": 12.57, "step": 4188 }, { "epoch": 0.22810779168623607, "grad_norm": 0.75760509813138, "learning_rate": 0.00019631764777479238, "loss": 12.723, "step": 4189 }, { "epoch": 0.2281622456828191, "grad_norm": 0.7666646103675416, "learning_rate": 0.0001963152764353004, "loss": 12.524, "step": 4190 }, { "epoch": 0.22821669967940209, "grad_norm": 0.7536721587180825, "learning_rate": 0.00019631290434684467, "loss": 12.4945, "step": 4191 }, { "epoch": 0.2282711536759851, "grad_norm": 0.7901263068923782, "learning_rate": 0.00019631053150944366, "loss": 12.6857, "step": 4192 }, { "epoch": 0.22832560767256813, "grad_norm": 0.7848747736826245, "learning_rate": 0.00019630815792311582, "loss": 12.6599, "step": 4193 }, { "epoch": 0.22838006166915112, "grad_norm": 0.7355671871543848, "learning_rate": 0.00019630578358787956, "loss": 12.585, "step": 4194 }, { "epoch": 0.22843451566573414, "grad_norm": 0.8300002825783853, "learning_rate": 0.0001963034085037534, "loss": 12.7075, "step": 4195 }, { "epoch": 0.22848896966231716, "grad_norm": 0.7966639359103621, "learning_rate": 0.00019630103267075577, "loss": 12.7292, "step": 4196 }, { "epoch": 0.22854342365890015, "grad_norm": 0.7727276636202759, "learning_rate": 0.00019629865608890515, "loss": 12.648, "step": 4197 }, { "epoch": 0.22859787765548317, "grad_norm": 0.7582843931050108, "learning_rate": 0.00019629627875822006, "loss": 12.6394, "step": 4198 }, { "epoch": 0.2286523316520662, "grad_norm": 0.7575204216635002, "learning_rate": 0.00019629390067871894, "loss": 12.563, "step": 4199 }, { "epoch": 0.2287067856486492, "grad_norm": 0.7514351983682588, "learning_rate": 0.00019629152185042032, "loss": 12.5458, "step": 4200 }, { "epoch": 0.2287612396452322, "grad_norm": 0.7343913978310641, "learning_rate": 0.00019628914227334265, "loss": 12.6761, "step": 4201 }, { "epoch": 0.22881569364181523, "grad_norm": 0.7953266280694237, "learning_rate": 0.0001962867619475045, "loss": 12.4819, "step": 4202 }, { "epoch": 0.22887014763839825, "grad_norm": 0.674170778375874, "learning_rate": 0.0001962843808729243, "loss": 12.6999, "step": 4203 }, { "epoch": 0.22892460163498124, "grad_norm": 0.8598325420364537, "learning_rate": 0.00019628199904962065, "loss": 12.5789, "step": 4204 }, { "epoch": 0.22897905563156426, "grad_norm": 0.7080454670134934, "learning_rate": 0.00019627961647761198, "loss": 12.7293, "step": 4205 }, { "epoch": 0.22903350962814728, "grad_norm": 0.8012826117160416, "learning_rate": 0.0001962772331569169, "loss": 12.5654, "step": 4206 }, { "epoch": 0.22908796362473027, "grad_norm": 0.8221366642183998, "learning_rate": 0.0001962748490875539, "loss": 12.7532, "step": 4207 }, { "epoch": 0.2291424176213133, "grad_norm": 0.7487133557436079, "learning_rate": 0.00019627246426954153, "loss": 12.7589, "step": 4208 }, { "epoch": 0.22919687161789631, "grad_norm": 0.8018833033180254, "learning_rate": 0.00019627007870289833, "loss": 12.5792, "step": 4209 }, { "epoch": 0.2292513256144793, "grad_norm": 0.6758886976115429, "learning_rate": 0.00019626769238764285, "loss": 12.5047, "step": 4210 }, { "epoch": 0.22930577961106233, "grad_norm": 0.7721195598513376, "learning_rate": 0.00019626530532379366, "loss": 12.6491, "step": 4211 }, { "epoch": 0.22936023360764535, "grad_norm": 0.7287958100996574, "learning_rate": 0.00019626291751136932, "loss": 12.7247, "step": 4212 }, { "epoch": 0.22941468760422834, "grad_norm": 0.70739885144157, "learning_rate": 0.00019626052895038837, "loss": 12.6829, "step": 4213 }, { "epoch": 0.22946914160081136, "grad_norm": 0.9481536470865546, "learning_rate": 0.00019625813964086941, "loss": 12.6122, "step": 4214 }, { "epoch": 0.22952359559739438, "grad_norm": 0.7477751782480188, "learning_rate": 0.000196255749582831, "loss": 12.5888, "step": 4215 }, { "epoch": 0.2295780495939774, "grad_norm": 0.8458601177972982, "learning_rate": 0.00019625335877629176, "loss": 12.5862, "step": 4216 }, { "epoch": 0.2296325035905604, "grad_norm": 0.7034832614989683, "learning_rate": 0.00019625096722127025, "loss": 12.6872, "step": 4217 }, { "epoch": 0.22968695758714341, "grad_norm": 0.8110735484700187, "learning_rate": 0.0001962485749177851, "loss": 12.425, "step": 4218 }, { "epoch": 0.22974141158372643, "grad_norm": 0.8934675458851745, "learning_rate": 0.00019624618186585483, "loss": 12.7089, "step": 4219 }, { "epoch": 0.22979586558030943, "grad_norm": 0.9536438597146135, "learning_rate": 0.00019624378806549816, "loss": 12.8108, "step": 4220 }, { "epoch": 0.22985031957689245, "grad_norm": 0.7748427675417733, "learning_rate": 0.00019624139351673368, "loss": 12.592, "step": 4221 }, { "epoch": 0.22990477357347547, "grad_norm": 0.7547516755231537, "learning_rate": 0.00019623899821957994, "loss": 12.6094, "step": 4222 }, { "epoch": 0.22995922757005846, "grad_norm": 0.7069906171041361, "learning_rate": 0.0001962366021740556, "loss": 12.4284, "step": 4223 }, { "epoch": 0.23001368156664148, "grad_norm": 0.7339094492319244, "learning_rate": 0.00019623420538017933, "loss": 12.6512, "step": 4224 }, { "epoch": 0.2300681355632245, "grad_norm": 0.7168079873618214, "learning_rate": 0.00019623180783796972, "loss": 12.7724, "step": 4225 }, { "epoch": 0.2301225895598075, "grad_norm": 0.7628963015436998, "learning_rate": 0.00019622940954744546, "loss": 12.7698, "step": 4226 }, { "epoch": 0.23017704355639051, "grad_norm": 0.800313160513469, "learning_rate": 0.00019622701050862516, "loss": 12.668, "step": 4227 }, { "epoch": 0.23023149755297354, "grad_norm": 0.7402059656718677, "learning_rate": 0.0001962246107215275, "loss": 12.7014, "step": 4228 }, { "epoch": 0.23028595154955653, "grad_norm": 0.768773436899772, "learning_rate": 0.0001962222101861711, "loss": 12.6177, "step": 4229 }, { "epoch": 0.23034040554613955, "grad_norm": 0.7433993649806422, "learning_rate": 0.00019621980890257467, "loss": 12.6767, "step": 4230 }, { "epoch": 0.23039485954272257, "grad_norm": 0.7848663476635999, "learning_rate": 0.0001962174068707569, "loss": 12.6785, "step": 4231 }, { "epoch": 0.2304493135393056, "grad_norm": 0.9097231002227915, "learning_rate": 0.00019621500409073642, "loss": 12.7234, "step": 4232 }, { "epoch": 0.23050376753588858, "grad_norm": 0.737656135746251, "learning_rate": 0.0001962126005625319, "loss": 12.5546, "step": 4233 }, { "epoch": 0.2305582215324716, "grad_norm": 0.7434625052577787, "learning_rate": 0.0001962101962861621, "loss": 12.6206, "step": 4234 }, { "epoch": 0.23061267552905462, "grad_norm": 0.7067678014290748, "learning_rate": 0.00019620779126164567, "loss": 12.5727, "step": 4235 }, { "epoch": 0.23066712952563762, "grad_norm": 0.8034904350345814, "learning_rate": 0.00019620538548900134, "loss": 12.5893, "step": 4236 }, { "epoch": 0.23072158352222064, "grad_norm": 0.7364403510918875, "learning_rate": 0.00019620297896824778, "loss": 12.6274, "step": 4237 }, { "epoch": 0.23077603751880366, "grad_norm": 0.7482614937671299, "learning_rate": 0.00019620057169940372, "loss": 12.6263, "step": 4238 }, { "epoch": 0.23083049151538665, "grad_norm": 0.7421278997437073, "learning_rate": 0.0001961981636824879, "loss": 12.5673, "step": 4239 }, { "epoch": 0.23088494551196967, "grad_norm": 0.7276833601350934, "learning_rate": 0.000196195754917519, "loss": 12.5512, "step": 4240 }, { "epoch": 0.2309393995085527, "grad_norm": 0.7876421122370915, "learning_rate": 0.00019619334540451578, "loss": 12.6634, "step": 4241 }, { "epoch": 0.23099385350513568, "grad_norm": 0.6979158172927132, "learning_rate": 0.00019619093514349698, "loss": 12.5341, "step": 4242 }, { "epoch": 0.2310483075017187, "grad_norm": 0.655866451015755, "learning_rate": 0.00019618852413448134, "loss": 12.5973, "step": 4243 }, { "epoch": 0.23110276149830172, "grad_norm": 0.8482436189951839, "learning_rate": 0.0001961861123774876, "loss": 12.7595, "step": 4244 }, { "epoch": 0.23115721549488472, "grad_norm": 0.6987739037734397, "learning_rate": 0.00019618369987253452, "loss": 12.5868, "step": 4245 }, { "epoch": 0.23121166949146774, "grad_norm": 0.7143858283375534, "learning_rate": 0.0001961812866196409, "loss": 12.563, "step": 4246 }, { "epoch": 0.23126612348805076, "grad_norm": 0.7598538747605517, "learning_rate": 0.00019617887261882543, "loss": 12.6624, "step": 4247 }, { "epoch": 0.23132057748463375, "grad_norm": 0.6621791276826825, "learning_rate": 0.0001961764578701069, "loss": 12.4685, "step": 4248 }, { "epoch": 0.23137503148121677, "grad_norm": 0.6874364514880499, "learning_rate": 0.00019617404237350412, "loss": 12.6243, "step": 4249 }, { "epoch": 0.2314294854777998, "grad_norm": 0.6843292576653252, "learning_rate": 0.00019617162612903588, "loss": 12.6069, "step": 4250 }, { "epoch": 0.2314839394743828, "grad_norm": 0.7386083117850814, "learning_rate": 0.00019616920913672093, "loss": 12.6869, "step": 4251 }, { "epoch": 0.2315383934709658, "grad_norm": 0.7430864456399691, "learning_rate": 0.00019616679139657808, "loss": 12.7086, "step": 4252 }, { "epoch": 0.23159284746754882, "grad_norm": 0.7003175792165027, "learning_rate": 0.00019616437290862613, "loss": 12.5829, "step": 4253 }, { "epoch": 0.23164730146413184, "grad_norm": 0.789833782665007, "learning_rate": 0.0001961619536728839, "loss": 12.6162, "step": 4254 }, { "epoch": 0.23170175546071484, "grad_norm": 0.7355859612170126, "learning_rate": 0.00019615953368937018, "loss": 12.456, "step": 4255 }, { "epoch": 0.23175620945729786, "grad_norm": 0.7273326403014023, "learning_rate": 0.0001961571129581038, "loss": 12.639, "step": 4256 }, { "epoch": 0.23181066345388088, "grad_norm": 0.7677796485027044, "learning_rate": 0.00019615469147910358, "loss": 12.7071, "step": 4257 }, { "epoch": 0.23186511745046387, "grad_norm": 0.6403050854559836, "learning_rate": 0.00019615226925238837, "loss": 12.5584, "step": 4258 }, { "epoch": 0.2319195714470469, "grad_norm": 0.7559823101393152, "learning_rate": 0.00019614984627797699, "loss": 12.5595, "step": 4259 }, { "epoch": 0.2319740254436299, "grad_norm": 0.6971580283174519, "learning_rate": 0.00019614742255588826, "loss": 12.6703, "step": 4260 }, { "epoch": 0.2320284794402129, "grad_norm": 0.7388504771360798, "learning_rate": 0.00019614499808614106, "loss": 12.6664, "step": 4261 }, { "epoch": 0.23208293343679592, "grad_norm": 0.8428539947928397, "learning_rate": 0.00019614257286875423, "loss": 12.64, "step": 4262 }, { "epoch": 0.23213738743337894, "grad_norm": 0.6449467533861518, "learning_rate": 0.00019614014690374666, "loss": 12.5579, "step": 4263 }, { "epoch": 0.23219184142996194, "grad_norm": 0.7948607639082977, "learning_rate": 0.00019613772019113715, "loss": 12.603, "step": 4264 }, { "epoch": 0.23224629542654496, "grad_norm": 0.7533943506759686, "learning_rate": 0.0001961352927309446, "loss": 12.6152, "step": 4265 }, { "epoch": 0.23230074942312798, "grad_norm": 0.7791838528326387, "learning_rate": 0.0001961328645231879, "loss": 12.5939, "step": 4266 }, { "epoch": 0.232355203419711, "grad_norm": 0.73616378505053, "learning_rate": 0.00019613043556788594, "loss": 12.5496, "step": 4267 }, { "epoch": 0.232409657416294, "grad_norm": 0.7783342577870753, "learning_rate": 0.0001961280058650576, "loss": 12.74, "step": 4268 }, { "epoch": 0.232464111412877, "grad_norm": 0.7484188737899983, "learning_rate": 0.0001961255754147217, "loss": 12.5975, "step": 4269 }, { "epoch": 0.23251856540946003, "grad_norm": 0.6277519572272489, "learning_rate": 0.00019612314421689727, "loss": 12.4444, "step": 4270 }, { "epoch": 0.23257301940604302, "grad_norm": 0.7895078058046108, "learning_rate": 0.00019612071227160315, "loss": 12.5838, "step": 4271 }, { "epoch": 0.23262747340262604, "grad_norm": 0.7453697623277638, "learning_rate": 0.00019611827957885823, "loss": 12.4951, "step": 4272 }, { "epoch": 0.23268192739920907, "grad_norm": 0.7374459052273574, "learning_rate": 0.00019611584613868146, "loss": 12.6958, "step": 4273 }, { "epoch": 0.23273638139579206, "grad_norm": 0.7871237598849445, "learning_rate": 0.00019611341195109174, "loss": 12.6594, "step": 4274 }, { "epoch": 0.23279083539237508, "grad_norm": 0.7746752949147973, "learning_rate": 0.00019611097701610804, "loss": 12.7189, "step": 4275 }, { "epoch": 0.2328452893889581, "grad_norm": 0.8101023763463789, "learning_rate": 0.00019610854133374922, "loss": 12.6418, "step": 4276 }, { "epoch": 0.2328997433855411, "grad_norm": 0.876845460569168, "learning_rate": 0.00019610610490403428, "loss": 12.5988, "step": 4277 }, { "epoch": 0.2329541973821241, "grad_norm": 0.6829682379865623, "learning_rate": 0.00019610366772698218, "loss": 12.7112, "step": 4278 }, { "epoch": 0.23300865137870713, "grad_norm": 0.8823059947422979, "learning_rate": 0.00019610122980261182, "loss": 12.7105, "step": 4279 }, { "epoch": 0.23306310537529012, "grad_norm": 0.6757896880882112, "learning_rate": 0.00019609879113094216, "loss": 12.5127, "step": 4280 }, { "epoch": 0.23311755937187315, "grad_norm": 0.7799192024165947, "learning_rate": 0.0001960963517119922, "loss": 12.6248, "step": 4281 }, { "epoch": 0.23317201336845617, "grad_norm": 0.7738107472625442, "learning_rate": 0.0001960939115457809, "loss": 12.6383, "step": 4282 }, { "epoch": 0.2332264673650392, "grad_norm": 0.6654857619867839, "learning_rate": 0.00019609147063232723, "loss": 12.5648, "step": 4283 }, { "epoch": 0.23328092136162218, "grad_norm": 0.7831452104370247, "learning_rate": 0.0001960890289716502, "loss": 12.6137, "step": 4284 }, { "epoch": 0.2333353753582052, "grad_norm": 0.7727339044966621, "learning_rate": 0.0001960865865637687, "loss": 12.7041, "step": 4285 }, { "epoch": 0.23338982935478822, "grad_norm": 0.7715279175080322, "learning_rate": 0.00019608414340870184, "loss": 12.7531, "step": 4286 }, { "epoch": 0.2334442833513712, "grad_norm": 0.8001630128369412, "learning_rate": 0.00019608169950646859, "loss": 12.5734, "step": 4287 }, { "epoch": 0.23349873734795423, "grad_norm": 0.787395148810035, "learning_rate": 0.00019607925485708787, "loss": 12.6792, "step": 4288 }, { "epoch": 0.23355319134453725, "grad_norm": 0.842592994942548, "learning_rate": 0.00019607680946057875, "loss": 12.666, "step": 4289 }, { "epoch": 0.23360764534112025, "grad_norm": 0.7512388506883075, "learning_rate": 0.0001960743633169603, "loss": 12.59, "step": 4290 }, { "epoch": 0.23366209933770327, "grad_norm": 0.72247188846382, "learning_rate": 0.00019607191642625145, "loss": 12.6564, "step": 4291 }, { "epoch": 0.2337165533342863, "grad_norm": 0.8745336571336647, "learning_rate": 0.0001960694687884713, "loss": 12.5119, "step": 4292 }, { "epoch": 0.23377100733086928, "grad_norm": 0.6912730098958464, "learning_rate": 0.0001960670204036388, "loss": 12.5903, "step": 4293 }, { "epoch": 0.2338254613274523, "grad_norm": 0.8363161507249239, "learning_rate": 0.00019606457127177308, "loss": 12.6366, "step": 4294 }, { "epoch": 0.23387991532403532, "grad_norm": 0.7493847572979708, "learning_rate": 0.00019606212139289313, "loss": 12.6625, "step": 4295 }, { "epoch": 0.2339343693206183, "grad_norm": 0.7003006750115612, "learning_rate": 0.00019605967076701802, "loss": 12.5667, "step": 4296 }, { "epoch": 0.23398882331720133, "grad_norm": 0.7875107147194136, "learning_rate": 0.00019605721939416678, "loss": 12.6736, "step": 4297 }, { "epoch": 0.23404327731378435, "grad_norm": 0.7435004985456852, "learning_rate": 0.00019605476727435855, "loss": 12.6327, "step": 4298 }, { "epoch": 0.23409773131036737, "grad_norm": 0.9089254508292351, "learning_rate": 0.0001960523144076123, "loss": 12.7987, "step": 4299 }, { "epoch": 0.23415218530695037, "grad_norm": 0.7142398932570184, "learning_rate": 0.00019604986079394711, "loss": 12.6418, "step": 4300 }, { "epoch": 0.2342066393035334, "grad_norm": 0.6312072194048043, "learning_rate": 0.00019604740643338215, "loss": 12.5074, "step": 4301 }, { "epoch": 0.2342610933001164, "grad_norm": 0.6643375200710461, "learning_rate": 0.00019604495132593644, "loss": 12.3109, "step": 4302 }, { "epoch": 0.2343155472966994, "grad_norm": 0.7941833749668644, "learning_rate": 0.00019604249547162906, "loss": 12.6772, "step": 4303 }, { "epoch": 0.23437000129328242, "grad_norm": 0.709594513312813, "learning_rate": 0.00019604003887047916, "loss": 12.5156, "step": 4304 }, { "epoch": 0.23442445528986544, "grad_norm": 0.7762370769912964, "learning_rate": 0.00019603758152250577, "loss": 12.8532, "step": 4305 }, { "epoch": 0.23447890928644843, "grad_norm": 0.7910019480267998, "learning_rate": 0.00019603512342772808, "loss": 12.5901, "step": 4306 }, { "epoch": 0.23453336328303145, "grad_norm": 0.6729287952350281, "learning_rate": 0.00019603266458616514, "loss": 12.5129, "step": 4307 }, { "epoch": 0.23458781727961447, "grad_norm": 0.7864051008022672, "learning_rate": 0.00019603020499783612, "loss": 12.411, "step": 4308 }, { "epoch": 0.23464227127619747, "grad_norm": 0.7335956838909282, "learning_rate": 0.00019602774466276007, "loss": 12.6596, "step": 4309 }, { "epoch": 0.2346967252727805, "grad_norm": 0.7876811390143509, "learning_rate": 0.00019602528358095625, "loss": 12.6153, "step": 4310 }, { "epoch": 0.2347511792693635, "grad_norm": 0.7767342521011587, "learning_rate": 0.00019602282175244367, "loss": 12.5518, "step": 4311 }, { "epoch": 0.2348056332659465, "grad_norm": 0.7399594559128674, "learning_rate": 0.00019602035917724153, "loss": 12.6122, "step": 4312 }, { "epoch": 0.23486008726252952, "grad_norm": 0.8289972152340461, "learning_rate": 0.000196017895855369, "loss": 12.5061, "step": 4313 }, { "epoch": 0.23491454125911254, "grad_norm": 0.7408918207456543, "learning_rate": 0.00019601543178684517, "loss": 12.5571, "step": 4314 }, { "epoch": 0.23496899525569553, "grad_norm": 0.8245995095661995, "learning_rate": 0.00019601296697168926, "loss": 12.6654, "step": 4315 }, { "epoch": 0.23502344925227855, "grad_norm": 0.7435772943999259, "learning_rate": 0.00019601050140992044, "loss": 12.6058, "step": 4316 }, { "epoch": 0.23507790324886157, "grad_norm": 0.7958728150332822, "learning_rate": 0.00019600803510155782, "loss": 12.7924, "step": 4317 }, { "epoch": 0.2351323572454446, "grad_norm": 0.7486224387089223, "learning_rate": 0.00019600556804662064, "loss": 12.634, "step": 4318 }, { "epoch": 0.2351868112420276, "grad_norm": 0.7969138995287975, "learning_rate": 0.00019600310024512808, "loss": 12.6919, "step": 4319 }, { "epoch": 0.2352412652386106, "grad_norm": 0.70620545564187, "learning_rate": 0.00019600063169709927, "loss": 12.718, "step": 4320 }, { "epoch": 0.23529571923519363, "grad_norm": 0.7393046120739886, "learning_rate": 0.0001959981624025535, "loss": 12.5344, "step": 4321 }, { "epoch": 0.23535017323177662, "grad_norm": 0.7254327438539935, "learning_rate": 0.00019599569236150986, "loss": 12.5618, "step": 4322 }, { "epoch": 0.23540462722835964, "grad_norm": 0.6887552576178985, "learning_rate": 0.00019599322157398764, "loss": 12.6536, "step": 4323 }, { "epoch": 0.23545908122494266, "grad_norm": 0.7553241878725601, "learning_rate": 0.00019599075004000607, "loss": 12.5744, "step": 4324 }, { "epoch": 0.23551353522152566, "grad_norm": 0.7499048113007982, "learning_rate": 0.00019598827775958432, "loss": 12.6286, "step": 4325 }, { "epoch": 0.23556798921810868, "grad_norm": 0.8636137911339891, "learning_rate": 0.0001959858047327416, "loss": 12.7978, "step": 4326 }, { "epoch": 0.2356224432146917, "grad_norm": 0.7486050817099174, "learning_rate": 0.00019598333095949716, "loss": 12.5916, "step": 4327 }, { "epoch": 0.2356768972112747, "grad_norm": 0.719927559818087, "learning_rate": 0.00019598085643987025, "loss": 12.6053, "step": 4328 }, { "epoch": 0.2357313512078577, "grad_norm": 0.7384823362730173, "learning_rate": 0.0001959783811738801, "loss": 12.8224, "step": 4329 }, { "epoch": 0.23578580520444073, "grad_norm": 0.6720819302333678, "learning_rate": 0.00019597590516154598, "loss": 12.5344, "step": 4330 }, { "epoch": 0.23584025920102372, "grad_norm": 0.7603169777358452, "learning_rate": 0.00019597342840288711, "loss": 12.6039, "step": 4331 }, { "epoch": 0.23589471319760674, "grad_norm": 0.7062455194557229, "learning_rate": 0.00019597095089792278, "loss": 12.6485, "step": 4332 }, { "epoch": 0.23594916719418976, "grad_norm": 0.735680768440551, "learning_rate": 0.0001959684726466722, "loss": 12.8305, "step": 4333 }, { "epoch": 0.23600362119077278, "grad_norm": 0.6484772548160889, "learning_rate": 0.00019596599364915472, "loss": 12.4678, "step": 4334 }, { "epoch": 0.23605807518735578, "grad_norm": 0.7327670055095351, "learning_rate": 0.0001959635139053896, "loss": 12.5556, "step": 4335 }, { "epoch": 0.2361125291839388, "grad_norm": 0.7395264390154621, "learning_rate": 0.00019596103341539608, "loss": 12.8079, "step": 4336 }, { "epoch": 0.23616698318052182, "grad_norm": 0.6869678528143364, "learning_rate": 0.00019595855217919347, "loss": 12.5308, "step": 4337 }, { "epoch": 0.2362214371771048, "grad_norm": 0.7119042798010712, "learning_rate": 0.00019595607019680107, "loss": 12.8081, "step": 4338 }, { "epoch": 0.23627589117368783, "grad_norm": 0.8493247308149601, "learning_rate": 0.00019595358746823819, "loss": 12.594, "step": 4339 }, { "epoch": 0.23633034517027085, "grad_norm": 0.7133131279686575, "learning_rate": 0.0001959511039935241, "loss": 12.5086, "step": 4340 }, { "epoch": 0.23638479916685384, "grad_norm": 0.905017738612647, "learning_rate": 0.00019594861977267813, "loss": 12.7374, "step": 4341 }, { "epoch": 0.23643925316343686, "grad_norm": 0.7011975944578445, "learning_rate": 0.0001959461348057196, "loss": 12.6633, "step": 4342 }, { "epoch": 0.23649370716001988, "grad_norm": 0.6679431058281032, "learning_rate": 0.00019594364909266787, "loss": 12.5931, "step": 4343 }, { "epoch": 0.23654816115660288, "grad_norm": 0.7666953457547615, "learning_rate": 0.0001959411626335422, "loss": 12.6656, "step": 4344 }, { "epoch": 0.2366026151531859, "grad_norm": 0.7048870608679273, "learning_rate": 0.00019593867542836197, "loss": 12.6733, "step": 4345 }, { "epoch": 0.23665706914976892, "grad_norm": 0.6755394346574072, "learning_rate": 0.0001959361874771465, "loss": 12.5798, "step": 4346 }, { "epoch": 0.2367115231463519, "grad_norm": 0.8117541978725121, "learning_rate": 0.0001959336987799152, "loss": 12.576, "step": 4347 }, { "epoch": 0.23676597714293493, "grad_norm": 0.7168325850886029, "learning_rate": 0.00019593120933668733, "loss": 12.6734, "step": 4348 }, { "epoch": 0.23682043113951795, "grad_norm": 0.7886452783719964, "learning_rate": 0.00019592871914748229, "loss": 12.8169, "step": 4349 }, { "epoch": 0.23687488513610097, "grad_norm": 0.6665072365679392, "learning_rate": 0.00019592622821231942, "loss": 12.574, "step": 4350 }, { "epoch": 0.23692933913268396, "grad_norm": 0.7309078222381065, "learning_rate": 0.00019592373653121815, "loss": 12.5544, "step": 4351 }, { "epoch": 0.23698379312926698, "grad_norm": 0.7226790696502614, "learning_rate": 0.00019592124410419782, "loss": 12.6653, "step": 4352 }, { "epoch": 0.23703824712585, "grad_norm": 0.7930188571842032, "learning_rate": 0.00019591875093127778, "loss": 12.5467, "step": 4353 }, { "epoch": 0.237092701122433, "grad_norm": 0.7132902283812247, "learning_rate": 0.00019591625701247743, "loss": 12.5414, "step": 4354 }, { "epoch": 0.23714715511901602, "grad_norm": 0.71476247205427, "learning_rate": 0.00019591376234781623, "loss": 12.664, "step": 4355 }, { "epoch": 0.23720160911559904, "grad_norm": 0.7012311447820085, "learning_rate": 0.0001959112669373135, "loss": 12.5862, "step": 4356 }, { "epoch": 0.23725606311218203, "grad_norm": 0.6501016857711592, "learning_rate": 0.0001959087707809887, "loss": 12.6829, "step": 4357 }, { "epoch": 0.23731051710876505, "grad_norm": 0.6585230561181619, "learning_rate": 0.0001959062738788612, "loss": 12.6696, "step": 4358 }, { "epoch": 0.23736497110534807, "grad_norm": 0.7190576518237434, "learning_rate": 0.00019590377623095043, "loss": 12.5348, "step": 4359 }, { "epoch": 0.23741942510193106, "grad_norm": 0.7866991075164738, "learning_rate": 0.0001959012778372758, "loss": 12.6122, "step": 4360 }, { "epoch": 0.23747387909851408, "grad_norm": 0.6349344258638765, "learning_rate": 0.00019589877869785678, "loss": 12.518, "step": 4361 }, { "epoch": 0.2375283330950971, "grad_norm": 0.7519536264575788, "learning_rate": 0.00019589627881271273, "loss": 12.7277, "step": 4362 }, { "epoch": 0.2375827870916801, "grad_norm": 0.6524831429079407, "learning_rate": 0.00019589377818186318, "loss": 12.549, "step": 4363 }, { "epoch": 0.23763724108826312, "grad_norm": 0.6799023071625376, "learning_rate": 0.0001958912768053275, "loss": 12.5668, "step": 4364 }, { "epoch": 0.23769169508484614, "grad_norm": 0.6664392562505124, "learning_rate": 0.00019588877468312518, "loss": 12.7095, "step": 4365 }, { "epoch": 0.23774614908142916, "grad_norm": 0.6533629567695574, "learning_rate": 0.00019588627181527568, "loss": 12.5943, "step": 4366 }, { "epoch": 0.23780060307801215, "grad_norm": 0.6506573048539561, "learning_rate": 0.00019588376820179845, "loss": 12.4782, "step": 4367 }, { "epoch": 0.23785505707459517, "grad_norm": 0.7732206437895202, "learning_rate": 0.00019588126384271294, "loss": 12.7024, "step": 4368 }, { "epoch": 0.2379095110711782, "grad_norm": 0.7102121516925589, "learning_rate": 0.00019587875873803865, "loss": 12.5706, "step": 4369 }, { "epoch": 0.23796396506776119, "grad_norm": 0.7655504404935409, "learning_rate": 0.00019587625288779506, "loss": 12.6266, "step": 4370 }, { "epoch": 0.2380184190643442, "grad_norm": 0.7034358027577429, "learning_rate": 0.00019587374629200164, "loss": 12.665, "step": 4371 }, { "epoch": 0.23807287306092723, "grad_norm": 0.7218635469509956, "learning_rate": 0.0001958712389506779, "loss": 12.4119, "step": 4372 }, { "epoch": 0.23812732705751022, "grad_norm": 0.7219544872173718, "learning_rate": 0.00019586873086384333, "loss": 12.6191, "step": 4373 }, { "epoch": 0.23818178105409324, "grad_norm": 0.6753360708985948, "learning_rate": 0.0001958662220315174, "loss": 12.6027, "step": 4374 }, { "epoch": 0.23823623505067626, "grad_norm": 0.7042105075748244, "learning_rate": 0.0001958637124537197, "loss": 12.678, "step": 4375 }, { "epoch": 0.23829068904725925, "grad_norm": 0.6937149586216751, "learning_rate": 0.00019586120213046964, "loss": 12.6869, "step": 4376 }, { "epoch": 0.23834514304384227, "grad_norm": 0.7638703568099982, "learning_rate": 0.00019585869106178685, "loss": 12.8213, "step": 4377 }, { "epoch": 0.2383995970404253, "grad_norm": 0.711513978494034, "learning_rate": 0.00019585617924769078, "loss": 12.6086, "step": 4378 }, { "epoch": 0.23845405103700829, "grad_norm": 0.6760817286826006, "learning_rate": 0.00019585366668820097, "loss": 12.7248, "step": 4379 }, { "epoch": 0.2385085050335913, "grad_norm": 0.8389641014052955, "learning_rate": 0.000195851153383337, "loss": 12.6197, "step": 4380 }, { "epoch": 0.23856295903017433, "grad_norm": 0.7277777233565489, "learning_rate": 0.00019584863933311836, "loss": 12.514, "step": 4381 }, { "epoch": 0.23861741302675732, "grad_norm": 0.7616073170124373, "learning_rate": 0.00019584612453756465, "loss": 12.6506, "step": 4382 }, { "epoch": 0.23867186702334034, "grad_norm": 0.6709118088136903, "learning_rate": 0.00019584360899669537, "loss": 12.7244, "step": 4383 }, { "epoch": 0.23872632101992336, "grad_norm": 0.746987612988271, "learning_rate": 0.00019584109271053018, "loss": 12.5503, "step": 4384 }, { "epoch": 0.23878077501650638, "grad_norm": 0.7572221948189825, "learning_rate": 0.0001958385756790885, "loss": 12.5647, "step": 4385 }, { "epoch": 0.23883522901308937, "grad_norm": 0.6949784375654698, "learning_rate": 0.00019583605790239004, "loss": 12.561, "step": 4386 }, { "epoch": 0.2388896830096724, "grad_norm": 0.741463522239718, "learning_rate": 0.00019583353938045433, "loss": 12.6955, "step": 4387 }, { "epoch": 0.2389441370062554, "grad_norm": 0.6497906124337596, "learning_rate": 0.0001958310201133009, "loss": 12.5737, "step": 4388 }, { "epoch": 0.2389985910028384, "grad_norm": 0.7040373728229842, "learning_rate": 0.0001958285001009494, "loss": 12.6129, "step": 4389 }, { "epoch": 0.23905304499942143, "grad_norm": 0.7392310666609192, "learning_rate": 0.00019582597934341943, "loss": 12.6106, "step": 4390 }, { "epoch": 0.23910749899600445, "grad_norm": 0.7254938521643921, "learning_rate": 0.00019582345784073058, "loss": 12.5225, "step": 4391 }, { "epoch": 0.23916195299258744, "grad_norm": 0.694043218706121, "learning_rate": 0.00019582093559290242, "loss": 12.5082, "step": 4392 }, { "epoch": 0.23921640698917046, "grad_norm": 0.6702855650816293, "learning_rate": 0.0001958184125999546, "loss": 12.6206, "step": 4393 }, { "epoch": 0.23927086098575348, "grad_norm": 0.693921757939034, "learning_rate": 0.00019581588886190675, "loss": 12.4757, "step": 4394 }, { "epoch": 0.23932531498233647, "grad_norm": 0.6787515322047294, "learning_rate": 0.00019581336437877848, "loss": 12.3603, "step": 4395 }, { "epoch": 0.2393797689789195, "grad_norm": 0.7769332202563006, "learning_rate": 0.0001958108391505894, "loss": 12.4286, "step": 4396 }, { "epoch": 0.23943422297550251, "grad_norm": 0.7618523412111193, "learning_rate": 0.0001958083131773592, "loss": 12.5702, "step": 4397 }, { "epoch": 0.2394886769720855, "grad_norm": 0.6980552748721157, "learning_rate": 0.0001958057864591075, "loss": 12.546, "step": 4398 }, { "epoch": 0.23954313096866853, "grad_norm": 0.9703323367362652, "learning_rate": 0.00019580325899585388, "loss": 12.6259, "step": 4399 }, { "epoch": 0.23959758496525155, "grad_norm": 0.6874644663477578, "learning_rate": 0.0001958007307876181, "loss": 12.6013, "step": 4400 }, { "epoch": 0.23965203896183457, "grad_norm": 0.7883185675467992, "learning_rate": 0.00019579820183441974, "loss": 12.6963, "step": 4401 }, { "epoch": 0.23970649295841756, "grad_norm": 0.7134853056312774, "learning_rate": 0.0001957956721362785, "loss": 12.5664, "step": 4402 }, { "epoch": 0.23976094695500058, "grad_norm": 0.6907933935357229, "learning_rate": 0.0001957931416932141, "loss": 12.6162, "step": 4403 }, { "epoch": 0.2398154009515836, "grad_norm": 0.6759806570517407, "learning_rate": 0.0001957906105052461, "loss": 12.4443, "step": 4404 }, { "epoch": 0.2398698549481666, "grad_norm": 0.6749819770696659, "learning_rate": 0.00019578807857239427, "loss": 12.4787, "step": 4405 }, { "epoch": 0.23992430894474961, "grad_norm": 0.6790796285000683, "learning_rate": 0.0001957855458946783, "loss": 12.5896, "step": 4406 }, { "epoch": 0.23997876294133264, "grad_norm": 0.8447902766005312, "learning_rate": 0.00019578301247211784, "loss": 12.7252, "step": 4407 }, { "epoch": 0.24003321693791563, "grad_norm": 0.6929824546055435, "learning_rate": 0.00019578047830473263, "loss": 12.6199, "step": 4408 }, { "epoch": 0.24008767093449865, "grad_norm": 0.684029054330614, "learning_rate": 0.00019577794339254234, "loss": 12.578, "step": 4409 }, { "epoch": 0.24014212493108167, "grad_norm": 0.7338430360544661, "learning_rate": 0.00019577540773556672, "loss": 12.6144, "step": 4410 }, { "epoch": 0.24019657892766466, "grad_norm": 0.6985904077182428, "learning_rate": 0.0001957728713338254, "loss": 12.6199, "step": 4411 }, { "epoch": 0.24025103292424768, "grad_norm": 0.6805492339585597, "learning_rate": 0.00019577033418733826, "loss": 12.6823, "step": 4412 }, { "epoch": 0.2403054869208307, "grad_norm": 0.7185307030195718, "learning_rate": 0.0001957677962961249, "loss": 12.6543, "step": 4413 }, { "epoch": 0.2403599409174137, "grad_norm": 0.6971857831409217, "learning_rate": 0.0001957652576602051, "loss": 12.5886, "step": 4414 }, { "epoch": 0.24041439491399672, "grad_norm": 0.6637590622574503, "learning_rate": 0.0001957627182795986, "loss": 12.6217, "step": 4415 }, { "epoch": 0.24046884891057974, "grad_norm": 0.7310212965379054, "learning_rate": 0.00019576017815432515, "loss": 12.6621, "step": 4416 }, { "epoch": 0.24052330290716276, "grad_norm": 0.7393074452512833, "learning_rate": 0.0001957576372844045, "loss": 12.5586, "step": 4417 }, { "epoch": 0.24057775690374575, "grad_norm": 0.7296197794415451, "learning_rate": 0.00019575509566985638, "loss": 12.569, "step": 4418 }, { "epoch": 0.24063221090032877, "grad_norm": 0.7011152828829472, "learning_rate": 0.00019575255331070058, "loss": 12.5821, "step": 4419 }, { "epoch": 0.2406866648969118, "grad_norm": 0.8052641623839756, "learning_rate": 0.0001957500102069569, "loss": 12.4746, "step": 4420 }, { "epoch": 0.24074111889349478, "grad_norm": 0.6879345374295561, "learning_rate": 0.00019574746635864506, "loss": 12.5922, "step": 4421 }, { "epoch": 0.2407955728900778, "grad_norm": 1.0817216594551267, "learning_rate": 0.00019574492176578485, "loss": 12.6664, "step": 4422 }, { "epoch": 0.24085002688666082, "grad_norm": 0.678943951419676, "learning_rate": 0.00019574237642839607, "loss": 12.6064, "step": 4423 }, { "epoch": 0.24090448088324382, "grad_norm": 0.7892394980152566, "learning_rate": 0.00019573983034649854, "loss": 12.6721, "step": 4424 }, { "epoch": 0.24095893487982684, "grad_norm": 0.6887403346294946, "learning_rate": 0.00019573728352011204, "loss": 12.5072, "step": 4425 }, { "epoch": 0.24101338887640986, "grad_norm": 0.7236036893698674, "learning_rate": 0.00019573473594925632, "loss": 12.7498, "step": 4426 }, { "epoch": 0.24106784287299285, "grad_norm": 0.7725445214988449, "learning_rate": 0.0001957321876339513, "loss": 12.4948, "step": 4427 }, { "epoch": 0.24112229686957587, "grad_norm": 0.7523623474627931, "learning_rate": 0.0001957296385742167, "loss": 12.6427, "step": 4428 }, { "epoch": 0.2411767508661589, "grad_norm": 0.7879142777310196, "learning_rate": 0.0001957270887700724, "loss": 12.6412, "step": 4429 }, { "epoch": 0.24123120486274188, "grad_norm": 0.6593665147814708, "learning_rate": 0.00019572453822153818, "loss": 12.6723, "step": 4430 }, { "epoch": 0.2412856588593249, "grad_norm": 0.7554934253466723, "learning_rate": 0.0001957219869286339, "loss": 12.7087, "step": 4431 }, { "epoch": 0.24134011285590792, "grad_norm": 0.6994668680293442, "learning_rate": 0.0001957194348913794, "loss": 12.3919, "step": 4432 }, { "epoch": 0.24139456685249094, "grad_norm": 0.6874407428394054, "learning_rate": 0.00019571688210979452, "loss": 12.5445, "step": 4433 }, { "epoch": 0.24144902084907394, "grad_norm": 0.667137984555039, "learning_rate": 0.00019571432858389912, "loss": 12.4685, "step": 4434 }, { "epoch": 0.24150347484565696, "grad_norm": 0.7996917414516334, "learning_rate": 0.00019571177431371303, "loss": 12.6403, "step": 4435 }, { "epoch": 0.24155792884223998, "grad_norm": 0.6913213809619198, "learning_rate": 0.00019570921929925616, "loss": 12.7323, "step": 4436 }, { "epoch": 0.24161238283882297, "grad_norm": 0.7221294539274075, "learning_rate": 0.00019570666354054835, "loss": 12.5116, "step": 4437 }, { "epoch": 0.241666836835406, "grad_norm": 0.6955443045709032, "learning_rate": 0.00019570410703760946, "loss": 12.7694, "step": 4438 }, { "epoch": 0.241721290831989, "grad_norm": 0.7939884123941371, "learning_rate": 0.0001957015497904594, "loss": 12.6727, "step": 4439 }, { "epoch": 0.241775744828572, "grad_norm": 0.6435736362998078, "learning_rate": 0.00019569899179911802, "loss": 12.6093, "step": 4440 }, { "epoch": 0.24183019882515502, "grad_norm": 0.6785583000701847, "learning_rate": 0.00019569643306360524, "loss": 12.6087, "step": 4441 }, { "epoch": 0.24188465282173804, "grad_norm": 0.8739610450351043, "learning_rate": 0.00019569387358394096, "loss": 12.5868, "step": 4442 }, { "epoch": 0.24193910681832104, "grad_norm": 0.6785969720317832, "learning_rate": 0.00019569131336014502, "loss": 12.6191, "step": 4443 }, { "epoch": 0.24199356081490406, "grad_norm": 0.6706174843742324, "learning_rate": 0.00019568875239223745, "loss": 12.6506, "step": 4444 }, { "epoch": 0.24204801481148708, "grad_norm": 0.6871753042850144, "learning_rate": 0.00019568619068023805, "loss": 12.6058, "step": 4445 }, { "epoch": 0.24210246880807007, "grad_norm": 0.7924294900049751, "learning_rate": 0.0001956836282241668, "loss": 12.597, "step": 4446 }, { "epoch": 0.2421569228046531, "grad_norm": 0.7458698287245629, "learning_rate": 0.0001956810650240436, "loss": 12.5591, "step": 4447 }, { "epoch": 0.2422113768012361, "grad_norm": 0.7321544092410979, "learning_rate": 0.0001956785010798884, "loss": 12.5925, "step": 4448 }, { "epoch": 0.2422658307978191, "grad_norm": 0.7487279437867312, "learning_rate": 0.00019567593639172116, "loss": 12.8325, "step": 4449 }, { "epoch": 0.24232028479440212, "grad_norm": 0.6905942359080257, "learning_rate": 0.00019567337095956173, "loss": 12.5908, "step": 4450 }, { "epoch": 0.24237473879098514, "grad_norm": 0.7374695447895508, "learning_rate": 0.00019567080478343017, "loss": 12.727, "step": 4451 }, { "epoch": 0.24242919278756817, "grad_norm": 0.6704989604779045, "learning_rate": 0.00019566823786334637, "loss": 12.5303, "step": 4452 }, { "epoch": 0.24248364678415116, "grad_norm": 0.8433399654324624, "learning_rate": 0.0001956656701993303, "loss": 12.7586, "step": 4453 }, { "epoch": 0.24253810078073418, "grad_norm": 0.6902272620976142, "learning_rate": 0.00019566310179140194, "loss": 12.5854, "step": 4454 }, { "epoch": 0.2425925547773172, "grad_norm": 0.6414382311660554, "learning_rate": 0.0001956605326395813, "loss": 12.5247, "step": 4455 }, { "epoch": 0.2426470087739002, "grad_norm": 0.6781107751163772, "learning_rate": 0.0001956579627438883, "loss": 12.6554, "step": 4456 }, { "epoch": 0.2427014627704832, "grad_norm": 0.6529822601179021, "learning_rate": 0.0001956553921043429, "loss": 12.4097, "step": 4457 }, { "epoch": 0.24275591676706623, "grad_norm": 0.64671283128718, "learning_rate": 0.00019565282072096514, "loss": 12.6114, "step": 4458 }, { "epoch": 0.24281037076364922, "grad_norm": 0.7302055774093955, "learning_rate": 0.000195650248593775, "loss": 12.7056, "step": 4459 }, { "epoch": 0.24286482476023225, "grad_norm": 0.6869491716617113, "learning_rate": 0.00019564767572279252, "loss": 12.5807, "step": 4460 }, { "epoch": 0.24291927875681527, "grad_norm": 0.7093695704767358, "learning_rate": 0.00019564510210803767, "loss": 12.6981, "step": 4461 }, { "epoch": 0.24297373275339826, "grad_norm": 0.7599144682393941, "learning_rate": 0.00019564252774953046, "loss": 12.6494, "step": 4462 }, { "epoch": 0.24302818674998128, "grad_norm": 0.6510888654090853, "learning_rate": 0.00019563995264729092, "loss": 12.5583, "step": 4463 }, { "epoch": 0.2430826407465643, "grad_norm": 0.6517012342100406, "learning_rate": 0.00019563737680133904, "loss": 12.5919, "step": 4464 }, { "epoch": 0.2431370947431473, "grad_norm": 0.7345404494271482, "learning_rate": 0.0001956348002116949, "loss": 12.5523, "step": 4465 }, { "epoch": 0.2431915487397303, "grad_norm": 0.6549193855626367, "learning_rate": 0.0001956322228783785, "loss": 12.6657, "step": 4466 }, { "epoch": 0.24324600273631333, "grad_norm": 0.713822158827217, "learning_rate": 0.00019562964480140992, "loss": 12.6496, "step": 4467 }, { "epoch": 0.24330045673289635, "grad_norm": 0.6505310466098979, "learning_rate": 0.00019562706598080917, "loss": 12.6382, "step": 4468 }, { "epoch": 0.24335491072947935, "grad_norm": 0.6734696028472843, "learning_rate": 0.00019562448641659633, "loss": 12.5192, "step": 4469 }, { "epoch": 0.24340936472606237, "grad_norm": 0.7173359735388568, "learning_rate": 0.00019562190610879142, "loss": 12.6257, "step": 4470 }, { "epoch": 0.2434638187226454, "grad_norm": 0.6768112396798218, "learning_rate": 0.0001956193250574146, "loss": 12.4036, "step": 4471 }, { "epoch": 0.24351827271922838, "grad_norm": 0.8482952575831432, "learning_rate": 0.0001956167432624858, "loss": 12.6744, "step": 4472 }, { "epoch": 0.2435727267158114, "grad_norm": 0.6896232479270313, "learning_rate": 0.0001956141607240252, "loss": 12.5744, "step": 4473 }, { "epoch": 0.24362718071239442, "grad_norm": 0.7615411170903834, "learning_rate": 0.00019561157744205283, "loss": 12.5567, "step": 4474 }, { "epoch": 0.2436816347089774, "grad_norm": 0.7651799858552987, "learning_rate": 0.00019560899341658882, "loss": 12.6451, "step": 4475 }, { "epoch": 0.24373608870556043, "grad_norm": 0.7505220181840234, "learning_rate": 0.00019560640864765326, "loss": 12.6047, "step": 4476 }, { "epoch": 0.24379054270214345, "grad_norm": 0.7986243578023776, "learning_rate": 0.0001956038231352662, "loss": 12.5866, "step": 4477 }, { "epoch": 0.24384499669872645, "grad_norm": 0.7308259229200806, "learning_rate": 0.0001956012368794478, "loss": 12.5419, "step": 4478 }, { "epoch": 0.24389945069530947, "grad_norm": 0.7536534796161848, "learning_rate": 0.00019559864988021814, "loss": 12.4244, "step": 4479 }, { "epoch": 0.2439539046918925, "grad_norm": 0.7359555051915565, "learning_rate": 0.00019559606213759737, "loss": 12.6927, "step": 4480 }, { "epoch": 0.24400835868847548, "grad_norm": 0.6842061251039289, "learning_rate": 0.00019559347365160555, "loss": 12.6481, "step": 4481 }, { "epoch": 0.2440628126850585, "grad_norm": 0.7077400251306337, "learning_rate": 0.00019559088442226287, "loss": 12.4308, "step": 4482 }, { "epoch": 0.24411726668164152, "grad_norm": 0.7482556729844912, "learning_rate": 0.00019558829444958942, "loss": 12.5591, "step": 4483 }, { "epoch": 0.24417172067822454, "grad_norm": 0.6449707271355242, "learning_rate": 0.0001955857037336054, "loss": 12.7376, "step": 4484 }, { "epoch": 0.24422617467480753, "grad_norm": 0.7326655502768871, "learning_rate": 0.00019558311227433088, "loss": 12.4296, "step": 4485 }, { "epoch": 0.24428062867139055, "grad_norm": 0.7200166033113317, "learning_rate": 0.0001955805200717861, "loss": 12.6405, "step": 4486 }, { "epoch": 0.24433508266797357, "grad_norm": 0.7553742880217637, "learning_rate": 0.00019557792712599113, "loss": 12.5288, "step": 4487 }, { "epoch": 0.24438953666455657, "grad_norm": 1.3477492623330136, "learning_rate": 0.00019557533343696616, "loss": 12.7448, "step": 4488 }, { "epoch": 0.2444439906611396, "grad_norm": 0.747810008635918, "learning_rate": 0.00019557273900473138, "loss": 12.6993, "step": 4489 }, { "epoch": 0.2444984446577226, "grad_norm": 0.7578599870862847, "learning_rate": 0.00019557014382930697, "loss": 12.5171, "step": 4490 }, { "epoch": 0.2445528986543056, "grad_norm": 0.7716666873016084, "learning_rate": 0.0001955675479107131, "loss": 12.4974, "step": 4491 }, { "epoch": 0.24460735265088862, "grad_norm": 0.7491187824359241, "learning_rate": 0.0001955649512489699, "loss": 12.6716, "step": 4492 }, { "epoch": 0.24466180664747164, "grad_norm": 0.7317652042296284, "learning_rate": 0.00019556235384409764, "loss": 12.5856, "step": 4493 }, { "epoch": 0.24471626064405463, "grad_norm": 0.7157174847925366, "learning_rate": 0.00019555975569611654, "loss": 12.7635, "step": 4494 }, { "epoch": 0.24477071464063765, "grad_norm": 0.7177168932371809, "learning_rate": 0.00019555715680504666, "loss": 12.5829, "step": 4495 }, { "epoch": 0.24482516863722067, "grad_norm": 0.7339058397426691, "learning_rate": 0.0001955545571709084, "loss": 12.6403, "step": 4496 }, { "epoch": 0.24487962263380367, "grad_norm": 0.7295417996740731, "learning_rate": 0.0001955519567937218, "loss": 12.613, "step": 4497 }, { "epoch": 0.2449340766303867, "grad_norm": 0.7351610935959223, "learning_rate": 0.00019554935567350721, "loss": 12.5433, "step": 4498 }, { "epoch": 0.2449885306269697, "grad_norm": 0.6982007901341996, "learning_rate": 0.00019554675381028478, "loss": 12.7112, "step": 4499 }, { "epoch": 0.24504298462355273, "grad_norm": 0.6846838175361749, "learning_rate": 0.00019554415120407478, "loss": 12.5285, "step": 4500 }, { "epoch": 0.24509743862013572, "grad_norm": 0.7254197253188015, "learning_rate": 0.00019554154785489744, "loss": 12.4814, "step": 4501 }, { "epoch": 0.24515189261671874, "grad_norm": 0.709088370584864, "learning_rate": 0.000195538943762773, "loss": 12.5766, "step": 4502 }, { "epoch": 0.24520634661330176, "grad_norm": 0.9468716232091118, "learning_rate": 0.00019553633892772172, "loss": 12.6409, "step": 4503 }, { "epoch": 0.24526080060988475, "grad_norm": 0.7480225416160845, "learning_rate": 0.00019553373334976385, "loss": 12.6486, "step": 4504 }, { "epoch": 0.24531525460646778, "grad_norm": 0.8838370936909044, "learning_rate": 0.00019553112702891962, "loss": 12.545, "step": 4505 }, { "epoch": 0.2453697086030508, "grad_norm": 0.7603176328637311, "learning_rate": 0.00019552851996520936, "loss": 12.6969, "step": 4506 }, { "epoch": 0.2454241625996338, "grad_norm": 0.6745237563930223, "learning_rate": 0.00019552591215865327, "loss": 12.5745, "step": 4507 }, { "epoch": 0.2454786165962168, "grad_norm": 0.7399326419492032, "learning_rate": 0.0001955233036092717, "loss": 12.6872, "step": 4508 }, { "epoch": 0.24553307059279983, "grad_norm": 0.7077478699077312, "learning_rate": 0.0001955206943170849, "loss": 12.5911, "step": 4509 }, { "epoch": 0.24558752458938282, "grad_norm": 0.7307766289166041, "learning_rate": 0.00019551808428211313, "loss": 12.5886, "step": 4510 }, { "epoch": 0.24564197858596584, "grad_norm": 0.6607937087585422, "learning_rate": 0.00019551547350437677, "loss": 12.6056, "step": 4511 }, { "epoch": 0.24569643258254886, "grad_norm": 0.6781420170848789, "learning_rate": 0.000195512861983896, "loss": 12.5309, "step": 4512 }, { "epoch": 0.24575088657913186, "grad_norm": 0.7274451215503108, "learning_rate": 0.00019551024972069126, "loss": 12.6383, "step": 4513 }, { "epoch": 0.24580534057571488, "grad_norm": 0.7328547155168079, "learning_rate": 0.00019550763671478277, "loss": 12.6395, "step": 4514 }, { "epoch": 0.2458597945722979, "grad_norm": 0.7098023166777683, "learning_rate": 0.00019550502296619089, "loss": 12.5694, "step": 4515 }, { "epoch": 0.2459142485688809, "grad_norm": 0.8286201301595062, "learning_rate": 0.00019550240847493594, "loss": 12.6161, "step": 4516 }, { "epoch": 0.2459687025654639, "grad_norm": 0.74030169363629, "learning_rate": 0.00019549979324103825, "loss": 12.6025, "step": 4517 }, { "epoch": 0.24602315656204693, "grad_norm": 0.6559365019851384, "learning_rate": 0.00019549717726451813, "loss": 12.6041, "step": 4518 }, { "epoch": 0.24607761055862995, "grad_norm": 0.7240693992512116, "learning_rate": 0.00019549456054539596, "loss": 12.6527, "step": 4519 }, { "epoch": 0.24613206455521294, "grad_norm": 0.8447464162145961, "learning_rate": 0.0001954919430836921, "loss": 12.5594, "step": 4520 }, { "epoch": 0.24618651855179596, "grad_norm": 0.7760926547157477, "learning_rate": 0.00019548932487942683, "loss": 12.6414, "step": 4521 }, { "epoch": 0.24624097254837898, "grad_norm": 0.7862686525620901, "learning_rate": 0.00019548670593262055, "loss": 12.5695, "step": 4522 }, { "epoch": 0.24629542654496198, "grad_norm": 0.6977819603616029, "learning_rate": 0.00019548408624329368, "loss": 12.5997, "step": 4523 }, { "epoch": 0.246349880541545, "grad_norm": 0.7581882072818931, "learning_rate": 0.0001954814658114665, "loss": 12.5679, "step": 4524 }, { "epoch": 0.24640433453812802, "grad_norm": 0.7417220054167826, "learning_rate": 0.00019547884463715944, "loss": 12.5288, "step": 4525 }, { "epoch": 0.246458788534711, "grad_norm": 0.6571353614508388, "learning_rate": 0.00019547622272039287, "loss": 12.5123, "step": 4526 }, { "epoch": 0.24651324253129403, "grad_norm": 0.9819943625994235, "learning_rate": 0.0001954736000611872, "loss": 12.6171, "step": 4527 }, { "epoch": 0.24656769652787705, "grad_norm": 0.8662000801950319, "learning_rate": 0.0001954709766595628, "loss": 12.702, "step": 4528 }, { "epoch": 0.24662215052446004, "grad_norm": 0.7330313779627509, "learning_rate": 0.00019546835251554008, "loss": 12.6235, "step": 4529 }, { "epoch": 0.24667660452104306, "grad_norm": 0.6708599988369802, "learning_rate": 0.00019546572762913942, "loss": 12.6261, "step": 4530 }, { "epoch": 0.24673105851762608, "grad_norm": 0.6875341786134898, "learning_rate": 0.00019546310200038125, "loss": 12.648, "step": 4531 }, { "epoch": 0.24678551251420908, "grad_norm": 0.6884220381444792, "learning_rate": 0.000195460475629286, "loss": 12.6227, "step": 4532 }, { "epoch": 0.2468399665107921, "grad_norm": 0.7260017148269406, "learning_rate": 0.0001954578485158741, "loss": 12.4933, "step": 4533 }, { "epoch": 0.24689442050737512, "grad_norm": 0.7986836441157589, "learning_rate": 0.00019545522066016595, "loss": 12.5843, "step": 4534 }, { "epoch": 0.24694887450395814, "grad_norm": 0.7316985170974813, "learning_rate": 0.00019545259206218198, "loss": 12.6712, "step": 4535 }, { "epoch": 0.24700332850054113, "grad_norm": 0.716498663788949, "learning_rate": 0.00019544996272194266, "loss": 12.6374, "step": 4536 }, { "epoch": 0.24705778249712415, "grad_norm": 0.6871600074292533, "learning_rate": 0.00019544733263946845, "loss": 12.5411, "step": 4537 }, { "epoch": 0.24711223649370717, "grad_norm": 0.8407670283999826, "learning_rate": 0.0001954447018147797, "loss": 12.6246, "step": 4538 }, { "epoch": 0.24716669049029016, "grad_norm": 0.722063707325831, "learning_rate": 0.00019544207024789703, "loss": 12.4979, "step": 4539 }, { "epoch": 0.24722114448687318, "grad_norm": 0.7595660700514336, "learning_rate": 0.00019543943793884076, "loss": 12.6016, "step": 4540 }, { "epoch": 0.2472755984834562, "grad_norm": 0.7803560129403373, "learning_rate": 0.00019543680488763143, "loss": 12.6529, "step": 4541 }, { "epoch": 0.2473300524800392, "grad_norm": 0.6852586791744784, "learning_rate": 0.00019543417109428953, "loss": 12.6201, "step": 4542 }, { "epoch": 0.24738450647662222, "grad_norm": 0.708842264429155, "learning_rate": 0.00019543153655883545, "loss": 12.6531, "step": 4543 }, { "epoch": 0.24743896047320524, "grad_norm": 0.6785694911685014, "learning_rate": 0.0001954289012812898, "loss": 12.6396, "step": 4544 }, { "epoch": 0.24749341446978823, "grad_norm": 0.6822476492481611, "learning_rate": 0.00019542626526167296, "loss": 12.5853, "step": 4545 }, { "epoch": 0.24754786846637125, "grad_norm": 0.6755924097105872, "learning_rate": 0.0001954236285000055, "loss": 12.5671, "step": 4546 }, { "epoch": 0.24760232246295427, "grad_norm": 0.702809518986082, "learning_rate": 0.0001954209909963079, "loss": 12.5755, "step": 4547 }, { "epoch": 0.24765677645953726, "grad_norm": 0.6967556394981288, "learning_rate": 0.0001954183527506007, "loss": 12.6763, "step": 4548 }, { "epoch": 0.24771123045612028, "grad_norm": 0.662645469888712, "learning_rate": 0.00019541571376290436, "loss": 12.5686, "step": 4549 }, { "epoch": 0.2477656844527033, "grad_norm": 0.74349023652389, "learning_rate": 0.00019541307403323944, "loss": 12.6814, "step": 4550 }, { "epoch": 0.24782013844928633, "grad_norm": 0.6806950441026586, "learning_rate": 0.00019541043356162643, "loss": 12.5481, "step": 4551 }, { "epoch": 0.24787459244586932, "grad_norm": 0.6826692501332223, "learning_rate": 0.0001954077923480859, "loss": 12.522, "step": 4552 }, { "epoch": 0.24792904644245234, "grad_norm": 0.7024226341344949, "learning_rate": 0.00019540515039263837, "loss": 12.5405, "step": 4553 }, { "epoch": 0.24798350043903536, "grad_norm": 0.7953597336673244, "learning_rate": 0.00019540250769530443, "loss": 12.6771, "step": 4554 }, { "epoch": 0.24803795443561835, "grad_norm": 0.6986416150802731, "learning_rate": 0.00019539986425610453, "loss": 12.5993, "step": 4555 }, { "epoch": 0.24809240843220137, "grad_norm": 0.7842682390286129, "learning_rate": 0.00019539722007505934, "loss": 12.6273, "step": 4556 }, { "epoch": 0.2481468624287844, "grad_norm": 0.7765687604639168, "learning_rate": 0.00019539457515218932, "loss": 12.5093, "step": 4557 }, { "epoch": 0.24820131642536739, "grad_norm": 0.8003804060961116, "learning_rate": 0.00019539192948751514, "loss": 12.5759, "step": 4558 }, { "epoch": 0.2482557704219504, "grad_norm": 0.7202326885917015, "learning_rate": 0.0001953892830810573, "loss": 12.4316, "step": 4559 }, { "epoch": 0.24831022441853343, "grad_norm": 0.7474728919149864, "learning_rate": 0.00019538663593283637, "loss": 12.6285, "step": 4560 }, { "epoch": 0.24836467841511642, "grad_norm": 0.7812249465629221, "learning_rate": 0.00019538398804287298, "loss": 12.6414, "step": 4561 }, { "epoch": 0.24841913241169944, "grad_norm": 0.693946563301219, "learning_rate": 0.00019538133941118772, "loss": 12.4846, "step": 4562 }, { "epoch": 0.24847358640828246, "grad_norm": 0.7697627760440247, "learning_rate": 0.00019537869003780116, "loss": 12.6134, "step": 4563 }, { "epoch": 0.24852804040486545, "grad_norm": 0.908532305342887, "learning_rate": 0.0001953760399227339, "loss": 12.6881, "step": 4564 }, { "epoch": 0.24858249440144847, "grad_norm": 0.6788580510320924, "learning_rate": 0.00019537338906600659, "loss": 12.5017, "step": 4565 }, { "epoch": 0.2486369483980315, "grad_norm": 0.7197728170770006, "learning_rate": 0.00019537073746763977, "loss": 12.5242, "step": 4566 }, { "epoch": 0.2486914023946145, "grad_norm": 0.8205666944352907, "learning_rate": 0.00019536808512765413, "loss": 12.7204, "step": 4567 }, { "epoch": 0.2487458563911975, "grad_norm": 0.7434051541276757, "learning_rate": 0.00019536543204607025, "loss": 12.6962, "step": 4568 }, { "epoch": 0.24880031038778053, "grad_norm": 0.6981666345210649, "learning_rate": 0.00019536277822290878, "loss": 12.645, "step": 4569 }, { "epoch": 0.24885476438436355, "grad_norm": 0.7823360214580403, "learning_rate": 0.00019536012365819038, "loss": 12.6003, "step": 4570 }, { "epoch": 0.24890921838094654, "grad_norm": 0.7563770621808997, "learning_rate": 0.00019535746835193564, "loss": 12.6248, "step": 4571 }, { "epoch": 0.24896367237752956, "grad_norm": 0.7685321115617461, "learning_rate": 0.00019535481230416524, "loss": 12.6604, "step": 4572 }, { "epoch": 0.24901812637411258, "grad_norm": 0.8625199287410501, "learning_rate": 0.00019535215551489982, "loss": 12.7927, "step": 4573 }, { "epoch": 0.24907258037069557, "grad_norm": 0.8455092054785058, "learning_rate": 0.00019534949798416006, "loss": 12.6436, "step": 4574 }, { "epoch": 0.2491270343672786, "grad_norm": 0.8004646051656875, "learning_rate": 0.00019534683971196662, "loss": 12.7403, "step": 4575 }, { "epoch": 0.24918148836386161, "grad_norm": 0.7167876264537866, "learning_rate": 0.00019534418069834013, "loss": 12.6044, "step": 4576 }, { "epoch": 0.2492359423604446, "grad_norm": 0.7419510607817421, "learning_rate": 0.00019534152094330133, "loss": 12.581, "step": 4577 }, { "epoch": 0.24929039635702763, "grad_norm": 0.8028717993107427, "learning_rate": 0.00019533886044687088, "loss": 12.5883, "step": 4578 }, { "epoch": 0.24934485035361065, "grad_norm": 0.7294878102992232, "learning_rate": 0.00019533619920906946, "loss": 12.5647, "step": 4579 }, { "epoch": 0.24939930435019364, "grad_norm": 0.7266599602586925, "learning_rate": 0.00019533353722991776, "loss": 12.6332, "step": 4580 }, { "epoch": 0.24945375834677666, "grad_norm": 0.7077771439865166, "learning_rate": 0.00019533087450943648, "loss": 12.6462, "step": 4581 }, { "epoch": 0.24950821234335968, "grad_norm": 0.675721997388249, "learning_rate": 0.00019532821104764633, "loss": 12.706, "step": 4582 }, { "epoch": 0.24956266633994267, "grad_norm": 0.729758379877045, "learning_rate": 0.00019532554684456805, "loss": 12.7759, "step": 4583 }, { "epoch": 0.2496171203365257, "grad_norm": 0.8246553097877232, "learning_rate": 0.0001953228819002223, "loss": 12.5836, "step": 4584 }, { "epoch": 0.24967157433310871, "grad_norm": 0.6787022092779801, "learning_rate": 0.00019532021621462988, "loss": 12.5742, "step": 4585 }, { "epoch": 0.24972602832969173, "grad_norm": 0.7525877839664052, "learning_rate": 0.00019531754978781141, "loss": 12.528, "step": 4586 }, { "epoch": 0.24978048232627473, "grad_norm": 0.7884072027096795, "learning_rate": 0.00019531488261978773, "loss": 12.5593, "step": 4587 }, { "epoch": 0.24983493632285775, "grad_norm": 0.7306243201798892, "learning_rate": 0.00019531221471057956, "loss": 12.6264, "step": 4588 }, { "epoch": 0.24988939031944077, "grad_norm": 0.6650271940846968, "learning_rate": 0.00019530954606020759, "loss": 12.5565, "step": 4589 }, { "epoch": 0.24994384431602376, "grad_norm": 0.6988800412957692, "learning_rate": 0.0001953068766686926, "loss": 12.5774, "step": 4590 }, { "epoch": 0.24999829831260678, "grad_norm": 0.8740708821128162, "learning_rate": 0.0001953042065360554, "loss": 12.8126, "step": 4591 }, { "epoch": 0.2500527523091898, "grad_norm": 0.8266690825077698, "learning_rate": 0.00019530153566231666, "loss": 12.5194, "step": 4592 }, { "epoch": 0.2501072063057728, "grad_norm": 0.6552219976399752, "learning_rate": 0.00019529886404749723, "loss": 12.4998, "step": 4593 }, { "epoch": 0.2501616603023558, "grad_norm": 0.7949170552992534, "learning_rate": 0.00019529619169161781, "loss": 12.6232, "step": 4594 }, { "epoch": 0.2502161142989388, "grad_norm": 0.7088863266575556, "learning_rate": 0.00019529351859469928, "loss": 12.6323, "step": 4595 }, { "epoch": 0.25027056829552186, "grad_norm": 0.841859299254048, "learning_rate": 0.0001952908447567623, "loss": 12.6773, "step": 4596 }, { "epoch": 0.25032502229210485, "grad_norm": 0.7756336581104332, "learning_rate": 0.00019528817017782778, "loss": 12.4028, "step": 4597 }, { "epoch": 0.25037947628868784, "grad_norm": 0.7572196940976758, "learning_rate": 0.00019528549485791646, "loss": 12.5988, "step": 4598 }, { "epoch": 0.2504339302852709, "grad_norm": 0.8035920475602101, "learning_rate": 0.00019528281879704912, "loss": 12.5705, "step": 4599 }, { "epoch": 0.2504883842818539, "grad_norm": 0.6541580909787088, "learning_rate": 0.00019528014199524663, "loss": 12.53, "step": 4600 }, { "epoch": 0.2505428382784369, "grad_norm": 0.7846336945332629, "learning_rate": 0.0001952774644525298, "loss": 12.6758, "step": 4601 }, { "epoch": 0.2505972922750199, "grad_norm": 0.7364919279335622, "learning_rate": 0.00019527478616891938, "loss": 12.5414, "step": 4602 }, { "epoch": 0.2506517462716029, "grad_norm": 0.7374226191410725, "learning_rate": 0.00019527210714443628, "loss": 12.6017, "step": 4603 }, { "epoch": 0.2507062002681859, "grad_norm": 0.6828349388848473, "learning_rate": 0.00019526942737910127, "loss": 12.6738, "step": 4604 }, { "epoch": 0.25076065426476896, "grad_norm": 0.7463262104769107, "learning_rate": 0.00019526674687293525, "loss": 12.655, "step": 4605 }, { "epoch": 0.25081510826135195, "grad_norm": 0.6958274601778409, "learning_rate": 0.000195264065625959, "loss": 12.4563, "step": 4606 }, { "epoch": 0.25086956225793494, "grad_norm": 0.8744339011342619, "learning_rate": 0.0001952613836381934, "loss": 12.6371, "step": 4607 }, { "epoch": 0.250924016254518, "grad_norm": 0.7447951983208602, "learning_rate": 0.00019525870090965935, "loss": 12.5699, "step": 4608 }, { "epoch": 0.250978470251101, "grad_norm": 0.8770057055867937, "learning_rate": 0.00019525601744037764, "loss": 12.6631, "step": 4609 }, { "epoch": 0.25103292424768403, "grad_norm": 0.704687414173674, "learning_rate": 0.00019525333323036913, "loss": 12.4892, "step": 4610 }, { "epoch": 0.251087378244267, "grad_norm": 0.8420204054546363, "learning_rate": 0.0001952506482796548, "loss": 12.5162, "step": 4611 }, { "epoch": 0.25114183224085, "grad_norm": 0.7180873889371547, "learning_rate": 0.00019524796258825537, "loss": 12.6455, "step": 4612 }, { "epoch": 0.25119628623743306, "grad_norm": 0.737165717454661, "learning_rate": 0.00019524527615619186, "loss": 12.7034, "step": 4613 }, { "epoch": 0.25125074023401606, "grad_norm": 0.7384097064777905, "learning_rate": 0.0001952425889834851, "loss": 12.7689, "step": 4614 }, { "epoch": 0.25130519423059905, "grad_norm": 0.8840678095617902, "learning_rate": 0.00019523990107015598, "loss": 12.7451, "step": 4615 }, { "epoch": 0.2513596482271821, "grad_norm": 0.7163607304960726, "learning_rate": 0.00019523721241622547, "loss": 12.4507, "step": 4616 }, { "epoch": 0.2514141022237651, "grad_norm": 0.7197221870928445, "learning_rate": 0.0001952345230217144, "loss": 12.757, "step": 4617 }, { "epoch": 0.2514685562203481, "grad_norm": 0.8539264131334994, "learning_rate": 0.0001952318328866437, "loss": 12.5577, "step": 4618 }, { "epoch": 0.25152301021693113, "grad_norm": 0.7491476006380345, "learning_rate": 0.00019522914201103428, "loss": 12.6483, "step": 4619 }, { "epoch": 0.2515774642135141, "grad_norm": 0.6817222614207579, "learning_rate": 0.00019522645039490708, "loss": 12.4453, "step": 4620 }, { "epoch": 0.2516319182100971, "grad_norm": 0.8458457675512254, "learning_rate": 0.00019522375803828306, "loss": 12.6245, "step": 4621 }, { "epoch": 0.25168637220668016, "grad_norm": 0.8751640549465032, "learning_rate": 0.0001952210649411831, "loss": 12.6798, "step": 4622 }, { "epoch": 0.25174082620326316, "grad_norm": 0.753038570022357, "learning_rate": 0.0001952183711036282, "loss": 12.5469, "step": 4623 }, { "epoch": 0.25179528019984615, "grad_norm": 0.7411325111987628, "learning_rate": 0.00019521567652563927, "loss": 12.592, "step": 4624 }, { "epoch": 0.2518497341964292, "grad_norm": 0.6747257568929965, "learning_rate": 0.0001952129812072373, "loss": 12.4863, "step": 4625 }, { "epoch": 0.2519041881930122, "grad_norm": 0.7675323270556783, "learning_rate": 0.00019521028514844316, "loss": 12.4248, "step": 4626 }, { "epoch": 0.2519586421895952, "grad_norm": 0.6906531701123305, "learning_rate": 0.00019520758834927788, "loss": 12.6437, "step": 4627 }, { "epoch": 0.25201309618617823, "grad_norm": 0.8792626963119561, "learning_rate": 0.00019520489080976247, "loss": 12.6249, "step": 4628 }, { "epoch": 0.2520675501827612, "grad_norm": 0.736497661373532, "learning_rate": 0.00019520219252991785, "loss": 12.5359, "step": 4629 }, { "epoch": 0.2521220041793442, "grad_norm": 0.8371944344160387, "learning_rate": 0.000195199493509765, "loss": 12.6581, "step": 4630 }, { "epoch": 0.25217645817592726, "grad_norm": 0.6801403777773931, "learning_rate": 0.00019519679374932494, "loss": 12.6248, "step": 4631 }, { "epoch": 0.25223091217251026, "grad_norm": 0.7314974324886186, "learning_rate": 0.00019519409324861864, "loss": 12.5213, "step": 4632 }, { "epoch": 0.25228536616909325, "grad_norm": 0.7927647348104103, "learning_rate": 0.0001951913920076671, "loss": 12.551, "step": 4633 }, { "epoch": 0.2523398201656763, "grad_norm": 0.652316124095355, "learning_rate": 0.00019518869002649135, "loss": 12.5875, "step": 4634 }, { "epoch": 0.2523942741622593, "grad_norm": 0.7051434214547742, "learning_rate": 0.00019518598730511238, "loss": 12.6323, "step": 4635 }, { "epoch": 0.2524487281588423, "grad_norm": 0.7956144347172418, "learning_rate": 0.00019518328384355118, "loss": 12.7241, "step": 4636 }, { "epoch": 0.25250318215542533, "grad_norm": 0.7341236561572148, "learning_rate": 0.00019518057964182882, "loss": 12.6638, "step": 4637 }, { "epoch": 0.2525576361520083, "grad_norm": 0.8583819376050722, "learning_rate": 0.0001951778746999663, "loss": 12.5505, "step": 4638 }, { "epoch": 0.2526120901485913, "grad_norm": 0.7002431638214149, "learning_rate": 0.00019517516901798468, "loss": 12.5828, "step": 4639 }, { "epoch": 0.25266654414517437, "grad_norm": 0.6704356853347958, "learning_rate": 0.00019517246259590502, "loss": 12.3791, "step": 4640 }, { "epoch": 0.25272099814175736, "grad_norm": 0.776267355947518, "learning_rate": 0.0001951697554337483, "loss": 12.6886, "step": 4641 }, { "epoch": 0.25277545213834035, "grad_norm": 0.6812531539118747, "learning_rate": 0.0001951670475315356, "loss": 12.5179, "step": 4642 }, { "epoch": 0.2528299061349234, "grad_norm": 0.7270298855868224, "learning_rate": 0.00019516433888928795, "loss": 12.5398, "step": 4643 }, { "epoch": 0.2528843601315064, "grad_norm": 0.6768849151005556, "learning_rate": 0.00019516162950702649, "loss": 12.5489, "step": 4644 }, { "epoch": 0.25293881412808944, "grad_norm": 0.6724977998810044, "learning_rate": 0.00019515891938477222, "loss": 12.5863, "step": 4645 }, { "epoch": 0.25299326812467243, "grad_norm": 0.7309444180220243, "learning_rate": 0.00019515620852254625, "loss": 12.4596, "step": 4646 }, { "epoch": 0.2530477221212554, "grad_norm": 0.7977134686558187, "learning_rate": 0.00019515349692036962, "loss": 12.7878, "step": 4647 }, { "epoch": 0.2531021761178385, "grad_norm": 0.6862794292196578, "learning_rate": 0.00019515078457826344, "loss": 12.4598, "step": 4648 }, { "epoch": 0.25315663011442147, "grad_norm": 0.701878486537764, "learning_rate": 0.0001951480714962488, "loss": 12.5413, "step": 4649 }, { "epoch": 0.25321108411100446, "grad_norm": 0.8180141005964334, "learning_rate": 0.0001951453576743468, "loss": 12.396, "step": 4650 }, { "epoch": 0.2532655381075875, "grad_norm": 0.7346431155727067, "learning_rate": 0.00019514264311257858, "loss": 12.6374, "step": 4651 }, { "epoch": 0.2533199921041705, "grad_norm": 0.6830933567935363, "learning_rate": 0.00019513992781096517, "loss": 12.6301, "step": 4652 }, { "epoch": 0.2533744461007535, "grad_norm": 0.716153198686455, "learning_rate": 0.00019513721176952776, "loss": 12.568, "step": 4653 }, { "epoch": 0.25342890009733654, "grad_norm": 0.6666130419809555, "learning_rate": 0.00019513449498828738, "loss": 12.5816, "step": 4654 }, { "epoch": 0.25348335409391953, "grad_norm": 0.7618240849952472, "learning_rate": 0.00019513177746726526, "loss": 12.5489, "step": 4655 }, { "epoch": 0.2535378080905025, "grad_norm": 0.7861808479939798, "learning_rate": 0.0001951290592064825, "loss": 12.6229, "step": 4656 }, { "epoch": 0.2535922620870856, "grad_norm": 0.7792128962577906, "learning_rate": 0.00019512634020596022, "loss": 12.6316, "step": 4657 }, { "epoch": 0.25364671608366857, "grad_norm": 0.728082447715372, "learning_rate": 0.00019512362046571953, "loss": 12.6125, "step": 4658 }, { "epoch": 0.25370117008025156, "grad_norm": 0.6593637366358605, "learning_rate": 0.00019512089998578163, "loss": 12.4644, "step": 4659 }, { "epoch": 0.2537556240768346, "grad_norm": 0.6873917824355797, "learning_rate": 0.00019511817876616765, "loss": 12.6692, "step": 4660 }, { "epoch": 0.2538100780734176, "grad_norm": 0.6782354154408184, "learning_rate": 0.00019511545680689878, "loss": 12.506, "step": 4661 }, { "epoch": 0.2538645320700006, "grad_norm": 0.7415950041617407, "learning_rate": 0.00019511273410799615, "loss": 12.5421, "step": 4662 }, { "epoch": 0.25391898606658364, "grad_norm": 0.767687803641539, "learning_rate": 0.00019511001066948097, "loss": 12.6216, "step": 4663 }, { "epoch": 0.25397344006316663, "grad_norm": 0.856036373749487, "learning_rate": 0.00019510728649137438, "loss": 12.651, "step": 4664 }, { "epoch": 0.2540278940597496, "grad_norm": 0.7352629375371106, "learning_rate": 0.0001951045615736976, "loss": 12.5666, "step": 4665 }, { "epoch": 0.2540823480563327, "grad_norm": 0.7208212681736773, "learning_rate": 0.00019510183591647174, "loss": 12.6415, "step": 4666 }, { "epoch": 0.25413680205291567, "grad_norm": 0.7564664297026775, "learning_rate": 0.00019509910951971812, "loss": 12.7613, "step": 4667 }, { "epoch": 0.25419125604949866, "grad_norm": 0.6759363508762495, "learning_rate": 0.00019509638238345787, "loss": 12.5443, "step": 4668 }, { "epoch": 0.2542457100460817, "grad_norm": 0.7440815182574181, "learning_rate": 0.00019509365450771219, "loss": 12.6338, "step": 4669 }, { "epoch": 0.2543001640426647, "grad_norm": 0.7406671177032623, "learning_rate": 0.00019509092589250232, "loss": 12.5788, "step": 4670 }, { "epoch": 0.2543546180392477, "grad_norm": 0.7009872427479097, "learning_rate": 0.00019508819653784942, "loss": 12.6131, "step": 4671 }, { "epoch": 0.25440907203583074, "grad_norm": 0.7815802022245177, "learning_rate": 0.00019508546644377478, "loss": 12.7572, "step": 4672 }, { "epoch": 0.25446352603241373, "grad_norm": 0.6203163966102102, "learning_rate": 0.00019508273561029963, "loss": 12.6344, "step": 4673 }, { "epoch": 0.2545179800289967, "grad_norm": 0.6469569272437944, "learning_rate": 0.00019508000403744517, "loss": 12.7187, "step": 4674 }, { "epoch": 0.2545724340255798, "grad_norm": 0.709319102432162, "learning_rate": 0.00019507727172523264, "loss": 12.6195, "step": 4675 }, { "epoch": 0.25462688802216277, "grad_norm": 0.6847848179775061, "learning_rate": 0.0001950745386736833, "loss": 12.717, "step": 4676 }, { "epoch": 0.2546813420187458, "grad_norm": 0.6724445808052889, "learning_rate": 0.0001950718048828184, "loss": 12.4394, "step": 4677 }, { "epoch": 0.2547357960153288, "grad_norm": 0.7258117422289937, "learning_rate": 0.00019506907035265924, "loss": 12.6126, "step": 4678 }, { "epoch": 0.2547902500119118, "grad_norm": 0.7029932545816441, "learning_rate": 0.000195066335083227, "loss": 12.6328, "step": 4679 }, { "epoch": 0.25484470400849485, "grad_norm": 0.7363875572775394, "learning_rate": 0.00019506359907454302, "loss": 12.6315, "step": 4680 }, { "epoch": 0.25489915800507784, "grad_norm": 0.7120951164580571, "learning_rate": 0.00019506086232662858, "loss": 12.4803, "step": 4681 }, { "epoch": 0.25495361200166083, "grad_norm": 0.7888478831502377, "learning_rate": 0.00019505812483950488, "loss": 12.6586, "step": 4682 }, { "epoch": 0.2550080659982439, "grad_norm": 0.6638333450862576, "learning_rate": 0.00019505538661319328, "loss": 12.5237, "step": 4683 }, { "epoch": 0.2550625199948269, "grad_norm": 0.6695752920801711, "learning_rate": 0.00019505264764771505, "loss": 12.5669, "step": 4684 }, { "epoch": 0.25511697399140987, "grad_norm": 0.7552995697491214, "learning_rate": 0.00019504990794309151, "loss": 12.5755, "step": 4685 }, { "epoch": 0.2551714279879929, "grad_norm": 0.7076072556809054, "learning_rate": 0.00019504716749934394, "loss": 12.5666, "step": 4686 }, { "epoch": 0.2552258819845759, "grad_norm": 0.7688772490788264, "learning_rate": 0.00019504442631649362, "loss": 12.7533, "step": 4687 }, { "epoch": 0.2552803359811589, "grad_norm": 0.7287168089619401, "learning_rate": 0.00019504168439456193, "loss": 12.4461, "step": 4688 }, { "epoch": 0.25533478997774195, "grad_norm": 0.7621658441592705, "learning_rate": 0.00019503894173357017, "loss": 12.4661, "step": 4689 }, { "epoch": 0.25538924397432494, "grad_norm": 0.6661750509434194, "learning_rate": 0.00019503619833353966, "loss": 12.5776, "step": 4690 }, { "epoch": 0.25544369797090793, "grad_norm": 0.8341339017909717, "learning_rate": 0.00019503345419449172, "loss": 12.7292, "step": 4691 }, { "epoch": 0.255498151967491, "grad_norm": 0.7358515877426577, "learning_rate": 0.0001950307093164477, "loss": 12.6913, "step": 4692 }, { "epoch": 0.255552605964074, "grad_norm": 0.7633606014871572, "learning_rate": 0.00019502796369942895, "loss": 12.5919, "step": 4693 }, { "epoch": 0.25560705996065697, "grad_norm": 0.7378217364635543, "learning_rate": 0.00019502521734345685, "loss": 12.6138, "step": 4694 }, { "epoch": 0.25566151395724, "grad_norm": 0.7411934737877932, "learning_rate": 0.00019502247024855268, "loss": 12.5775, "step": 4695 }, { "epoch": 0.255715967953823, "grad_norm": 0.6504062988581084, "learning_rate": 0.00019501972241473786, "loss": 12.7351, "step": 4696 }, { "epoch": 0.255770421950406, "grad_norm": 0.7644212366301545, "learning_rate": 0.00019501697384203376, "loss": 12.5324, "step": 4697 }, { "epoch": 0.25582487594698905, "grad_norm": 0.6450263672882387, "learning_rate": 0.00019501422453046174, "loss": 12.4571, "step": 4698 }, { "epoch": 0.25587932994357204, "grad_norm": 0.7020161938921881, "learning_rate": 0.00019501147448004318, "loss": 12.5161, "step": 4699 }, { "epoch": 0.25593378394015504, "grad_norm": 0.6694991425623197, "learning_rate": 0.00019500872369079944, "loss": 12.5092, "step": 4700 }, { "epoch": 0.2559882379367381, "grad_norm": 0.6472421122968421, "learning_rate": 0.0001950059721627519, "loss": 12.4466, "step": 4701 }, { "epoch": 0.2560426919333211, "grad_norm": 0.6765932649098179, "learning_rate": 0.00019500321989592204, "loss": 12.5659, "step": 4702 }, { "epoch": 0.25609714592990407, "grad_norm": 0.7110563531735874, "learning_rate": 0.0001950004668903312, "loss": 12.5553, "step": 4703 }, { "epoch": 0.2561515999264871, "grad_norm": 0.729546587513424, "learning_rate": 0.0001949977131460008, "loss": 12.5421, "step": 4704 }, { "epoch": 0.2562060539230701, "grad_norm": 0.6992897637338722, "learning_rate": 0.00019499495866295225, "loss": 12.6526, "step": 4705 }, { "epoch": 0.2562605079196531, "grad_norm": 0.7222729187165062, "learning_rate": 0.00019499220344120697, "loss": 12.6912, "step": 4706 }, { "epoch": 0.25631496191623615, "grad_norm": 0.8148839595759461, "learning_rate": 0.00019498944748078638, "loss": 12.6897, "step": 4707 }, { "epoch": 0.25636941591281914, "grad_norm": 0.6505194283908418, "learning_rate": 0.0001949866907817119, "loss": 12.553, "step": 4708 }, { "epoch": 0.25642386990940214, "grad_norm": 0.753503763796297, "learning_rate": 0.000194983933344005, "loss": 12.5926, "step": 4709 }, { "epoch": 0.2564783239059852, "grad_norm": 0.7228660846866438, "learning_rate": 0.0001949811751676871, "loss": 12.5322, "step": 4710 }, { "epoch": 0.2565327779025682, "grad_norm": 0.749767652793384, "learning_rate": 0.00019497841625277967, "loss": 12.5987, "step": 4711 }, { "epoch": 0.2565872318991512, "grad_norm": 0.7264752568702961, "learning_rate": 0.00019497565659930413, "loss": 12.6687, "step": 4712 }, { "epoch": 0.2566416858957342, "grad_norm": 0.7555201243456778, "learning_rate": 0.00019497289620728196, "loss": 12.5931, "step": 4713 }, { "epoch": 0.2566961398923172, "grad_norm": 0.7307088174451337, "learning_rate": 0.00019497013507673464, "loss": 12.6921, "step": 4714 }, { "epoch": 0.25675059388890026, "grad_norm": 0.7658765194364088, "learning_rate": 0.00019496737320768358, "loss": 12.6566, "step": 4715 }, { "epoch": 0.25680504788548325, "grad_norm": 0.8126722060344513, "learning_rate": 0.00019496461060015036, "loss": 12.6151, "step": 4716 }, { "epoch": 0.25685950188206624, "grad_norm": 0.6542168082813249, "learning_rate": 0.00019496184725415635, "loss": 12.4329, "step": 4717 }, { "epoch": 0.2569139558786493, "grad_norm": 0.6634464108209316, "learning_rate": 0.00019495908316972314, "loss": 12.4756, "step": 4718 }, { "epoch": 0.2569684098752323, "grad_norm": 0.7463975669736462, "learning_rate": 0.00019495631834687212, "loss": 12.6905, "step": 4719 }, { "epoch": 0.2570228638718153, "grad_norm": 0.7573454303384313, "learning_rate": 0.00019495355278562488, "loss": 12.7064, "step": 4720 }, { "epoch": 0.2570773178683983, "grad_norm": 0.734300825979996, "learning_rate": 0.00019495078648600287, "loss": 12.6519, "step": 4721 }, { "epoch": 0.2571317718649813, "grad_norm": 0.6784505364892656, "learning_rate": 0.00019494801944802762, "loss": 12.4599, "step": 4722 }, { "epoch": 0.2571862258615643, "grad_norm": 0.8001786847577275, "learning_rate": 0.00019494525167172068, "loss": 12.7742, "step": 4723 }, { "epoch": 0.25724067985814736, "grad_norm": 0.6908462600955748, "learning_rate": 0.00019494248315710352, "loss": 12.6113, "step": 4724 }, { "epoch": 0.25729513385473035, "grad_norm": 0.6502127393656697, "learning_rate": 0.0001949397139041977, "loss": 12.2824, "step": 4725 }, { "epoch": 0.25734958785131334, "grad_norm": 0.7030919669677971, "learning_rate": 0.00019493694391302472, "loss": 12.6083, "step": 4726 }, { "epoch": 0.2574040418478964, "grad_norm": 0.6798754788609441, "learning_rate": 0.00019493417318360617, "loss": 12.4886, "step": 4727 }, { "epoch": 0.2574584958444794, "grad_norm": 0.7230420580117327, "learning_rate": 0.00019493140171596355, "loss": 12.6009, "step": 4728 }, { "epoch": 0.2575129498410624, "grad_norm": 0.720929907434017, "learning_rate": 0.00019492862951011843, "loss": 12.6411, "step": 4729 }, { "epoch": 0.2575674038376454, "grad_norm": 0.7651425086653374, "learning_rate": 0.00019492585656609237, "loss": 12.6533, "step": 4730 }, { "epoch": 0.2576218578342284, "grad_norm": 0.8158447242348982, "learning_rate": 0.00019492308288390694, "loss": 12.69, "step": 4731 }, { "epoch": 0.2576763118308114, "grad_norm": 0.7244070048646627, "learning_rate": 0.00019492030846358368, "loss": 12.6977, "step": 4732 }, { "epoch": 0.25773076582739446, "grad_norm": 0.7611098891379391, "learning_rate": 0.0001949175333051442, "loss": 12.7027, "step": 4733 }, { "epoch": 0.25778521982397745, "grad_norm": 0.7838925677761639, "learning_rate": 0.00019491475740861006, "loss": 12.607, "step": 4734 }, { "epoch": 0.25783967382056044, "grad_norm": 0.6918956552390313, "learning_rate": 0.00019491198077400284, "loss": 12.5875, "step": 4735 }, { "epoch": 0.2578941278171435, "grad_norm": 0.8093985749932328, "learning_rate": 0.00019490920340134416, "loss": 12.691, "step": 4736 }, { "epoch": 0.2579485818137265, "grad_norm": 0.7667091966816839, "learning_rate": 0.00019490642529065556, "loss": 12.6538, "step": 4737 }, { "epoch": 0.2580030358103095, "grad_norm": 0.8050816124530826, "learning_rate": 0.00019490364644195873, "loss": 12.6441, "step": 4738 }, { "epoch": 0.2580574898068925, "grad_norm": 0.7796984412669973, "learning_rate": 0.0001949008668552752, "loss": 12.4625, "step": 4739 }, { "epoch": 0.2581119438034755, "grad_norm": 0.7954269326095742, "learning_rate": 0.00019489808653062662, "loss": 12.6378, "step": 4740 }, { "epoch": 0.2581663978000585, "grad_norm": 0.8546297428341599, "learning_rate": 0.0001948953054680346, "loss": 12.6656, "step": 4741 }, { "epoch": 0.25822085179664156, "grad_norm": 0.6836882241215804, "learning_rate": 0.0001948925236675208, "loss": 12.6027, "step": 4742 }, { "epoch": 0.25827530579322455, "grad_norm": 0.775457407414149, "learning_rate": 0.00019488974112910677, "loss": 12.6133, "step": 4743 }, { "epoch": 0.2583297597898076, "grad_norm": 0.6811598006775803, "learning_rate": 0.00019488695785281425, "loss": 12.6013, "step": 4744 }, { "epoch": 0.2583842137863906, "grad_norm": 0.8090432005026313, "learning_rate": 0.00019488417383866483, "loss": 12.7681, "step": 4745 }, { "epoch": 0.2584386677829736, "grad_norm": 0.9336443651864992, "learning_rate": 0.00019488138908668013, "loss": 12.7671, "step": 4746 }, { "epoch": 0.25849312177955663, "grad_norm": 0.6696001900123343, "learning_rate": 0.00019487860359688184, "loss": 12.6525, "step": 4747 }, { "epoch": 0.2585475757761396, "grad_norm": 0.7765604683795253, "learning_rate": 0.00019487581736929164, "loss": 12.563, "step": 4748 }, { "epoch": 0.2586020297727226, "grad_norm": 0.716271800511286, "learning_rate": 0.00019487303040393114, "loss": 12.6551, "step": 4749 }, { "epoch": 0.25865648376930567, "grad_norm": 0.6616733783918025, "learning_rate": 0.00019487024270082207, "loss": 12.5925, "step": 4750 }, { "epoch": 0.25871093776588866, "grad_norm": 0.7618926859107245, "learning_rate": 0.0001948674542599861, "loss": 12.6893, "step": 4751 }, { "epoch": 0.25876539176247165, "grad_norm": 0.747910560094233, "learning_rate": 0.00019486466508144488, "loss": 12.6486, "step": 4752 }, { "epoch": 0.2588198457590547, "grad_norm": 0.8001813888712213, "learning_rate": 0.0001948618751652201, "loss": 12.5778, "step": 4753 }, { "epoch": 0.2588742997556377, "grad_norm": 0.7698063187849554, "learning_rate": 0.00019485908451133348, "loss": 12.577, "step": 4754 }, { "epoch": 0.2589287537522207, "grad_norm": 0.7181274213017971, "learning_rate": 0.0001948562931198067, "loss": 12.5075, "step": 4755 }, { "epoch": 0.25898320774880373, "grad_norm": 0.815996907375043, "learning_rate": 0.00019485350099066154, "loss": 12.7174, "step": 4756 }, { "epoch": 0.2590376617453867, "grad_norm": 0.8277762565865211, "learning_rate": 0.00019485070812391957, "loss": 12.6259, "step": 4757 }, { "epoch": 0.2590921157419697, "grad_norm": 0.8255336053710628, "learning_rate": 0.00019484791451960262, "loss": 12.5534, "step": 4758 }, { "epoch": 0.25914656973855277, "grad_norm": 0.8046986799888131, "learning_rate": 0.00019484512017773237, "loss": 12.5145, "step": 4759 }, { "epoch": 0.25920102373513576, "grad_norm": 0.7738111831193657, "learning_rate": 0.00019484232509833058, "loss": 12.663, "step": 4760 }, { "epoch": 0.25925547773171875, "grad_norm": 0.7868122530602073, "learning_rate": 0.00019483952928141894, "loss": 12.5728, "step": 4761 }, { "epoch": 0.2593099317283018, "grad_norm": 0.8252109638843789, "learning_rate": 0.00019483673272701927, "loss": 12.6221, "step": 4762 }, { "epoch": 0.2593643857248848, "grad_norm": 0.752918560811296, "learning_rate": 0.00019483393543515322, "loss": 12.5168, "step": 4763 }, { "epoch": 0.2594188397214678, "grad_norm": 0.6652628886031119, "learning_rate": 0.00019483113740584256, "loss": 12.5479, "step": 4764 }, { "epoch": 0.25947329371805083, "grad_norm": 0.8406383840699488, "learning_rate": 0.0001948283386391091, "loss": 12.5782, "step": 4765 }, { "epoch": 0.2595277477146338, "grad_norm": 0.6806302328159711, "learning_rate": 0.00019482553913497457, "loss": 12.5683, "step": 4766 }, { "epoch": 0.2595822017112168, "grad_norm": 0.6847797554348255, "learning_rate": 0.00019482273889346075, "loss": 12.4412, "step": 4767 }, { "epoch": 0.25963665570779987, "grad_norm": 0.7825591587553425, "learning_rate": 0.0001948199379145894, "loss": 12.5548, "step": 4768 }, { "epoch": 0.25969110970438286, "grad_norm": 0.6803581758239637, "learning_rate": 0.00019481713619838234, "loss": 12.6537, "step": 4769 }, { "epoch": 0.25974556370096585, "grad_norm": 0.8865961981147894, "learning_rate": 0.0001948143337448613, "loss": 12.716, "step": 4770 }, { "epoch": 0.2598000176975489, "grad_norm": 0.8012560988317154, "learning_rate": 0.0001948115305540481, "loss": 12.6787, "step": 4771 }, { "epoch": 0.2598544716941319, "grad_norm": 0.7402747722806818, "learning_rate": 0.00019480872662596457, "loss": 12.5813, "step": 4772 }, { "epoch": 0.2599089256907149, "grad_norm": 0.7862461301402442, "learning_rate": 0.00019480592196063245, "loss": 12.6447, "step": 4773 }, { "epoch": 0.25996337968729794, "grad_norm": 0.6896585549357185, "learning_rate": 0.0001948031165580736, "loss": 12.6612, "step": 4774 }, { "epoch": 0.26001783368388093, "grad_norm": 0.7165201707189586, "learning_rate": 0.0001948003104183098, "loss": 12.6166, "step": 4775 }, { "epoch": 0.2600722876804639, "grad_norm": 0.772230066475727, "learning_rate": 0.0001947975035413629, "loss": 12.5939, "step": 4776 }, { "epoch": 0.26012674167704697, "grad_norm": 0.6933004562202322, "learning_rate": 0.0001947946959272547, "loss": 12.6642, "step": 4777 }, { "epoch": 0.26018119567362996, "grad_norm": 0.7647195199286283, "learning_rate": 0.0001947918875760071, "loss": 12.5439, "step": 4778 }, { "epoch": 0.260235649670213, "grad_norm": 0.7889058775371521, "learning_rate": 0.00019478907848764182, "loss": 12.7009, "step": 4779 }, { "epoch": 0.260290103666796, "grad_norm": 0.7187506723561059, "learning_rate": 0.0001947862686621808, "loss": 12.5066, "step": 4780 }, { "epoch": 0.260344557663379, "grad_norm": 0.6356252354910998, "learning_rate": 0.00019478345809964583, "loss": 12.4976, "step": 4781 }, { "epoch": 0.26039901165996204, "grad_norm": 0.8288372345791144, "learning_rate": 0.00019478064680005885, "loss": 12.6177, "step": 4782 }, { "epoch": 0.26045346565654504, "grad_norm": 0.7627296418926722, "learning_rate": 0.00019477783476344162, "loss": 12.6029, "step": 4783 }, { "epoch": 0.26050791965312803, "grad_norm": 0.6565336081551778, "learning_rate": 0.0001947750219898161, "loss": 12.4271, "step": 4784 }, { "epoch": 0.2605623736497111, "grad_norm": 0.6840268317999146, "learning_rate": 0.00019477220847920405, "loss": 12.573, "step": 4785 }, { "epoch": 0.26061682764629407, "grad_norm": 0.728386555080175, "learning_rate": 0.00019476939423162745, "loss": 12.696, "step": 4786 }, { "epoch": 0.26067128164287706, "grad_norm": 0.7978933441928162, "learning_rate": 0.00019476657924710815, "loss": 12.5638, "step": 4787 }, { "epoch": 0.2607257356394601, "grad_norm": 0.8473115840042355, "learning_rate": 0.00019476376352566804, "loss": 12.4955, "step": 4788 }, { "epoch": 0.2607801896360431, "grad_norm": 0.7401753572573325, "learning_rate": 0.000194760947067329, "loss": 12.6471, "step": 4789 }, { "epoch": 0.2608346436326261, "grad_norm": 0.7178835390453441, "learning_rate": 0.00019475812987211294, "loss": 12.4338, "step": 4790 }, { "epoch": 0.26088909762920914, "grad_norm": 0.8262283811762116, "learning_rate": 0.00019475531194004176, "loss": 12.7366, "step": 4791 }, { "epoch": 0.26094355162579214, "grad_norm": 0.6845621603784252, "learning_rate": 0.00019475249327113742, "loss": 12.6441, "step": 4792 }, { "epoch": 0.26099800562237513, "grad_norm": 0.7739572980317966, "learning_rate": 0.0001947496738654218, "loss": 12.493, "step": 4793 }, { "epoch": 0.2610524596189582, "grad_norm": 0.6352423477991079, "learning_rate": 0.0001947468537229168, "loss": 12.5679, "step": 4794 }, { "epoch": 0.26110691361554117, "grad_norm": 0.6745795145399036, "learning_rate": 0.0001947440328436444, "loss": 12.5574, "step": 4795 }, { "epoch": 0.26116136761212416, "grad_norm": 0.6633490884387779, "learning_rate": 0.0001947412112276265, "loss": 12.5556, "step": 4796 }, { "epoch": 0.2612158216087072, "grad_norm": 0.6370335086578439, "learning_rate": 0.00019473838887488506, "loss": 12.4562, "step": 4797 }, { "epoch": 0.2612702756052902, "grad_norm": 0.6154624229446888, "learning_rate": 0.00019473556578544201, "loss": 12.4528, "step": 4798 }, { "epoch": 0.2613247296018732, "grad_norm": 0.7574987473958148, "learning_rate": 0.00019473274195931932, "loss": 12.5948, "step": 4799 }, { "epoch": 0.26137918359845624, "grad_norm": 0.660503906328179, "learning_rate": 0.00019472991739653893, "loss": 12.5449, "step": 4800 }, { "epoch": 0.26143363759503924, "grad_norm": 0.680312990012292, "learning_rate": 0.00019472709209712282, "loss": 12.599, "step": 4801 }, { "epoch": 0.26148809159162223, "grad_norm": 0.6525863345468783, "learning_rate": 0.00019472426606109299, "loss": 12.5575, "step": 4802 }, { "epoch": 0.2615425455882053, "grad_norm": 0.719565202973381, "learning_rate": 0.00019472143928847134, "loss": 12.6364, "step": 4803 }, { "epoch": 0.26159699958478827, "grad_norm": 0.6854658367976487, "learning_rate": 0.0001947186117792799, "loss": 12.6155, "step": 4804 }, { "epoch": 0.26165145358137126, "grad_norm": 0.6462389765626788, "learning_rate": 0.00019471578353354066, "loss": 12.4988, "step": 4805 }, { "epoch": 0.2617059075779543, "grad_norm": 0.7059880398408249, "learning_rate": 0.0001947129545512756, "loss": 12.6068, "step": 4806 }, { "epoch": 0.2617603615745373, "grad_norm": 0.6738481299117473, "learning_rate": 0.00019471012483250673, "loss": 12.6121, "step": 4807 }, { "epoch": 0.2618148155711203, "grad_norm": 0.7047780449464357, "learning_rate": 0.00019470729437725604, "loss": 12.5718, "step": 4808 }, { "epoch": 0.26186926956770334, "grad_norm": 0.6574646377296559, "learning_rate": 0.00019470446318554553, "loss": 12.5995, "step": 4809 }, { "epoch": 0.26192372356428634, "grad_norm": 0.9005206763366099, "learning_rate": 0.00019470163125739727, "loss": 12.6679, "step": 4810 }, { "epoch": 0.2619781775608694, "grad_norm": 0.7396838483648821, "learning_rate": 0.0001946987985928332, "loss": 12.5763, "step": 4811 }, { "epoch": 0.2620326315574524, "grad_norm": 0.7708475776756964, "learning_rate": 0.00019469596519187542, "loss": 12.6198, "step": 4812 }, { "epoch": 0.26208708555403537, "grad_norm": 0.7720119216448281, "learning_rate": 0.00019469313105454595, "loss": 12.6241, "step": 4813 }, { "epoch": 0.2621415395506184, "grad_norm": 0.7334360755883025, "learning_rate": 0.00019469029618086677, "loss": 12.6099, "step": 4814 }, { "epoch": 0.2621959935472014, "grad_norm": 0.7044540805944692, "learning_rate": 0.00019468746057086002, "loss": 12.5668, "step": 4815 }, { "epoch": 0.2622504475437844, "grad_norm": 0.7207583424255852, "learning_rate": 0.00019468462422454766, "loss": 12.6091, "step": 4816 }, { "epoch": 0.26230490154036745, "grad_norm": 0.7101695408830705, "learning_rate": 0.00019468178714195179, "loss": 12.6525, "step": 4817 }, { "epoch": 0.26235935553695044, "grad_norm": 0.6242627094997542, "learning_rate": 0.00019467894932309444, "loss": 12.5819, "step": 4818 }, { "epoch": 0.26241380953353344, "grad_norm": 0.757938269214848, "learning_rate": 0.00019467611076799774, "loss": 12.6262, "step": 4819 }, { "epoch": 0.2624682635301165, "grad_norm": 0.6433189196932046, "learning_rate": 0.00019467327147668371, "loss": 12.502, "step": 4820 }, { "epoch": 0.2625227175266995, "grad_norm": 0.6852794377013726, "learning_rate": 0.00019467043144917443, "loss": 12.6259, "step": 4821 }, { "epoch": 0.26257717152328247, "grad_norm": 0.7833380402320976, "learning_rate": 0.000194667590685492, "loss": 12.6435, "step": 4822 }, { "epoch": 0.2626316255198655, "grad_norm": 0.6945606666550747, "learning_rate": 0.00019466474918565854, "loss": 12.4367, "step": 4823 }, { "epoch": 0.2626860795164485, "grad_norm": 0.7123898769033836, "learning_rate": 0.00019466190694969612, "loss": 12.509, "step": 4824 }, { "epoch": 0.2627405335130315, "grad_norm": 0.7654131241421731, "learning_rate": 0.00019465906397762682, "loss": 12.495, "step": 4825 }, { "epoch": 0.26279498750961455, "grad_norm": 0.8301231959722061, "learning_rate": 0.00019465622026947275, "loss": 12.757, "step": 4826 }, { "epoch": 0.26284944150619755, "grad_norm": 0.6288940118080169, "learning_rate": 0.00019465337582525604, "loss": 12.4903, "step": 4827 }, { "epoch": 0.26290389550278054, "grad_norm": 0.7417776345417484, "learning_rate": 0.0001946505306449988, "loss": 12.5361, "step": 4828 }, { "epoch": 0.2629583494993636, "grad_norm": 0.7752400517739513, "learning_rate": 0.00019464768472872318, "loss": 12.6089, "step": 4829 }, { "epoch": 0.2630128034959466, "grad_norm": 0.5998676948980813, "learning_rate": 0.00019464483807645128, "loss": 12.4756, "step": 4830 }, { "epoch": 0.26306725749252957, "grad_norm": 0.7658214146870006, "learning_rate": 0.00019464199068820528, "loss": 12.6996, "step": 4831 }, { "epoch": 0.2631217114891126, "grad_norm": 0.72957856770116, "learning_rate": 0.00019463914256400723, "loss": 12.4247, "step": 4832 }, { "epoch": 0.2631761654856956, "grad_norm": 0.7943362289892251, "learning_rate": 0.0001946362937038794, "loss": 12.6922, "step": 4833 }, { "epoch": 0.2632306194822786, "grad_norm": 0.6523756773773719, "learning_rate": 0.00019463344410784383, "loss": 12.4808, "step": 4834 }, { "epoch": 0.26328507347886165, "grad_norm": 0.6861067893129501, "learning_rate": 0.00019463059377592274, "loss": 12.5381, "step": 4835 }, { "epoch": 0.26333952747544465, "grad_norm": 0.6409161291059233, "learning_rate": 0.0001946277427081383, "loss": 12.5751, "step": 4836 }, { "epoch": 0.26339398147202764, "grad_norm": 0.8461938435521417, "learning_rate": 0.00019462489090451266, "loss": 12.6144, "step": 4837 }, { "epoch": 0.2634484354686107, "grad_norm": 0.6873872509981911, "learning_rate": 0.000194622038365068, "loss": 12.6681, "step": 4838 }, { "epoch": 0.2635028894651937, "grad_norm": 0.809696924328214, "learning_rate": 0.00019461918508982646, "loss": 12.5898, "step": 4839 }, { "epoch": 0.26355734346177667, "grad_norm": 0.7459462348725406, "learning_rate": 0.00019461633107881033, "loss": 12.7425, "step": 4840 }, { "epoch": 0.2636117974583597, "grad_norm": 0.6404604693639886, "learning_rate": 0.0001946134763320417, "loss": 12.5621, "step": 4841 }, { "epoch": 0.2636662514549427, "grad_norm": 0.6782170515842123, "learning_rate": 0.00019461062084954285, "loss": 12.5363, "step": 4842 }, { "epoch": 0.2637207054515257, "grad_norm": 0.7028036018440581, "learning_rate": 0.0001946077646313359, "loss": 12.5666, "step": 4843 }, { "epoch": 0.26377515944810875, "grad_norm": 0.6826374646724563, "learning_rate": 0.00019460490767744313, "loss": 12.6038, "step": 4844 }, { "epoch": 0.26382961344469175, "grad_norm": 0.6777141901476166, "learning_rate": 0.00019460204998788673, "loss": 12.5829, "step": 4845 }, { "epoch": 0.2638840674412748, "grad_norm": 0.6738235373260646, "learning_rate": 0.00019459919156268894, "loss": 12.5814, "step": 4846 }, { "epoch": 0.2639385214378578, "grad_norm": 0.6708886000655676, "learning_rate": 0.00019459633240187193, "loss": 12.6222, "step": 4847 }, { "epoch": 0.2639929754344408, "grad_norm": 0.7305811832108289, "learning_rate": 0.00019459347250545803, "loss": 12.5633, "step": 4848 }, { "epoch": 0.26404742943102383, "grad_norm": 0.7260244255550706, "learning_rate": 0.00019459061187346942, "loss": 12.5623, "step": 4849 }, { "epoch": 0.2641018834276068, "grad_norm": 0.6852163751677094, "learning_rate": 0.0001945877505059283, "loss": 12.5025, "step": 4850 }, { "epoch": 0.2641563374241898, "grad_norm": 0.6700399729180287, "learning_rate": 0.000194584888402857, "loss": 12.5428, "step": 4851 }, { "epoch": 0.26421079142077286, "grad_norm": 0.7012076478375378, "learning_rate": 0.00019458202556427775, "loss": 12.6202, "step": 4852 }, { "epoch": 0.26426524541735585, "grad_norm": 0.767091545667911, "learning_rate": 0.0001945791619902128, "loss": 12.4397, "step": 4853 }, { "epoch": 0.26431969941393885, "grad_norm": 0.7176345157155276, "learning_rate": 0.00019457629768068443, "loss": 12.5838, "step": 4854 }, { "epoch": 0.2643741534105219, "grad_norm": 0.7077784155956962, "learning_rate": 0.0001945734326357149, "loss": 12.4677, "step": 4855 }, { "epoch": 0.2644286074071049, "grad_norm": 0.6837911365687319, "learning_rate": 0.00019457056685532652, "loss": 12.492, "step": 4856 }, { "epoch": 0.2644830614036879, "grad_norm": 0.7466557995988053, "learning_rate": 0.0001945677003395415, "loss": 12.5368, "step": 4857 }, { "epoch": 0.26453751540027093, "grad_norm": 0.7541821594245927, "learning_rate": 0.00019456483308838226, "loss": 12.7146, "step": 4858 }, { "epoch": 0.2645919693968539, "grad_norm": 0.7277307319226684, "learning_rate": 0.00019456196510187095, "loss": 12.548, "step": 4859 }, { "epoch": 0.2646464233934369, "grad_norm": 0.750180857479281, "learning_rate": 0.00019455909638002998, "loss": 12.6692, "step": 4860 }, { "epoch": 0.26470087739001996, "grad_norm": 0.6882113934021395, "learning_rate": 0.0001945562269228816, "loss": 12.5586, "step": 4861 }, { "epoch": 0.26475533138660295, "grad_norm": 0.6876200797232995, "learning_rate": 0.00019455335673044814, "loss": 12.6751, "step": 4862 }, { "epoch": 0.26480978538318595, "grad_norm": 0.7413062407404183, "learning_rate": 0.00019455048580275193, "loss": 12.56, "step": 4863 }, { "epoch": 0.264864239379769, "grad_norm": 0.7997680346304358, "learning_rate": 0.0001945476141398153, "loss": 12.5148, "step": 4864 }, { "epoch": 0.264918693376352, "grad_norm": 0.7037783550914863, "learning_rate": 0.00019454474174166055, "loss": 12.4402, "step": 4865 }, { "epoch": 0.264973147372935, "grad_norm": 0.719253456465287, "learning_rate": 0.00019454186860831004, "loss": 12.526, "step": 4866 }, { "epoch": 0.26502760136951803, "grad_norm": 0.7821364793198318, "learning_rate": 0.0001945389947397861, "loss": 12.6374, "step": 4867 }, { "epoch": 0.265082055366101, "grad_norm": 0.8169352730341746, "learning_rate": 0.0001945361201361111, "loss": 12.6756, "step": 4868 }, { "epoch": 0.265136509362684, "grad_norm": 0.6669004440259494, "learning_rate": 0.00019453324479730736, "loss": 12.512, "step": 4869 }, { "epoch": 0.26519096335926706, "grad_norm": 0.6948054201853392, "learning_rate": 0.00019453036872339727, "loss": 12.6744, "step": 4870 }, { "epoch": 0.26524541735585005, "grad_norm": 0.6595901733830262, "learning_rate": 0.00019452749191440315, "loss": 12.6445, "step": 4871 }, { "epoch": 0.26529987135243305, "grad_norm": 0.7250972141249324, "learning_rate": 0.00019452461437034744, "loss": 12.6358, "step": 4872 }, { "epoch": 0.2653543253490161, "grad_norm": 0.8354176409927749, "learning_rate": 0.00019452173609125245, "loss": 12.6428, "step": 4873 }, { "epoch": 0.2654087793455991, "grad_norm": 0.6850492700866151, "learning_rate": 0.0001945188570771406, "loss": 12.565, "step": 4874 }, { "epoch": 0.2654632333421821, "grad_norm": 0.7474772489873514, "learning_rate": 0.00019451597732803426, "loss": 12.4716, "step": 4875 }, { "epoch": 0.26551768733876513, "grad_norm": 0.7239459348388512, "learning_rate": 0.00019451309684395581, "loss": 12.5, "step": 4876 }, { "epoch": 0.2655721413353481, "grad_norm": 0.7013468148695667, "learning_rate": 0.0001945102156249277, "loss": 12.6859, "step": 4877 }, { "epoch": 0.26562659533193117, "grad_norm": 0.6306546397409388, "learning_rate": 0.00019450733367097232, "loss": 12.258, "step": 4878 }, { "epoch": 0.26568104932851416, "grad_norm": 0.7732678904459518, "learning_rate": 0.00019450445098211203, "loss": 12.5905, "step": 4879 }, { "epoch": 0.26573550332509716, "grad_norm": 0.7102571767926045, "learning_rate": 0.00019450156755836928, "loss": 12.5944, "step": 4880 }, { "epoch": 0.2657899573216802, "grad_norm": 0.7428771349602378, "learning_rate": 0.0001944986833997665, "loss": 12.7083, "step": 4881 }, { "epoch": 0.2658444113182632, "grad_norm": 0.6644542846070517, "learning_rate": 0.0001944957985063261, "loss": 12.5964, "step": 4882 }, { "epoch": 0.2658988653148462, "grad_norm": 0.6076981577547662, "learning_rate": 0.00019449291287807055, "loss": 12.5323, "step": 4883 }, { "epoch": 0.26595331931142924, "grad_norm": 0.6774321021161596, "learning_rate": 0.00019449002651502224, "loss": 12.628, "step": 4884 }, { "epoch": 0.26600777330801223, "grad_norm": 0.6469750254177549, "learning_rate": 0.00019448713941720364, "loss": 12.5591, "step": 4885 }, { "epoch": 0.2660622273045952, "grad_norm": 0.6478552446275883, "learning_rate": 0.00019448425158463724, "loss": 12.5149, "step": 4886 }, { "epoch": 0.26611668130117827, "grad_norm": 0.6353492916660458, "learning_rate": 0.0001944813630173454, "loss": 12.6437, "step": 4887 }, { "epoch": 0.26617113529776126, "grad_norm": 0.7929604305168886, "learning_rate": 0.00019447847371535066, "loss": 12.6283, "step": 4888 }, { "epoch": 0.26622558929434426, "grad_norm": 0.6898416589120709, "learning_rate": 0.00019447558367867543, "loss": 12.5089, "step": 4889 }, { "epoch": 0.2662800432909273, "grad_norm": 0.7295361652237429, "learning_rate": 0.0001944726929073423, "loss": 12.5732, "step": 4890 }, { "epoch": 0.2663344972875103, "grad_norm": 0.7027949921759142, "learning_rate": 0.00019446980140137358, "loss": 12.6327, "step": 4891 }, { "epoch": 0.2663889512840933, "grad_norm": 0.7035390676555576, "learning_rate": 0.0001944669091607919, "loss": 12.6155, "step": 4892 }, { "epoch": 0.26644340528067634, "grad_norm": 0.6927777072601664, "learning_rate": 0.00019446401618561967, "loss": 12.6609, "step": 4893 }, { "epoch": 0.26649785927725933, "grad_norm": 0.7113014488080182, "learning_rate": 0.0001944611224758794, "loss": 12.6829, "step": 4894 }, { "epoch": 0.2665523132738423, "grad_norm": 0.6937576303435324, "learning_rate": 0.00019445822803159358, "loss": 12.4869, "step": 4895 }, { "epoch": 0.26660676727042537, "grad_norm": 0.7508896257482275, "learning_rate": 0.00019445533285278478, "loss": 12.6736, "step": 4896 }, { "epoch": 0.26666122126700836, "grad_norm": 0.6664214127305361, "learning_rate": 0.00019445243693947547, "loss": 12.7157, "step": 4897 }, { "epoch": 0.26671567526359136, "grad_norm": 0.7160016567620926, "learning_rate": 0.00019444954029168815, "loss": 12.5127, "step": 4898 }, { "epoch": 0.2667701292601744, "grad_norm": 0.7095136875260772, "learning_rate": 0.00019444664290944538, "loss": 12.6951, "step": 4899 }, { "epoch": 0.2668245832567574, "grad_norm": 0.7889749179258148, "learning_rate": 0.00019444374479276968, "loss": 12.5083, "step": 4900 }, { "epoch": 0.2668790372533404, "grad_norm": 0.6209670074747341, "learning_rate": 0.00019444084594168358, "loss": 12.5154, "step": 4901 }, { "epoch": 0.26693349124992344, "grad_norm": 0.7192247657918985, "learning_rate": 0.0001944379463562096, "loss": 12.6027, "step": 4902 }, { "epoch": 0.26698794524650643, "grad_norm": 0.877614733642246, "learning_rate": 0.00019443504603637032, "loss": 12.4859, "step": 4903 }, { "epoch": 0.2670423992430894, "grad_norm": 0.622920397255274, "learning_rate": 0.0001944321449821883, "loss": 12.5212, "step": 4904 }, { "epoch": 0.26709685323967247, "grad_norm": 0.8231902555703173, "learning_rate": 0.0001944292431936861, "loss": 12.5708, "step": 4905 }, { "epoch": 0.26715130723625546, "grad_norm": 0.7014509423459101, "learning_rate": 0.00019442634067088623, "loss": 12.6247, "step": 4906 }, { "epoch": 0.26720576123283846, "grad_norm": 0.7338425811141995, "learning_rate": 0.00019442343741381133, "loss": 12.567, "step": 4907 }, { "epoch": 0.2672602152294215, "grad_norm": 0.7128851004410109, "learning_rate": 0.00019442053342248392, "loss": 12.5589, "step": 4908 }, { "epoch": 0.2673146692260045, "grad_norm": 0.7640141068258829, "learning_rate": 0.00019441762869692664, "loss": 12.5248, "step": 4909 }, { "epoch": 0.2673691232225875, "grad_norm": 0.7195831536886108, "learning_rate": 0.000194414723237162, "loss": 12.6038, "step": 4910 }, { "epoch": 0.26742357721917054, "grad_norm": 0.702981087273901, "learning_rate": 0.00019441181704321267, "loss": 12.6929, "step": 4911 }, { "epoch": 0.26747803121575353, "grad_norm": 0.6649595768416755, "learning_rate": 0.00019440891011510123, "loss": 12.6238, "step": 4912 }, { "epoch": 0.2675324852123366, "grad_norm": 0.7465051804702764, "learning_rate": 0.00019440600245285023, "loss": 12.5796, "step": 4913 }, { "epoch": 0.26758693920891957, "grad_norm": 0.6771595729186441, "learning_rate": 0.00019440309405648236, "loss": 12.6006, "step": 4914 }, { "epoch": 0.26764139320550256, "grad_norm": 0.7984906107431816, "learning_rate": 0.0001944001849260202, "loss": 12.5348, "step": 4915 }, { "epoch": 0.2676958472020856, "grad_norm": 0.6987809489400444, "learning_rate": 0.00019439727506148635, "loss": 12.6028, "step": 4916 }, { "epoch": 0.2677503011986686, "grad_norm": 0.7515636815166795, "learning_rate": 0.00019439436446290346, "loss": 12.5774, "step": 4917 }, { "epoch": 0.2678047551952516, "grad_norm": 0.7772563152009838, "learning_rate": 0.00019439145313029417, "loss": 12.6186, "step": 4918 }, { "epoch": 0.26785920919183465, "grad_norm": 0.7773179505401135, "learning_rate": 0.00019438854106368112, "loss": 12.6273, "step": 4919 }, { "epoch": 0.26791366318841764, "grad_norm": 0.8029707125727654, "learning_rate": 0.00019438562826308692, "loss": 12.7813, "step": 4920 }, { "epoch": 0.26796811718500063, "grad_norm": 0.6739494023869455, "learning_rate": 0.00019438271472853427, "loss": 12.5858, "step": 4921 }, { "epoch": 0.2680225711815837, "grad_norm": 0.7172542758143176, "learning_rate": 0.0001943798004600458, "loss": 12.7093, "step": 4922 }, { "epoch": 0.2680770251781667, "grad_norm": 0.8540214909937995, "learning_rate": 0.00019437688545764417, "loss": 12.6435, "step": 4923 }, { "epoch": 0.26813147917474967, "grad_norm": 0.6922381771184539, "learning_rate": 0.00019437396972135206, "loss": 12.5729, "step": 4924 }, { "epoch": 0.2681859331713327, "grad_norm": 0.7388596470211966, "learning_rate": 0.00019437105325119212, "loss": 12.6941, "step": 4925 }, { "epoch": 0.2682403871679157, "grad_norm": 0.6799801483566477, "learning_rate": 0.00019436813604718705, "loss": 12.58, "step": 4926 }, { "epoch": 0.2682948411644987, "grad_norm": 0.677525774257368, "learning_rate": 0.00019436521810935954, "loss": 12.3613, "step": 4927 }, { "epoch": 0.26834929516108175, "grad_norm": 0.7070330522678878, "learning_rate": 0.00019436229943773224, "loss": 12.5587, "step": 4928 }, { "epoch": 0.26840374915766474, "grad_norm": 0.7431247845470497, "learning_rate": 0.0001943593800323279, "loss": 12.7184, "step": 4929 }, { "epoch": 0.26845820315424773, "grad_norm": 0.7448892262812907, "learning_rate": 0.00019435645989316917, "loss": 12.521, "step": 4930 }, { "epoch": 0.2685126571508308, "grad_norm": 0.7563793589303949, "learning_rate": 0.00019435353902027882, "loss": 12.8593, "step": 4931 }, { "epoch": 0.2685671111474138, "grad_norm": 0.6965002870763543, "learning_rate": 0.00019435061741367952, "loss": 12.5444, "step": 4932 }, { "epoch": 0.26862156514399677, "grad_norm": 0.7845469164455278, "learning_rate": 0.00019434769507339396, "loss": 12.6502, "step": 4933 }, { "epoch": 0.2686760191405798, "grad_norm": 0.6084132495946784, "learning_rate": 0.00019434477199944494, "loss": 12.6393, "step": 4934 }, { "epoch": 0.2687304731371628, "grad_norm": 0.7270494062285637, "learning_rate": 0.00019434184819185516, "loss": 12.5558, "step": 4935 }, { "epoch": 0.2687849271337458, "grad_norm": 0.7449005630000488, "learning_rate": 0.0001943389236506473, "loss": 12.6898, "step": 4936 }, { "epoch": 0.26883938113032885, "grad_norm": 0.7542098282712374, "learning_rate": 0.0001943359983758442, "loss": 12.5602, "step": 4937 }, { "epoch": 0.26889383512691184, "grad_norm": 0.6825357213113328, "learning_rate": 0.00019433307236746853, "loss": 12.6414, "step": 4938 }, { "epoch": 0.26894828912349483, "grad_norm": 0.7596587455890367, "learning_rate": 0.00019433014562554306, "loss": 12.4791, "step": 4939 }, { "epoch": 0.2690027431200779, "grad_norm": 0.7335019063446175, "learning_rate": 0.00019432721815009057, "loss": 12.5679, "step": 4940 }, { "epoch": 0.2690571971166609, "grad_norm": 0.8002044902081036, "learning_rate": 0.0001943242899411338, "loss": 12.6291, "step": 4941 }, { "epoch": 0.26911165111324387, "grad_norm": 0.7181184194287559, "learning_rate": 0.00019432136099869555, "loss": 12.5769, "step": 4942 }, { "epoch": 0.2691661051098269, "grad_norm": 0.693469130365641, "learning_rate": 0.0001943184313227986, "loss": 12.6736, "step": 4943 }, { "epoch": 0.2692205591064099, "grad_norm": 0.7883304347264259, "learning_rate": 0.00019431550091346565, "loss": 12.4797, "step": 4944 }, { "epoch": 0.26927501310299296, "grad_norm": 0.7675592218688019, "learning_rate": 0.0001943125697707196, "loss": 12.599, "step": 4945 }, { "epoch": 0.26932946709957595, "grad_norm": 0.721373700751928, "learning_rate": 0.0001943096378945832, "loss": 12.436, "step": 4946 }, { "epoch": 0.26938392109615894, "grad_norm": 0.7432999919007355, "learning_rate": 0.0001943067052850792, "loss": 12.6031, "step": 4947 }, { "epoch": 0.269438375092742, "grad_norm": 0.700930234427065, "learning_rate": 0.00019430377194223043, "loss": 12.6158, "step": 4948 }, { "epoch": 0.269492829089325, "grad_norm": 0.8856859606553761, "learning_rate": 0.00019430083786605977, "loss": 12.6485, "step": 4949 }, { "epoch": 0.269547283085908, "grad_norm": 0.7651445866542139, "learning_rate": 0.00019429790305658994, "loss": 12.5601, "step": 4950 }, { "epoch": 0.269601737082491, "grad_norm": 0.8712471278900041, "learning_rate": 0.00019429496751384383, "loss": 12.5648, "step": 4951 }, { "epoch": 0.269656191079074, "grad_norm": 0.836714647644876, "learning_rate": 0.00019429203123784422, "loss": 12.5715, "step": 4952 }, { "epoch": 0.269710645075657, "grad_norm": 0.7545099563149713, "learning_rate": 0.00019428909422861398, "loss": 12.4512, "step": 4953 }, { "epoch": 0.26976509907224006, "grad_norm": 0.7759249414188034, "learning_rate": 0.0001942861564861759, "loss": 12.576, "step": 4954 }, { "epoch": 0.26981955306882305, "grad_norm": 0.8070255412765706, "learning_rate": 0.0001942832180105529, "loss": 12.4277, "step": 4955 }, { "epoch": 0.26987400706540604, "grad_norm": 0.7411828713292871, "learning_rate": 0.00019428027880176777, "loss": 12.5935, "step": 4956 }, { "epoch": 0.2699284610619891, "grad_norm": 0.8762418560538697, "learning_rate": 0.00019427733885984337, "loss": 12.7198, "step": 4957 }, { "epoch": 0.2699829150585721, "grad_norm": 0.822480922374177, "learning_rate": 0.00019427439818480257, "loss": 12.5418, "step": 4958 }, { "epoch": 0.2700373690551551, "grad_norm": 0.7559488805950916, "learning_rate": 0.00019427145677666823, "loss": 12.5301, "step": 4959 }, { "epoch": 0.2700918230517381, "grad_norm": 0.7075578365647424, "learning_rate": 0.00019426851463546325, "loss": 12.5837, "step": 4960 }, { "epoch": 0.2701462770483211, "grad_norm": 0.7078321812508837, "learning_rate": 0.0001942655717612105, "loss": 12.6233, "step": 4961 }, { "epoch": 0.2702007310449041, "grad_norm": 0.7725000755982798, "learning_rate": 0.00019426262815393284, "loss": 12.4912, "step": 4962 }, { "epoch": 0.27025518504148716, "grad_norm": 0.7809703620738555, "learning_rate": 0.00019425968381365317, "loss": 12.7238, "step": 4963 }, { "epoch": 0.27030963903807015, "grad_norm": 0.7543002957619203, "learning_rate": 0.0001942567387403944, "loss": 12.5083, "step": 4964 }, { "epoch": 0.27036409303465314, "grad_norm": 0.7969983235647131, "learning_rate": 0.00019425379293417944, "loss": 12.6048, "step": 4965 }, { "epoch": 0.2704185470312362, "grad_norm": 0.7196112073402323, "learning_rate": 0.00019425084639503116, "loss": 12.5924, "step": 4966 }, { "epoch": 0.2704730010278192, "grad_norm": 0.7257632197119582, "learning_rate": 0.00019424789912297249, "loss": 12.6349, "step": 4967 }, { "epoch": 0.2705274550244022, "grad_norm": 0.9378435895655606, "learning_rate": 0.00019424495111802637, "loss": 12.6213, "step": 4968 }, { "epoch": 0.2705819090209852, "grad_norm": 0.7084946448579053, "learning_rate": 0.00019424200238021567, "loss": 12.5467, "step": 4969 }, { "epoch": 0.2706363630175682, "grad_norm": 0.7483908594182052, "learning_rate": 0.0001942390529095634, "loss": 12.6569, "step": 4970 }, { "epoch": 0.2706908170141512, "grad_norm": 0.7118408982414774, "learning_rate": 0.00019423610270609244, "loss": 12.5747, "step": 4971 }, { "epoch": 0.27074527101073426, "grad_norm": 0.68660993044931, "learning_rate": 0.0001942331517698257, "loss": 12.4705, "step": 4972 }, { "epoch": 0.27079972500731725, "grad_norm": 0.7802996073573238, "learning_rate": 0.00019423020010078622, "loss": 12.5696, "step": 4973 }, { "epoch": 0.27085417900390024, "grad_norm": 0.674993499837841, "learning_rate": 0.00019422724769899686, "loss": 12.6346, "step": 4974 }, { "epoch": 0.2709086330004833, "grad_norm": 0.9356688643207316, "learning_rate": 0.00019422429456448064, "loss": 12.8044, "step": 4975 }, { "epoch": 0.2709630869970663, "grad_norm": 0.7621397899824885, "learning_rate": 0.00019422134069726053, "loss": 12.571, "step": 4976 }, { "epoch": 0.2710175409936493, "grad_norm": 0.6867695978399014, "learning_rate": 0.00019421838609735942, "loss": 12.5917, "step": 4977 }, { "epoch": 0.2710719949902323, "grad_norm": 0.8231682604535635, "learning_rate": 0.0001942154307648004, "loss": 12.5752, "step": 4978 }, { "epoch": 0.2711264489868153, "grad_norm": 0.8111827291656801, "learning_rate": 0.00019421247469960634, "loss": 12.6336, "step": 4979 }, { "epoch": 0.27118090298339836, "grad_norm": 1.042410249315156, "learning_rate": 0.00019420951790180029, "loss": 12.6439, "step": 4980 }, { "epoch": 0.27123535697998136, "grad_norm": 0.7076263354008396, "learning_rate": 0.00019420656037140525, "loss": 12.6046, "step": 4981 }, { "epoch": 0.27128981097656435, "grad_norm": 0.6515365376827323, "learning_rate": 0.00019420360210844418, "loss": 12.5624, "step": 4982 }, { "epoch": 0.2713442649731474, "grad_norm": 0.7022423293026825, "learning_rate": 0.0001942006431129401, "loss": 12.5117, "step": 4983 }, { "epoch": 0.2713987189697304, "grad_norm": 0.7954526938737386, "learning_rate": 0.00019419768338491605, "loss": 12.7401, "step": 4984 }, { "epoch": 0.2714531729663134, "grad_norm": 0.8688496563455215, "learning_rate": 0.00019419472292439498, "loss": 12.7052, "step": 4985 }, { "epoch": 0.27150762696289643, "grad_norm": 0.6874496497060338, "learning_rate": 0.00019419176173139996, "loss": 12.653, "step": 4986 }, { "epoch": 0.2715620809594794, "grad_norm": 0.7350358087124989, "learning_rate": 0.000194188799805954, "loss": 12.7017, "step": 4987 }, { "epoch": 0.2716165349560624, "grad_norm": 0.7917394937965483, "learning_rate": 0.00019418583714808017, "loss": 12.5159, "step": 4988 }, { "epoch": 0.27167098895264546, "grad_norm": 0.7488765240242293, "learning_rate": 0.00019418287375780146, "loss": 12.6222, "step": 4989 }, { "epoch": 0.27172544294922846, "grad_norm": 0.7146616561206384, "learning_rate": 0.00019417990963514086, "loss": 12.4543, "step": 4990 }, { "epoch": 0.27177989694581145, "grad_norm": 0.7287116095576452, "learning_rate": 0.00019417694478012157, "loss": 12.5467, "step": 4991 }, { "epoch": 0.2718343509423945, "grad_norm": 0.8550421741239044, "learning_rate": 0.00019417397919276654, "loss": 12.6141, "step": 4992 }, { "epoch": 0.2718888049389775, "grad_norm": 0.8027759839067112, "learning_rate": 0.00019417101287309886, "loss": 12.6347, "step": 4993 }, { "epoch": 0.2719432589355605, "grad_norm": 0.7161657765497795, "learning_rate": 0.00019416804582114157, "loss": 12.4361, "step": 4994 }, { "epoch": 0.27199771293214353, "grad_norm": 0.7008462549080949, "learning_rate": 0.0001941650780369178, "loss": 12.6173, "step": 4995 }, { "epoch": 0.2720521669287265, "grad_norm": 0.7668462501840719, "learning_rate": 0.00019416210952045057, "loss": 12.5077, "step": 4996 }, { "epoch": 0.2721066209253095, "grad_norm": 0.7308418987478974, "learning_rate": 0.000194159140271763, "loss": 12.5051, "step": 4997 }, { "epoch": 0.27216107492189257, "grad_norm": 0.7483771822126182, "learning_rate": 0.00019415617029087815, "loss": 12.7825, "step": 4998 }, { "epoch": 0.27221552891847556, "grad_norm": 0.6782389168304814, "learning_rate": 0.00019415319957781914, "loss": 12.5248, "step": 4999 }, { "epoch": 0.27226998291505855, "grad_norm": 0.7489393248917106, "learning_rate": 0.00019415022813260903, "loss": 12.7125, "step": 5000 }, { "epoch": 0.2723244369116416, "grad_norm": 0.679800953165827, "learning_rate": 0.000194147255955271, "loss": 12.6751, "step": 5001 }, { "epoch": 0.2723788909082246, "grad_norm": 0.7888451496769374, "learning_rate": 0.0001941442830458281, "loss": 12.662, "step": 5002 }, { "epoch": 0.2724333449048076, "grad_norm": 0.7325560605775505, "learning_rate": 0.00019414130940430347, "loss": 12.7182, "step": 5003 }, { "epoch": 0.27248779890139063, "grad_norm": 0.7237907170438983, "learning_rate": 0.0001941383350307202, "loss": 12.4911, "step": 5004 }, { "epoch": 0.2725422528979736, "grad_norm": 0.6903596227181271, "learning_rate": 0.0001941353599251015, "loss": 12.4553, "step": 5005 }, { "epoch": 0.2725967068945566, "grad_norm": 0.8111728486824789, "learning_rate": 0.00019413238408747042, "loss": 12.6992, "step": 5006 }, { "epoch": 0.27265116089113967, "grad_norm": 0.8038359449773015, "learning_rate": 0.00019412940751785016, "loss": 12.5698, "step": 5007 }, { "epoch": 0.27270561488772266, "grad_norm": 0.6879193280914043, "learning_rate": 0.00019412643021626385, "loss": 12.5842, "step": 5008 }, { "epoch": 0.27276006888430565, "grad_norm": 0.7823374563346943, "learning_rate": 0.0001941234521827346, "loss": 12.5844, "step": 5009 }, { "epoch": 0.2728145228808887, "grad_norm": 1.109044580363149, "learning_rate": 0.00019412047341728562, "loss": 12.6108, "step": 5010 }, { "epoch": 0.2728689768774717, "grad_norm": 0.6525462085960635, "learning_rate": 0.00019411749391994002, "loss": 12.5394, "step": 5011 }, { "epoch": 0.27292343087405474, "grad_norm": 0.8202131839297926, "learning_rate": 0.00019411451369072104, "loss": 12.652, "step": 5012 }, { "epoch": 0.27297788487063773, "grad_norm": 0.6646217005732907, "learning_rate": 0.00019411153272965183, "loss": 12.3951, "step": 5013 }, { "epoch": 0.2730323388672207, "grad_norm": 0.7073205373515769, "learning_rate": 0.00019410855103675552, "loss": 12.4288, "step": 5014 }, { "epoch": 0.2730867928638038, "grad_norm": 0.7478502526049412, "learning_rate": 0.0001941055686120554, "loss": 12.6234, "step": 5015 }, { "epoch": 0.27314124686038677, "grad_norm": 0.7901002256999474, "learning_rate": 0.00019410258545557452, "loss": 12.6419, "step": 5016 }, { "epoch": 0.27319570085696976, "grad_norm": 0.7292447218005194, "learning_rate": 0.0001940996015673362, "loss": 12.6508, "step": 5017 }, { "epoch": 0.2732501548535528, "grad_norm": 0.7317662343794659, "learning_rate": 0.00019409661694736355, "loss": 12.6472, "step": 5018 }, { "epoch": 0.2733046088501358, "grad_norm": 0.7269954070004496, "learning_rate": 0.0001940936315956799, "loss": 12.4842, "step": 5019 }, { "epoch": 0.2733590628467188, "grad_norm": 0.68273618885286, "learning_rate": 0.00019409064551230833, "loss": 12.5733, "step": 5020 }, { "epoch": 0.27341351684330184, "grad_norm": 0.7839706162049932, "learning_rate": 0.00019408765869727214, "loss": 12.5424, "step": 5021 }, { "epoch": 0.27346797083988483, "grad_norm": 0.7754399766077261, "learning_rate": 0.00019408467115059454, "loss": 12.6342, "step": 5022 }, { "epoch": 0.2735224248364678, "grad_norm": 0.6691754583961804, "learning_rate": 0.00019408168287229875, "loss": 12.4901, "step": 5023 }, { "epoch": 0.2735768788330509, "grad_norm": 0.7275096808427216, "learning_rate": 0.00019407869386240805, "loss": 12.6807, "step": 5024 }, { "epoch": 0.27363133282963387, "grad_norm": 0.7222259300134751, "learning_rate": 0.00019407570412094562, "loss": 12.687, "step": 5025 }, { "epoch": 0.27368578682621686, "grad_norm": 0.6536571577211265, "learning_rate": 0.00019407271364793474, "loss": 12.5979, "step": 5026 }, { "epoch": 0.2737402408227999, "grad_norm": 0.6961205317617686, "learning_rate": 0.00019406972244339867, "loss": 12.5989, "step": 5027 }, { "epoch": 0.2737946948193829, "grad_norm": 0.7038426126899354, "learning_rate": 0.00019406673050736067, "loss": 12.6524, "step": 5028 }, { "epoch": 0.2738491488159659, "grad_norm": 0.7279571986835491, "learning_rate": 0.000194063737839844, "loss": 12.6209, "step": 5029 }, { "epoch": 0.27390360281254894, "grad_norm": 0.7821768847303526, "learning_rate": 0.0001940607444408719, "loss": 12.657, "step": 5030 }, { "epoch": 0.27395805680913193, "grad_norm": 0.7535716318076771, "learning_rate": 0.0001940577503104677, "loss": 12.5637, "step": 5031 }, { "epoch": 0.2740125108057149, "grad_norm": 0.740051362894615, "learning_rate": 0.00019405475544865465, "loss": 12.616, "step": 5032 }, { "epoch": 0.274066964802298, "grad_norm": 0.7922028822553647, "learning_rate": 0.00019405175985545605, "loss": 12.5247, "step": 5033 }, { "epoch": 0.27412141879888097, "grad_norm": 0.6497358398767344, "learning_rate": 0.00019404876353089522, "loss": 12.4952, "step": 5034 }, { "epoch": 0.27417587279546396, "grad_norm": 0.7550312787281879, "learning_rate": 0.0001940457664749954, "loss": 12.5047, "step": 5035 }, { "epoch": 0.274230326792047, "grad_norm": 0.6699670017875384, "learning_rate": 0.00019404276868777994, "loss": 12.6481, "step": 5036 }, { "epoch": 0.27428478078863, "grad_norm": 0.7225857141499656, "learning_rate": 0.00019403977016927212, "loss": 12.3542, "step": 5037 }, { "epoch": 0.274339234785213, "grad_norm": 0.6977057679519985, "learning_rate": 0.0001940367709194953, "loss": 12.5138, "step": 5038 }, { "epoch": 0.27439368878179604, "grad_norm": 0.704198464905272, "learning_rate": 0.0001940337709384728, "loss": 12.5917, "step": 5039 }, { "epoch": 0.27444814277837903, "grad_norm": 0.6711982258147914, "learning_rate": 0.0001940307702262279, "loss": 12.6132, "step": 5040 }, { "epoch": 0.274502596774962, "grad_norm": 0.8079769295912083, "learning_rate": 0.00019402776878278395, "loss": 12.6885, "step": 5041 }, { "epoch": 0.2745570507715451, "grad_norm": 0.6559387708196829, "learning_rate": 0.00019402476660816432, "loss": 12.5405, "step": 5042 }, { "epoch": 0.27461150476812807, "grad_norm": 0.7379381995247378, "learning_rate": 0.00019402176370239232, "loss": 12.6785, "step": 5043 }, { "epoch": 0.27466595876471106, "grad_norm": 0.690471720110709, "learning_rate": 0.00019401876006549132, "loss": 12.5732, "step": 5044 }, { "epoch": 0.2747204127612941, "grad_norm": 0.7080593199826031, "learning_rate": 0.0001940157556974847, "loss": 12.6293, "step": 5045 }, { "epoch": 0.2747748667578771, "grad_norm": 0.6739896979204867, "learning_rate": 0.0001940127505983958, "loss": 12.5172, "step": 5046 }, { "epoch": 0.27482932075446015, "grad_norm": 0.651768469028915, "learning_rate": 0.00019400974476824795, "loss": 12.5162, "step": 5047 }, { "epoch": 0.27488377475104314, "grad_norm": 0.838781952055105, "learning_rate": 0.00019400673820706458, "loss": 12.6232, "step": 5048 }, { "epoch": 0.27493822874762613, "grad_norm": 0.6672933137610362, "learning_rate": 0.00019400373091486904, "loss": 12.6947, "step": 5049 }, { "epoch": 0.2749926827442092, "grad_norm": 0.7835512965878694, "learning_rate": 0.00019400072289168474, "loss": 12.5005, "step": 5050 }, { "epoch": 0.2750471367407922, "grad_norm": 0.74299265506198, "learning_rate": 0.00019399771413753506, "loss": 12.5162, "step": 5051 }, { "epoch": 0.27510159073737517, "grad_norm": 0.7492972464867815, "learning_rate": 0.00019399470465244337, "loss": 12.6574, "step": 5052 }, { "epoch": 0.2751560447339582, "grad_norm": 0.7271180956005475, "learning_rate": 0.0001939916944364331, "loss": 12.47, "step": 5053 }, { "epoch": 0.2752104987305412, "grad_norm": 0.671577089718614, "learning_rate": 0.00019398868348952764, "loss": 12.6404, "step": 5054 }, { "epoch": 0.2752649527271242, "grad_norm": 0.7593124681164001, "learning_rate": 0.00019398567181175042, "loss": 12.6242, "step": 5055 }, { "epoch": 0.27531940672370725, "grad_norm": 0.7147968774089865, "learning_rate": 0.0001939826594031249, "loss": 12.4892, "step": 5056 }, { "epoch": 0.27537386072029024, "grad_norm": 0.6362963231401051, "learning_rate": 0.0001939796462636744, "loss": 12.5185, "step": 5057 }, { "epoch": 0.27542831471687323, "grad_norm": 0.710749446087054, "learning_rate": 0.0001939766323934224, "loss": 12.5753, "step": 5058 }, { "epoch": 0.2754827687134563, "grad_norm": 0.6111662941975942, "learning_rate": 0.0001939736177923924, "loss": 12.5201, "step": 5059 }, { "epoch": 0.2755372227100393, "grad_norm": 0.6880616611302837, "learning_rate": 0.00019397060246060776, "loss": 12.6284, "step": 5060 }, { "epoch": 0.27559167670662227, "grad_norm": 0.6281572869817057, "learning_rate": 0.00019396758639809197, "loss": 12.5722, "step": 5061 }, { "epoch": 0.2756461307032053, "grad_norm": 0.790136974222293, "learning_rate": 0.00019396456960486846, "loss": 12.6321, "step": 5062 }, { "epoch": 0.2757005846997883, "grad_norm": 0.6693878597166492, "learning_rate": 0.0001939615520809607, "loss": 12.5091, "step": 5063 }, { "epoch": 0.2757550386963713, "grad_norm": 0.6646705864772172, "learning_rate": 0.00019395853382639215, "loss": 12.6344, "step": 5064 }, { "epoch": 0.27580949269295435, "grad_norm": 0.7191256074928883, "learning_rate": 0.00019395551484118628, "loss": 12.6256, "step": 5065 }, { "epoch": 0.27586394668953734, "grad_norm": 0.8147113211400999, "learning_rate": 0.0001939524951253666, "loss": 12.6511, "step": 5066 }, { "epoch": 0.27591840068612034, "grad_norm": 0.6417233825294415, "learning_rate": 0.00019394947467895652, "loss": 12.543, "step": 5067 }, { "epoch": 0.2759728546827034, "grad_norm": 0.6366238255470038, "learning_rate": 0.0001939464535019796, "loss": 12.5125, "step": 5068 }, { "epoch": 0.2760273086792864, "grad_norm": 0.6932555039744597, "learning_rate": 0.0001939434315944593, "loss": 12.6213, "step": 5069 }, { "epoch": 0.27608176267586937, "grad_norm": 0.7148171868609757, "learning_rate": 0.0001939404089564191, "loss": 12.5546, "step": 5070 }, { "epoch": 0.2761362166724524, "grad_norm": 0.7662802540895398, "learning_rate": 0.00019393738558788254, "loss": 12.4323, "step": 5071 }, { "epoch": 0.2761906706690354, "grad_norm": 0.7005674022938893, "learning_rate": 0.00019393436148887314, "loss": 12.7013, "step": 5072 }, { "epoch": 0.2762451246656184, "grad_norm": 0.7041144517869332, "learning_rate": 0.00019393133665941437, "loss": 12.6209, "step": 5073 }, { "epoch": 0.27629957866220145, "grad_norm": 0.6777116434436815, "learning_rate": 0.00019392831109952977, "loss": 12.4813, "step": 5074 }, { "epoch": 0.27635403265878444, "grad_norm": 0.6845631530681943, "learning_rate": 0.00019392528480924285, "loss": 12.5907, "step": 5075 }, { "epoch": 0.27640848665536744, "grad_norm": 0.7114657166282016, "learning_rate": 0.00019392225778857723, "loss": 12.5673, "step": 5076 }, { "epoch": 0.2764629406519505, "grad_norm": 0.8107231975439327, "learning_rate": 0.00019391923003755633, "loss": 12.5245, "step": 5077 }, { "epoch": 0.2765173946485335, "grad_norm": 0.8040081885700733, "learning_rate": 0.00019391620155620375, "loss": 12.5707, "step": 5078 }, { "epoch": 0.2765718486451165, "grad_norm": 0.6572804074345328, "learning_rate": 0.00019391317234454305, "loss": 12.6469, "step": 5079 }, { "epoch": 0.2766263026416995, "grad_norm": 0.8236022163967431, "learning_rate": 0.00019391014240259776, "loss": 12.5572, "step": 5080 }, { "epoch": 0.2766807566382825, "grad_norm": 0.658914373309915, "learning_rate": 0.00019390711173039146, "loss": 12.5925, "step": 5081 }, { "epoch": 0.27673521063486556, "grad_norm": 0.7943194386641196, "learning_rate": 0.00019390408032794772, "loss": 12.6722, "step": 5082 }, { "epoch": 0.27678966463144855, "grad_norm": 0.6973239731440578, "learning_rate": 0.00019390104819529008, "loss": 12.552, "step": 5083 }, { "epoch": 0.27684411862803154, "grad_norm": 0.7418809699302191, "learning_rate": 0.00019389801533244218, "loss": 12.5722, "step": 5084 }, { "epoch": 0.2768985726246146, "grad_norm": 0.7422872676063248, "learning_rate": 0.00019389498173942756, "loss": 12.5761, "step": 5085 }, { "epoch": 0.2769530266211976, "grad_norm": 0.703580772909161, "learning_rate": 0.0001938919474162698, "loss": 12.5502, "step": 5086 }, { "epoch": 0.2770074806177806, "grad_norm": 0.7000256855033609, "learning_rate": 0.00019388891236299253, "loss": 12.6968, "step": 5087 }, { "epoch": 0.2770619346143636, "grad_norm": 0.7135620202619926, "learning_rate": 0.0001938858765796193, "loss": 12.6916, "step": 5088 }, { "epoch": 0.2771163886109466, "grad_norm": 0.7652487105619586, "learning_rate": 0.00019388284006617375, "loss": 12.3941, "step": 5089 }, { "epoch": 0.2771708426075296, "grad_norm": 0.690744559913229, "learning_rate": 0.0001938798028226795, "loss": 12.5351, "step": 5090 }, { "epoch": 0.27722529660411266, "grad_norm": 0.8156095234373029, "learning_rate": 0.0001938767648491602, "loss": 12.4954, "step": 5091 }, { "epoch": 0.27727975060069565, "grad_norm": 0.7090953026934278, "learning_rate": 0.00019387372614563936, "loss": 12.5321, "step": 5092 }, { "epoch": 0.27733420459727864, "grad_norm": 0.6409777844639443, "learning_rate": 0.00019387068671214072, "loss": 12.4822, "step": 5093 }, { "epoch": 0.2773886585938617, "grad_norm": 0.7587500601030793, "learning_rate": 0.0001938676465486879, "loss": 12.6798, "step": 5094 }, { "epoch": 0.2774431125904447, "grad_norm": 0.649528915312248, "learning_rate": 0.0001938646056553045, "loss": 12.502, "step": 5095 }, { "epoch": 0.2774975665870277, "grad_norm": 0.747554472997202, "learning_rate": 0.00019386156403201416, "loss": 12.5573, "step": 5096 }, { "epoch": 0.2775520205836107, "grad_norm": 0.8592539276831329, "learning_rate": 0.00019385852167884057, "loss": 12.476, "step": 5097 }, { "epoch": 0.2776064745801937, "grad_norm": 0.7200503490316053, "learning_rate": 0.00019385547859580743, "loss": 12.5044, "step": 5098 }, { "epoch": 0.2776609285767767, "grad_norm": 0.6728688453119079, "learning_rate": 0.00019385243478293828, "loss": 12.6178, "step": 5099 }, { "epoch": 0.27771538257335976, "grad_norm": 0.6660773015222236, "learning_rate": 0.0001938493902402569, "loss": 12.6218, "step": 5100 }, { "epoch": 0.27776983656994275, "grad_norm": 0.6954282062789127, "learning_rate": 0.00019384634496778688, "loss": 12.4654, "step": 5101 }, { "epoch": 0.27782429056652574, "grad_norm": 0.7156888717451666, "learning_rate": 0.00019384329896555196, "loss": 12.6465, "step": 5102 }, { "epoch": 0.2778787445631088, "grad_norm": 0.7179172725399301, "learning_rate": 0.00019384025223357582, "loss": 12.5168, "step": 5103 }, { "epoch": 0.2779331985596918, "grad_norm": 0.7759017642906217, "learning_rate": 0.00019383720477188216, "loss": 12.602, "step": 5104 }, { "epoch": 0.2779876525562748, "grad_norm": 0.6510398797162207, "learning_rate": 0.00019383415658049465, "loss": 12.4956, "step": 5105 }, { "epoch": 0.2780421065528578, "grad_norm": 0.6642562478621538, "learning_rate": 0.00019383110765943697, "loss": 12.4895, "step": 5106 }, { "epoch": 0.2780965605494408, "grad_norm": 0.6856961004079563, "learning_rate": 0.00019382805800873288, "loss": 12.519, "step": 5107 }, { "epoch": 0.2781510145460238, "grad_norm": 0.6423899847602803, "learning_rate": 0.0001938250076284061, "loss": 12.5387, "step": 5108 }, { "epoch": 0.27820546854260686, "grad_norm": 0.636575246825305, "learning_rate": 0.00019382195651848028, "loss": 12.7195, "step": 5109 }, { "epoch": 0.27825992253918985, "grad_norm": 0.6970889423434784, "learning_rate": 0.00019381890467897922, "loss": 12.6636, "step": 5110 }, { "epoch": 0.27831437653577284, "grad_norm": 0.626582862445139, "learning_rate": 0.00019381585210992663, "loss": 12.5385, "step": 5111 }, { "epoch": 0.2783688305323559, "grad_norm": 0.7246283791178956, "learning_rate": 0.00019381279881134625, "loss": 12.573, "step": 5112 }, { "epoch": 0.2784232845289389, "grad_norm": 0.6601903163083505, "learning_rate": 0.00019380974478326178, "loss": 12.6021, "step": 5113 }, { "epoch": 0.27847773852552193, "grad_norm": 0.762912600904577, "learning_rate": 0.000193806690025697, "loss": 12.6755, "step": 5114 }, { "epoch": 0.2785321925221049, "grad_norm": 0.676212979343806, "learning_rate": 0.0001938036345386757, "loss": 12.5951, "step": 5115 }, { "epoch": 0.2785866465186879, "grad_norm": 0.7940947673405626, "learning_rate": 0.0001938005783222216, "loss": 12.5618, "step": 5116 }, { "epoch": 0.27864110051527097, "grad_norm": 0.6346251485123232, "learning_rate": 0.00019379752137635848, "loss": 12.4973, "step": 5117 }, { "epoch": 0.27869555451185396, "grad_norm": 0.8342476054070347, "learning_rate": 0.00019379446370111007, "loss": 12.6665, "step": 5118 }, { "epoch": 0.27875000850843695, "grad_norm": 0.715632773565721, "learning_rate": 0.0001937914052965002, "loss": 12.641, "step": 5119 }, { "epoch": 0.27880446250502, "grad_norm": 0.6382536033591416, "learning_rate": 0.00019378834616255264, "loss": 12.6341, "step": 5120 }, { "epoch": 0.278858916501603, "grad_norm": 0.6833566071913983, "learning_rate": 0.0001937852862992912, "loss": 12.6692, "step": 5121 }, { "epoch": 0.278913370498186, "grad_norm": 0.7187158872693135, "learning_rate": 0.00019378222570673955, "loss": 12.5714, "step": 5122 }, { "epoch": 0.27896782449476903, "grad_norm": 0.7008947274359243, "learning_rate": 0.00019377916438492168, "loss": 12.577, "step": 5123 }, { "epoch": 0.279022278491352, "grad_norm": 0.803851898243202, "learning_rate": 0.00019377610233386124, "loss": 12.7355, "step": 5124 }, { "epoch": 0.279076732487935, "grad_norm": 0.7559303634246921, "learning_rate": 0.00019377303955358217, "loss": 12.6045, "step": 5125 }, { "epoch": 0.27913118648451807, "grad_norm": 0.8192745121555173, "learning_rate": 0.00019376997604410816, "loss": 12.7426, "step": 5126 }, { "epoch": 0.27918564048110106, "grad_norm": 0.7247911238027126, "learning_rate": 0.0001937669118054631, "loss": 12.598, "step": 5127 }, { "epoch": 0.27924009447768405, "grad_norm": 0.7163926957202394, "learning_rate": 0.00019376384683767085, "loss": 12.5991, "step": 5128 }, { "epoch": 0.2792945484742671, "grad_norm": 0.633941715979679, "learning_rate": 0.00019376078114075515, "loss": 12.509, "step": 5129 }, { "epoch": 0.2793490024708501, "grad_norm": 0.8875319613920101, "learning_rate": 0.00019375771471473994, "loss": 12.5074, "step": 5130 }, { "epoch": 0.2794034564674331, "grad_norm": 0.6848286176502122, "learning_rate": 0.00019375464755964897, "loss": 12.605, "step": 5131 }, { "epoch": 0.27945791046401613, "grad_norm": 0.7319492347428458, "learning_rate": 0.00019375157967550617, "loss": 12.6155, "step": 5132 }, { "epoch": 0.2795123644605991, "grad_norm": 0.9091714525701203, "learning_rate": 0.00019374851106233534, "loss": 12.7187, "step": 5133 }, { "epoch": 0.2795668184571821, "grad_norm": 0.7162150438667014, "learning_rate": 0.0001937454417201604, "loss": 12.5939, "step": 5134 }, { "epoch": 0.27962127245376517, "grad_norm": 0.9315398296781449, "learning_rate": 0.00019374237164900514, "loss": 12.4741, "step": 5135 }, { "epoch": 0.27967572645034816, "grad_norm": 0.7612621418120459, "learning_rate": 0.00019373930084889352, "loss": 12.5645, "step": 5136 }, { "epoch": 0.27973018044693115, "grad_norm": 0.8713698804427293, "learning_rate": 0.00019373622931984934, "loss": 12.4994, "step": 5137 }, { "epoch": 0.2797846344435142, "grad_norm": 0.8327126204853291, "learning_rate": 0.00019373315706189655, "loss": 12.3497, "step": 5138 }, { "epoch": 0.2798390884400972, "grad_norm": 0.7275140927615509, "learning_rate": 0.000193730084075059, "loss": 12.6424, "step": 5139 }, { "epoch": 0.2798935424366802, "grad_norm": 0.7794188688974611, "learning_rate": 0.0001937270103593606, "loss": 12.5054, "step": 5140 }, { "epoch": 0.27994799643326324, "grad_norm": 0.6425933749368322, "learning_rate": 0.00019372393591482524, "loss": 12.5495, "step": 5141 }, { "epoch": 0.28000245042984623, "grad_norm": 0.7633781746174405, "learning_rate": 0.00019372086074147685, "loss": 12.634, "step": 5142 }, { "epoch": 0.2800569044264292, "grad_norm": 0.7500822577947447, "learning_rate": 0.00019371778483933934, "loss": 12.7041, "step": 5143 }, { "epoch": 0.28011135842301227, "grad_norm": 0.6833744271936957, "learning_rate": 0.0001937147082084366, "loss": 12.5521, "step": 5144 }, { "epoch": 0.28016581241959526, "grad_norm": 0.7690187012529756, "learning_rate": 0.00019371163084879256, "loss": 12.4258, "step": 5145 }, { "epoch": 0.2802202664161783, "grad_norm": 0.7467336494036554, "learning_rate": 0.00019370855276043121, "loss": 12.5721, "step": 5146 }, { "epoch": 0.2802747204127613, "grad_norm": 0.7625607151593078, "learning_rate": 0.0001937054739433764, "loss": 12.5985, "step": 5147 }, { "epoch": 0.2803291744093443, "grad_norm": 0.8703165636039294, "learning_rate": 0.0001937023943976521, "loss": 12.638, "step": 5148 }, { "epoch": 0.28038362840592734, "grad_norm": 0.725423606749526, "learning_rate": 0.0001936993141232823, "loss": 12.5103, "step": 5149 }, { "epoch": 0.28043808240251034, "grad_norm": 0.763692107957062, "learning_rate": 0.0001936962331202909, "loss": 12.5039, "step": 5150 }, { "epoch": 0.28049253639909333, "grad_norm": 0.6316565367613988, "learning_rate": 0.00019369315138870188, "loss": 12.4585, "step": 5151 }, { "epoch": 0.2805469903956764, "grad_norm": 0.7350835482152517, "learning_rate": 0.0001936900689285392, "loss": 12.6043, "step": 5152 }, { "epoch": 0.28060144439225937, "grad_norm": 0.6690143152131562, "learning_rate": 0.00019368698573982686, "loss": 12.5098, "step": 5153 }, { "epoch": 0.28065589838884236, "grad_norm": 0.7372146097111318, "learning_rate": 0.00019368390182258877, "loss": 12.5539, "step": 5154 }, { "epoch": 0.2807103523854254, "grad_norm": 0.6211436551518016, "learning_rate": 0.00019368081717684898, "loss": 12.5419, "step": 5155 }, { "epoch": 0.2807648063820084, "grad_norm": 0.7063545203040411, "learning_rate": 0.00019367773180263141, "loss": 12.4829, "step": 5156 }, { "epoch": 0.2808192603785914, "grad_norm": 0.7194962328640749, "learning_rate": 0.00019367464569996012, "loss": 12.6408, "step": 5157 }, { "epoch": 0.28087371437517444, "grad_norm": 0.6871823163250769, "learning_rate": 0.00019367155886885906, "loss": 12.6144, "step": 5158 }, { "epoch": 0.28092816837175744, "grad_norm": 0.6661938851213246, "learning_rate": 0.00019366847130935225, "loss": 12.5857, "step": 5159 }, { "epoch": 0.28098262236834043, "grad_norm": 0.6744656602031699, "learning_rate": 0.0001936653830214637, "loss": 12.5844, "step": 5160 }, { "epoch": 0.2810370763649235, "grad_norm": 0.6865644296890181, "learning_rate": 0.00019366229400521743, "loss": 12.4689, "step": 5161 }, { "epoch": 0.28109153036150647, "grad_norm": 0.7895194676119835, "learning_rate": 0.00019365920426063745, "loss": 12.7955, "step": 5162 }, { "epoch": 0.28114598435808946, "grad_norm": 0.6310374554477493, "learning_rate": 0.00019365611378774778, "loss": 12.6577, "step": 5163 }, { "epoch": 0.2812004383546725, "grad_norm": 1.0253218473524535, "learning_rate": 0.0001936530225865725, "loss": 12.6858, "step": 5164 }, { "epoch": 0.2812548923512555, "grad_norm": 0.6754084523502711, "learning_rate": 0.0001936499306571356, "loss": 12.6863, "step": 5165 }, { "epoch": 0.2813093463478385, "grad_norm": 0.6427002592447747, "learning_rate": 0.00019364683799946112, "loss": 12.5656, "step": 5166 }, { "epoch": 0.28136380034442154, "grad_norm": 0.674802174929845, "learning_rate": 0.0001936437446135731, "loss": 12.4581, "step": 5167 }, { "epoch": 0.28141825434100454, "grad_norm": 0.7627046858697148, "learning_rate": 0.00019364065049949566, "loss": 12.5511, "step": 5168 }, { "epoch": 0.28147270833758753, "grad_norm": 0.6432456260881265, "learning_rate": 0.00019363755565725277, "loss": 12.4937, "step": 5169 }, { "epoch": 0.2815271623341706, "grad_norm": 0.6403274159229349, "learning_rate": 0.0001936344600868686, "loss": 12.6198, "step": 5170 }, { "epoch": 0.28158161633075357, "grad_norm": 0.6892405055573441, "learning_rate": 0.00019363136378836712, "loss": 12.5993, "step": 5171 }, { "epoch": 0.28163607032733656, "grad_norm": 0.6442568030583643, "learning_rate": 0.00019362826676177247, "loss": 12.5813, "step": 5172 }, { "epoch": 0.2816905243239196, "grad_norm": 0.6189612613344044, "learning_rate": 0.00019362516900710874, "loss": 12.56, "step": 5173 }, { "epoch": 0.2817449783205026, "grad_norm": 0.6617043807798307, "learning_rate": 0.00019362207052439997, "loss": 12.5102, "step": 5174 }, { "epoch": 0.2817994323170856, "grad_norm": 0.6492188492603169, "learning_rate": 0.00019361897131367028, "loss": 12.5758, "step": 5175 }, { "epoch": 0.28185388631366864, "grad_norm": 0.7473061625828011, "learning_rate": 0.00019361587137494378, "loss": 12.6179, "step": 5176 }, { "epoch": 0.28190834031025164, "grad_norm": 0.6547901694630822, "learning_rate": 0.00019361277070824455, "loss": 12.539, "step": 5177 }, { "epoch": 0.28196279430683463, "grad_norm": 0.7669279291636536, "learning_rate": 0.00019360966931359673, "loss": 12.5469, "step": 5178 }, { "epoch": 0.2820172483034177, "grad_norm": 0.7186642421638226, "learning_rate": 0.0001936065671910244, "loss": 12.5702, "step": 5179 }, { "epoch": 0.28207170230000067, "grad_norm": 0.6731075258080949, "learning_rate": 0.00019360346434055172, "loss": 12.51, "step": 5180 }, { "epoch": 0.2821261562965837, "grad_norm": 0.6667099900813783, "learning_rate": 0.00019360036076220282, "loss": 12.5498, "step": 5181 }, { "epoch": 0.2821806102931667, "grad_norm": 0.7785210314930021, "learning_rate": 0.0001935972564560018, "loss": 12.6939, "step": 5182 }, { "epoch": 0.2822350642897497, "grad_norm": 0.7067372272734197, "learning_rate": 0.0001935941514219728, "loss": 12.5571, "step": 5183 }, { "epoch": 0.28228951828633275, "grad_norm": 0.678421616002225, "learning_rate": 0.00019359104566014, "loss": 12.599, "step": 5184 }, { "epoch": 0.28234397228291574, "grad_norm": 0.6551604392091743, "learning_rate": 0.00019358793917052756, "loss": 12.5489, "step": 5185 }, { "epoch": 0.28239842627949874, "grad_norm": 0.7103555805268724, "learning_rate": 0.00019358483195315958, "loss": 12.5438, "step": 5186 }, { "epoch": 0.2824528802760818, "grad_norm": 0.7069284288372881, "learning_rate": 0.00019358172400806028, "loss": 12.5381, "step": 5187 }, { "epoch": 0.2825073342726648, "grad_norm": 0.6722826182924235, "learning_rate": 0.00019357861533525377, "loss": 12.586, "step": 5188 }, { "epoch": 0.28256178826924777, "grad_norm": 0.699610418795585, "learning_rate": 0.00019357550593476425, "loss": 12.5382, "step": 5189 }, { "epoch": 0.2826162422658308, "grad_norm": 0.8746550655544723, "learning_rate": 0.00019357239580661593, "loss": 12.4991, "step": 5190 }, { "epoch": 0.2826706962624138, "grad_norm": 0.668798828037185, "learning_rate": 0.00019356928495083297, "loss": 12.6543, "step": 5191 }, { "epoch": 0.2827251502589968, "grad_norm": 0.7044621987621039, "learning_rate": 0.00019356617336743951, "loss": 12.6718, "step": 5192 }, { "epoch": 0.28277960425557985, "grad_norm": 0.6422951635514824, "learning_rate": 0.00019356306105645983, "loss": 12.5407, "step": 5193 }, { "epoch": 0.28283405825216285, "grad_norm": 0.684927068062873, "learning_rate": 0.00019355994801791812, "loss": 12.532, "step": 5194 }, { "epoch": 0.28288851224874584, "grad_norm": 0.723283260192857, "learning_rate": 0.00019355683425183854, "loss": 12.5527, "step": 5195 }, { "epoch": 0.2829429662453289, "grad_norm": 0.7360719862470695, "learning_rate": 0.0001935537197582453, "loss": 12.6376, "step": 5196 }, { "epoch": 0.2829974202419119, "grad_norm": 0.6883098067111958, "learning_rate": 0.0001935506045371627, "loss": 12.5532, "step": 5197 }, { "epoch": 0.28305187423849487, "grad_norm": 0.6584212997077407, "learning_rate": 0.00019354748858861487, "loss": 12.5902, "step": 5198 }, { "epoch": 0.2831063282350779, "grad_norm": 0.6992412627582681, "learning_rate": 0.0001935443719126261, "loss": 12.5555, "step": 5199 }, { "epoch": 0.2831607822316609, "grad_norm": 0.7044431102255994, "learning_rate": 0.0001935412545092206, "loss": 12.5461, "step": 5200 }, { "epoch": 0.2832152362282439, "grad_norm": 0.7073752555792435, "learning_rate": 0.00019353813637842265, "loss": 12.4574, "step": 5201 }, { "epoch": 0.28326969022482695, "grad_norm": 0.7015761112063352, "learning_rate": 0.00019353501752025643, "loss": 12.6205, "step": 5202 }, { "epoch": 0.28332414422140995, "grad_norm": 0.6979274453509807, "learning_rate": 0.00019353189793474619, "loss": 12.7032, "step": 5203 }, { "epoch": 0.28337859821799294, "grad_norm": 0.6957153333269988, "learning_rate": 0.0001935287776219163, "loss": 12.5931, "step": 5204 }, { "epoch": 0.283433052214576, "grad_norm": 0.6897842774068781, "learning_rate": 0.00019352565658179092, "loss": 12.5964, "step": 5205 }, { "epoch": 0.283487506211159, "grad_norm": 0.6774486360522814, "learning_rate": 0.00019352253481439436, "loss": 12.6081, "step": 5206 }, { "epoch": 0.28354196020774197, "grad_norm": 0.6775675886557151, "learning_rate": 0.00019351941231975087, "loss": 12.5461, "step": 5207 }, { "epoch": 0.283596414204325, "grad_norm": 0.6820558537395075, "learning_rate": 0.00019351628909788473, "loss": 12.4608, "step": 5208 }, { "epoch": 0.283650868200908, "grad_norm": 0.6534199168216551, "learning_rate": 0.00019351316514882027, "loss": 12.5736, "step": 5209 }, { "epoch": 0.283705322197491, "grad_norm": 0.6448033586536517, "learning_rate": 0.00019351004047258176, "loss": 12.6143, "step": 5210 }, { "epoch": 0.28375977619407405, "grad_norm": 0.6961255459081882, "learning_rate": 0.0001935069150691935, "loss": 12.5625, "step": 5211 }, { "epoch": 0.28381423019065705, "grad_norm": 0.6788903485850866, "learning_rate": 0.00019350378893867975, "loss": 12.6184, "step": 5212 }, { "epoch": 0.2838686841872401, "grad_norm": 0.6912612299861243, "learning_rate": 0.00019350066208106487, "loss": 12.608, "step": 5213 }, { "epoch": 0.2839231381838231, "grad_norm": 0.6830298112202225, "learning_rate": 0.00019349753449637318, "loss": 12.7429, "step": 5214 }, { "epoch": 0.2839775921804061, "grad_norm": 0.6651906648917761, "learning_rate": 0.00019349440618462898, "loss": 12.5816, "step": 5215 }, { "epoch": 0.28403204617698913, "grad_norm": 0.7197247296245165, "learning_rate": 0.00019349127714585657, "loss": 12.4892, "step": 5216 }, { "epoch": 0.2840865001735721, "grad_norm": 0.6601689123815974, "learning_rate": 0.00019348814738008035, "loss": 12.5705, "step": 5217 }, { "epoch": 0.2841409541701551, "grad_norm": 0.8551875521767103, "learning_rate": 0.00019348501688732462, "loss": 12.6959, "step": 5218 }, { "epoch": 0.28419540816673816, "grad_norm": 0.6824147784459194, "learning_rate": 0.00019348188566761367, "loss": 12.6101, "step": 5219 }, { "epoch": 0.28424986216332115, "grad_norm": 0.6444811599986673, "learning_rate": 0.00019347875372097194, "loss": 12.5973, "step": 5220 }, { "epoch": 0.28430431615990415, "grad_norm": 0.7913232678020938, "learning_rate": 0.00019347562104742375, "loss": 12.5736, "step": 5221 }, { "epoch": 0.2843587701564872, "grad_norm": 0.6667682774936895, "learning_rate": 0.0001934724876469934, "loss": 12.5546, "step": 5222 }, { "epoch": 0.2844132241530702, "grad_norm": 0.6818675106959271, "learning_rate": 0.00019346935351970536, "loss": 12.5956, "step": 5223 }, { "epoch": 0.2844676781496532, "grad_norm": 0.741997237016835, "learning_rate": 0.00019346621866558395, "loss": 12.6094, "step": 5224 }, { "epoch": 0.28452213214623623, "grad_norm": 0.7304820963044663, "learning_rate": 0.00019346308308465355, "loss": 12.5719, "step": 5225 }, { "epoch": 0.2845765861428192, "grad_norm": 0.7698383608402133, "learning_rate": 0.00019345994677693855, "loss": 12.6115, "step": 5226 }, { "epoch": 0.2846310401394022, "grad_norm": 0.6658509131843733, "learning_rate": 0.0001934568097424633, "loss": 12.4676, "step": 5227 }, { "epoch": 0.28468549413598526, "grad_norm": 0.7863371286802923, "learning_rate": 0.00019345367198125225, "loss": 12.4797, "step": 5228 }, { "epoch": 0.28473994813256825, "grad_norm": 0.7296124497290241, "learning_rate": 0.00019345053349332977, "loss": 12.6438, "step": 5229 }, { "epoch": 0.28479440212915125, "grad_norm": 0.7198469126781113, "learning_rate": 0.00019344739427872026, "loss": 12.6073, "step": 5230 }, { "epoch": 0.2848488561257343, "grad_norm": 0.7259120593841006, "learning_rate": 0.00019344425433744813, "loss": 12.4623, "step": 5231 }, { "epoch": 0.2849033101223173, "grad_norm": 0.7251504404787701, "learning_rate": 0.00019344111366953782, "loss": 12.6461, "step": 5232 }, { "epoch": 0.2849577641189003, "grad_norm": 0.7413458025890929, "learning_rate": 0.00019343797227501375, "loss": 12.6042, "step": 5233 }, { "epoch": 0.28501221811548333, "grad_norm": 0.7515706622044661, "learning_rate": 0.00019343483015390033, "loss": 12.6015, "step": 5234 }, { "epoch": 0.2850666721120663, "grad_norm": 0.7603678090629596, "learning_rate": 0.000193431687306222, "loss": 12.6083, "step": 5235 }, { "epoch": 0.2851211261086493, "grad_norm": 0.6856133174388271, "learning_rate": 0.0001934285437320032, "loss": 12.5179, "step": 5236 }, { "epoch": 0.28517558010523236, "grad_norm": 0.6688520904320578, "learning_rate": 0.0001934253994312684, "loss": 12.5741, "step": 5237 }, { "epoch": 0.28523003410181536, "grad_norm": 0.6992197464644883, "learning_rate": 0.000193422254404042, "loss": 12.5731, "step": 5238 }, { "epoch": 0.28528448809839835, "grad_norm": 0.7092290730706611, "learning_rate": 0.0001934191086503485, "loss": 12.649, "step": 5239 }, { "epoch": 0.2853389420949814, "grad_norm": 0.7770595485512367, "learning_rate": 0.00019341596217021235, "loss": 12.5758, "step": 5240 }, { "epoch": 0.2853933960915644, "grad_norm": 0.654164035203944, "learning_rate": 0.00019341281496365798, "loss": 12.6354, "step": 5241 }, { "epoch": 0.2854478500881474, "grad_norm": 0.6837349818438978, "learning_rate": 0.0001934096670307099, "loss": 12.6693, "step": 5242 }, { "epoch": 0.28550230408473043, "grad_norm": 0.7154457385795903, "learning_rate": 0.00019340651837139257, "loss": 12.5421, "step": 5243 }, { "epoch": 0.2855567580813134, "grad_norm": 0.7576311967450255, "learning_rate": 0.00019340336898573054, "loss": 12.6667, "step": 5244 }, { "epoch": 0.2856112120778964, "grad_norm": 0.709766937442989, "learning_rate": 0.0001934002188737482, "loss": 12.6011, "step": 5245 }, { "epoch": 0.28566566607447946, "grad_norm": 0.6936480129371836, "learning_rate": 0.00019339706803547015, "loss": 12.692, "step": 5246 }, { "epoch": 0.28572012007106246, "grad_norm": 0.7300076513644107, "learning_rate": 0.0001933939164709208, "loss": 12.5928, "step": 5247 }, { "epoch": 0.2857745740676455, "grad_norm": 0.8340857027312786, "learning_rate": 0.0001933907641801247, "loss": 12.775, "step": 5248 }, { "epoch": 0.2858290280642285, "grad_norm": 0.7522241994359548, "learning_rate": 0.00019338761116310634, "loss": 12.626, "step": 5249 }, { "epoch": 0.2858834820608115, "grad_norm": 0.7167487172392022, "learning_rate": 0.0001933844574198903, "loss": 12.5693, "step": 5250 }, { "epoch": 0.28593793605739454, "grad_norm": 0.6799345059630523, "learning_rate": 0.000193381302950501, "loss": 12.4642, "step": 5251 }, { "epoch": 0.28599239005397753, "grad_norm": 0.8655447933860508, "learning_rate": 0.00019337814775496307, "loss": 12.6, "step": 5252 }, { "epoch": 0.2860468440505605, "grad_norm": 0.6945184162411392, "learning_rate": 0.000193374991833301, "loss": 12.5597, "step": 5253 }, { "epoch": 0.28610129804714357, "grad_norm": 0.7442352463692852, "learning_rate": 0.00019337183518553931, "loss": 12.5904, "step": 5254 }, { "epoch": 0.28615575204372656, "grad_norm": 0.8143236594223469, "learning_rate": 0.0001933686778117026, "loss": 12.6048, "step": 5255 }, { "epoch": 0.28621020604030956, "grad_norm": 0.6790276390562144, "learning_rate": 0.0001933655197118154, "loss": 12.5126, "step": 5256 }, { "epoch": 0.2862646600368926, "grad_norm": 0.6440104432738986, "learning_rate": 0.0001933623608859022, "loss": 12.532, "step": 5257 }, { "epoch": 0.2863191140334756, "grad_norm": 0.6313806996060203, "learning_rate": 0.0001933592013339877, "loss": 12.5107, "step": 5258 }, { "epoch": 0.2863735680300586, "grad_norm": 0.6764511258985463, "learning_rate": 0.00019335604105609632, "loss": 12.4815, "step": 5259 }, { "epoch": 0.28642802202664164, "grad_norm": 0.6467709401645445, "learning_rate": 0.00019335288005225277, "loss": 12.6475, "step": 5260 }, { "epoch": 0.28648247602322463, "grad_norm": 0.6753532669897918, "learning_rate": 0.00019334971832248154, "loss": 12.5827, "step": 5261 }, { "epoch": 0.2865369300198076, "grad_norm": 0.6827578003307204, "learning_rate": 0.00019334655586680723, "loss": 12.6088, "step": 5262 }, { "epoch": 0.28659138401639067, "grad_norm": 0.7450610764899795, "learning_rate": 0.0001933433926852545, "loss": 12.6325, "step": 5263 }, { "epoch": 0.28664583801297366, "grad_norm": 0.7387077864867498, "learning_rate": 0.00019334022877784786, "loss": 12.5494, "step": 5264 }, { "epoch": 0.28670029200955666, "grad_norm": 0.8186123622954318, "learning_rate": 0.00019333706414461195, "loss": 12.7283, "step": 5265 }, { "epoch": 0.2867547460061397, "grad_norm": 0.718727468080562, "learning_rate": 0.00019333389878557137, "loss": 12.5985, "step": 5266 }, { "epoch": 0.2868092000027227, "grad_norm": 0.6681241377843442, "learning_rate": 0.00019333073270075076, "loss": 12.5538, "step": 5267 }, { "epoch": 0.2868636539993057, "grad_norm": 0.6432945678201138, "learning_rate": 0.0001933275658901747, "loss": 12.5511, "step": 5268 }, { "epoch": 0.28691810799588874, "grad_norm": 0.7267145364610287, "learning_rate": 0.00019332439835386786, "loss": 12.4169, "step": 5269 }, { "epoch": 0.28697256199247173, "grad_norm": 0.6547373420430387, "learning_rate": 0.00019332123009185482, "loss": 12.54, "step": 5270 }, { "epoch": 0.2870270159890547, "grad_norm": 0.6464456886624865, "learning_rate": 0.00019331806110416027, "loss": 12.6696, "step": 5271 }, { "epoch": 0.28708146998563777, "grad_norm": 0.639416556182046, "learning_rate": 0.0001933148913908088, "loss": 12.606, "step": 5272 }, { "epoch": 0.28713592398222076, "grad_norm": 0.765463803610551, "learning_rate": 0.00019331172095182511, "loss": 12.5596, "step": 5273 }, { "epoch": 0.28719037797880376, "grad_norm": 0.6475594591585864, "learning_rate": 0.00019330854978723383, "loss": 12.57, "step": 5274 }, { "epoch": 0.2872448319753868, "grad_norm": 0.738671421515384, "learning_rate": 0.00019330537789705963, "loss": 12.6256, "step": 5275 }, { "epoch": 0.2872992859719698, "grad_norm": 0.6778383446733212, "learning_rate": 0.00019330220528132713, "loss": 12.4934, "step": 5276 }, { "epoch": 0.2873537399685528, "grad_norm": 0.7386529725271362, "learning_rate": 0.00019329903194006105, "loss": 12.5469, "step": 5277 }, { "epoch": 0.28740819396513584, "grad_norm": 0.714724000030084, "learning_rate": 0.00019329585787328607, "loss": 12.6229, "step": 5278 }, { "epoch": 0.28746264796171883, "grad_norm": 0.6353740118316481, "learning_rate": 0.00019329268308102685, "loss": 12.5172, "step": 5279 }, { "epoch": 0.2875171019583019, "grad_norm": 0.7049945078654178, "learning_rate": 0.00019328950756330803, "loss": 12.5439, "step": 5280 }, { "epoch": 0.28757155595488487, "grad_norm": 0.7747680558953951, "learning_rate": 0.00019328633132015442, "loss": 12.6087, "step": 5281 }, { "epoch": 0.28762600995146786, "grad_norm": 0.6793194770644609, "learning_rate": 0.00019328315435159058, "loss": 12.5076, "step": 5282 }, { "epoch": 0.2876804639480509, "grad_norm": 0.7425325939186668, "learning_rate": 0.00019327997665764137, "loss": 12.6259, "step": 5283 }, { "epoch": 0.2877349179446339, "grad_norm": 0.6843901494952742, "learning_rate": 0.00019327679823833135, "loss": 12.6013, "step": 5284 }, { "epoch": 0.2877893719412169, "grad_norm": 0.6285854157813631, "learning_rate": 0.00019327361909368535, "loss": 12.6006, "step": 5285 }, { "epoch": 0.28784382593779995, "grad_norm": 0.6295413056282074, "learning_rate": 0.00019327043922372802, "loss": 12.4413, "step": 5286 }, { "epoch": 0.28789827993438294, "grad_norm": 0.7771093608805613, "learning_rate": 0.00019326725862848414, "loss": 12.5233, "step": 5287 }, { "epoch": 0.28795273393096593, "grad_norm": 0.6761632961873533, "learning_rate": 0.0001932640773079784, "loss": 12.5032, "step": 5288 }, { "epoch": 0.288007187927549, "grad_norm": 0.6379880807498205, "learning_rate": 0.00019326089526223558, "loss": 12.434, "step": 5289 }, { "epoch": 0.288061641924132, "grad_norm": 0.6797398725626824, "learning_rate": 0.00019325771249128034, "loss": 12.6128, "step": 5290 }, { "epoch": 0.28811609592071497, "grad_norm": 0.828936749194387, "learning_rate": 0.00019325452899513753, "loss": 12.6434, "step": 5291 }, { "epoch": 0.288170549917298, "grad_norm": 0.6184918134959372, "learning_rate": 0.00019325134477383188, "loss": 12.5812, "step": 5292 }, { "epoch": 0.288225003913881, "grad_norm": 0.750997225216585, "learning_rate": 0.0001932481598273881, "loss": 12.5099, "step": 5293 }, { "epoch": 0.288279457910464, "grad_norm": 0.7042831646975033, "learning_rate": 0.000193244974155831, "loss": 12.6465, "step": 5294 }, { "epoch": 0.28833391190704705, "grad_norm": 0.8397389045457825, "learning_rate": 0.00019324178775918536, "loss": 12.6856, "step": 5295 }, { "epoch": 0.28838836590363004, "grad_norm": 0.6364067470380718, "learning_rate": 0.0001932386006374759, "loss": 12.604, "step": 5296 }, { "epoch": 0.28844281990021303, "grad_norm": 0.697749718400721, "learning_rate": 0.00019323541279072748, "loss": 12.6742, "step": 5297 }, { "epoch": 0.2884972738967961, "grad_norm": 0.6885876972827212, "learning_rate": 0.00019323222421896484, "loss": 12.5667, "step": 5298 }, { "epoch": 0.2885517278933791, "grad_norm": 0.6932645326014508, "learning_rate": 0.00019322903492221283, "loss": 12.4531, "step": 5299 }, { "epoch": 0.28860618188996207, "grad_norm": 0.73021297394276, "learning_rate": 0.00019322584490049616, "loss": 12.5136, "step": 5300 }, { "epoch": 0.2886606358865451, "grad_norm": 0.7518670769526901, "learning_rate": 0.00019322265415383969, "loss": 12.5631, "step": 5301 }, { "epoch": 0.2887150898831281, "grad_norm": 0.7825021435770262, "learning_rate": 0.00019321946268226824, "loss": 12.4216, "step": 5302 }, { "epoch": 0.2887695438797111, "grad_norm": 0.7673677711990196, "learning_rate": 0.00019321627048580662, "loss": 12.5662, "step": 5303 }, { "epoch": 0.28882399787629415, "grad_norm": 0.7472072722363353, "learning_rate": 0.00019321307756447963, "loss": 12.5825, "step": 5304 }, { "epoch": 0.28887845187287714, "grad_norm": 0.8010391264329372, "learning_rate": 0.00019320988391831217, "loss": 12.4988, "step": 5305 }, { "epoch": 0.28893290586946013, "grad_norm": 0.6811847137339496, "learning_rate": 0.00019320668954732898, "loss": 12.5728, "step": 5306 }, { "epoch": 0.2889873598660432, "grad_norm": 0.7944142383842702, "learning_rate": 0.00019320349445155492, "loss": 12.4664, "step": 5307 }, { "epoch": 0.2890418138626262, "grad_norm": 0.792613651605558, "learning_rate": 0.0001932002986310149, "loss": 12.7375, "step": 5308 }, { "epoch": 0.28909626785920917, "grad_norm": 0.7122210755604018, "learning_rate": 0.0001931971020857337, "loss": 12.7662, "step": 5309 }, { "epoch": 0.2891507218557922, "grad_norm": 0.734802658973589, "learning_rate": 0.0001931939048157362, "loss": 12.627, "step": 5310 }, { "epoch": 0.2892051758523752, "grad_norm": 0.6693328648027058, "learning_rate": 0.00019319070682104731, "loss": 12.601, "step": 5311 }, { "epoch": 0.2892596298489582, "grad_norm": 0.7485113286192445, "learning_rate": 0.00019318750810169184, "loss": 12.5305, "step": 5312 }, { "epoch": 0.28931408384554125, "grad_norm": 0.7768309672774144, "learning_rate": 0.00019318430865769464, "loss": 12.5792, "step": 5313 }, { "epoch": 0.28936853784212424, "grad_norm": 0.6765733125020098, "learning_rate": 0.00019318110848908065, "loss": 12.7013, "step": 5314 }, { "epoch": 0.2894229918387073, "grad_norm": 0.8730525986186539, "learning_rate": 0.00019317790759587475, "loss": 12.5474, "step": 5315 }, { "epoch": 0.2894774458352903, "grad_norm": 0.6296560239385222, "learning_rate": 0.0001931747059781018, "loss": 12.448, "step": 5316 }, { "epoch": 0.2895318998318733, "grad_norm": 0.8603087911123338, "learning_rate": 0.0001931715036357867, "loss": 12.6103, "step": 5317 }, { "epoch": 0.2895863538284563, "grad_norm": 1.3755849613015414, "learning_rate": 0.0001931683005689544, "loss": 12.4477, "step": 5318 }, { "epoch": 0.2896408078250393, "grad_norm": 1.2027269928655746, "learning_rate": 0.00019316509677762974, "loss": 12.5979, "step": 5319 }, { "epoch": 0.2896952618216223, "grad_norm": 0.8655129824139789, "learning_rate": 0.00019316189226183767, "loss": 12.4687, "step": 5320 }, { "epoch": 0.28974971581820536, "grad_norm": 3.817913808390665, "learning_rate": 0.00019315868702160312, "loss": 12.7316, "step": 5321 }, { "epoch": 0.28980416981478835, "grad_norm": 0.8343931440789721, "learning_rate": 0.00019315548105695098, "loss": 12.6442, "step": 5322 }, { "epoch": 0.28985862381137134, "grad_norm": 6.952518704615793, "learning_rate": 0.00019315227436790623, "loss": 12.8677, "step": 5323 }, { "epoch": 0.2899130778079544, "grad_norm": 0.9131341069424375, "learning_rate": 0.0001931490669544937, "loss": 12.7066, "step": 5324 }, { "epoch": 0.2899675318045374, "grad_norm": 1.789670303581222, "learning_rate": 0.00019314585881673846, "loss": 12.8639, "step": 5325 }, { "epoch": 0.2900219858011204, "grad_norm": 1.0026867171007399, "learning_rate": 0.0001931426499546654, "loss": 12.7645, "step": 5326 }, { "epoch": 0.2900764397977034, "grad_norm": 1.0203134278952588, "learning_rate": 0.00019313944036829944, "loss": 12.7672, "step": 5327 }, { "epoch": 0.2901308937942864, "grad_norm": 1.0440168423497518, "learning_rate": 0.0001931362300576656, "loss": 12.6295, "step": 5328 }, { "epoch": 0.2901853477908694, "grad_norm": 1.0227780364237082, "learning_rate": 0.0001931330190227888, "loss": 12.6797, "step": 5329 }, { "epoch": 0.29023980178745246, "grad_norm": 1.139990750255168, "learning_rate": 0.00019312980726369404, "loss": 12.6139, "step": 5330 }, { "epoch": 0.29029425578403545, "grad_norm": 1.1023503359236262, "learning_rate": 0.00019312659478040628, "loss": 12.6556, "step": 5331 }, { "epoch": 0.29034870978061844, "grad_norm": 0.8425326234254836, "learning_rate": 0.00019312338157295052, "loss": 12.7766, "step": 5332 }, { "epoch": 0.2904031637772015, "grad_norm": 0.9347222571312567, "learning_rate": 0.0001931201676413517, "loss": 12.7023, "step": 5333 }, { "epoch": 0.2904576177737845, "grad_norm": 0.7686786588022239, "learning_rate": 0.00019311695298563484, "loss": 12.7143, "step": 5334 }, { "epoch": 0.2905120717703675, "grad_norm": 0.892660220273949, "learning_rate": 0.00019311373760582494, "loss": 12.7365, "step": 5335 }, { "epoch": 0.2905665257669505, "grad_norm": 0.7462257993513306, "learning_rate": 0.00019311052150194699, "loss": 12.7928, "step": 5336 }, { "epoch": 0.2906209797635335, "grad_norm": 0.9456338911665619, "learning_rate": 0.00019310730467402603, "loss": 12.6129, "step": 5337 }, { "epoch": 0.2906754337601165, "grad_norm": 0.8339349763476298, "learning_rate": 0.00019310408712208706, "loss": 12.6664, "step": 5338 }, { "epoch": 0.29072988775669956, "grad_norm": 0.7848635052755665, "learning_rate": 0.00019310086884615507, "loss": 12.6457, "step": 5339 }, { "epoch": 0.29078434175328255, "grad_norm": 0.9228880011106196, "learning_rate": 0.00019309764984625513, "loss": 12.5757, "step": 5340 }, { "epoch": 0.29083879574986554, "grad_norm": 0.7168949910507982, "learning_rate": 0.00019309443012241226, "loss": 12.5764, "step": 5341 }, { "epoch": 0.2908932497464486, "grad_norm": 0.8025730823863417, "learning_rate": 0.00019309120967465147, "loss": 12.6072, "step": 5342 }, { "epoch": 0.2909477037430316, "grad_norm": 0.767974913734144, "learning_rate": 0.00019308798850299784, "loss": 12.7317, "step": 5343 }, { "epoch": 0.2910021577396146, "grad_norm": 0.7643361303317291, "learning_rate": 0.0001930847666074764, "loss": 12.6485, "step": 5344 }, { "epoch": 0.2910566117361976, "grad_norm": 0.7860919414875597, "learning_rate": 0.00019308154398811218, "loss": 12.597, "step": 5345 }, { "epoch": 0.2911110657327806, "grad_norm": 0.7617826115160656, "learning_rate": 0.00019307832064493027, "loss": 12.7591, "step": 5346 }, { "epoch": 0.29116551972936366, "grad_norm": 0.6984577542248323, "learning_rate": 0.00019307509657795575, "loss": 12.519, "step": 5347 }, { "epoch": 0.29121997372594666, "grad_norm": 0.7105852071424831, "learning_rate": 0.00019307187178721366, "loss": 12.6628, "step": 5348 }, { "epoch": 0.29127442772252965, "grad_norm": 0.6818024501380333, "learning_rate": 0.0001930686462727291, "loss": 12.5448, "step": 5349 }, { "epoch": 0.2913288817191127, "grad_norm": 0.6718881976533685, "learning_rate": 0.00019306542003452712, "loss": 12.6965, "step": 5350 }, { "epoch": 0.2913833357156957, "grad_norm": 0.7582962470318719, "learning_rate": 0.00019306219307263284, "loss": 12.6478, "step": 5351 }, { "epoch": 0.2914377897122787, "grad_norm": 0.8820400481683287, "learning_rate": 0.0001930589653870713, "loss": 12.6285, "step": 5352 }, { "epoch": 0.29149224370886173, "grad_norm": 0.9586742288093614, "learning_rate": 0.0001930557369778677, "loss": 12.7713, "step": 5353 }, { "epoch": 0.2915466977054447, "grad_norm": 0.7338990360689434, "learning_rate": 0.00019305250784504706, "loss": 12.6302, "step": 5354 }, { "epoch": 0.2916011517020277, "grad_norm": 0.7582419862630847, "learning_rate": 0.0001930492779886345, "loss": 12.6738, "step": 5355 }, { "epoch": 0.29165560569861076, "grad_norm": 0.840787783063441, "learning_rate": 0.00019304604740865515, "loss": 12.6798, "step": 5356 }, { "epoch": 0.29171005969519376, "grad_norm": 0.6931359406914137, "learning_rate": 0.00019304281610513414, "loss": 12.505, "step": 5357 }, { "epoch": 0.29176451369177675, "grad_norm": 0.8046820695731451, "learning_rate": 0.00019303958407809656, "loss": 12.545, "step": 5358 }, { "epoch": 0.2918189676883598, "grad_norm": 0.7504096676621198, "learning_rate": 0.00019303635132756762, "loss": 12.488, "step": 5359 }, { "epoch": 0.2918734216849428, "grad_norm": 0.6977037675551906, "learning_rate": 0.0001930331178535724, "loss": 12.4553, "step": 5360 }, { "epoch": 0.2919278756815258, "grad_norm": 0.7411988969206796, "learning_rate": 0.00019302988365613603, "loss": 12.6309, "step": 5361 }, { "epoch": 0.29198232967810883, "grad_norm": 0.7279922787640098, "learning_rate": 0.0001930266487352837, "loss": 12.5111, "step": 5362 }, { "epoch": 0.2920367836746918, "grad_norm": 0.7811018864235276, "learning_rate": 0.00019302341309104055, "loss": 12.6204, "step": 5363 }, { "epoch": 0.2920912376712748, "grad_norm": 0.7559002866108901, "learning_rate": 0.00019302017672343172, "loss": 12.4904, "step": 5364 }, { "epoch": 0.29214569166785787, "grad_norm": 0.7466752855960884, "learning_rate": 0.00019301693963248243, "loss": 12.7218, "step": 5365 }, { "epoch": 0.29220014566444086, "grad_norm": 0.7993271004037386, "learning_rate": 0.00019301370181821782, "loss": 12.6146, "step": 5366 }, { "epoch": 0.29225459966102385, "grad_norm": 0.6983084821208461, "learning_rate": 0.00019301046328066304, "loss": 12.5839, "step": 5367 }, { "epoch": 0.2923090536576069, "grad_norm": 0.8523665600366496, "learning_rate": 0.00019300722401984332, "loss": 12.6677, "step": 5368 }, { "epoch": 0.2923635076541899, "grad_norm": 0.8207120293181236, "learning_rate": 0.0001930039840357838, "loss": 12.5995, "step": 5369 }, { "epoch": 0.2924179616507729, "grad_norm": 0.6721964030090202, "learning_rate": 0.0001930007433285097, "loss": 12.4554, "step": 5370 }, { "epoch": 0.29247241564735593, "grad_norm": 0.6800829699572934, "learning_rate": 0.00019299750189804624, "loss": 12.6383, "step": 5371 }, { "epoch": 0.2925268696439389, "grad_norm": 0.8289854580672297, "learning_rate": 0.00019299425974441862, "loss": 12.6417, "step": 5372 }, { "epoch": 0.2925813236405219, "grad_norm": 0.7095324885178306, "learning_rate": 0.00019299101686765205, "loss": 12.5994, "step": 5373 }, { "epoch": 0.29263577763710497, "grad_norm": 0.6896328151277851, "learning_rate": 0.00019298777326777171, "loss": 12.3786, "step": 5374 }, { "epoch": 0.29269023163368796, "grad_norm": 0.6994439095041498, "learning_rate": 0.00019298452894480286, "loss": 12.6018, "step": 5375 }, { "epoch": 0.29274468563027095, "grad_norm": 0.80451622927954, "learning_rate": 0.00019298128389877073, "loss": 12.6109, "step": 5376 }, { "epoch": 0.292799139626854, "grad_norm": 0.6689060517208213, "learning_rate": 0.00019297803812970052, "loss": 12.5704, "step": 5377 }, { "epoch": 0.292853593623437, "grad_norm": 0.687941492721622, "learning_rate": 0.00019297479163761755, "loss": 12.6013, "step": 5378 }, { "epoch": 0.29290804762002, "grad_norm": 0.6930940676293417, "learning_rate": 0.00019297154442254693, "loss": 12.5302, "step": 5379 }, { "epoch": 0.29296250161660303, "grad_norm": 0.7573733554612111, "learning_rate": 0.00019296829648451404, "loss": 12.6814, "step": 5380 }, { "epoch": 0.293016955613186, "grad_norm": 0.6483541127785282, "learning_rate": 0.00019296504782354408, "loss": 12.5151, "step": 5381 }, { "epoch": 0.2930714096097691, "grad_norm": 0.6872342229388375, "learning_rate": 0.0001929617984396623, "loss": 12.5216, "step": 5382 }, { "epoch": 0.29312586360635207, "grad_norm": 0.7146685548807061, "learning_rate": 0.000192958548332894, "loss": 12.8383, "step": 5383 }, { "epoch": 0.29318031760293506, "grad_norm": 0.7128076251945845, "learning_rate": 0.00019295529750326443, "loss": 12.5799, "step": 5384 }, { "epoch": 0.2932347715995181, "grad_norm": 0.6913826142462048, "learning_rate": 0.0001929520459507989, "loss": 12.5626, "step": 5385 }, { "epoch": 0.2932892255961011, "grad_norm": 0.7745424972710939, "learning_rate": 0.00019294879367552263, "loss": 12.6744, "step": 5386 }, { "epoch": 0.2933436795926841, "grad_norm": 0.6975593164609588, "learning_rate": 0.00019294554067746098, "loss": 12.5228, "step": 5387 }, { "epoch": 0.29339813358926714, "grad_norm": 0.7678680088261888, "learning_rate": 0.0001929422869566392, "loss": 12.6008, "step": 5388 }, { "epoch": 0.29345258758585013, "grad_norm": 0.692367853277397, "learning_rate": 0.00019293903251308266, "loss": 12.6218, "step": 5389 }, { "epoch": 0.2935070415824331, "grad_norm": 0.8232991412974388, "learning_rate": 0.00019293577734681656, "loss": 12.7118, "step": 5390 }, { "epoch": 0.2935614955790162, "grad_norm": 0.6167356791640606, "learning_rate": 0.0001929325214578663, "loss": 12.4945, "step": 5391 }, { "epoch": 0.29361594957559917, "grad_norm": 0.6438436500011632, "learning_rate": 0.00019292926484625714, "loss": 12.6473, "step": 5392 }, { "epoch": 0.29367040357218216, "grad_norm": 0.7587583883204099, "learning_rate": 0.00019292600751201448, "loss": 12.5496, "step": 5393 }, { "epoch": 0.2937248575687652, "grad_norm": 0.7076624710372246, "learning_rate": 0.00019292274945516359, "loss": 12.3377, "step": 5394 }, { "epoch": 0.2937793115653482, "grad_norm": 0.8745028515304532, "learning_rate": 0.00019291949067572978, "loss": 12.5201, "step": 5395 }, { "epoch": 0.2938337655619312, "grad_norm": 0.7570972500445633, "learning_rate": 0.00019291623117373847, "loss": 12.5978, "step": 5396 }, { "epoch": 0.29388821955851424, "grad_norm": 0.7985090072616512, "learning_rate": 0.00019291297094921494, "loss": 12.5774, "step": 5397 }, { "epoch": 0.29394267355509723, "grad_norm": 0.8410990037792181, "learning_rate": 0.00019290971000218457, "loss": 12.7778, "step": 5398 }, { "epoch": 0.2939971275516802, "grad_norm": 0.6325474282429254, "learning_rate": 0.0001929064483326727, "loss": 12.5482, "step": 5399 }, { "epoch": 0.2940515815482633, "grad_norm": 0.7215869597593331, "learning_rate": 0.00019290318594070475, "loss": 12.6419, "step": 5400 }, { "epoch": 0.29410603554484627, "grad_norm": 0.7388693424632827, "learning_rate": 0.00019289992282630602, "loss": 12.5937, "step": 5401 }, { "epoch": 0.29416048954142926, "grad_norm": 0.9311053731409538, "learning_rate": 0.0001928966589895019, "loss": 12.673, "step": 5402 }, { "epoch": 0.2942149435380123, "grad_norm": 0.7564163948266747, "learning_rate": 0.00019289339443031778, "loss": 12.5103, "step": 5403 }, { "epoch": 0.2942693975345953, "grad_norm": 0.7845040973885391, "learning_rate": 0.00019289012914877905, "loss": 12.5595, "step": 5404 }, { "epoch": 0.2943238515311783, "grad_norm": 0.7268199640406403, "learning_rate": 0.00019288686314491115, "loss": 12.5784, "step": 5405 }, { "epoch": 0.29437830552776134, "grad_norm": 0.7117435618249173, "learning_rate": 0.00019288359641873935, "loss": 12.5584, "step": 5406 }, { "epoch": 0.29443275952434433, "grad_norm": 0.7430669941676982, "learning_rate": 0.00019288032897028917, "loss": 12.5339, "step": 5407 }, { "epoch": 0.2944872135209273, "grad_norm": 0.7309612448126213, "learning_rate": 0.00019287706079958595, "loss": 12.6017, "step": 5408 }, { "epoch": 0.2945416675175104, "grad_norm": 0.7591821592955744, "learning_rate": 0.00019287379190665517, "loss": 12.5533, "step": 5409 }, { "epoch": 0.29459612151409337, "grad_norm": 0.7451068498540538, "learning_rate": 0.0001928705222915222, "loss": 12.4671, "step": 5410 }, { "epoch": 0.29465057551067636, "grad_norm": 0.7720985974444161, "learning_rate": 0.00019286725195421243, "loss": 12.6444, "step": 5411 }, { "epoch": 0.2947050295072594, "grad_norm": 0.686883058089928, "learning_rate": 0.00019286398089475134, "loss": 12.5692, "step": 5412 }, { "epoch": 0.2947594835038424, "grad_norm": 0.7205956053915606, "learning_rate": 0.0001928607091131644, "loss": 12.7151, "step": 5413 }, { "epoch": 0.29481393750042545, "grad_norm": 0.7336566961369465, "learning_rate": 0.000192857436609477, "loss": 12.5186, "step": 5414 }, { "epoch": 0.29486839149700844, "grad_norm": 0.6770882797888417, "learning_rate": 0.0001928541633837146, "loss": 12.5483, "step": 5415 }, { "epoch": 0.29492284549359143, "grad_norm": 0.6642706676974497, "learning_rate": 0.00019285088943590267, "loss": 12.6158, "step": 5416 }, { "epoch": 0.2949772994901745, "grad_norm": 0.6612267667643044, "learning_rate": 0.00019284761476606662, "loss": 12.6026, "step": 5417 }, { "epoch": 0.2950317534867575, "grad_norm": 0.8225131551740218, "learning_rate": 0.00019284433937423196, "loss": 12.7422, "step": 5418 }, { "epoch": 0.29508620748334047, "grad_norm": 0.9312293048136258, "learning_rate": 0.00019284106326042415, "loss": 12.5606, "step": 5419 }, { "epoch": 0.2951406614799235, "grad_norm": 0.686764621159285, "learning_rate": 0.00019283778642466864, "loss": 12.6587, "step": 5420 }, { "epoch": 0.2951951154765065, "grad_norm": 0.7232553800589134, "learning_rate": 0.000192834508866991, "loss": 12.6868, "step": 5421 }, { "epoch": 0.2952495694730895, "grad_norm": 0.7771593086870223, "learning_rate": 0.0001928312305874166, "loss": 12.6444, "step": 5422 }, { "epoch": 0.29530402346967255, "grad_norm": 0.6821004747227135, "learning_rate": 0.00019282795158597098, "loss": 12.5451, "step": 5423 }, { "epoch": 0.29535847746625554, "grad_norm": 0.7158378326219672, "learning_rate": 0.00019282467186267966, "loss": 12.621, "step": 5424 }, { "epoch": 0.29541293146283853, "grad_norm": 0.748752470958464, "learning_rate": 0.0001928213914175681, "loss": 12.6017, "step": 5425 }, { "epoch": 0.2954673854594216, "grad_norm": 0.709105723596565, "learning_rate": 0.00019281811025066183, "loss": 12.6099, "step": 5426 }, { "epoch": 0.2955218394560046, "grad_norm": 0.658589593936804, "learning_rate": 0.0001928148283619864, "loss": 12.6061, "step": 5427 }, { "epoch": 0.29557629345258757, "grad_norm": 0.7269034855071365, "learning_rate": 0.0001928115457515673, "loss": 12.6086, "step": 5428 }, { "epoch": 0.2956307474491706, "grad_norm": 0.8569243295411642, "learning_rate": 0.00019280826241943003, "loss": 12.7698, "step": 5429 }, { "epoch": 0.2956852014457536, "grad_norm": 0.7756779258888888, "learning_rate": 0.00019280497836560016, "loss": 12.569, "step": 5430 }, { "epoch": 0.2957396554423366, "grad_norm": 0.7274438498794342, "learning_rate": 0.00019280169359010322, "loss": 12.7412, "step": 5431 }, { "epoch": 0.29579410943891965, "grad_norm": 0.6509255649787569, "learning_rate": 0.00019279840809296474, "loss": 12.4884, "step": 5432 }, { "epoch": 0.29584856343550264, "grad_norm": 0.6538988898602316, "learning_rate": 0.0001927951218742103, "loss": 12.525, "step": 5433 }, { "epoch": 0.29590301743208564, "grad_norm": 0.678004336147839, "learning_rate": 0.00019279183493386542, "loss": 12.663, "step": 5434 }, { "epoch": 0.2959574714286687, "grad_norm": 0.6604899315721932, "learning_rate": 0.00019278854727195564, "loss": 12.5462, "step": 5435 }, { "epoch": 0.2960119254252517, "grad_norm": 0.650865153177635, "learning_rate": 0.00019278525888850658, "loss": 12.5251, "step": 5436 }, { "epoch": 0.29606637942183467, "grad_norm": 0.6905167141476374, "learning_rate": 0.0001927819697835438, "loss": 12.4289, "step": 5437 }, { "epoch": 0.2961208334184177, "grad_norm": 0.6775595408948848, "learning_rate": 0.00019277867995709286, "loss": 12.5849, "step": 5438 }, { "epoch": 0.2961752874150007, "grad_norm": 0.6750514376962531, "learning_rate": 0.0001927753894091793, "loss": 12.5668, "step": 5439 }, { "epoch": 0.2962297414115837, "grad_norm": 0.7530309678722148, "learning_rate": 0.0001927720981398288, "loss": 12.4954, "step": 5440 }, { "epoch": 0.29628419540816675, "grad_norm": 0.6601666121573037, "learning_rate": 0.0001927688061490669, "loss": 12.4699, "step": 5441 }, { "epoch": 0.29633864940474974, "grad_norm": 0.6954527578087399, "learning_rate": 0.0001927655134369192, "loss": 12.5202, "step": 5442 }, { "epoch": 0.29639310340133274, "grad_norm": 0.6441927111198292, "learning_rate": 0.0001927622200034113, "loss": 12.5601, "step": 5443 }, { "epoch": 0.2964475573979158, "grad_norm": 0.6474561946353591, "learning_rate": 0.00019275892584856883, "loss": 12.485, "step": 5444 }, { "epoch": 0.2965020113944988, "grad_norm": 0.6933453342076746, "learning_rate": 0.0001927556309724174, "loss": 12.4715, "step": 5445 }, { "epoch": 0.29655646539108177, "grad_norm": 0.7682158885479328, "learning_rate": 0.00019275233537498264, "loss": 12.6883, "step": 5446 }, { "epoch": 0.2966109193876648, "grad_norm": 0.6739621499325937, "learning_rate": 0.00019274903905629014, "loss": 12.6544, "step": 5447 }, { "epoch": 0.2966653733842478, "grad_norm": 0.6354207232839544, "learning_rate": 0.00019274574201636556, "loss": 12.4936, "step": 5448 }, { "epoch": 0.29671982738083086, "grad_norm": 0.658172915455953, "learning_rate": 0.00019274244425523455, "loss": 12.6144, "step": 5449 }, { "epoch": 0.29677428137741385, "grad_norm": 0.6766134115227272, "learning_rate": 0.00019273914577292274, "loss": 12.5919, "step": 5450 }, { "epoch": 0.29682873537399684, "grad_norm": 0.6557699542841391, "learning_rate": 0.0001927358465694558, "loss": 12.6577, "step": 5451 }, { "epoch": 0.2968831893705799, "grad_norm": 0.6875351309441434, "learning_rate": 0.00019273254664485933, "loss": 12.6079, "step": 5452 }, { "epoch": 0.2969376433671629, "grad_norm": 0.667682654878768, "learning_rate": 0.00019272924599915902, "loss": 12.5817, "step": 5453 }, { "epoch": 0.2969920973637459, "grad_norm": 0.6779324528523384, "learning_rate": 0.00019272594463238057, "loss": 12.5989, "step": 5454 }, { "epoch": 0.2970465513603289, "grad_norm": 0.624243957794944, "learning_rate": 0.00019272264254454962, "loss": 12.521, "step": 5455 }, { "epoch": 0.2971010053569119, "grad_norm": 1.088983734238408, "learning_rate": 0.00019271933973569186, "loss": 12.5177, "step": 5456 }, { "epoch": 0.2971554593534949, "grad_norm": 0.7190029177790371, "learning_rate": 0.00019271603620583293, "loss": 12.5043, "step": 5457 }, { "epoch": 0.29720991335007796, "grad_norm": 0.6624008667037063, "learning_rate": 0.00019271273195499856, "loss": 12.5598, "step": 5458 }, { "epoch": 0.29726436734666095, "grad_norm": 0.829880367133911, "learning_rate": 0.0001927094269832145, "loss": 12.7031, "step": 5459 }, { "epoch": 0.29731882134324394, "grad_norm": 0.6624179427876619, "learning_rate": 0.00019270612129050632, "loss": 12.496, "step": 5460 }, { "epoch": 0.297373275339827, "grad_norm": 0.7302525172954663, "learning_rate": 0.00019270281487689982, "loss": 12.6206, "step": 5461 }, { "epoch": 0.29742772933641, "grad_norm": 0.7417326761674216, "learning_rate": 0.0001926995077424207, "loss": 12.7291, "step": 5462 }, { "epoch": 0.297482183332993, "grad_norm": 0.6515096566908593, "learning_rate": 0.00019269619988709466, "loss": 12.4573, "step": 5463 }, { "epoch": 0.297536637329576, "grad_norm": 0.660489757020803, "learning_rate": 0.0001926928913109474, "loss": 12.6159, "step": 5464 }, { "epoch": 0.297591091326159, "grad_norm": 0.6976139432730505, "learning_rate": 0.00019268958201400466, "loss": 12.4181, "step": 5465 }, { "epoch": 0.297645545322742, "grad_norm": 0.7180785514358637, "learning_rate": 0.0001926862719962922, "loss": 12.5845, "step": 5466 }, { "epoch": 0.29769999931932506, "grad_norm": 0.6611172259802919, "learning_rate": 0.00019268296125783576, "loss": 12.6051, "step": 5467 }, { "epoch": 0.29775445331590805, "grad_norm": 0.836722423389593, "learning_rate": 0.00019267964979866108, "loss": 12.5857, "step": 5468 }, { "epoch": 0.29780890731249104, "grad_norm": 0.6652025661518768, "learning_rate": 0.0001926763376187939, "loss": 12.6493, "step": 5469 }, { "epoch": 0.2978633613090741, "grad_norm": 0.7509908152819234, "learning_rate": 0.00019267302471825994, "loss": 12.657, "step": 5470 }, { "epoch": 0.2979178153056571, "grad_norm": 0.6440705345244854, "learning_rate": 0.00019266971109708502, "loss": 12.6235, "step": 5471 }, { "epoch": 0.2979722693022401, "grad_norm": 0.7864417869922518, "learning_rate": 0.0001926663967552949, "loss": 12.6673, "step": 5472 }, { "epoch": 0.2980267232988231, "grad_norm": 0.675295748172897, "learning_rate": 0.00019266308169291533, "loss": 12.5634, "step": 5473 }, { "epoch": 0.2980811772954061, "grad_norm": 0.7196578050262258, "learning_rate": 0.00019265976590997208, "loss": 12.5545, "step": 5474 }, { "epoch": 0.2981356312919891, "grad_norm": 0.771184431209636, "learning_rate": 0.00019265644940649095, "loss": 12.6641, "step": 5475 }, { "epoch": 0.29819008528857216, "grad_norm": 0.6625549461496372, "learning_rate": 0.00019265313218249776, "loss": 12.646, "step": 5476 }, { "epoch": 0.29824453928515515, "grad_norm": 0.6714856696134495, "learning_rate": 0.00019264981423801824, "loss": 12.4311, "step": 5477 }, { "epoch": 0.29829899328173815, "grad_norm": 0.6932694439785722, "learning_rate": 0.00019264649557307825, "loss": 12.66, "step": 5478 }, { "epoch": 0.2983534472783212, "grad_norm": 0.7019140285607096, "learning_rate": 0.00019264317618770358, "loss": 12.5443, "step": 5479 }, { "epoch": 0.2984079012749042, "grad_norm": 0.7285887391443051, "learning_rate": 0.00019263985608192004, "loss": 12.5274, "step": 5480 }, { "epoch": 0.29846235527148723, "grad_norm": 0.6743474455149153, "learning_rate": 0.0001926365352557534, "loss": 12.4967, "step": 5481 }, { "epoch": 0.2985168092680702, "grad_norm": 0.7497025945227477, "learning_rate": 0.00019263321370922956, "loss": 12.6501, "step": 5482 }, { "epoch": 0.2985712632646532, "grad_norm": 0.7301840359950457, "learning_rate": 0.0001926298914423743, "loss": 12.5288, "step": 5483 }, { "epoch": 0.29862571726123627, "grad_norm": 0.6743215481536896, "learning_rate": 0.0001926265684552135, "loss": 12.5529, "step": 5484 }, { "epoch": 0.29868017125781926, "grad_norm": 0.7238826933992838, "learning_rate": 0.00019262324474777297, "loss": 12.5079, "step": 5485 }, { "epoch": 0.29873462525440225, "grad_norm": 0.7257838708320444, "learning_rate": 0.00019261992032007852, "loss": 12.5381, "step": 5486 }, { "epoch": 0.2987890792509853, "grad_norm": 0.7158109479170218, "learning_rate": 0.00019261659517215608, "loss": 12.555, "step": 5487 }, { "epoch": 0.2988435332475683, "grad_norm": 0.8007473442264141, "learning_rate": 0.00019261326930403142, "loss": 12.6532, "step": 5488 }, { "epoch": 0.2988979872441513, "grad_norm": 0.6721247192677079, "learning_rate": 0.00019260994271573048, "loss": 12.6069, "step": 5489 }, { "epoch": 0.29895244124073433, "grad_norm": 0.7861205605607333, "learning_rate": 0.00019260661540727907, "loss": 12.5759, "step": 5490 }, { "epoch": 0.2990068952373173, "grad_norm": 0.6645713018669855, "learning_rate": 0.0001926032873787031, "loss": 12.55, "step": 5491 }, { "epoch": 0.2990613492339003, "grad_norm": 0.6227237580241888, "learning_rate": 0.00019259995863002844, "loss": 12.5693, "step": 5492 }, { "epoch": 0.29911580323048337, "grad_norm": 0.7182566580451716, "learning_rate": 0.00019259662916128097, "loss": 12.6455, "step": 5493 }, { "epoch": 0.29917025722706636, "grad_norm": 0.753398157729987, "learning_rate": 0.00019259329897248657, "loss": 12.6879, "step": 5494 }, { "epoch": 0.29922471122364935, "grad_norm": 0.6709225872282627, "learning_rate": 0.00019258996806367117, "loss": 12.6203, "step": 5495 }, { "epoch": 0.2992791652202324, "grad_norm": 0.7117214422038949, "learning_rate": 0.00019258663643486067, "loss": 12.569, "step": 5496 }, { "epoch": 0.2993336192168154, "grad_norm": 0.6641688877669308, "learning_rate": 0.00019258330408608088, "loss": 12.5621, "step": 5497 }, { "epoch": 0.2993880732133984, "grad_norm": 0.753378664160101, "learning_rate": 0.00019257997101735787, "loss": 12.6489, "step": 5498 }, { "epoch": 0.29944252720998144, "grad_norm": 0.6934161511406282, "learning_rate": 0.00019257663722871746, "loss": 12.584, "step": 5499 }, { "epoch": 0.2994969812065644, "grad_norm": 0.6696183362421898, "learning_rate": 0.0001925733027201856, "loss": 12.6701, "step": 5500 }, { "epoch": 0.2995514352031474, "grad_norm": 0.7221308273553433, "learning_rate": 0.0001925699674917882, "loss": 12.5649, "step": 5501 }, { "epoch": 0.29960588919973047, "grad_norm": 0.6253773296377263, "learning_rate": 0.00019256663154355118, "loss": 12.4283, "step": 5502 }, { "epoch": 0.29966034319631346, "grad_norm": 0.6572697114319784, "learning_rate": 0.00019256329487550054, "loss": 12.6613, "step": 5503 }, { "epoch": 0.29971479719289645, "grad_norm": 0.747338700790793, "learning_rate": 0.0001925599574876622, "loss": 12.5483, "step": 5504 }, { "epoch": 0.2997692511894795, "grad_norm": 0.6906470887856688, "learning_rate": 0.0001925566193800621, "loss": 12.5226, "step": 5505 }, { "epoch": 0.2998237051860625, "grad_norm": 0.7248451578017936, "learning_rate": 0.00019255328055272624, "loss": 12.3915, "step": 5506 }, { "epoch": 0.2998781591826455, "grad_norm": 0.6518355865071961, "learning_rate": 0.0001925499410056805, "loss": 12.5653, "step": 5507 }, { "epoch": 0.29993261317922854, "grad_norm": 0.6928218232220432, "learning_rate": 0.00019254660073895092, "loss": 12.5504, "step": 5508 }, { "epoch": 0.29998706717581153, "grad_norm": 0.7400814092127778, "learning_rate": 0.00019254325975256344, "loss": 12.6619, "step": 5509 }, { "epoch": 0.3000415211723945, "grad_norm": 0.6781743427380154, "learning_rate": 0.00019253991804654407, "loss": 12.4567, "step": 5510 }, { "epoch": 0.30009597516897757, "grad_norm": 0.6651358657090074, "learning_rate": 0.00019253657562091876, "loss": 12.6494, "step": 5511 }, { "epoch": 0.30015042916556056, "grad_norm": 0.7184827329752417, "learning_rate": 0.00019253323247571356, "loss": 12.6796, "step": 5512 }, { "epoch": 0.30020488316214355, "grad_norm": 0.9321121206445516, "learning_rate": 0.0001925298886109544, "loss": 12.7286, "step": 5513 }, { "epoch": 0.3002593371587266, "grad_norm": 0.7634100043428852, "learning_rate": 0.00019252654402666727, "loss": 12.7075, "step": 5514 }, { "epoch": 0.3003137911553096, "grad_norm": 0.6500345565420961, "learning_rate": 0.00019252319872287824, "loss": 12.5077, "step": 5515 }, { "epoch": 0.30036824515189264, "grad_norm": 0.7131473979329023, "learning_rate": 0.00019251985269961336, "loss": 12.6077, "step": 5516 }, { "epoch": 0.30042269914847564, "grad_norm": 0.7203919344303918, "learning_rate": 0.00019251650595689853, "loss": 12.56, "step": 5517 }, { "epoch": 0.30047715314505863, "grad_norm": 0.6665760403028634, "learning_rate": 0.00019251315849475983, "loss": 12.578, "step": 5518 }, { "epoch": 0.3005316071416417, "grad_norm": 0.7873972857774381, "learning_rate": 0.00019250981031322334, "loss": 12.6244, "step": 5519 }, { "epoch": 0.30058606113822467, "grad_norm": 0.6749580655656026, "learning_rate": 0.00019250646141231502, "loss": 12.4792, "step": 5520 }, { "epoch": 0.30064051513480766, "grad_norm": 0.670393709534476, "learning_rate": 0.00019250311179206092, "loss": 12.4619, "step": 5521 }, { "epoch": 0.3006949691313907, "grad_norm": 0.6852404075991394, "learning_rate": 0.00019249976145248714, "loss": 12.5381, "step": 5522 }, { "epoch": 0.3007494231279737, "grad_norm": 0.7438713110773497, "learning_rate": 0.0001924964103936197, "loss": 12.5529, "step": 5523 }, { "epoch": 0.3008038771245567, "grad_norm": 0.9422519483715919, "learning_rate": 0.00019249305861548466, "loss": 12.4879, "step": 5524 }, { "epoch": 0.30085833112113974, "grad_norm": 0.7523445937435863, "learning_rate": 0.00019248970611810808, "loss": 12.572, "step": 5525 }, { "epoch": 0.30091278511772274, "grad_norm": 0.7152895724954156, "learning_rate": 0.00019248635290151605, "loss": 12.5248, "step": 5526 }, { "epoch": 0.30096723911430573, "grad_norm": 0.6343277394603853, "learning_rate": 0.0001924829989657346, "loss": 12.5778, "step": 5527 }, { "epoch": 0.3010216931108888, "grad_norm": 0.8243064110016005, "learning_rate": 0.00019247964431078987, "loss": 12.5656, "step": 5528 }, { "epoch": 0.30107614710747177, "grad_norm": 0.691016405989972, "learning_rate": 0.0001924762889367079, "loss": 12.5543, "step": 5529 }, { "epoch": 0.30113060110405476, "grad_norm": 0.6264685213458856, "learning_rate": 0.00019247293284351482, "loss": 12.443, "step": 5530 }, { "epoch": 0.3011850551006378, "grad_norm": 0.7283528943500993, "learning_rate": 0.00019246957603123667, "loss": 12.7175, "step": 5531 }, { "epoch": 0.3012395090972208, "grad_norm": 0.7925468803778901, "learning_rate": 0.00019246621849989962, "loss": 12.6486, "step": 5532 }, { "epoch": 0.3012939630938038, "grad_norm": 0.7083148796953568, "learning_rate": 0.00019246286024952975, "loss": 12.4917, "step": 5533 }, { "epoch": 0.30134841709038684, "grad_norm": 0.6956661001652812, "learning_rate": 0.00019245950128015315, "loss": 12.6302, "step": 5534 }, { "epoch": 0.30140287108696984, "grad_norm": 0.7563057436080856, "learning_rate": 0.000192456141591796, "loss": 12.224, "step": 5535 }, { "epoch": 0.30145732508355283, "grad_norm": 0.6947294851805417, "learning_rate": 0.00019245278118448436, "loss": 12.5657, "step": 5536 }, { "epoch": 0.3015117790801359, "grad_norm": 0.824002618811491, "learning_rate": 0.00019244942005824437, "loss": 12.6116, "step": 5537 }, { "epoch": 0.30156623307671887, "grad_norm": 0.7311410585858291, "learning_rate": 0.00019244605821310223, "loss": 12.5115, "step": 5538 }, { "epoch": 0.30162068707330186, "grad_norm": 0.6903199645204022, "learning_rate": 0.000192442695649084, "loss": 12.6615, "step": 5539 }, { "epoch": 0.3016751410698849, "grad_norm": 0.7494884431333453, "learning_rate": 0.0001924393323662159, "loss": 12.6129, "step": 5540 }, { "epoch": 0.3017295950664679, "grad_norm": 0.6206778446236714, "learning_rate": 0.00019243596836452404, "loss": 12.5177, "step": 5541 }, { "epoch": 0.3017840490630509, "grad_norm": 0.6255558313734257, "learning_rate": 0.00019243260364403458, "loss": 12.503, "step": 5542 }, { "epoch": 0.30183850305963394, "grad_norm": 0.7196447240519136, "learning_rate": 0.00019242923820477368, "loss": 12.4861, "step": 5543 }, { "epoch": 0.30189295705621694, "grad_norm": 0.6812891385581262, "learning_rate": 0.00019242587204676754, "loss": 12.5954, "step": 5544 }, { "epoch": 0.30194741105279993, "grad_norm": 0.7749909003181377, "learning_rate": 0.0001924225051700423, "loss": 12.7635, "step": 5545 }, { "epoch": 0.302001865049383, "grad_norm": 0.6774056944182675, "learning_rate": 0.00019241913757462418, "loss": 12.6497, "step": 5546 }, { "epoch": 0.30205631904596597, "grad_norm": 0.7271543948033461, "learning_rate": 0.00019241576926053936, "loss": 12.5526, "step": 5547 }, { "epoch": 0.302110773042549, "grad_norm": 0.7058606456982123, "learning_rate": 0.00019241240022781398, "loss": 12.5594, "step": 5548 }, { "epoch": 0.302165227039132, "grad_norm": 0.6679446080027783, "learning_rate": 0.0001924090304764743, "loss": 12.6517, "step": 5549 }, { "epoch": 0.302219681035715, "grad_norm": 0.749234625813053, "learning_rate": 0.00019240566000654653, "loss": 12.6246, "step": 5550 }, { "epoch": 0.30227413503229805, "grad_norm": 0.7261391403812401, "learning_rate": 0.0001924022888180568, "loss": 12.5897, "step": 5551 }, { "epoch": 0.30232858902888105, "grad_norm": 0.7054391146170403, "learning_rate": 0.00019239891691103143, "loss": 12.5725, "step": 5552 }, { "epoch": 0.30238304302546404, "grad_norm": 0.7040813463132223, "learning_rate": 0.00019239554428549655, "loss": 12.5343, "step": 5553 }, { "epoch": 0.3024374970220471, "grad_norm": 0.7371343689528318, "learning_rate": 0.00019239217094147844, "loss": 12.7817, "step": 5554 }, { "epoch": 0.3024919510186301, "grad_norm": 0.6871079768052895, "learning_rate": 0.0001923887968790033, "loss": 12.5964, "step": 5555 }, { "epoch": 0.30254640501521307, "grad_norm": 0.789857271839963, "learning_rate": 0.0001923854220980974, "loss": 12.5383, "step": 5556 }, { "epoch": 0.3026008590117961, "grad_norm": 0.640539924809829, "learning_rate": 0.00019238204659878692, "loss": 12.4824, "step": 5557 }, { "epoch": 0.3026553130083791, "grad_norm": 0.727503213368253, "learning_rate": 0.00019237867038109823, "loss": 12.6039, "step": 5558 }, { "epoch": 0.3027097670049621, "grad_norm": 0.773168800791757, "learning_rate": 0.00019237529344505745, "loss": 12.5989, "step": 5559 }, { "epoch": 0.30276422100154515, "grad_norm": 0.6301257451951049, "learning_rate": 0.0001923719157906909, "loss": 12.4452, "step": 5560 }, { "epoch": 0.30281867499812815, "grad_norm": 0.7305946483618813, "learning_rate": 0.00019236853741802485, "loss": 12.6938, "step": 5561 }, { "epoch": 0.30287312899471114, "grad_norm": 0.8053865211101275, "learning_rate": 0.00019236515832708558, "loss": 12.6093, "step": 5562 }, { "epoch": 0.3029275829912942, "grad_norm": 0.9633833175581472, "learning_rate": 0.00019236177851789931, "loss": 12.6175, "step": 5563 }, { "epoch": 0.3029820369878772, "grad_norm": 0.6972993933779662, "learning_rate": 0.0001923583979904924, "loss": 12.4328, "step": 5564 }, { "epoch": 0.30303649098446017, "grad_norm": 0.778723825604669, "learning_rate": 0.0001923550167448911, "loss": 12.592, "step": 5565 }, { "epoch": 0.3030909449810432, "grad_norm": 0.6520394554064838, "learning_rate": 0.00019235163478112166, "loss": 12.5261, "step": 5566 }, { "epoch": 0.3031453989776262, "grad_norm": 0.6738229807373549, "learning_rate": 0.00019234825209921047, "loss": 12.4971, "step": 5567 }, { "epoch": 0.3031998529742092, "grad_norm": 0.7779394650488719, "learning_rate": 0.00019234486869918377, "loss": 12.5064, "step": 5568 }, { "epoch": 0.30325430697079225, "grad_norm": 0.678783994289326, "learning_rate": 0.00019234148458106785, "loss": 12.5691, "step": 5569 }, { "epoch": 0.30330876096737525, "grad_norm": 0.830383539000743, "learning_rate": 0.0001923380997448891, "loss": 12.559, "step": 5570 }, { "epoch": 0.30336321496395824, "grad_norm": 0.7130216579918996, "learning_rate": 0.00019233471419067378, "loss": 12.5613, "step": 5571 }, { "epoch": 0.3034176689605413, "grad_norm": 0.6777881662814074, "learning_rate": 0.00019233132791844827, "loss": 12.4879, "step": 5572 }, { "epoch": 0.3034721229571243, "grad_norm": 0.6787392866295541, "learning_rate": 0.00019232794092823884, "loss": 12.5894, "step": 5573 }, { "epoch": 0.30352657695370727, "grad_norm": 0.694352921112891, "learning_rate": 0.00019232455322007184, "loss": 12.4459, "step": 5574 }, { "epoch": 0.3035810309502903, "grad_norm": 0.7222532512087805, "learning_rate": 0.00019232116479397365, "loss": 12.6226, "step": 5575 }, { "epoch": 0.3036354849468733, "grad_norm": 0.7314508042146828, "learning_rate": 0.0001923177756499706, "loss": 12.689, "step": 5576 }, { "epoch": 0.3036899389434563, "grad_norm": 0.6587607739908065, "learning_rate": 0.00019231438578808907, "loss": 12.4694, "step": 5577 }, { "epoch": 0.30374439294003935, "grad_norm": 0.7635540326543027, "learning_rate": 0.00019231099520835535, "loss": 12.5989, "step": 5578 }, { "epoch": 0.30379884693662235, "grad_norm": 0.7361430250165957, "learning_rate": 0.0001923076039107959, "loss": 12.5804, "step": 5579 }, { "epoch": 0.30385330093320534, "grad_norm": 0.7585903650131236, "learning_rate": 0.000192304211895437, "loss": 12.5181, "step": 5580 }, { "epoch": 0.3039077549297884, "grad_norm": 0.7600365444488244, "learning_rate": 0.0001923008191623051, "loss": 12.5854, "step": 5581 }, { "epoch": 0.3039622089263714, "grad_norm": 0.8182750314826289, "learning_rate": 0.00019229742571142655, "loss": 12.4639, "step": 5582 }, { "epoch": 0.30401666292295443, "grad_norm": 0.7193099733132268, "learning_rate": 0.00019229403154282773, "loss": 12.4747, "step": 5583 }, { "epoch": 0.3040711169195374, "grad_norm": 0.7270686488441531, "learning_rate": 0.00019229063665653504, "loss": 12.5602, "step": 5584 }, { "epoch": 0.3041255709161204, "grad_norm": 0.7135070121903767, "learning_rate": 0.00019228724105257487, "loss": 12.5432, "step": 5585 }, { "epoch": 0.30418002491270346, "grad_norm": 0.6997931480434294, "learning_rate": 0.00019228384473097366, "loss": 12.6373, "step": 5586 }, { "epoch": 0.30423447890928645, "grad_norm": 0.7479379443675384, "learning_rate": 0.0001922804476917578, "loss": 12.4373, "step": 5587 }, { "epoch": 0.30428893290586945, "grad_norm": 0.6981561283860745, "learning_rate": 0.0001922770499349537, "loss": 12.6439, "step": 5588 }, { "epoch": 0.3043433869024525, "grad_norm": 0.6339087812412747, "learning_rate": 0.00019227365146058775, "loss": 12.5173, "step": 5589 }, { "epoch": 0.3043978408990355, "grad_norm": 0.8609219927489593, "learning_rate": 0.00019227025226868644, "loss": 12.5421, "step": 5590 }, { "epoch": 0.3044522948956185, "grad_norm": 0.7118254434995729, "learning_rate": 0.00019226685235927617, "loss": 12.5478, "step": 5591 }, { "epoch": 0.30450674889220153, "grad_norm": 0.6794508342052569, "learning_rate": 0.0001922634517323834, "loss": 12.5299, "step": 5592 }, { "epoch": 0.3045612028887845, "grad_norm": 0.6586240853434104, "learning_rate": 0.00019226005038803452, "loss": 12.6055, "step": 5593 }, { "epoch": 0.3046156568853675, "grad_norm": 0.77236087411886, "learning_rate": 0.00019225664832625604, "loss": 12.5975, "step": 5594 }, { "epoch": 0.30467011088195056, "grad_norm": 0.6719406111328031, "learning_rate": 0.0001922532455470744, "loss": 12.5498, "step": 5595 }, { "epoch": 0.30472456487853355, "grad_norm": 0.6505332143229099, "learning_rate": 0.00019224984205051603, "loss": 12.5016, "step": 5596 }, { "epoch": 0.30477901887511655, "grad_norm": 0.6457023063792332, "learning_rate": 0.00019224643783660744, "loss": 12.5565, "step": 5597 }, { "epoch": 0.3048334728716996, "grad_norm": 0.7990481600920323, "learning_rate": 0.00019224303290537508, "loss": 12.7244, "step": 5598 }, { "epoch": 0.3048879268682826, "grad_norm": 0.6815455908401977, "learning_rate": 0.00019223962725684542, "loss": 12.5248, "step": 5599 }, { "epoch": 0.3049423808648656, "grad_norm": 0.7270449504186137, "learning_rate": 0.0001922362208910449, "loss": 12.7111, "step": 5600 }, { "epoch": 0.30499683486144863, "grad_norm": 0.7763686333017574, "learning_rate": 0.0001922328138080001, "loss": 12.7218, "step": 5601 }, { "epoch": 0.3050512888580316, "grad_norm": 0.7284964808752892, "learning_rate": 0.0001922294060077375, "loss": 12.5986, "step": 5602 }, { "epoch": 0.3051057428546146, "grad_norm": 0.6652999267288491, "learning_rate": 0.00019222599749028354, "loss": 12.6224, "step": 5603 }, { "epoch": 0.30516019685119766, "grad_norm": 0.6816881250974832, "learning_rate": 0.00019222258825566478, "loss": 12.5041, "step": 5604 }, { "epoch": 0.30521465084778066, "grad_norm": 1.0545143198680806, "learning_rate": 0.0001922191783039077, "loss": 12.4324, "step": 5605 }, { "epoch": 0.30526910484436365, "grad_norm": 0.6908851851058719, "learning_rate": 0.00019221576763503882, "loss": 12.5667, "step": 5606 }, { "epoch": 0.3053235588409467, "grad_norm": 0.7155566720462256, "learning_rate": 0.00019221235624908466, "loss": 12.4893, "step": 5607 }, { "epoch": 0.3053780128375297, "grad_norm": 0.7677589294724539, "learning_rate": 0.00019220894414607176, "loss": 12.5424, "step": 5608 }, { "epoch": 0.3054324668341127, "grad_norm": 0.6674925417573994, "learning_rate": 0.00019220553132602664, "loss": 12.4021, "step": 5609 }, { "epoch": 0.30548692083069573, "grad_norm": 0.7080962937903494, "learning_rate": 0.00019220211778897585, "loss": 12.6609, "step": 5610 }, { "epoch": 0.3055413748272787, "grad_norm": 0.6452885530540028, "learning_rate": 0.0001921987035349459, "loss": 12.4599, "step": 5611 }, { "epoch": 0.3055958288238617, "grad_norm": 0.6681628070045235, "learning_rate": 0.00019219528856396343, "loss": 12.5753, "step": 5612 }, { "epoch": 0.30565028282044476, "grad_norm": 0.6669997947535996, "learning_rate": 0.00019219187287605491, "loss": 12.4837, "step": 5613 }, { "epoch": 0.30570473681702776, "grad_norm": 0.7083059515105996, "learning_rate": 0.0001921884564712469, "loss": 12.6538, "step": 5614 }, { "epoch": 0.3057591908136108, "grad_norm": 0.642663359481727, "learning_rate": 0.00019218503934956602, "loss": 12.5575, "step": 5615 }, { "epoch": 0.3058136448101938, "grad_norm": 0.6625459280194421, "learning_rate": 0.0001921816215110388, "loss": 12.6102, "step": 5616 }, { "epoch": 0.3058680988067768, "grad_norm": 0.7182820810684242, "learning_rate": 0.00019217820295569185, "loss": 12.6836, "step": 5617 }, { "epoch": 0.30592255280335984, "grad_norm": 0.753708065496197, "learning_rate": 0.00019217478368355173, "loss": 12.557, "step": 5618 }, { "epoch": 0.30597700679994283, "grad_norm": 0.7136433532288005, "learning_rate": 0.00019217136369464503, "loss": 12.6235, "step": 5619 }, { "epoch": 0.3060314607965258, "grad_norm": 0.7244893505472123, "learning_rate": 0.00019216794298899833, "loss": 12.5793, "step": 5620 }, { "epoch": 0.30608591479310887, "grad_norm": 0.6818378516149921, "learning_rate": 0.00019216452156663828, "loss": 12.5673, "step": 5621 }, { "epoch": 0.30614036878969186, "grad_norm": 0.6811251930879814, "learning_rate": 0.00019216109942759145, "loss": 12.6225, "step": 5622 }, { "epoch": 0.30619482278627486, "grad_norm": 0.6603144888339486, "learning_rate": 0.00019215767657188444, "loss": 12.4698, "step": 5623 }, { "epoch": 0.3062492767828579, "grad_norm": 0.6823421129114194, "learning_rate": 0.00019215425299954389, "loss": 12.5283, "step": 5624 }, { "epoch": 0.3063037307794409, "grad_norm": 0.6790904544641594, "learning_rate": 0.0001921508287105964, "loss": 12.5425, "step": 5625 }, { "epoch": 0.3063581847760239, "grad_norm": 0.6295559442425349, "learning_rate": 0.00019214740370506863, "loss": 12.554, "step": 5626 }, { "epoch": 0.30641263877260694, "grad_norm": 0.6822492643876135, "learning_rate": 0.0001921439779829872, "loss": 12.5317, "step": 5627 }, { "epoch": 0.30646709276918993, "grad_norm": 0.7730070563114577, "learning_rate": 0.00019214055154437873, "loss": 12.6159, "step": 5628 }, { "epoch": 0.3065215467657729, "grad_norm": 0.865180713811351, "learning_rate": 0.00019213712438926987, "loss": 12.6358, "step": 5629 }, { "epoch": 0.30657600076235597, "grad_norm": 0.651411926195066, "learning_rate": 0.00019213369651768732, "loss": 12.5468, "step": 5630 }, { "epoch": 0.30663045475893896, "grad_norm": 0.6433491563576922, "learning_rate": 0.00019213026792965767, "loss": 12.5849, "step": 5631 }, { "epoch": 0.30668490875552196, "grad_norm": 0.7643907663694384, "learning_rate": 0.00019212683862520756, "loss": 12.7352, "step": 5632 }, { "epoch": 0.306739362752105, "grad_norm": 0.6868282637682263, "learning_rate": 0.00019212340860436377, "loss": 12.5962, "step": 5633 }, { "epoch": 0.306793816748688, "grad_norm": 0.7598560634641103, "learning_rate": 0.00019211997786715287, "loss": 12.6059, "step": 5634 }, { "epoch": 0.306848270745271, "grad_norm": 0.7288171464459738, "learning_rate": 0.0001921165464136016, "loss": 12.5483, "step": 5635 }, { "epoch": 0.30690272474185404, "grad_norm": 0.6673891343969206, "learning_rate": 0.00019211311424373662, "loss": 12.6215, "step": 5636 }, { "epoch": 0.30695717873843703, "grad_norm": 0.6744339225596823, "learning_rate": 0.00019210968135758457, "loss": 12.5486, "step": 5637 }, { "epoch": 0.30701163273502, "grad_norm": 0.7259913596649055, "learning_rate": 0.00019210624775517225, "loss": 12.5398, "step": 5638 }, { "epoch": 0.30706608673160307, "grad_norm": 0.6830246301584015, "learning_rate": 0.00019210281343652626, "loss": 12.7559, "step": 5639 }, { "epoch": 0.30712054072818606, "grad_norm": 0.6785998966172879, "learning_rate": 0.00019209937840167338, "loss": 12.4974, "step": 5640 }, { "epoch": 0.30717499472476906, "grad_norm": 0.8090827555182567, "learning_rate": 0.00019209594265064025, "loss": 12.5941, "step": 5641 }, { "epoch": 0.3072294487213521, "grad_norm": 0.7481489237592109, "learning_rate": 0.00019209250618345368, "loss": 12.5914, "step": 5642 }, { "epoch": 0.3072839027179351, "grad_norm": 0.6838197491142659, "learning_rate": 0.0001920890690001403, "loss": 12.6042, "step": 5643 }, { "epoch": 0.3073383567145181, "grad_norm": 0.7250501351581554, "learning_rate": 0.00019208563110072687, "loss": 12.512, "step": 5644 }, { "epoch": 0.30739281071110114, "grad_norm": 0.8233771978321007, "learning_rate": 0.00019208219248524014, "loss": 12.5859, "step": 5645 }, { "epoch": 0.30744726470768413, "grad_norm": 0.6777946678039877, "learning_rate": 0.0001920787531537068, "loss": 12.6482, "step": 5646 }, { "epoch": 0.3075017187042671, "grad_norm": 0.7460758660384199, "learning_rate": 0.0001920753131061537, "loss": 12.6217, "step": 5647 }, { "epoch": 0.30755617270085017, "grad_norm": 0.6870262122278785, "learning_rate": 0.0001920718723426075, "loss": 12.6333, "step": 5648 }, { "epoch": 0.30761062669743316, "grad_norm": 0.7277903111871953, "learning_rate": 0.00019206843086309498, "loss": 12.511, "step": 5649 }, { "epoch": 0.3076650806940162, "grad_norm": 0.698143994457559, "learning_rate": 0.00019206498866764288, "loss": 12.5323, "step": 5650 }, { "epoch": 0.3077195346905992, "grad_norm": 0.6871120504062017, "learning_rate": 0.00019206154575627802, "loss": 12.3899, "step": 5651 }, { "epoch": 0.3077739886871822, "grad_norm": 0.8309989003900052, "learning_rate": 0.00019205810212902713, "loss": 12.3704, "step": 5652 }, { "epoch": 0.30782844268376525, "grad_norm": 0.8425343080407433, "learning_rate": 0.00019205465778591698, "loss": 12.6307, "step": 5653 }, { "epoch": 0.30788289668034824, "grad_norm": 0.6798149257167375, "learning_rate": 0.0001920512127269744, "loss": 12.6095, "step": 5654 }, { "epoch": 0.30793735067693123, "grad_norm": 1.059836564785302, "learning_rate": 0.00019204776695222616, "loss": 12.655, "step": 5655 }, { "epoch": 0.3079918046735143, "grad_norm": 0.7725726412551481, "learning_rate": 0.00019204432046169903, "loss": 12.6174, "step": 5656 }, { "epoch": 0.3080462586700973, "grad_norm": 0.8319369769046256, "learning_rate": 0.0001920408732554198, "loss": 12.627, "step": 5657 }, { "epoch": 0.30810071266668027, "grad_norm": 0.7314135357215268, "learning_rate": 0.00019203742533341534, "loss": 12.6254, "step": 5658 }, { "epoch": 0.3081551666632633, "grad_norm": 0.761529435714816, "learning_rate": 0.00019203397669571243, "loss": 12.6114, "step": 5659 }, { "epoch": 0.3082096206598463, "grad_norm": 0.8174963212126977, "learning_rate": 0.00019203052734233786, "loss": 12.6282, "step": 5660 }, { "epoch": 0.3082640746564293, "grad_norm": 0.6595276890115349, "learning_rate": 0.0001920270772733185, "loss": 12.5181, "step": 5661 }, { "epoch": 0.30831852865301235, "grad_norm": 0.7448298217772923, "learning_rate": 0.00019202362648868112, "loss": 12.5468, "step": 5662 }, { "epoch": 0.30837298264959534, "grad_norm": 0.7681547956272415, "learning_rate": 0.00019202017498845265, "loss": 12.5377, "step": 5663 }, { "epoch": 0.30842743664617833, "grad_norm": 0.6643886990380933, "learning_rate": 0.00019201672277265982, "loss": 12.6046, "step": 5664 }, { "epoch": 0.3084818906427614, "grad_norm": 0.792858194541144, "learning_rate": 0.00019201326984132953, "loss": 12.5158, "step": 5665 }, { "epoch": 0.3085363446393444, "grad_norm": 0.7871328263741095, "learning_rate": 0.00019200981619448863, "loss": 12.6348, "step": 5666 }, { "epoch": 0.30859079863592737, "grad_norm": 0.6989003752746422, "learning_rate": 0.00019200636183216397, "loss": 12.5612, "step": 5667 }, { "epoch": 0.3086452526325104, "grad_norm": 0.840356018473379, "learning_rate": 0.0001920029067543824, "loss": 12.5499, "step": 5668 }, { "epoch": 0.3086997066290934, "grad_norm": 0.6120244188572662, "learning_rate": 0.0001919994509611708, "loss": 12.5905, "step": 5669 }, { "epoch": 0.3087541606256764, "grad_norm": 0.7276319280590692, "learning_rate": 0.00019199599445255606, "loss": 12.5964, "step": 5670 }, { "epoch": 0.30880861462225945, "grad_norm": 0.7024582912295813, "learning_rate": 0.000191992537228565, "loss": 12.5345, "step": 5671 }, { "epoch": 0.30886306861884244, "grad_norm": 0.6446613012579606, "learning_rate": 0.00019198907928922457, "loss": 12.5057, "step": 5672 }, { "epoch": 0.30891752261542543, "grad_norm": 0.7342596906337274, "learning_rate": 0.00019198562063456163, "loss": 12.5339, "step": 5673 }, { "epoch": 0.3089719766120085, "grad_norm": 0.6693775956414059, "learning_rate": 0.00019198216126460306, "loss": 12.497, "step": 5674 }, { "epoch": 0.3090264306085915, "grad_norm": 0.654154004134321, "learning_rate": 0.0001919787011793758, "loss": 12.5619, "step": 5675 }, { "epoch": 0.30908088460517447, "grad_norm": 0.6961529048076709, "learning_rate": 0.00019197524037890674, "loss": 12.5442, "step": 5676 }, { "epoch": 0.3091353386017575, "grad_norm": 0.6364479952408855, "learning_rate": 0.0001919717788632228, "loss": 12.4765, "step": 5677 }, { "epoch": 0.3091897925983405, "grad_norm": 0.7031647693885875, "learning_rate": 0.00019196831663235083, "loss": 12.496, "step": 5678 }, { "epoch": 0.3092442465949235, "grad_norm": 0.6375027088057562, "learning_rate": 0.00019196485368631785, "loss": 12.4551, "step": 5679 }, { "epoch": 0.30929870059150655, "grad_norm": 0.6646869200935568, "learning_rate": 0.00019196139002515073, "loss": 12.6098, "step": 5680 }, { "epoch": 0.30935315458808954, "grad_norm": 0.7643407814102523, "learning_rate": 0.00019195792564887643, "loss": 12.7625, "step": 5681 }, { "epoch": 0.3094076085846726, "grad_norm": 0.6965512867985716, "learning_rate": 0.00019195446055752187, "loss": 12.6885, "step": 5682 }, { "epoch": 0.3094620625812556, "grad_norm": 0.7183214873245715, "learning_rate": 0.000191950994751114, "loss": 12.671, "step": 5683 }, { "epoch": 0.3095165165778386, "grad_norm": 0.7341201497898837, "learning_rate": 0.0001919475282296798, "loss": 12.5902, "step": 5684 }, { "epoch": 0.3095709705744216, "grad_norm": 0.7231880725512029, "learning_rate": 0.00019194406099324614, "loss": 12.7504, "step": 5685 }, { "epoch": 0.3096254245710046, "grad_norm": 0.8481118115905432, "learning_rate": 0.0001919405930418401, "loss": 12.5931, "step": 5686 }, { "epoch": 0.3096798785675876, "grad_norm": 0.60463448001354, "learning_rate": 0.00019193712437548858, "loss": 12.5207, "step": 5687 }, { "epoch": 0.30973433256417066, "grad_norm": 0.8003196827589169, "learning_rate": 0.00019193365499421857, "loss": 12.5607, "step": 5688 }, { "epoch": 0.30978878656075365, "grad_norm": 0.6569732989360393, "learning_rate": 0.000191930184898057, "loss": 12.5348, "step": 5689 }, { "epoch": 0.30984324055733664, "grad_norm": 0.6507547624096993, "learning_rate": 0.00019192671408703094, "loss": 12.5218, "step": 5690 }, { "epoch": 0.3098976945539197, "grad_norm": 0.8016253940452907, "learning_rate": 0.00019192324256116732, "loss": 12.6849, "step": 5691 }, { "epoch": 0.3099521485505027, "grad_norm": 0.6636968700459538, "learning_rate": 0.00019191977032049313, "loss": 12.4833, "step": 5692 }, { "epoch": 0.3100066025470857, "grad_norm": 0.7364071722048001, "learning_rate": 0.00019191629736503544, "loss": 12.5185, "step": 5693 }, { "epoch": 0.3100610565436687, "grad_norm": 0.7051502639556526, "learning_rate": 0.00019191282369482115, "loss": 12.6904, "step": 5694 }, { "epoch": 0.3101155105402517, "grad_norm": 0.6651267585232153, "learning_rate": 0.00019190934930987736, "loss": 12.6454, "step": 5695 }, { "epoch": 0.3101699645368347, "grad_norm": 0.6720576053944531, "learning_rate": 0.00019190587421023106, "loss": 12.6355, "step": 5696 }, { "epoch": 0.31022441853341776, "grad_norm": 0.7403081220658613, "learning_rate": 0.00019190239839590926, "loss": 12.6477, "step": 5697 }, { "epoch": 0.31027887253000075, "grad_norm": 0.6856887471881359, "learning_rate": 0.000191898921866939, "loss": 12.5374, "step": 5698 }, { "epoch": 0.31033332652658374, "grad_norm": 0.7849792672826814, "learning_rate": 0.00019189544462334731, "loss": 12.5885, "step": 5699 }, { "epoch": 0.3103877805231668, "grad_norm": 0.6940421079361554, "learning_rate": 0.00019189196666516124, "loss": 12.5049, "step": 5700 }, { "epoch": 0.3104422345197498, "grad_norm": 0.7815347783955346, "learning_rate": 0.00019188848799240782, "loss": 12.6357, "step": 5701 }, { "epoch": 0.3104966885163328, "grad_norm": 0.7871432775569294, "learning_rate": 0.00019188500860511408, "loss": 12.5207, "step": 5702 }, { "epoch": 0.3105511425129158, "grad_norm": 0.729806048478542, "learning_rate": 0.00019188152850330717, "loss": 12.6383, "step": 5703 }, { "epoch": 0.3106055965094988, "grad_norm": 0.6487578342015209, "learning_rate": 0.00019187804768701404, "loss": 12.4904, "step": 5704 }, { "epoch": 0.3106600505060818, "grad_norm": 0.6137037639031049, "learning_rate": 0.0001918745661562618, "loss": 12.3875, "step": 5705 }, { "epoch": 0.31071450450266486, "grad_norm": 0.6880702484472031, "learning_rate": 0.00019187108391107756, "loss": 12.5841, "step": 5706 }, { "epoch": 0.31076895849924785, "grad_norm": 0.7192930692423495, "learning_rate": 0.00019186760095148833, "loss": 12.5358, "step": 5707 }, { "epoch": 0.31082341249583084, "grad_norm": 0.6777797822609798, "learning_rate": 0.00019186411727752125, "loss": 12.6532, "step": 5708 }, { "epoch": 0.3108778664924139, "grad_norm": 0.6988579087077, "learning_rate": 0.00019186063288920336, "loss": 12.4749, "step": 5709 }, { "epoch": 0.3109323204889969, "grad_norm": 0.8106200711091491, "learning_rate": 0.00019185714778656183, "loss": 12.7109, "step": 5710 }, { "epoch": 0.3109867744855799, "grad_norm": 0.7307713134379897, "learning_rate": 0.00019185366196962367, "loss": 12.5756, "step": 5711 }, { "epoch": 0.3110412284821629, "grad_norm": 0.7003160909230988, "learning_rate": 0.00019185017543841605, "loss": 12.5781, "step": 5712 }, { "epoch": 0.3110956824787459, "grad_norm": 0.7035305537630274, "learning_rate": 0.00019184668819296604, "loss": 12.592, "step": 5713 }, { "epoch": 0.3111501364753289, "grad_norm": 0.6408295726992614, "learning_rate": 0.00019184320023330083, "loss": 12.4214, "step": 5714 }, { "epoch": 0.31120459047191196, "grad_norm": 0.6757175305885018, "learning_rate": 0.00019183971155944748, "loss": 12.5193, "step": 5715 }, { "epoch": 0.31125904446849495, "grad_norm": 0.6714370645069598, "learning_rate": 0.0001918362221714331, "loss": 12.5045, "step": 5716 }, { "epoch": 0.311313498465078, "grad_norm": 0.763641142450625, "learning_rate": 0.00019183273206928487, "loss": 12.466, "step": 5717 }, { "epoch": 0.311367952461661, "grad_norm": 0.6373620796361511, "learning_rate": 0.0001918292412530299, "loss": 12.4351, "step": 5718 }, { "epoch": 0.311422406458244, "grad_norm": 0.773134594117127, "learning_rate": 0.00019182574972269537, "loss": 12.7381, "step": 5719 }, { "epoch": 0.31147686045482703, "grad_norm": 0.6326104987283147, "learning_rate": 0.0001918222574783084, "loss": 12.5616, "step": 5720 }, { "epoch": 0.31153131445141, "grad_norm": 0.7155966148875553, "learning_rate": 0.0001918187645198962, "loss": 12.6321, "step": 5721 }, { "epoch": 0.311585768447993, "grad_norm": 0.7879595861603473, "learning_rate": 0.00019181527084748582, "loss": 12.5777, "step": 5722 }, { "epoch": 0.31164022244457606, "grad_norm": 0.6353999496292784, "learning_rate": 0.00019181177646110454, "loss": 12.6064, "step": 5723 }, { "epoch": 0.31169467644115906, "grad_norm": 0.7480236445199707, "learning_rate": 0.00019180828136077947, "loss": 12.5245, "step": 5724 }, { "epoch": 0.31174913043774205, "grad_norm": 0.7011891796684117, "learning_rate": 0.00019180478554653782, "loss": 12.6022, "step": 5725 }, { "epoch": 0.3118035844343251, "grad_norm": 0.7226576397297431, "learning_rate": 0.00019180128901840677, "loss": 12.461, "step": 5726 }, { "epoch": 0.3118580384309081, "grad_norm": 0.8105551964611454, "learning_rate": 0.0001917977917764135, "loss": 12.5971, "step": 5727 }, { "epoch": 0.3119124924274911, "grad_norm": 0.7105623987225386, "learning_rate": 0.00019179429382058517, "loss": 12.5568, "step": 5728 }, { "epoch": 0.31196694642407413, "grad_norm": 0.7423286623670033, "learning_rate": 0.00019179079515094908, "loss": 12.5355, "step": 5729 }, { "epoch": 0.3120214004206571, "grad_norm": 0.6921990475242991, "learning_rate": 0.0001917872957675323, "loss": 12.5362, "step": 5730 }, { "epoch": 0.3120758544172401, "grad_norm": 0.6326208002108714, "learning_rate": 0.00019178379567036217, "loss": 12.4887, "step": 5731 }, { "epoch": 0.31213030841382317, "grad_norm": 0.8282457364681612, "learning_rate": 0.00019178029485946585, "loss": 12.6543, "step": 5732 }, { "epoch": 0.31218476241040616, "grad_norm": 0.6377328782854843, "learning_rate": 0.00019177679333487056, "loss": 12.5946, "step": 5733 }, { "epoch": 0.31223921640698915, "grad_norm": 0.6920475291113465, "learning_rate": 0.0001917732910966035, "loss": 12.5284, "step": 5734 }, { "epoch": 0.3122936704035722, "grad_norm": 0.7101041584136139, "learning_rate": 0.00019176978814469198, "loss": 12.5032, "step": 5735 }, { "epoch": 0.3123481244001552, "grad_norm": 0.6111176119060407, "learning_rate": 0.0001917662844791632, "loss": 12.5313, "step": 5736 }, { "epoch": 0.3124025783967382, "grad_norm": 0.6681293165538871, "learning_rate": 0.00019176278010004435, "loss": 12.5214, "step": 5737 }, { "epoch": 0.31245703239332123, "grad_norm": 0.7689966914982728, "learning_rate": 0.00019175927500736278, "loss": 12.5465, "step": 5738 }, { "epoch": 0.3125114863899042, "grad_norm": 0.6566846990967126, "learning_rate": 0.00019175576920114567, "loss": 12.4811, "step": 5739 }, { "epoch": 0.3125659403864872, "grad_norm": 0.8515807575793801, "learning_rate": 0.00019175226268142032, "loss": 12.7253, "step": 5740 }, { "epoch": 0.31262039438307027, "grad_norm": 0.8194373785790895, "learning_rate": 0.00019174875544821402, "loss": 12.5978, "step": 5741 }, { "epoch": 0.31267484837965326, "grad_norm": 0.5959364512025092, "learning_rate": 0.00019174524750155398, "loss": 12.4635, "step": 5742 }, { "epoch": 0.31272930237623625, "grad_norm": 0.7182012662497026, "learning_rate": 0.00019174173884146752, "loss": 12.6004, "step": 5743 }, { "epoch": 0.3127837563728193, "grad_norm": 0.6191882074588925, "learning_rate": 0.00019173822946798191, "loss": 12.5388, "step": 5744 }, { "epoch": 0.3128382103694023, "grad_norm": 0.7284139974989752, "learning_rate": 0.00019173471938112443, "loss": 12.5659, "step": 5745 }, { "epoch": 0.3128926643659853, "grad_norm": 0.7308799859614893, "learning_rate": 0.00019173120858092242, "loss": 12.5344, "step": 5746 }, { "epoch": 0.31294711836256833, "grad_norm": 0.7161742868249728, "learning_rate": 0.0001917276970674031, "loss": 12.6181, "step": 5747 }, { "epoch": 0.3130015723591513, "grad_norm": 0.6887630271894791, "learning_rate": 0.0001917241848405939, "loss": 12.5654, "step": 5748 }, { "epoch": 0.3130560263557344, "grad_norm": 0.7334236423854301, "learning_rate": 0.000191720671900522, "loss": 12.6554, "step": 5749 }, { "epoch": 0.31311048035231737, "grad_norm": 0.7280109583218388, "learning_rate": 0.00019171715824721478, "loss": 12.6293, "step": 5750 }, { "epoch": 0.31316493434890036, "grad_norm": 0.7742369837794514, "learning_rate": 0.00019171364388069958, "loss": 12.537, "step": 5751 }, { "epoch": 0.3132193883454834, "grad_norm": 0.6332978382946135, "learning_rate": 0.0001917101288010037, "loss": 12.5003, "step": 5752 }, { "epoch": 0.3132738423420664, "grad_norm": 0.8109885888601659, "learning_rate": 0.00019170661300815445, "loss": 12.5463, "step": 5753 }, { "epoch": 0.3133282963386494, "grad_norm": 0.6776696760063646, "learning_rate": 0.0001917030965021792, "loss": 12.4903, "step": 5754 }, { "epoch": 0.31338275033523244, "grad_norm": 0.7367968361944557, "learning_rate": 0.00019169957928310533, "loss": 12.5046, "step": 5755 }, { "epoch": 0.31343720433181543, "grad_norm": 0.6710299186422611, "learning_rate": 0.0001916960613509601, "loss": 12.5471, "step": 5756 }, { "epoch": 0.3134916583283984, "grad_norm": 0.8285856812284736, "learning_rate": 0.00019169254270577098, "loss": 12.6954, "step": 5757 }, { "epoch": 0.3135461123249815, "grad_norm": 0.6826103991424295, "learning_rate": 0.00019168902334756524, "loss": 12.3569, "step": 5758 }, { "epoch": 0.31360056632156447, "grad_norm": 0.7371646823095679, "learning_rate": 0.0001916855032763703, "loss": 12.5832, "step": 5759 }, { "epoch": 0.31365502031814746, "grad_norm": 0.6338402069507585, "learning_rate": 0.00019168198249221348, "loss": 12.5793, "step": 5760 }, { "epoch": 0.3137094743147305, "grad_norm": 0.6892790788000476, "learning_rate": 0.00019167846099512218, "loss": 12.5516, "step": 5761 }, { "epoch": 0.3137639283113135, "grad_norm": 0.685422960143123, "learning_rate": 0.00019167493878512382, "loss": 12.6479, "step": 5762 }, { "epoch": 0.3138183823078965, "grad_norm": 0.6800269200015937, "learning_rate": 0.00019167141586224576, "loss": 12.5571, "step": 5763 }, { "epoch": 0.31387283630447954, "grad_norm": 0.8151275310663422, "learning_rate": 0.00019166789222651537, "loss": 12.7188, "step": 5764 }, { "epoch": 0.31392729030106253, "grad_norm": 0.69512271148721, "learning_rate": 0.0001916643678779601, "loss": 12.6066, "step": 5765 }, { "epoch": 0.3139817442976455, "grad_norm": 0.8421123782899708, "learning_rate": 0.00019166084281660735, "loss": 12.5477, "step": 5766 }, { "epoch": 0.3140361982942286, "grad_norm": 0.7250909770675339, "learning_rate": 0.0001916573170424845, "loss": 12.6669, "step": 5767 }, { "epoch": 0.31409065229081157, "grad_norm": 0.7765303098532939, "learning_rate": 0.00019165379055561895, "loss": 12.5521, "step": 5768 }, { "epoch": 0.31414510628739456, "grad_norm": 0.757240010661953, "learning_rate": 0.0001916502633560382, "loss": 12.515, "step": 5769 }, { "epoch": 0.3141995602839776, "grad_norm": 0.7924596813156907, "learning_rate": 0.00019164673544376962, "loss": 12.6334, "step": 5770 }, { "epoch": 0.3142540142805606, "grad_norm": 0.6340821796162117, "learning_rate": 0.00019164320681884064, "loss": 12.5287, "step": 5771 }, { "epoch": 0.3143084682771436, "grad_norm": 0.6938443956294725, "learning_rate": 0.00019163967748127874, "loss": 12.591, "step": 5772 }, { "epoch": 0.31436292227372664, "grad_norm": 0.8052442207081102, "learning_rate": 0.00019163614743111134, "loss": 12.6676, "step": 5773 }, { "epoch": 0.31441737627030963, "grad_norm": 0.6897969950775128, "learning_rate": 0.00019163261666836588, "loss": 12.5717, "step": 5774 }, { "epoch": 0.3144718302668926, "grad_norm": 0.7234430598331005, "learning_rate": 0.00019162908519306982, "loss": 12.7961, "step": 5775 }, { "epoch": 0.3145262842634757, "grad_norm": 0.7182030630629905, "learning_rate": 0.00019162555300525062, "loss": 12.6843, "step": 5776 }, { "epoch": 0.31458073826005867, "grad_norm": 0.7212326014240544, "learning_rate": 0.00019162202010493577, "loss": 12.5077, "step": 5777 }, { "epoch": 0.31463519225664166, "grad_norm": 0.7180839647466242, "learning_rate": 0.00019161848649215272, "loss": 12.6407, "step": 5778 }, { "epoch": 0.3146896462532247, "grad_norm": 0.6995245318344423, "learning_rate": 0.00019161495216692896, "loss": 12.594, "step": 5779 }, { "epoch": 0.3147441002498077, "grad_norm": 0.7030108004030603, "learning_rate": 0.00019161141712929197, "loss": 12.487, "step": 5780 }, { "epoch": 0.3147985542463907, "grad_norm": 0.7292919566984026, "learning_rate": 0.0001916078813792692, "loss": 12.585, "step": 5781 }, { "epoch": 0.31485300824297374, "grad_norm": 0.759506562847832, "learning_rate": 0.00019160434491688824, "loss": 12.4386, "step": 5782 }, { "epoch": 0.31490746223955673, "grad_norm": 0.9639081006195463, "learning_rate": 0.00019160080774217647, "loss": 12.6828, "step": 5783 }, { "epoch": 0.3149619162361398, "grad_norm": 0.7434314130278653, "learning_rate": 0.00019159726985516152, "loss": 12.4645, "step": 5784 }, { "epoch": 0.3150163702327228, "grad_norm": 0.6400421001900726, "learning_rate": 0.00019159373125587082, "loss": 12.3502, "step": 5785 }, { "epoch": 0.31507082422930577, "grad_norm": 0.6867545808438131, "learning_rate": 0.00019159019194433188, "loss": 12.5432, "step": 5786 }, { "epoch": 0.3151252782258888, "grad_norm": 0.7896890316318025, "learning_rate": 0.00019158665192057229, "loss": 12.5461, "step": 5787 }, { "epoch": 0.3151797322224718, "grad_norm": 0.6951193183704759, "learning_rate": 0.00019158311118461948, "loss": 12.6574, "step": 5788 }, { "epoch": 0.3152341862190548, "grad_norm": 0.7847001380500844, "learning_rate": 0.00019157956973650108, "loss": 12.6444, "step": 5789 }, { "epoch": 0.31528864021563785, "grad_norm": 0.7231280042579047, "learning_rate": 0.0001915760275762446, "loss": 12.5887, "step": 5790 }, { "epoch": 0.31534309421222084, "grad_norm": 0.710818117137646, "learning_rate": 0.00019157248470387753, "loss": 12.3551, "step": 5791 }, { "epoch": 0.31539754820880384, "grad_norm": 0.8236673096850398, "learning_rate": 0.00019156894111942746, "loss": 12.6124, "step": 5792 }, { "epoch": 0.3154520022053869, "grad_norm": 0.6464910311423513, "learning_rate": 0.00019156539682292197, "loss": 12.5541, "step": 5793 }, { "epoch": 0.3155064562019699, "grad_norm": 0.8011589161346442, "learning_rate": 0.00019156185181438861, "loss": 12.5346, "step": 5794 }, { "epoch": 0.31556091019855287, "grad_norm": 0.7003550705119012, "learning_rate": 0.00019155830609385492, "loss": 12.5535, "step": 5795 }, { "epoch": 0.3156153641951359, "grad_norm": 0.6682420425845063, "learning_rate": 0.0001915547596613485, "loss": 12.551, "step": 5796 }, { "epoch": 0.3156698181917189, "grad_norm": 0.6748930099795875, "learning_rate": 0.00019155121251689689, "loss": 12.5044, "step": 5797 }, { "epoch": 0.3157242721883019, "grad_norm": 0.650553657834858, "learning_rate": 0.00019154766466052773, "loss": 12.5407, "step": 5798 }, { "epoch": 0.31577872618488495, "grad_norm": 0.702942885517404, "learning_rate": 0.00019154411609226854, "loss": 12.4614, "step": 5799 }, { "epoch": 0.31583318018146794, "grad_norm": 0.6481075120474772, "learning_rate": 0.000191540566812147, "loss": 12.535, "step": 5800 }, { "epoch": 0.31588763417805094, "grad_norm": 1.0794965868639268, "learning_rate": 0.00019153701682019062, "loss": 12.4537, "step": 5801 }, { "epoch": 0.315942088174634, "grad_norm": 0.7350571771371223, "learning_rate": 0.00019153346611642706, "loss": 12.5537, "step": 5802 }, { "epoch": 0.315996542171217, "grad_norm": 0.6826378435926155, "learning_rate": 0.0001915299147008839, "loss": 12.632, "step": 5803 }, { "epoch": 0.31605099616779997, "grad_norm": 0.731159866122895, "learning_rate": 0.0001915263625735888, "loss": 12.564, "step": 5804 }, { "epoch": 0.316105450164383, "grad_norm": 0.7153238353345873, "learning_rate": 0.00019152280973456934, "loss": 12.6477, "step": 5805 }, { "epoch": 0.316159904160966, "grad_norm": 0.7530356054837524, "learning_rate": 0.0001915192561838532, "loss": 12.6045, "step": 5806 }, { "epoch": 0.316214358157549, "grad_norm": 0.7768597439292041, "learning_rate": 0.00019151570192146793, "loss": 12.6109, "step": 5807 }, { "epoch": 0.31626881215413205, "grad_norm": 0.7119692150042135, "learning_rate": 0.00019151214694744124, "loss": 12.5061, "step": 5808 }, { "epoch": 0.31632326615071504, "grad_norm": 0.6405228772522945, "learning_rate": 0.00019150859126180073, "loss": 12.6042, "step": 5809 }, { "epoch": 0.31637772014729804, "grad_norm": 0.7223526465464335, "learning_rate": 0.00019150503486457408, "loss": 12.5468, "step": 5810 }, { "epoch": 0.3164321741438811, "grad_norm": 0.783686761881016, "learning_rate": 0.00019150147775578893, "loss": 12.5976, "step": 5811 }, { "epoch": 0.3164866281404641, "grad_norm": 0.6661836937905445, "learning_rate": 0.00019149791993547296, "loss": 12.4926, "step": 5812 }, { "epoch": 0.31654108213704707, "grad_norm": 0.7332835327296134, "learning_rate": 0.00019149436140365378, "loss": 12.6416, "step": 5813 }, { "epoch": 0.3165955361336301, "grad_norm": 0.689079313091683, "learning_rate": 0.00019149080216035916, "loss": 12.521, "step": 5814 }, { "epoch": 0.3166499901302131, "grad_norm": 0.7952304139427612, "learning_rate": 0.00019148724220561665, "loss": 12.6607, "step": 5815 }, { "epoch": 0.31670444412679616, "grad_norm": 0.6523041678617995, "learning_rate": 0.00019148368153945407, "loss": 12.491, "step": 5816 }, { "epoch": 0.31675889812337915, "grad_norm": 0.6227683054782411, "learning_rate": 0.000191480120161899, "loss": 12.5964, "step": 5817 }, { "epoch": 0.31681335211996214, "grad_norm": 0.6687279981265427, "learning_rate": 0.00019147655807297918, "loss": 12.6218, "step": 5818 }, { "epoch": 0.3168678061165452, "grad_norm": 0.6862606107733062, "learning_rate": 0.0001914729952727223, "loss": 12.5118, "step": 5819 }, { "epoch": 0.3169222601131282, "grad_norm": 0.7142573441962577, "learning_rate": 0.0001914694317611561, "loss": 12.5311, "step": 5820 }, { "epoch": 0.3169767141097112, "grad_norm": 0.6552484972372591, "learning_rate": 0.0001914658675383082, "loss": 12.5544, "step": 5821 }, { "epoch": 0.3170311681062942, "grad_norm": 0.9383767153400039, "learning_rate": 0.00019146230260420644, "loss": 12.6369, "step": 5822 }, { "epoch": 0.3170856221028772, "grad_norm": 0.7370541874647787, "learning_rate": 0.00019145873695887843, "loss": 12.639, "step": 5823 }, { "epoch": 0.3171400760994602, "grad_norm": 0.7141483693702555, "learning_rate": 0.00019145517060235195, "loss": 12.6066, "step": 5824 }, { "epoch": 0.31719453009604326, "grad_norm": 0.7333381815447372, "learning_rate": 0.00019145160353465474, "loss": 12.5564, "step": 5825 }, { "epoch": 0.31724898409262625, "grad_norm": 0.6183938441937711, "learning_rate": 0.00019144803575581453, "loss": 12.5607, "step": 5826 }, { "epoch": 0.31730343808920924, "grad_norm": 0.6892142292238302, "learning_rate": 0.00019144446726585904, "loss": 12.5779, "step": 5827 }, { "epoch": 0.3173578920857923, "grad_norm": 0.6630403431973069, "learning_rate": 0.00019144089806481606, "loss": 12.5383, "step": 5828 }, { "epoch": 0.3174123460823753, "grad_norm": 0.6879898272065529, "learning_rate": 0.0001914373281527133, "loss": 12.6813, "step": 5829 }, { "epoch": 0.3174668000789583, "grad_norm": 0.9279147458061492, "learning_rate": 0.00019143375752957856, "loss": 12.5834, "step": 5830 }, { "epoch": 0.3175212540755413, "grad_norm": 0.8261607137141636, "learning_rate": 0.0001914301861954396, "loss": 12.6757, "step": 5831 }, { "epoch": 0.3175757080721243, "grad_norm": 0.7070172220832147, "learning_rate": 0.00019142661415032415, "loss": 12.5782, "step": 5832 }, { "epoch": 0.3176301620687073, "grad_norm": 0.7416069134775134, "learning_rate": 0.00019142304139426, "loss": 12.4508, "step": 5833 }, { "epoch": 0.31768461606529036, "grad_norm": 0.6759809445170305, "learning_rate": 0.000191419467927275, "loss": 12.4728, "step": 5834 }, { "epoch": 0.31773907006187335, "grad_norm": 0.7479228483812418, "learning_rate": 0.00019141589374939685, "loss": 12.5918, "step": 5835 }, { "epoch": 0.31779352405845634, "grad_norm": 0.7018073897109439, "learning_rate": 0.0001914123188606534, "loss": 12.6481, "step": 5836 }, { "epoch": 0.3178479780550394, "grad_norm": 0.6509538967378394, "learning_rate": 0.0001914087432610724, "loss": 12.6216, "step": 5837 }, { "epoch": 0.3179024320516224, "grad_norm": 0.6956653775989627, "learning_rate": 0.0001914051669506817, "loss": 12.4914, "step": 5838 }, { "epoch": 0.3179568860482054, "grad_norm": 0.6488704800793494, "learning_rate": 0.0001914015899295091, "loss": 12.4812, "step": 5839 }, { "epoch": 0.3180113400447884, "grad_norm": 0.6687389598321076, "learning_rate": 0.00019139801219758242, "loss": 12.5834, "step": 5840 }, { "epoch": 0.3180657940413714, "grad_norm": 0.67578294702017, "learning_rate": 0.00019139443375492944, "loss": 12.5554, "step": 5841 }, { "epoch": 0.3181202480379544, "grad_norm": 0.6408985153317925, "learning_rate": 0.00019139085460157803, "loss": 12.5064, "step": 5842 }, { "epoch": 0.31817470203453746, "grad_norm": 0.6129408061020057, "learning_rate": 0.00019138727473755603, "loss": 12.5581, "step": 5843 }, { "epoch": 0.31822915603112045, "grad_norm": 0.7117051176119991, "learning_rate": 0.00019138369416289122, "loss": 12.6514, "step": 5844 }, { "epoch": 0.31828361002770345, "grad_norm": 0.6685305734872339, "learning_rate": 0.0001913801128776115, "loss": 12.6237, "step": 5845 }, { "epoch": 0.3183380640242865, "grad_norm": 0.7935940982999596, "learning_rate": 0.0001913765308817447, "loss": 12.6416, "step": 5846 }, { "epoch": 0.3183925180208695, "grad_norm": 0.6777446131367032, "learning_rate": 0.00019137294817531863, "loss": 12.4684, "step": 5847 }, { "epoch": 0.3184469720174525, "grad_norm": 0.6927049733411546, "learning_rate": 0.00019136936475836126, "loss": 12.4775, "step": 5848 }, { "epoch": 0.3185014260140355, "grad_norm": 0.6803616170636894, "learning_rate": 0.00019136578063090034, "loss": 12.4689, "step": 5849 }, { "epoch": 0.3185558800106185, "grad_norm": 0.6612387529143992, "learning_rate": 0.0001913621957929638, "loss": 12.61, "step": 5850 }, { "epoch": 0.31861033400720157, "grad_norm": 0.6785997938224911, "learning_rate": 0.0001913586102445795, "loss": 12.6917, "step": 5851 }, { "epoch": 0.31866478800378456, "grad_norm": 0.6857052369825005, "learning_rate": 0.00019135502398577532, "loss": 12.6363, "step": 5852 }, { "epoch": 0.31871924200036755, "grad_norm": 0.6627549169132189, "learning_rate": 0.00019135143701657915, "loss": 12.5624, "step": 5853 }, { "epoch": 0.3187736959969506, "grad_norm": 0.6077596656319667, "learning_rate": 0.00019134784933701892, "loss": 12.4125, "step": 5854 }, { "epoch": 0.3188281499935336, "grad_norm": 0.7000816467705235, "learning_rate": 0.00019134426094712245, "loss": 12.636, "step": 5855 }, { "epoch": 0.3188826039901166, "grad_norm": 0.6772191962377853, "learning_rate": 0.00019134067184691772, "loss": 12.6985, "step": 5856 }, { "epoch": 0.31893705798669963, "grad_norm": 0.7770632506722829, "learning_rate": 0.0001913370820364326, "loss": 12.5842, "step": 5857 }, { "epoch": 0.3189915119832826, "grad_norm": 0.7579582324057819, "learning_rate": 0.00019133349151569503, "loss": 12.4904, "step": 5858 }, { "epoch": 0.3190459659798656, "grad_norm": 0.7548108905123265, "learning_rate": 0.0001913299002847329, "loss": 12.5497, "step": 5859 }, { "epoch": 0.31910041997644867, "grad_norm": 0.6842505571032034, "learning_rate": 0.00019132630834357413, "loss": 12.5264, "step": 5860 }, { "epoch": 0.31915487397303166, "grad_norm": 0.7389234910801799, "learning_rate": 0.0001913227156922467, "loss": 12.5738, "step": 5861 }, { "epoch": 0.31920932796961465, "grad_norm": 0.6907600700325771, "learning_rate": 0.0001913191223307785, "loss": 12.5408, "step": 5862 }, { "epoch": 0.3192637819661977, "grad_norm": 0.6885413039328351, "learning_rate": 0.0001913155282591975, "loss": 12.4737, "step": 5863 }, { "epoch": 0.3193182359627807, "grad_norm": 0.7449988965819492, "learning_rate": 0.00019131193347753163, "loss": 12.6084, "step": 5864 }, { "epoch": 0.3193726899593637, "grad_norm": 0.8251864478061979, "learning_rate": 0.00019130833798580886, "loss": 12.5428, "step": 5865 }, { "epoch": 0.31942714395594674, "grad_norm": 0.6841118849731743, "learning_rate": 0.00019130474178405714, "loss": 12.641, "step": 5866 }, { "epoch": 0.31948159795252973, "grad_norm": 0.7122261775745119, "learning_rate": 0.00019130114487230442, "loss": 12.5943, "step": 5867 }, { "epoch": 0.3195360519491127, "grad_norm": 0.6901954748558911, "learning_rate": 0.0001912975472505787, "loss": 12.5068, "step": 5868 }, { "epoch": 0.31959050594569577, "grad_norm": 0.7314037157651961, "learning_rate": 0.00019129394891890793, "loss": 12.4722, "step": 5869 }, { "epoch": 0.31964495994227876, "grad_norm": 0.7117573572189002, "learning_rate": 0.00019129034987732012, "loss": 12.578, "step": 5870 }, { "epoch": 0.31969941393886175, "grad_norm": 0.7010924063861873, "learning_rate": 0.00019128675012584326, "loss": 12.6654, "step": 5871 }, { "epoch": 0.3197538679354448, "grad_norm": 0.6064541420225625, "learning_rate": 0.00019128314966450528, "loss": 12.55, "step": 5872 }, { "epoch": 0.3198083219320278, "grad_norm": 0.6321466814864312, "learning_rate": 0.00019127954849333423, "loss": 12.5765, "step": 5873 }, { "epoch": 0.3198627759286108, "grad_norm": 0.6947632669120551, "learning_rate": 0.0001912759466123581, "loss": 12.514, "step": 5874 }, { "epoch": 0.31991722992519384, "grad_norm": 0.6828212967587132, "learning_rate": 0.0001912723440216049, "loss": 12.5452, "step": 5875 }, { "epoch": 0.31997168392177683, "grad_norm": 0.7090710584294178, "learning_rate": 0.00019126874072110267, "loss": 12.5601, "step": 5876 }, { "epoch": 0.3200261379183598, "grad_norm": 0.641093483322115, "learning_rate": 0.0001912651367108794, "loss": 12.5624, "step": 5877 }, { "epoch": 0.32008059191494287, "grad_norm": 0.6538327455468403, "learning_rate": 0.0001912615319909631, "loss": 12.5216, "step": 5878 }, { "epoch": 0.32013504591152586, "grad_norm": 0.6449607621299951, "learning_rate": 0.00019125792656138186, "loss": 12.5711, "step": 5879 }, { "epoch": 0.32018949990810885, "grad_norm": 0.7118519253223596, "learning_rate": 0.00019125432042216365, "loss": 12.5681, "step": 5880 }, { "epoch": 0.3202439539046919, "grad_norm": 0.6947313023045424, "learning_rate": 0.00019125071357333658, "loss": 12.5832, "step": 5881 }, { "epoch": 0.3202984079012749, "grad_norm": 0.719459911799742, "learning_rate": 0.0001912471060149286, "loss": 12.5015, "step": 5882 }, { "epoch": 0.32035286189785794, "grad_norm": 0.662669731033275, "learning_rate": 0.00019124349774696787, "loss": 12.5893, "step": 5883 }, { "epoch": 0.32040731589444094, "grad_norm": 0.7102087719102528, "learning_rate": 0.00019123988876948236, "loss": 12.537, "step": 5884 }, { "epoch": 0.32046176989102393, "grad_norm": 0.6084472187136899, "learning_rate": 0.0001912362790825002, "loss": 12.5018, "step": 5885 }, { "epoch": 0.320516223887607, "grad_norm": 0.6703985463238232, "learning_rate": 0.0001912326686860494, "loss": 12.5211, "step": 5886 }, { "epoch": 0.32057067788418997, "grad_norm": 0.6735533812325952, "learning_rate": 0.00019122905758015812, "loss": 12.5738, "step": 5887 }, { "epoch": 0.32062513188077296, "grad_norm": 0.8245981215897119, "learning_rate": 0.00019122544576485434, "loss": 12.6971, "step": 5888 }, { "epoch": 0.320679585877356, "grad_norm": 0.7387246587967972, "learning_rate": 0.00019122183324016625, "loss": 12.7761, "step": 5889 }, { "epoch": 0.320734039873939, "grad_norm": 0.6311846714662387, "learning_rate": 0.00019121822000612185, "loss": 12.6251, "step": 5890 }, { "epoch": 0.320788493870522, "grad_norm": 0.6762614014696763, "learning_rate": 0.0001912146060627493, "loss": 12.5361, "step": 5891 }, { "epoch": 0.32084294786710504, "grad_norm": 0.7312297128165434, "learning_rate": 0.00019121099141007663, "loss": 12.6024, "step": 5892 }, { "epoch": 0.32089740186368804, "grad_norm": 0.6691564713248157, "learning_rate": 0.00019120737604813205, "loss": 12.6048, "step": 5893 }, { "epoch": 0.32095185586027103, "grad_norm": 0.6445853731650664, "learning_rate": 0.00019120375997694358, "loss": 12.5692, "step": 5894 }, { "epoch": 0.3210063098568541, "grad_norm": 0.68421869894897, "learning_rate": 0.00019120014319653938, "loss": 12.583, "step": 5895 }, { "epoch": 0.32106076385343707, "grad_norm": 0.8049595047614536, "learning_rate": 0.0001911965257069476, "loss": 12.7101, "step": 5896 }, { "epoch": 0.32111521785002006, "grad_norm": 0.667047744432576, "learning_rate": 0.00019119290750819633, "loss": 12.5683, "step": 5897 }, { "epoch": 0.3211696718466031, "grad_norm": 0.623587825427952, "learning_rate": 0.00019118928860031368, "loss": 12.5035, "step": 5898 }, { "epoch": 0.3212241258431861, "grad_norm": 0.7162232836644334, "learning_rate": 0.00019118566898332787, "loss": 12.5323, "step": 5899 }, { "epoch": 0.3212785798397691, "grad_norm": 0.6843716242945724, "learning_rate": 0.000191182048657267, "loss": 12.5902, "step": 5900 }, { "epoch": 0.32133303383635214, "grad_norm": 0.6313048020288865, "learning_rate": 0.00019117842762215922, "loss": 12.5645, "step": 5901 }, { "epoch": 0.32138748783293514, "grad_norm": 0.6700544330362783, "learning_rate": 0.0001911748058780327, "loss": 12.5279, "step": 5902 }, { "epoch": 0.32144194182951813, "grad_norm": 0.6428991072690676, "learning_rate": 0.0001911711834249156, "loss": 12.4831, "step": 5903 }, { "epoch": 0.3214963958261012, "grad_norm": 0.7827750281315372, "learning_rate": 0.00019116756026283608, "loss": 12.7236, "step": 5904 }, { "epoch": 0.32155084982268417, "grad_norm": 0.655512340891206, "learning_rate": 0.00019116393639182232, "loss": 12.5124, "step": 5905 }, { "epoch": 0.32160530381926716, "grad_norm": 0.6778863662038578, "learning_rate": 0.00019116031181190253, "loss": 12.488, "step": 5906 }, { "epoch": 0.3216597578158502, "grad_norm": 0.7940130979158228, "learning_rate": 0.00019115668652310486, "loss": 12.6014, "step": 5907 }, { "epoch": 0.3217142118124332, "grad_norm": 0.7597082595800774, "learning_rate": 0.0001911530605254575, "loss": 12.6357, "step": 5908 }, { "epoch": 0.3217686658090162, "grad_norm": 0.6902457091288527, "learning_rate": 0.00019114943381898865, "loss": 12.6692, "step": 5909 }, { "epoch": 0.32182311980559924, "grad_norm": 0.7174150884814654, "learning_rate": 0.0001911458064037265, "loss": 12.5264, "step": 5910 }, { "epoch": 0.32187757380218224, "grad_norm": 0.6886629116964688, "learning_rate": 0.00019114217827969932, "loss": 12.5167, "step": 5911 }, { "epoch": 0.32193202779876523, "grad_norm": 0.717546620919752, "learning_rate": 0.00019113854944693523, "loss": 12.5693, "step": 5912 }, { "epoch": 0.3219864817953483, "grad_norm": 0.6188720374301961, "learning_rate": 0.00019113491990546252, "loss": 12.459, "step": 5913 }, { "epoch": 0.32204093579193127, "grad_norm": 0.6450251252181151, "learning_rate": 0.00019113128965530943, "loss": 12.458, "step": 5914 }, { "epoch": 0.32209538978851426, "grad_norm": 0.6846813998165503, "learning_rate": 0.00019112765869650405, "loss": 12.4775, "step": 5915 }, { "epoch": 0.3221498437850973, "grad_norm": 0.6962122434622048, "learning_rate": 0.0001911240270290748, "loss": 12.6427, "step": 5916 }, { "epoch": 0.3222042977816803, "grad_norm": 0.6254373248490377, "learning_rate": 0.0001911203946530498, "loss": 12.5056, "step": 5917 }, { "epoch": 0.32225875177826335, "grad_norm": 0.6217407116597037, "learning_rate": 0.00019111676156845735, "loss": 12.4804, "step": 5918 }, { "epoch": 0.32231320577484635, "grad_norm": 0.7202595630434092, "learning_rate": 0.00019111312777532566, "loss": 12.5083, "step": 5919 }, { "epoch": 0.32236765977142934, "grad_norm": 0.6820159788858451, "learning_rate": 0.000191109493273683, "loss": 12.4608, "step": 5920 }, { "epoch": 0.3224221137680124, "grad_norm": 0.7085165494770425, "learning_rate": 0.0001911058580635577, "loss": 12.4741, "step": 5921 }, { "epoch": 0.3224765677645954, "grad_norm": 0.6757057788462777, "learning_rate": 0.0001911022221449779, "loss": 12.4002, "step": 5922 }, { "epoch": 0.32253102176117837, "grad_norm": 0.7049609976493082, "learning_rate": 0.00019109858551797198, "loss": 12.5587, "step": 5923 }, { "epoch": 0.3225854757577614, "grad_norm": 0.6373738244859181, "learning_rate": 0.00019109494818256816, "loss": 12.3764, "step": 5924 }, { "epoch": 0.3226399297543444, "grad_norm": 0.6628983708514188, "learning_rate": 0.00019109131013879475, "loss": 12.4796, "step": 5925 }, { "epoch": 0.3226943837509274, "grad_norm": 0.6350872853299196, "learning_rate": 0.00019108767138668005, "loss": 12.4353, "step": 5926 }, { "epoch": 0.32274883774751045, "grad_norm": 0.6424512411543644, "learning_rate": 0.00019108403192625236, "loss": 12.4423, "step": 5927 }, { "epoch": 0.32280329174409345, "grad_norm": 0.7047099081442071, "learning_rate": 0.00019108039175753992, "loss": 12.6113, "step": 5928 }, { "epoch": 0.32285774574067644, "grad_norm": 0.6543119269839901, "learning_rate": 0.00019107675088057108, "loss": 12.6195, "step": 5929 }, { "epoch": 0.3229121997372595, "grad_norm": 0.717057102160912, "learning_rate": 0.00019107310929537417, "loss": 12.3956, "step": 5930 }, { "epoch": 0.3229666537338425, "grad_norm": 0.7362917478799178, "learning_rate": 0.0001910694670019775, "loss": 12.6826, "step": 5931 }, { "epoch": 0.32302110773042547, "grad_norm": 0.6330396736954499, "learning_rate": 0.00019106582400040938, "loss": 12.5617, "step": 5932 }, { "epoch": 0.3230755617270085, "grad_norm": 0.6650985491791709, "learning_rate": 0.00019106218029069812, "loss": 12.6838, "step": 5933 }, { "epoch": 0.3231300157235915, "grad_norm": 0.6495988085164426, "learning_rate": 0.0001910585358728721, "loss": 12.4752, "step": 5934 }, { "epoch": 0.3231844697201745, "grad_norm": 0.595367641986107, "learning_rate": 0.0001910548907469596, "loss": 12.49, "step": 5935 }, { "epoch": 0.32323892371675755, "grad_norm": 0.7195058905322125, "learning_rate": 0.00019105124491298906, "loss": 12.5126, "step": 5936 }, { "epoch": 0.32329337771334055, "grad_norm": 0.6263694786258669, "learning_rate": 0.00019104759837098868, "loss": 12.6228, "step": 5937 }, { "epoch": 0.32334783170992354, "grad_norm": 0.6735130785459177, "learning_rate": 0.000191043951120987, "loss": 12.4233, "step": 5938 }, { "epoch": 0.3234022857065066, "grad_norm": 0.6343518628030865, "learning_rate": 0.00019104030316301223, "loss": 12.5953, "step": 5939 }, { "epoch": 0.3234567397030896, "grad_norm": 0.6339707761405831, "learning_rate": 0.0001910366544970928, "loss": 12.4378, "step": 5940 }, { "epoch": 0.3235111936996726, "grad_norm": 0.9740575909840364, "learning_rate": 0.00019103300512325708, "loss": 12.6041, "step": 5941 }, { "epoch": 0.3235656476962556, "grad_norm": 0.6471335562445306, "learning_rate": 0.00019102935504153348, "loss": 12.3995, "step": 5942 }, { "epoch": 0.3236201016928386, "grad_norm": 0.6965000222741758, "learning_rate": 0.00019102570425195032, "loss": 12.4683, "step": 5943 }, { "epoch": 0.3236745556894216, "grad_norm": 0.6845613506290676, "learning_rate": 0.000191022052754536, "loss": 12.5708, "step": 5944 }, { "epoch": 0.32372900968600465, "grad_norm": 1.055426781939791, "learning_rate": 0.00019101840054931897, "loss": 12.5449, "step": 5945 }, { "epoch": 0.32378346368258765, "grad_norm": 0.6478370530528846, "learning_rate": 0.0001910147476363276, "loss": 12.4678, "step": 5946 }, { "epoch": 0.32383791767917064, "grad_norm": 0.7236229953266451, "learning_rate": 0.00019101109401559025, "loss": 12.45, "step": 5947 }, { "epoch": 0.3238923716757537, "grad_norm": 0.7776354757900491, "learning_rate": 0.0001910074396871354, "loss": 12.7088, "step": 5948 }, { "epoch": 0.3239468256723367, "grad_norm": 0.747095157378711, "learning_rate": 0.00019100378465099143, "loss": 12.4241, "step": 5949 }, { "epoch": 0.32400127966891973, "grad_norm": 0.7026491659725063, "learning_rate": 0.00019100012890718674, "loss": 12.5919, "step": 5950 }, { "epoch": 0.3240557336655027, "grad_norm": 0.6995609889373499, "learning_rate": 0.00019099647245574981, "loss": 12.6247, "step": 5951 }, { "epoch": 0.3241101876620857, "grad_norm": 0.8774373263694469, "learning_rate": 0.00019099281529670907, "loss": 12.4489, "step": 5952 }, { "epoch": 0.32416464165866876, "grad_norm": 0.6405476430956029, "learning_rate": 0.0001909891574300929, "loss": 12.5104, "step": 5953 }, { "epoch": 0.32421909565525175, "grad_norm": 0.6562550029524373, "learning_rate": 0.00019098549885592983, "loss": 12.663, "step": 5954 }, { "epoch": 0.32427354965183475, "grad_norm": 0.66271688148055, "learning_rate": 0.00019098183957424824, "loss": 12.5827, "step": 5955 }, { "epoch": 0.3243280036484178, "grad_norm": 0.6147209762320107, "learning_rate": 0.0001909781795850766, "loss": 12.5188, "step": 5956 }, { "epoch": 0.3243824576450008, "grad_norm": 0.7348392864009599, "learning_rate": 0.00019097451888844337, "loss": 12.5578, "step": 5957 }, { "epoch": 0.3244369116415838, "grad_norm": 0.6520863870566146, "learning_rate": 0.00019097085748437704, "loss": 12.2885, "step": 5958 }, { "epoch": 0.32449136563816683, "grad_norm": 0.6079312001278564, "learning_rate": 0.00019096719537290606, "loss": 12.3898, "step": 5959 }, { "epoch": 0.3245458196347498, "grad_norm": 0.6314519112721095, "learning_rate": 0.00019096353255405892, "loss": 12.4193, "step": 5960 }, { "epoch": 0.3246002736313328, "grad_norm": 0.6570340381863177, "learning_rate": 0.0001909598690278641, "loss": 12.4791, "step": 5961 }, { "epoch": 0.32465472762791586, "grad_norm": 0.6753086555455631, "learning_rate": 0.0001909562047943501, "loss": 12.5465, "step": 5962 }, { "epoch": 0.32470918162449885, "grad_norm": 0.730108073855115, "learning_rate": 0.00019095253985354534, "loss": 12.5237, "step": 5963 }, { "epoch": 0.32476363562108185, "grad_norm": 0.6205806638320429, "learning_rate": 0.00019094887420547844, "loss": 12.5215, "step": 5964 }, { "epoch": 0.3248180896176649, "grad_norm": 0.6353200889527242, "learning_rate": 0.0001909452078501778, "loss": 12.5001, "step": 5965 }, { "epoch": 0.3248725436142479, "grad_norm": 0.9432560778936758, "learning_rate": 0.000190941540787672, "loss": 12.6029, "step": 5966 }, { "epoch": 0.3249269976108309, "grad_norm": 0.6341818292480685, "learning_rate": 0.00019093787301798952, "loss": 12.5276, "step": 5967 }, { "epoch": 0.32498145160741393, "grad_norm": 0.6416082367670348, "learning_rate": 0.00019093420454115886, "loss": 12.593, "step": 5968 }, { "epoch": 0.3250359056039969, "grad_norm": 0.8068450582418325, "learning_rate": 0.00019093053535720861, "loss": 12.5589, "step": 5969 }, { "epoch": 0.3250903596005799, "grad_norm": 0.7187539550802996, "learning_rate": 0.00019092686546616725, "loss": 12.4668, "step": 5970 }, { "epoch": 0.32514481359716296, "grad_norm": 0.7413479385730268, "learning_rate": 0.00019092319486806335, "loss": 12.5876, "step": 5971 }, { "epoch": 0.32519926759374596, "grad_norm": 0.7031008087918882, "learning_rate": 0.00019091952356292544, "loss": 12.5985, "step": 5972 }, { "epoch": 0.32525372159032895, "grad_norm": 0.715757330929554, "learning_rate": 0.00019091585155078206, "loss": 12.5552, "step": 5973 }, { "epoch": 0.325308175586912, "grad_norm": 0.8596223200264557, "learning_rate": 0.00019091217883166178, "loss": 12.5456, "step": 5974 }, { "epoch": 0.325362629583495, "grad_norm": 0.607716778638219, "learning_rate": 0.00019090850540559316, "loss": 12.3831, "step": 5975 }, { "epoch": 0.325417083580078, "grad_norm": 0.69632444557811, "learning_rate": 0.00019090483127260472, "loss": 12.5443, "step": 5976 }, { "epoch": 0.32547153757666103, "grad_norm": 0.7908830884226349, "learning_rate": 0.00019090115643272508, "loss": 12.6423, "step": 5977 }, { "epoch": 0.325525991573244, "grad_norm": 0.7317983369210814, "learning_rate": 0.00019089748088598282, "loss": 12.6053, "step": 5978 }, { "epoch": 0.325580445569827, "grad_norm": 0.6860666876617846, "learning_rate": 0.0001908938046324065, "loss": 12.4089, "step": 5979 }, { "epoch": 0.32563489956641006, "grad_norm": 0.7133453663417441, "learning_rate": 0.0001908901276720247, "loss": 12.65, "step": 5980 }, { "epoch": 0.32568935356299306, "grad_norm": 0.6771444424428702, "learning_rate": 0.00019088645000486603, "loss": 12.6196, "step": 5981 }, { "epoch": 0.32574380755957605, "grad_norm": 0.6618186331476609, "learning_rate": 0.0001908827716309591, "loss": 12.4131, "step": 5982 }, { "epoch": 0.3257982615561591, "grad_norm": 0.666251776743328, "learning_rate": 0.0001908790925503325, "loss": 12.5763, "step": 5983 }, { "epoch": 0.3258527155527421, "grad_norm": 0.6423401599328059, "learning_rate": 0.00019087541276301479, "loss": 12.6437, "step": 5984 }, { "epoch": 0.32590716954932514, "grad_norm": 0.684078308546453, "learning_rate": 0.00019087173226903467, "loss": 12.4504, "step": 5985 }, { "epoch": 0.32596162354590813, "grad_norm": 0.679832543699596, "learning_rate": 0.00019086805106842072, "loss": 12.6542, "step": 5986 }, { "epoch": 0.3260160775424911, "grad_norm": 0.6848783699223823, "learning_rate": 0.00019086436916120153, "loss": 12.6109, "step": 5987 }, { "epoch": 0.32607053153907417, "grad_norm": 0.6741532940091434, "learning_rate": 0.0001908606865474058, "loss": 12.492, "step": 5988 }, { "epoch": 0.32612498553565716, "grad_norm": 0.6973214200387228, "learning_rate": 0.00019085700322706215, "loss": 12.5396, "step": 5989 }, { "epoch": 0.32617943953224016, "grad_norm": 0.6819143354267655, "learning_rate": 0.00019085331920019917, "loss": 12.5189, "step": 5990 }, { "epoch": 0.3262338935288232, "grad_norm": 0.7607303886975703, "learning_rate": 0.00019084963446684556, "loss": 12.5383, "step": 5991 }, { "epoch": 0.3262883475254062, "grad_norm": 0.7104535783369613, "learning_rate": 0.00019084594902702996, "loss": 12.6959, "step": 5992 }, { "epoch": 0.3263428015219892, "grad_norm": 0.6869245037086184, "learning_rate": 0.000190842262880781, "loss": 12.4045, "step": 5993 }, { "epoch": 0.32639725551857224, "grad_norm": 0.6891889185433268, "learning_rate": 0.0001908385760281274, "loss": 12.6741, "step": 5994 }, { "epoch": 0.32645170951515523, "grad_norm": 0.7146884740935388, "learning_rate": 0.0001908348884690978, "loss": 12.5421, "step": 5995 }, { "epoch": 0.3265061635117382, "grad_norm": 0.660700226334539, "learning_rate": 0.00019083120020372087, "loss": 12.4129, "step": 5996 }, { "epoch": 0.32656061750832127, "grad_norm": 0.6804830605995829, "learning_rate": 0.0001908275112320253, "loss": 12.3702, "step": 5997 }, { "epoch": 0.32661507150490426, "grad_norm": 0.6558070236451653, "learning_rate": 0.00019082382155403976, "loss": 12.5424, "step": 5998 }, { "epoch": 0.32666952550148726, "grad_norm": 0.6585152289440808, "learning_rate": 0.00019082013116979293, "loss": 12.6123, "step": 5999 }, { "epoch": 0.3267239794980703, "grad_norm": 0.6741903332937523, "learning_rate": 0.00019081644007931355, "loss": 12.4616, "step": 6000 }, { "epoch": 0.3267784334946533, "grad_norm": 0.6166158213759575, "learning_rate": 0.0001908127482826303, "loss": 12.6367, "step": 6001 }, { "epoch": 0.3268328874912363, "grad_norm": 0.6880785638149681, "learning_rate": 0.0001908090557797719, "loss": 12.4693, "step": 6002 }, { "epoch": 0.32688734148781934, "grad_norm": 0.6656609169611086, "learning_rate": 0.00019080536257076706, "loss": 12.5524, "step": 6003 }, { "epoch": 0.32694179548440233, "grad_norm": 0.8859792489670578, "learning_rate": 0.00019080166865564446, "loss": 12.4053, "step": 6004 }, { "epoch": 0.3269962494809853, "grad_norm": 0.6772545056208928, "learning_rate": 0.0001907979740344329, "loss": 12.5152, "step": 6005 }, { "epoch": 0.32705070347756837, "grad_norm": 0.7193161190009105, "learning_rate": 0.00019079427870716105, "loss": 12.575, "step": 6006 }, { "epoch": 0.32710515747415136, "grad_norm": 0.6766156295467368, "learning_rate": 0.00019079058267385763, "loss": 12.5792, "step": 6007 }, { "epoch": 0.32715961147073436, "grad_norm": 0.65637458444529, "learning_rate": 0.00019078688593455144, "loss": 12.4607, "step": 6008 }, { "epoch": 0.3272140654673174, "grad_norm": 0.7477260194565057, "learning_rate": 0.0001907831884892712, "loss": 12.6327, "step": 6009 }, { "epoch": 0.3272685194639004, "grad_norm": 0.7543706752417337, "learning_rate": 0.00019077949033804566, "loss": 12.5272, "step": 6010 }, { "epoch": 0.3273229734604834, "grad_norm": 0.7882627811447465, "learning_rate": 0.0001907757914809036, "loss": 12.5733, "step": 6011 }, { "epoch": 0.32737742745706644, "grad_norm": 0.6942572401963698, "learning_rate": 0.00019077209191787375, "loss": 12.4568, "step": 6012 }, { "epoch": 0.32743188145364943, "grad_norm": 0.71472777836806, "learning_rate": 0.00019076839164898488, "loss": 12.503, "step": 6013 }, { "epoch": 0.3274863354502324, "grad_norm": 0.6458760780360772, "learning_rate": 0.00019076469067426578, "loss": 12.5897, "step": 6014 }, { "epoch": 0.3275407894468155, "grad_norm": 0.6523758206718518, "learning_rate": 0.0001907609889937452, "loss": 12.5079, "step": 6015 }, { "epoch": 0.32759524344339847, "grad_norm": 0.6639159221337562, "learning_rate": 0.00019075728660745197, "loss": 12.4423, "step": 6016 }, { "epoch": 0.3276496974399815, "grad_norm": 0.7536010462227256, "learning_rate": 0.00019075358351541488, "loss": 12.5858, "step": 6017 }, { "epoch": 0.3277041514365645, "grad_norm": 0.7421258119115142, "learning_rate": 0.0001907498797176627, "loss": 12.6127, "step": 6018 }, { "epoch": 0.3277586054331475, "grad_norm": 0.6908788776147277, "learning_rate": 0.00019074617521422423, "loss": 12.561, "step": 6019 }, { "epoch": 0.32781305942973055, "grad_norm": 0.6923049654838966, "learning_rate": 0.00019074247000512825, "loss": 12.3786, "step": 6020 }, { "epoch": 0.32786751342631354, "grad_norm": 0.6881176792638315, "learning_rate": 0.00019073876409040366, "loss": 12.5307, "step": 6021 }, { "epoch": 0.32792196742289653, "grad_norm": 0.6819501022563224, "learning_rate": 0.0001907350574700792, "loss": 12.5123, "step": 6022 }, { "epoch": 0.3279764214194796, "grad_norm": 0.6768439916521216, "learning_rate": 0.0001907313501441837, "loss": 12.626, "step": 6023 }, { "epoch": 0.3280308754160626, "grad_norm": 0.7041701961335308, "learning_rate": 0.000190727642112746, "loss": 12.4595, "step": 6024 }, { "epoch": 0.32808532941264557, "grad_norm": 0.704760083849161, "learning_rate": 0.00019072393337579499, "loss": 12.5799, "step": 6025 }, { "epoch": 0.3281397834092286, "grad_norm": 0.6995993768892956, "learning_rate": 0.00019072022393335942, "loss": 12.384, "step": 6026 }, { "epoch": 0.3281942374058116, "grad_norm": 0.7527545179313321, "learning_rate": 0.00019071651378546817, "loss": 12.5722, "step": 6027 }, { "epoch": 0.3282486914023946, "grad_norm": 0.6389152098343975, "learning_rate": 0.0001907128029321501, "loss": 12.4403, "step": 6028 }, { "epoch": 0.32830314539897765, "grad_norm": 0.6888724064811103, "learning_rate": 0.00019070909137343408, "loss": 12.6418, "step": 6029 }, { "epoch": 0.32835759939556064, "grad_norm": 0.6751648270814726, "learning_rate": 0.00019070537910934895, "loss": 12.6121, "step": 6030 }, { "epoch": 0.32841205339214363, "grad_norm": 0.7575202162096621, "learning_rate": 0.00019070166613992357, "loss": 12.6031, "step": 6031 }, { "epoch": 0.3284665073887267, "grad_norm": 0.6958833801819441, "learning_rate": 0.00019069795246518683, "loss": 12.6046, "step": 6032 }, { "epoch": 0.3285209613853097, "grad_norm": 0.6457389212528368, "learning_rate": 0.0001906942380851676, "loss": 12.4216, "step": 6033 }, { "epoch": 0.32857541538189267, "grad_norm": 0.6765810427732396, "learning_rate": 0.00019069052299989475, "loss": 12.5173, "step": 6034 }, { "epoch": 0.3286298693784757, "grad_norm": 0.6486662081210898, "learning_rate": 0.00019068680720939722, "loss": 12.5056, "step": 6035 }, { "epoch": 0.3286843233750587, "grad_norm": 0.6393177510255944, "learning_rate": 0.00019068309071370386, "loss": 12.5253, "step": 6036 }, { "epoch": 0.3287387773716417, "grad_norm": 0.7043933590676535, "learning_rate": 0.00019067937351284356, "loss": 12.4581, "step": 6037 }, { "epoch": 0.32879323136822475, "grad_norm": 0.7073559719836241, "learning_rate": 0.00019067565560684525, "loss": 12.6616, "step": 6038 }, { "epoch": 0.32884768536480774, "grad_norm": 0.6695162577213956, "learning_rate": 0.00019067193699573784, "loss": 12.4207, "step": 6039 }, { "epoch": 0.32890213936139073, "grad_norm": 0.6957632386249147, "learning_rate": 0.00019066821767955023, "loss": 12.6009, "step": 6040 }, { "epoch": 0.3289565933579738, "grad_norm": 0.6786726297932553, "learning_rate": 0.00019066449765831135, "loss": 12.5736, "step": 6041 }, { "epoch": 0.3290110473545568, "grad_norm": 0.7542246599893674, "learning_rate": 0.00019066077693205018, "loss": 12.4497, "step": 6042 }, { "epoch": 0.32906550135113977, "grad_norm": 0.6266163221327751, "learning_rate": 0.00019065705550079556, "loss": 12.4521, "step": 6043 }, { "epoch": 0.3291199553477228, "grad_norm": 0.6632501992328229, "learning_rate": 0.0001906533333645765, "loss": 12.5465, "step": 6044 }, { "epoch": 0.3291744093443058, "grad_norm": 0.7173246364462722, "learning_rate": 0.0001906496105234219, "loss": 12.4319, "step": 6045 }, { "epoch": 0.3292288633408888, "grad_norm": 0.6601271095282973, "learning_rate": 0.00019064588697736073, "loss": 12.6277, "step": 6046 }, { "epoch": 0.32928331733747185, "grad_norm": 0.7662957908838449, "learning_rate": 0.00019064216272642192, "loss": 12.4888, "step": 6047 }, { "epoch": 0.32933777133405484, "grad_norm": 0.9193653350114078, "learning_rate": 0.00019063843777063447, "loss": 12.7076, "step": 6048 }, { "epoch": 0.32939222533063783, "grad_norm": 0.9441997690647264, "learning_rate": 0.00019063471211002732, "loss": 12.5137, "step": 6049 }, { "epoch": 0.3294466793272209, "grad_norm": 0.7440618082238332, "learning_rate": 0.0001906309857446295, "loss": 12.6509, "step": 6050 }, { "epoch": 0.3295011333238039, "grad_norm": 0.6706221552479642, "learning_rate": 0.00019062725867446985, "loss": 12.4081, "step": 6051 }, { "epoch": 0.3295555873203869, "grad_norm": 0.7669635585081318, "learning_rate": 0.0001906235308995775, "loss": 12.3935, "step": 6052 }, { "epoch": 0.3296100413169699, "grad_norm": 0.8102674228184634, "learning_rate": 0.00019061980241998137, "loss": 12.6809, "step": 6053 }, { "epoch": 0.3296644953135529, "grad_norm": 0.6051046325273823, "learning_rate": 0.00019061607323571042, "loss": 12.3263, "step": 6054 }, { "epoch": 0.32971894931013596, "grad_norm": 0.6912898651248617, "learning_rate": 0.00019061234334679373, "loss": 12.4846, "step": 6055 }, { "epoch": 0.32977340330671895, "grad_norm": 0.7745620986720093, "learning_rate": 0.00019060861275326026, "loss": 12.6085, "step": 6056 }, { "epoch": 0.32982785730330194, "grad_norm": 0.6763436075375078, "learning_rate": 0.000190604881455139, "loss": 12.6575, "step": 6057 }, { "epoch": 0.329882311299885, "grad_norm": 0.7307302740737248, "learning_rate": 0.000190601149452459, "loss": 12.4515, "step": 6058 }, { "epoch": 0.329936765296468, "grad_norm": 0.7506534175175649, "learning_rate": 0.00019059741674524924, "loss": 12.7086, "step": 6059 }, { "epoch": 0.329991219293051, "grad_norm": 0.6310610641892026, "learning_rate": 0.0001905936833335388, "loss": 12.5889, "step": 6060 }, { "epoch": 0.330045673289634, "grad_norm": 0.749115126468661, "learning_rate": 0.00019058994921735672, "loss": 12.6396, "step": 6061 }, { "epoch": 0.330100127286217, "grad_norm": 0.6680972574726077, "learning_rate": 0.00019058621439673194, "loss": 12.6789, "step": 6062 }, { "epoch": 0.3301545812828, "grad_norm": 0.6498361336323334, "learning_rate": 0.00019058247887169361, "loss": 12.5572, "step": 6063 }, { "epoch": 0.33020903527938306, "grad_norm": 0.728884306740224, "learning_rate": 0.0001905787426422707, "loss": 12.4432, "step": 6064 }, { "epoch": 0.33026348927596605, "grad_norm": 0.6381536620973719, "learning_rate": 0.00019057500570849234, "loss": 12.4457, "step": 6065 }, { "epoch": 0.33031794327254904, "grad_norm": 0.6814152295949502, "learning_rate": 0.00019057126807038753, "loss": 12.4274, "step": 6066 }, { "epoch": 0.3303723972691321, "grad_norm": 0.7586037938328459, "learning_rate": 0.00019056752972798532, "loss": 12.4917, "step": 6067 }, { "epoch": 0.3304268512657151, "grad_norm": 0.7013635463082079, "learning_rate": 0.00019056379068131484, "loss": 12.5281, "step": 6068 }, { "epoch": 0.3304813052622981, "grad_norm": 0.749680286113315, "learning_rate": 0.00019056005093040512, "loss": 12.5624, "step": 6069 }, { "epoch": 0.3305357592588811, "grad_norm": 0.7209385704762635, "learning_rate": 0.00019055631047528528, "loss": 12.5035, "step": 6070 }, { "epoch": 0.3305902132554641, "grad_norm": 0.7540812916934254, "learning_rate": 0.00019055256931598438, "loss": 12.5679, "step": 6071 }, { "epoch": 0.3306446672520471, "grad_norm": 0.7230512092731918, "learning_rate": 0.0001905488274525315, "loss": 12.6442, "step": 6072 }, { "epoch": 0.33069912124863016, "grad_norm": 1.0561995452306372, "learning_rate": 0.00019054508488495575, "loss": 12.5927, "step": 6073 }, { "epoch": 0.33075357524521315, "grad_norm": 0.7502687032082127, "learning_rate": 0.00019054134161328626, "loss": 12.5629, "step": 6074 }, { "epoch": 0.33080802924179614, "grad_norm": 0.7999552476061841, "learning_rate": 0.00019053759763755209, "loss": 12.5188, "step": 6075 }, { "epoch": 0.3308624832383792, "grad_norm": 0.7713322123206169, "learning_rate": 0.0001905338529577824, "loss": 12.5704, "step": 6076 }, { "epoch": 0.3309169372349622, "grad_norm": 0.8188983023010487, "learning_rate": 0.00019053010757400624, "loss": 12.6, "step": 6077 }, { "epoch": 0.3309713912315452, "grad_norm": 0.9511635742495728, "learning_rate": 0.00019052636148625282, "loss": 12.5989, "step": 6078 }, { "epoch": 0.3310258452281282, "grad_norm": 0.6767946812635767, "learning_rate": 0.00019052261469455122, "loss": 12.5205, "step": 6079 }, { "epoch": 0.3310802992247112, "grad_norm": 0.9301201404025043, "learning_rate": 0.0001905188671989306, "loss": 12.7243, "step": 6080 }, { "epoch": 0.3311347532212942, "grad_norm": 0.9015049000301367, "learning_rate": 0.0001905151189994201, "loss": 12.4499, "step": 6081 }, { "epoch": 0.33118920721787726, "grad_norm": 0.8382596251722875, "learning_rate": 0.0001905113700960488, "loss": 12.7573, "step": 6082 }, { "epoch": 0.33124366121446025, "grad_norm": 0.7181926507073133, "learning_rate": 0.00019050762048884596, "loss": 12.473, "step": 6083 }, { "epoch": 0.3312981152110433, "grad_norm": 0.8447246018943421, "learning_rate": 0.0001905038701778407, "loss": 12.515, "step": 6084 }, { "epoch": 0.3313525692076263, "grad_norm": 0.752685655651504, "learning_rate": 0.00019050011916306212, "loss": 12.5246, "step": 6085 }, { "epoch": 0.3314070232042093, "grad_norm": 0.8405698619472585, "learning_rate": 0.00019049636744453944, "loss": 12.5166, "step": 6086 }, { "epoch": 0.33146147720079233, "grad_norm": 0.7746313603997682, "learning_rate": 0.00019049261502230184, "loss": 12.4498, "step": 6087 }, { "epoch": 0.3315159311973753, "grad_norm": 0.8372687652387514, "learning_rate": 0.00019048886189637848, "loss": 12.5878, "step": 6088 }, { "epoch": 0.3315703851939583, "grad_norm": 0.814258741770328, "learning_rate": 0.0001904851080667986, "loss": 12.4877, "step": 6089 }, { "epoch": 0.33162483919054137, "grad_norm": 0.7280978774222061, "learning_rate": 0.00019048135353359129, "loss": 12.5206, "step": 6090 }, { "epoch": 0.33167929318712436, "grad_norm": 0.7893481669060788, "learning_rate": 0.00019047759829678585, "loss": 12.6034, "step": 6091 }, { "epoch": 0.33173374718370735, "grad_norm": 0.7136790992150998, "learning_rate": 0.00019047384235641138, "loss": 12.5442, "step": 6092 }, { "epoch": 0.3317882011802904, "grad_norm": 0.725570200940181, "learning_rate": 0.00019047008571249717, "loss": 12.5172, "step": 6093 }, { "epoch": 0.3318426551768734, "grad_norm": 0.7172907098779102, "learning_rate": 0.0001904663283650724, "loss": 12.4823, "step": 6094 }, { "epoch": 0.3318971091734564, "grad_norm": 0.650578796409706, "learning_rate": 0.0001904625703141663, "loss": 12.4808, "step": 6095 }, { "epoch": 0.33195156317003943, "grad_norm": 0.7862084399830296, "learning_rate": 0.00019045881155980808, "loss": 12.5653, "step": 6096 }, { "epoch": 0.3320060171666224, "grad_norm": 0.6625604436200386, "learning_rate": 0.00019045505210202698, "loss": 12.6665, "step": 6097 }, { "epoch": 0.3320604711632054, "grad_norm": 0.6707462864315522, "learning_rate": 0.00019045129194085217, "loss": 12.6802, "step": 6098 }, { "epoch": 0.33211492515978847, "grad_norm": 0.6954853299279311, "learning_rate": 0.000190447531076313, "loss": 12.6003, "step": 6099 }, { "epoch": 0.33216937915637146, "grad_norm": 0.7880675830322864, "learning_rate": 0.00019044376950843862, "loss": 12.6436, "step": 6100 }, { "epoch": 0.33222383315295445, "grad_norm": 0.8561969283621422, "learning_rate": 0.00019044000723725837, "loss": 12.6777, "step": 6101 }, { "epoch": 0.3322782871495375, "grad_norm": 0.7027593267734076, "learning_rate": 0.0001904362442628014, "loss": 12.5563, "step": 6102 }, { "epoch": 0.3323327411461205, "grad_norm": 0.9422237632774334, "learning_rate": 0.0001904324805850971, "loss": 12.5437, "step": 6103 }, { "epoch": 0.3323871951427035, "grad_norm": 0.6967880551838675, "learning_rate": 0.0001904287162041746, "loss": 12.485, "step": 6104 }, { "epoch": 0.33244164913928653, "grad_norm": 0.7064817408333989, "learning_rate": 0.00019042495112006326, "loss": 12.4872, "step": 6105 }, { "epoch": 0.3324961031358695, "grad_norm": 0.861640560379946, "learning_rate": 0.00019042118533279235, "loss": 12.5858, "step": 6106 }, { "epoch": 0.3325505571324525, "grad_norm": 0.7716607183906974, "learning_rate": 0.00019041741884239113, "loss": 12.5587, "step": 6107 }, { "epoch": 0.33260501112903557, "grad_norm": 0.763541649976914, "learning_rate": 0.00019041365164888888, "loss": 12.7393, "step": 6108 }, { "epoch": 0.33265946512561856, "grad_norm": 0.637888834883395, "learning_rate": 0.00019040988375231495, "loss": 12.5298, "step": 6109 }, { "epoch": 0.33271391912220155, "grad_norm": 0.7149384308769094, "learning_rate": 0.00019040611515269858, "loss": 12.6393, "step": 6110 }, { "epoch": 0.3327683731187846, "grad_norm": 0.6569658440919112, "learning_rate": 0.0001904023458500691, "loss": 12.5275, "step": 6111 }, { "epoch": 0.3328228271153676, "grad_norm": 0.6572020856486586, "learning_rate": 0.0001903985758444558, "loss": 12.6111, "step": 6112 }, { "epoch": 0.3328772811119506, "grad_norm": 0.6516354181852332, "learning_rate": 0.00019039480513588806, "loss": 12.5082, "step": 6113 }, { "epoch": 0.33293173510853363, "grad_norm": 0.680454891451818, "learning_rate": 0.00019039103372439512, "loss": 12.5876, "step": 6114 }, { "epoch": 0.3329861891051166, "grad_norm": 0.6523523263440932, "learning_rate": 0.00019038726161000634, "loss": 12.5589, "step": 6115 }, { "epoch": 0.3330406431016996, "grad_norm": 0.6829586009320958, "learning_rate": 0.00019038348879275106, "loss": 12.6797, "step": 6116 }, { "epoch": 0.33309509709828267, "grad_norm": 0.6661967631864086, "learning_rate": 0.0001903797152726586, "loss": 12.5284, "step": 6117 }, { "epoch": 0.33314955109486566, "grad_norm": 0.6074346941940245, "learning_rate": 0.00019037594104975836, "loss": 12.4863, "step": 6118 }, { "epoch": 0.3332040050914487, "grad_norm": 0.7862727576794416, "learning_rate": 0.00019037216612407962, "loss": 12.5459, "step": 6119 }, { "epoch": 0.3332584590880317, "grad_norm": 0.6660531417792179, "learning_rate": 0.00019036839049565177, "loss": 12.4761, "step": 6120 }, { "epoch": 0.3333129130846147, "grad_norm": 0.6365992142476833, "learning_rate": 0.00019036461416450416, "loss": 12.5827, "step": 6121 }, { "epoch": 0.33336736708119774, "grad_norm": 0.6579708556000738, "learning_rate": 0.00019036083713066612, "loss": 12.6584, "step": 6122 }, { "epoch": 0.33342182107778073, "grad_norm": 0.6397074316751428, "learning_rate": 0.0001903570593941671, "loss": 12.5182, "step": 6123 }, { "epoch": 0.3334762750743637, "grad_norm": 0.6753784333459871, "learning_rate": 0.00019035328095503643, "loss": 12.4446, "step": 6124 }, { "epoch": 0.3335307290709468, "grad_norm": 0.6293854289115678, "learning_rate": 0.00019034950181330348, "loss": 12.4655, "step": 6125 }, { "epoch": 0.33358518306752977, "grad_norm": 0.6768015115197763, "learning_rate": 0.00019034572196899766, "loss": 12.6518, "step": 6126 }, { "epoch": 0.33363963706411276, "grad_norm": 0.6028883086935058, "learning_rate": 0.00019034194142214834, "loss": 12.4527, "step": 6127 }, { "epoch": 0.3336940910606958, "grad_norm": 0.7512506851451719, "learning_rate": 0.00019033816017278497, "loss": 12.6985, "step": 6128 }, { "epoch": 0.3337485450572788, "grad_norm": 0.6914936250403905, "learning_rate": 0.0001903343782209369, "loss": 12.4452, "step": 6129 }, { "epoch": 0.3338029990538618, "grad_norm": 0.6491276706247793, "learning_rate": 0.00019033059556663353, "loss": 12.6077, "step": 6130 }, { "epoch": 0.33385745305044484, "grad_norm": 0.6610362402021156, "learning_rate": 0.0001903268122099043, "loss": 12.4978, "step": 6131 }, { "epoch": 0.33391190704702783, "grad_norm": 0.6583859042445529, "learning_rate": 0.00019032302815077866, "loss": 12.5562, "step": 6132 }, { "epoch": 0.3339663610436108, "grad_norm": 0.6849355025464567, "learning_rate": 0.000190319243389286, "loss": 12.5127, "step": 6133 }, { "epoch": 0.3340208150401939, "grad_norm": 0.6238656111143649, "learning_rate": 0.00019031545792545576, "loss": 12.478, "step": 6134 }, { "epoch": 0.33407526903677687, "grad_norm": 0.6796712942676156, "learning_rate": 0.00019031167175931736, "loss": 12.4635, "step": 6135 }, { "epoch": 0.33412972303335986, "grad_norm": 0.6388572352631766, "learning_rate": 0.00019030788489090027, "loss": 12.6054, "step": 6136 }, { "epoch": 0.3341841770299429, "grad_norm": 0.7070234751908636, "learning_rate": 0.0001903040973202339, "loss": 12.5226, "step": 6137 }, { "epoch": 0.3342386310265259, "grad_norm": 0.7598039478084103, "learning_rate": 0.00019030030904734774, "loss": 12.423, "step": 6138 }, { "epoch": 0.3342930850231089, "grad_norm": 0.7158751505478054, "learning_rate": 0.00019029652007227123, "loss": 12.5786, "step": 6139 }, { "epoch": 0.33434753901969194, "grad_norm": 0.6145429949826542, "learning_rate": 0.00019029273039503387, "loss": 12.4051, "step": 6140 }, { "epoch": 0.33440199301627493, "grad_norm": 1.1949463795614987, "learning_rate": 0.00019028894001566507, "loss": 12.7602, "step": 6141 }, { "epoch": 0.3344564470128579, "grad_norm": 0.6342220342865346, "learning_rate": 0.00019028514893419432, "loss": 12.4909, "step": 6142 }, { "epoch": 0.334510901009441, "grad_norm": 0.7442484705837343, "learning_rate": 0.0001902813571506511, "loss": 12.7064, "step": 6143 }, { "epoch": 0.33456535500602397, "grad_norm": 0.6796848417099655, "learning_rate": 0.00019027756466506493, "loss": 12.5268, "step": 6144 }, { "epoch": 0.33461980900260696, "grad_norm": 0.6546437357082819, "learning_rate": 0.00019027377147746524, "loss": 12.4814, "step": 6145 }, { "epoch": 0.33467426299919, "grad_norm": 0.7005982307981831, "learning_rate": 0.00019026997758788162, "loss": 12.6283, "step": 6146 }, { "epoch": 0.334728716995773, "grad_norm": 0.6438492247460936, "learning_rate": 0.0001902661829963435, "loss": 12.5332, "step": 6147 }, { "epoch": 0.334783170992356, "grad_norm": 0.7068632930100087, "learning_rate": 0.0001902623877028804, "loss": 12.5771, "step": 6148 }, { "epoch": 0.33483762498893904, "grad_norm": 0.740471689430596, "learning_rate": 0.00019025859170752183, "loss": 12.5317, "step": 6149 }, { "epoch": 0.33489207898552203, "grad_norm": 0.6222370742701101, "learning_rate": 0.0001902547950102973, "loss": 12.5265, "step": 6150 }, { "epoch": 0.3349465329821051, "grad_norm": 0.683206095891851, "learning_rate": 0.00019025099761123637, "loss": 12.6167, "step": 6151 }, { "epoch": 0.3350009869786881, "grad_norm": 0.7004754573579197, "learning_rate": 0.00019024719951036856, "loss": 12.6346, "step": 6152 }, { "epoch": 0.33505544097527107, "grad_norm": 0.677115356865327, "learning_rate": 0.00019024340070772336, "loss": 12.5155, "step": 6153 }, { "epoch": 0.3351098949718541, "grad_norm": 0.6041487063468916, "learning_rate": 0.00019023960120333037, "loss": 12.5213, "step": 6154 }, { "epoch": 0.3351643489684371, "grad_norm": 0.6440215634195646, "learning_rate": 0.00019023580099721907, "loss": 12.4842, "step": 6155 }, { "epoch": 0.3352188029650201, "grad_norm": 0.7218149914408177, "learning_rate": 0.00019023200008941912, "loss": 12.5914, "step": 6156 }, { "epoch": 0.33527325696160315, "grad_norm": 0.6775480085437992, "learning_rate": 0.00019022819847995992, "loss": 12.608, "step": 6157 }, { "epoch": 0.33532771095818614, "grad_norm": 0.6925979630379857, "learning_rate": 0.00019022439616887116, "loss": 12.6059, "step": 6158 }, { "epoch": 0.33538216495476914, "grad_norm": 0.6479672547492501, "learning_rate": 0.00019022059315618238, "loss": 12.4175, "step": 6159 }, { "epoch": 0.3354366189513522, "grad_norm": 0.767868850600018, "learning_rate": 0.0001902167894419231, "loss": 12.5171, "step": 6160 }, { "epoch": 0.3354910729479352, "grad_norm": 0.6072374536074713, "learning_rate": 0.00019021298502612294, "loss": 12.4668, "step": 6161 }, { "epoch": 0.33554552694451817, "grad_norm": 0.8744038116543146, "learning_rate": 0.0001902091799088115, "loss": 12.5322, "step": 6162 }, { "epoch": 0.3355999809411012, "grad_norm": 0.6870120413339742, "learning_rate": 0.00019020537409001836, "loss": 12.5823, "step": 6163 }, { "epoch": 0.3356544349376842, "grad_norm": 0.6446608768392295, "learning_rate": 0.00019020156756977309, "loss": 12.5155, "step": 6164 }, { "epoch": 0.3357088889342672, "grad_norm": 0.6695021313655353, "learning_rate": 0.00019019776034810527, "loss": 12.2837, "step": 6165 }, { "epoch": 0.33576334293085025, "grad_norm": 0.6905675103604029, "learning_rate": 0.00019019395242504458, "loss": 12.5365, "step": 6166 }, { "epoch": 0.33581779692743324, "grad_norm": 0.6892280744834528, "learning_rate": 0.0001901901438006206, "loss": 12.5121, "step": 6167 }, { "epoch": 0.33587225092401624, "grad_norm": 0.6263608033387138, "learning_rate": 0.00019018633447486288, "loss": 12.5616, "step": 6168 }, { "epoch": 0.3359267049205993, "grad_norm": 0.7290340194306969, "learning_rate": 0.00019018252444780116, "loss": 12.5323, "step": 6169 }, { "epoch": 0.3359811589171823, "grad_norm": 0.6877483949236548, "learning_rate": 0.00019017871371946498, "loss": 12.5425, "step": 6170 }, { "epoch": 0.33603561291376527, "grad_norm": 0.6121751565433697, "learning_rate": 0.000190174902289884, "loss": 12.4145, "step": 6171 }, { "epoch": 0.3360900669103483, "grad_norm": 0.6668410447130814, "learning_rate": 0.00019017109015908784, "loss": 12.6139, "step": 6172 }, { "epoch": 0.3361445209069313, "grad_norm": 0.6548950675385099, "learning_rate": 0.0001901672773271062, "loss": 12.7411, "step": 6173 }, { "epoch": 0.3361989749035143, "grad_norm": 0.6620132852747327, "learning_rate": 0.00019016346379396867, "loss": 12.5068, "step": 6174 }, { "epoch": 0.33625342890009735, "grad_norm": 0.6744886194570338, "learning_rate": 0.00019015964955970493, "loss": 12.3716, "step": 6175 }, { "epoch": 0.33630788289668034, "grad_norm": 0.6460492116216238, "learning_rate": 0.00019015583462434464, "loss": 12.6015, "step": 6176 }, { "epoch": 0.33636233689326334, "grad_norm": 0.7586681691462406, "learning_rate": 0.00019015201898791743, "loss": 12.5306, "step": 6177 }, { "epoch": 0.3364167908898464, "grad_norm": 0.7473788029604737, "learning_rate": 0.00019014820265045304, "loss": 12.5624, "step": 6178 }, { "epoch": 0.3364712448864294, "grad_norm": 0.6473311838707987, "learning_rate": 0.0001901443856119811, "loss": 12.446, "step": 6179 }, { "epoch": 0.33652569888301237, "grad_norm": 0.7744353742659558, "learning_rate": 0.0001901405678725313, "loss": 12.6546, "step": 6180 }, { "epoch": 0.3365801528795954, "grad_norm": 0.6991532130330834, "learning_rate": 0.00019013674943213328, "loss": 12.5082, "step": 6181 }, { "epoch": 0.3366346068761784, "grad_norm": 0.7557822736917306, "learning_rate": 0.00019013293029081685, "loss": 12.436, "step": 6182 }, { "epoch": 0.3366890608727614, "grad_norm": 0.6549872291957558, "learning_rate": 0.00019012911044861158, "loss": 12.5209, "step": 6183 }, { "epoch": 0.33674351486934445, "grad_norm": 0.6970846774936678, "learning_rate": 0.00019012528990554727, "loss": 12.5207, "step": 6184 }, { "epoch": 0.33679796886592744, "grad_norm": 0.8749360840578242, "learning_rate": 0.00019012146866165358, "loss": 12.5774, "step": 6185 }, { "epoch": 0.3368524228625105, "grad_norm": 0.7170587629888132, "learning_rate": 0.00019011764671696027, "loss": 12.4228, "step": 6186 }, { "epoch": 0.3369068768590935, "grad_norm": 0.7733647766370366, "learning_rate": 0.000190113824071497, "loss": 12.4979, "step": 6187 }, { "epoch": 0.3369613308556765, "grad_norm": 0.7267443064106669, "learning_rate": 0.00019011000072529348, "loss": 12.6086, "step": 6188 }, { "epoch": 0.3370157848522595, "grad_norm": 0.656912096319067, "learning_rate": 0.00019010617667837953, "loss": 12.4963, "step": 6189 }, { "epoch": 0.3370702388488425, "grad_norm": 0.7539748798624772, "learning_rate": 0.00019010235193078482, "loss": 12.5574, "step": 6190 }, { "epoch": 0.3371246928454255, "grad_norm": 0.7244926493537558, "learning_rate": 0.00019009852648253913, "loss": 12.6162, "step": 6191 }, { "epoch": 0.33717914684200856, "grad_norm": 0.6673239984305686, "learning_rate": 0.00019009470033367218, "loss": 12.4831, "step": 6192 }, { "epoch": 0.33723360083859155, "grad_norm": 0.6975327703524771, "learning_rate": 0.00019009087348421372, "loss": 12.5692, "step": 6193 }, { "epoch": 0.33728805483517454, "grad_norm": 0.7778432350781286, "learning_rate": 0.00019008704593419354, "loss": 12.5819, "step": 6194 }, { "epoch": 0.3373425088317576, "grad_norm": 0.6864969326899306, "learning_rate": 0.0001900832176836414, "loss": 12.4972, "step": 6195 }, { "epoch": 0.3373969628283406, "grad_norm": 0.6202551937185172, "learning_rate": 0.00019007938873258698, "loss": 12.5221, "step": 6196 }, { "epoch": 0.3374514168249236, "grad_norm": 0.6537274528177562, "learning_rate": 0.0001900755590810602, "loss": 12.5386, "step": 6197 }, { "epoch": 0.3375058708215066, "grad_norm": 0.7079124921438565, "learning_rate": 0.00019007172872909073, "loss": 12.5338, "step": 6198 }, { "epoch": 0.3375603248180896, "grad_norm": 0.6657694746805412, "learning_rate": 0.00019006789767670842, "loss": 12.4443, "step": 6199 }, { "epoch": 0.3376147788146726, "grad_norm": 0.730262940364132, "learning_rate": 0.000190064065923943, "loss": 12.5203, "step": 6200 }, { "epoch": 0.33766923281125566, "grad_norm": 0.630319901736221, "learning_rate": 0.0001900602334708243, "loss": 12.33, "step": 6201 }, { "epoch": 0.33772368680783865, "grad_norm": 0.7177306256126688, "learning_rate": 0.00019005640031738216, "loss": 12.6088, "step": 6202 }, { "epoch": 0.33777814080442164, "grad_norm": 0.7407616600834069, "learning_rate": 0.00019005256646364632, "loss": 12.551, "step": 6203 }, { "epoch": 0.3378325948010047, "grad_norm": 0.6405237750463625, "learning_rate": 0.00019004873190964664, "loss": 12.7041, "step": 6204 }, { "epoch": 0.3378870487975877, "grad_norm": 0.5858757409387871, "learning_rate": 0.0001900448966554129, "loss": 12.4482, "step": 6205 }, { "epoch": 0.3379415027941707, "grad_norm": 0.7294245367979105, "learning_rate": 0.00019004106070097496, "loss": 12.5935, "step": 6206 }, { "epoch": 0.3379959567907537, "grad_norm": 0.6463929455412016, "learning_rate": 0.0001900372240463626, "loss": 12.4734, "step": 6207 }, { "epoch": 0.3380504107873367, "grad_norm": 0.6904944812387054, "learning_rate": 0.0001900333866916057, "loss": 12.5885, "step": 6208 }, { "epoch": 0.3381048647839197, "grad_norm": 0.685170401259956, "learning_rate": 0.0001900295486367341, "loss": 12.5826, "step": 6209 }, { "epoch": 0.33815931878050276, "grad_norm": 0.6357849225796319, "learning_rate": 0.00019002570988177763, "loss": 12.5158, "step": 6210 }, { "epoch": 0.33821377277708575, "grad_norm": 0.6683542317848408, "learning_rate": 0.00019002187042676613, "loss": 12.5072, "step": 6211 }, { "epoch": 0.33826822677366875, "grad_norm": 0.6495159611040929, "learning_rate": 0.0001900180302717295, "loss": 12.5352, "step": 6212 }, { "epoch": 0.3383226807702518, "grad_norm": 0.6391516161165911, "learning_rate": 0.00019001418941669754, "loss": 12.5244, "step": 6213 }, { "epoch": 0.3383771347668348, "grad_norm": 0.7089891987518301, "learning_rate": 0.00019001034786170014, "loss": 12.8155, "step": 6214 }, { "epoch": 0.3384315887634178, "grad_norm": 0.7321755456555036, "learning_rate": 0.0001900065056067672, "loss": 12.6221, "step": 6215 }, { "epoch": 0.3384860427600008, "grad_norm": 0.7487920427898623, "learning_rate": 0.0001900026626519286, "loss": 12.4454, "step": 6216 }, { "epoch": 0.3385404967565838, "grad_norm": 0.7044242558236338, "learning_rate": 0.00018999881899721416, "loss": 12.5365, "step": 6217 }, { "epoch": 0.33859495075316687, "grad_norm": 0.6597639513487883, "learning_rate": 0.00018999497464265383, "loss": 12.5274, "step": 6218 }, { "epoch": 0.33864940474974986, "grad_norm": 0.76686473507453, "learning_rate": 0.00018999112958827748, "loss": 12.5673, "step": 6219 }, { "epoch": 0.33870385874633285, "grad_norm": 0.6161336047920143, "learning_rate": 0.000189987283834115, "loss": 12.5084, "step": 6220 }, { "epoch": 0.3387583127429159, "grad_norm": 0.8074532039395405, "learning_rate": 0.00018998343738019634, "loss": 12.4663, "step": 6221 }, { "epoch": 0.3388127667394989, "grad_norm": 0.6652786762546103, "learning_rate": 0.00018997959022655137, "loss": 12.4228, "step": 6222 }, { "epoch": 0.3388672207360819, "grad_norm": 0.7768352951927515, "learning_rate": 0.00018997574237321002, "loss": 12.5579, "step": 6223 }, { "epoch": 0.33892167473266493, "grad_norm": 0.688925320784393, "learning_rate": 0.0001899718938202022, "loss": 12.4842, "step": 6224 }, { "epoch": 0.3389761287292479, "grad_norm": 0.6166350003751984, "learning_rate": 0.00018996804456755784, "loss": 12.5745, "step": 6225 }, { "epoch": 0.3390305827258309, "grad_norm": 0.7968250998062604, "learning_rate": 0.00018996419461530687, "loss": 12.4601, "step": 6226 }, { "epoch": 0.33908503672241397, "grad_norm": 0.7204375657832244, "learning_rate": 0.00018996034396347929, "loss": 12.4444, "step": 6227 }, { "epoch": 0.33913949071899696, "grad_norm": 0.7435334241715121, "learning_rate": 0.00018995649261210497, "loss": 12.5888, "step": 6228 }, { "epoch": 0.33919394471557995, "grad_norm": 0.7132449890620907, "learning_rate": 0.0001899526405612138, "loss": 12.4273, "step": 6229 }, { "epoch": 0.339248398712163, "grad_norm": 0.647762025838811, "learning_rate": 0.00018994878781083589, "loss": 12.4589, "step": 6230 }, { "epoch": 0.339302852708746, "grad_norm": 0.8074144202446832, "learning_rate": 0.00018994493436100108, "loss": 12.5515, "step": 6231 }, { "epoch": 0.339357306705329, "grad_norm": 0.7421418526430787, "learning_rate": 0.00018994108021173943, "loss": 12.6725, "step": 6232 }, { "epoch": 0.33941176070191204, "grad_norm": 0.6315796994855236, "learning_rate": 0.0001899372253630808, "loss": 12.5219, "step": 6233 }, { "epoch": 0.33946621469849503, "grad_norm": 0.7867514236741496, "learning_rate": 0.00018993336981505527, "loss": 12.5488, "step": 6234 }, { "epoch": 0.339520668695078, "grad_norm": 0.6825545070067882, "learning_rate": 0.00018992951356769274, "loss": 12.6453, "step": 6235 }, { "epoch": 0.33957512269166107, "grad_norm": 0.5581581779692462, "learning_rate": 0.00018992565662102323, "loss": 12.4608, "step": 6236 }, { "epoch": 0.33962957668824406, "grad_norm": 0.6695781279945339, "learning_rate": 0.00018992179897507679, "loss": 12.5149, "step": 6237 }, { "epoch": 0.33968403068482705, "grad_norm": 0.7055852494170761, "learning_rate": 0.00018991794062988331, "loss": 12.5044, "step": 6238 }, { "epoch": 0.3397384846814101, "grad_norm": 0.7226116530536991, "learning_rate": 0.00018991408158547285, "loss": 12.4421, "step": 6239 }, { "epoch": 0.3397929386779931, "grad_norm": 0.7363087436848126, "learning_rate": 0.0001899102218418754, "loss": 12.499, "step": 6240 }, { "epoch": 0.3398473926745761, "grad_norm": 0.6300687166747003, "learning_rate": 0.00018990636139912102, "loss": 12.5233, "step": 6241 }, { "epoch": 0.33990184667115914, "grad_norm": 0.793004713938736, "learning_rate": 0.00018990250025723967, "loss": 12.5717, "step": 6242 }, { "epoch": 0.33995630066774213, "grad_norm": 0.6699161458232288, "learning_rate": 0.0001898986384162614, "loss": 12.5451, "step": 6243 }, { "epoch": 0.3400107546643251, "grad_norm": 0.7459639388831515, "learning_rate": 0.00018989477587621627, "loss": 12.5257, "step": 6244 }, { "epoch": 0.34006520866090817, "grad_norm": 0.7132170474264726, "learning_rate": 0.00018989091263713428, "loss": 12.5845, "step": 6245 }, { "epoch": 0.34011966265749116, "grad_norm": 0.6045019422774155, "learning_rate": 0.00018988704869904547, "loss": 12.3912, "step": 6246 }, { "epoch": 0.34017411665407415, "grad_norm": 0.666948712286019, "learning_rate": 0.0001898831840619799, "loss": 12.3408, "step": 6247 }, { "epoch": 0.3402285706506572, "grad_norm": 0.7105971968384324, "learning_rate": 0.0001898793187259676, "loss": 12.5605, "step": 6248 }, { "epoch": 0.3402830246472402, "grad_norm": 0.628138207855354, "learning_rate": 0.00018987545269103865, "loss": 12.46, "step": 6249 }, { "epoch": 0.3403374786438232, "grad_norm": 0.7078715466744697, "learning_rate": 0.00018987158595722313, "loss": 12.5245, "step": 6250 }, { "epoch": 0.34039193264040624, "grad_norm": 0.659737373649185, "learning_rate": 0.00018986771852455109, "loss": 12.604, "step": 6251 }, { "epoch": 0.34044638663698923, "grad_norm": 0.7146806557781517, "learning_rate": 0.00018986385039305255, "loss": 12.5897, "step": 6252 }, { "epoch": 0.3405008406335723, "grad_norm": 0.6435362637324684, "learning_rate": 0.00018985998156275765, "loss": 12.5073, "step": 6253 }, { "epoch": 0.34055529463015527, "grad_norm": 0.6518305515914652, "learning_rate": 0.00018985611203369652, "loss": 12.5177, "step": 6254 }, { "epoch": 0.34060974862673826, "grad_norm": 0.7293022805036647, "learning_rate": 0.00018985224180589913, "loss": 12.3901, "step": 6255 }, { "epoch": 0.3406642026233213, "grad_norm": 0.7226616833868063, "learning_rate": 0.00018984837087939567, "loss": 12.5533, "step": 6256 }, { "epoch": 0.3407186566199043, "grad_norm": 0.6316932747556198, "learning_rate": 0.00018984449925421622, "loss": 12.2969, "step": 6257 }, { "epoch": 0.3407731106164873, "grad_norm": 0.650964042997637, "learning_rate": 0.00018984062693039086, "loss": 12.545, "step": 6258 }, { "epoch": 0.34082756461307034, "grad_norm": 0.6313622298812942, "learning_rate": 0.00018983675390794972, "loss": 12.5437, "step": 6259 }, { "epoch": 0.34088201860965334, "grad_norm": 0.6856129479064164, "learning_rate": 0.0001898328801869229, "loss": 12.5644, "step": 6260 }, { "epoch": 0.34093647260623633, "grad_norm": 0.6754733609937804, "learning_rate": 0.0001898290057673406, "loss": 12.5439, "step": 6261 }, { "epoch": 0.3409909266028194, "grad_norm": 0.6175904268818588, "learning_rate": 0.00018982513064923283, "loss": 12.4148, "step": 6262 }, { "epoch": 0.34104538059940237, "grad_norm": 0.7029133910496881, "learning_rate": 0.00018982125483262978, "loss": 12.6239, "step": 6263 }, { "epoch": 0.34109983459598536, "grad_norm": 0.6764524896672984, "learning_rate": 0.0001898173783175616, "loss": 12.4716, "step": 6264 }, { "epoch": 0.3411542885925684, "grad_norm": 0.6473075046714889, "learning_rate": 0.00018981350110405844, "loss": 12.3433, "step": 6265 }, { "epoch": 0.3412087425891514, "grad_norm": 0.7340041196610504, "learning_rate": 0.0001898096231921504, "loss": 12.6536, "step": 6266 }, { "epoch": 0.3412631965857344, "grad_norm": 0.6704673154087119, "learning_rate": 0.00018980574458186774, "loss": 12.5468, "step": 6267 }, { "epoch": 0.34131765058231744, "grad_norm": 0.6352924231527934, "learning_rate": 0.00018980186527324048, "loss": 12.5107, "step": 6268 }, { "epoch": 0.34137210457890044, "grad_norm": 0.6933220771845349, "learning_rate": 0.00018979798526629887, "loss": 12.4702, "step": 6269 }, { "epoch": 0.34142655857548343, "grad_norm": 0.6339519467155723, "learning_rate": 0.00018979410456107306, "loss": 12.3719, "step": 6270 }, { "epoch": 0.3414810125720665, "grad_norm": 0.7721477942860392, "learning_rate": 0.00018979022315759325, "loss": 12.6534, "step": 6271 }, { "epoch": 0.34153546656864947, "grad_norm": 0.6153794386548143, "learning_rate": 0.00018978634105588961, "loss": 12.574, "step": 6272 }, { "epoch": 0.34158992056523246, "grad_norm": 0.6769230590319805, "learning_rate": 0.00018978245825599234, "loss": 12.4373, "step": 6273 }, { "epoch": 0.3416443745618155, "grad_norm": 0.8119995792072666, "learning_rate": 0.00018977857475793158, "loss": 12.5873, "step": 6274 }, { "epoch": 0.3416988285583985, "grad_norm": 0.6749571198734036, "learning_rate": 0.0001897746905617376, "loss": 12.6444, "step": 6275 }, { "epoch": 0.3417532825549815, "grad_norm": 0.7317901747313105, "learning_rate": 0.00018977080566744055, "loss": 12.469, "step": 6276 }, { "epoch": 0.34180773655156454, "grad_norm": 0.635670753116196, "learning_rate": 0.00018976692007507067, "loss": 12.3512, "step": 6277 }, { "epoch": 0.34186219054814754, "grad_norm": 0.7243933147841601, "learning_rate": 0.00018976303378465814, "loss": 12.5583, "step": 6278 }, { "epoch": 0.34191664454473053, "grad_norm": 0.6084842547409487, "learning_rate": 0.00018975914679623325, "loss": 12.538, "step": 6279 }, { "epoch": 0.3419710985413136, "grad_norm": 0.7593564909409162, "learning_rate": 0.00018975525910982615, "loss": 12.6837, "step": 6280 }, { "epoch": 0.34202555253789657, "grad_norm": 0.636182795196838, "learning_rate": 0.0001897513707254671, "loss": 12.5124, "step": 6281 }, { "epoch": 0.34208000653447956, "grad_norm": 0.8155321522237161, "learning_rate": 0.00018974748164318636, "loss": 12.5131, "step": 6282 }, { "epoch": 0.3421344605310626, "grad_norm": 0.6426416580788399, "learning_rate": 0.00018974359186301417, "loss": 12.5234, "step": 6283 }, { "epoch": 0.3421889145276456, "grad_norm": 0.8193055100539444, "learning_rate": 0.00018973970138498071, "loss": 12.5375, "step": 6284 }, { "epoch": 0.34224336852422865, "grad_norm": 0.7185734747917539, "learning_rate": 0.00018973581020911634, "loss": 12.4833, "step": 6285 }, { "epoch": 0.34229782252081165, "grad_norm": 0.6288019118690862, "learning_rate": 0.0001897319183354512, "loss": 12.6266, "step": 6286 }, { "epoch": 0.34235227651739464, "grad_norm": 0.7132135055944064, "learning_rate": 0.00018972802576401566, "loss": 12.6449, "step": 6287 }, { "epoch": 0.3424067305139777, "grad_norm": 0.5785610189135727, "learning_rate": 0.00018972413249483992, "loss": 12.4041, "step": 6288 }, { "epoch": 0.3424611845105607, "grad_norm": 0.703198984063344, "learning_rate": 0.00018972023852795427, "loss": 12.5444, "step": 6289 }, { "epoch": 0.34251563850714367, "grad_norm": 0.668041192115363, "learning_rate": 0.000189716343863389, "loss": 12.5276, "step": 6290 }, { "epoch": 0.3425700925037267, "grad_norm": 0.7179533760536456, "learning_rate": 0.00018971244850117443, "loss": 12.4907, "step": 6291 }, { "epoch": 0.3426245465003097, "grad_norm": 0.6547689597350955, "learning_rate": 0.0001897085524413408, "loss": 12.5585, "step": 6292 }, { "epoch": 0.3426790004968927, "grad_norm": 0.6892810246093969, "learning_rate": 0.0001897046556839184, "loss": 12.6043, "step": 6293 }, { "epoch": 0.34273345449347575, "grad_norm": 0.6293810182504478, "learning_rate": 0.0001897007582289376, "loss": 12.5168, "step": 6294 }, { "epoch": 0.34278790849005875, "grad_norm": 0.6465223613402978, "learning_rate": 0.0001896968600764286, "loss": 12.5322, "step": 6295 }, { "epoch": 0.34284236248664174, "grad_norm": 0.6450240234888404, "learning_rate": 0.00018969296122642185, "loss": 12.5049, "step": 6296 }, { "epoch": 0.3428968164832248, "grad_norm": 0.700539836061821, "learning_rate": 0.00018968906167894753, "loss": 12.5086, "step": 6297 }, { "epoch": 0.3429512704798078, "grad_norm": 0.6584639102315182, "learning_rate": 0.00018968516143403604, "loss": 12.4562, "step": 6298 }, { "epoch": 0.34300572447639077, "grad_norm": 0.6823128388229975, "learning_rate": 0.00018968126049171772, "loss": 12.5732, "step": 6299 }, { "epoch": 0.3430601784729738, "grad_norm": 0.6786200126620332, "learning_rate": 0.00018967735885202285, "loss": 12.353, "step": 6300 }, { "epoch": 0.3431146324695568, "grad_norm": 0.7928388839289368, "learning_rate": 0.0001896734565149818, "loss": 12.5546, "step": 6301 }, { "epoch": 0.3431690864661398, "grad_norm": 0.8269561615090272, "learning_rate": 0.00018966955348062494, "loss": 12.4672, "step": 6302 }, { "epoch": 0.34322354046272285, "grad_norm": 0.6666521103507326, "learning_rate": 0.00018966564974898256, "loss": 12.5375, "step": 6303 }, { "epoch": 0.34327799445930585, "grad_norm": 0.6701268520745489, "learning_rate": 0.00018966174532008507, "loss": 12.6131, "step": 6304 }, { "epoch": 0.34333244845588884, "grad_norm": 0.6766200647318046, "learning_rate": 0.0001896578401939628, "loss": 12.5149, "step": 6305 }, { "epoch": 0.3433869024524719, "grad_norm": 0.6599092895292733, "learning_rate": 0.00018965393437064614, "loss": 12.4065, "step": 6306 }, { "epoch": 0.3434413564490549, "grad_norm": 0.6659625059287979, "learning_rate": 0.00018965002785016543, "loss": 12.5598, "step": 6307 }, { "epoch": 0.3434958104456379, "grad_norm": 0.8131428911036774, "learning_rate": 0.0001896461206325511, "loss": 12.6066, "step": 6308 }, { "epoch": 0.3435502644422209, "grad_norm": 0.6293054241618713, "learning_rate": 0.00018964221271783349, "loss": 12.5573, "step": 6309 }, { "epoch": 0.3436047184388039, "grad_norm": 0.618067658836677, "learning_rate": 0.000189638304106043, "loss": 12.4552, "step": 6310 }, { "epoch": 0.3436591724353869, "grad_norm": 0.8179694432192177, "learning_rate": 0.00018963439479721003, "loss": 12.5569, "step": 6311 }, { "epoch": 0.34371362643196995, "grad_norm": 0.6206028828875811, "learning_rate": 0.00018963048479136497, "loss": 12.5951, "step": 6312 }, { "epoch": 0.34376808042855295, "grad_norm": 0.662690160695336, "learning_rate": 0.0001896265740885382, "loss": 12.555, "step": 6313 }, { "epoch": 0.34382253442513594, "grad_norm": 0.692073058932196, "learning_rate": 0.0001896226626887602, "loss": 12.4739, "step": 6314 }, { "epoch": 0.343876988421719, "grad_norm": 0.6636264210544816, "learning_rate": 0.00018961875059206136, "loss": 12.6116, "step": 6315 }, { "epoch": 0.343931442418302, "grad_norm": 0.7960881435996826, "learning_rate": 0.00018961483779847204, "loss": 12.5392, "step": 6316 }, { "epoch": 0.343985896414885, "grad_norm": 0.7024499992984288, "learning_rate": 0.00018961092430802275, "loss": 12.5088, "step": 6317 }, { "epoch": 0.344040350411468, "grad_norm": 0.5998701393444636, "learning_rate": 0.00018960701012074387, "loss": 12.4843, "step": 6318 }, { "epoch": 0.344094804408051, "grad_norm": 0.6386075283677938, "learning_rate": 0.00018960309523666585, "loss": 12.5588, "step": 6319 }, { "epoch": 0.34414925840463406, "grad_norm": 0.6187861240384069, "learning_rate": 0.00018959917965581912, "loss": 12.5536, "step": 6320 }, { "epoch": 0.34420371240121705, "grad_norm": 0.700612269892657, "learning_rate": 0.00018959526337823416, "loss": 12.5407, "step": 6321 }, { "epoch": 0.34425816639780005, "grad_norm": 0.713290950613392, "learning_rate": 0.00018959134640394141, "loss": 12.5776, "step": 6322 }, { "epoch": 0.3443126203943831, "grad_norm": 0.6599590469218795, "learning_rate": 0.0001895874287329713, "loss": 12.646, "step": 6323 }, { "epoch": 0.3443670743909661, "grad_norm": 0.7129764251208779, "learning_rate": 0.00018958351036535437, "loss": 12.5006, "step": 6324 }, { "epoch": 0.3444215283875491, "grad_norm": 0.6853621732756515, "learning_rate": 0.000189579591301121, "loss": 12.4555, "step": 6325 }, { "epoch": 0.34447598238413213, "grad_norm": 0.6806288970030699, "learning_rate": 0.00018957567154030173, "loss": 12.5047, "step": 6326 }, { "epoch": 0.3445304363807151, "grad_norm": 0.694590261163472, "learning_rate": 0.000189571751082927, "loss": 12.6542, "step": 6327 }, { "epoch": 0.3445848903772981, "grad_norm": 0.6753126132952569, "learning_rate": 0.0001895678299290273, "loss": 12.4212, "step": 6328 }, { "epoch": 0.34463934437388116, "grad_norm": 0.7192878280330849, "learning_rate": 0.00018956390807863316, "loss": 12.3633, "step": 6329 }, { "epoch": 0.34469379837046416, "grad_norm": 0.7326573316491729, "learning_rate": 0.00018955998553177504, "loss": 12.5626, "step": 6330 }, { "epoch": 0.34474825236704715, "grad_norm": 0.5918551403736377, "learning_rate": 0.00018955606228848347, "loss": 12.4205, "step": 6331 }, { "epoch": 0.3448027063636302, "grad_norm": 0.7891637583902474, "learning_rate": 0.00018955213834878892, "loss": 12.5029, "step": 6332 }, { "epoch": 0.3448571603602132, "grad_norm": 0.5980834101002779, "learning_rate": 0.00018954821371272194, "loss": 12.4273, "step": 6333 }, { "epoch": 0.3449116143567962, "grad_norm": 0.8125656724378687, "learning_rate": 0.000189544288380313, "loss": 12.5797, "step": 6334 }, { "epoch": 0.34496606835337923, "grad_norm": 0.7509595238380199, "learning_rate": 0.0001895403623515927, "loss": 12.5089, "step": 6335 }, { "epoch": 0.3450205223499622, "grad_norm": 0.7248800926147912, "learning_rate": 0.0001895364356265915, "loss": 12.5702, "step": 6336 }, { "epoch": 0.3450749763465452, "grad_norm": 0.8055583215818384, "learning_rate": 0.00018953250820533994, "loss": 12.5792, "step": 6337 }, { "epoch": 0.34512943034312826, "grad_norm": 0.5895339977865024, "learning_rate": 0.00018952858008786861, "loss": 12.5369, "step": 6338 }, { "epoch": 0.34518388433971126, "grad_norm": 0.7844342988424144, "learning_rate": 0.000189524651274208, "loss": 12.6507, "step": 6339 }, { "epoch": 0.34523833833629425, "grad_norm": 0.641138721002702, "learning_rate": 0.00018952072176438875, "loss": 12.473, "step": 6340 }, { "epoch": 0.3452927923328773, "grad_norm": 0.7272744757161392, "learning_rate": 0.0001895167915584413, "loss": 12.5863, "step": 6341 }, { "epoch": 0.3453472463294603, "grad_norm": 0.7097480150671507, "learning_rate": 0.0001895128606563963, "loss": 12.6004, "step": 6342 }, { "epoch": 0.3454017003260433, "grad_norm": 0.6246608664740043, "learning_rate": 0.0001895089290582843, "loss": 12.4141, "step": 6343 }, { "epoch": 0.34545615432262633, "grad_norm": 0.6930736444561254, "learning_rate": 0.0001895049967641358, "loss": 12.4728, "step": 6344 }, { "epoch": 0.3455106083192093, "grad_norm": 0.6358442922013844, "learning_rate": 0.00018950106377398147, "loss": 12.601, "step": 6345 }, { "epoch": 0.3455650623157923, "grad_norm": 0.6550122925149894, "learning_rate": 0.00018949713008785187, "loss": 12.6569, "step": 6346 }, { "epoch": 0.34561951631237536, "grad_norm": 0.6239208208894378, "learning_rate": 0.00018949319570577756, "loss": 12.5735, "step": 6347 }, { "epoch": 0.34567397030895836, "grad_norm": 0.6506837444783518, "learning_rate": 0.0001894892606277891, "loss": 12.5761, "step": 6348 }, { "epoch": 0.34572842430554135, "grad_norm": 0.6475058151362632, "learning_rate": 0.00018948532485391724, "loss": 12.516, "step": 6349 }, { "epoch": 0.3457828783021244, "grad_norm": 0.6793182543915407, "learning_rate": 0.00018948138838419243, "loss": 12.3702, "step": 6350 }, { "epoch": 0.3458373322987074, "grad_norm": 0.6628263555202152, "learning_rate": 0.00018947745121864534, "loss": 12.4945, "step": 6351 }, { "epoch": 0.34589178629529044, "grad_norm": 0.6251765624796839, "learning_rate": 0.0001894735133573066, "loss": 12.5348, "step": 6352 }, { "epoch": 0.34594624029187343, "grad_norm": 0.6143638650692151, "learning_rate": 0.0001894695748002068, "loss": 12.4181, "step": 6353 }, { "epoch": 0.3460006942884564, "grad_norm": 0.6529588496376133, "learning_rate": 0.0001894656355473766, "loss": 12.5347, "step": 6354 }, { "epoch": 0.34605514828503947, "grad_norm": 0.6120964415426231, "learning_rate": 0.0001894616955988466, "loss": 12.5119, "step": 6355 }, { "epoch": 0.34610960228162246, "grad_norm": 0.7211947140757892, "learning_rate": 0.00018945775495464746, "loss": 12.5204, "step": 6356 }, { "epoch": 0.34616405627820546, "grad_norm": 0.7094435979733846, "learning_rate": 0.0001894538136148098, "loss": 12.6202, "step": 6357 }, { "epoch": 0.3462185102747885, "grad_norm": 0.7143215980668297, "learning_rate": 0.00018944987157936433, "loss": 12.5306, "step": 6358 }, { "epoch": 0.3462729642713715, "grad_norm": 0.594273115652585, "learning_rate": 0.00018944592884834158, "loss": 12.5041, "step": 6359 }, { "epoch": 0.3463274182679545, "grad_norm": 0.6021587890983173, "learning_rate": 0.00018944198542177233, "loss": 12.4764, "step": 6360 }, { "epoch": 0.34638187226453754, "grad_norm": 0.6925011127854152, "learning_rate": 0.00018943804129968722, "loss": 12.4984, "step": 6361 }, { "epoch": 0.34643632626112053, "grad_norm": 0.6341869152621732, "learning_rate": 0.00018943409648211688, "loss": 12.5632, "step": 6362 }, { "epoch": 0.3464907802577035, "grad_norm": 0.6482645542487078, "learning_rate": 0.00018943015096909203, "loss": 12.539, "step": 6363 }, { "epoch": 0.34654523425428657, "grad_norm": 1.0140057634003605, "learning_rate": 0.0001894262047606433, "loss": 12.5651, "step": 6364 }, { "epoch": 0.34659968825086956, "grad_norm": 0.7749586692842871, "learning_rate": 0.0001894222578568014, "loss": 12.6487, "step": 6365 }, { "epoch": 0.34665414224745256, "grad_norm": 0.7511098701115632, "learning_rate": 0.00018941831025759705, "loss": 12.5013, "step": 6366 }, { "epoch": 0.3467085962440356, "grad_norm": 0.6900038024918674, "learning_rate": 0.00018941436196306092, "loss": 12.4541, "step": 6367 }, { "epoch": 0.3467630502406186, "grad_norm": 0.649129436938782, "learning_rate": 0.0001894104129732237, "loss": 12.5152, "step": 6368 }, { "epoch": 0.3468175042372016, "grad_norm": 0.6369552672624779, "learning_rate": 0.00018940646328811616, "loss": 12.4275, "step": 6369 }, { "epoch": 0.34687195823378464, "grad_norm": 0.6559936737005914, "learning_rate": 0.0001894025129077689, "loss": 12.5292, "step": 6370 }, { "epoch": 0.34692641223036763, "grad_norm": 0.6739824166369338, "learning_rate": 0.00018939856183221277, "loss": 12.5955, "step": 6371 }, { "epoch": 0.3469808662269506, "grad_norm": 0.6310087891162702, "learning_rate": 0.0001893946100614784, "loss": 12.5557, "step": 6372 }, { "epoch": 0.34703532022353367, "grad_norm": 0.7462373695583338, "learning_rate": 0.00018939065759559655, "loss": 12.498, "step": 6373 }, { "epoch": 0.34708977422011666, "grad_norm": 0.6208249439174864, "learning_rate": 0.00018938670443459797, "loss": 12.535, "step": 6374 }, { "epoch": 0.34714422821669966, "grad_norm": 0.6676441700139456, "learning_rate": 0.0001893827505785134, "loss": 12.5842, "step": 6375 }, { "epoch": 0.3471986822132827, "grad_norm": 0.6282155039035795, "learning_rate": 0.00018937879602737352, "loss": 12.5026, "step": 6376 }, { "epoch": 0.3472531362098657, "grad_norm": 0.7052680426249938, "learning_rate": 0.00018937484078120916, "loss": 12.4728, "step": 6377 }, { "epoch": 0.3473075902064487, "grad_norm": 0.6920639408081833, "learning_rate": 0.00018937088484005107, "loss": 12.5618, "step": 6378 }, { "epoch": 0.34736204420303174, "grad_norm": 0.6593306649432598, "learning_rate": 0.00018936692820392995, "loss": 12.5355, "step": 6379 }, { "epoch": 0.34741649819961473, "grad_norm": 0.6217853510305813, "learning_rate": 0.00018936297087287663, "loss": 12.4688, "step": 6380 }, { "epoch": 0.3474709521961977, "grad_norm": 0.7318749515701808, "learning_rate": 0.00018935901284692188, "loss": 12.475, "step": 6381 }, { "epoch": 0.3475254061927808, "grad_norm": 0.711444766056599, "learning_rate": 0.00018935505412609645, "loss": 12.6268, "step": 6382 }, { "epoch": 0.34757986018936377, "grad_norm": 0.7406652625637992, "learning_rate": 0.0001893510947104311, "loss": 12.5164, "step": 6383 }, { "epoch": 0.34763431418594676, "grad_norm": 0.7130865533038795, "learning_rate": 0.0001893471345999567, "loss": 12.5484, "step": 6384 }, { "epoch": 0.3476887681825298, "grad_norm": 0.6612167121536203, "learning_rate": 0.000189343173794704, "loss": 12.4729, "step": 6385 }, { "epoch": 0.3477432221791128, "grad_norm": 0.6065860871835124, "learning_rate": 0.00018933921229470375, "loss": 12.5035, "step": 6386 }, { "epoch": 0.34779767617569585, "grad_norm": 0.691402387102845, "learning_rate": 0.00018933525009998684, "loss": 12.5822, "step": 6387 }, { "epoch": 0.34785213017227884, "grad_norm": 0.7433675288628595, "learning_rate": 0.00018933128721058403, "loss": 12.6504, "step": 6388 }, { "epoch": 0.34790658416886183, "grad_norm": 0.6196010968741086, "learning_rate": 0.00018932732362652617, "loss": 12.4233, "step": 6389 }, { "epoch": 0.3479610381654449, "grad_norm": 0.668475042083779, "learning_rate": 0.00018932335934784407, "loss": 12.5057, "step": 6390 }, { "epoch": 0.3480154921620279, "grad_norm": 0.9062156252444816, "learning_rate": 0.0001893193943745685, "loss": 12.5867, "step": 6391 }, { "epoch": 0.34806994615861087, "grad_norm": 0.6297267785296019, "learning_rate": 0.0001893154287067304, "loss": 12.5889, "step": 6392 }, { "epoch": 0.3481244001551939, "grad_norm": 0.6707450566245595, "learning_rate": 0.00018931146234436047, "loss": 12.457, "step": 6393 }, { "epoch": 0.3481788541517769, "grad_norm": 0.7407808248953949, "learning_rate": 0.00018930749528748967, "loss": 12.5703, "step": 6394 }, { "epoch": 0.3482333081483599, "grad_norm": 0.679668366691635, "learning_rate": 0.0001893035275361488, "loss": 12.5665, "step": 6395 }, { "epoch": 0.34828776214494295, "grad_norm": 0.6529951348154104, "learning_rate": 0.0001892995590903688, "loss": 12.5542, "step": 6396 }, { "epoch": 0.34834221614152594, "grad_norm": 0.6383316905634563, "learning_rate": 0.00018929558995018036, "loss": 12.5076, "step": 6397 }, { "epoch": 0.34839667013810893, "grad_norm": 0.7291276863483566, "learning_rate": 0.00018929162011561447, "loss": 12.5493, "step": 6398 }, { "epoch": 0.348451124134692, "grad_norm": 0.6492707381007455, "learning_rate": 0.00018928764958670198, "loss": 12.4822, "step": 6399 }, { "epoch": 0.348505578131275, "grad_norm": 0.7079173277913959, "learning_rate": 0.00018928367836347373, "loss": 12.5809, "step": 6400 }, { "epoch": 0.34856003212785797, "grad_norm": 0.750570468667512, "learning_rate": 0.00018927970644596064, "loss": 12.481, "step": 6401 }, { "epoch": 0.348614486124441, "grad_norm": 0.6244912076523662, "learning_rate": 0.00018927573383419356, "loss": 12.4996, "step": 6402 }, { "epoch": 0.348668940121024, "grad_norm": 0.7211818342696633, "learning_rate": 0.0001892717605282034, "loss": 12.5767, "step": 6403 }, { "epoch": 0.348723394117607, "grad_norm": 0.6794779875901277, "learning_rate": 0.00018926778652802111, "loss": 12.458, "step": 6404 }, { "epoch": 0.34877784811419005, "grad_norm": 0.654629105489235, "learning_rate": 0.0001892638118336775, "loss": 12.5111, "step": 6405 }, { "epoch": 0.34883230211077304, "grad_norm": 0.6827810366255843, "learning_rate": 0.00018925983644520352, "loss": 12.563, "step": 6406 }, { "epoch": 0.34888675610735603, "grad_norm": 0.6691807604313643, "learning_rate": 0.0001892558603626301, "loss": 12.608, "step": 6407 }, { "epoch": 0.3489412101039391, "grad_norm": 0.6556237166333346, "learning_rate": 0.00018925188358598813, "loss": 12.499, "step": 6408 }, { "epoch": 0.3489956641005221, "grad_norm": 0.669163097908646, "learning_rate": 0.00018924790611530857, "loss": 12.563, "step": 6409 }, { "epoch": 0.34905011809710507, "grad_norm": 0.6070951405750545, "learning_rate": 0.00018924392795062226, "loss": 12.3274, "step": 6410 }, { "epoch": 0.3491045720936881, "grad_norm": 0.7249396675579225, "learning_rate": 0.00018923994909196025, "loss": 12.475, "step": 6411 }, { "epoch": 0.3491590260902711, "grad_norm": 0.7266796712993444, "learning_rate": 0.00018923596953935342, "loss": 12.6071, "step": 6412 }, { "epoch": 0.3492134800868541, "grad_norm": 0.6490803692654883, "learning_rate": 0.00018923198929283276, "loss": 12.5236, "step": 6413 }, { "epoch": 0.34926793408343715, "grad_norm": 0.741220300259277, "learning_rate": 0.00018922800835242915, "loss": 12.635, "step": 6414 }, { "epoch": 0.34932238808002014, "grad_norm": 0.7445484770609918, "learning_rate": 0.0001892240267181736, "loss": 12.6129, "step": 6415 }, { "epoch": 0.34937684207660313, "grad_norm": 0.6759528956057483, "learning_rate": 0.00018922004439009702, "loss": 12.4762, "step": 6416 }, { "epoch": 0.3494312960731862, "grad_norm": 0.6745366120055789, "learning_rate": 0.00018921606136823046, "loss": 12.6715, "step": 6417 }, { "epoch": 0.3494857500697692, "grad_norm": 0.8237305176788643, "learning_rate": 0.00018921207765260482, "loss": 12.5819, "step": 6418 }, { "epoch": 0.3495402040663522, "grad_norm": 0.7112539950806442, "learning_rate": 0.00018920809324325107, "loss": 12.5541, "step": 6419 }, { "epoch": 0.3495946580629352, "grad_norm": 0.6667071155633175, "learning_rate": 0.0001892041081402003, "loss": 12.5276, "step": 6420 }, { "epoch": 0.3496491120595182, "grad_norm": 0.6273084531789137, "learning_rate": 0.0001892001223434834, "loss": 12.5549, "step": 6421 }, { "epoch": 0.34970356605610126, "grad_norm": 0.6750038564921762, "learning_rate": 0.00018919613585313135, "loss": 12.4548, "step": 6422 }, { "epoch": 0.34975802005268425, "grad_norm": 0.6252492245083748, "learning_rate": 0.00018919214866917522, "loss": 12.4935, "step": 6423 }, { "epoch": 0.34981247404926724, "grad_norm": 0.6984788655048768, "learning_rate": 0.000189188160791646, "loss": 12.5693, "step": 6424 }, { "epoch": 0.3498669280458503, "grad_norm": 0.6671234434806845, "learning_rate": 0.00018918417222057467, "loss": 12.4412, "step": 6425 }, { "epoch": 0.3499213820424333, "grad_norm": 0.7258796397397406, "learning_rate": 0.00018918018295599224, "loss": 12.489, "step": 6426 }, { "epoch": 0.3499758360390163, "grad_norm": 0.6848281360826799, "learning_rate": 0.00018917619299792978, "loss": 12.555, "step": 6427 }, { "epoch": 0.3500302900355993, "grad_norm": 0.6007537268845069, "learning_rate": 0.00018917220234641828, "loss": 12.4034, "step": 6428 }, { "epoch": 0.3500847440321823, "grad_norm": 0.6459033432263563, "learning_rate": 0.00018916821100148877, "loss": 12.5938, "step": 6429 }, { "epoch": 0.3501391980287653, "grad_norm": 0.6104972833604558, "learning_rate": 0.00018916421896317232, "loss": 12.5256, "step": 6430 }, { "epoch": 0.35019365202534836, "grad_norm": 0.724356568467194, "learning_rate": 0.00018916022623149994, "loss": 12.5068, "step": 6431 }, { "epoch": 0.35024810602193135, "grad_norm": 0.6684551809083941, "learning_rate": 0.00018915623280650268, "loss": 12.4023, "step": 6432 }, { "epoch": 0.35030256001851434, "grad_norm": 0.6606085311908325, "learning_rate": 0.00018915223868821158, "loss": 12.3153, "step": 6433 }, { "epoch": 0.3503570140150974, "grad_norm": 0.6925034443062139, "learning_rate": 0.00018914824387665776, "loss": 12.5406, "step": 6434 }, { "epoch": 0.3504114680116804, "grad_norm": 0.658729625903655, "learning_rate": 0.00018914424837187225, "loss": 12.4132, "step": 6435 }, { "epoch": 0.3504659220082634, "grad_norm": 0.6684606670577931, "learning_rate": 0.0001891402521738861, "loss": 12.4707, "step": 6436 }, { "epoch": 0.3505203760048464, "grad_norm": 0.7201695038485614, "learning_rate": 0.0001891362552827304, "loss": 12.6224, "step": 6437 }, { "epoch": 0.3505748300014294, "grad_norm": 0.7326773653782573, "learning_rate": 0.00018913225769843624, "loss": 12.5076, "step": 6438 }, { "epoch": 0.3506292839980124, "grad_norm": 0.6495451475924698, "learning_rate": 0.00018912825942103467, "loss": 12.4381, "step": 6439 }, { "epoch": 0.35068373799459546, "grad_norm": 0.6452599673897227, "learning_rate": 0.00018912426045055683, "loss": 12.5916, "step": 6440 }, { "epoch": 0.35073819199117845, "grad_norm": 0.6765236346094069, "learning_rate": 0.0001891202607870338, "loss": 12.5231, "step": 6441 }, { "epoch": 0.35079264598776144, "grad_norm": 0.7173390388248864, "learning_rate": 0.00018911626043049666, "loss": 12.6167, "step": 6442 }, { "epoch": 0.3508470999843445, "grad_norm": 0.5881983259786162, "learning_rate": 0.0001891122593809765, "loss": 12.4536, "step": 6443 }, { "epoch": 0.3509015539809275, "grad_norm": 0.6353387106698005, "learning_rate": 0.00018910825763850456, "loss": 12.4374, "step": 6444 }, { "epoch": 0.3509560079775105, "grad_norm": 0.6819914447916594, "learning_rate": 0.0001891042552031118, "loss": 12.6692, "step": 6445 }, { "epoch": 0.3510104619740935, "grad_norm": 0.6744783603916592, "learning_rate": 0.00018910025207482942, "loss": 12.5202, "step": 6446 }, { "epoch": 0.3510649159706765, "grad_norm": 0.9038488748322342, "learning_rate": 0.00018909624825368853, "loss": 12.4483, "step": 6447 }, { "epoch": 0.3511193699672595, "grad_norm": 0.6540032197278525, "learning_rate": 0.00018909224373972027, "loss": 12.4902, "step": 6448 }, { "epoch": 0.35117382396384256, "grad_norm": 0.6431012659983741, "learning_rate": 0.0001890882385329558, "loss": 12.5164, "step": 6449 }, { "epoch": 0.35122827796042555, "grad_norm": 0.6713430875386474, "learning_rate": 0.00018908423263342626, "loss": 12.5303, "step": 6450 }, { "epoch": 0.35128273195700854, "grad_norm": 0.7401191797977572, "learning_rate": 0.00018908022604116276, "loss": 12.5052, "step": 6451 }, { "epoch": 0.3513371859535916, "grad_norm": 0.6505333356783289, "learning_rate": 0.00018907621875619647, "loss": 12.5267, "step": 6452 }, { "epoch": 0.3513916399501746, "grad_norm": 0.6812347254619805, "learning_rate": 0.00018907221077855862, "loss": 12.4048, "step": 6453 }, { "epoch": 0.35144609394675763, "grad_norm": 0.6833135113192058, "learning_rate": 0.00018906820210828028, "loss": 12.5132, "step": 6454 }, { "epoch": 0.3515005479433406, "grad_norm": 0.6871285856235322, "learning_rate": 0.00018906419274539266, "loss": 12.5483, "step": 6455 }, { "epoch": 0.3515550019399236, "grad_norm": 0.7299322555996667, "learning_rate": 0.00018906018268992694, "loss": 12.396, "step": 6456 }, { "epoch": 0.35160945593650667, "grad_norm": 0.6799131452832852, "learning_rate": 0.00018905617194191435, "loss": 12.4827, "step": 6457 }, { "epoch": 0.35166390993308966, "grad_norm": 0.6595334118817054, "learning_rate": 0.00018905216050138596, "loss": 12.5873, "step": 6458 }, { "epoch": 0.35171836392967265, "grad_norm": 0.7508304565503127, "learning_rate": 0.00018904814836837307, "loss": 12.5145, "step": 6459 }, { "epoch": 0.3517728179262557, "grad_norm": 0.6701814667017392, "learning_rate": 0.00018904413554290684, "loss": 12.5822, "step": 6460 }, { "epoch": 0.3518272719228387, "grad_norm": 0.6912066956582165, "learning_rate": 0.0001890401220250185, "loss": 12.6066, "step": 6461 }, { "epoch": 0.3518817259194217, "grad_norm": 0.6390939063604993, "learning_rate": 0.00018903610781473927, "loss": 12.4256, "step": 6462 }, { "epoch": 0.35193617991600473, "grad_norm": 0.7219330604492857, "learning_rate": 0.00018903209291210027, "loss": 12.6042, "step": 6463 }, { "epoch": 0.3519906339125877, "grad_norm": 0.7091152392780032, "learning_rate": 0.0001890280773171328, "loss": 12.7038, "step": 6464 }, { "epoch": 0.3520450879091707, "grad_norm": 0.8294803361278149, "learning_rate": 0.0001890240610298681, "loss": 12.5393, "step": 6465 }, { "epoch": 0.35209954190575377, "grad_norm": 0.7327900394265904, "learning_rate": 0.00018902004405033733, "loss": 12.6018, "step": 6466 }, { "epoch": 0.35215399590233676, "grad_norm": 0.7558412427812986, "learning_rate": 0.0001890160263785718, "loss": 12.3143, "step": 6467 }, { "epoch": 0.35220844989891975, "grad_norm": 0.6485480492908395, "learning_rate": 0.0001890120080146027, "loss": 12.4945, "step": 6468 }, { "epoch": 0.3522629038955028, "grad_norm": 0.7549308050018257, "learning_rate": 0.00018900798895846134, "loss": 12.4988, "step": 6469 }, { "epoch": 0.3523173578920858, "grad_norm": 0.7610598992375268, "learning_rate": 0.00018900396921017886, "loss": 12.5119, "step": 6470 }, { "epoch": 0.3523718118886688, "grad_norm": 0.6593766636270706, "learning_rate": 0.00018899994876978664, "loss": 12.5072, "step": 6471 }, { "epoch": 0.35242626588525183, "grad_norm": 0.5841234835806068, "learning_rate": 0.00018899592763731588, "loss": 12.5108, "step": 6472 }, { "epoch": 0.3524807198818348, "grad_norm": 0.6375815631140179, "learning_rate": 0.00018899190581279788, "loss": 12.5106, "step": 6473 }, { "epoch": 0.3525351738784178, "grad_norm": 0.7265728910133166, "learning_rate": 0.00018898788329626388, "loss": 12.61, "step": 6474 }, { "epoch": 0.35258962787500087, "grad_norm": 0.7442169125235223, "learning_rate": 0.00018898386008774515, "loss": 12.5473, "step": 6475 }, { "epoch": 0.35264408187158386, "grad_norm": 0.7688054198431855, "learning_rate": 0.00018897983618727305, "loss": 12.5165, "step": 6476 }, { "epoch": 0.35269853586816685, "grad_norm": 0.7814867856748376, "learning_rate": 0.00018897581159487879, "loss": 12.6789, "step": 6477 }, { "epoch": 0.3527529898647499, "grad_norm": 0.8538938343014641, "learning_rate": 0.00018897178631059372, "loss": 12.6351, "step": 6478 }, { "epoch": 0.3528074438613329, "grad_norm": 0.6388380559820188, "learning_rate": 0.00018896776033444908, "loss": 12.5478, "step": 6479 }, { "epoch": 0.3528618978579159, "grad_norm": 0.6054334899810893, "learning_rate": 0.00018896373366647623, "loss": 12.4419, "step": 6480 }, { "epoch": 0.35291635185449893, "grad_norm": 0.7531948587362995, "learning_rate": 0.00018895970630670653, "loss": 12.4213, "step": 6481 }, { "epoch": 0.3529708058510819, "grad_norm": 0.7431856849626534, "learning_rate": 0.00018895567825517117, "loss": 12.4706, "step": 6482 }, { "epoch": 0.3530252598476649, "grad_norm": 0.6282523964067674, "learning_rate": 0.00018895164951190154, "loss": 12.446, "step": 6483 }, { "epoch": 0.35307971384424797, "grad_norm": 0.812969482425293, "learning_rate": 0.00018894762007692898, "loss": 12.5923, "step": 6484 }, { "epoch": 0.35313416784083096, "grad_norm": 0.6418835602683038, "learning_rate": 0.00018894358995028481, "loss": 12.5747, "step": 6485 }, { "epoch": 0.353188621837414, "grad_norm": 0.7819665290679157, "learning_rate": 0.00018893955913200036, "loss": 12.5215, "step": 6486 }, { "epoch": 0.353243075833997, "grad_norm": 0.6917473065243283, "learning_rate": 0.000188935527622107, "loss": 12.4607, "step": 6487 }, { "epoch": 0.35329752983058, "grad_norm": 0.7319956830798008, "learning_rate": 0.00018893149542063603, "loss": 12.5191, "step": 6488 }, { "epoch": 0.35335198382716304, "grad_norm": 0.6774051839724597, "learning_rate": 0.00018892746252761888, "loss": 12.6299, "step": 6489 }, { "epoch": 0.35340643782374603, "grad_norm": 0.6421731835455767, "learning_rate": 0.00018892342894308683, "loss": 12.4119, "step": 6490 }, { "epoch": 0.353460891820329, "grad_norm": 0.6386697914025962, "learning_rate": 0.0001889193946670713, "loss": 12.4614, "step": 6491 }, { "epoch": 0.3535153458169121, "grad_norm": 0.694150605451831, "learning_rate": 0.00018891535969960368, "loss": 12.3783, "step": 6492 }, { "epoch": 0.35356979981349507, "grad_norm": 0.6789059976029038, "learning_rate": 0.0001889113240407153, "loss": 12.5107, "step": 6493 }, { "epoch": 0.35362425381007806, "grad_norm": 0.6227305036984996, "learning_rate": 0.0001889072876904375, "loss": 12.4732, "step": 6494 }, { "epoch": 0.3536787078066611, "grad_norm": 0.6914785944572073, "learning_rate": 0.00018890325064880177, "loss": 12.6268, "step": 6495 }, { "epoch": 0.3537331618032441, "grad_norm": 0.8133604433595188, "learning_rate": 0.00018889921291583944, "loss": 12.4998, "step": 6496 }, { "epoch": 0.3537876157998271, "grad_norm": 0.6797182621673394, "learning_rate": 0.00018889517449158192, "loss": 12.4768, "step": 6497 }, { "epoch": 0.35384206979641014, "grad_norm": 0.7804143495476945, "learning_rate": 0.0001888911353760606, "loss": 12.6219, "step": 6498 }, { "epoch": 0.35389652379299313, "grad_norm": 0.6579373319187583, "learning_rate": 0.00018888709556930694, "loss": 12.4734, "step": 6499 }, { "epoch": 0.3539509777895761, "grad_norm": 0.6642270385457227, "learning_rate": 0.00018888305507135228, "loss": 12.5715, "step": 6500 }, { "epoch": 0.3540054317861592, "grad_norm": 0.6342501988731931, "learning_rate": 0.0001888790138822281, "loss": 12.4374, "step": 6501 }, { "epoch": 0.35405988578274217, "grad_norm": 0.6461119173969245, "learning_rate": 0.0001888749720019658, "loss": 12.4934, "step": 6502 }, { "epoch": 0.35411433977932516, "grad_norm": 0.6868152071661218, "learning_rate": 0.0001888709294305968, "loss": 12.5943, "step": 6503 }, { "epoch": 0.3541687937759082, "grad_norm": 0.6077876589490357, "learning_rate": 0.00018886688616815258, "loss": 12.4525, "step": 6504 }, { "epoch": 0.3542232477724912, "grad_norm": 0.6724925347128431, "learning_rate": 0.00018886284221466455, "loss": 12.3494, "step": 6505 }, { "epoch": 0.3542777017690742, "grad_norm": 0.7110664925398358, "learning_rate": 0.00018885879757016413, "loss": 12.5749, "step": 6506 }, { "epoch": 0.35433215576565724, "grad_norm": 0.7291926327690469, "learning_rate": 0.00018885475223468282, "loss": 12.5879, "step": 6507 }, { "epoch": 0.35438660976224023, "grad_norm": 0.7734607293232016, "learning_rate": 0.00018885070620825202, "loss": 12.5403, "step": 6508 }, { "epoch": 0.3544410637588232, "grad_norm": 0.6355688010151955, "learning_rate": 0.00018884665949090327, "loss": 12.545, "step": 6509 }, { "epoch": 0.3544955177554063, "grad_norm": 0.6731065364021674, "learning_rate": 0.000188842612082668, "loss": 12.4158, "step": 6510 }, { "epoch": 0.35454997175198927, "grad_norm": 0.6785630727656202, "learning_rate": 0.00018883856398357765, "loss": 12.5455, "step": 6511 }, { "epoch": 0.35460442574857226, "grad_norm": 0.677040878512404, "learning_rate": 0.00018883451519366372, "loss": 12.5873, "step": 6512 }, { "epoch": 0.3546588797451553, "grad_norm": 0.65287559154684, "learning_rate": 0.00018883046571295772, "loss": 12.2615, "step": 6513 }, { "epoch": 0.3547133337417383, "grad_norm": 0.5831793815322747, "learning_rate": 0.00018882641554149112, "loss": 12.5245, "step": 6514 }, { "epoch": 0.3547677877383213, "grad_norm": 0.7114706506008506, "learning_rate": 0.00018882236467929542, "loss": 12.4625, "step": 6515 }, { "epoch": 0.35482224173490434, "grad_norm": 0.649963966810882, "learning_rate": 0.00018881831312640213, "loss": 12.5165, "step": 6516 }, { "epoch": 0.35487669573148733, "grad_norm": 0.6883907364877639, "learning_rate": 0.0001888142608828427, "loss": 12.6147, "step": 6517 }, { "epoch": 0.3549311497280703, "grad_norm": 0.6099915381134763, "learning_rate": 0.00018881020794864873, "loss": 12.4195, "step": 6518 }, { "epoch": 0.3549856037246534, "grad_norm": 0.6779392941887493, "learning_rate": 0.00018880615432385165, "loss": 12.5528, "step": 6519 }, { "epoch": 0.35504005772123637, "grad_norm": 0.6853390762420948, "learning_rate": 0.00018880210000848306, "loss": 12.4175, "step": 6520 }, { "epoch": 0.3550945117178194, "grad_norm": 0.7099141792622605, "learning_rate": 0.0001887980450025744, "loss": 12.576, "step": 6521 }, { "epoch": 0.3551489657144024, "grad_norm": 0.6741612375122882, "learning_rate": 0.0001887939893061573, "loss": 12.5621, "step": 6522 }, { "epoch": 0.3552034197109854, "grad_norm": 0.6367742915817244, "learning_rate": 0.00018878993291926324, "loss": 12.4885, "step": 6523 }, { "epoch": 0.35525787370756845, "grad_norm": 0.6563563804700083, "learning_rate": 0.00018878587584192374, "loss": 12.3315, "step": 6524 }, { "epoch": 0.35531232770415144, "grad_norm": 0.6755606542985129, "learning_rate": 0.00018878181807417042, "loss": 12.3851, "step": 6525 }, { "epoch": 0.35536678170073444, "grad_norm": 0.6810645317599997, "learning_rate": 0.00018877775961603476, "loss": 12.4542, "step": 6526 }, { "epoch": 0.3554212356973175, "grad_norm": 0.6584896277234938, "learning_rate": 0.00018877370046754838, "loss": 12.4904, "step": 6527 }, { "epoch": 0.3554756896939005, "grad_norm": 0.782531194635525, "learning_rate": 0.00018876964062874277, "loss": 12.6789, "step": 6528 }, { "epoch": 0.35553014369048347, "grad_norm": 0.7047130293938447, "learning_rate": 0.0001887655800996496, "loss": 12.4975, "step": 6529 }, { "epoch": 0.3555845976870665, "grad_norm": 0.6392556211825621, "learning_rate": 0.00018876151888030037, "loss": 12.4937, "step": 6530 }, { "epoch": 0.3556390516836495, "grad_norm": 0.6357688326994323, "learning_rate": 0.00018875745697072668, "loss": 12.498, "step": 6531 }, { "epoch": 0.3556935056802325, "grad_norm": 0.7523241658001276, "learning_rate": 0.00018875339437096012, "loss": 12.5092, "step": 6532 }, { "epoch": 0.35574795967681555, "grad_norm": 0.6611830202180823, "learning_rate": 0.0001887493310810323, "loss": 12.5987, "step": 6533 }, { "epoch": 0.35580241367339854, "grad_norm": 0.6434045027474856, "learning_rate": 0.0001887452671009748, "loss": 12.4229, "step": 6534 }, { "epoch": 0.35585686766998154, "grad_norm": 0.6348599415287733, "learning_rate": 0.0001887412024308192, "loss": 12.4465, "step": 6535 }, { "epoch": 0.3559113216665646, "grad_norm": 0.6012998013242754, "learning_rate": 0.00018873713707059716, "loss": 12.4638, "step": 6536 }, { "epoch": 0.3559657756631476, "grad_norm": 0.6225976072562621, "learning_rate": 0.00018873307102034023, "loss": 12.4929, "step": 6537 }, { "epoch": 0.35602022965973057, "grad_norm": 0.6537989187335137, "learning_rate": 0.00018872900428008004, "loss": 12.4924, "step": 6538 }, { "epoch": 0.3560746836563136, "grad_norm": 0.7310649052768982, "learning_rate": 0.00018872493684984827, "loss": 12.6, "step": 6539 }, { "epoch": 0.3561291376528966, "grad_norm": 0.619228080128155, "learning_rate": 0.00018872086872967652, "loss": 12.4596, "step": 6540 }, { "epoch": 0.3561835916494796, "grad_norm": 0.8602848445604844, "learning_rate": 0.0001887167999195964, "loss": 12.3719, "step": 6541 }, { "epoch": 0.35623804564606265, "grad_norm": 0.6542020442995943, "learning_rate": 0.00018871273041963954, "loss": 12.5089, "step": 6542 }, { "epoch": 0.35629249964264564, "grad_norm": 0.6450493139868647, "learning_rate": 0.00018870866022983765, "loss": 12.4721, "step": 6543 }, { "epoch": 0.35634695363922864, "grad_norm": 0.6627589095066768, "learning_rate": 0.00018870458935022234, "loss": 12.5715, "step": 6544 }, { "epoch": 0.3564014076358117, "grad_norm": 0.6679819323871122, "learning_rate": 0.00018870051778082525, "loss": 12.4713, "step": 6545 }, { "epoch": 0.3564558616323947, "grad_norm": 0.5891025212950688, "learning_rate": 0.00018869644552167803, "loss": 12.4168, "step": 6546 }, { "epoch": 0.35651031562897767, "grad_norm": 0.7443645315187565, "learning_rate": 0.00018869237257281243, "loss": 12.5215, "step": 6547 }, { "epoch": 0.3565647696255607, "grad_norm": 0.675935462558438, "learning_rate": 0.00018868829893426003, "loss": 12.3665, "step": 6548 }, { "epoch": 0.3566192236221437, "grad_norm": 0.6266163892118104, "learning_rate": 0.00018868422460605259, "loss": 12.4143, "step": 6549 }, { "epoch": 0.3566736776187267, "grad_norm": 0.7024481342728484, "learning_rate": 0.00018868014958822168, "loss": 12.4768, "step": 6550 }, { "epoch": 0.35672813161530975, "grad_norm": 0.7236654554637472, "learning_rate": 0.0001886760738807991, "loss": 12.5488, "step": 6551 }, { "epoch": 0.35678258561189274, "grad_norm": 0.6277720760414693, "learning_rate": 0.0001886719974838165, "loss": 12.5349, "step": 6552 }, { "epoch": 0.3568370396084758, "grad_norm": 0.5491554105623445, "learning_rate": 0.00018866792039730557, "loss": 12.4084, "step": 6553 }, { "epoch": 0.3568914936050588, "grad_norm": 0.6574591041317043, "learning_rate": 0.000188663842621298, "loss": 12.5417, "step": 6554 }, { "epoch": 0.3569459476016418, "grad_norm": 0.7825346624677734, "learning_rate": 0.00018865976415582557, "loss": 12.4618, "step": 6555 }, { "epoch": 0.3570004015982248, "grad_norm": 0.6397378451307003, "learning_rate": 0.0001886556850009199, "loss": 12.4972, "step": 6556 }, { "epoch": 0.3570548555948078, "grad_norm": 0.6964582007099602, "learning_rate": 0.00018865160515661278, "loss": 12.6336, "step": 6557 }, { "epoch": 0.3571093095913908, "grad_norm": 0.6838137992784039, "learning_rate": 0.0001886475246229359, "loss": 12.6239, "step": 6558 }, { "epoch": 0.35716376358797386, "grad_norm": 0.7702590269454724, "learning_rate": 0.000188643443399921, "loss": 12.2703, "step": 6559 }, { "epoch": 0.35721821758455685, "grad_norm": 0.6275611356284347, "learning_rate": 0.00018863936148759983, "loss": 12.487, "step": 6560 }, { "epoch": 0.35727267158113984, "grad_norm": 0.7105981531040471, "learning_rate": 0.0001886352788860041, "loss": 12.4876, "step": 6561 }, { "epoch": 0.3573271255777229, "grad_norm": 0.6785012603603872, "learning_rate": 0.0001886311955951656, "loss": 12.5144, "step": 6562 }, { "epoch": 0.3573815795743059, "grad_norm": 0.6480000289225968, "learning_rate": 0.00018862711161511607, "loss": 12.5371, "step": 6563 }, { "epoch": 0.3574360335708889, "grad_norm": 0.6836036858096097, "learning_rate": 0.0001886230269458872, "loss": 12.484, "step": 6564 }, { "epoch": 0.3574904875674719, "grad_norm": 0.6809995375722718, "learning_rate": 0.00018861894158751086, "loss": 12.5016, "step": 6565 }, { "epoch": 0.3575449415640549, "grad_norm": 0.6525177660409279, "learning_rate": 0.00018861485554001877, "loss": 12.5633, "step": 6566 }, { "epoch": 0.3575993955606379, "grad_norm": 0.6576641847219601, "learning_rate": 0.00018861076880344267, "loss": 12.4833, "step": 6567 }, { "epoch": 0.35765384955722096, "grad_norm": 0.5990481118703957, "learning_rate": 0.0001886066813778144, "loss": 12.6255, "step": 6568 }, { "epoch": 0.35770830355380395, "grad_norm": 0.805099690625791, "learning_rate": 0.0001886025932631657, "loss": 12.4749, "step": 6569 }, { "epoch": 0.35776275755038695, "grad_norm": 0.6835538764125585, "learning_rate": 0.0001885985044595284, "loss": 12.5702, "step": 6570 }, { "epoch": 0.35781721154697, "grad_norm": 0.6435072091519588, "learning_rate": 0.00018859441496693426, "loss": 12.7074, "step": 6571 }, { "epoch": 0.357871665543553, "grad_norm": 0.6384713238741512, "learning_rate": 0.0001885903247854151, "loss": 12.3787, "step": 6572 }, { "epoch": 0.357926119540136, "grad_norm": 0.7192638915760756, "learning_rate": 0.00018858623391500268, "loss": 12.5576, "step": 6573 }, { "epoch": 0.357980573536719, "grad_norm": 0.6350450338580725, "learning_rate": 0.0001885821423557289, "loss": 12.5939, "step": 6574 }, { "epoch": 0.358035027533302, "grad_norm": 0.7084619240659714, "learning_rate": 0.00018857805010762547, "loss": 12.5479, "step": 6575 }, { "epoch": 0.358089481529885, "grad_norm": 0.6182206352160496, "learning_rate": 0.0001885739571707243, "loss": 12.4144, "step": 6576 }, { "epoch": 0.35814393552646806, "grad_norm": 0.7050067760240177, "learning_rate": 0.0001885698635450572, "loss": 12.518, "step": 6577 }, { "epoch": 0.35819838952305105, "grad_norm": 0.6444317754366243, "learning_rate": 0.00018856576923065597, "loss": 12.5393, "step": 6578 }, { "epoch": 0.35825284351963405, "grad_norm": 0.6796109302689314, "learning_rate": 0.00018856167422755246, "loss": 12.3975, "step": 6579 }, { "epoch": 0.3583072975162171, "grad_norm": 0.6790355456078522, "learning_rate": 0.00018855757853577853, "loss": 12.478, "step": 6580 }, { "epoch": 0.3583617515128001, "grad_norm": 0.7346590134415578, "learning_rate": 0.000188553482155366, "loss": 12.5285, "step": 6581 }, { "epoch": 0.3584162055093831, "grad_norm": 0.6529363025771956, "learning_rate": 0.00018854938508634678, "loss": 12.5329, "step": 6582 }, { "epoch": 0.3584706595059661, "grad_norm": 0.6270728056967176, "learning_rate": 0.00018854528732875265, "loss": 12.4526, "step": 6583 }, { "epoch": 0.3585251135025491, "grad_norm": 0.6167445436474197, "learning_rate": 0.00018854118888261554, "loss": 12.5071, "step": 6584 }, { "epoch": 0.3585795674991321, "grad_norm": 0.6658190255876781, "learning_rate": 0.0001885370897479673, "loss": 12.611, "step": 6585 }, { "epoch": 0.35863402149571516, "grad_norm": 0.7280245914122708, "learning_rate": 0.0001885329899248398, "loss": 12.603, "step": 6586 }, { "epoch": 0.35868847549229815, "grad_norm": 0.6513125287893339, "learning_rate": 0.0001885288894132649, "loss": 12.4227, "step": 6587 }, { "epoch": 0.3587429294888812, "grad_norm": 0.6482208097248535, "learning_rate": 0.00018852478821327452, "loss": 12.4279, "step": 6588 }, { "epoch": 0.3587973834854642, "grad_norm": 0.7403836328311797, "learning_rate": 0.00018852068632490058, "loss": 12.5555, "step": 6589 }, { "epoch": 0.3588518374820472, "grad_norm": 0.6310367450242624, "learning_rate": 0.0001885165837481749, "loss": 12.4712, "step": 6590 }, { "epoch": 0.35890629147863023, "grad_norm": 0.7276121536456144, "learning_rate": 0.0001885124804831294, "loss": 12.5856, "step": 6591 }, { "epoch": 0.3589607454752132, "grad_norm": 0.6186713256945982, "learning_rate": 0.00018850837652979605, "loss": 12.2877, "step": 6592 }, { "epoch": 0.3590151994717962, "grad_norm": 0.6180211458265332, "learning_rate": 0.00018850427188820673, "loss": 12.553, "step": 6593 }, { "epoch": 0.35906965346837927, "grad_norm": 0.6810319920116736, "learning_rate": 0.0001885001665583933, "loss": 12.5653, "step": 6594 }, { "epoch": 0.35912410746496226, "grad_norm": 0.6478256244548984, "learning_rate": 0.00018849606054038777, "loss": 12.4771, "step": 6595 }, { "epoch": 0.35917856146154525, "grad_norm": 0.6203360210240151, "learning_rate": 0.00018849195383422202, "loss": 12.6188, "step": 6596 }, { "epoch": 0.3592330154581283, "grad_norm": 0.7394212994938031, "learning_rate": 0.000188487846439928, "loss": 12.4102, "step": 6597 }, { "epoch": 0.3592874694547113, "grad_norm": 0.6700090375629415, "learning_rate": 0.00018848373835753766, "loss": 12.3701, "step": 6598 }, { "epoch": 0.3593419234512943, "grad_norm": 0.6763191547744783, "learning_rate": 0.0001884796295870829, "loss": 12.5191, "step": 6599 }, { "epoch": 0.35939637744787734, "grad_norm": 0.7225861178544174, "learning_rate": 0.00018847552012859573, "loss": 12.5006, "step": 6600 }, { "epoch": 0.35945083144446033, "grad_norm": 0.6598424950049372, "learning_rate": 0.00018847140998210806, "loss": 12.4225, "step": 6601 }, { "epoch": 0.3595052854410433, "grad_norm": 0.8022917346730528, "learning_rate": 0.00018846729914765188, "loss": 12.4835, "step": 6602 }, { "epoch": 0.35955973943762637, "grad_norm": 0.7155075380930347, "learning_rate": 0.00018846318762525915, "loss": 12.6107, "step": 6603 }, { "epoch": 0.35961419343420936, "grad_norm": 0.5788593511681562, "learning_rate": 0.00018845907541496182, "loss": 12.345, "step": 6604 }, { "epoch": 0.35966864743079235, "grad_norm": 0.7692664908605247, "learning_rate": 0.00018845496251679192, "loss": 12.4208, "step": 6605 }, { "epoch": 0.3597231014273754, "grad_norm": 0.6398281023425616, "learning_rate": 0.00018845084893078136, "loss": 12.4587, "step": 6606 }, { "epoch": 0.3597775554239584, "grad_norm": 0.6285622503429014, "learning_rate": 0.00018844673465696218, "loss": 12.5134, "step": 6607 }, { "epoch": 0.3598320094205414, "grad_norm": 0.7085105210743461, "learning_rate": 0.00018844261969536637, "loss": 12.5605, "step": 6608 }, { "epoch": 0.35988646341712444, "grad_norm": 0.6771526198094195, "learning_rate": 0.00018843850404602587, "loss": 12.4348, "step": 6609 }, { "epoch": 0.35994091741370743, "grad_norm": 0.6476584687119452, "learning_rate": 0.0001884343877089728, "loss": 12.4236, "step": 6610 }, { "epoch": 0.3599953714102904, "grad_norm": 0.6365661190221357, "learning_rate": 0.00018843027068423903, "loss": 12.4869, "step": 6611 }, { "epoch": 0.36004982540687347, "grad_norm": 0.6116624437632676, "learning_rate": 0.0001884261529718567, "loss": 12.4376, "step": 6612 }, { "epoch": 0.36010427940345646, "grad_norm": 0.6161016949457048, "learning_rate": 0.00018842203457185777, "loss": 12.3886, "step": 6613 }, { "epoch": 0.36015873340003945, "grad_norm": 0.6629262293434118, "learning_rate": 0.00018841791548427427, "loss": 12.4196, "step": 6614 }, { "epoch": 0.3602131873966225, "grad_norm": 0.6301688987629763, "learning_rate": 0.00018841379570913821, "loss": 12.4263, "step": 6615 }, { "epoch": 0.3602676413932055, "grad_norm": 0.6423425681866812, "learning_rate": 0.00018840967524648165, "loss": 12.5683, "step": 6616 }, { "epoch": 0.3603220953897885, "grad_norm": 0.6722468104910277, "learning_rate": 0.00018840555409633665, "loss": 12.5797, "step": 6617 }, { "epoch": 0.36037654938637154, "grad_norm": 0.683282736727712, "learning_rate": 0.00018840143225873522, "loss": 12.4557, "step": 6618 }, { "epoch": 0.36043100338295453, "grad_norm": 0.5689102418426638, "learning_rate": 0.00018839730973370942, "loss": 12.4297, "step": 6619 }, { "epoch": 0.3604854573795376, "grad_norm": 0.6515763103187273, "learning_rate": 0.00018839318652129136, "loss": 12.6092, "step": 6620 }, { "epoch": 0.36053991137612057, "grad_norm": 0.6638057645135687, "learning_rate": 0.000188389062621513, "loss": 12.5255, "step": 6621 }, { "epoch": 0.36059436537270356, "grad_norm": 0.7578143155135398, "learning_rate": 0.0001883849380344065, "loss": 12.4897, "step": 6622 }, { "epoch": 0.3606488193692866, "grad_norm": 0.6929891599006907, "learning_rate": 0.00018838081276000387, "loss": 12.4673, "step": 6623 }, { "epoch": 0.3607032733658696, "grad_norm": 0.6098098382946556, "learning_rate": 0.00018837668679833725, "loss": 12.4919, "step": 6624 }, { "epoch": 0.3607577273624526, "grad_norm": 0.6995080045001343, "learning_rate": 0.0001883725601494387, "loss": 12.4121, "step": 6625 }, { "epoch": 0.36081218135903564, "grad_norm": 0.6393239609282849, "learning_rate": 0.00018836843281334024, "loss": 12.4995, "step": 6626 }, { "epoch": 0.36086663535561864, "grad_norm": 0.6179401434135583, "learning_rate": 0.0001883643047900741, "loss": 12.4329, "step": 6627 }, { "epoch": 0.36092108935220163, "grad_norm": 0.7364688329740939, "learning_rate": 0.00018836017607967227, "loss": 12.5791, "step": 6628 }, { "epoch": 0.3609755433487847, "grad_norm": 0.7445944340871038, "learning_rate": 0.0001883560466821669, "loss": 12.4741, "step": 6629 }, { "epoch": 0.36102999734536767, "grad_norm": 0.6936132071756218, "learning_rate": 0.00018835191659759008, "loss": 12.5278, "step": 6630 }, { "epoch": 0.36108445134195066, "grad_norm": 0.6341963179540083, "learning_rate": 0.00018834778582597396, "loss": 12.5872, "step": 6631 }, { "epoch": 0.3611389053385337, "grad_norm": 0.6296041806316938, "learning_rate": 0.00018834365436735064, "loss": 12.4316, "step": 6632 }, { "epoch": 0.3611933593351167, "grad_norm": 0.6374466468077414, "learning_rate": 0.00018833952222175224, "loss": 12.5258, "step": 6633 }, { "epoch": 0.3612478133316997, "grad_norm": 0.6999344709830713, "learning_rate": 0.0001883353893892109, "loss": 12.5829, "step": 6634 }, { "epoch": 0.36130226732828274, "grad_norm": 0.6904477138465861, "learning_rate": 0.00018833125586975878, "loss": 12.4849, "step": 6635 }, { "epoch": 0.36135672132486574, "grad_norm": 0.662277379149618, "learning_rate": 0.00018832712166342796, "loss": 12.5352, "step": 6636 }, { "epoch": 0.36141117532144873, "grad_norm": 0.821381962854756, "learning_rate": 0.00018832298677025068, "loss": 12.4751, "step": 6637 }, { "epoch": 0.3614656293180318, "grad_norm": 0.6317517204534076, "learning_rate": 0.000188318851190259, "loss": 12.461, "step": 6638 }, { "epoch": 0.36152008331461477, "grad_norm": 0.6556528541474681, "learning_rate": 0.00018831471492348513, "loss": 12.5782, "step": 6639 }, { "epoch": 0.36157453731119776, "grad_norm": 0.6910220177221039, "learning_rate": 0.00018831057796996124, "loss": 12.5669, "step": 6640 }, { "epoch": 0.3616289913077808, "grad_norm": 0.7937937585155153, "learning_rate": 0.0001883064403297195, "loss": 12.6712, "step": 6641 }, { "epoch": 0.3616834453043638, "grad_norm": 0.7329019212487958, "learning_rate": 0.00018830230200279205, "loss": 12.4129, "step": 6642 }, { "epoch": 0.3617378993009468, "grad_norm": 0.7721716116033265, "learning_rate": 0.0001882981629892111, "loss": 12.4551, "step": 6643 }, { "epoch": 0.36179235329752985, "grad_norm": 0.7340558824600818, "learning_rate": 0.00018829402328900883, "loss": 12.3573, "step": 6644 }, { "epoch": 0.36184680729411284, "grad_norm": 0.7346948738214002, "learning_rate": 0.0001882898829022174, "loss": 12.496, "step": 6645 }, { "epoch": 0.36190126129069583, "grad_norm": 0.8316671174602631, "learning_rate": 0.00018828574182886903, "loss": 12.5202, "step": 6646 }, { "epoch": 0.3619557152872789, "grad_norm": 0.7174804265086283, "learning_rate": 0.00018828160006899598, "loss": 12.4468, "step": 6647 }, { "epoch": 0.36201016928386187, "grad_norm": 0.7011147967368828, "learning_rate": 0.00018827745762263037, "loss": 12.5342, "step": 6648 }, { "epoch": 0.36206462328044486, "grad_norm": 0.683462054591662, "learning_rate": 0.00018827331448980443, "loss": 12.5567, "step": 6649 }, { "epoch": 0.3621190772770279, "grad_norm": 0.8090110863198164, "learning_rate": 0.00018826917067055044, "loss": 12.5956, "step": 6650 }, { "epoch": 0.3621735312736109, "grad_norm": 0.6607693188818475, "learning_rate": 0.00018826502616490053, "loss": 12.5962, "step": 6651 }, { "epoch": 0.3622279852701939, "grad_norm": 0.6767689806560603, "learning_rate": 0.000188260880972887, "loss": 12.5465, "step": 6652 }, { "epoch": 0.36228243926677695, "grad_norm": 0.6372209862019359, "learning_rate": 0.00018825673509454202, "loss": 12.2851, "step": 6653 }, { "epoch": 0.36233689326335994, "grad_norm": 0.635820161629741, "learning_rate": 0.0001882525885298979, "loss": 12.4901, "step": 6654 }, { "epoch": 0.362391347259943, "grad_norm": 0.7700231819144508, "learning_rate": 0.00018824844127898687, "loss": 12.6406, "step": 6655 }, { "epoch": 0.362445801256526, "grad_norm": 0.6610835813691188, "learning_rate": 0.00018824429334184112, "loss": 12.5198, "step": 6656 }, { "epoch": 0.36250025525310897, "grad_norm": 0.6923623425981027, "learning_rate": 0.000188240144718493, "loss": 12.5646, "step": 6657 }, { "epoch": 0.362554709249692, "grad_norm": 0.7295852877262856, "learning_rate": 0.00018823599540897465, "loss": 12.5918, "step": 6658 }, { "epoch": 0.362609163246275, "grad_norm": 0.5943802872107438, "learning_rate": 0.00018823184541331845, "loss": 12.3685, "step": 6659 }, { "epoch": 0.362663617242858, "grad_norm": 0.7152397393362633, "learning_rate": 0.0001882276947315566, "loss": 12.4319, "step": 6660 }, { "epoch": 0.36271807123944105, "grad_norm": 0.7250489702731249, "learning_rate": 0.0001882235433637214, "loss": 12.5264, "step": 6661 }, { "epoch": 0.36277252523602405, "grad_norm": 0.5924669756695498, "learning_rate": 0.00018821939130984517, "loss": 12.2873, "step": 6662 }, { "epoch": 0.36282697923260704, "grad_norm": 0.645723736104555, "learning_rate": 0.00018821523856996013, "loss": 12.5939, "step": 6663 }, { "epoch": 0.3628814332291901, "grad_norm": 0.5875140836827224, "learning_rate": 0.00018821108514409856, "loss": 12.474, "step": 6664 }, { "epoch": 0.3629358872257731, "grad_norm": 0.6503600619137254, "learning_rate": 0.00018820693103229288, "loss": 12.4141, "step": 6665 }, { "epoch": 0.36299034122235607, "grad_norm": 0.6845049787509762, "learning_rate": 0.00018820277623457526, "loss": 12.6379, "step": 6666 }, { "epoch": 0.3630447952189391, "grad_norm": 0.636216840869751, "learning_rate": 0.00018819862075097806, "loss": 12.3587, "step": 6667 }, { "epoch": 0.3630992492155221, "grad_norm": 0.6478000866250779, "learning_rate": 0.00018819446458153362, "loss": 12.4389, "step": 6668 }, { "epoch": 0.3631537032121051, "grad_norm": 0.6636993194800846, "learning_rate": 0.0001881903077262742, "loss": 12.4896, "step": 6669 }, { "epoch": 0.36320815720868815, "grad_norm": 0.6230684019836505, "learning_rate": 0.0001881861501852322, "loss": 12.6979, "step": 6670 }, { "epoch": 0.36326261120527115, "grad_norm": 0.6421208304249839, "learning_rate": 0.00018818199195843986, "loss": 12.4471, "step": 6671 }, { "epoch": 0.36331706520185414, "grad_norm": 0.6028078428535656, "learning_rate": 0.00018817783304592959, "loss": 12.4494, "step": 6672 }, { "epoch": 0.3633715191984372, "grad_norm": 0.6221622025387541, "learning_rate": 0.00018817367344773372, "loss": 12.4518, "step": 6673 }, { "epoch": 0.3634259731950202, "grad_norm": 0.6563182110144833, "learning_rate": 0.00018816951316388453, "loss": 12.6353, "step": 6674 }, { "epoch": 0.3634804271916032, "grad_norm": 0.6485631719952629, "learning_rate": 0.00018816535219441446, "loss": 12.4347, "step": 6675 }, { "epoch": 0.3635348811881862, "grad_norm": 0.6252855421360843, "learning_rate": 0.0001881611905393558, "loss": 12.4695, "step": 6676 }, { "epoch": 0.3635893351847692, "grad_norm": 0.6735701127245386, "learning_rate": 0.00018815702819874097, "loss": 12.5676, "step": 6677 }, { "epoch": 0.3636437891813522, "grad_norm": 0.7415341714654862, "learning_rate": 0.00018815286517260229, "loss": 12.764, "step": 6678 }, { "epoch": 0.36369824317793525, "grad_norm": 0.6322693357159745, "learning_rate": 0.00018814870146097214, "loss": 12.5009, "step": 6679 }, { "epoch": 0.36375269717451825, "grad_norm": 0.6458801056972988, "learning_rate": 0.00018814453706388287, "loss": 12.5047, "step": 6680 }, { "epoch": 0.36380715117110124, "grad_norm": 0.7096837355005496, "learning_rate": 0.00018814037198136698, "loss": 12.4631, "step": 6681 }, { "epoch": 0.3638616051676843, "grad_norm": 0.6345182119218137, "learning_rate": 0.0001881362062134567, "loss": 12.4933, "step": 6682 }, { "epoch": 0.3639160591642673, "grad_norm": 0.6692866959051389, "learning_rate": 0.00018813203976018455, "loss": 12.4697, "step": 6683 }, { "epoch": 0.3639705131608503, "grad_norm": 0.6995810148133702, "learning_rate": 0.00018812787262158286, "loss": 12.5705, "step": 6684 }, { "epoch": 0.3640249671574333, "grad_norm": 0.7825305327360113, "learning_rate": 0.00018812370479768406, "loss": 12.6368, "step": 6685 }, { "epoch": 0.3640794211540163, "grad_norm": 0.6714265725827628, "learning_rate": 0.00018811953628852056, "loss": 12.5814, "step": 6686 }, { "epoch": 0.3641338751505993, "grad_norm": 0.8482129985990133, "learning_rate": 0.00018811536709412475, "loss": 12.4584, "step": 6687 }, { "epoch": 0.36418832914718235, "grad_norm": 0.6692639872542804, "learning_rate": 0.00018811119721452908, "loss": 12.4091, "step": 6688 }, { "epoch": 0.36424278314376535, "grad_norm": 0.844682059447251, "learning_rate": 0.00018810702664976594, "loss": 12.72, "step": 6689 }, { "epoch": 0.3642972371403484, "grad_norm": 0.7272075202619086, "learning_rate": 0.0001881028553998678, "loss": 12.4081, "step": 6690 }, { "epoch": 0.3643516911369314, "grad_norm": 0.7004977677490962, "learning_rate": 0.0001880986834648671, "loss": 12.4538, "step": 6691 }, { "epoch": 0.3644061451335144, "grad_norm": 0.8629727368672526, "learning_rate": 0.00018809451084479624, "loss": 12.6221, "step": 6692 }, { "epoch": 0.36446059913009743, "grad_norm": 0.7851367318362038, "learning_rate": 0.0001880903375396877, "loss": 12.5663, "step": 6693 }, { "epoch": 0.3645150531266804, "grad_norm": 0.6854981472693442, "learning_rate": 0.0001880861635495739, "loss": 12.4976, "step": 6694 }, { "epoch": 0.3645695071232634, "grad_norm": 0.7607531587291991, "learning_rate": 0.00018808198887448736, "loss": 12.5451, "step": 6695 }, { "epoch": 0.36462396111984646, "grad_norm": 0.6346208042477164, "learning_rate": 0.00018807781351446048, "loss": 12.526, "step": 6696 }, { "epoch": 0.36467841511642946, "grad_norm": 0.669565982871631, "learning_rate": 0.0001880736374695258, "loss": 12.4964, "step": 6697 }, { "epoch": 0.36473286911301245, "grad_norm": 0.7089403356676109, "learning_rate": 0.00018806946073971569, "loss": 12.4569, "step": 6698 }, { "epoch": 0.3647873231095955, "grad_norm": 0.8414280784882965, "learning_rate": 0.0001880652833250627, "loss": 12.5536, "step": 6699 }, { "epoch": 0.3648417771061785, "grad_norm": 0.6479374855180915, "learning_rate": 0.00018806110522559926, "loss": 12.4727, "step": 6700 }, { "epoch": 0.3648962311027615, "grad_norm": 0.7031712491465153, "learning_rate": 0.00018805692644135796, "loss": 12.5094, "step": 6701 }, { "epoch": 0.36495068509934453, "grad_norm": 0.6561201324217154, "learning_rate": 0.00018805274697237119, "loss": 12.6912, "step": 6702 }, { "epoch": 0.3650051390959275, "grad_norm": 0.8083614773954539, "learning_rate": 0.00018804856681867152, "loss": 12.5316, "step": 6703 }, { "epoch": 0.3650595930925105, "grad_norm": 0.67374064745605, "learning_rate": 0.0001880443859802914, "loss": 12.6457, "step": 6704 }, { "epoch": 0.36511404708909356, "grad_norm": 0.6532324213824884, "learning_rate": 0.00018804020445726337, "loss": 12.5391, "step": 6705 }, { "epoch": 0.36516850108567656, "grad_norm": 0.6757495628646862, "learning_rate": 0.00018803602224962, "loss": 12.5225, "step": 6706 }, { "epoch": 0.36522295508225955, "grad_norm": 0.6432686278770082, "learning_rate": 0.0001880318393573937, "loss": 12.453, "step": 6707 }, { "epoch": 0.3652774090788426, "grad_norm": 0.7883001660334636, "learning_rate": 0.00018802765578061705, "loss": 12.5551, "step": 6708 }, { "epoch": 0.3653318630754256, "grad_norm": 0.6461071475432286, "learning_rate": 0.00018802347151932264, "loss": 12.55, "step": 6709 }, { "epoch": 0.3653863170720086, "grad_norm": 0.6691570814181542, "learning_rate": 0.0001880192865735429, "loss": 12.6225, "step": 6710 }, { "epoch": 0.36544077106859163, "grad_norm": 0.6922546666923483, "learning_rate": 0.00018801510094331047, "loss": 12.4039, "step": 6711 }, { "epoch": 0.3654952250651746, "grad_norm": 0.5936430837492487, "learning_rate": 0.00018801091462865784, "loss": 12.4361, "step": 6712 }, { "epoch": 0.3655496790617576, "grad_norm": 0.6664258393725, "learning_rate": 0.00018800672762961758, "loss": 12.4437, "step": 6713 }, { "epoch": 0.36560413305834066, "grad_norm": 0.5887534964830626, "learning_rate": 0.00018800253994622222, "loss": 12.476, "step": 6714 }, { "epoch": 0.36565858705492366, "grad_norm": 0.6531177932509925, "learning_rate": 0.00018799835157850439, "loss": 12.6041, "step": 6715 }, { "epoch": 0.36571304105150665, "grad_norm": 0.7136114805228129, "learning_rate": 0.00018799416252649658, "loss": 12.5732, "step": 6716 }, { "epoch": 0.3657674950480897, "grad_norm": 0.6176999560468083, "learning_rate": 0.00018798997279023143, "loss": 12.4697, "step": 6717 }, { "epoch": 0.3658219490446727, "grad_norm": 0.641058439634723, "learning_rate": 0.00018798578236974153, "loss": 12.5075, "step": 6718 }, { "epoch": 0.3658764030412557, "grad_norm": 0.693524325390353, "learning_rate": 0.00018798159126505936, "loss": 12.614, "step": 6719 }, { "epoch": 0.36593085703783873, "grad_norm": 0.6367206031439175, "learning_rate": 0.00018797739947621763, "loss": 12.5712, "step": 6720 }, { "epoch": 0.3659853110344217, "grad_norm": 0.6231257744211336, "learning_rate": 0.00018797320700324885, "loss": 12.4514, "step": 6721 }, { "epoch": 0.36603976503100477, "grad_norm": 0.6479835224474679, "learning_rate": 0.0001879690138461857, "loss": 12.6704, "step": 6722 }, { "epoch": 0.36609421902758776, "grad_norm": 0.6621267367927367, "learning_rate": 0.00018796482000506072, "loss": 12.4577, "step": 6723 }, { "epoch": 0.36614867302417076, "grad_norm": 0.6044248186047356, "learning_rate": 0.00018796062547990657, "loss": 12.3559, "step": 6724 }, { "epoch": 0.3662031270207538, "grad_norm": 0.6139721548320676, "learning_rate": 0.00018795643027075585, "loss": 12.503, "step": 6725 }, { "epoch": 0.3662575810173368, "grad_norm": 0.652693152929206, "learning_rate": 0.00018795223437764115, "loss": 12.4726, "step": 6726 }, { "epoch": 0.3663120350139198, "grad_norm": 0.5914756538338115, "learning_rate": 0.0001879480378005951, "loss": 12.3033, "step": 6727 }, { "epoch": 0.36636648901050284, "grad_norm": 0.7254217447618527, "learning_rate": 0.00018794384053965043, "loss": 12.5628, "step": 6728 }, { "epoch": 0.36642094300708583, "grad_norm": 0.5891298822369286, "learning_rate": 0.00018793964259483968, "loss": 12.4264, "step": 6729 }, { "epoch": 0.3664753970036688, "grad_norm": 0.6553598414916912, "learning_rate": 0.0001879354439661955, "loss": 12.5271, "step": 6730 }, { "epoch": 0.36652985100025187, "grad_norm": 0.6145565110030625, "learning_rate": 0.0001879312446537506, "loss": 12.3724, "step": 6731 }, { "epoch": 0.36658430499683486, "grad_norm": 0.664373293286463, "learning_rate": 0.00018792704465753755, "loss": 12.6268, "step": 6732 }, { "epoch": 0.36663875899341786, "grad_norm": 0.6838236991001653, "learning_rate": 0.00018792284397758908, "loss": 12.6584, "step": 6733 }, { "epoch": 0.3666932129900009, "grad_norm": 0.6663236308420681, "learning_rate": 0.00018791864261393784, "loss": 12.4839, "step": 6734 }, { "epoch": 0.3667476669865839, "grad_norm": 0.6418049564560676, "learning_rate": 0.00018791444056661646, "loss": 12.5051, "step": 6735 }, { "epoch": 0.3668021209831669, "grad_norm": 0.5569929085876848, "learning_rate": 0.0001879102378356577, "loss": 12.5471, "step": 6736 }, { "epoch": 0.36685657497974994, "grad_norm": 0.67123615672468, "learning_rate": 0.00018790603442109412, "loss": 12.4874, "step": 6737 }, { "epoch": 0.36691102897633293, "grad_norm": 0.6303929900678815, "learning_rate": 0.0001879018303229585, "loss": 12.415, "step": 6738 }, { "epoch": 0.3669654829729159, "grad_norm": 0.7224967526388294, "learning_rate": 0.0001878976255412835, "loss": 12.4743, "step": 6739 }, { "epoch": 0.36701993696949897, "grad_norm": 0.6074412775014366, "learning_rate": 0.00018789342007610187, "loss": 12.4987, "step": 6740 }, { "epoch": 0.36707439096608196, "grad_norm": 0.6347238589759355, "learning_rate": 0.00018788921392744624, "loss": 12.517, "step": 6741 }, { "epoch": 0.36712884496266496, "grad_norm": 0.6664557545586834, "learning_rate": 0.00018788500709534934, "loss": 12.55, "step": 6742 }, { "epoch": 0.367183298959248, "grad_norm": 0.6477748468933896, "learning_rate": 0.00018788079957984385, "loss": 12.4114, "step": 6743 }, { "epoch": 0.367237752955831, "grad_norm": 0.6270361143383161, "learning_rate": 0.00018787659138096258, "loss": 12.3699, "step": 6744 }, { "epoch": 0.367292206952414, "grad_norm": 0.6315587680247866, "learning_rate": 0.0001878723824987382, "loss": 12.5573, "step": 6745 }, { "epoch": 0.36734666094899704, "grad_norm": 0.6682517723733418, "learning_rate": 0.00018786817293320337, "loss": 12.4946, "step": 6746 }, { "epoch": 0.36740111494558003, "grad_norm": 0.5731857714905204, "learning_rate": 0.00018786396268439094, "loss": 12.5497, "step": 6747 }, { "epoch": 0.367455568942163, "grad_norm": 0.7154608334837631, "learning_rate": 0.00018785975175233363, "loss": 12.4118, "step": 6748 }, { "epoch": 0.3675100229387461, "grad_norm": 0.6314309590611179, "learning_rate": 0.00018785554013706413, "loss": 12.5279, "step": 6749 }, { "epoch": 0.36756447693532907, "grad_norm": 0.7327550590135451, "learning_rate": 0.0001878513278386152, "loss": 12.644, "step": 6750 }, { "epoch": 0.36761893093191206, "grad_norm": 0.6563415578448006, "learning_rate": 0.0001878471148570196, "loss": 12.4376, "step": 6751 }, { "epoch": 0.3676733849284951, "grad_norm": 0.7432858762702095, "learning_rate": 0.00018784290119231014, "loss": 12.5806, "step": 6752 }, { "epoch": 0.3677278389250781, "grad_norm": 0.6496962666478835, "learning_rate": 0.00018783868684451953, "loss": 12.4287, "step": 6753 }, { "epoch": 0.3677822929216611, "grad_norm": 0.5991457432589863, "learning_rate": 0.00018783447181368058, "loss": 12.4827, "step": 6754 }, { "epoch": 0.36783674691824414, "grad_norm": 0.8446082627576676, "learning_rate": 0.00018783025609982602, "loss": 12.4579, "step": 6755 }, { "epoch": 0.36789120091482713, "grad_norm": 0.6363909276299062, "learning_rate": 0.00018782603970298869, "loss": 12.4383, "step": 6756 }, { "epoch": 0.3679456549114102, "grad_norm": 0.6666559724643025, "learning_rate": 0.00018782182262320132, "loss": 12.6553, "step": 6757 }, { "epoch": 0.3680001089079932, "grad_norm": 0.6768404315222415, "learning_rate": 0.00018781760486049674, "loss": 12.5123, "step": 6758 }, { "epoch": 0.36805456290457617, "grad_norm": 0.6747314018648729, "learning_rate": 0.00018781338641490772, "loss": 12.5436, "step": 6759 }, { "epoch": 0.3681090169011592, "grad_norm": 0.7112379071050334, "learning_rate": 0.0001878091672864671, "loss": 12.5604, "step": 6760 }, { "epoch": 0.3681634708977422, "grad_norm": 0.7209862155449003, "learning_rate": 0.00018780494747520766, "loss": 12.4318, "step": 6761 }, { "epoch": 0.3682179248943252, "grad_norm": 0.6681822817895532, "learning_rate": 0.00018780072698116224, "loss": 12.5159, "step": 6762 }, { "epoch": 0.36827237889090825, "grad_norm": 0.6863679955143484, "learning_rate": 0.00018779650580436362, "loss": 12.5072, "step": 6763 }, { "epoch": 0.36832683288749124, "grad_norm": 0.6044769521952945, "learning_rate": 0.00018779228394484468, "loss": 12.4695, "step": 6764 }, { "epoch": 0.36838128688407423, "grad_norm": 0.6071552219295997, "learning_rate": 0.0001877880614026382, "loss": 12.4761, "step": 6765 }, { "epoch": 0.3684357408806573, "grad_norm": 0.7369894755682067, "learning_rate": 0.00018778383817777704, "loss": 12.614, "step": 6766 }, { "epoch": 0.3684901948772403, "grad_norm": 0.6885261413499081, "learning_rate": 0.000187779614270294, "loss": 12.6022, "step": 6767 }, { "epoch": 0.36854464887382327, "grad_norm": 0.6742656846843884, "learning_rate": 0.00018777538968022198, "loss": 12.3581, "step": 6768 }, { "epoch": 0.3685991028704063, "grad_norm": 0.7097374898586997, "learning_rate": 0.0001877711644075938, "loss": 12.4439, "step": 6769 }, { "epoch": 0.3686535568669893, "grad_norm": 0.7041533992553495, "learning_rate": 0.00018776693845244235, "loss": 12.5134, "step": 6770 }, { "epoch": 0.3687080108635723, "grad_norm": 0.6803788854145529, "learning_rate": 0.00018776271181480047, "loss": 12.5001, "step": 6771 }, { "epoch": 0.36876246486015535, "grad_norm": 0.6677003831250875, "learning_rate": 0.000187758484494701, "loss": 12.4216, "step": 6772 }, { "epoch": 0.36881691885673834, "grad_norm": 1.0267058549178372, "learning_rate": 0.00018775425649217685, "loss": 12.4736, "step": 6773 }, { "epoch": 0.36887137285332133, "grad_norm": 0.7316182162253633, "learning_rate": 0.0001877500278072609, "loss": 12.399, "step": 6774 }, { "epoch": 0.3689258268499044, "grad_norm": 0.6166976637914323, "learning_rate": 0.000187745798439986, "loss": 12.5217, "step": 6775 }, { "epoch": 0.3689802808464874, "grad_norm": 0.677681470141228, "learning_rate": 0.00018774156839038506, "loss": 12.3609, "step": 6776 }, { "epoch": 0.36903473484307037, "grad_norm": 0.6906842272517986, "learning_rate": 0.00018773733765849095, "loss": 12.4438, "step": 6777 }, { "epoch": 0.3690891888396534, "grad_norm": 0.636827348006488, "learning_rate": 0.0001877331062443366, "loss": 12.6218, "step": 6778 }, { "epoch": 0.3691436428362364, "grad_norm": 0.6668917508706977, "learning_rate": 0.00018772887414795494, "loss": 12.4354, "step": 6779 }, { "epoch": 0.3691980968328194, "grad_norm": 0.7166015009254231, "learning_rate": 0.00018772464136937878, "loss": 12.475, "step": 6780 }, { "epoch": 0.36925255082940245, "grad_norm": 0.5999520292287912, "learning_rate": 0.00018772040790864116, "loss": 12.498, "step": 6781 }, { "epoch": 0.36930700482598544, "grad_norm": 0.7259940123210213, "learning_rate": 0.00018771617376577487, "loss": 12.5116, "step": 6782 }, { "epoch": 0.36936145882256843, "grad_norm": 0.7414603041330218, "learning_rate": 0.00018771193894081295, "loss": 12.4914, "step": 6783 }, { "epoch": 0.3694159128191515, "grad_norm": 0.6098300155081475, "learning_rate": 0.00018770770343378828, "loss": 12.3549, "step": 6784 }, { "epoch": 0.3694703668157345, "grad_norm": 0.6771752107026059, "learning_rate": 0.0001877034672447338, "loss": 12.4367, "step": 6785 }, { "epoch": 0.36952482081231747, "grad_norm": 0.6127061024770842, "learning_rate": 0.00018769923037368244, "loss": 12.5099, "step": 6786 }, { "epoch": 0.3695792748089005, "grad_norm": 0.6713858656002362, "learning_rate": 0.00018769499282066717, "loss": 12.5197, "step": 6787 }, { "epoch": 0.3696337288054835, "grad_norm": 0.6113076049001404, "learning_rate": 0.0001876907545857209, "loss": 12.4514, "step": 6788 }, { "epoch": 0.36968818280206656, "grad_norm": 0.69200188488012, "learning_rate": 0.00018768651566887664, "loss": 12.5427, "step": 6789 }, { "epoch": 0.36974263679864955, "grad_norm": 0.649783035094314, "learning_rate": 0.00018768227607016735, "loss": 12.5515, "step": 6790 }, { "epoch": 0.36979709079523254, "grad_norm": 0.651731885718608, "learning_rate": 0.00018767803578962594, "loss": 12.3617, "step": 6791 }, { "epoch": 0.3698515447918156, "grad_norm": 0.6236780373250338, "learning_rate": 0.00018767379482728544, "loss": 12.4804, "step": 6792 }, { "epoch": 0.3699059987883986, "grad_norm": 0.6290590856325058, "learning_rate": 0.00018766955318317877, "loss": 12.5694, "step": 6793 }, { "epoch": 0.3699604527849816, "grad_norm": 0.994191559529947, "learning_rate": 0.000187665310857339, "loss": 12.3978, "step": 6794 }, { "epoch": 0.3700149067815646, "grad_norm": 0.6691599006801765, "learning_rate": 0.00018766106784979907, "loss": 12.5427, "step": 6795 }, { "epoch": 0.3700693607781476, "grad_norm": 0.6725550264378077, "learning_rate": 0.00018765682416059195, "loss": 12.5197, "step": 6796 }, { "epoch": 0.3701238147747306, "grad_norm": 0.7775858640825722, "learning_rate": 0.00018765257978975067, "loss": 12.565, "step": 6797 }, { "epoch": 0.37017826877131366, "grad_norm": 0.7820371214677028, "learning_rate": 0.00018764833473730824, "loss": 12.5492, "step": 6798 }, { "epoch": 0.37023272276789665, "grad_norm": 0.6070673240639204, "learning_rate": 0.00018764408900329767, "loss": 12.3199, "step": 6799 }, { "epoch": 0.37028717676447964, "grad_norm": 0.7546031888907322, "learning_rate": 0.00018763984258775195, "loss": 12.4831, "step": 6800 }, { "epoch": 0.3703416307610627, "grad_norm": 0.6614244889442318, "learning_rate": 0.00018763559549070413, "loss": 12.4839, "step": 6801 }, { "epoch": 0.3703960847576457, "grad_norm": 0.66700405636965, "learning_rate": 0.0001876313477121872, "loss": 12.444, "step": 6802 }, { "epoch": 0.3704505387542287, "grad_norm": 0.6430092498863155, "learning_rate": 0.00018762709925223422, "loss": 12.5453, "step": 6803 }, { "epoch": 0.3705049927508117, "grad_norm": 0.6240645971741429, "learning_rate": 0.00018762285011087823, "loss": 12.5121, "step": 6804 }, { "epoch": 0.3705594467473947, "grad_norm": 0.7551045479724068, "learning_rate": 0.00018761860028815227, "loss": 12.4146, "step": 6805 }, { "epoch": 0.3706139007439777, "grad_norm": 0.6744163269445749, "learning_rate": 0.00018761434978408937, "loss": 12.4616, "step": 6806 }, { "epoch": 0.37066835474056076, "grad_norm": 0.6083501446794017, "learning_rate": 0.00018761009859872259, "loss": 12.4124, "step": 6807 }, { "epoch": 0.37072280873714375, "grad_norm": 0.6741727963074209, "learning_rate": 0.000187605846732085, "loss": 12.4202, "step": 6808 }, { "epoch": 0.37077726273372674, "grad_norm": 0.6277622677085708, "learning_rate": 0.00018760159418420967, "loss": 12.482, "step": 6809 }, { "epoch": 0.3708317167303098, "grad_norm": 0.6334500184621313, "learning_rate": 0.00018759734095512962, "loss": 12.4696, "step": 6810 }, { "epoch": 0.3708861707268928, "grad_norm": 0.7533923247549392, "learning_rate": 0.00018759308704487796, "loss": 12.5491, "step": 6811 }, { "epoch": 0.3709406247234758, "grad_norm": 0.6355387749570965, "learning_rate": 0.0001875888324534878, "loss": 12.4132, "step": 6812 }, { "epoch": 0.3709950787200588, "grad_norm": 0.6199304004384589, "learning_rate": 0.00018758457718099213, "loss": 12.4855, "step": 6813 }, { "epoch": 0.3710495327166418, "grad_norm": 0.6562595002491339, "learning_rate": 0.00018758032122742415, "loss": 12.5171, "step": 6814 }, { "epoch": 0.3711039867132248, "grad_norm": 0.7378677389636937, "learning_rate": 0.0001875760645928169, "loss": 12.6526, "step": 6815 }, { "epoch": 0.37115844070980786, "grad_norm": 0.7079173817993047, "learning_rate": 0.00018757180727720348, "loss": 12.5506, "step": 6816 }, { "epoch": 0.37121289470639085, "grad_norm": 0.7743100219942269, "learning_rate": 0.000187567549280617, "loss": 12.42, "step": 6817 }, { "epoch": 0.37126734870297384, "grad_norm": 0.6741818318526057, "learning_rate": 0.00018756329060309055, "loss": 12.5836, "step": 6818 }, { "epoch": 0.3713218026995569, "grad_norm": 0.7489878835475556, "learning_rate": 0.0001875590312446573, "loss": 12.5678, "step": 6819 }, { "epoch": 0.3713762566961399, "grad_norm": 0.7337700505792882, "learning_rate": 0.0001875547712053503, "loss": 12.3313, "step": 6820 }, { "epoch": 0.3714307106927229, "grad_norm": 0.643565639957734, "learning_rate": 0.00018755051048520275, "loss": 12.3265, "step": 6821 }, { "epoch": 0.3714851646893059, "grad_norm": 0.7085073729411158, "learning_rate": 0.00018754624908424777, "loss": 12.5107, "step": 6822 }, { "epoch": 0.3715396186858889, "grad_norm": 0.7446332347373452, "learning_rate": 0.00018754198700251842, "loss": 12.5754, "step": 6823 }, { "epoch": 0.37159407268247197, "grad_norm": 0.7543591298912838, "learning_rate": 0.00018753772424004791, "loss": 12.5493, "step": 6824 }, { "epoch": 0.37164852667905496, "grad_norm": 0.779747215779012, "learning_rate": 0.00018753346079686942, "loss": 12.6352, "step": 6825 }, { "epoch": 0.37170298067563795, "grad_norm": 0.6777786449446311, "learning_rate": 0.00018752919667301603, "loss": 12.4106, "step": 6826 }, { "epoch": 0.371757434672221, "grad_norm": 0.7842274548150086, "learning_rate": 0.0001875249318685209, "loss": 12.545, "step": 6827 }, { "epoch": 0.371811888668804, "grad_norm": 0.6771490065998343, "learning_rate": 0.00018752066638341724, "loss": 12.4642, "step": 6828 }, { "epoch": 0.371866342665387, "grad_norm": 0.6722420889789897, "learning_rate": 0.00018751640021773822, "loss": 12.4239, "step": 6829 }, { "epoch": 0.37192079666197003, "grad_norm": 0.7656829797000084, "learning_rate": 0.00018751213337151699, "loss": 12.4541, "step": 6830 }, { "epoch": 0.371975250658553, "grad_norm": 0.6173941419610085, "learning_rate": 0.00018750786584478674, "loss": 12.5112, "step": 6831 }, { "epoch": 0.372029704655136, "grad_norm": 0.6862551318512679, "learning_rate": 0.00018750359763758064, "loss": 12.5843, "step": 6832 }, { "epoch": 0.37208415865171907, "grad_norm": 0.7438790929735515, "learning_rate": 0.00018749932874993191, "loss": 12.3711, "step": 6833 }, { "epoch": 0.37213861264830206, "grad_norm": 0.7148570406597187, "learning_rate": 0.00018749505918187368, "loss": 12.6131, "step": 6834 }, { "epoch": 0.37219306664488505, "grad_norm": 0.6099944499008384, "learning_rate": 0.00018749078893343923, "loss": 12.4924, "step": 6835 }, { "epoch": 0.3722475206414681, "grad_norm": 0.6416494040504213, "learning_rate": 0.00018748651800466176, "loss": 12.4666, "step": 6836 }, { "epoch": 0.3723019746380511, "grad_norm": 0.5960517211523131, "learning_rate": 0.0001874822463955744, "loss": 12.4964, "step": 6837 }, { "epoch": 0.3723564286346341, "grad_norm": 0.7202137666628853, "learning_rate": 0.00018747797410621043, "loss": 12.5145, "step": 6838 }, { "epoch": 0.37241088263121713, "grad_norm": 0.6686744255372455, "learning_rate": 0.0001874737011366031, "loss": 12.509, "step": 6839 }, { "epoch": 0.3724653366278001, "grad_norm": 0.5923281634322939, "learning_rate": 0.00018746942748678556, "loss": 12.3796, "step": 6840 }, { "epoch": 0.3725197906243831, "grad_norm": 0.6462348839480198, "learning_rate": 0.00018746515315679112, "loss": 12.4071, "step": 6841 }, { "epoch": 0.37257424462096617, "grad_norm": 0.6403103417823736, "learning_rate": 0.00018746087814665297, "loss": 12.3721, "step": 6842 }, { "epoch": 0.37262869861754916, "grad_norm": 0.6452511968744338, "learning_rate": 0.00018745660245640433, "loss": 12.4392, "step": 6843 }, { "epoch": 0.37268315261413215, "grad_norm": 0.8042292528683954, "learning_rate": 0.0001874523260860785, "loss": 12.56, "step": 6844 }, { "epoch": 0.3727376066107152, "grad_norm": 0.6047385552048565, "learning_rate": 0.00018744804903570873, "loss": 12.4637, "step": 6845 }, { "epoch": 0.3727920606072982, "grad_norm": 0.6150113789305852, "learning_rate": 0.00018744377130532826, "loss": 12.4973, "step": 6846 }, { "epoch": 0.3728465146038812, "grad_norm": 0.6082391745298686, "learning_rate": 0.00018743949289497035, "loss": 12.4028, "step": 6847 }, { "epoch": 0.37290096860046423, "grad_norm": 0.7418134326699819, "learning_rate": 0.00018743521380466832, "loss": 12.5765, "step": 6848 }, { "epoch": 0.3729554225970472, "grad_norm": 0.6251421328537706, "learning_rate": 0.00018743093403445537, "loss": 12.3914, "step": 6849 }, { "epoch": 0.3730098765936302, "grad_norm": 0.6643823954217485, "learning_rate": 0.00018742665358436483, "loss": 12.576, "step": 6850 }, { "epoch": 0.37306433059021327, "grad_norm": 0.6821878946854963, "learning_rate": 0.00018742237245442995, "loss": 12.4979, "step": 6851 }, { "epoch": 0.37311878458679626, "grad_norm": 0.6284956737925181, "learning_rate": 0.00018741809064468402, "loss": 12.3956, "step": 6852 }, { "epoch": 0.37317323858337925, "grad_norm": 0.5928727082407248, "learning_rate": 0.0001874138081551604, "loss": 12.3172, "step": 6853 }, { "epoch": 0.3732276925799623, "grad_norm": 0.7075238356745377, "learning_rate": 0.00018740952498589236, "loss": 12.5216, "step": 6854 }, { "epoch": 0.3732821465765453, "grad_norm": 0.8036941645145784, "learning_rate": 0.00018740524113691314, "loss": 12.7216, "step": 6855 }, { "epoch": 0.37333660057312834, "grad_norm": 0.6723133376471627, "learning_rate": 0.00018740095660825615, "loss": 12.4274, "step": 6856 }, { "epoch": 0.37339105456971133, "grad_norm": 0.691765799487332, "learning_rate": 0.00018739667139995464, "loss": 12.5515, "step": 6857 }, { "epoch": 0.3734455085662943, "grad_norm": 0.6374179067955774, "learning_rate": 0.00018739238551204198, "loss": 12.3448, "step": 6858 }, { "epoch": 0.3734999625628774, "grad_norm": 0.736793700634577, "learning_rate": 0.00018738809894455147, "loss": 12.4875, "step": 6859 }, { "epoch": 0.37355441655946037, "grad_norm": 0.714006602323917, "learning_rate": 0.00018738381169751644, "loss": 12.5406, "step": 6860 }, { "epoch": 0.37360887055604336, "grad_norm": 0.7584859501030283, "learning_rate": 0.00018737952377097025, "loss": 12.4817, "step": 6861 }, { "epoch": 0.3736633245526264, "grad_norm": 0.6704402496309448, "learning_rate": 0.0001873752351649462, "loss": 12.4883, "step": 6862 }, { "epoch": 0.3737177785492094, "grad_norm": 0.6354882856330369, "learning_rate": 0.0001873709458794777, "loss": 12.3731, "step": 6863 }, { "epoch": 0.3737722325457924, "grad_norm": 0.7091904296222025, "learning_rate": 0.0001873666559145981, "loss": 12.5834, "step": 6864 }, { "epoch": 0.37382668654237544, "grad_norm": 0.7834726155353686, "learning_rate": 0.00018736236527034067, "loss": 12.557, "step": 6865 }, { "epoch": 0.37388114053895843, "grad_norm": 0.6743082186571374, "learning_rate": 0.00018735807394673883, "loss": 12.4709, "step": 6866 }, { "epoch": 0.3739355945355414, "grad_norm": 0.6070605579786073, "learning_rate": 0.000187353781943826, "loss": 12.37, "step": 6867 }, { "epoch": 0.3739900485321245, "grad_norm": 0.6865918490759351, "learning_rate": 0.0001873494892616355, "loss": 12.5524, "step": 6868 }, { "epoch": 0.37404450252870747, "grad_norm": 0.662629494210567, "learning_rate": 0.00018734519590020071, "loss": 12.553, "step": 6869 }, { "epoch": 0.37409895652529046, "grad_norm": 0.6170089067050327, "learning_rate": 0.00018734090185955503, "loss": 12.4318, "step": 6870 }, { "epoch": 0.3741534105218735, "grad_norm": 0.6673199828990071, "learning_rate": 0.00018733660713973188, "loss": 12.344, "step": 6871 }, { "epoch": 0.3742078645184565, "grad_norm": 0.9030032262663705, "learning_rate": 0.0001873323117407646, "loss": 12.4672, "step": 6872 }, { "epoch": 0.3742623185150395, "grad_norm": 0.6476698207148913, "learning_rate": 0.00018732801566268662, "loss": 12.4269, "step": 6873 }, { "epoch": 0.37431677251162254, "grad_norm": 0.6389493481839894, "learning_rate": 0.00018732371890553136, "loss": 12.3736, "step": 6874 }, { "epoch": 0.37437122650820553, "grad_norm": 0.6581260506637848, "learning_rate": 0.0001873194214693322, "loss": 12.5084, "step": 6875 }, { "epoch": 0.3744256805047885, "grad_norm": 0.7331480328607267, "learning_rate": 0.0001873151233541226, "loss": 12.4586, "step": 6876 }, { "epoch": 0.3744801345013716, "grad_norm": 0.6757682925411165, "learning_rate": 0.00018731082455993595, "loss": 12.5669, "step": 6877 }, { "epoch": 0.37453458849795457, "grad_norm": 0.7647719878421511, "learning_rate": 0.00018730652508680567, "loss": 12.6505, "step": 6878 }, { "epoch": 0.37458904249453756, "grad_norm": 0.8106155568072291, "learning_rate": 0.0001873022249347652, "loss": 12.5532, "step": 6879 }, { "epoch": 0.3746434964911206, "grad_norm": 0.6475029617687691, "learning_rate": 0.000187297924103848, "loss": 12.3556, "step": 6880 }, { "epoch": 0.3746979504877036, "grad_norm": 0.6906212964994336, "learning_rate": 0.0001872936225940875, "loss": 12.5516, "step": 6881 }, { "epoch": 0.3747524044842866, "grad_norm": 0.8103337070967035, "learning_rate": 0.00018728932040551718, "loss": 12.4691, "step": 6882 }, { "epoch": 0.37480685848086964, "grad_norm": 0.6576683290497881, "learning_rate": 0.00018728501753817044, "loss": 12.5408, "step": 6883 }, { "epoch": 0.37486131247745264, "grad_norm": 0.7051994294658306, "learning_rate": 0.00018728071399208077, "loss": 12.342, "step": 6884 }, { "epoch": 0.3749157664740356, "grad_norm": 0.6795758079567125, "learning_rate": 0.00018727640976728163, "loss": 12.5536, "step": 6885 }, { "epoch": 0.3749702204706187, "grad_norm": 0.7042609974729568, "learning_rate": 0.00018727210486380649, "loss": 12.5584, "step": 6886 }, { "epoch": 0.37502467446720167, "grad_norm": 0.6567415913122908, "learning_rate": 0.00018726779928168882, "loss": 12.6032, "step": 6887 }, { "epoch": 0.37507912846378466, "grad_norm": 0.6822256992051228, "learning_rate": 0.00018726349302096212, "loss": 12.522, "step": 6888 }, { "epoch": 0.3751335824603677, "grad_norm": 0.7086191085604834, "learning_rate": 0.00018725918608165988, "loss": 12.5549, "step": 6889 }, { "epoch": 0.3751880364569507, "grad_norm": 0.7026318941518679, "learning_rate": 0.00018725487846381556, "loss": 12.498, "step": 6890 }, { "epoch": 0.37524249045353375, "grad_norm": 0.6746306298341949, "learning_rate": 0.0001872505701674627, "loss": 12.5457, "step": 6891 }, { "epoch": 0.37529694445011674, "grad_norm": 0.9491430206384743, "learning_rate": 0.0001872462611926347, "loss": 12.5156, "step": 6892 }, { "epoch": 0.37535139844669974, "grad_norm": 0.6081121088069118, "learning_rate": 0.0001872419515393652, "loss": 12.3423, "step": 6893 }, { "epoch": 0.3754058524432828, "grad_norm": 0.6289634087585562, "learning_rate": 0.00018723764120768762, "loss": 12.6402, "step": 6894 }, { "epoch": 0.3754603064398658, "grad_norm": 0.766105858999642, "learning_rate": 0.00018723333019763554, "loss": 12.333, "step": 6895 }, { "epoch": 0.37551476043644877, "grad_norm": 0.7784877505068041, "learning_rate": 0.00018722901850924247, "loss": 12.5083, "step": 6896 }, { "epoch": 0.3755692144330318, "grad_norm": 0.6293072113676585, "learning_rate": 0.0001872247061425419, "loss": 12.4921, "step": 6897 }, { "epoch": 0.3756236684296148, "grad_norm": 0.6539959921320518, "learning_rate": 0.00018722039309756737, "loss": 12.488, "step": 6898 }, { "epoch": 0.3756781224261978, "grad_norm": 0.7197826833160897, "learning_rate": 0.00018721607937435247, "loss": 12.2857, "step": 6899 }, { "epoch": 0.37573257642278085, "grad_norm": 0.7122843009448809, "learning_rate": 0.00018721176497293068, "loss": 12.4334, "step": 6900 }, { "epoch": 0.37578703041936384, "grad_norm": 0.6417828679475482, "learning_rate": 0.0001872074498933356, "loss": 12.4879, "step": 6901 }, { "epoch": 0.37584148441594684, "grad_norm": 0.7345247568517772, "learning_rate": 0.00018720313413560078, "loss": 12.5805, "step": 6902 }, { "epoch": 0.3758959384125299, "grad_norm": 0.6627147722447752, "learning_rate": 0.00018719881769975973, "loss": 12.4623, "step": 6903 }, { "epoch": 0.3759503924091129, "grad_norm": 0.7144649763094523, "learning_rate": 0.00018719450058584606, "loss": 12.5279, "step": 6904 }, { "epoch": 0.37600484640569587, "grad_norm": 0.7627784910727949, "learning_rate": 0.00018719018279389336, "loss": 12.713, "step": 6905 }, { "epoch": 0.3760593004022789, "grad_norm": 0.6574616799381727, "learning_rate": 0.00018718586432393515, "loss": 12.4512, "step": 6906 }, { "epoch": 0.3761137543988619, "grad_norm": 0.6138750975409948, "learning_rate": 0.00018718154517600503, "loss": 12.5478, "step": 6907 }, { "epoch": 0.3761682083954449, "grad_norm": 0.6381407623000527, "learning_rate": 0.00018717722535013662, "loss": 12.523, "step": 6908 }, { "epoch": 0.37622266239202795, "grad_norm": 0.5934615819263432, "learning_rate": 0.00018717290484636346, "loss": 12.4251, "step": 6909 }, { "epoch": 0.37627711638861094, "grad_norm": 0.6521590898875078, "learning_rate": 0.00018716858366471918, "loss": 12.5358, "step": 6910 }, { "epoch": 0.37633157038519394, "grad_norm": 0.7510537780545019, "learning_rate": 0.00018716426180523737, "loss": 12.4232, "step": 6911 }, { "epoch": 0.376386024381777, "grad_norm": 0.6803248714221944, "learning_rate": 0.00018715993926795167, "loss": 12.5625, "step": 6912 }, { "epoch": 0.37644047837836, "grad_norm": 0.6825366462464981, "learning_rate": 0.00018715561605289565, "loss": 12.4024, "step": 6913 }, { "epoch": 0.37649493237494297, "grad_norm": 0.6789555150219736, "learning_rate": 0.00018715129216010295, "loss": 12.5354, "step": 6914 }, { "epoch": 0.376549386371526, "grad_norm": 0.7318822770872114, "learning_rate": 0.0001871469675896072, "loss": 12.5837, "step": 6915 }, { "epoch": 0.376603840368109, "grad_norm": 0.6626231088289419, "learning_rate": 0.00018714264234144198, "loss": 12.5106, "step": 6916 }, { "epoch": 0.376658294364692, "grad_norm": 0.7217653612409614, "learning_rate": 0.00018713831641564097, "loss": 12.5852, "step": 6917 }, { "epoch": 0.37671274836127505, "grad_norm": 0.6263709870042876, "learning_rate": 0.0001871339898122378, "loss": 12.4448, "step": 6918 }, { "epoch": 0.37676720235785804, "grad_norm": 0.6811089588998432, "learning_rate": 0.0001871296625312661, "loss": 12.5556, "step": 6919 }, { "epoch": 0.37682165635444104, "grad_norm": 0.6573727791834589, "learning_rate": 0.0001871253345727596, "loss": 12.4701, "step": 6920 }, { "epoch": 0.3768761103510241, "grad_norm": 0.6684755993242253, "learning_rate": 0.00018712100593675182, "loss": 12.5007, "step": 6921 }, { "epoch": 0.3769305643476071, "grad_norm": 0.6443217936756908, "learning_rate": 0.0001871166766232765, "loss": 12.4511, "step": 6922 }, { "epoch": 0.3769850183441901, "grad_norm": 0.6535774774464812, "learning_rate": 0.0001871123466323673, "loss": 12.549, "step": 6923 }, { "epoch": 0.3770394723407731, "grad_norm": 0.6911624086653004, "learning_rate": 0.00018710801596405786, "loss": 12.6717, "step": 6924 }, { "epoch": 0.3770939263373561, "grad_norm": 0.6649678708702024, "learning_rate": 0.0001871036846183819, "loss": 12.3266, "step": 6925 }, { "epoch": 0.37714838033393916, "grad_norm": 0.6838715546443267, "learning_rate": 0.00018709935259537307, "loss": 12.5144, "step": 6926 }, { "epoch": 0.37720283433052215, "grad_norm": 0.6701906131016372, "learning_rate": 0.00018709501989506508, "loss": 12.6125, "step": 6927 }, { "epoch": 0.37725728832710514, "grad_norm": 0.6492567726196441, "learning_rate": 0.00018709068651749162, "loss": 12.5389, "step": 6928 }, { "epoch": 0.3773117423236882, "grad_norm": 0.7128276988468376, "learning_rate": 0.0001870863524626864, "loss": 12.5214, "step": 6929 }, { "epoch": 0.3773661963202712, "grad_norm": 0.7077479095340496, "learning_rate": 0.00018708201773068303, "loss": 12.5846, "step": 6930 }, { "epoch": 0.3774206503168542, "grad_norm": 0.6949116410713745, "learning_rate": 0.00018707768232151533, "loss": 12.6226, "step": 6931 }, { "epoch": 0.3774751043134372, "grad_norm": 0.6231198852264871, "learning_rate": 0.00018707334623521696, "loss": 12.3808, "step": 6932 }, { "epoch": 0.3775295583100202, "grad_norm": 0.6374499503869536, "learning_rate": 0.00018706900947182165, "loss": 12.378, "step": 6933 }, { "epoch": 0.3775840123066032, "grad_norm": 0.6319859637637218, "learning_rate": 0.00018706467203136312, "loss": 12.5027, "step": 6934 }, { "epoch": 0.37763846630318626, "grad_norm": 0.6557296163600854, "learning_rate": 0.0001870603339138751, "loss": 12.4791, "step": 6935 }, { "epoch": 0.37769292029976925, "grad_norm": 0.7869772154386102, "learning_rate": 0.0001870559951193913, "loss": 12.4481, "step": 6936 }, { "epoch": 0.37774737429635225, "grad_norm": 0.6653265908844587, "learning_rate": 0.0001870516556479455, "loss": 12.4008, "step": 6937 }, { "epoch": 0.3778018282929353, "grad_norm": 0.6172815715116846, "learning_rate": 0.00018704731549957146, "loss": 12.3995, "step": 6938 }, { "epoch": 0.3778562822895183, "grad_norm": 0.6468767865845889, "learning_rate": 0.00018704297467430286, "loss": 12.5116, "step": 6939 }, { "epoch": 0.3779107362861013, "grad_norm": 0.759668058569879, "learning_rate": 0.00018703863317217349, "loss": 12.3574, "step": 6940 }, { "epoch": 0.3779651902826843, "grad_norm": 0.7262095640332545, "learning_rate": 0.0001870342909932171, "loss": 12.5853, "step": 6941 }, { "epoch": 0.3780196442792673, "grad_norm": 0.7033921882821391, "learning_rate": 0.0001870299481374675, "loss": 12.4206, "step": 6942 }, { "epoch": 0.3780740982758503, "grad_norm": 0.6548268998794594, "learning_rate": 0.00018702560460495844, "loss": 12.4, "step": 6943 }, { "epoch": 0.37812855227243336, "grad_norm": 0.7040645449818776, "learning_rate": 0.00018702126039572364, "loss": 12.5144, "step": 6944 }, { "epoch": 0.37818300626901635, "grad_norm": 0.619790631113332, "learning_rate": 0.00018701691550979696, "loss": 12.4999, "step": 6945 }, { "epoch": 0.37823746026559935, "grad_norm": 0.6725003511870317, "learning_rate": 0.00018701256994721214, "loss": 12.4599, "step": 6946 }, { "epoch": 0.3782919142621824, "grad_norm": 0.833030015569806, "learning_rate": 0.000187008223708003, "loss": 12.405, "step": 6947 }, { "epoch": 0.3783463682587654, "grad_norm": 0.6901930567573182, "learning_rate": 0.00018700387679220328, "loss": 12.4261, "step": 6948 }, { "epoch": 0.3784008222553484, "grad_norm": 0.6502639678681048, "learning_rate": 0.00018699952919984684, "loss": 12.6263, "step": 6949 }, { "epoch": 0.3784552762519314, "grad_norm": 0.8842314476326208, "learning_rate": 0.0001869951809309675, "loss": 12.7245, "step": 6950 }, { "epoch": 0.3785097302485144, "grad_norm": 0.6350870700668845, "learning_rate": 0.00018699083198559904, "loss": 12.4577, "step": 6951 }, { "epoch": 0.3785641842450974, "grad_norm": 0.6456154832906754, "learning_rate": 0.00018698648236377524, "loss": 12.4396, "step": 6952 }, { "epoch": 0.37861863824168046, "grad_norm": 0.6606268237260083, "learning_rate": 0.00018698213206553001, "loss": 12.5647, "step": 6953 }, { "epoch": 0.37867309223826345, "grad_norm": 0.6516744393667526, "learning_rate": 0.00018697778109089713, "loss": 12.4016, "step": 6954 }, { "epoch": 0.37872754623484645, "grad_norm": 0.627243644658615, "learning_rate": 0.00018697342943991042, "loss": 12.4443, "step": 6955 }, { "epoch": 0.3787820002314295, "grad_norm": 0.5718938120004171, "learning_rate": 0.00018696907711260373, "loss": 12.3243, "step": 6956 }, { "epoch": 0.3788364542280125, "grad_norm": 0.6304957153919665, "learning_rate": 0.00018696472410901092, "loss": 12.5637, "step": 6957 }, { "epoch": 0.37889090822459554, "grad_norm": 0.6404029072045193, "learning_rate": 0.00018696037042916582, "loss": 12.4211, "step": 6958 }, { "epoch": 0.3789453622211785, "grad_norm": 0.5913884159550243, "learning_rate": 0.00018695601607310233, "loss": 12.4747, "step": 6959 }, { "epoch": 0.3789998162177615, "grad_norm": 0.697847973853796, "learning_rate": 0.00018695166104085425, "loss": 12.5801, "step": 6960 }, { "epoch": 0.37905427021434457, "grad_norm": 0.6430583472870945, "learning_rate": 0.00018694730533245547, "loss": 12.5845, "step": 6961 }, { "epoch": 0.37910872421092756, "grad_norm": 0.715986131250449, "learning_rate": 0.00018694294894793987, "loss": 12.4404, "step": 6962 }, { "epoch": 0.37916317820751055, "grad_norm": 0.6646346545669394, "learning_rate": 0.00018693859188734132, "loss": 12.5251, "step": 6963 }, { "epoch": 0.3792176322040936, "grad_norm": 0.6636199953215705, "learning_rate": 0.00018693423415069372, "loss": 12.5536, "step": 6964 }, { "epoch": 0.3792720862006766, "grad_norm": 0.688855654533608, "learning_rate": 0.00018692987573803088, "loss": 12.5833, "step": 6965 }, { "epoch": 0.3793265401972596, "grad_norm": 0.7349669171500565, "learning_rate": 0.0001869255166493868, "loss": 12.3737, "step": 6966 }, { "epoch": 0.37938099419384264, "grad_norm": 0.6417685318125261, "learning_rate": 0.00018692115688479532, "loss": 12.4549, "step": 6967 }, { "epoch": 0.37943544819042563, "grad_norm": 0.6182139840504983, "learning_rate": 0.00018691679644429034, "loss": 12.3984, "step": 6968 }, { "epoch": 0.3794899021870086, "grad_norm": 0.6748562831650746, "learning_rate": 0.00018691243532790576, "loss": 12.5668, "step": 6969 }, { "epoch": 0.37954435618359167, "grad_norm": 0.6298528138760133, "learning_rate": 0.00018690807353567552, "loss": 12.3831, "step": 6970 }, { "epoch": 0.37959881018017466, "grad_norm": 0.6123954408760172, "learning_rate": 0.00018690371106763355, "loss": 12.3804, "step": 6971 }, { "epoch": 0.37965326417675765, "grad_norm": 0.6024966858966703, "learning_rate": 0.0001868993479238137, "loss": 12.4899, "step": 6972 }, { "epoch": 0.3797077181733407, "grad_norm": 0.6696906147905399, "learning_rate": 0.00018689498410424997, "loss": 12.3153, "step": 6973 }, { "epoch": 0.3797621721699237, "grad_norm": 0.6218780017682547, "learning_rate": 0.00018689061960897626, "loss": 12.2652, "step": 6974 }, { "epoch": 0.3798166261665067, "grad_norm": 0.6388311787850542, "learning_rate": 0.00018688625443802654, "loss": 12.4931, "step": 6975 }, { "epoch": 0.37987108016308974, "grad_norm": 0.6041740522690894, "learning_rate": 0.00018688188859143474, "loss": 12.3498, "step": 6976 }, { "epoch": 0.37992553415967273, "grad_norm": 0.6607235444859734, "learning_rate": 0.0001868775220692348, "loss": 12.5813, "step": 6977 }, { "epoch": 0.3799799881562557, "grad_norm": 0.6593464381004138, "learning_rate": 0.00018687315487146065, "loss": 12.4265, "step": 6978 }, { "epoch": 0.38003444215283877, "grad_norm": 0.6444505430730013, "learning_rate": 0.00018686878699814629, "loss": 12.4545, "step": 6979 }, { "epoch": 0.38008889614942176, "grad_norm": 0.6341830350464173, "learning_rate": 0.0001868644184493257, "loss": 12.4751, "step": 6980 }, { "epoch": 0.38014335014600475, "grad_norm": 0.8000018745026319, "learning_rate": 0.0001868600492250328, "loss": 12.5567, "step": 6981 }, { "epoch": 0.3801978041425878, "grad_norm": 0.6539842272711024, "learning_rate": 0.00018685567932530162, "loss": 12.5653, "step": 6982 }, { "epoch": 0.3802522581391708, "grad_norm": 0.6947964022800268, "learning_rate": 0.0001868513087501661, "loss": 12.3735, "step": 6983 }, { "epoch": 0.3803067121357538, "grad_norm": 0.586767907830935, "learning_rate": 0.0001868469374996602, "loss": 12.4559, "step": 6984 }, { "epoch": 0.38036116613233684, "grad_norm": 0.650386122773557, "learning_rate": 0.000186842565573818, "loss": 12.3953, "step": 6985 }, { "epoch": 0.38041562012891983, "grad_norm": 0.6709438303267575, "learning_rate": 0.00018683819297267342, "loss": 12.4941, "step": 6986 }, { "epoch": 0.3804700741255028, "grad_norm": 0.6318954670614686, "learning_rate": 0.0001868338196962605, "loss": 12.4621, "step": 6987 }, { "epoch": 0.38052452812208587, "grad_norm": 0.5801655542248927, "learning_rate": 0.00018682944574461324, "loss": 12.2644, "step": 6988 }, { "epoch": 0.38057898211866886, "grad_norm": 0.6750535916093292, "learning_rate": 0.00018682507111776565, "loss": 12.5385, "step": 6989 }, { "epoch": 0.3806334361152519, "grad_norm": 0.6481330046989016, "learning_rate": 0.00018682069581575173, "loss": 12.5772, "step": 6990 }, { "epoch": 0.3806878901118349, "grad_norm": 0.6622877449441541, "learning_rate": 0.00018681631983860552, "loss": 12.4641, "step": 6991 }, { "epoch": 0.3807423441084179, "grad_norm": 0.6137140645342416, "learning_rate": 0.00018681194318636104, "loss": 12.4936, "step": 6992 }, { "epoch": 0.38079679810500094, "grad_norm": 0.6082401870926571, "learning_rate": 0.00018680756585905234, "loss": 12.4662, "step": 6993 }, { "epoch": 0.38085125210158394, "grad_norm": 0.6308495684452727, "learning_rate": 0.00018680318785671348, "loss": 12.6563, "step": 6994 }, { "epoch": 0.38090570609816693, "grad_norm": 0.7416546041029876, "learning_rate": 0.00018679880917937843, "loss": 12.5217, "step": 6995 }, { "epoch": 0.38096016009475, "grad_norm": 0.7087242752036145, "learning_rate": 0.00018679442982708132, "loss": 12.4414, "step": 6996 }, { "epoch": 0.38101461409133297, "grad_norm": 0.6176554026255653, "learning_rate": 0.00018679004979985615, "loss": 12.3972, "step": 6997 }, { "epoch": 0.38106906808791596, "grad_norm": 0.6720561068355646, "learning_rate": 0.00018678566909773698, "loss": 12.517, "step": 6998 }, { "epoch": 0.381123522084499, "grad_norm": 0.7147146419691247, "learning_rate": 0.00018678128772075793, "loss": 12.4048, "step": 6999 }, { "epoch": 0.381177976081082, "grad_norm": 0.8218624835163882, "learning_rate": 0.00018677690566895302, "loss": 12.3689, "step": 7000 }, { "epoch": 0.381232430077665, "grad_norm": 0.9732985061261495, "learning_rate": 0.00018677252294235634, "loss": 12.5755, "step": 7001 }, { "epoch": 0.38128688407424804, "grad_norm": 0.7098918965887916, "learning_rate": 0.00018676813954100196, "loss": 12.4009, "step": 7002 }, { "epoch": 0.38134133807083104, "grad_norm": 0.7461767152255029, "learning_rate": 0.00018676375546492396, "loss": 12.57, "step": 7003 }, { "epoch": 0.38139579206741403, "grad_norm": 0.8675423218585037, "learning_rate": 0.00018675937071415647, "loss": 12.3617, "step": 7004 }, { "epoch": 0.3814502460639971, "grad_norm": 0.717370084546687, "learning_rate": 0.0001867549852887336, "loss": 12.5304, "step": 7005 }, { "epoch": 0.38150470006058007, "grad_norm": 0.6231210159631485, "learning_rate": 0.00018675059918868935, "loss": 12.3976, "step": 7006 }, { "epoch": 0.38155915405716306, "grad_norm": 0.6817521240331398, "learning_rate": 0.00018674621241405792, "loss": 12.5481, "step": 7007 }, { "epoch": 0.3816136080537461, "grad_norm": 0.7397513221624799, "learning_rate": 0.0001867418249648734, "loss": 12.6074, "step": 7008 }, { "epoch": 0.3816680620503291, "grad_norm": 0.7386122621149642, "learning_rate": 0.0001867374368411699, "loss": 12.5742, "step": 7009 }, { "epoch": 0.3817225160469121, "grad_norm": 0.6308245493328805, "learning_rate": 0.00018673304804298156, "loss": 12.4646, "step": 7010 }, { "epoch": 0.38177697004349515, "grad_norm": 0.6765571167770338, "learning_rate": 0.00018672865857034246, "loss": 12.391, "step": 7011 }, { "epoch": 0.38183142404007814, "grad_norm": 0.6044597658565793, "learning_rate": 0.00018672426842328678, "loss": 12.4188, "step": 7012 }, { "epoch": 0.38188587803666113, "grad_norm": 0.6584114653028906, "learning_rate": 0.00018671987760184865, "loss": 12.513, "step": 7013 }, { "epoch": 0.3819403320332442, "grad_norm": 0.6697607658639393, "learning_rate": 0.0001867154861060622, "loss": 12.4682, "step": 7014 }, { "epoch": 0.38199478602982717, "grad_norm": 0.7005381190713698, "learning_rate": 0.00018671109393596157, "loss": 12.3883, "step": 7015 }, { "epoch": 0.38204924002641016, "grad_norm": 0.6644075644800487, "learning_rate": 0.00018670670109158097, "loss": 12.5448, "step": 7016 }, { "epoch": 0.3821036940229932, "grad_norm": 0.5827707034044087, "learning_rate": 0.00018670230757295453, "loss": 12.452, "step": 7017 }, { "epoch": 0.3821581480195762, "grad_norm": 0.6974112601121403, "learning_rate": 0.00018669791338011638, "loss": 12.4363, "step": 7018 }, { "epoch": 0.3822126020161592, "grad_norm": 0.6065298551477821, "learning_rate": 0.00018669351851310074, "loss": 12.4149, "step": 7019 }, { "epoch": 0.38226705601274225, "grad_norm": 0.649312763578988, "learning_rate": 0.0001866891229719417, "loss": 12.5342, "step": 7020 }, { "epoch": 0.38232151000932524, "grad_norm": 0.6438265673298778, "learning_rate": 0.00018668472675667354, "loss": 12.4505, "step": 7021 }, { "epoch": 0.38237596400590823, "grad_norm": 0.6989175412573948, "learning_rate": 0.00018668032986733044, "loss": 12.5572, "step": 7022 }, { "epoch": 0.3824304180024913, "grad_norm": 0.6689752525509498, "learning_rate": 0.0001866759323039465, "loss": 12.5695, "step": 7023 }, { "epoch": 0.38248487199907427, "grad_norm": 0.6255947812736262, "learning_rate": 0.00018667153406655605, "loss": 12.4277, "step": 7024 }, { "epoch": 0.3825393259956573, "grad_norm": 0.6846448014748062, "learning_rate": 0.00018666713515519314, "loss": 12.5029, "step": 7025 }, { "epoch": 0.3825937799922403, "grad_norm": 0.6256232122015504, "learning_rate": 0.0001866627355698921, "loss": 12.4771, "step": 7026 }, { "epoch": 0.3826482339888233, "grad_norm": 0.6257148059143375, "learning_rate": 0.0001866583353106871, "loss": 12.4713, "step": 7027 }, { "epoch": 0.38270268798540635, "grad_norm": 0.7104753379111266, "learning_rate": 0.00018665393437761231, "loss": 12.488, "step": 7028 }, { "epoch": 0.38275714198198935, "grad_norm": 0.6939144331346863, "learning_rate": 0.000186649532770702, "loss": 12.5109, "step": 7029 }, { "epoch": 0.38281159597857234, "grad_norm": 0.600769214359039, "learning_rate": 0.0001866451304899904, "loss": 12.4113, "step": 7030 }, { "epoch": 0.3828660499751554, "grad_norm": 0.569467143868859, "learning_rate": 0.00018664072753551175, "loss": 12.2896, "step": 7031 }, { "epoch": 0.3829205039717384, "grad_norm": 0.7355576933800445, "learning_rate": 0.0001866363239073003, "loss": 12.4322, "step": 7032 }, { "epoch": 0.3829749579683214, "grad_norm": 0.6909015301874856, "learning_rate": 0.00018663191960539022, "loss": 12.4023, "step": 7033 }, { "epoch": 0.3830294119649044, "grad_norm": 0.7983345034187423, "learning_rate": 0.0001866275146298158, "loss": 12.4993, "step": 7034 }, { "epoch": 0.3830838659614874, "grad_norm": 0.5951362455422403, "learning_rate": 0.00018662310898061134, "loss": 12.4463, "step": 7035 }, { "epoch": 0.3831383199580704, "grad_norm": 0.6606117787777263, "learning_rate": 0.00018661870265781103, "loss": 12.6477, "step": 7036 }, { "epoch": 0.38319277395465345, "grad_norm": 0.6052443542532031, "learning_rate": 0.00018661429566144917, "loss": 12.3899, "step": 7037 }, { "epoch": 0.38324722795123645, "grad_norm": 0.6912374558936648, "learning_rate": 0.00018660988799156002, "loss": 12.3956, "step": 7038 }, { "epoch": 0.38330168194781944, "grad_norm": 0.678500062864124, "learning_rate": 0.00018660547964817784, "loss": 12.4912, "step": 7039 }, { "epoch": 0.3833561359444025, "grad_norm": 0.6451129761422665, "learning_rate": 0.00018660107063133693, "loss": 12.4821, "step": 7040 }, { "epoch": 0.3834105899409855, "grad_norm": 0.7276540261481783, "learning_rate": 0.0001865966609410716, "loss": 12.5236, "step": 7041 }, { "epoch": 0.3834650439375685, "grad_norm": 0.7008677196954861, "learning_rate": 0.0001865922505774161, "loss": 12.4493, "step": 7042 }, { "epoch": 0.3835194979341515, "grad_norm": 0.6704835137426014, "learning_rate": 0.00018658783954040472, "loss": 12.6249, "step": 7043 }, { "epoch": 0.3835739519307345, "grad_norm": 0.7532813678875407, "learning_rate": 0.0001865834278300718, "loss": 12.4514, "step": 7044 }, { "epoch": 0.3836284059273175, "grad_norm": 0.6759469688196224, "learning_rate": 0.0001865790154464516, "loss": 12.6866, "step": 7045 }, { "epoch": 0.38368285992390055, "grad_norm": 0.5925743120960456, "learning_rate": 0.0001865746023895785, "loss": 12.3878, "step": 7046 }, { "epoch": 0.38373731392048355, "grad_norm": 0.7693887197413217, "learning_rate": 0.00018657018865948674, "loss": 12.6576, "step": 7047 }, { "epoch": 0.38379176791706654, "grad_norm": 0.6898680905211517, "learning_rate": 0.00018656577425621066, "loss": 12.5065, "step": 7048 }, { "epoch": 0.3838462219136496, "grad_norm": 0.7156536380503498, "learning_rate": 0.00018656135917978462, "loss": 12.4546, "step": 7049 }, { "epoch": 0.3839006759102326, "grad_norm": 0.6994971638710071, "learning_rate": 0.00018655694343024294, "loss": 12.6109, "step": 7050 }, { "epoch": 0.3839551299068156, "grad_norm": 0.7793553009856318, "learning_rate": 0.00018655252700761996, "loss": 12.4605, "step": 7051 }, { "epoch": 0.3840095839033986, "grad_norm": 0.6759341611546079, "learning_rate": 0.00018654810991195001, "loss": 12.464, "step": 7052 }, { "epoch": 0.3840640378999816, "grad_norm": 0.6301665691505982, "learning_rate": 0.00018654369214326746, "loss": 12.517, "step": 7053 }, { "epoch": 0.3841184918965646, "grad_norm": 0.7135243832459273, "learning_rate": 0.0001865392737016066, "loss": 12.4516, "step": 7054 }, { "epoch": 0.38417294589314765, "grad_norm": 0.6858399057276566, "learning_rate": 0.00018653485458700186, "loss": 12.5874, "step": 7055 }, { "epoch": 0.38422739988973065, "grad_norm": 0.8149053831416443, "learning_rate": 0.00018653043479948758, "loss": 12.5103, "step": 7056 }, { "epoch": 0.3842818538863137, "grad_norm": 0.7046987729073869, "learning_rate": 0.00018652601433909814, "loss": 12.5548, "step": 7057 }, { "epoch": 0.3843363078828967, "grad_norm": 0.6290864552411815, "learning_rate": 0.00018652159320586788, "loss": 12.499, "step": 7058 }, { "epoch": 0.3843907618794797, "grad_norm": 0.6916875542987843, "learning_rate": 0.00018651717139983123, "loss": 12.4951, "step": 7059 }, { "epoch": 0.38444521587606273, "grad_norm": 0.6978979497616465, "learning_rate": 0.0001865127489210225, "loss": 12.6373, "step": 7060 }, { "epoch": 0.3844996698726457, "grad_norm": 0.610515344658147, "learning_rate": 0.0001865083257694762, "loss": 12.5357, "step": 7061 }, { "epoch": 0.3845541238692287, "grad_norm": 0.6823700404070057, "learning_rate": 0.00018650390194522657, "loss": 12.5222, "step": 7062 }, { "epoch": 0.38460857786581176, "grad_norm": 0.6160180562682802, "learning_rate": 0.00018649947744830815, "loss": 12.4801, "step": 7063 }, { "epoch": 0.38466303186239476, "grad_norm": 0.6670446903986819, "learning_rate": 0.00018649505227875525, "loss": 12.6218, "step": 7064 }, { "epoch": 0.38471748585897775, "grad_norm": 0.5787244558977549, "learning_rate": 0.00018649062643660234, "loss": 12.3791, "step": 7065 }, { "epoch": 0.3847719398555608, "grad_norm": 0.6615016138111451, "learning_rate": 0.0001864861999218838, "loss": 12.4197, "step": 7066 }, { "epoch": 0.3848263938521438, "grad_norm": 0.6674308971925494, "learning_rate": 0.00018648177273463407, "loss": 12.5483, "step": 7067 }, { "epoch": 0.3848808478487268, "grad_norm": 0.6715041283823071, "learning_rate": 0.0001864773448748876, "loss": 12.4357, "step": 7068 }, { "epoch": 0.38493530184530983, "grad_norm": 0.6385821710498155, "learning_rate": 0.00018647291634267874, "loss": 12.45, "step": 7069 }, { "epoch": 0.3849897558418928, "grad_norm": 0.667305118023495, "learning_rate": 0.00018646848713804203, "loss": 12.5501, "step": 7070 }, { "epoch": 0.3850442098384758, "grad_norm": 0.6637185088679254, "learning_rate": 0.0001864640572610118, "loss": 12.5592, "step": 7071 }, { "epoch": 0.38509866383505886, "grad_norm": 0.6979028179275554, "learning_rate": 0.00018645962671162263, "loss": 12.4207, "step": 7072 }, { "epoch": 0.38515311783164186, "grad_norm": 0.7793517294375236, "learning_rate": 0.00018645519548990888, "loss": 12.4839, "step": 7073 }, { "epoch": 0.38520757182822485, "grad_norm": 0.6047285988296436, "learning_rate": 0.00018645076359590502, "loss": 12.4372, "step": 7074 }, { "epoch": 0.3852620258248079, "grad_norm": 0.6646751397762883, "learning_rate": 0.00018644633102964556, "loss": 12.593, "step": 7075 }, { "epoch": 0.3853164798213909, "grad_norm": 0.729705495106553, "learning_rate": 0.00018644189779116487, "loss": 12.4074, "step": 7076 }, { "epoch": 0.3853709338179739, "grad_norm": 0.6084925856486768, "learning_rate": 0.00018643746388049754, "loss": 12.5115, "step": 7077 }, { "epoch": 0.38542538781455693, "grad_norm": 0.7647209717282059, "learning_rate": 0.00018643302929767795, "loss": 12.5896, "step": 7078 }, { "epoch": 0.3854798418111399, "grad_norm": 0.5955347064856068, "learning_rate": 0.00018642859404274068, "loss": 12.5077, "step": 7079 }, { "epoch": 0.3855342958077229, "grad_norm": 0.6422483619212731, "learning_rate": 0.00018642415811572017, "loss": 12.5562, "step": 7080 }, { "epoch": 0.38558874980430596, "grad_norm": 0.694516033481041, "learning_rate": 0.00018641972151665085, "loss": 12.5192, "step": 7081 }, { "epoch": 0.38564320380088896, "grad_norm": 0.6587257113621006, "learning_rate": 0.00018641528424556735, "loss": 12.5755, "step": 7082 }, { "epoch": 0.38569765779747195, "grad_norm": 0.6763562072483827, "learning_rate": 0.00018641084630250407, "loss": 12.4134, "step": 7083 }, { "epoch": 0.385752111794055, "grad_norm": 0.6203052034426025, "learning_rate": 0.00018640640768749557, "loss": 12.5064, "step": 7084 }, { "epoch": 0.385806565790638, "grad_norm": 0.659861270783671, "learning_rate": 0.00018640196840057636, "loss": 12.4955, "step": 7085 }, { "epoch": 0.385861019787221, "grad_norm": 0.6437770506543342, "learning_rate": 0.00018639752844178093, "loss": 12.5114, "step": 7086 }, { "epoch": 0.38591547378380403, "grad_norm": 0.7570541110260534, "learning_rate": 0.00018639308781114386, "loss": 12.6241, "step": 7087 }, { "epoch": 0.385969927780387, "grad_norm": 0.7411434827368806, "learning_rate": 0.00018638864650869966, "loss": 12.4622, "step": 7088 }, { "epoch": 0.38602438177697, "grad_norm": 0.6590951056007902, "learning_rate": 0.0001863842045344828, "loss": 12.564, "step": 7089 }, { "epoch": 0.38607883577355306, "grad_norm": 0.6097965134684681, "learning_rate": 0.0001863797618885279, "loss": 12.5037, "step": 7090 }, { "epoch": 0.38613328977013606, "grad_norm": 0.6427854864860034, "learning_rate": 0.00018637531857086952, "loss": 12.4269, "step": 7091 }, { "epoch": 0.3861877437667191, "grad_norm": 0.7155316157969179, "learning_rate": 0.00018637087458154214, "loss": 12.5963, "step": 7092 }, { "epoch": 0.3862421977633021, "grad_norm": 0.6965946192322601, "learning_rate": 0.00018636642992058038, "loss": 12.5289, "step": 7093 }, { "epoch": 0.3862966517598851, "grad_norm": 0.6009755117826611, "learning_rate": 0.00018636198458801877, "loss": 12.4643, "step": 7094 }, { "epoch": 0.38635110575646814, "grad_norm": 0.7223297002920598, "learning_rate": 0.0001863575385838919, "loss": 12.6021, "step": 7095 }, { "epoch": 0.38640555975305113, "grad_norm": 0.6603037776913001, "learning_rate": 0.0001863530919082343, "loss": 12.5357, "step": 7096 }, { "epoch": 0.3864600137496341, "grad_norm": 0.6071533180146176, "learning_rate": 0.00018634864456108056, "loss": 12.4168, "step": 7097 }, { "epoch": 0.38651446774621717, "grad_norm": 0.7172644840488438, "learning_rate": 0.00018634419654246532, "loss": 12.5584, "step": 7098 }, { "epoch": 0.38656892174280016, "grad_norm": 0.7060305880193891, "learning_rate": 0.00018633974785242313, "loss": 12.4297, "step": 7099 }, { "epoch": 0.38662337573938316, "grad_norm": 0.6810705096067343, "learning_rate": 0.00018633529849098856, "loss": 12.5561, "step": 7100 }, { "epoch": 0.3866778297359662, "grad_norm": 0.6530001434960051, "learning_rate": 0.0001863308484581962, "loss": 12.5864, "step": 7101 }, { "epoch": 0.3867322837325492, "grad_norm": 0.6948748912939586, "learning_rate": 0.00018632639775408073, "loss": 12.4706, "step": 7102 }, { "epoch": 0.3867867377291322, "grad_norm": 0.6345644797992628, "learning_rate": 0.0001863219463786767, "loss": 12.395, "step": 7103 }, { "epoch": 0.38684119172571524, "grad_norm": 0.7069631467744489, "learning_rate": 0.00018631749433201876, "loss": 12.6182, "step": 7104 }, { "epoch": 0.38689564572229823, "grad_norm": 0.6557637249767786, "learning_rate": 0.0001863130416141415, "loss": 12.5655, "step": 7105 }, { "epoch": 0.3869500997188812, "grad_norm": 0.5965658094492697, "learning_rate": 0.00018630858822507956, "loss": 12.4235, "step": 7106 }, { "epoch": 0.3870045537154643, "grad_norm": 0.6341298768236069, "learning_rate": 0.00018630413416486754, "loss": 12.4438, "step": 7107 }, { "epoch": 0.38705900771204726, "grad_norm": 0.6500506288593676, "learning_rate": 0.0001862996794335401, "loss": 12.5798, "step": 7108 }, { "epoch": 0.38711346170863026, "grad_norm": 0.6411657990928703, "learning_rate": 0.0001862952240311319, "loss": 12.4585, "step": 7109 }, { "epoch": 0.3871679157052133, "grad_norm": 0.6437618395015029, "learning_rate": 0.00018629076795767755, "loss": 12.4662, "step": 7110 }, { "epoch": 0.3872223697017963, "grad_norm": 0.665479862318005, "learning_rate": 0.00018628631121321172, "loss": 12.4517, "step": 7111 }, { "epoch": 0.3872768236983793, "grad_norm": 0.6880421473175254, "learning_rate": 0.00018628185379776909, "loss": 12.4043, "step": 7112 }, { "epoch": 0.38733127769496234, "grad_norm": 0.6701975253629583, "learning_rate": 0.00018627739571138425, "loss": 12.4521, "step": 7113 }, { "epoch": 0.38738573169154533, "grad_norm": 0.6120212944128985, "learning_rate": 0.00018627293695409194, "loss": 12.5299, "step": 7114 }, { "epoch": 0.3874401856881283, "grad_norm": 0.6194184670856256, "learning_rate": 0.0001862684775259268, "loss": 12.4645, "step": 7115 }, { "epoch": 0.3874946396847114, "grad_norm": 0.7047969299610378, "learning_rate": 0.0001862640174269235, "loss": 12.6798, "step": 7116 }, { "epoch": 0.38754909368129437, "grad_norm": 0.7116460721530385, "learning_rate": 0.00018625955665711673, "loss": 12.5787, "step": 7117 }, { "epoch": 0.38760354767787736, "grad_norm": 0.7729800247148316, "learning_rate": 0.00018625509521654122, "loss": 12.462, "step": 7118 }, { "epoch": 0.3876580016744604, "grad_norm": 0.6599703454795484, "learning_rate": 0.0001862506331052316, "loss": 12.4132, "step": 7119 }, { "epoch": 0.3877124556710434, "grad_norm": 0.7320634611929326, "learning_rate": 0.0001862461703232226, "loss": 12.6311, "step": 7120 }, { "epoch": 0.3877669096676264, "grad_norm": 0.7085179533336992, "learning_rate": 0.0001862417068705489, "loss": 12.6353, "step": 7121 }, { "epoch": 0.38782136366420944, "grad_norm": 0.7532299118642597, "learning_rate": 0.00018623724274724522, "loss": 12.4862, "step": 7122 }, { "epoch": 0.38787581766079243, "grad_norm": 0.6448793596255141, "learning_rate": 0.00018623277795334632, "loss": 12.4253, "step": 7123 }, { "epoch": 0.3879302716573755, "grad_norm": 0.6352504803054885, "learning_rate": 0.00018622831248888682, "loss": 12.4379, "step": 7124 }, { "epoch": 0.3879847256539585, "grad_norm": 0.6218883827318556, "learning_rate": 0.00018622384635390152, "loss": 12.5006, "step": 7125 }, { "epoch": 0.38803917965054147, "grad_norm": 0.6190690293378829, "learning_rate": 0.00018621937954842516, "loss": 12.4908, "step": 7126 }, { "epoch": 0.3880936336471245, "grad_norm": 0.6275365107995106, "learning_rate": 0.00018621491207249243, "loss": 12.45, "step": 7127 }, { "epoch": 0.3881480876437075, "grad_norm": 0.6047005196824049, "learning_rate": 0.00018621044392613809, "loss": 12.3975, "step": 7128 }, { "epoch": 0.3882025416402905, "grad_norm": 0.6524608765328237, "learning_rate": 0.00018620597510939687, "loss": 12.4844, "step": 7129 }, { "epoch": 0.38825699563687355, "grad_norm": 0.7510200705796262, "learning_rate": 0.00018620150562230354, "loss": 12.6705, "step": 7130 }, { "epoch": 0.38831144963345654, "grad_norm": 0.73051352377799, "learning_rate": 0.00018619703546489286, "loss": 12.4825, "step": 7131 }, { "epoch": 0.38836590363003953, "grad_norm": 0.7746106714477208, "learning_rate": 0.00018619256463719953, "loss": 12.5045, "step": 7132 }, { "epoch": 0.3884203576266226, "grad_norm": 0.6517344434189947, "learning_rate": 0.0001861880931392584, "loss": 12.472, "step": 7133 }, { "epoch": 0.3884748116232056, "grad_norm": 0.7236585827856387, "learning_rate": 0.00018618362097110418, "loss": 12.4292, "step": 7134 }, { "epoch": 0.38852926561978857, "grad_norm": 0.7377438184733501, "learning_rate": 0.0001861791481327717, "loss": 12.52, "step": 7135 }, { "epoch": 0.3885837196163716, "grad_norm": 0.701077172486775, "learning_rate": 0.0001861746746242957, "loss": 12.3832, "step": 7136 }, { "epoch": 0.3886381736129546, "grad_norm": 0.7465602021994938, "learning_rate": 0.00018617020044571096, "loss": 12.477, "step": 7137 }, { "epoch": 0.3886926276095376, "grad_norm": 0.6913046599592584, "learning_rate": 0.00018616572559705232, "loss": 12.494, "step": 7138 }, { "epoch": 0.38874708160612065, "grad_norm": 0.798704738006824, "learning_rate": 0.00018616125007835454, "loss": 12.4876, "step": 7139 }, { "epoch": 0.38880153560270364, "grad_norm": 0.7090710177877242, "learning_rate": 0.0001861567738896524, "loss": 12.518, "step": 7140 }, { "epoch": 0.38885598959928663, "grad_norm": 0.6283988372735225, "learning_rate": 0.00018615229703098076, "loss": 12.612, "step": 7141 }, { "epoch": 0.3889104435958697, "grad_norm": 0.7791594674546557, "learning_rate": 0.00018614781950237444, "loss": 12.6456, "step": 7142 }, { "epoch": 0.3889648975924527, "grad_norm": 0.6618988345899924, "learning_rate": 0.0001861433413038682, "loss": 12.4756, "step": 7143 }, { "epoch": 0.38901935158903567, "grad_norm": 0.7549097356269053, "learning_rate": 0.0001861388624354969, "loss": 12.4658, "step": 7144 }, { "epoch": 0.3890738055856187, "grad_norm": 0.6821411593614526, "learning_rate": 0.00018613438289729535, "loss": 12.5978, "step": 7145 }, { "epoch": 0.3891282595822017, "grad_norm": 0.6596391875897409, "learning_rate": 0.00018612990268929838, "loss": 12.433, "step": 7146 }, { "epoch": 0.3891827135787847, "grad_norm": 0.6942742601285982, "learning_rate": 0.00018612542181154087, "loss": 12.4488, "step": 7147 }, { "epoch": 0.38923716757536775, "grad_norm": 0.6441158155215414, "learning_rate": 0.00018612094026405763, "loss": 12.3776, "step": 7148 }, { "epoch": 0.38929162157195074, "grad_norm": 0.6671097050311692, "learning_rate": 0.0001861164580468835, "loss": 12.6619, "step": 7149 }, { "epoch": 0.38934607556853373, "grad_norm": 0.6378455673853604, "learning_rate": 0.00018611197516005335, "loss": 12.401, "step": 7150 }, { "epoch": 0.3894005295651168, "grad_norm": 0.5825367655607089, "learning_rate": 0.0001861074916036021, "loss": 12.5309, "step": 7151 }, { "epoch": 0.3894549835616998, "grad_norm": 0.7563755785105186, "learning_rate": 0.00018610300737756448, "loss": 12.4888, "step": 7152 }, { "epoch": 0.38950943755828277, "grad_norm": 0.7009475975387238, "learning_rate": 0.00018609852248197546, "loss": 12.5647, "step": 7153 }, { "epoch": 0.3895638915548658, "grad_norm": 0.6796488958941794, "learning_rate": 0.0001860940369168699, "loss": 12.4961, "step": 7154 }, { "epoch": 0.3896183455514488, "grad_norm": 0.6557015786790266, "learning_rate": 0.00018608955068228267, "loss": 12.4476, "step": 7155 }, { "epoch": 0.3896727995480318, "grad_norm": 0.7001203977916104, "learning_rate": 0.00018608506377824864, "loss": 12.5892, "step": 7156 }, { "epoch": 0.38972725354461485, "grad_norm": 0.7131040980800413, "learning_rate": 0.00018608057620480274, "loss": 12.4833, "step": 7157 }, { "epoch": 0.38978170754119784, "grad_norm": 0.7123328847352828, "learning_rate": 0.00018607608796197982, "loss": 12.5396, "step": 7158 }, { "epoch": 0.3898361615377809, "grad_norm": 0.6952953693606627, "learning_rate": 0.00018607159904981483, "loss": 12.3928, "step": 7159 }, { "epoch": 0.3898906155343639, "grad_norm": 0.6573785347664906, "learning_rate": 0.0001860671094683426, "loss": 12.4525, "step": 7160 }, { "epoch": 0.3899450695309469, "grad_norm": 0.5640682053021612, "learning_rate": 0.00018606261921759814, "loss": 12.4185, "step": 7161 }, { "epoch": 0.3899995235275299, "grad_norm": 0.6663834979866167, "learning_rate": 0.00018605812829761633, "loss": 12.4792, "step": 7162 }, { "epoch": 0.3900539775241129, "grad_norm": 0.6167571085476997, "learning_rate": 0.00018605363670843206, "loss": 12.3879, "step": 7163 }, { "epoch": 0.3901084315206959, "grad_norm": 0.6064142565549342, "learning_rate": 0.0001860491444500803, "loss": 12.5378, "step": 7164 }, { "epoch": 0.39016288551727896, "grad_norm": 0.5654870240016255, "learning_rate": 0.00018604465152259595, "loss": 12.4184, "step": 7165 }, { "epoch": 0.39021733951386195, "grad_norm": 0.7203300327021557, "learning_rate": 0.00018604015792601396, "loss": 12.4794, "step": 7166 }, { "epoch": 0.39027179351044494, "grad_norm": 0.6751225100880477, "learning_rate": 0.00018603566366036923, "loss": 12.6103, "step": 7167 }, { "epoch": 0.390326247507028, "grad_norm": 0.6162717287525183, "learning_rate": 0.00018603116872569682, "loss": 12.3781, "step": 7168 }, { "epoch": 0.390380701503611, "grad_norm": 0.6081743008463881, "learning_rate": 0.00018602667312203158, "loss": 12.4616, "step": 7169 }, { "epoch": 0.390435155500194, "grad_norm": 0.7200821163330738, "learning_rate": 0.0001860221768494085, "loss": 12.2856, "step": 7170 }, { "epoch": 0.390489609496777, "grad_norm": 0.6801818674283662, "learning_rate": 0.00018601767990786256, "loss": 12.4423, "step": 7171 }, { "epoch": 0.39054406349336, "grad_norm": 0.7458239719186429, "learning_rate": 0.00018601318229742874, "loss": 12.4531, "step": 7172 }, { "epoch": 0.390598517489943, "grad_norm": 0.636396501553983, "learning_rate": 0.00018600868401814194, "loss": 12.418, "step": 7173 }, { "epoch": 0.39065297148652606, "grad_norm": 0.6982245766018101, "learning_rate": 0.00018600418507003723, "loss": 12.6615, "step": 7174 }, { "epoch": 0.39070742548310905, "grad_norm": 0.6725384848733218, "learning_rate": 0.00018599968545314951, "loss": 12.5772, "step": 7175 }, { "epoch": 0.39076187947969204, "grad_norm": 0.6217640478486886, "learning_rate": 0.00018599518516751386, "loss": 12.4921, "step": 7176 }, { "epoch": 0.3908163334762751, "grad_norm": 0.735065262741298, "learning_rate": 0.0001859906842131652, "loss": 12.606, "step": 7177 }, { "epoch": 0.3908707874728581, "grad_norm": 0.6181369701959938, "learning_rate": 0.00018598618259013856, "loss": 12.3427, "step": 7178 }, { "epoch": 0.3909252414694411, "grad_norm": 0.6777786413899306, "learning_rate": 0.00018598168029846895, "loss": 12.4036, "step": 7179 }, { "epoch": 0.3909796954660241, "grad_norm": 0.679731794899129, "learning_rate": 0.00018597717733819137, "loss": 12.4271, "step": 7180 }, { "epoch": 0.3910341494626071, "grad_norm": 0.6111586941549961, "learning_rate": 0.00018597267370934085, "loss": 12.4846, "step": 7181 }, { "epoch": 0.3910886034591901, "grad_norm": 0.6457344417871698, "learning_rate": 0.0001859681694119524, "loss": 12.5326, "step": 7182 }, { "epoch": 0.39114305745577316, "grad_norm": 0.6598166421712464, "learning_rate": 0.00018596366444606106, "loss": 12.3722, "step": 7183 }, { "epoch": 0.39119751145235615, "grad_norm": 0.6505653730095216, "learning_rate": 0.00018595915881170183, "loss": 12.4866, "step": 7184 }, { "epoch": 0.39125196544893914, "grad_norm": 0.8942655461387717, "learning_rate": 0.00018595465250890975, "loss": 12.6271, "step": 7185 }, { "epoch": 0.3913064194455222, "grad_norm": 0.683973537855153, "learning_rate": 0.00018595014553771992, "loss": 12.4369, "step": 7186 }, { "epoch": 0.3913608734421052, "grad_norm": 0.7404856511844542, "learning_rate": 0.00018594563789816734, "loss": 12.4747, "step": 7187 }, { "epoch": 0.3914153274386882, "grad_norm": 0.7896839713826467, "learning_rate": 0.00018594112959028706, "loss": 12.3406, "step": 7188 }, { "epoch": 0.3914697814352712, "grad_norm": 0.6195008716658261, "learning_rate": 0.00018593662061411413, "loss": 12.4288, "step": 7189 }, { "epoch": 0.3915242354318542, "grad_norm": 0.7143577956920403, "learning_rate": 0.00018593211096968362, "loss": 12.4196, "step": 7190 }, { "epoch": 0.39157868942843727, "grad_norm": 0.7425400955747433, "learning_rate": 0.0001859276006570306, "loss": 12.606, "step": 7191 }, { "epoch": 0.39163314342502026, "grad_norm": 0.6396423199370846, "learning_rate": 0.00018592308967619017, "loss": 12.6347, "step": 7192 }, { "epoch": 0.39168759742160325, "grad_norm": 0.6885635209124368, "learning_rate": 0.00018591857802719737, "loss": 12.5374, "step": 7193 }, { "epoch": 0.3917420514181863, "grad_norm": 0.6645824603123485, "learning_rate": 0.0001859140657100873, "loss": 12.5546, "step": 7194 }, { "epoch": 0.3917965054147693, "grad_norm": 0.5879980669589716, "learning_rate": 0.00018590955272489504, "loss": 12.3882, "step": 7195 }, { "epoch": 0.3918509594113523, "grad_norm": 0.7996186092060785, "learning_rate": 0.00018590503907165573, "loss": 12.5485, "step": 7196 }, { "epoch": 0.39190541340793533, "grad_norm": 0.6731604785694854, "learning_rate": 0.0001859005247504044, "loss": 12.5588, "step": 7197 }, { "epoch": 0.3919598674045183, "grad_norm": 0.7331086743184092, "learning_rate": 0.00018589600976117617, "loss": 12.3912, "step": 7198 }, { "epoch": 0.3920143214011013, "grad_norm": 0.6348125847667513, "learning_rate": 0.0001858914941040062, "loss": 12.5074, "step": 7199 }, { "epoch": 0.39206877539768437, "grad_norm": 0.6554620268542093, "learning_rate": 0.00018588697777892953, "loss": 12.4549, "step": 7200 }, { "epoch": 0.39212322939426736, "grad_norm": 0.6242822708657442, "learning_rate": 0.00018588246078598135, "loss": 12.5546, "step": 7201 }, { "epoch": 0.39217768339085035, "grad_norm": 0.762153351802112, "learning_rate": 0.00018587794312519674, "loss": 12.5397, "step": 7202 }, { "epoch": 0.3922321373874334, "grad_norm": 0.6007328039341426, "learning_rate": 0.00018587342479661084, "loss": 12.3427, "step": 7203 }, { "epoch": 0.3922865913840164, "grad_norm": 0.6532959864928064, "learning_rate": 0.00018586890580025878, "loss": 12.5656, "step": 7204 }, { "epoch": 0.3923410453805994, "grad_norm": 0.6905479326225938, "learning_rate": 0.0001858643861361757, "loss": 12.4903, "step": 7205 }, { "epoch": 0.39239549937718243, "grad_norm": 0.851211578891341, "learning_rate": 0.00018585986580439682, "loss": 12.5757, "step": 7206 }, { "epoch": 0.3924499533737654, "grad_norm": 0.683472164164967, "learning_rate": 0.00018585534480495714, "loss": 12.3313, "step": 7207 }, { "epoch": 0.3925044073703484, "grad_norm": 0.607373149852656, "learning_rate": 0.00018585082313789196, "loss": 12.3781, "step": 7208 }, { "epoch": 0.39255886136693147, "grad_norm": 0.6533168461112862, "learning_rate": 0.00018584630080323633, "loss": 12.5263, "step": 7209 }, { "epoch": 0.39261331536351446, "grad_norm": 0.6703700361785917, "learning_rate": 0.00018584177780102553, "loss": 12.3924, "step": 7210 }, { "epoch": 0.39266776936009745, "grad_norm": 0.7017657038020355, "learning_rate": 0.00018583725413129462, "loss": 12.6166, "step": 7211 }, { "epoch": 0.3927222233566805, "grad_norm": 0.6458042616638657, "learning_rate": 0.00018583272979407885, "loss": 12.4746, "step": 7212 }, { "epoch": 0.3927766773532635, "grad_norm": 0.6663919039166154, "learning_rate": 0.00018582820478941337, "loss": 12.4783, "step": 7213 }, { "epoch": 0.3928311313498465, "grad_norm": 0.6678956163369926, "learning_rate": 0.00018582367911733339, "loss": 12.6578, "step": 7214 }, { "epoch": 0.39288558534642953, "grad_norm": 0.6717646126286817, "learning_rate": 0.00018581915277787406, "loss": 12.5549, "step": 7215 }, { "epoch": 0.3929400393430125, "grad_norm": 0.6514951420285618, "learning_rate": 0.00018581462577107062, "loss": 12.4215, "step": 7216 }, { "epoch": 0.3929944933395955, "grad_norm": 0.6131590893663512, "learning_rate": 0.00018581009809695828, "loss": 12.413, "step": 7217 }, { "epoch": 0.39304894733617857, "grad_norm": 0.7061939089881658, "learning_rate": 0.0001858055697555722, "loss": 12.4528, "step": 7218 }, { "epoch": 0.39310340133276156, "grad_norm": 0.6396540860826965, "learning_rate": 0.00018580104074694765, "loss": 12.4773, "step": 7219 }, { "epoch": 0.39315785532934455, "grad_norm": 0.6813268667296958, "learning_rate": 0.00018579651107111979, "loss": 12.1887, "step": 7220 }, { "epoch": 0.3932123093259276, "grad_norm": 0.652895067238895, "learning_rate": 0.00018579198072812386, "loss": 12.5702, "step": 7221 }, { "epoch": 0.3932667633225106, "grad_norm": 0.6312854585604019, "learning_rate": 0.00018578744971799513, "loss": 12.4433, "step": 7222 }, { "epoch": 0.3933212173190936, "grad_norm": 0.7119148682281505, "learning_rate": 0.0001857829180407688, "loss": 12.4816, "step": 7223 }, { "epoch": 0.39337567131567663, "grad_norm": 0.5874724663556574, "learning_rate": 0.00018577838569648012, "loss": 12.3591, "step": 7224 }, { "epoch": 0.3934301253122596, "grad_norm": 0.724354991397679, "learning_rate": 0.0001857738526851643, "loss": 12.483, "step": 7225 }, { "epoch": 0.3934845793088427, "grad_norm": 0.6301934564762179, "learning_rate": 0.00018576931900685665, "loss": 12.421, "step": 7226 }, { "epoch": 0.39353903330542567, "grad_norm": 0.6071793867462213, "learning_rate": 0.00018576478466159237, "loss": 12.4453, "step": 7227 }, { "epoch": 0.39359348730200866, "grad_norm": 0.6099905450348952, "learning_rate": 0.00018576024964940673, "loss": 12.3518, "step": 7228 }, { "epoch": 0.3936479412985917, "grad_norm": 0.6502863285735471, "learning_rate": 0.00018575571397033504, "loss": 12.2838, "step": 7229 }, { "epoch": 0.3937023952951747, "grad_norm": 0.6212062647781128, "learning_rate": 0.00018575117762441252, "loss": 12.5731, "step": 7230 }, { "epoch": 0.3937568492917577, "grad_norm": 0.6241215749927159, "learning_rate": 0.00018574664061167447, "loss": 12.4702, "step": 7231 }, { "epoch": 0.39381130328834074, "grad_norm": 0.6667971540441856, "learning_rate": 0.00018574210293215615, "loss": 12.5083, "step": 7232 }, { "epoch": 0.39386575728492373, "grad_norm": 0.612237029823004, "learning_rate": 0.00018573756458589288, "loss": 12.4686, "step": 7233 }, { "epoch": 0.3939202112815067, "grad_norm": 0.6123887965534771, "learning_rate": 0.00018573302557291989, "loss": 12.4532, "step": 7234 }, { "epoch": 0.3939746652780898, "grad_norm": 0.5806781769345054, "learning_rate": 0.00018572848589327255, "loss": 12.4606, "step": 7235 }, { "epoch": 0.39402911927467277, "grad_norm": 0.6702610377858681, "learning_rate": 0.00018572394554698614, "loss": 12.5989, "step": 7236 }, { "epoch": 0.39408357327125576, "grad_norm": 0.6998756911883727, "learning_rate": 0.0001857194045340959, "loss": 12.3319, "step": 7237 }, { "epoch": 0.3941380272678388, "grad_norm": 0.6148350823215484, "learning_rate": 0.00018571486285463723, "loss": 12.5016, "step": 7238 }, { "epoch": 0.3941924812644218, "grad_norm": 0.6236150418109734, "learning_rate": 0.0001857103205086454, "loss": 12.5312, "step": 7239 }, { "epoch": 0.3942469352610048, "grad_norm": 0.6139467227040993, "learning_rate": 0.00018570577749615577, "loss": 12.6076, "step": 7240 }, { "epoch": 0.39430138925758784, "grad_norm": 0.6040187667043337, "learning_rate": 0.00018570123381720364, "loss": 12.4072, "step": 7241 }, { "epoch": 0.39435584325417083, "grad_norm": 0.6223642956678106, "learning_rate": 0.0001856966894718243, "loss": 12.4661, "step": 7242 }, { "epoch": 0.3944102972507538, "grad_norm": 0.659730055040347, "learning_rate": 0.00018569214446005316, "loss": 12.3883, "step": 7243 }, { "epoch": 0.3944647512473369, "grad_norm": 0.6723695378689196, "learning_rate": 0.00018568759878192554, "loss": 12.3468, "step": 7244 }, { "epoch": 0.39451920524391987, "grad_norm": 0.6392869809761946, "learning_rate": 0.00018568305243747677, "loss": 12.4045, "step": 7245 }, { "epoch": 0.39457365924050286, "grad_norm": 0.6810339540317356, "learning_rate": 0.0001856785054267422, "loss": 12.5154, "step": 7246 }, { "epoch": 0.3946281132370859, "grad_norm": 0.6434168951571108, "learning_rate": 0.00018567395774975724, "loss": 12.5282, "step": 7247 }, { "epoch": 0.3946825672336689, "grad_norm": 0.6729813495271544, "learning_rate": 0.0001856694094065572, "loss": 12.5041, "step": 7248 }, { "epoch": 0.3947370212302519, "grad_norm": 0.6876531212401764, "learning_rate": 0.00018566486039717749, "loss": 12.6692, "step": 7249 }, { "epoch": 0.39479147522683494, "grad_norm": 0.587843823790897, "learning_rate": 0.0001856603107216534, "loss": 12.3987, "step": 7250 }, { "epoch": 0.39484592922341794, "grad_norm": 0.6931917437986364, "learning_rate": 0.0001856557603800204, "loss": 12.3018, "step": 7251 }, { "epoch": 0.39490038322000093, "grad_norm": 0.6971045277469835, "learning_rate": 0.00018565120937231387, "loss": 12.4893, "step": 7252 }, { "epoch": 0.394954837216584, "grad_norm": 0.5889567317818012, "learning_rate": 0.00018564665769856914, "loss": 12.4292, "step": 7253 }, { "epoch": 0.39500929121316697, "grad_norm": 0.5758889736487871, "learning_rate": 0.00018564210535882168, "loss": 12.3916, "step": 7254 }, { "epoch": 0.39506374520974996, "grad_norm": 0.6136402012755989, "learning_rate": 0.00018563755235310677, "loss": 12.555, "step": 7255 }, { "epoch": 0.395118199206333, "grad_norm": 0.6967106844765542, "learning_rate": 0.00018563299868145996, "loss": 12.3989, "step": 7256 }, { "epoch": 0.395172653202916, "grad_norm": 0.6602235149415349, "learning_rate": 0.00018562844434391655, "loss": 12.5221, "step": 7257 }, { "epoch": 0.39522710719949905, "grad_norm": 0.5970856829434794, "learning_rate": 0.00018562388934051204, "loss": 12.5314, "step": 7258 }, { "epoch": 0.39528156119608204, "grad_norm": 0.702231835022106, "learning_rate": 0.00018561933367128175, "loss": 12.3386, "step": 7259 }, { "epoch": 0.39533601519266504, "grad_norm": 0.6349517268734202, "learning_rate": 0.0001856147773362612, "loss": 12.4374, "step": 7260 }, { "epoch": 0.3953904691892481, "grad_norm": 0.5998605612793768, "learning_rate": 0.00018561022033548578, "loss": 12.5123, "step": 7261 }, { "epoch": 0.3954449231858311, "grad_norm": 0.798835307386948, "learning_rate": 0.0001856056626689909, "loss": 12.5172, "step": 7262 }, { "epoch": 0.39549937718241407, "grad_norm": 0.6122143003084994, "learning_rate": 0.00018560110433681209, "loss": 12.4921, "step": 7263 }, { "epoch": 0.3955538311789971, "grad_norm": 0.6398409323570206, "learning_rate": 0.0001855965453389847, "loss": 12.5301, "step": 7264 }, { "epoch": 0.3956082851755801, "grad_norm": 0.6628705007558366, "learning_rate": 0.00018559198567554423, "loss": 12.5528, "step": 7265 }, { "epoch": 0.3956627391721631, "grad_norm": 0.6082496779379879, "learning_rate": 0.00018558742534652612, "loss": 12.4979, "step": 7266 }, { "epoch": 0.39571719316874615, "grad_norm": 0.6679068622193955, "learning_rate": 0.00018558286435196584, "loss": 12.4034, "step": 7267 }, { "epoch": 0.39577164716532914, "grad_norm": 0.6283397647402458, "learning_rate": 0.00018557830269189885, "loss": 12.4187, "step": 7268 }, { "epoch": 0.39582610116191214, "grad_norm": 0.6170647594859415, "learning_rate": 0.0001855737403663606, "loss": 12.4321, "step": 7269 }, { "epoch": 0.3958805551584952, "grad_norm": 0.6354956277518662, "learning_rate": 0.00018556917737538663, "loss": 12.531, "step": 7270 }, { "epoch": 0.3959350091550782, "grad_norm": 0.7097303096725949, "learning_rate": 0.0001855646137190124, "loss": 12.6097, "step": 7271 }, { "epoch": 0.39598946315166117, "grad_norm": 0.634232155487576, "learning_rate": 0.00018556004939727333, "loss": 12.3796, "step": 7272 }, { "epoch": 0.3960439171482442, "grad_norm": 0.6053037252161205, "learning_rate": 0.00018555548441020502, "loss": 12.4573, "step": 7273 }, { "epoch": 0.3960983711448272, "grad_norm": 0.6051827555115559, "learning_rate": 0.0001855509187578429, "loss": 12.4087, "step": 7274 }, { "epoch": 0.3961528251414102, "grad_norm": 0.5997554655096772, "learning_rate": 0.00018554635244022246, "loss": 12.5251, "step": 7275 }, { "epoch": 0.39620727913799325, "grad_norm": 0.5727144957820633, "learning_rate": 0.0001855417854573793, "loss": 12.3609, "step": 7276 }, { "epoch": 0.39626173313457624, "grad_norm": 0.626733753861516, "learning_rate": 0.00018553721780934884, "loss": 12.4887, "step": 7277 }, { "epoch": 0.39631618713115924, "grad_norm": 0.6508171147743776, "learning_rate": 0.0001855326494961666, "loss": 12.4415, "step": 7278 }, { "epoch": 0.3963706411277423, "grad_norm": 0.7338951360507143, "learning_rate": 0.00018552808051786816, "loss": 12.3495, "step": 7279 }, { "epoch": 0.3964250951243253, "grad_norm": 0.6598739504874858, "learning_rate": 0.00018552351087448903, "loss": 12.4492, "step": 7280 }, { "epoch": 0.39647954912090827, "grad_norm": 0.659297850454176, "learning_rate": 0.00018551894056606473, "loss": 12.5406, "step": 7281 }, { "epoch": 0.3965340031174913, "grad_norm": 0.7926530178887912, "learning_rate": 0.0001855143695926308, "loss": 12.5387, "step": 7282 }, { "epoch": 0.3965884571140743, "grad_norm": 0.8547824409637148, "learning_rate": 0.00018550979795422281, "loss": 12.4184, "step": 7283 }, { "epoch": 0.3966429111106573, "grad_norm": 0.6204336063723833, "learning_rate": 0.00018550522565087625, "loss": 12.409, "step": 7284 }, { "epoch": 0.39669736510724035, "grad_norm": 0.6544669397684993, "learning_rate": 0.00018550065268262676, "loss": 12.4706, "step": 7285 }, { "epoch": 0.39675181910382334, "grad_norm": 0.6906323331020744, "learning_rate": 0.00018549607904950983, "loss": 12.3418, "step": 7286 }, { "epoch": 0.39680627310040634, "grad_norm": 0.6146860200521059, "learning_rate": 0.00018549150475156108, "loss": 12.3718, "step": 7287 }, { "epoch": 0.3968607270969894, "grad_norm": 0.7514602294337431, "learning_rate": 0.00018548692978881601, "loss": 12.6076, "step": 7288 }, { "epoch": 0.3969151810935724, "grad_norm": 0.6983633388102153, "learning_rate": 0.00018548235416131025, "loss": 12.4947, "step": 7289 }, { "epoch": 0.39696963509015537, "grad_norm": 0.7084821502082099, "learning_rate": 0.0001854777778690794, "loss": 12.3876, "step": 7290 }, { "epoch": 0.3970240890867384, "grad_norm": 0.6327235755021157, "learning_rate": 0.00018547320091215897, "loss": 12.4699, "step": 7291 }, { "epoch": 0.3970785430833214, "grad_norm": 0.623885800366473, "learning_rate": 0.00018546862329058464, "loss": 12.4228, "step": 7292 }, { "epoch": 0.39713299707990446, "grad_norm": 0.752140264223175, "learning_rate": 0.00018546404500439194, "loss": 12.5489, "step": 7293 }, { "epoch": 0.39718745107648745, "grad_norm": 0.6581033958309009, "learning_rate": 0.0001854594660536165, "loss": 12.1787, "step": 7294 }, { "epoch": 0.39724190507307044, "grad_norm": 0.6692429709274312, "learning_rate": 0.0001854548864382939, "loss": 12.5248, "step": 7295 }, { "epoch": 0.3972963590696535, "grad_norm": 0.6222330407924297, "learning_rate": 0.00018545030615845978, "loss": 12.3521, "step": 7296 }, { "epoch": 0.3973508130662365, "grad_norm": 0.7111786469242476, "learning_rate": 0.00018544572521414976, "loss": 12.4401, "step": 7297 }, { "epoch": 0.3974052670628195, "grad_norm": 0.6415092416534993, "learning_rate": 0.00018544114360539947, "loss": 12.3767, "step": 7298 }, { "epoch": 0.3974597210594025, "grad_norm": 0.6301328211565949, "learning_rate": 0.0001854365613322445, "loss": 12.4496, "step": 7299 }, { "epoch": 0.3975141750559855, "grad_norm": 0.5882984345484891, "learning_rate": 0.00018543197839472047, "loss": 12.4798, "step": 7300 }, { "epoch": 0.3975686290525685, "grad_norm": 0.7185448650080746, "learning_rate": 0.00018542739479286309, "loss": 12.6752, "step": 7301 }, { "epoch": 0.39762308304915156, "grad_norm": 0.5885969522111261, "learning_rate": 0.00018542281052670795, "loss": 12.3972, "step": 7302 }, { "epoch": 0.39767753704573455, "grad_norm": 0.6529425986067049, "learning_rate": 0.00018541822559629072, "loss": 12.5375, "step": 7303 }, { "epoch": 0.39773199104231755, "grad_norm": 0.6470559603557152, "learning_rate": 0.00018541364000164702, "loss": 12.4317, "step": 7304 }, { "epoch": 0.3977864450389006, "grad_norm": 0.7064940258511272, "learning_rate": 0.00018540905374281254, "loss": 12.3257, "step": 7305 }, { "epoch": 0.3978408990354836, "grad_norm": 0.7124757808162602, "learning_rate": 0.00018540446681982294, "loss": 12.4732, "step": 7306 }, { "epoch": 0.3978953530320666, "grad_norm": 0.6063168223396287, "learning_rate": 0.0001853998792327139, "loss": 12.4021, "step": 7307 }, { "epoch": 0.3979498070286496, "grad_norm": 0.6382347270216853, "learning_rate": 0.00018539529098152103, "loss": 12.49, "step": 7308 }, { "epoch": 0.3980042610252326, "grad_norm": 0.6361698118939236, "learning_rate": 0.0001853907020662801, "loss": 12.4024, "step": 7309 }, { "epoch": 0.3980587150218156, "grad_norm": 0.6411437562384148, "learning_rate": 0.00018538611248702675, "loss": 12.4099, "step": 7310 }, { "epoch": 0.39811316901839866, "grad_norm": 0.7301694611151364, "learning_rate": 0.00018538152224379666, "loss": 12.5862, "step": 7311 }, { "epoch": 0.39816762301498165, "grad_norm": 0.5686964007656635, "learning_rate": 0.00018537693133662553, "loss": 12.3635, "step": 7312 }, { "epoch": 0.39822207701156465, "grad_norm": 0.669652844274776, "learning_rate": 0.00018537233976554906, "loss": 12.5609, "step": 7313 }, { "epoch": 0.3982765310081477, "grad_norm": 0.653851941082085, "learning_rate": 0.00018536774753060299, "loss": 12.4567, "step": 7314 }, { "epoch": 0.3983309850047307, "grad_norm": 0.6460005068343455, "learning_rate": 0.00018536315463182294, "loss": 12.498, "step": 7315 }, { "epoch": 0.3983854390013137, "grad_norm": 0.6315188681360169, "learning_rate": 0.00018535856106924472, "loss": 12.3967, "step": 7316 }, { "epoch": 0.3984398929978967, "grad_norm": 0.7126080248284609, "learning_rate": 0.00018535396684290402, "loss": 12.4171, "step": 7317 }, { "epoch": 0.3984943469944797, "grad_norm": 0.6610001603463862, "learning_rate": 0.00018534937195283658, "loss": 12.3529, "step": 7318 }, { "epoch": 0.3985488009910627, "grad_norm": 0.622858169622087, "learning_rate": 0.00018534477639907805, "loss": 12.4491, "step": 7319 }, { "epoch": 0.39860325498764576, "grad_norm": 0.7276069110047648, "learning_rate": 0.00018534018018166428, "loss": 12.4966, "step": 7320 }, { "epoch": 0.39865770898422875, "grad_norm": 0.6699895849428359, "learning_rate": 0.00018533558330063095, "loss": 12.5369, "step": 7321 }, { "epoch": 0.39871216298081175, "grad_norm": 0.7054366604545919, "learning_rate": 0.0001853309857560138, "loss": 12.4876, "step": 7322 }, { "epoch": 0.3987666169773948, "grad_norm": 0.6194120015384266, "learning_rate": 0.00018532638754784858, "loss": 12.4925, "step": 7323 }, { "epoch": 0.3988210709739778, "grad_norm": 0.6784120549830108, "learning_rate": 0.00018532178867617107, "loss": 12.4642, "step": 7324 }, { "epoch": 0.39887552497056084, "grad_norm": 0.619911638459784, "learning_rate": 0.00018531718914101703, "loss": 12.5518, "step": 7325 }, { "epoch": 0.39892997896714383, "grad_norm": 0.5986202288989504, "learning_rate": 0.00018531258894242223, "loss": 12.4409, "step": 7326 }, { "epoch": 0.3989844329637268, "grad_norm": 0.6360512448361856, "learning_rate": 0.0001853079880804224, "loss": 12.478, "step": 7327 }, { "epoch": 0.39903888696030987, "grad_norm": 0.6720223179345861, "learning_rate": 0.0001853033865550534, "loss": 12.4442, "step": 7328 }, { "epoch": 0.39909334095689286, "grad_norm": 0.6887841681858036, "learning_rate": 0.0001852987843663509, "loss": 12.5266, "step": 7329 }, { "epoch": 0.39914779495347585, "grad_norm": 0.6453691945791873, "learning_rate": 0.0001852941815143508, "loss": 12.4266, "step": 7330 }, { "epoch": 0.3992022489500589, "grad_norm": 0.6340510744267103, "learning_rate": 0.00018528957799908882, "loss": 12.5852, "step": 7331 }, { "epoch": 0.3992567029466419, "grad_norm": 0.6540445359669609, "learning_rate": 0.00018528497382060076, "loss": 12.5478, "step": 7332 }, { "epoch": 0.3993111569432249, "grad_norm": 0.6299686222054321, "learning_rate": 0.00018528036897892246, "loss": 12.5436, "step": 7333 }, { "epoch": 0.39936561093980794, "grad_norm": 0.6467089196451699, "learning_rate": 0.0001852757634740897, "loss": 12.4378, "step": 7334 }, { "epoch": 0.39942006493639093, "grad_norm": 0.6326128820218682, "learning_rate": 0.0001852711573061383, "loss": 12.5551, "step": 7335 }, { "epoch": 0.3994745189329739, "grad_norm": 0.6174531091360957, "learning_rate": 0.0001852665504751041, "loss": 12.4177, "step": 7336 }, { "epoch": 0.39952897292955697, "grad_norm": 0.621876904130212, "learning_rate": 0.0001852619429810229, "loss": 12.3719, "step": 7337 }, { "epoch": 0.39958342692613996, "grad_norm": 0.6295489899432883, "learning_rate": 0.00018525733482393055, "loss": 12.4287, "step": 7338 }, { "epoch": 0.39963788092272295, "grad_norm": 0.6920851383728188, "learning_rate": 0.00018525272600386283, "loss": 12.5537, "step": 7339 }, { "epoch": 0.399692334919306, "grad_norm": 0.5565562726035974, "learning_rate": 0.00018524811652085563, "loss": 12.2755, "step": 7340 }, { "epoch": 0.399746788915889, "grad_norm": 0.7016828793560161, "learning_rate": 0.0001852435063749448, "loss": 12.4688, "step": 7341 }, { "epoch": 0.399801242912472, "grad_norm": 0.682530812325876, "learning_rate": 0.00018523889556616612, "loss": 12.5191, "step": 7342 }, { "epoch": 0.39985569690905504, "grad_norm": 0.6143786563448566, "learning_rate": 0.00018523428409455555, "loss": 12.4387, "step": 7343 }, { "epoch": 0.39991015090563803, "grad_norm": 0.5801256496016015, "learning_rate": 0.00018522967196014887, "loss": 12.4552, "step": 7344 }, { "epoch": 0.399964604902221, "grad_norm": 0.6826321396517108, "learning_rate": 0.00018522505916298196, "loss": 12.5167, "step": 7345 }, { "epoch": 0.40001905889880407, "grad_norm": 0.6173642642335142, "learning_rate": 0.0001852204457030907, "loss": 12.3756, "step": 7346 }, { "epoch": 0.40007351289538706, "grad_norm": 0.7812678314690079, "learning_rate": 0.00018521583158051093, "loss": 12.5332, "step": 7347 }, { "epoch": 0.40012796689197005, "grad_norm": 0.6466449436057614, "learning_rate": 0.00018521121679527865, "loss": 12.4713, "step": 7348 }, { "epoch": 0.4001824208885531, "grad_norm": 0.6781616505750094, "learning_rate": 0.00018520660134742958, "loss": 12.5313, "step": 7349 }, { "epoch": 0.4002368748851361, "grad_norm": 0.6356853248099988, "learning_rate": 0.00018520198523699972, "loss": 12.5831, "step": 7350 }, { "epoch": 0.4002913288817191, "grad_norm": 0.6273074559411165, "learning_rate": 0.00018519736846402493, "loss": 12.5613, "step": 7351 }, { "epoch": 0.40034578287830214, "grad_norm": 0.7374084228928206, "learning_rate": 0.00018519275102854113, "loss": 12.6294, "step": 7352 }, { "epoch": 0.40040023687488513, "grad_norm": 0.6254505452815294, "learning_rate": 0.00018518813293058419, "loss": 12.5451, "step": 7353 }, { "epoch": 0.4004546908714681, "grad_norm": 0.6133781391891753, "learning_rate": 0.00018518351417019005, "loss": 12.3828, "step": 7354 }, { "epoch": 0.40050914486805117, "grad_norm": 0.6191631938029492, "learning_rate": 0.0001851788947473946, "loss": 12.5555, "step": 7355 }, { "epoch": 0.40056359886463416, "grad_norm": 0.6316920146031418, "learning_rate": 0.0001851742746622338, "loss": 12.35, "step": 7356 }, { "epoch": 0.40061805286121716, "grad_norm": 0.6218804069826839, "learning_rate": 0.00018516965391474354, "loss": 12.498, "step": 7357 }, { "epoch": 0.4006725068578002, "grad_norm": 0.6234070247722966, "learning_rate": 0.0001851650325049598, "loss": 12.5307, "step": 7358 }, { "epoch": 0.4007269608543832, "grad_norm": 0.6824202368507671, "learning_rate": 0.00018516041043291844, "loss": 12.5668, "step": 7359 }, { "epoch": 0.40078141485096624, "grad_norm": 0.6674063532644566, "learning_rate": 0.0001851557876986555, "loss": 12.4694, "step": 7360 }, { "epoch": 0.40083586884754924, "grad_norm": 0.7075034857406978, "learning_rate": 0.00018515116430220684, "loss": 12.5614, "step": 7361 }, { "epoch": 0.40089032284413223, "grad_norm": 0.6642130474985378, "learning_rate": 0.00018514654024360847, "loss": 12.4663, "step": 7362 }, { "epoch": 0.4009447768407153, "grad_norm": 0.7743771292825637, "learning_rate": 0.0001851419155228963, "loss": 12.6379, "step": 7363 }, { "epoch": 0.40099923083729827, "grad_norm": 0.7207920923280146, "learning_rate": 0.00018513729014010632, "loss": 12.4966, "step": 7364 }, { "epoch": 0.40105368483388126, "grad_norm": 0.6934995219290194, "learning_rate": 0.0001851326640952745, "loss": 12.4124, "step": 7365 }, { "epoch": 0.4011081388304643, "grad_norm": 0.6028082259589631, "learning_rate": 0.0001851280373884368, "loss": 12.3458, "step": 7366 }, { "epoch": 0.4011625928270473, "grad_norm": 0.6905902445618471, "learning_rate": 0.0001851234100196292, "loss": 12.3137, "step": 7367 }, { "epoch": 0.4012170468236303, "grad_norm": 0.73889018111692, "learning_rate": 0.0001851187819888877, "loss": 12.4944, "step": 7368 }, { "epoch": 0.40127150082021334, "grad_norm": 0.6805249740030216, "learning_rate": 0.00018511415329624828, "loss": 12.51, "step": 7369 }, { "epoch": 0.40132595481679634, "grad_norm": 0.6078115817769721, "learning_rate": 0.00018510952394174695, "loss": 12.3567, "step": 7370 }, { "epoch": 0.40138040881337933, "grad_norm": 0.655832926013966, "learning_rate": 0.00018510489392541964, "loss": 12.421, "step": 7371 }, { "epoch": 0.4014348628099624, "grad_norm": 0.6681102398311158, "learning_rate": 0.00018510026324730246, "loss": 12.5392, "step": 7372 }, { "epoch": 0.40148931680654537, "grad_norm": 0.6801679482138852, "learning_rate": 0.0001850956319074313, "loss": 12.5106, "step": 7373 }, { "epoch": 0.40154377080312836, "grad_norm": 0.6802545273425757, "learning_rate": 0.00018509099990584227, "loss": 12.3287, "step": 7374 }, { "epoch": 0.4015982247997114, "grad_norm": 0.6294918505790813, "learning_rate": 0.00018508636724257136, "loss": 12.4195, "step": 7375 }, { "epoch": 0.4016526787962944, "grad_norm": 0.7557878557287484, "learning_rate": 0.00018508173391765457, "loss": 12.4527, "step": 7376 }, { "epoch": 0.4017071327928774, "grad_norm": 0.5810675161036324, "learning_rate": 0.00018507709993112795, "loss": 12.4873, "step": 7377 }, { "epoch": 0.40176158678946045, "grad_norm": 0.7014067302253529, "learning_rate": 0.00018507246528302757, "loss": 12.5783, "step": 7378 }, { "epoch": 0.40181604078604344, "grad_norm": 0.7097681235618133, "learning_rate": 0.00018506782997338938, "loss": 12.5405, "step": 7379 }, { "epoch": 0.40187049478262643, "grad_norm": 0.7227925006604735, "learning_rate": 0.00018506319400224953, "loss": 12.5748, "step": 7380 }, { "epoch": 0.4019249487792095, "grad_norm": 0.6721929918043782, "learning_rate": 0.000185058557369644, "loss": 12.5959, "step": 7381 }, { "epoch": 0.40197940277579247, "grad_norm": 0.6215806863778806, "learning_rate": 0.00018505392007560882, "loss": 12.6075, "step": 7382 }, { "epoch": 0.40203385677237546, "grad_norm": 0.6407617755681561, "learning_rate": 0.00018504928212018015, "loss": 12.5772, "step": 7383 }, { "epoch": 0.4020883107689585, "grad_norm": 0.6070598483952332, "learning_rate": 0.00018504464350339398, "loss": 12.4512, "step": 7384 }, { "epoch": 0.4021427647655415, "grad_norm": 0.715882401990176, "learning_rate": 0.0001850400042252864, "loss": 12.6478, "step": 7385 }, { "epoch": 0.4021972187621245, "grad_norm": 0.7149775663341427, "learning_rate": 0.00018503536428589348, "loss": 12.6278, "step": 7386 }, { "epoch": 0.40225167275870755, "grad_norm": 0.618326261294556, "learning_rate": 0.00018503072368525133, "loss": 12.4231, "step": 7387 }, { "epoch": 0.40230612675529054, "grad_norm": 0.6969232561838311, "learning_rate": 0.000185026082423396, "loss": 12.5187, "step": 7388 }, { "epoch": 0.40236058075187353, "grad_norm": 0.6183554586340254, "learning_rate": 0.00018502144050036356, "loss": 12.6005, "step": 7389 }, { "epoch": 0.4024150347484566, "grad_norm": 0.5869917083721967, "learning_rate": 0.00018501679791619018, "loss": 12.3899, "step": 7390 }, { "epoch": 0.40246948874503957, "grad_norm": 0.6950697425232363, "learning_rate": 0.0001850121546709119, "loss": 12.5312, "step": 7391 }, { "epoch": 0.4025239427416226, "grad_norm": 0.666716020931147, "learning_rate": 0.0001850075107645649, "loss": 12.5483, "step": 7392 }, { "epoch": 0.4025783967382056, "grad_norm": 0.6547101411207449, "learning_rate": 0.00018500286619718516, "loss": 12.432, "step": 7393 }, { "epoch": 0.4026328507347886, "grad_norm": 0.598399101548866, "learning_rate": 0.00018499822096880894, "loss": 12.3849, "step": 7394 }, { "epoch": 0.40268730473137165, "grad_norm": 0.6814945968918992, "learning_rate": 0.0001849935750794723, "loss": 12.3536, "step": 7395 }, { "epoch": 0.40274175872795465, "grad_norm": 0.6626293967806715, "learning_rate": 0.00018498892852921134, "loss": 12.4122, "step": 7396 }, { "epoch": 0.40279621272453764, "grad_norm": 0.6900476293869696, "learning_rate": 0.0001849842813180622, "loss": 12.55, "step": 7397 }, { "epoch": 0.4028506667211207, "grad_norm": 0.6745622136773487, "learning_rate": 0.00018497963344606106, "loss": 12.4947, "step": 7398 }, { "epoch": 0.4029051207177037, "grad_norm": 0.7267336187426208, "learning_rate": 0.00018497498491324406, "loss": 12.5677, "step": 7399 }, { "epoch": 0.4029595747142867, "grad_norm": 0.6591495967434877, "learning_rate": 0.00018497033571964727, "loss": 12.5507, "step": 7400 }, { "epoch": 0.4030140287108697, "grad_norm": 0.6366391704310337, "learning_rate": 0.00018496568586530695, "loss": 12.5178, "step": 7401 }, { "epoch": 0.4030684827074527, "grad_norm": 0.6943015623607408, "learning_rate": 0.00018496103535025918, "loss": 12.4766, "step": 7402 }, { "epoch": 0.4031229367040357, "grad_norm": 0.6496156865013774, "learning_rate": 0.00018495638417454017, "loss": 12.3915, "step": 7403 }, { "epoch": 0.40317739070061875, "grad_norm": 0.7010064864414909, "learning_rate": 0.0001849517323381861, "loss": 12.4634, "step": 7404 }, { "epoch": 0.40323184469720175, "grad_norm": 0.6761556369951273, "learning_rate": 0.00018494707984123307, "loss": 12.3944, "step": 7405 }, { "epoch": 0.40328629869378474, "grad_norm": 0.6087198016655698, "learning_rate": 0.0001849424266837173, "loss": 12.4833, "step": 7406 }, { "epoch": 0.4033407526903678, "grad_norm": 0.7169273106444296, "learning_rate": 0.00018493777286567498, "loss": 12.5492, "step": 7407 }, { "epoch": 0.4033952066869508, "grad_norm": 0.6440946600719528, "learning_rate": 0.00018493311838714232, "loss": 12.4559, "step": 7408 }, { "epoch": 0.4034496606835338, "grad_norm": 0.6450930034300398, "learning_rate": 0.00018492846324815547, "loss": 12.6247, "step": 7409 }, { "epoch": 0.4035041146801168, "grad_norm": 0.6440379111155046, "learning_rate": 0.0001849238074487506, "loss": 12.4345, "step": 7410 }, { "epoch": 0.4035585686766998, "grad_norm": 0.7276183434116561, "learning_rate": 0.00018491915098896403, "loss": 12.4772, "step": 7411 }, { "epoch": 0.4036130226732828, "grad_norm": 0.6384627542574353, "learning_rate": 0.0001849144938688319, "loss": 12.3952, "step": 7412 }, { "epoch": 0.40366747666986585, "grad_norm": 0.6931831770467287, "learning_rate": 0.0001849098360883904, "loss": 12.3868, "step": 7413 }, { "epoch": 0.40372193066644885, "grad_norm": 0.6550735530488863, "learning_rate": 0.00018490517764767578, "loss": 12.5256, "step": 7414 }, { "epoch": 0.40377638466303184, "grad_norm": 0.743756648354596, "learning_rate": 0.00018490051854672424, "loss": 12.4004, "step": 7415 }, { "epoch": 0.4038308386596149, "grad_norm": 0.6575060670971092, "learning_rate": 0.00018489585878557206, "loss": 12.5674, "step": 7416 }, { "epoch": 0.4038852926561979, "grad_norm": 0.6628270930000479, "learning_rate": 0.00018489119836425543, "loss": 12.3725, "step": 7417 }, { "epoch": 0.4039397466527809, "grad_norm": 0.6239260359465777, "learning_rate": 0.0001848865372828106, "loss": 12.3349, "step": 7418 }, { "epoch": 0.4039942006493639, "grad_norm": 0.6977175489948938, "learning_rate": 0.00018488187554127383, "loss": 12.545, "step": 7419 }, { "epoch": 0.4040486546459469, "grad_norm": 0.7582230177928753, "learning_rate": 0.00018487721313968137, "loss": 12.348, "step": 7420 }, { "epoch": 0.4041031086425299, "grad_norm": 0.6080224584126379, "learning_rate": 0.00018487255007806945, "loss": 12.5413, "step": 7421 }, { "epoch": 0.40415756263911295, "grad_norm": 0.6923559615321203, "learning_rate": 0.00018486788635647435, "loss": 12.5759, "step": 7422 }, { "epoch": 0.40421201663569595, "grad_norm": 0.6912263414510522, "learning_rate": 0.00018486322197493234, "loss": 12.4712, "step": 7423 }, { "epoch": 0.40426647063227894, "grad_norm": 0.6863212712827147, "learning_rate": 0.00018485855693347968, "loss": 12.4547, "step": 7424 }, { "epoch": 0.404320924628862, "grad_norm": 0.6403074472885916, "learning_rate": 0.00018485389123215265, "loss": 12.534, "step": 7425 }, { "epoch": 0.404375378625445, "grad_norm": 0.6368847513628307, "learning_rate": 0.00018484922487098753, "loss": 12.3169, "step": 7426 }, { "epoch": 0.40442983262202803, "grad_norm": 0.6936139800096027, "learning_rate": 0.00018484455785002063, "loss": 12.4682, "step": 7427 }, { "epoch": 0.404484286618611, "grad_norm": 0.6983739029881817, "learning_rate": 0.00018483989016928817, "loss": 12.564, "step": 7428 }, { "epoch": 0.404538740615194, "grad_norm": 0.6408473930715737, "learning_rate": 0.00018483522182882655, "loss": 12.4528, "step": 7429 }, { "epoch": 0.40459319461177706, "grad_norm": 0.9015349932579146, "learning_rate": 0.000184830552828672, "loss": 12.5665, "step": 7430 }, { "epoch": 0.40464764860836006, "grad_norm": 0.6763429151139749, "learning_rate": 0.00018482588316886083, "loss": 12.5239, "step": 7431 }, { "epoch": 0.40470210260494305, "grad_norm": 0.6513052832654872, "learning_rate": 0.0001848212128494294, "loss": 12.3781, "step": 7432 }, { "epoch": 0.4047565566015261, "grad_norm": 0.6240966308784069, "learning_rate": 0.00018481654187041396, "loss": 12.4837, "step": 7433 }, { "epoch": 0.4048110105981091, "grad_norm": 0.7165993138055703, "learning_rate": 0.00018481187023185086, "loss": 12.5838, "step": 7434 }, { "epoch": 0.4048654645946921, "grad_norm": 0.5818743862161894, "learning_rate": 0.0001848071979337765, "loss": 12.3615, "step": 7435 }, { "epoch": 0.40491991859127513, "grad_norm": 0.6636559493509343, "learning_rate": 0.00018480252497622706, "loss": 12.505, "step": 7436 }, { "epoch": 0.4049743725878581, "grad_norm": 0.646612671491056, "learning_rate": 0.00018479785135923905, "loss": 12.5242, "step": 7437 }, { "epoch": 0.4050288265844411, "grad_norm": 0.6143601759897361, "learning_rate": 0.00018479317708284865, "loss": 12.2706, "step": 7438 }, { "epoch": 0.40508328058102416, "grad_norm": 0.7183560522499322, "learning_rate": 0.00018478850214709232, "loss": 12.3756, "step": 7439 }, { "epoch": 0.40513773457760716, "grad_norm": 0.6479854547071566, "learning_rate": 0.00018478382655200636, "loss": 12.4938, "step": 7440 }, { "epoch": 0.40519218857419015, "grad_norm": 0.8079384121207523, "learning_rate": 0.00018477915029762717, "loss": 12.2812, "step": 7441 }, { "epoch": 0.4052466425707732, "grad_norm": 0.6780665302072778, "learning_rate": 0.00018477447338399107, "loss": 12.5561, "step": 7442 }, { "epoch": 0.4053010965673562, "grad_norm": 0.6485134302769748, "learning_rate": 0.00018476979581113449, "loss": 12.3967, "step": 7443 }, { "epoch": 0.4053555505639392, "grad_norm": 0.646095910126911, "learning_rate": 0.00018476511757909374, "loss": 12.5835, "step": 7444 }, { "epoch": 0.40541000456052223, "grad_norm": 0.5917501713499067, "learning_rate": 0.0001847604386879052, "loss": 12.4826, "step": 7445 }, { "epoch": 0.4054644585571052, "grad_norm": 0.6986606862332128, "learning_rate": 0.00018475575913760528, "loss": 12.429, "step": 7446 }, { "epoch": 0.4055189125536882, "grad_norm": 0.6715650695644763, "learning_rate": 0.00018475107892823039, "loss": 12.5835, "step": 7447 }, { "epoch": 0.40557336655027126, "grad_norm": 0.6512536707903471, "learning_rate": 0.00018474639805981686, "loss": 12.4181, "step": 7448 }, { "epoch": 0.40562782054685426, "grad_norm": 0.6601484346024479, "learning_rate": 0.00018474171653240116, "loss": 12.4515, "step": 7449 }, { "epoch": 0.40568227454343725, "grad_norm": 0.7259142491346082, "learning_rate": 0.00018473703434601963, "loss": 12.6206, "step": 7450 }, { "epoch": 0.4057367285400203, "grad_norm": 0.7088826468538713, "learning_rate": 0.00018473235150070873, "loss": 12.4792, "step": 7451 }, { "epoch": 0.4057911825366033, "grad_norm": 0.6843809362146842, "learning_rate": 0.00018472766799650485, "loss": 12.542, "step": 7452 }, { "epoch": 0.4058456365331863, "grad_norm": 0.6461760765926458, "learning_rate": 0.0001847229838334444, "loss": 12.4608, "step": 7453 }, { "epoch": 0.40590009052976933, "grad_norm": 0.6445512760636828, "learning_rate": 0.00018471829901156386, "loss": 12.592, "step": 7454 }, { "epoch": 0.4059545445263523, "grad_norm": 0.6575265356329029, "learning_rate": 0.0001847136135308996, "loss": 12.4246, "step": 7455 }, { "epoch": 0.4060089985229353, "grad_norm": 0.6039633442973562, "learning_rate": 0.00018470892739148807, "loss": 12.4342, "step": 7456 }, { "epoch": 0.40606345251951836, "grad_norm": 0.7255842086748665, "learning_rate": 0.0001847042405933657, "loss": 12.5407, "step": 7457 }, { "epoch": 0.40611790651610136, "grad_norm": 0.7658324790055303, "learning_rate": 0.000184699553136569, "loss": 12.4578, "step": 7458 }, { "epoch": 0.4061723605126844, "grad_norm": 0.6736769880323731, "learning_rate": 0.00018469486502113432, "loss": 12.4223, "step": 7459 }, { "epoch": 0.4062268145092674, "grad_norm": 0.646846684475632, "learning_rate": 0.00018469017624709818, "loss": 12.4377, "step": 7460 }, { "epoch": 0.4062812685058504, "grad_norm": 0.643852904553272, "learning_rate": 0.00018468548681449702, "loss": 12.4088, "step": 7461 }, { "epoch": 0.40633572250243344, "grad_norm": 0.6213086606944596, "learning_rate": 0.00018468079672336732, "loss": 12.474, "step": 7462 }, { "epoch": 0.40639017649901643, "grad_norm": 0.7137248569844133, "learning_rate": 0.00018467610597374553, "loss": 12.5347, "step": 7463 }, { "epoch": 0.4064446304955994, "grad_norm": 0.6181355049536756, "learning_rate": 0.0001846714145656682, "loss": 12.4671, "step": 7464 }, { "epoch": 0.40649908449218247, "grad_norm": 0.7793233068249615, "learning_rate": 0.0001846667224991717, "loss": 12.5739, "step": 7465 }, { "epoch": 0.40655353848876546, "grad_norm": 0.6253303433337799, "learning_rate": 0.00018466202977429256, "loss": 12.4636, "step": 7466 }, { "epoch": 0.40660799248534846, "grad_norm": 0.6846509132803774, "learning_rate": 0.00018465733639106728, "loss": 12.4586, "step": 7467 }, { "epoch": 0.4066624464819315, "grad_norm": 0.604509255157627, "learning_rate": 0.00018465264234953236, "loss": 12.5897, "step": 7468 }, { "epoch": 0.4067169004785145, "grad_norm": 0.6414263010065017, "learning_rate": 0.00018464794764972434, "loss": 12.5562, "step": 7469 }, { "epoch": 0.4067713544750975, "grad_norm": 0.6395195367346062, "learning_rate": 0.00018464325229167961, "loss": 12.5577, "step": 7470 }, { "epoch": 0.40682580847168054, "grad_norm": 0.8378691952895028, "learning_rate": 0.00018463855627543483, "loss": 12.5377, "step": 7471 }, { "epoch": 0.40688026246826353, "grad_norm": 0.5838339172660505, "learning_rate": 0.0001846338596010264, "loss": 12.4138, "step": 7472 }, { "epoch": 0.4069347164648465, "grad_norm": 0.7445508194750182, "learning_rate": 0.0001846291622684909, "loss": 12.5826, "step": 7473 }, { "epoch": 0.4069891704614296, "grad_norm": 0.7064093062710597, "learning_rate": 0.0001846244642778648, "loss": 12.525, "step": 7474 }, { "epoch": 0.40704362445801257, "grad_norm": 0.6330305455995636, "learning_rate": 0.00018461976562918471, "loss": 12.471, "step": 7475 }, { "epoch": 0.40709807845459556, "grad_norm": 0.6030418041469815, "learning_rate": 0.00018461506632248714, "loss": 12.4623, "step": 7476 }, { "epoch": 0.4071525324511786, "grad_norm": 0.7572696367663821, "learning_rate": 0.00018461036635780863, "loss": 12.4131, "step": 7477 }, { "epoch": 0.4072069864477616, "grad_norm": 0.7001598074872516, "learning_rate": 0.0001846056657351857, "loss": 12.4616, "step": 7478 }, { "epoch": 0.4072614404443446, "grad_norm": 0.6940485999902011, "learning_rate": 0.0001846009644546549, "loss": 12.4128, "step": 7479 }, { "epoch": 0.40731589444092764, "grad_norm": 0.70654138952586, "learning_rate": 0.00018459626251625286, "loss": 12.4186, "step": 7480 }, { "epoch": 0.40737034843751063, "grad_norm": 0.923957097435168, "learning_rate": 0.0001845915599200161, "loss": 12.4704, "step": 7481 }, { "epoch": 0.4074248024340936, "grad_norm": 0.787704032185711, "learning_rate": 0.00018458685666598114, "loss": 12.5002, "step": 7482 }, { "epoch": 0.4074792564306767, "grad_norm": 0.8237434142055088, "learning_rate": 0.00018458215275418463, "loss": 12.6203, "step": 7483 }, { "epoch": 0.40753371042725967, "grad_norm": 0.7488862790251631, "learning_rate": 0.0001845774481846631, "loss": 12.3982, "step": 7484 }, { "epoch": 0.40758816442384266, "grad_norm": 0.7451931572702298, "learning_rate": 0.00018457274295745316, "loss": 12.5894, "step": 7485 }, { "epoch": 0.4076426184204257, "grad_norm": 0.6763644886168447, "learning_rate": 0.0001845680370725914, "loss": 12.4027, "step": 7486 }, { "epoch": 0.4076970724170087, "grad_norm": 0.8090062704601079, "learning_rate": 0.00018456333053011437, "loss": 12.3973, "step": 7487 }, { "epoch": 0.4077515264135917, "grad_norm": 0.6766492711891675, "learning_rate": 0.00018455862333005872, "loss": 12.5136, "step": 7488 }, { "epoch": 0.40780598041017474, "grad_norm": 0.7354799773717808, "learning_rate": 0.000184553915472461, "loss": 12.5156, "step": 7489 }, { "epoch": 0.40786043440675773, "grad_norm": 0.7358787223055255, "learning_rate": 0.0001845492069573579, "loss": 12.5716, "step": 7490 }, { "epoch": 0.4079148884033407, "grad_norm": 0.7940397944309273, "learning_rate": 0.00018454449778478597, "loss": 12.6003, "step": 7491 }, { "epoch": 0.4079693423999238, "grad_norm": 0.7276279277087789, "learning_rate": 0.00018453978795478183, "loss": 12.5072, "step": 7492 }, { "epoch": 0.40802379639650677, "grad_norm": 0.65622616537559, "learning_rate": 0.00018453507746738217, "loss": 12.4548, "step": 7493 }, { "epoch": 0.4080782503930898, "grad_norm": 0.6544295804803718, "learning_rate": 0.00018453036632262352, "loss": 12.5034, "step": 7494 }, { "epoch": 0.4081327043896728, "grad_norm": 0.6667584813889628, "learning_rate": 0.0001845256545205426, "loss": 12.3758, "step": 7495 }, { "epoch": 0.4081871583862558, "grad_norm": 0.7293652650925002, "learning_rate": 0.000184520942061176, "loss": 12.4498, "step": 7496 }, { "epoch": 0.40824161238283885, "grad_norm": 0.6768095263738398, "learning_rate": 0.00018451622894456038, "loss": 12.4563, "step": 7497 }, { "epoch": 0.40829606637942184, "grad_norm": 0.7193329277422399, "learning_rate": 0.0001845115151707324, "loss": 12.6072, "step": 7498 }, { "epoch": 0.40835052037600483, "grad_norm": 0.6847329382109661, "learning_rate": 0.00018450680073972867, "loss": 12.4992, "step": 7499 }, { "epoch": 0.4084049743725879, "grad_norm": 0.6414975022868673, "learning_rate": 0.00018450208565158594, "loss": 12.4932, "step": 7500 }, { "epoch": 0.4084594283691709, "grad_norm": 0.6553741533966808, "learning_rate": 0.0001844973699063408, "loss": 12.4239, "step": 7501 }, { "epoch": 0.40851388236575387, "grad_norm": 0.6885331621744704, "learning_rate": 0.00018449265350402994, "loss": 12.4297, "step": 7502 }, { "epoch": 0.4085683363623369, "grad_norm": 0.6928052080715973, "learning_rate": 0.00018448793644469002, "loss": 12.485, "step": 7503 }, { "epoch": 0.4086227903589199, "grad_norm": 0.6302817729042274, "learning_rate": 0.00018448321872835773, "loss": 12.4015, "step": 7504 }, { "epoch": 0.4086772443555029, "grad_norm": 0.7513722166710587, "learning_rate": 0.0001844785003550698, "loss": 12.3403, "step": 7505 }, { "epoch": 0.40873169835208595, "grad_norm": 0.6321223134835887, "learning_rate": 0.00018447378132486288, "loss": 12.3748, "step": 7506 }, { "epoch": 0.40878615234866894, "grad_norm": 0.7182095663938053, "learning_rate": 0.00018446906163777365, "loss": 12.5056, "step": 7507 }, { "epoch": 0.40884060634525193, "grad_norm": 0.7045543146464835, "learning_rate": 0.00018446434129383885, "loss": 12.435, "step": 7508 }, { "epoch": 0.408895060341835, "grad_norm": 0.6874610053789847, "learning_rate": 0.00018445962029309514, "loss": 12.548, "step": 7509 }, { "epoch": 0.408949514338418, "grad_norm": 0.6395404723303102, "learning_rate": 0.00018445489863557927, "loss": 12.5048, "step": 7510 }, { "epoch": 0.40900396833500097, "grad_norm": 0.6515443035408928, "learning_rate": 0.00018445017632132794, "loss": 12.407, "step": 7511 }, { "epoch": 0.409058422331584, "grad_norm": 0.6420344734182722, "learning_rate": 0.0001844454533503779, "loss": 12.4882, "step": 7512 }, { "epoch": 0.409112876328167, "grad_norm": 0.6720582995151158, "learning_rate": 0.00018444072972276584, "loss": 12.7021, "step": 7513 }, { "epoch": 0.40916733032475, "grad_norm": 0.6124770996425045, "learning_rate": 0.00018443600543852851, "loss": 12.3944, "step": 7514 }, { "epoch": 0.40922178432133305, "grad_norm": 0.6943339952126241, "learning_rate": 0.00018443128049770263, "loss": 12.4117, "step": 7515 }, { "epoch": 0.40927623831791604, "grad_norm": 0.5849011559469621, "learning_rate": 0.00018442655490032498, "loss": 12.2855, "step": 7516 }, { "epoch": 0.40933069231449903, "grad_norm": 0.7283499647656516, "learning_rate": 0.00018442182864643228, "loss": 12.4723, "step": 7517 }, { "epoch": 0.4093851463110821, "grad_norm": 0.7162779493886587, "learning_rate": 0.00018441710173606123, "loss": 12.593, "step": 7518 }, { "epoch": 0.4094396003076651, "grad_norm": 0.6180021705599357, "learning_rate": 0.00018441237416924868, "loss": 12.5123, "step": 7519 }, { "epoch": 0.40949405430424807, "grad_norm": 0.6582569756408991, "learning_rate": 0.00018440764594603135, "loss": 12.4382, "step": 7520 }, { "epoch": 0.4095485083008311, "grad_norm": 0.6772835849000756, "learning_rate": 0.00018440291706644602, "loss": 12.5194, "step": 7521 }, { "epoch": 0.4096029622974141, "grad_norm": 0.6426675103958575, "learning_rate": 0.00018439818753052944, "loss": 12.4174, "step": 7522 }, { "epoch": 0.4096574162939971, "grad_norm": 0.737761035297852, "learning_rate": 0.0001843934573383184, "loss": 12.4666, "step": 7523 }, { "epoch": 0.40971187029058015, "grad_norm": 0.7037342785824447, "learning_rate": 0.00018438872648984965, "loss": 12.5767, "step": 7524 }, { "epoch": 0.40976632428716314, "grad_norm": 0.7693803436884855, "learning_rate": 0.00018438399498516006, "loss": 12.6765, "step": 7525 }, { "epoch": 0.4098207782837462, "grad_norm": 0.6823533278812086, "learning_rate": 0.00018437926282428637, "loss": 12.5121, "step": 7526 }, { "epoch": 0.4098752322803292, "grad_norm": 0.7116566941304763, "learning_rate": 0.00018437453000726538, "loss": 12.5272, "step": 7527 }, { "epoch": 0.4099296862769122, "grad_norm": 0.6472268275714508, "learning_rate": 0.00018436979653413385, "loss": 12.619, "step": 7528 }, { "epoch": 0.4099841402734952, "grad_norm": 0.625849243204641, "learning_rate": 0.0001843650624049287, "loss": 12.3751, "step": 7529 }, { "epoch": 0.4100385942700782, "grad_norm": 0.6331093988199687, "learning_rate": 0.00018436032761968662, "loss": 12.4024, "step": 7530 }, { "epoch": 0.4100930482666612, "grad_norm": 0.6527662119389117, "learning_rate": 0.00018435559217844452, "loss": 12.324, "step": 7531 }, { "epoch": 0.41014750226324426, "grad_norm": 0.6435280590335135, "learning_rate": 0.0001843508560812392, "loss": 12.5237, "step": 7532 }, { "epoch": 0.41020195625982725, "grad_norm": 0.602130591757125, "learning_rate": 0.00018434611932810743, "loss": 12.5467, "step": 7533 }, { "epoch": 0.41025641025641024, "grad_norm": 0.6666486375237347, "learning_rate": 0.00018434138191908615, "loss": 12.5347, "step": 7534 }, { "epoch": 0.4103108642529933, "grad_norm": 0.63944993563378, "learning_rate": 0.0001843366438542121, "loss": 12.6362, "step": 7535 }, { "epoch": 0.4103653182495763, "grad_norm": 0.6376619610146421, "learning_rate": 0.00018433190513352218, "loss": 12.4419, "step": 7536 }, { "epoch": 0.4104197722461593, "grad_norm": 0.6372513569437053, "learning_rate": 0.0001843271657570532, "loss": 12.5039, "step": 7537 }, { "epoch": 0.4104742262427423, "grad_norm": 0.913361086553666, "learning_rate": 0.00018432242572484205, "loss": 12.601, "step": 7538 }, { "epoch": 0.4105286802393253, "grad_norm": 0.6746220751420223, "learning_rate": 0.00018431768503692557, "loss": 12.5318, "step": 7539 }, { "epoch": 0.4105831342359083, "grad_norm": 0.5908373611740666, "learning_rate": 0.00018431294369334065, "loss": 12.4197, "step": 7540 }, { "epoch": 0.41063758823249136, "grad_norm": 0.5960354209167404, "learning_rate": 0.00018430820169412413, "loss": 12.3466, "step": 7541 }, { "epoch": 0.41069204222907435, "grad_norm": 0.6649575614542876, "learning_rate": 0.0001843034590393129, "loss": 12.4657, "step": 7542 }, { "epoch": 0.41074649622565734, "grad_norm": 0.6638348440625013, "learning_rate": 0.0001842987157289438, "loss": 12.5338, "step": 7543 }, { "epoch": 0.4108009502222404, "grad_norm": 0.6651550518063203, "learning_rate": 0.00018429397176305382, "loss": 12.337, "step": 7544 }, { "epoch": 0.4108554042188234, "grad_norm": 0.6785908004988509, "learning_rate": 0.0001842892271416797, "loss": 12.4929, "step": 7545 }, { "epoch": 0.4109098582154064, "grad_norm": 0.6328267972406906, "learning_rate": 0.00018428448186485848, "loss": 12.4313, "step": 7546 }, { "epoch": 0.4109643122119894, "grad_norm": 0.5950867640188962, "learning_rate": 0.00018427973593262696, "loss": 12.3173, "step": 7547 }, { "epoch": 0.4110187662085724, "grad_norm": 0.7421067335083728, "learning_rate": 0.0001842749893450221, "loss": 12.7487, "step": 7548 }, { "epoch": 0.4110732202051554, "grad_norm": 0.6206223920094129, "learning_rate": 0.00018427024210208078, "loss": 12.3902, "step": 7549 }, { "epoch": 0.41112767420173846, "grad_norm": 0.6604135706333086, "learning_rate": 0.0001842654942038399, "loss": 12.3789, "step": 7550 }, { "epoch": 0.41118212819832145, "grad_norm": 0.6180331174072714, "learning_rate": 0.00018426074565033645, "loss": 12.5446, "step": 7551 }, { "epoch": 0.41123658219490444, "grad_norm": 0.6358907770850538, "learning_rate": 0.00018425599644160726, "loss": 12.3478, "step": 7552 }, { "epoch": 0.4112910361914875, "grad_norm": 0.7649458824755346, "learning_rate": 0.0001842512465776894, "loss": 12.4298, "step": 7553 }, { "epoch": 0.4113454901880705, "grad_norm": 0.661010889082727, "learning_rate": 0.0001842464960586196, "loss": 12.4604, "step": 7554 }, { "epoch": 0.4113999441846535, "grad_norm": 0.5670705848084314, "learning_rate": 0.000184241744884435, "loss": 12.3934, "step": 7555 }, { "epoch": 0.4114543981812365, "grad_norm": 0.6599957599839233, "learning_rate": 0.00018423699305517244, "loss": 12.5605, "step": 7556 }, { "epoch": 0.4115088521778195, "grad_norm": 0.6912314311352966, "learning_rate": 0.00018423224057086888, "loss": 12.56, "step": 7557 }, { "epoch": 0.4115633061744025, "grad_norm": 0.6548538166488451, "learning_rate": 0.00018422748743156134, "loss": 12.347, "step": 7558 }, { "epoch": 0.41161776017098556, "grad_norm": 0.6104316468510993, "learning_rate": 0.0001842227336372867, "loss": 12.5359, "step": 7559 }, { "epoch": 0.41167221416756855, "grad_norm": 0.7374021548975841, "learning_rate": 0.00018421797918808194, "loss": 12.5152, "step": 7560 }, { "epoch": 0.4117266681641516, "grad_norm": 0.6458091136832962, "learning_rate": 0.00018421322408398408, "loss": 12.5076, "step": 7561 }, { "epoch": 0.4117811221607346, "grad_norm": 0.6777619018612451, "learning_rate": 0.00018420846832503006, "loss": 12.6523, "step": 7562 }, { "epoch": 0.4118355761573176, "grad_norm": 0.5836581592065011, "learning_rate": 0.00018420371191125686, "loss": 12.2898, "step": 7563 }, { "epoch": 0.41189003015390063, "grad_norm": 0.728462007314925, "learning_rate": 0.0001841989548427015, "loss": 12.6287, "step": 7564 }, { "epoch": 0.4119444841504836, "grad_norm": 0.7025051926962401, "learning_rate": 0.00018419419711940093, "loss": 12.4986, "step": 7565 }, { "epoch": 0.4119989381470666, "grad_norm": 0.6196466337363593, "learning_rate": 0.00018418943874139217, "loss": 12.4648, "step": 7566 }, { "epoch": 0.41205339214364967, "grad_norm": 0.6256416043269872, "learning_rate": 0.00018418467970871222, "loss": 12.3932, "step": 7567 }, { "epoch": 0.41210784614023266, "grad_norm": 0.636122420202276, "learning_rate": 0.00018417992002139807, "loss": 12.3566, "step": 7568 }, { "epoch": 0.41216230013681565, "grad_norm": 0.6657010523105482, "learning_rate": 0.00018417515967948672, "loss": 12.3594, "step": 7569 }, { "epoch": 0.4122167541333987, "grad_norm": 0.6719196758228524, "learning_rate": 0.00018417039868301528, "loss": 12.5234, "step": 7570 }, { "epoch": 0.4122712081299817, "grad_norm": 0.5998339464207478, "learning_rate": 0.00018416563703202064, "loss": 12.5577, "step": 7571 }, { "epoch": 0.4123256621265647, "grad_norm": 0.8298355480163812, "learning_rate": 0.00018416087472653992, "loss": 12.6147, "step": 7572 }, { "epoch": 0.41238011612314773, "grad_norm": 0.6592033463612728, "learning_rate": 0.0001841561117666101, "loss": 12.3951, "step": 7573 }, { "epoch": 0.4124345701197307, "grad_norm": 0.7207947352174678, "learning_rate": 0.00018415134815226826, "loss": 12.4531, "step": 7574 }, { "epoch": 0.4124890241163137, "grad_norm": 0.6148861802788756, "learning_rate": 0.00018414658388355145, "loss": 12.4126, "step": 7575 }, { "epoch": 0.41254347811289677, "grad_norm": 0.6689104279292235, "learning_rate": 0.00018414181896049664, "loss": 12.5419, "step": 7576 }, { "epoch": 0.41259793210947976, "grad_norm": 0.7014344990842885, "learning_rate": 0.00018413705338314097, "loss": 12.466, "step": 7577 }, { "epoch": 0.41265238610606275, "grad_norm": 0.6714153913382441, "learning_rate": 0.00018413228715152147, "loss": 12.5583, "step": 7578 }, { "epoch": 0.4127068401026458, "grad_norm": 0.7751802041986378, "learning_rate": 0.00018412752026567518, "loss": 12.6571, "step": 7579 }, { "epoch": 0.4127612940992288, "grad_norm": 0.6278998134838513, "learning_rate": 0.0001841227527256392, "loss": 12.51, "step": 7580 }, { "epoch": 0.4128157480958118, "grad_norm": 0.7082243757340546, "learning_rate": 0.00018411798453145056, "loss": 12.5604, "step": 7581 }, { "epoch": 0.41287020209239483, "grad_norm": 0.6326366117703875, "learning_rate": 0.00018411321568314638, "loss": 12.3858, "step": 7582 }, { "epoch": 0.4129246560889778, "grad_norm": 0.6677705390508786, "learning_rate": 0.00018410844618076372, "loss": 12.5072, "step": 7583 }, { "epoch": 0.4129791100855608, "grad_norm": 0.6679816766096263, "learning_rate": 0.00018410367602433972, "loss": 12.5658, "step": 7584 }, { "epoch": 0.41303356408214387, "grad_norm": 0.7019511202137079, "learning_rate": 0.00018409890521391137, "loss": 12.5263, "step": 7585 }, { "epoch": 0.41308801807872686, "grad_norm": 0.6543623414632546, "learning_rate": 0.00018409413374951587, "loss": 12.4733, "step": 7586 }, { "epoch": 0.41314247207530985, "grad_norm": 0.6004737182626931, "learning_rate": 0.0001840893616311903, "loss": 12.4799, "step": 7587 }, { "epoch": 0.4131969260718929, "grad_norm": 0.6714918977825168, "learning_rate": 0.0001840845888589717, "loss": 12.5035, "step": 7588 }, { "epoch": 0.4132513800684759, "grad_norm": 0.6687905292852764, "learning_rate": 0.00018407981543289726, "loss": 12.4394, "step": 7589 }, { "epoch": 0.4133058340650589, "grad_norm": 0.5822307505939365, "learning_rate": 0.00018407504135300407, "loss": 12.4167, "step": 7590 }, { "epoch": 0.41336028806164193, "grad_norm": 0.6797044733620369, "learning_rate": 0.00018407026661932928, "loss": 12.5009, "step": 7591 }, { "epoch": 0.4134147420582249, "grad_norm": 0.6662696811313099, "learning_rate": 0.00018406549123190996, "loss": 12.4285, "step": 7592 }, { "epoch": 0.413469196054808, "grad_norm": 0.6384663985910454, "learning_rate": 0.0001840607151907833, "loss": 12.4338, "step": 7593 }, { "epoch": 0.41352365005139097, "grad_norm": 0.6563451161415367, "learning_rate": 0.00018405593849598644, "loss": 12.5337, "step": 7594 }, { "epoch": 0.41357810404797396, "grad_norm": 0.6007396621690622, "learning_rate": 0.00018405116114755647, "loss": 12.4726, "step": 7595 }, { "epoch": 0.413632558044557, "grad_norm": 0.6356480095698379, "learning_rate": 0.00018404638314553062, "loss": 12.3176, "step": 7596 }, { "epoch": 0.41368701204114, "grad_norm": 0.6362794038608103, "learning_rate": 0.00018404160448994597, "loss": 12.454, "step": 7597 }, { "epoch": 0.413741466037723, "grad_norm": 0.6294171250211859, "learning_rate": 0.0001840368251808397, "loss": 12.415, "step": 7598 }, { "epoch": 0.41379592003430604, "grad_norm": 0.6220974427648912, "learning_rate": 0.00018403204521824903, "loss": 12.5613, "step": 7599 }, { "epoch": 0.41385037403088903, "grad_norm": 0.7140220218064345, "learning_rate": 0.00018402726460221104, "loss": 12.4242, "step": 7600 }, { "epoch": 0.413904828027472, "grad_norm": 0.6264698242072783, "learning_rate": 0.00018402248333276297, "loss": 12.4845, "step": 7601 }, { "epoch": 0.4139592820240551, "grad_norm": 0.6131916443343639, "learning_rate": 0.00018401770140994198, "loss": 12.4454, "step": 7602 }, { "epoch": 0.41401373602063807, "grad_norm": 0.7080235427810602, "learning_rate": 0.00018401291883378523, "loss": 12.4933, "step": 7603 }, { "epoch": 0.41406819001722106, "grad_norm": 0.7726758600328698, "learning_rate": 0.00018400813560432997, "loss": 12.4119, "step": 7604 }, { "epoch": 0.4141226440138041, "grad_norm": 0.6969997285025378, "learning_rate": 0.00018400335172161333, "loss": 12.4852, "step": 7605 }, { "epoch": 0.4141770980103871, "grad_norm": 0.6923251834896971, "learning_rate": 0.00018399856718567256, "loss": 12.5334, "step": 7606 }, { "epoch": 0.4142315520069701, "grad_norm": 0.709215148883989, "learning_rate": 0.00018399378199654486, "loss": 12.4814, "step": 7607 }, { "epoch": 0.41428600600355314, "grad_norm": 0.7182335665032548, "learning_rate": 0.00018398899615426737, "loss": 12.5802, "step": 7608 }, { "epoch": 0.41434046000013613, "grad_norm": 0.7747472579587373, "learning_rate": 0.00018398420965887738, "loss": 12.3487, "step": 7609 }, { "epoch": 0.4143949139967191, "grad_norm": 0.6641584285672352, "learning_rate": 0.00018397942251041212, "loss": 12.4149, "step": 7610 }, { "epoch": 0.4144493679933022, "grad_norm": 0.6559558849248063, "learning_rate": 0.00018397463470890877, "loss": 12.4061, "step": 7611 }, { "epoch": 0.41450382198988517, "grad_norm": 0.6929051117161943, "learning_rate": 0.00018396984625440458, "loss": 12.4807, "step": 7612 }, { "epoch": 0.41455827598646816, "grad_norm": 0.6789925394878573, "learning_rate": 0.00018396505714693678, "loss": 12.5116, "step": 7613 }, { "epoch": 0.4146127299830512, "grad_norm": 0.7897268899930915, "learning_rate": 0.00018396026738654264, "loss": 12.432, "step": 7614 }, { "epoch": 0.4146671839796342, "grad_norm": 0.6807339651106654, "learning_rate": 0.00018395547697325933, "loss": 12.3411, "step": 7615 }, { "epoch": 0.4147216379762172, "grad_norm": 0.6682991423741861, "learning_rate": 0.00018395068590712417, "loss": 12.3832, "step": 7616 }, { "epoch": 0.41477609197280024, "grad_norm": 0.7746811225957453, "learning_rate": 0.00018394589418817443, "loss": 12.4631, "step": 7617 }, { "epoch": 0.41483054596938324, "grad_norm": 0.6964451730482353, "learning_rate": 0.00018394110181644733, "loss": 12.3519, "step": 7618 }, { "epoch": 0.41488499996596623, "grad_norm": 0.6527959058968399, "learning_rate": 0.00018393630879198013, "loss": 12.4443, "step": 7619 }, { "epoch": 0.4149394539625493, "grad_norm": 0.7411057629441736, "learning_rate": 0.0001839315151148101, "loss": 12.3707, "step": 7620 }, { "epoch": 0.41499390795913227, "grad_norm": 0.742397366365453, "learning_rate": 0.00018392672078497454, "loss": 12.5815, "step": 7621 }, { "epoch": 0.41504836195571526, "grad_norm": 0.6845206549741516, "learning_rate": 0.00018392192580251075, "loss": 12.5448, "step": 7622 }, { "epoch": 0.4151028159522983, "grad_norm": 0.8617869176209406, "learning_rate": 0.00018391713016745596, "loss": 12.5105, "step": 7623 }, { "epoch": 0.4151572699488813, "grad_norm": 0.6332044702163018, "learning_rate": 0.00018391233387984754, "loss": 12.4949, "step": 7624 }, { "epoch": 0.4152117239454643, "grad_norm": 0.6921877222300677, "learning_rate": 0.0001839075369397227, "loss": 12.44, "step": 7625 }, { "epoch": 0.41526617794204734, "grad_norm": 0.7304182216130374, "learning_rate": 0.0001839027393471188, "loss": 12.5507, "step": 7626 }, { "epoch": 0.41532063193863034, "grad_norm": 0.6844024377851525, "learning_rate": 0.00018389794110207312, "loss": 12.5578, "step": 7627 }, { "epoch": 0.4153750859352134, "grad_norm": 0.6777229470414581, "learning_rate": 0.00018389314220462296, "loss": 12.4082, "step": 7628 }, { "epoch": 0.4154295399317964, "grad_norm": 0.6432218551933113, "learning_rate": 0.0001838883426548057, "loss": 12.5191, "step": 7629 }, { "epoch": 0.41548399392837937, "grad_norm": 0.6859378464003325, "learning_rate": 0.00018388354245265858, "loss": 12.5486, "step": 7630 }, { "epoch": 0.4155384479249624, "grad_norm": 0.6671747399551233, "learning_rate": 0.000183878741598219, "loss": 12.5289, "step": 7631 }, { "epoch": 0.4155929019215454, "grad_norm": 0.6850227729337616, "learning_rate": 0.00018387394009152425, "loss": 12.5237, "step": 7632 }, { "epoch": 0.4156473559181284, "grad_norm": 0.7907722026545728, "learning_rate": 0.00018386913793261167, "loss": 12.6377, "step": 7633 }, { "epoch": 0.41570180991471145, "grad_norm": 0.7024232324059682, "learning_rate": 0.0001838643351215186, "loss": 12.4349, "step": 7634 }, { "epoch": 0.41575626391129444, "grad_norm": 0.703812480485606, "learning_rate": 0.0001838595316582824, "loss": 12.5366, "step": 7635 }, { "epoch": 0.41581071790787744, "grad_norm": 0.7252726705505731, "learning_rate": 0.00018385472754294042, "loss": 12.5311, "step": 7636 }, { "epoch": 0.4158651719044605, "grad_norm": 0.7160236603812885, "learning_rate": 0.00018384992277553001, "loss": 12.5574, "step": 7637 }, { "epoch": 0.4159196259010435, "grad_norm": 0.6561769782974751, "learning_rate": 0.00018384511735608855, "loss": 12.3621, "step": 7638 }, { "epoch": 0.41597407989762647, "grad_norm": 0.7642371517971974, "learning_rate": 0.0001838403112846534, "loss": 12.4601, "step": 7639 }, { "epoch": 0.4160285338942095, "grad_norm": 0.6832445256349677, "learning_rate": 0.00018383550456126192, "loss": 12.3823, "step": 7640 }, { "epoch": 0.4160829878907925, "grad_norm": 0.9046044716944612, "learning_rate": 0.00018383069718595153, "loss": 12.681, "step": 7641 }, { "epoch": 0.4161374418873755, "grad_norm": 0.6990758835934934, "learning_rate": 0.00018382588915875952, "loss": 12.5077, "step": 7642 }, { "epoch": 0.41619189588395855, "grad_norm": 0.818672399509247, "learning_rate": 0.00018382108047972336, "loss": 12.4796, "step": 7643 }, { "epoch": 0.41624634988054154, "grad_norm": 0.6347657355980698, "learning_rate": 0.00018381627114888045, "loss": 12.5621, "step": 7644 }, { "epoch": 0.41630080387712454, "grad_norm": 0.7051436257130543, "learning_rate": 0.00018381146116626816, "loss": 12.3868, "step": 7645 }, { "epoch": 0.4163552578737076, "grad_norm": 0.6580907573187873, "learning_rate": 0.00018380665053192386, "loss": 12.5146, "step": 7646 }, { "epoch": 0.4164097118702906, "grad_norm": 0.7377588067672651, "learning_rate": 0.00018380183924588498, "loss": 12.5493, "step": 7647 }, { "epoch": 0.41646416586687357, "grad_norm": 0.6434057170985927, "learning_rate": 0.000183797027308189, "loss": 12.4146, "step": 7648 }, { "epoch": 0.4165186198634566, "grad_norm": 0.6201660562859044, "learning_rate": 0.00018379221471887325, "loss": 12.4045, "step": 7649 }, { "epoch": 0.4165730738600396, "grad_norm": 0.7312758361049857, "learning_rate": 0.00018378740147797517, "loss": 12.5651, "step": 7650 }, { "epoch": 0.4166275278566226, "grad_norm": 0.6562949596475576, "learning_rate": 0.00018378258758553222, "loss": 12.4739, "step": 7651 }, { "epoch": 0.41668198185320565, "grad_norm": 0.8109608827473153, "learning_rate": 0.00018377777304158182, "loss": 12.3613, "step": 7652 }, { "epoch": 0.41673643584978864, "grad_norm": 0.6547422099606501, "learning_rate": 0.00018377295784616142, "loss": 12.5337, "step": 7653 }, { "epoch": 0.41679088984637164, "grad_norm": 0.7061811226970356, "learning_rate": 0.00018376814199930842, "loss": 12.5456, "step": 7654 }, { "epoch": 0.4168453438429547, "grad_norm": 0.6012608861041727, "learning_rate": 0.00018376332550106033, "loss": 12.533, "step": 7655 }, { "epoch": 0.4168997978395377, "grad_norm": 0.7006711823775731, "learning_rate": 0.00018375850835145456, "loss": 12.5389, "step": 7656 }, { "epoch": 0.41695425183612067, "grad_norm": 0.6580649051091263, "learning_rate": 0.0001837536905505286, "loss": 12.4394, "step": 7657 }, { "epoch": 0.4170087058327037, "grad_norm": 0.6760449622537299, "learning_rate": 0.00018374887209831987, "loss": 12.3788, "step": 7658 }, { "epoch": 0.4170631598292867, "grad_norm": 0.6497353580956746, "learning_rate": 0.00018374405299486588, "loss": 12.4658, "step": 7659 }, { "epoch": 0.41711761382586976, "grad_norm": 0.652611595647118, "learning_rate": 0.0001837392332402041, "loss": 12.4612, "step": 7660 }, { "epoch": 0.41717206782245275, "grad_norm": 0.6578766692847864, "learning_rate": 0.00018373441283437198, "loss": 12.4691, "step": 7661 }, { "epoch": 0.41722652181903574, "grad_norm": 0.6382698831671395, "learning_rate": 0.00018372959177740704, "loss": 12.4599, "step": 7662 }, { "epoch": 0.4172809758156188, "grad_norm": 0.6850091154967107, "learning_rate": 0.00018372477006934674, "loss": 12.4671, "step": 7663 }, { "epoch": 0.4173354298122018, "grad_norm": 0.6298292742381474, "learning_rate": 0.0001837199477102286, "loss": 12.4673, "step": 7664 }, { "epoch": 0.4173898838087848, "grad_norm": 0.6113991567651845, "learning_rate": 0.00018371512470009008, "loss": 12.3753, "step": 7665 }, { "epoch": 0.4174443378053678, "grad_norm": 0.7055999705123408, "learning_rate": 0.00018371030103896872, "loss": 12.5082, "step": 7666 }, { "epoch": 0.4174987918019508, "grad_norm": 0.6371229065148822, "learning_rate": 0.00018370547672690206, "loss": 12.4813, "step": 7667 }, { "epoch": 0.4175532457985338, "grad_norm": 0.7015226954955563, "learning_rate": 0.00018370065176392752, "loss": 12.4849, "step": 7668 }, { "epoch": 0.41760769979511686, "grad_norm": 0.6310519097840122, "learning_rate": 0.00018369582615008272, "loss": 12.4796, "step": 7669 }, { "epoch": 0.41766215379169985, "grad_norm": 0.5944925755085689, "learning_rate": 0.00018369099988540513, "loss": 12.2906, "step": 7670 }, { "epoch": 0.41771660778828285, "grad_norm": 0.6578983386471594, "learning_rate": 0.00018368617296993226, "loss": 12.5302, "step": 7671 }, { "epoch": 0.4177710617848659, "grad_norm": 0.6437522523863247, "learning_rate": 0.00018368134540370173, "loss": 12.4882, "step": 7672 }, { "epoch": 0.4178255157814489, "grad_norm": 0.6162412646017736, "learning_rate": 0.000183676517186751, "loss": 12.4843, "step": 7673 }, { "epoch": 0.4178799697780319, "grad_norm": 0.6899640431171089, "learning_rate": 0.0001836716883191176, "loss": 12.5048, "step": 7674 }, { "epoch": 0.4179344237746149, "grad_norm": 0.6672292118932008, "learning_rate": 0.00018366685880083914, "loss": 12.4681, "step": 7675 }, { "epoch": 0.4179888777711979, "grad_norm": 0.6079786159449238, "learning_rate": 0.00018366202863195316, "loss": 12.4824, "step": 7676 }, { "epoch": 0.4180433317677809, "grad_norm": 0.7544946764584993, "learning_rate": 0.00018365719781249725, "loss": 12.5876, "step": 7677 }, { "epoch": 0.41809778576436396, "grad_norm": 0.5962533245030253, "learning_rate": 0.0001836523663425089, "loss": 12.4612, "step": 7678 }, { "epoch": 0.41815223976094695, "grad_norm": 0.6123575398733694, "learning_rate": 0.00018364753422202575, "loss": 12.5313, "step": 7679 }, { "epoch": 0.41820669375752995, "grad_norm": 0.6940503398342436, "learning_rate": 0.00018364270145108531, "loss": 12.5112, "step": 7680 }, { "epoch": 0.418261147754113, "grad_norm": 0.6680628632067475, "learning_rate": 0.00018363786802972522, "loss": 12.4506, "step": 7681 }, { "epoch": 0.418315601750696, "grad_norm": 0.6543354862728545, "learning_rate": 0.00018363303395798304, "loss": 12.4137, "step": 7682 }, { "epoch": 0.418370055747279, "grad_norm": 0.6030576150582967, "learning_rate": 0.00018362819923589636, "loss": 12.4018, "step": 7683 }, { "epoch": 0.418424509743862, "grad_norm": 0.6680144702062961, "learning_rate": 0.00018362336386350275, "loss": 12.4991, "step": 7684 }, { "epoch": 0.418478963740445, "grad_norm": 0.6222373278682574, "learning_rate": 0.00018361852784083991, "loss": 12.3989, "step": 7685 }, { "epoch": 0.418533417737028, "grad_norm": 0.6886589633076576, "learning_rate": 0.0001836136911679453, "loss": 12.402, "step": 7686 }, { "epoch": 0.41858787173361106, "grad_norm": 0.6532456915278813, "learning_rate": 0.00018360885384485664, "loss": 12.4871, "step": 7687 }, { "epoch": 0.41864232573019405, "grad_norm": 1.1426480829233576, "learning_rate": 0.0001836040158716115, "loss": 12.3851, "step": 7688 }, { "epoch": 0.41869677972677705, "grad_norm": 0.7287143760029692, "learning_rate": 0.00018359917724824752, "loss": 12.582, "step": 7689 }, { "epoch": 0.4187512337233601, "grad_norm": 0.6414605281108499, "learning_rate": 0.00018359433797480234, "loss": 12.2483, "step": 7690 }, { "epoch": 0.4188056877199431, "grad_norm": 0.5842099261062814, "learning_rate": 0.00018358949805131352, "loss": 12.373, "step": 7691 }, { "epoch": 0.4188601417165261, "grad_norm": 0.6257821953679695, "learning_rate": 0.00018358465747781878, "loss": 12.4152, "step": 7692 }, { "epoch": 0.41891459571310913, "grad_norm": 0.7349478349237475, "learning_rate": 0.00018357981625435573, "loss": 12.4382, "step": 7693 }, { "epoch": 0.4189690497096921, "grad_norm": 0.6866824978335202, "learning_rate": 0.000183574974380962, "loss": 12.5315, "step": 7694 }, { "epoch": 0.41902350370627517, "grad_norm": 0.7215980246716968, "learning_rate": 0.00018357013185767526, "loss": 12.6287, "step": 7695 }, { "epoch": 0.41907795770285816, "grad_norm": 0.7164121518155415, "learning_rate": 0.00018356528868453316, "loss": 12.5496, "step": 7696 }, { "epoch": 0.41913241169944115, "grad_norm": 0.6174833109503086, "learning_rate": 0.00018356044486157334, "loss": 12.4704, "step": 7697 }, { "epoch": 0.4191868656960242, "grad_norm": 0.7061268318151425, "learning_rate": 0.00018355560038883353, "loss": 12.4084, "step": 7698 }, { "epoch": 0.4192413196926072, "grad_norm": 0.7113906455959584, "learning_rate": 0.00018355075526635132, "loss": 12.5068, "step": 7699 }, { "epoch": 0.4192957736891902, "grad_norm": 0.6160925389795541, "learning_rate": 0.00018354590949416446, "loss": 12.3842, "step": 7700 }, { "epoch": 0.41935022768577324, "grad_norm": 0.764056388356585, "learning_rate": 0.00018354106307231057, "loss": 12.5408, "step": 7701 }, { "epoch": 0.41940468168235623, "grad_norm": 0.6184383724182737, "learning_rate": 0.00018353621600082737, "loss": 12.4184, "step": 7702 }, { "epoch": 0.4194591356789392, "grad_norm": 0.7259817883996311, "learning_rate": 0.00018353136827975255, "loss": 12.4332, "step": 7703 }, { "epoch": 0.41951358967552227, "grad_norm": 0.7038668489917369, "learning_rate": 0.0001835265199091238, "loss": 12.4895, "step": 7704 }, { "epoch": 0.41956804367210526, "grad_norm": 0.6629703207999516, "learning_rate": 0.0001835216708889788, "loss": 12.4573, "step": 7705 }, { "epoch": 0.41962249766868825, "grad_norm": 0.6143947927874756, "learning_rate": 0.0001835168212193553, "loss": 12.5232, "step": 7706 }, { "epoch": 0.4196769516652713, "grad_norm": 0.6349297012237477, "learning_rate": 0.000183511970900291, "loss": 12.567, "step": 7707 }, { "epoch": 0.4197314056618543, "grad_norm": 0.824480006731654, "learning_rate": 0.0001835071199318236, "loss": 12.451, "step": 7708 }, { "epoch": 0.4197858596584373, "grad_norm": 0.658652130143729, "learning_rate": 0.00018350226831399084, "loss": 12.4251, "step": 7709 }, { "epoch": 0.41984031365502034, "grad_norm": 0.6313674763579805, "learning_rate": 0.00018349741604683045, "loss": 12.435, "step": 7710 }, { "epoch": 0.41989476765160333, "grad_norm": 0.6637861440684033, "learning_rate": 0.00018349256313038013, "loss": 12.4185, "step": 7711 }, { "epoch": 0.4199492216481863, "grad_norm": 0.7100167755541632, "learning_rate": 0.00018348770956467766, "loss": 12.5442, "step": 7712 }, { "epoch": 0.42000367564476937, "grad_norm": 0.6598267047884887, "learning_rate": 0.0001834828553497607, "loss": 12.3989, "step": 7713 }, { "epoch": 0.42005812964135236, "grad_norm": 0.6461052791122609, "learning_rate": 0.0001834780004856671, "loss": 12.3846, "step": 7714 }, { "epoch": 0.42011258363793536, "grad_norm": 0.6652200852057961, "learning_rate": 0.00018347314497243458, "loss": 12.634, "step": 7715 }, { "epoch": 0.4201670376345184, "grad_norm": 0.6463769020763129, "learning_rate": 0.0001834682888101009, "loss": 12.3472, "step": 7716 }, { "epoch": 0.4202214916311014, "grad_norm": 0.7017130866496301, "learning_rate": 0.00018346343199870374, "loss": 12.4745, "step": 7717 }, { "epoch": 0.4202759456276844, "grad_norm": 0.8069620567128515, "learning_rate": 0.000183458574538281, "loss": 12.4645, "step": 7718 }, { "epoch": 0.42033039962426744, "grad_norm": 0.6694114333255915, "learning_rate": 0.00018345371642887034, "loss": 12.4017, "step": 7719 }, { "epoch": 0.42038485362085043, "grad_norm": 0.6409125471734358, "learning_rate": 0.0001834488576705096, "loss": 12.4359, "step": 7720 }, { "epoch": 0.4204393076174334, "grad_norm": 0.697416875461442, "learning_rate": 0.0001834439982632366, "loss": 12.5134, "step": 7721 }, { "epoch": 0.42049376161401647, "grad_norm": 0.6530676618799656, "learning_rate": 0.00018343913820708903, "loss": 12.5292, "step": 7722 }, { "epoch": 0.42054821561059946, "grad_norm": 0.6277856617161583, "learning_rate": 0.00018343427750210474, "loss": 12.5292, "step": 7723 }, { "epoch": 0.42060266960718246, "grad_norm": 0.6596959812273022, "learning_rate": 0.00018342941614832149, "loss": 12.5169, "step": 7724 }, { "epoch": 0.4206571236037655, "grad_norm": 0.6553076426372618, "learning_rate": 0.00018342455414577713, "loss": 12.5536, "step": 7725 }, { "epoch": 0.4207115776003485, "grad_norm": 0.7052686980586507, "learning_rate": 0.00018341969149450943, "loss": 12.4405, "step": 7726 }, { "epoch": 0.42076603159693154, "grad_norm": 0.6433534034167304, "learning_rate": 0.00018341482819455625, "loss": 12.5039, "step": 7727 }, { "epoch": 0.42082048559351454, "grad_norm": 0.6185445890377947, "learning_rate": 0.00018340996424595537, "loss": 12.4427, "step": 7728 }, { "epoch": 0.42087493959009753, "grad_norm": 0.6349314110007439, "learning_rate": 0.0001834050996487446, "loss": 12.5068, "step": 7729 }, { "epoch": 0.4209293935866806, "grad_norm": 0.6737060455245588, "learning_rate": 0.00018340023440296182, "loss": 12.4331, "step": 7730 }, { "epoch": 0.42098384758326357, "grad_norm": 0.6676306550303723, "learning_rate": 0.00018339536850864478, "loss": 12.59, "step": 7731 }, { "epoch": 0.42103830157984656, "grad_norm": 0.673849100112351, "learning_rate": 0.00018339050196583144, "loss": 12.4062, "step": 7732 }, { "epoch": 0.4210927555764296, "grad_norm": 0.6657925192980901, "learning_rate": 0.0001833856347745595, "loss": 12.4642, "step": 7733 }, { "epoch": 0.4211472095730126, "grad_norm": 0.7768256422367489, "learning_rate": 0.00018338076693486693, "loss": 12.54, "step": 7734 }, { "epoch": 0.4212016635695956, "grad_norm": 0.6839333718371754, "learning_rate": 0.00018337589844679152, "loss": 12.6317, "step": 7735 }, { "epoch": 0.42125611756617865, "grad_norm": 0.7189502447091988, "learning_rate": 0.00018337102931037112, "loss": 12.4953, "step": 7736 }, { "epoch": 0.42131057156276164, "grad_norm": 0.622438423831334, "learning_rate": 0.00018336615952564364, "loss": 12.4795, "step": 7737 }, { "epoch": 0.42136502555934463, "grad_norm": 0.6967805388351358, "learning_rate": 0.00018336128909264692, "loss": 12.2899, "step": 7738 }, { "epoch": 0.4214194795559277, "grad_norm": 0.7283347715661034, "learning_rate": 0.00018335641801141883, "loss": 12.5149, "step": 7739 }, { "epoch": 0.42147393355251067, "grad_norm": 0.7300681925656116, "learning_rate": 0.00018335154628199726, "loss": 12.4365, "step": 7740 }, { "epoch": 0.42152838754909366, "grad_norm": 0.886704235630511, "learning_rate": 0.0001833466739044201, "loss": 12.4463, "step": 7741 }, { "epoch": 0.4215828415456767, "grad_norm": 0.6432836336371802, "learning_rate": 0.0001833418008787252, "loss": 12.4491, "step": 7742 }, { "epoch": 0.4216372955422597, "grad_norm": 0.7927372042689552, "learning_rate": 0.0001833369272049505, "loss": 12.4499, "step": 7743 }, { "epoch": 0.4216917495388427, "grad_norm": 0.6784228826890297, "learning_rate": 0.00018333205288313385, "loss": 12.4603, "step": 7744 }, { "epoch": 0.42174620353542575, "grad_norm": 0.6279210026813303, "learning_rate": 0.0001833271779133132, "loss": 12.5222, "step": 7745 }, { "epoch": 0.42180065753200874, "grad_norm": 0.6901241842062449, "learning_rate": 0.00018332230229552645, "loss": 12.5653, "step": 7746 }, { "epoch": 0.42185511152859173, "grad_norm": 0.7646765823088739, "learning_rate": 0.0001833174260298115, "loss": 12.4381, "step": 7747 }, { "epoch": 0.4219095655251748, "grad_norm": 0.5871207013967358, "learning_rate": 0.00018331254911620626, "loss": 12.3703, "step": 7748 }, { "epoch": 0.42196401952175777, "grad_norm": 0.6341582639526494, "learning_rate": 0.00018330767155474867, "loss": 12.5204, "step": 7749 }, { "epoch": 0.42201847351834076, "grad_norm": 0.7186670813039878, "learning_rate": 0.00018330279334547668, "loss": 12.5855, "step": 7750 }, { "epoch": 0.4220729275149238, "grad_norm": 0.7608887056167734, "learning_rate": 0.0001832979144884282, "loss": 12.4069, "step": 7751 }, { "epoch": 0.4221273815115068, "grad_norm": 0.7286736354425516, "learning_rate": 0.00018329303498364113, "loss": 12.5926, "step": 7752 }, { "epoch": 0.4221818355080898, "grad_norm": 0.6667611301995382, "learning_rate": 0.00018328815483115344, "loss": 12.364, "step": 7753 }, { "epoch": 0.42223628950467285, "grad_norm": 0.8559370596551145, "learning_rate": 0.00018328327403100314, "loss": 12.4806, "step": 7754 }, { "epoch": 0.42229074350125584, "grad_norm": 0.5907731907724925, "learning_rate": 0.00018327839258322812, "loss": 12.3898, "step": 7755 }, { "epoch": 0.42234519749783883, "grad_norm": 0.7229290398251434, "learning_rate": 0.00018327351048786635, "loss": 12.6165, "step": 7756 }, { "epoch": 0.4223996514944219, "grad_norm": 0.7068869446224217, "learning_rate": 0.00018326862774495578, "loss": 12.5796, "step": 7757 }, { "epoch": 0.42245410549100487, "grad_norm": 0.6291494939304955, "learning_rate": 0.00018326374435453441, "loss": 12.4155, "step": 7758 }, { "epoch": 0.42250855948758786, "grad_norm": 0.6855015933078603, "learning_rate": 0.00018325886031664022, "loss": 12.5138, "step": 7759 }, { "epoch": 0.4225630134841709, "grad_norm": 0.6254927977595047, "learning_rate": 0.00018325397563131115, "loss": 12.4501, "step": 7760 }, { "epoch": 0.4226174674807539, "grad_norm": 0.5962974777849228, "learning_rate": 0.0001832490902985852, "loss": 12.5191, "step": 7761 }, { "epoch": 0.42267192147733695, "grad_norm": 0.6723955151607713, "learning_rate": 0.00018324420431850037, "loss": 12.298, "step": 7762 }, { "epoch": 0.42272637547391995, "grad_norm": 0.7398066987283984, "learning_rate": 0.00018323931769109465, "loss": 12.5349, "step": 7763 }, { "epoch": 0.42278082947050294, "grad_norm": 0.6969458920048713, "learning_rate": 0.00018323443041640602, "loss": 12.4679, "step": 7764 }, { "epoch": 0.422835283467086, "grad_norm": 0.6126110579796679, "learning_rate": 0.00018322954249447252, "loss": 12.4865, "step": 7765 }, { "epoch": 0.422889737463669, "grad_norm": 0.7092533999165815, "learning_rate": 0.00018322465392533216, "loss": 12.5509, "step": 7766 }, { "epoch": 0.422944191460252, "grad_norm": 0.6474367495039208, "learning_rate": 0.0001832197647090229, "loss": 12.3412, "step": 7767 }, { "epoch": 0.422998645456835, "grad_norm": 0.6735126182790927, "learning_rate": 0.00018321487484558276, "loss": 12.508, "step": 7768 }, { "epoch": 0.423053099453418, "grad_norm": 0.6488168446659008, "learning_rate": 0.00018320998433504987, "loss": 12.5817, "step": 7769 }, { "epoch": 0.423107553450001, "grad_norm": 0.706447803660833, "learning_rate": 0.00018320509317746217, "loss": 12.4491, "step": 7770 }, { "epoch": 0.42316200744658405, "grad_norm": 0.690598879814916, "learning_rate": 0.00018320020137285766, "loss": 12.4504, "step": 7771 }, { "epoch": 0.42321646144316705, "grad_norm": 0.6087048075104945, "learning_rate": 0.0001831953089212745, "loss": 12.554, "step": 7772 }, { "epoch": 0.42327091543975004, "grad_norm": 0.6844601908693717, "learning_rate": 0.00018319041582275062, "loss": 12.4907, "step": 7773 }, { "epoch": 0.4233253694363331, "grad_norm": 0.6368271217345263, "learning_rate": 0.00018318552207732415, "loss": 12.5252, "step": 7774 }, { "epoch": 0.4233798234329161, "grad_norm": 0.6215840343054171, "learning_rate": 0.0001831806276850331, "loss": 12.5888, "step": 7775 }, { "epoch": 0.4234342774294991, "grad_norm": 0.6224392953398991, "learning_rate": 0.00018317573264591553, "loss": 12.4872, "step": 7776 }, { "epoch": 0.4234887314260821, "grad_norm": 0.7030493089317692, "learning_rate": 0.0001831708369600095, "loss": 12.5528, "step": 7777 }, { "epoch": 0.4235431854226651, "grad_norm": 0.6884725730053268, "learning_rate": 0.0001831659406273531, "loss": 12.4928, "step": 7778 }, { "epoch": 0.4235976394192481, "grad_norm": 0.6763106331873534, "learning_rate": 0.00018316104364798444, "loss": 12.4183, "step": 7779 }, { "epoch": 0.42365209341583115, "grad_norm": 0.6405527677746562, "learning_rate": 0.00018315614602194152, "loss": 12.4737, "step": 7780 }, { "epoch": 0.42370654741241415, "grad_norm": 0.6945702310438716, "learning_rate": 0.00018315124774926248, "loss": 12.5326, "step": 7781 }, { "epoch": 0.42376100140899714, "grad_norm": 0.6044615472501069, "learning_rate": 0.00018314634882998538, "loss": 12.3923, "step": 7782 }, { "epoch": 0.4238154554055802, "grad_norm": 0.6661273133787237, "learning_rate": 0.00018314144926414834, "loss": 12.399, "step": 7783 }, { "epoch": 0.4238699094021632, "grad_norm": 0.7311006197242605, "learning_rate": 0.00018313654905178944, "loss": 12.4251, "step": 7784 }, { "epoch": 0.4239243633987462, "grad_norm": 0.6717220372865804, "learning_rate": 0.00018313164819294678, "loss": 12.4721, "step": 7785 }, { "epoch": 0.4239788173953292, "grad_norm": 0.613043221582493, "learning_rate": 0.0001831267466876585, "loss": 12.3669, "step": 7786 }, { "epoch": 0.4240332713919122, "grad_norm": 1.0101013612937542, "learning_rate": 0.00018312184453596269, "loss": 12.5856, "step": 7787 }, { "epoch": 0.4240877253884952, "grad_norm": 0.6966337078675802, "learning_rate": 0.00018311694173789748, "loss": 12.5597, "step": 7788 }, { "epoch": 0.42414217938507826, "grad_norm": 0.6240157771149879, "learning_rate": 0.000183112038293501, "loss": 12.4881, "step": 7789 }, { "epoch": 0.42419663338166125, "grad_norm": 0.6642561139413364, "learning_rate": 0.00018310713420281133, "loss": 12.5333, "step": 7790 }, { "epoch": 0.42425108737824424, "grad_norm": 0.6291943374062113, "learning_rate": 0.00018310222946586667, "loss": 12.3764, "step": 7791 }, { "epoch": 0.4243055413748273, "grad_norm": 0.6930968219291624, "learning_rate": 0.00018309732408270516, "loss": 12.4469, "step": 7792 }, { "epoch": 0.4243599953714103, "grad_norm": 0.5953476898224157, "learning_rate": 0.00018309241805336492, "loss": 12.4047, "step": 7793 }, { "epoch": 0.42441444936799333, "grad_norm": 0.5844333523660687, "learning_rate": 0.0001830875113778841, "loss": 12.5129, "step": 7794 }, { "epoch": 0.4244689033645763, "grad_norm": 0.7102053139363087, "learning_rate": 0.00018308260405630085, "loss": 12.4892, "step": 7795 }, { "epoch": 0.4245233573611593, "grad_norm": 0.6652312253117758, "learning_rate": 0.00018307769608865332, "loss": 12.5766, "step": 7796 }, { "epoch": 0.42457781135774236, "grad_norm": 0.7115652187451849, "learning_rate": 0.00018307278747497973, "loss": 12.4086, "step": 7797 }, { "epoch": 0.42463226535432536, "grad_norm": 0.735827519599051, "learning_rate": 0.00018306787821531818, "loss": 12.6014, "step": 7798 }, { "epoch": 0.42468671935090835, "grad_norm": 0.6433664841516628, "learning_rate": 0.00018306296830970692, "loss": 12.3885, "step": 7799 }, { "epoch": 0.4247411733474914, "grad_norm": 0.6139720272683106, "learning_rate": 0.00018305805775818405, "loss": 12.4495, "step": 7800 }, { "epoch": 0.4247956273440744, "grad_norm": 0.6668703605246699, "learning_rate": 0.0001830531465607878, "loss": 12.5359, "step": 7801 }, { "epoch": 0.4248500813406574, "grad_norm": 0.630135798486746, "learning_rate": 0.00018304823471755637, "loss": 12.3706, "step": 7802 }, { "epoch": 0.42490453533724043, "grad_norm": 0.6994353450861902, "learning_rate": 0.00018304332222852793, "loss": 12.5729, "step": 7803 }, { "epoch": 0.4249589893338234, "grad_norm": 1.0254774883538031, "learning_rate": 0.0001830384090937407, "loss": 12.4567, "step": 7804 }, { "epoch": 0.4250134433304064, "grad_norm": 0.7018597963711026, "learning_rate": 0.00018303349531323287, "loss": 12.4913, "step": 7805 }, { "epoch": 0.42506789732698946, "grad_norm": 0.6812768819901065, "learning_rate": 0.00018302858088704263, "loss": 12.4232, "step": 7806 }, { "epoch": 0.42512235132357246, "grad_norm": 0.6189707932825785, "learning_rate": 0.00018302366581520824, "loss": 12.4999, "step": 7807 }, { "epoch": 0.42517680532015545, "grad_norm": 0.7008221094901383, "learning_rate": 0.00018301875009776793, "loss": 12.3748, "step": 7808 }, { "epoch": 0.4252312593167385, "grad_norm": 0.685224508822657, "learning_rate": 0.00018301383373475988, "loss": 12.483, "step": 7809 }, { "epoch": 0.4252857133133215, "grad_norm": 0.6439226473701067, "learning_rate": 0.00018300891672622232, "loss": 12.427, "step": 7810 }, { "epoch": 0.4253401673099045, "grad_norm": 0.7611108891203666, "learning_rate": 0.0001830039990721935, "loss": 12.5046, "step": 7811 }, { "epoch": 0.42539462130648753, "grad_norm": 0.67708873861166, "learning_rate": 0.0001829990807727117, "loss": 12.5311, "step": 7812 }, { "epoch": 0.4254490753030705, "grad_norm": 0.5961418714371193, "learning_rate": 0.00018299416182781508, "loss": 12.3877, "step": 7813 }, { "epoch": 0.4255035292996535, "grad_norm": 0.6176383889070957, "learning_rate": 0.00018298924223754198, "loss": 12.3607, "step": 7814 }, { "epoch": 0.42555798329623656, "grad_norm": 0.629423468089237, "learning_rate": 0.0001829843220019306, "loss": 12.42, "step": 7815 }, { "epoch": 0.42561243729281956, "grad_norm": 0.5963945048028937, "learning_rate": 0.0001829794011210192, "loss": 12.3582, "step": 7816 }, { "epoch": 0.42566689128940255, "grad_norm": 0.6719580459725143, "learning_rate": 0.00018297447959484607, "loss": 12.4179, "step": 7817 }, { "epoch": 0.4257213452859856, "grad_norm": 0.6363496362644461, "learning_rate": 0.00018296955742344947, "loss": 12.5432, "step": 7818 }, { "epoch": 0.4257757992825686, "grad_norm": 0.7035535391398001, "learning_rate": 0.0001829646346068677, "loss": 12.4458, "step": 7819 }, { "epoch": 0.4258302532791516, "grad_norm": 0.7125369757670239, "learning_rate": 0.00018295971114513901, "loss": 12.4325, "step": 7820 }, { "epoch": 0.42588470727573463, "grad_norm": 0.5689821498560794, "learning_rate": 0.00018295478703830167, "loss": 12.4462, "step": 7821 }, { "epoch": 0.4259391612723176, "grad_norm": 0.651504419915653, "learning_rate": 0.00018294986228639402, "loss": 12.5262, "step": 7822 }, { "epoch": 0.4259936152689006, "grad_norm": 0.6846627781301227, "learning_rate": 0.00018294493688945432, "loss": 12.4642, "step": 7823 }, { "epoch": 0.42604806926548366, "grad_norm": 0.5897441762051296, "learning_rate": 0.0001829400108475209, "loss": 12.3916, "step": 7824 }, { "epoch": 0.42610252326206666, "grad_norm": 0.7217302351067372, "learning_rate": 0.000182935084160632, "loss": 12.4843, "step": 7825 }, { "epoch": 0.42615697725864965, "grad_norm": 0.6421094690571366, "learning_rate": 0.000182930156828826, "loss": 12.416, "step": 7826 }, { "epoch": 0.4262114312552327, "grad_norm": 0.5840267657072733, "learning_rate": 0.00018292522885214122, "loss": 12.448, "step": 7827 }, { "epoch": 0.4262658852518157, "grad_norm": 0.5861991036359835, "learning_rate": 0.00018292030023061594, "loss": 12.4205, "step": 7828 }, { "epoch": 0.42632033924839874, "grad_norm": 0.7221993662996262, "learning_rate": 0.00018291537096428847, "loss": 12.3286, "step": 7829 }, { "epoch": 0.42637479324498173, "grad_norm": 0.6868999607560279, "learning_rate": 0.00018291044105319721, "loss": 12.5275, "step": 7830 }, { "epoch": 0.4264292472415647, "grad_norm": 0.6912120530089493, "learning_rate": 0.00018290551049738042, "loss": 12.4169, "step": 7831 }, { "epoch": 0.42648370123814777, "grad_norm": 0.6580756550522271, "learning_rate": 0.00018290057929687653, "loss": 12.6147, "step": 7832 }, { "epoch": 0.42653815523473076, "grad_norm": 0.6170758835490263, "learning_rate": 0.00018289564745172377, "loss": 12.2711, "step": 7833 }, { "epoch": 0.42659260923131376, "grad_norm": 0.7117110220895022, "learning_rate": 0.0001828907149619606, "loss": 12.4925, "step": 7834 }, { "epoch": 0.4266470632278968, "grad_norm": 0.6381204388014844, "learning_rate": 0.00018288578182762533, "loss": 12.5005, "step": 7835 }, { "epoch": 0.4267015172244798, "grad_norm": 0.6543463778852519, "learning_rate": 0.00018288084804875626, "loss": 12.6284, "step": 7836 }, { "epoch": 0.4267559712210628, "grad_norm": 0.6685516297944075, "learning_rate": 0.00018287591362539188, "loss": 12.5296, "step": 7837 }, { "epoch": 0.42681042521764584, "grad_norm": 0.8009985268061635, "learning_rate": 0.0001828709785575705, "loss": 12.407, "step": 7838 }, { "epoch": 0.42686487921422883, "grad_norm": 0.6821136473506321, "learning_rate": 0.00018286604284533045, "loss": 12.5061, "step": 7839 }, { "epoch": 0.4269193332108118, "grad_norm": 0.7352034903137306, "learning_rate": 0.0001828611064887102, "loss": 12.7067, "step": 7840 }, { "epoch": 0.4269737872073949, "grad_norm": 0.6683441393984274, "learning_rate": 0.00018285616948774807, "loss": 12.5754, "step": 7841 }, { "epoch": 0.42702824120397787, "grad_norm": 0.6816842155196888, "learning_rate": 0.00018285123184248243, "loss": 12.338, "step": 7842 }, { "epoch": 0.42708269520056086, "grad_norm": 0.6624536746483938, "learning_rate": 0.00018284629355295174, "loss": 12.5175, "step": 7843 }, { "epoch": 0.4271371491971439, "grad_norm": 0.7013469603718336, "learning_rate": 0.0001828413546191944, "loss": 12.5453, "step": 7844 }, { "epoch": 0.4271916031937269, "grad_norm": 0.6046632729996528, "learning_rate": 0.0001828364150412488, "loss": 12.2746, "step": 7845 }, { "epoch": 0.4272460571903099, "grad_norm": 0.625481111917555, "learning_rate": 0.00018283147481915334, "loss": 12.4797, "step": 7846 }, { "epoch": 0.42730051118689294, "grad_norm": 0.6424818253362229, "learning_rate": 0.00018282653395294642, "loss": 12.5027, "step": 7847 }, { "epoch": 0.42735496518347593, "grad_norm": 0.6601364261491262, "learning_rate": 0.0001828215924426665, "loss": 12.5228, "step": 7848 }, { "epoch": 0.4274094191800589, "grad_norm": 0.6606556794480325, "learning_rate": 0.00018281665028835198, "loss": 12.351, "step": 7849 }, { "epoch": 0.427463873176642, "grad_norm": 0.7391279774495332, "learning_rate": 0.0001828117074900413, "loss": 12.6615, "step": 7850 }, { "epoch": 0.42751832717322497, "grad_norm": 0.666721835432075, "learning_rate": 0.0001828067640477729, "loss": 12.4141, "step": 7851 }, { "epoch": 0.42757278116980796, "grad_norm": 0.6556499417303358, "learning_rate": 0.00018280181996158522, "loss": 12.5608, "step": 7852 }, { "epoch": 0.427627235166391, "grad_norm": 0.7136819769413728, "learning_rate": 0.0001827968752315167, "loss": 12.6186, "step": 7853 }, { "epoch": 0.427681689162974, "grad_norm": 0.6494044088806241, "learning_rate": 0.00018279192985760578, "loss": 12.4636, "step": 7854 }, { "epoch": 0.427736143159557, "grad_norm": 0.6681216329881955, "learning_rate": 0.00018278698383989092, "loss": 12.5388, "step": 7855 }, { "epoch": 0.42779059715614004, "grad_norm": 0.6351791202571165, "learning_rate": 0.00018278203717841062, "loss": 12.3689, "step": 7856 }, { "epoch": 0.42784505115272303, "grad_norm": 0.7121964039171422, "learning_rate": 0.0001827770898732033, "loss": 12.475, "step": 7857 }, { "epoch": 0.427899505149306, "grad_norm": 0.7322361257100048, "learning_rate": 0.00018277214192430745, "loss": 12.3005, "step": 7858 }, { "epoch": 0.4279539591458891, "grad_norm": 0.7110469435756508, "learning_rate": 0.00018276719333176154, "loss": 12.5725, "step": 7859 }, { "epoch": 0.42800841314247207, "grad_norm": 0.7114305715474563, "learning_rate": 0.00018276224409560405, "loss": 12.3835, "step": 7860 }, { "epoch": 0.4280628671390551, "grad_norm": 0.6554373743972242, "learning_rate": 0.00018275729421587348, "loss": 12.4473, "step": 7861 }, { "epoch": 0.4281173211356381, "grad_norm": 0.614966738063642, "learning_rate": 0.0001827523436926083, "loss": 12.5055, "step": 7862 }, { "epoch": 0.4281717751322211, "grad_norm": 0.7763242654060049, "learning_rate": 0.00018274739252584706, "loss": 12.589, "step": 7863 }, { "epoch": 0.42822622912880415, "grad_norm": 0.595923750032374, "learning_rate": 0.00018274244071562817, "loss": 12.4712, "step": 7864 }, { "epoch": 0.42828068312538714, "grad_norm": 0.66789073012322, "learning_rate": 0.0001827374882619902, "loss": 12.5092, "step": 7865 }, { "epoch": 0.42833513712197013, "grad_norm": 0.6993718554608768, "learning_rate": 0.00018273253516497168, "loss": 12.2774, "step": 7866 }, { "epoch": 0.4283895911185532, "grad_norm": 0.6853933435979174, "learning_rate": 0.00018272758142461103, "loss": 12.4401, "step": 7867 }, { "epoch": 0.4284440451151362, "grad_norm": 0.6046127331466988, "learning_rate": 0.00018272262704094688, "loss": 12.4843, "step": 7868 }, { "epoch": 0.42849849911171917, "grad_norm": 0.7031626455104183, "learning_rate": 0.0001827176720140177, "loss": 12.4454, "step": 7869 }, { "epoch": 0.4285529531083022, "grad_norm": 0.6102750691205855, "learning_rate": 0.000182712716343862, "loss": 12.2383, "step": 7870 }, { "epoch": 0.4286074071048852, "grad_norm": 0.6430406643586948, "learning_rate": 0.0001827077600305184, "loss": 12.3706, "step": 7871 }, { "epoch": 0.4286618611014682, "grad_norm": 0.7153973691384076, "learning_rate": 0.00018270280307402533, "loss": 12.4636, "step": 7872 }, { "epoch": 0.42871631509805125, "grad_norm": 0.6111169539665425, "learning_rate": 0.00018269784547442143, "loss": 12.5269, "step": 7873 }, { "epoch": 0.42877076909463424, "grad_norm": 0.6605820807457244, "learning_rate": 0.00018269288723174522, "loss": 12.5237, "step": 7874 }, { "epoch": 0.42882522309121723, "grad_norm": 0.7294481892645149, "learning_rate": 0.0001826879283460352, "loss": 12.3791, "step": 7875 }, { "epoch": 0.4288796770878003, "grad_norm": 0.7080461100945729, "learning_rate": 0.00018268296881733005, "loss": 12.378, "step": 7876 }, { "epoch": 0.4289341310843833, "grad_norm": 0.6910783653995367, "learning_rate": 0.00018267800864566825, "loss": 12.4422, "step": 7877 }, { "epoch": 0.42898858508096627, "grad_norm": 0.6612434872957814, "learning_rate": 0.00018267304783108838, "loss": 12.3455, "step": 7878 }, { "epoch": 0.4290430390775493, "grad_norm": 0.6711938850299602, "learning_rate": 0.000182668086373629, "loss": 12.3548, "step": 7879 }, { "epoch": 0.4290974930741323, "grad_norm": 0.626907677513012, "learning_rate": 0.00018266312427332878, "loss": 12.5163, "step": 7880 }, { "epoch": 0.4291519470707153, "grad_norm": 0.6803065095321489, "learning_rate": 0.0001826581615302262, "loss": 12.3781, "step": 7881 }, { "epoch": 0.42920640106729835, "grad_norm": 0.7196134342169394, "learning_rate": 0.00018265319814435988, "loss": 12.4378, "step": 7882 }, { "epoch": 0.42926085506388134, "grad_norm": 0.6925727268427583, "learning_rate": 0.00018264823411576846, "loss": 12.599, "step": 7883 }, { "epoch": 0.42931530906046433, "grad_norm": 0.6023802851561011, "learning_rate": 0.00018264326944449048, "loss": 12.4611, "step": 7884 }, { "epoch": 0.4293697630570474, "grad_norm": 0.7294334658982401, "learning_rate": 0.0001826383041305646, "loss": 12.4378, "step": 7885 }, { "epoch": 0.4294242170536304, "grad_norm": 0.6625800290992043, "learning_rate": 0.0001826333381740294, "loss": 12.4643, "step": 7886 }, { "epoch": 0.42947867105021337, "grad_norm": 0.7471763593863193, "learning_rate": 0.00018262837157492353, "loss": 12.4879, "step": 7887 }, { "epoch": 0.4295331250467964, "grad_norm": 0.6568300177128303, "learning_rate": 0.00018262340433328558, "loss": 12.3321, "step": 7888 }, { "epoch": 0.4295875790433794, "grad_norm": 0.7096946912181274, "learning_rate": 0.00018261843644915417, "loss": 12.5937, "step": 7889 }, { "epoch": 0.4296420330399624, "grad_norm": 0.6339033309753559, "learning_rate": 0.00018261346792256794, "loss": 12.5053, "step": 7890 }, { "epoch": 0.42969648703654545, "grad_norm": 0.6136133084916588, "learning_rate": 0.00018260849875356553, "loss": 12.4255, "step": 7891 }, { "epoch": 0.42975094103312844, "grad_norm": 0.5945688377929977, "learning_rate": 0.0001826035289421856, "loss": 12.4445, "step": 7892 }, { "epoch": 0.42980539502971143, "grad_norm": 0.7462867216348839, "learning_rate": 0.00018259855848846675, "loss": 12.4613, "step": 7893 }, { "epoch": 0.4298598490262945, "grad_norm": 0.6569392321802703, "learning_rate": 0.00018259358739244766, "loss": 12.5073, "step": 7894 }, { "epoch": 0.4299143030228775, "grad_norm": 0.5983744188237977, "learning_rate": 0.00018258861565416702, "loss": 12.4993, "step": 7895 }, { "epoch": 0.4299687570194605, "grad_norm": 0.7053622283322101, "learning_rate": 0.0001825836432736634, "loss": 12.3607, "step": 7896 }, { "epoch": 0.4300232110160435, "grad_norm": 0.7054846914578173, "learning_rate": 0.00018257867025097554, "loss": 12.5458, "step": 7897 }, { "epoch": 0.4300776650126265, "grad_norm": 0.6949317721624084, "learning_rate": 0.00018257369658614212, "loss": 12.4957, "step": 7898 }, { "epoch": 0.43013211900920956, "grad_norm": 0.6836307012239016, "learning_rate": 0.00018256872227920173, "loss": 12.479, "step": 7899 }, { "epoch": 0.43018657300579255, "grad_norm": 0.7228802923883042, "learning_rate": 0.00018256374733019315, "loss": 12.3583, "step": 7900 }, { "epoch": 0.43024102700237554, "grad_norm": 0.9284099241529163, "learning_rate": 0.00018255877173915504, "loss": 12.5031, "step": 7901 }, { "epoch": 0.4302954809989586, "grad_norm": 0.6694914115117954, "learning_rate": 0.00018255379550612605, "loss": 12.4756, "step": 7902 }, { "epoch": 0.4303499349955416, "grad_norm": 0.6478362302886246, "learning_rate": 0.0001825488186311449, "loss": 12.558, "step": 7903 }, { "epoch": 0.4304043889921246, "grad_norm": 0.6546490002593253, "learning_rate": 0.0001825438411142503, "loss": 12.4417, "step": 7904 }, { "epoch": 0.4304588429887076, "grad_norm": 0.7087779462299327, "learning_rate": 0.00018253886295548094, "loss": 12.5224, "step": 7905 }, { "epoch": 0.4305132969852906, "grad_norm": 0.6993477111814185, "learning_rate": 0.00018253388415487556, "loss": 12.5954, "step": 7906 }, { "epoch": 0.4305677509818736, "grad_norm": 0.6535366877281594, "learning_rate": 0.00018252890471247285, "loss": 12.3998, "step": 7907 }, { "epoch": 0.43062220497845666, "grad_norm": 0.6509191262966944, "learning_rate": 0.00018252392462831153, "loss": 12.3903, "step": 7908 }, { "epoch": 0.43067665897503965, "grad_norm": 0.7174800548505674, "learning_rate": 0.00018251894390243031, "loss": 12.4249, "step": 7909 }, { "epoch": 0.43073111297162264, "grad_norm": 0.7117353466917853, "learning_rate": 0.00018251396253486798, "loss": 12.5, "step": 7910 }, { "epoch": 0.4307855669682057, "grad_norm": 0.5842736337850675, "learning_rate": 0.00018250898052566322, "loss": 12.3333, "step": 7911 }, { "epoch": 0.4308400209647887, "grad_norm": 0.626341575150017, "learning_rate": 0.0001825039978748548, "loss": 12.4355, "step": 7912 }, { "epoch": 0.4308944749613717, "grad_norm": 0.6902559185033911, "learning_rate": 0.00018249901458248146, "loss": 12.5578, "step": 7913 }, { "epoch": 0.4309489289579547, "grad_norm": 0.6996091778217305, "learning_rate": 0.00018249403064858193, "loss": 12.4151, "step": 7914 }, { "epoch": 0.4310033829545377, "grad_norm": 0.6589561733097002, "learning_rate": 0.000182489046073195, "loss": 12.5248, "step": 7915 }, { "epoch": 0.4310578369511207, "grad_norm": 0.6274633751809366, "learning_rate": 0.00018248406085635943, "loss": 12.3801, "step": 7916 }, { "epoch": 0.43111229094770376, "grad_norm": 0.7625790300943449, "learning_rate": 0.00018247907499811393, "loss": 12.5618, "step": 7917 }, { "epoch": 0.43116674494428675, "grad_norm": 0.6880714481481783, "learning_rate": 0.00018247408849849734, "loss": 12.322, "step": 7918 }, { "epoch": 0.43122119894086974, "grad_norm": 0.7149825408684415, "learning_rate": 0.0001824691013575484, "loss": 12.551, "step": 7919 }, { "epoch": 0.4312756529374528, "grad_norm": 0.6958054940177517, "learning_rate": 0.0001824641135753059, "loss": 12.5937, "step": 7920 }, { "epoch": 0.4313301069340358, "grad_norm": 0.6496322803509679, "learning_rate": 0.00018245912515180862, "loss": 12.4297, "step": 7921 }, { "epoch": 0.4313845609306188, "grad_norm": 0.7743435913615766, "learning_rate": 0.00018245413608709534, "loss": 12.3787, "step": 7922 }, { "epoch": 0.4314390149272018, "grad_norm": 0.6499155579167446, "learning_rate": 0.0001824491463812049, "loss": 12.4618, "step": 7923 }, { "epoch": 0.4314934689237848, "grad_norm": 0.6323364011027064, "learning_rate": 0.00018244415603417603, "loss": 12.5198, "step": 7924 }, { "epoch": 0.4315479229203678, "grad_norm": 0.6275726046028748, "learning_rate": 0.0001824391650460476, "loss": 12.4412, "step": 7925 }, { "epoch": 0.43160237691695086, "grad_norm": 0.6563516386645423, "learning_rate": 0.0001824341734168584, "loss": 12.3843, "step": 7926 }, { "epoch": 0.43165683091353385, "grad_norm": 0.5944999411676104, "learning_rate": 0.00018242918114664725, "loss": 12.4227, "step": 7927 }, { "epoch": 0.4317112849101169, "grad_norm": 0.6126075950113996, "learning_rate": 0.00018242418823545298, "loss": 12.5283, "step": 7928 }, { "epoch": 0.4317657389066999, "grad_norm": 0.591985788952654, "learning_rate": 0.00018241919468331435, "loss": 12.4677, "step": 7929 }, { "epoch": 0.4318201929032829, "grad_norm": 0.6383852269541036, "learning_rate": 0.00018241420049027027, "loss": 12.474, "step": 7930 }, { "epoch": 0.43187464689986593, "grad_norm": 0.5852504651876139, "learning_rate": 0.00018240920565635956, "loss": 12.3616, "step": 7931 }, { "epoch": 0.4319291008964489, "grad_norm": 0.6130822749181293, "learning_rate": 0.00018240421018162102, "loss": 12.4868, "step": 7932 }, { "epoch": 0.4319835548930319, "grad_norm": 0.6200945418246628, "learning_rate": 0.00018239921406609352, "loss": 12.3153, "step": 7933 }, { "epoch": 0.43203800888961497, "grad_norm": 0.5977586740609638, "learning_rate": 0.00018239421730981595, "loss": 12.3454, "step": 7934 }, { "epoch": 0.43209246288619796, "grad_norm": 0.7620159440437332, "learning_rate": 0.00018238921991282708, "loss": 12.5599, "step": 7935 }, { "epoch": 0.43214691688278095, "grad_norm": 0.6388958721890771, "learning_rate": 0.00018238422187516587, "loss": 12.4949, "step": 7936 }, { "epoch": 0.432201370879364, "grad_norm": 0.6201417753511387, "learning_rate": 0.0001823792231968711, "loss": 12.4605, "step": 7937 }, { "epoch": 0.432255824875947, "grad_norm": 0.7082720364920682, "learning_rate": 0.00018237422387798168, "loss": 12.5924, "step": 7938 }, { "epoch": 0.43231027887253, "grad_norm": 0.6764522855907903, "learning_rate": 0.00018236922391853648, "loss": 12.5394, "step": 7939 }, { "epoch": 0.43236473286911303, "grad_norm": 0.6027504108616288, "learning_rate": 0.00018236422331857437, "loss": 12.3855, "step": 7940 }, { "epoch": 0.432419186865696, "grad_norm": 0.7383961195275949, "learning_rate": 0.00018235922207813428, "loss": 12.6475, "step": 7941 }, { "epoch": 0.432473640862279, "grad_norm": 0.6480989207450246, "learning_rate": 0.00018235422019725507, "loss": 12.4974, "step": 7942 }, { "epoch": 0.43252809485886207, "grad_norm": 0.7626322657347785, "learning_rate": 0.00018234921767597558, "loss": 12.458, "step": 7943 }, { "epoch": 0.43258254885544506, "grad_norm": 0.7024604934391259, "learning_rate": 0.00018234421451433482, "loss": 12.474, "step": 7944 }, { "epoch": 0.43263700285202805, "grad_norm": 0.6236580831047059, "learning_rate": 0.0001823392107123716, "loss": 12.3832, "step": 7945 }, { "epoch": 0.4326914568486111, "grad_norm": 0.7460718232888852, "learning_rate": 0.0001823342062701249, "loss": 12.639, "step": 7946 }, { "epoch": 0.4327459108451941, "grad_norm": 0.6447139498024131, "learning_rate": 0.00018232920118763356, "loss": 12.5126, "step": 7947 }, { "epoch": 0.4328003648417771, "grad_norm": 0.6928880261448563, "learning_rate": 0.00018232419546493657, "loss": 12.6822, "step": 7948 }, { "epoch": 0.43285481883836013, "grad_norm": 0.625071578737955, "learning_rate": 0.00018231918910207283, "loss": 12.3265, "step": 7949 }, { "epoch": 0.4329092728349431, "grad_norm": 0.5987110493663569, "learning_rate": 0.0001823141820990813, "loss": 12.5354, "step": 7950 }, { "epoch": 0.4329637268315261, "grad_norm": 0.620178423324587, "learning_rate": 0.00018230917445600085, "loss": 12.4393, "step": 7951 }, { "epoch": 0.43301818082810917, "grad_norm": 0.6307608473905716, "learning_rate": 0.00018230416617287046, "loss": 12.4671, "step": 7952 }, { "epoch": 0.43307263482469216, "grad_norm": 0.7122894827254327, "learning_rate": 0.0001822991572497291, "loss": 12.4647, "step": 7953 }, { "epoch": 0.43312708882127515, "grad_norm": 0.5671356599600159, "learning_rate": 0.00018229414768661565, "loss": 12.3425, "step": 7954 }, { "epoch": 0.4331815428178582, "grad_norm": 0.6333834878199174, "learning_rate": 0.00018228913748356913, "loss": 12.4106, "step": 7955 }, { "epoch": 0.4332359968144412, "grad_norm": 0.6650636281838411, "learning_rate": 0.0001822841266406285, "loss": 12.4607, "step": 7956 }, { "epoch": 0.4332904508110242, "grad_norm": 0.585052172740277, "learning_rate": 0.00018227911515783266, "loss": 12.4691, "step": 7957 }, { "epoch": 0.43334490480760723, "grad_norm": 0.6751958569542258, "learning_rate": 0.00018227410303522064, "loss": 12.3421, "step": 7958 }, { "epoch": 0.4333993588041902, "grad_norm": 0.6306906606932072, "learning_rate": 0.0001822690902728314, "loss": 12.306, "step": 7959 }, { "epoch": 0.4334538128007732, "grad_norm": 0.6095238580428013, "learning_rate": 0.0001822640768707039, "loss": 12.4476, "step": 7960 }, { "epoch": 0.43350826679735627, "grad_norm": 0.6542022762270828, "learning_rate": 0.00018225906282887718, "loss": 12.4173, "step": 7961 }, { "epoch": 0.43356272079393926, "grad_norm": 0.6344690057285265, "learning_rate": 0.00018225404814739012, "loss": 12.4391, "step": 7962 }, { "epoch": 0.4336171747905223, "grad_norm": 0.7428938770750612, "learning_rate": 0.00018224903282628186, "loss": 12.4032, "step": 7963 }, { "epoch": 0.4336716287871053, "grad_norm": 0.6437215141879193, "learning_rate": 0.0001822440168655913, "loss": 12.4873, "step": 7964 }, { "epoch": 0.4337260827836883, "grad_norm": 0.6611723911944084, "learning_rate": 0.00018223900026535748, "loss": 12.4769, "step": 7965 }, { "epoch": 0.43378053678027134, "grad_norm": 0.6673601417317901, "learning_rate": 0.00018223398302561942, "loss": 12.4524, "step": 7966 }, { "epoch": 0.43383499077685433, "grad_norm": 0.7049890622715992, "learning_rate": 0.0001822289651464161, "loss": 12.4558, "step": 7967 }, { "epoch": 0.4338894447734373, "grad_norm": 0.6010186771172481, "learning_rate": 0.0001822239466277865, "loss": 12.5184, "step": 7968 }, { "epoch": 0.4339438987700204, "grad_norm": 0.6782184525687398, "learning_rate": 0.0001822189274697698, "loss": 12.5598, "step": 7969 }, { "epoch": 0.43399835276660337, "grad_norm": 0.7535593508707078, "learning_rate": 0.0001822139076724049, "loss": 12.4382, "step": 7970 }, { "epoch": 0.43405280676318636, "grad_norm": 0.7476354959202245, "learning_rate": 0.00018220888723573082, "loss": 12.3689, "step": 7971 }, { "epoch": 0.4341072607597694, "grad_norm": 0.6468310309468458, "learning_rate": 0.0001822038661597867, "loss": 12.4683, "step": 7972 }, { "epoch": 0.4341617147563524, "grad_norm": 0.6425677167614992, "learning_rate": 0.0001821988444446115, "loss": 12.5546, "step": 7973 }, { "epoch": 0.4342161687529354, "grad_norm": 0.6616730114954977, "learning_rate": 0.0001821938220902443, "loss": 12.5815, "step": 7974 }, { "epoch": 0.43427062274951844, "grad_norm": 0.6706196272119236, "learning_rate": 0.0001821887990967242, "loss": 12.4704, "step": 7975 }, { "epoch": 0.43432507674610143, "grad_norm": 0.7037258987322563, "learning_rate": 0.00018218377546409017, "loss": 12.5475, "step": 7976 }, { "epoch": 0.4343795307426844, "grad_norm": 0.6489047278433996, "learning_rate": 0.00018217875119238132, "loss": 12.3292, "step": 7977 }, { "epoch": 0.4344339847392675, "grad_norm": 0.6616035776812114, "learning_rate": 0.00018217372628163674, "loss": 12.2886, "step": 7978 }, { "epoch": 0.43448843873585047, "grad_norm": 0.7245041333919243, "learning_rate": 0.00018216870073189546, "loss": 12.3057, "step": 7979 }, { "epoch": 0.43454289273243346, "grad_norm": 0.6823812931676292, "learning_rate": 0.00018216367454319663, "loss": 12.3488, "step": 7980 }, { "epoch": 0.4345973467290165, "grad_norm": 0.6928454505678266, "learning_rate": 0.00018215864771557925, "loss": 12.4474, "step": 7981 }, { "epoch": 0.4346518007255995, "grad_norm": 0.6692762111560462, "learning_rate": 0.00018215362024908246, "loss": 12.3722, "step": 7982 }, { "epoch": 0.4347062547221825, "grad_norm": 0.7217401785786631, "learning_rate": 0.0001821485921437453, "loss": 12.5339, "step": 7983 }, { "epoch": 0.43476070871876554, "grad_norm": 0.7266825040044337, "learning_rate": 0.00018214356339960698, "loss": 12.3977, "step": 7984 }, { "epoch": 0.43481516271534854, "grad_norm": 0.6826875691189774, "learning_rate": 0.0001821385340167065, "loss": 12.4072, "step": 7985 }, { "epoch": 0.43486961671193153, "grad_norm": 0.7064241066509669, "learning_rate": 0.000182133503995083, "loss": 12.4456, "step": 7986 }, { "epoch": 0.4349240707085146, "grad_norm": 0.6175230417065565, "learning_rate": 0.0001821284733347756, "loss": 12.4795, "step": 7987 }, { "epoch": 0.43497852470509757, "grad_norm": 0.6079407316866163, "learning_rate": 0.00018212344203582344, "loss": 12.3473, "step": 7988 }, { "epoch": 0.43503297870168056, "grad_norm": 0.7163691665704485, "learning_rate": 0.0001821184100982656, "loss": 12.3703, "step": 7989 }, { "epoch": 0.4350874326982636, "grad_norm": 0.6258311004616853, "learning_rate": 0.00018211337752214123, "loss": 12.3942, "step": 7990 }, { "epoch": 0.4351418866948466, "grad_norm": 0.671124784124222, "learning_rate": 0.0001821083443074895, "loss": 12.4334, "step": 7991 }, { "epoch": 0.4351963406914296, "grad_norm": 0.6216233607643935, "learning_rate": 0.00018210331045434949, "loss": 12.4195, "step": 7992 }, { "epoch": 0.43525079468801264, "grad_norm": 0.742741508242918, "learning_rate": 0.00018209827596276035, "loss": 12.4239, "step": 7993 }, { "epoch": 0.43530524868459564, "grad_norm": 0.6300698638468436, "learning_rate": 0.00018209324083276126, "loss": 12.3753, "step": 7994 }, { "epoch": 0.4353597026811787, "grad_norm": 0.751409905949325, "learning_rate": 0.00018208820506439137, "loss": 12.529, "step": 7995 }, { "epoch": 0.4354141566777617, "grad_norm": 0.6947725654359397, "learning_rate": 0.00018208316865768985, "loss": 12.5276, "step": 7996 }, { "epoch": 0.43546861067434467, "grad_norm": 0.9447376665430275, "learning_rate": 0.00018207813161269583, "loss": 12.4355, "step": 7997 }, { "epoch": 0.4355230646709277, "grad_norm": 0.6723041201443456, "learning_rate": 0.00018207309392944846, "loss": 12.4632, "step": 7998 }, { "epoch": 0.4355775186675107, "grad_norm": 0.7976902368051231, "learning_rate": 0.000182068055607987, "loss": 12.6122, "step": 7999 }, { "epoch": 0.4356319726640937, "grad_norm": 0.7345131489832951, "learning_rate": 0.00018206301664835058, "loss": 12.5908, "step": 8000 }, { "epoch": 0.43568642666067675, "grad_norm": 0.6454712970992138, "learning_rate": 0.00018205797705057834, "loss": 12.5204, "step": 8001 }, { "epoch": 0.43574088065725974, "grad_norm": 0.6711649663357349, "learning_rate": 0.00018205293681470953, "loss": 12.5056, "step": 8002 }, { "epoch": 0.43579533465384274, "grad_norm": 0.6728369086830224, "learning_rate": 0.0001820478959407833, "loss": 12.3627, "step": 8003 }, { "epoch": 0.4358497886504258, "grad_norm": 0.6597979370043341, "learning_rate": 0.0001820428544288389, "loss": 12.3749, "step": 8004 }, { "epoch": 0.4359042426470088, "grad_norm": 0.6209904486774009, "learning_rate": 0.0001820378122789155, "loss": 12.4336, "step": 8005 }, { "epoch": 0.43595869664359177, "grad_norm": 0.7713948991466982, "learning_rate": 0.00018203276949105233, "loss": 12.4182, "step": 8006 }, { "epoch": 0.4360131506401748, "grad_norm": 0.6469371586470584, "learning_rate": 0.00018202772606528856, "loss": 12.3034, "step": 8007 }, { "epoch": 0.4360676046367578, "grad_norm": 0.6995538373363558, "learning_rate": 0.00018202268200166342, "loss": 12.4704, "step": 8008 }, { "epoch": 0.4361220586333408, "grad_norm": 0.666288730336914, "learning_rate": 0.00018201763730021618, "loss": 12.4474, "step": 8009 }, { "epoch": 0.43617651262992385, "grad_norm": 0.6211419327657228, "learning_rate": 0.00018201259196098604, "loss": 12.4149, "step": 8010 }, { "epoch": 0.43623096662650684, "grad_norm": 0.628883970811886, "learning_rate": 0.0001820075459840122, "loss": 12.4724, "step": 8011 }, { "epoch": 0.43628542062308984, "grad_norm": 0.5950168445018583, "learning_rate": 0.00018200249936933398, "loss": 12.4613, "step": 8012 }, { "epoch": 0.4363398746196729, "grad_norm": 0.6312701310341498, "learning_rate": 0.00018199745211699053, "loss": 12.4223, "step": 8013 }, { "epoch": 0.4363943286162559, "grad_norm": 0.6932151999657433, "learning_rate": 0.00018199240422702117, "loss": 12.4752, "step": 8014 }, { "epoch": 0.43644878261283887, "grad_norm": 0.6294977807573285, "learning_rate": 0.0001819873556994651, "loss": 12.4771, "step": 8015 }, { "epoch": 0.4365032366094219, "grad_norm": 0.6264943199859186, "learning_rate": 0.0001819823065343616, "loss": 12.484, "step": 8016 }, { "epoch": 0.4365576906060049, "grad_norm": 0.6616444184130431, "learning_rate": 0.00018197725673174998, "loss": 12.5049, "step": 8017 }, { "epoch": 0.4366121446025879, "grad_norm": 0.5744172061896488, "learning_rate": 0.00018197220629166943, "loss": 12.4327, "step": 8018 }, { "epoch": 0.43666659859917095, "grad_norm": 0.6289380832372987, "learning_rate": 0.00018196715521415926, "loss": 12.3483, "step": 8019 }, { "epoch": 0.43672105259575394, "grad_norm": 0.6403498509864031, "learning_rate": 0.00018196210349925873, "loss": 12.438, "step": 8020 }, { "epoch": 0.43677550659233694, "grad_norm": 0.579175241208905, "learning_rate": 0.00018195705114700713, "loss": 12.3951, "step": 8021 }, { "epoch": 0.43682996058892, "grad_norm": 0.6219905875288234, "learning_rate": 0.00018195199815744382, "loss": 12.4101, "step": 8022 }, { "epoch": 0.436884414585503, "grad_norm": 0.5955832250530326, "learning_rate": 0.000181946944530608, "loss": 12.3245, "step": 8023 }, { "epoch": 0.43693886858208597, "grad_norm": 0.6072932595615831, "learning_rate": 0.00018194189026653898, "loss": 12.4663, "step": 8024 }, { "epoch": 0.436993322578669, "grad_norm": 0.7165004470076959, "learning_rate": 0.00018193683536527609, "loss": 12.4848, "step": 8025 }, { "epoch": 0.437047776575252, "grad_norm": 0.6513696149725915, "learning_rate": 0.00018193177982685858, "loss": 12.4224, "step": 8026 }, { "epoch": 0.437102230571835, "grad_norm": 0.7428711180542187, "learning_rate": 0.00018192672365132588, "loss": 12.5178, "step": 8027 }, { "epoch": 0.43715668456841805, "grad_norm": 0.6427803787757064, "learning_rate": 0.0001819216668387172, "loss": 12.4145, "step": 8028 }, { "epoch": 0.43721113856500105, "grad_norm": 0.6207320706711867, "learning_rate": 0.00018191660938907193, "loss": 12.4405, "step": 8029 }, { "epoch": 0.4372655925615841, "grad_norm": 0.6852852916241587, "learning_rate": 0.00018191155130242934, "loss": 12.4623, "step": 8030 }, { "epoch": 0.4373200465581671, "grad_norm": 0.6840507058001489, "learning_rate": 0.0001819064925788288, "loss": 12.4987, "step": 8031 }, { "epoch": 0.4373745005547501, "grad_norm": 0.6140790978364469, "learning_rate": 0.00018190143321830963, "loss": 12.373, "step": 8032 }, { "epoch": 0.4374289545513331, "grad_norm": 0.6775222151229636, "learning_rate": 0.0001818963732209112, "loss": 12.5089, "step": 8033 }, { "epoch": 0.4374834085479161, "grad_norm": 0.590068771249146, "learning_rate": 0.00018189131258667282, "loss": 12.4183, "step": 8034 }, { "epoch": 0.4375378625444991, "grad_norm": 0.6183153379587679, "learning_rate": 0.00018188625131563386, "loss": 12.4238, "step": 8035 }, { "epoch": 0.43759231654108216, "grad_norm": 0.7422525928327838, "learning_rate": 0.00018188118940783368, "loss": 12.5106, "step": 8036 }, { "epoch": 0.43764677053766515, "grad_norm": 0.6068305483295992, "learning_rate": 0.00018187612686331166, "loss": 12.4831, "step": 8037 }, { "epoch": 0.43770122453424815, "grad_norm": 0.6158291631235349, "learning_rate": 0.00018187106368210712, "loss": 12.4306, "step": 8038 }, { "epoch": 0.4377556785308312, "grad_norm": 0.6327278698588021, "learning_rate": 0.00018186599986425947, "loss": 12.4772, "step": 8039 }, { "epoch": 0.4378101325274142, "grad_norm": 0.691860427152302, "learning_rate": 0.00018186093540980807, "loss": 12.5121, "step": 8040 }, { "epoch": 0.4378645865239972, "grad_norm": 0.6848902755048658, "learning_rate": 0.00018185587031879232, "loss": 12.3507, "step": 8041 }, { "epoch": 0.4379190405205802, "grad_norm": 0.6024814082483306, "learning_rate": 0.0001818508045912516, "loss": 12.4528, "step": 8042 }, { "epoch": 0.4379734945171632, "grad_norm": 0.6652691577868063, "learning_rate": 0.00018184573822722529, "loss": 12.3157, "step": 8043 }, { "epoch": 0.4380279485137462, "grad_norm": 0.6569694406846195, "learning_rate": 0.00018184067122675276, "loss": 12.459, "step": 8044 }, { "epoch": 0.43808240251032926, "grad_norm": 0.6420980248319899, "learning_rate": 0.00018183560358987349, "loss": 12.4096, "step": 8045 }, { "epoch": 0.43813685650691225, "grad_norm": 0.6627008318367901, "learning_rate": 0.00018183053531662684, "loss": 12.3847, "step": 8046 }, { "epoch": 0.43819131050349525, "grad_norm": 0.7059088848618572, "learning_rate": 0.0001818254664070522, "loss": 12.5215, "step": 8047 }, { "epoch": 0.4382457645000783, "grad_norm": 0.6854425080494145, "learning_rate": 0.00018182039686118903, "loss": 12.5269, "step": 8048 }, { "epoch": 0.4383002184966613, "grad_norm": 0.6535417405105993, "learning_rate": 0.00018181532667907671, "loss": 12.5775, "step": 8049 }, { "epoch": 0.4383546724932443, "grad_norm": 0.6389520951864824, "learning_rate": 0.0001818102558607547, "loss": 12.2756, "step": 8050 }, { "epoch": 0.4384091264898273, "grad_norm": 0.6091592745791563, "learning_rate": 0.00018180518440626245, "loss": 12.5519, "step": 8051 }, { "epoch": 0.4384635804864103, "grad_norm": 0.592034414821979, "learning_rate": 0.00018180011231563935, "loss": 12.3247, "step": 8052 }, { "epoch": 0.4385180344829933, "grad_norm": 0.6822262235027153, "learning_rate": 0.00018179503958892483, "loss": 12.4094, "step": 8053 }, { "epoch": 0.43857248847957636, "grad_norm": 0.6903442897493688, "learning_rate": 0.0001817899662261584, "loss": 12.4012, "step": 8054 }, { "epoch": 0.43862694247615935, "grad_norm": 0.5746722202516459, "learning_rate": 0.00018178489222737946, "loss": 12.3903, "step": 8055 }, { "epoch": 0.43868139647274235, "grad_norm": 0.7103112826336199, "learning_rate": 0.00018177981759262747, "loss": 12.4608, "step": 8056 }, { "epoch": 0.4387358504693254, "grad_norm": 0.7133739481466947, "learning_rate": 0.00018177474232194195, "loss": 12.3867, "step": 8057 }, { "epoch": 0.4387903044659084, "grad_norm": 0.6869948568749044, "learning_rate": 0.0001817696664153623, "loss": 12.4986, "step": 8058 }, { "epoch": 0.4388447584624914, "grad_norm": 0.67170120481675, "learning_rate": 0.00018176458987292799, "loss": 12.4175, "step": 8059 }, { "epoch": 0.43889921245907443, "grad_norm": 0.7517173895741974, "learning_rate": 0.00018175951269467855, "loss": 12.4281, "step": 8060 }, { "epoch": 0.4389536664556574, "grad_norm": 0.7072112142379776, "learning_rate": 0.0001817544348806534, "loss": 12.4413, "step": 8061 }, { "epoch": 0.43900812045224047, "grad_norm": 0.6774523087574283, "learning_rate": 0.00018174935643089203, "loss": 12.4525, "step": 8062 }, { "epoch": 0.43906257444882346, "grad_norm": 0.6340529043477183, "learning_rate": 0.000181744277345434, "loss": 12.3797, "step": 8063 }, { "epoch": 0.43911702844540645, "grad_norm": 0.718405082762463, "learning_rate": 0.00018173919762431875, "loss": 12.5314, "step": 8064 }, { "epoch": 0.4391714824419895, "grad_norm": 0.6952690412740806, "learning_rate": 0.0001817341172675858, "loss": 12.492, "step": 8065 }, { "epoch": 0.4392259364385725, "grad_norm": 0.584979726754951, "learning_rate": 0.00018172903627527463, "loss": 12.378, "step": 8066 }, { "epoch": 0.4392803904351555, "grad_norm": 0.7046022953559745, "learning_rate": 0.00018172395464742479, "loss": 12.5153, "step": 8067 }, { "epoch": 0.43933484443173854, "grad_norm": 0.6895392327708775, "learning_rate": 0.00018171887238407574, "loss": 12.4722, "step": 8068 }, { "epoch": 0.43938929842832153, "grad_norm": 0.593507928029048, "learning_rate": 0.00018171378948526705, "loss": 12.5153, "step": 8069 }, { "epoch": 0.4394437524249045, "grad_norm": 0.6536079851636544, "learning_rate": 0.00018170870595103823, "loss": 12.5043, "step": 8070 }, { "epoch": 0.43949820642148757, "grad_norm": 0.7102749722944064, "learning_rate": 0.0001817036217814288, "loss": 12.5844, "step": 8071 }, { "epoch": 0.43955266041807056, "grad_norm": 0.6250358275118798, "learning_rate": 0.0001816985369764783, "loss": 12.4398, "step": 8072 }, { "epoch": 0.43960711441465355, "grad_norm": 0.6833189631616499, "learning_rate": 0.00018169345153622632, "loss": 12.5777, "step": 8073 }, { "epoch": 0.4396615684112366, "grad_norm": 0.6326543135044489, "learning_rate": 0.00018168836546071234, "loss": 12.5301, "step": 8074 }, { "epoch": 0.4397160224078196, "grad_norm": 0.6599203053997212, "learning_rate": 0.00018168327874997593, "loss": 12.4672, "step": 8075 }, { "epoch": 0.4397704764044026, "grad_norm": 0.67676831770541, "learning_rate": 0.00018167819140405662, "loss": 12.5454, "step": 8076 }, { "epoch": 0.43982493040098564, "grad_norm": 0.6171632602120455, "learning_rate": 0.00018167310342299403, "loss": 12.3727, "step": 8077 }, { "epoch": 0.43987938439756863, "grad_norm": 0.8192129927985445, "learning_rate": 0.00018166801480682768, "loss": 12.5696, "step": 8078 }, { "epoch": 0.4399338383941516, "grad_norm": 0.6841813706928609, "learning_rate": 0.00018166292555559714, "loss": 12.4212, "step": 8079 }, { "epoch": 0.43998829239073467, "grad_norm": 0.6756316788448604, "learning_rate": 0.000181657835669342, "loss": 12.4523, "step": 8080 }, { "epoch": 0.44004274638731766, "grad_norm": 0.7117498961542793, "learning_rate": 0.00018165274514810186, "loss": 12.4312, "step": 8081 }, { "epoch": 0.44009720038390066, "grad_norm": 0.6798671037484373, "learning_rate": 0.00018164765399191626, "loss": 12.5288, "step": 8082 }, { "epoch": 0.4401516543804837, "grad_norm": 0.6398340227930036, "learning_rate": 0.0001816425622008248, "loss": 12.3392, "step": 8083 }, { "epoch": 0.4402061083770667, "grad_norm": 0.7515836639185821, "learning_rate": 0.0001816374697748671, "loss": 12.5069, "step": 8084 }, { "epoch": 0.4402605623736497, "grad_norm": 0.6114018054945423, "learning_rate": 0.00018163237671408276, "loss": 12.3876, "step": 8085 }, { "epoch": 0.44031501637023274, "grad_norm": 0.5820204184592364, "learning_rate": 0.00018162728301851134, "loss": 12.4475, "step": 8086 }, { "epoch": 0.44036947036681573, "grad_norm": 0.7451614160847416, "learning_rate": 0.0001816221886881925, "loss": 12.4133, "step": 8087 }, { "epoch": 0.4404239243633987, "grad_norm": 0.6376617665113111, "learning_rate": 0.0001816170937231658, "loss": 12.4494, "step": 8088 }, { "epoch": 0.44047837835998177, "grad_norm": 0.6485801097204811, "learning_rate": 0.00018161199812347093, "loss": 12.5021, "step": 8089 }, { "epoch": 0.44053283235656476, "grad_norm": 0.6536790997705955, "learning_rate": 0.00018160690188914744, "loss": 12.486, "step": 8090 }, { "epoch": 0.44058728635314776, "grad_norm": 0.6973380046290791, "learning_rate": 0.000181601805020235, "loss": 12.4213, "step": 8091 }, { "epoch": 0.4406417403497308, "grad_norm": 0.6153978517164419, "learning_rate": 0.00018159670751677326, "loss": 12.4879, "step": 8092 }, { "epoch": 0.4406961943463138, "grad_norm": 0.7662004917264686, "learning_rate": 0.00018159160937880183, "loss": 12.5327, "step": 8093 }, { "epoch": 0.4407506483428968, "grad_norm": 0.6315076862191303, "learning_rate": 0.00018158651060636038, "loss": 12.5029, "step": 8094 }, { "epoch": 0.44080510233947984, "grad_norm": 0.6570484528322555, "learning_rate": 0.00018158141119948854, "loss": 12.5021, "step": 8095 }, { "epoch": 0.44085955633606283, "grad_norm": 0.6217452499352483, "learning_rate": 0.00018157631115822595, "loss": 12.4358, "step": 8096 }, { "epoch": 0.4409140103326459, "grad_norm": 0.6097425662470402, "learning_rate": 0.0001815712104826123, "loss": 12.4138, "step": 8097 }, { "epoch": 0.44096846432922887, "grad_norm": 0.5895926618645863, "learning_rate": 0.0001815661091726872, "loss": 12.3695, "step": 8098 }, { "epoch": 0.44102291832581186, "grad_norm": 0.6377967778191228, "learning_rate": 0.00018156100722849038, "loss": 12.5448, "step": 8099 }, { "epoch": 0.4410773723223949, "grad_norm": 0.6401808345387747, "learning_rate": 0.0001815559046500615, "loss": 12.4537, "step": 8100 }, { "epoch": 0.4411318263189779, "grad_norm": 0.6287248883361566, "learning_rate": 0.0001815508014374402, "loss": 12.5005, "step": 8101 }, { "epoch": 0.4411862803155609, "grad_norm": 0.6944250053649565, "learning_rate": 0.00018154569759066625, "loss": 12.5913, "step": 8102 }, { "epoch": 0.44124073431214395, "grad_norm": 0.627346440387178, "learning_rate": 0.00018154059310977923, "loss": 12.4249, "step": 8103 }, { "epoch": 0.44129518830872694, "grad_norm": 0.6420492552682078, "learning_rate": 0.00018153548799481887, "loss": 12.3946, "step": 8104 }, { "epoch": 0.44134964230530993, "grad_norm": 0.681906372443578, "learning_rate": 0.00018153038224582492, "loss": 12.3387, "step": 8105 }, { "epoch": 0.441404096301893, "grad_norm": 0.6210574333231363, "learning_rate": 0.00018152527586283703, "loss": 12.5162, "step": 8106 }, { "epoch": 0.44145855029847597, "grad_norm": 0.64322418419475, "learning_rate": 0.0001815201688458949, "loss": 12.5622, "step": 8107 }, { "epoch": 0.44151300429505896, "grad_norm": 0.8190346317269288, "learning_rate": 0.0001815150611950383, "loss": 12.4488, "step": 8108 }, { "epoch": 0.441567458291642, "grad_norm": 0.6529614749866838, "learning_rate": 0.00018150995291030692, "loss": 12.4613, "step": 8109 }, { "epoch": 0.441621912288225, "grad_norm": 0.7258579649878754, "learning_rate": 0.00018150484399174046, "loss": 12.4522, "step": 8110 }, { "epoch": 0.441676366284808, "grad_norm": 0.6654211401665494, "learning_rate": 0.00018149973443937868, "loss": 12.3424, "step": 8111 }, { "epoch": 0.44173082028139105, "grad_norm": 0.6318212930391481, "learning_rate": 0.00018149462425326126, "loss": 12.4647, "step": 8112 }, { "epoch": 0.44178527427797404, "grad_norm": 0.8207170885860678, "learning_rate": 0.00018148951343342803, "loss": 12.4289, "step": 8113 }, { "epoch": 0.44183972827455703, "grad_norm": 0.6194663346605774, "learning_rate": 0.00018148440197991863, "loss": 12.3711, "step": 8114 }, { "epoch": 0.4418941822711401, "grad_norm": 0.7014168629119157, "learning_rate": 0.0001814792898927729, "loss": 12.3242, "step": 8115 }, { "epoch": 0.44194863626772307, "grad_norm": 0.656875765463606, "learning_rate": 0.00018147417717203048, "loss": 12.4609, "step": 8116 }, { "epoch": 0.44200309026430606, "grad_norm": 0.6422545749068618, "learning_rate": 0.00018146906381773126, "loss": 12.4707, "step": 8117 }, { "epoch": 0.4420575442608891, "grad_norm": 0.6240047670798762, "learning_rate": 0.0001814639498299149, "loss": 12.4928, "step": 8118 }, { "epoch": 0.4421119982574721, "grad_norm": 0.6781025678231234, "learning_rate": 0.00018145883520862121, "loss": 12.6163, "step": 8119 }, { "epoch": 0.4421664522540551, "grad_norm": 0.636615698424067, "learning_rate": 0.00018145371995388997, "loss": 12.4772, "step": 8120 }, { "epoch": 0.44222090625063815, "grad_norm": 0.6454832736621415, "learning_rate": 0.00018144860406576094, "loss": 12.3847, "step": 8121 }, { "epoch": 0.44227536024722114, "grad_norm": 0.7496269835050098, "learning_rate": 0.0001814434875442739, "loss": 12.2562, "step": 8122 }, { "epoch": 0.44232981424380413, "grad_norm": 0.5902049909043856, "learning_rate": 0.00018143837038946865, "loss": 12.5374, "step": 8123 }, { "epoch": 0.4423842682403872, "grad_norm": 0.6287680351391354, "learning_rate": 0.00018143325260138498, "loss": 12.4129, "step": 8124 }, { "epoch": 0.44243872223697017, "grad_norm": 0.6395184786336089, "learning_rate": 0.00018142813418006268, "loss": 12.5485, "step": 8125 }, { "epoch": 0.44249317623355316, "grad_norm": 0.583233484282126, "learning_rate": 0.00018142301512554153, "loss": 12.4745, "step": 8126 }, { "epoch": 0.4425476302301362, "grad_norm": 0.7393265090602186, "learning_rate": 0.00018141789543786135, "loss": 12.5784, "step": 8127 }, { "epoch": 0.4426020842267192, "grad_norm": 0.6099498576642015, "learning_rate": 0.000181412775117062, "loss": 12.3518, "step": 8128 }, { "epoch": 0.44265653822330225, "grad_norm": 0.616859217531763, "learning_rate": 0.00018140765416318325, "loss": 12.3706, "step": 8129 }, { "epoch": 0.44271099221988525, "grad_norm": 0.5738815684564759, "learning_rate": 0.00018140253257626493, "loss": 12.4688, "step": 8130 }, { "epoch": 0.44276544621646824, "grad_norm": 0.6287187273103042, "learning_rate": 0.00018139741035634686, "loss": 12.4894, "step": 8131 }, { "epoch": 0.4428199002130513, "grad_norm": 0.7150333260319802, "learning_rate": 0.00018139228750346887, "loss": 12.7204, "step": 8132 }, { "epoch": 0.4428743542096343, "grad_norm": 0.6655484781426367, "learning_rate": 0.0001813871640176708, "loss": 12.4733, "step": 8133 }, { "epoch": 0.4429288082062173, "grad_norm": 0.638583761056179, "learning_rate": 0.0001813820398989925, "loss": 12.4027, "step": 8134 }, { "epoch": 0.4429832622028003, "grad_norm": 0.6106168427470005, "learning_rate": 0.00018137691514747383, "loss": 12.35, "step": 8135 }, { "epoch": 0.4430377161993833, "grad_norm": 0.7678202216164216, "learning_rate": 0.0001813717897631546, "loss": 12.3463, "step": 8136 }, { "epoch": 0.4430921701959663, "grad_norm": 0.6351437344563395, "learning_rate": 0.0001813666637460747, "loss": 12.4133, "step": 8137 }, { "epoch": 0.44314662419254935, "grad_norm": 0.6594886319959798, "learning_rate": 0.00018136153709627398, "loss": 12.2562, "step": 8138 }, { "epoch": 0.44320107818913235, "grad_norm": 0.6110581042660779, "learning_rate": 0.00018135640981379228, "loss": 12.4918, "step": 8139 }, { "epoch": 0.44325553218571534, "grad_norm": 0.6397466953877733, "learning_rate": 0.0001813512818986695, "loss": 12.394, "step": 8140 }, { "epoch": 0.4433099861822984, "grad_norm": 0.6550859383422465, "learning_rate": 0.0001813461533509455, "loss": 12.4382, "step": 8141 }, { "epoch": 0.4433644401788814, "grad_norm": 0.6917336007640664, "learning_rate": 0.00018134102417066022, "loss": 12.4055, "step": 8142 }, { "epoch": 0.4434188941754644, "grad_norm": 0.5850970346286554, "learning_rate": 0.00018133589435785343, "loss": 12.4241, "step": 8143 }, { "epoch": 0.4434733481720474, "grad_norm": 0.5951134489105335, "learning_rate": 0.00018133076391256512, "loss": 12.3374, "step": 8144 }, { "epoch": 0.4435278021686304, "grad_norm": 0.6551033967638262, "learning_rate": 0.00018132563283483516, "loss": 12.487, "step": 8145 }, { "epoch": 0.4435822561652134, "grad_norm": 0.6538463123605098, "learning_rate": 0.0001813205011247034, "loss": 12.3122, "step": 8146 }, { "epoch": 0.44363671016179645, "grad_norm": 0.5950772399842815, "learning_rate": 0.00018131536878220983, "loss": 12.4683, "step": 8147 }, { "epoch": 0.44369116415837945, "grad_norm": 0.6694727358020924, "learning_rate": 0.0001813102358073943, "loss": 12.4369, "step": 8148 }, { "epoch": 0.44374561815496244, "grad_norm": 0.5837086135181214, "learning_rate": 0.00018130510220029674, "loss": 12.5195, "step": 8149 }, { "epoch": 0.4438000721515455, "grad_norm": 0.6162354476460444, "learning_rate": 0.00018129996796095705, "loss": 12.4831, "step": 8150 }, { "epoch": 0.4438545261481285, "grad_norm": 0.6581910316864154, "learning_rate": 0.0001812948330894152, "loss": 12.4733, "step": 8151 }, { "epoch": 0.4439089801447115, "grad_norm": 0.6213906461100707, "learning_rate": 0.0001812896975857111, "loss": 12.5239, "step": 8152 }, { "epoch": 0.4439634341412945, "grad_norm": 0.606727686748743, "learning_rate": 0.00018128456144988467, "loss": 12.5373, "step": 8153 }, { "epoch": 0.4440178881378775, "grad_norm": 0.735147788381272, "learning_rate": 0.00018127942468197582, "loss": 12.5172, "step": 8154 }, { "epoch": 0.4440723421344605, "grad_norm": 0.5921186091395286, "learning_rate": 0.0001812742872820246, "loss": 12.3883, "step": 8155 }, { "epoch": 0.44412679613104356, "grad_norm": 0.6844273116732905, "learning_rate": 0.00018126914925007087, "loss": 12.4229, "step": 8156 }, { "epoch": 0.44418125012762655, "grad_norm": 0.6966763682784651, "learning_rate": 0.00018126401058615458, "loss": 12.454, "step": 8157 }, { "epoch": 0.44423570412420954, "grad_norm": 0.7599245377713695, "learning_rate": 0.00018125887129031573, "loss": 12.5486, "step": 8158 }, { "epoch": 0.4442901581207926, "grad_norm": 0.5801118497495215, "learning_rate": 0.0001812537313625943, "loss": 12.4271, "step": 8159 }, { "epoch": 0.4443446121173756, "grad_norm": 0.679983449444894, "learning_rate": 0.00018124859080303018, "loss": 12.3809, "step": 8160 }, { "epoch": 0.4443990661139586, "grad_norm": 0.567276752664479, "learning_rate": 0.00018124344961166342, "loss": 12.3648, "step": 8161 }, { "epoch": 0.4444535201105416, "grad_norm": 0.6698341318989772, "learning_rate": 0.00018123830778853396, "loss": 12.4784, "step": 8162 }, { "epoch": 0.4445079741071246, "grad_norm": 0.6111490002980611, "learning_rate": 0.0001812331653336818, "loss": 12.432, "step": 8163 }, { "epoch": 0.44456242810370766, "grad_norm": 0.6109325508698443, "learning_rate": 0.00018122802224714693, "loss": 12.4912, "step": 8164 }, { "epoch": 0.44461688210029066, "grad_norm": 0.5743030968799749, "learning_rate": 0.00018122287852896933, "loss": 12.4442, "step": 8165 }, { "epoch": 0.44467133609687365, "grad_norm": 0.6070693152333769, "learning_rate": 0.00018121773417918902, "loss": 12.3768, "step": 8166 }, { "epoch": 0.4447257900934567, "grad_norm": 0.6136466125925997, "learning_rate": 0.00018121258919784596, "loss": 12.3442, "step": 8167 }, { "epoch": 0.4447802440900397, "grad_norm": 0.6061840102523574, "learning_rate": 0.0001812074435849802, "loss": 12.3715, "step": 8168 }, { "epoch": 0.4448346980866227, "grad_norm": 0.6503836273900797, "learning_rate": 0.00018120229734063176, "loss": 12.6093, "step": 8169 }, { "epoch": 0.44488915208320573, "grad_norm": 0.641563324903526, "learning_rate": 0.0001811971504648406, "loss": 12.4406, "step": 8170 }, { "epoch": 0.4449436060797887, "grad_norm": 0.7577204807990595, "learning_rate": 0.00018119200295764682, "loss": 12.5485, "step": 8171 }, { "epoch": 0.4449980600763717, "grad_norm": 0.6820962027227742, "learning_rate": 0.0001811868548190904, "loss": 12.5358, "step": 8172 }, { "epoch": 0.44505251407295476, "grad_norm": 0.6359158735353171, "learning_rate": 0.00018118170604921135, "loss": 12.5205, "step": 8173 }, { "epoch": 0.44510696806953776, "grad_norm": 0.6111147020344397, "learning_rate": 0.00018117655664804977, "loss": 12.4213, "step": 8174 }, { "epoch": 0.44516142206612075, "grad_norm": 0.6246736894085984, "learning_rate": 0.00018117140661564566, "loss": 12.4356, "step": 8175 }, { "epoch": 0.4452158760627038, "grad_norm": 0.5749923246542837, "learning_rate": 0.0001811662559520391, "loss": 12.3527, "step": 8176 }, { "epoch": 0.4452703300592868, "grad_norm": 0.6530224543649432, "learning_rate": 0.0001811611046572701, "loss": 12.5725, "step": 8177 }, { "epoch": 0.4453247840558698, "grad_norm": 0.5985662252307675, "learning_rate": 0.00018115595273137874, "loss": 12.4586, "step": 8178 }, { "epoch": 0.44537923805245283, "grad_norm": 0.7193898797443692, "learning_rate": 0.0001811508001744051, "loss": 12.4578, "step": 8179 }, { "epoch": 0.4454336920490358, "grad_norm": 0.6444061956976649, "learning_rate": 0.0001811456469863892, "loss": 12.5317, "step": 8180 }, { "epoch": 0.4454881460456188, "grad_norm": 0.5826868228600273, "learning_rate": 0.00018114049316737116, "loss": 12.5559, "step": 8181 }, { "epoch": 0.44554260004220186, "grad_norm": 0.6538082635561483, "learning_rate": 0.00018113533871739104, "loss": 12.5837, "step": 8182 }, { "epoch": 0.44559705403878486, "grad_norm": 0.6200677214450276, "learning_rate": 0.00018113018363648893, "loss": 12.4668, "step": 8183 }, { "epoch": 0.44565150803536785, "grad_norm": 0.6283652497127699, "learning_rate": 0.0001811250279247049, "loss": 12.4758, "step": 8184 }, { "epoch": 0.4457059620319509, "grad_norm": 0.7374868480204493, "learning_rate": 0.00018111987158207904, "loss": 12.3293, "step": 8185 }, { "epoch": 0.4457604160285339, "grad_norm": 0.6923032516638095, "learning_rate": 0.00018111471460865146, "loss": 12.2851, "step": 8186 }, { "epoch": 0.4458148700251169, "grad_norm": 0.6292759013526202, "learning_rate": 0.00018110955700446224, "loss": 12.4064, "step": 8187 }, { "epoch": 0.44586932402169993, "grad_norm": 0.7390498575776396, "learning_rate": 0.00018110439876955152, "loss": 12.4175, "step": 8188 }, { "epoch": 0.4459237780182829, "grad_norm": 0.6180650489407907, "learning_rate": 0.00018109923990395938, "loss": 12.3875, "step": 8189 }, { "epoch": 0.4459782320148659, "grad_norm": 0.6974498935982044, "learning_rate": 0.00018109408040772594, "loss": 12.6687, "step": 8190 }, { "epoch": 0.44603268601144896, "grad_norm": 0.7591191693971592, "learning_rate": 0.00018108892028089138, "loss": 12.4527, "step": 8191 }, { "epoch": 0.44608714000803196, "grad_norm": 0.6208047689810073, "learning_rate": 0.00018108375952349573, "loss": 12.4469, "step": 8192 }, { "epoch": 0.44614159400461495, "grad_norm": 0.781635480903214, "learning_rate": 0.0001810785981355792, "loss": 12.2055, "step": 8193 }, { "epoch": 0.446196048001198, "grad_norm": 0.7186489072431105, "learning_rate": 0.00018107343611718188, "loss": 12.6631, "step": 8194 }, { "epoch": 0.446250501997781, "grad_norm": 0.758734781288091, "learning_rate": 0.0001810682734683439, "loss": 12.2953, "step": 8195 }, { "epoch": 0.44630495599436404, "grad_norm": 0.6159585950601735, "learning_rate": 0.00018106311018910548, "loss": 12.4242, "step": 8196 }, { "epoch": 0.44635940999094703, "grad_norm": 0.7073770563576457, "learning_rate": 0.0001810579462795067, "loss": 12.5467, "step": 8197 }, { "epoch": 0.44641386398753, "grad_norm": 0.6096509478965852, "learning_rate": 0.00018105278173958774, "loss": 12.2076, "step": 8198 }, { "epoch": 0.44646831798411307, "grad_norm": 0.6392576052815786, "learning_rate": 0.00018104761656938871, "loss": 12.4672, "step": 8199 }, { "epoch": 0.44652277198069606, "grad_norm": 0.6049885538084858, "learning_rate": 0.00018104245076894988, "loss": 12.4106, "step": 8200 }, { "epoch": 0.44657722597727906, "grad_norm": 0.6642922311561321, "learning_rate": 0.00018103728433831133, "loss": 12.4219, "step": 8201 }, { "epoch": 0.4466316799738621, "grad_norm": 0.7173739433890999, "learning_rate": 0.00018103211727751327, "loss": 12.36, "step": 8202 }, { "epoch": 0.4466861339704451, "grad_norm": 0.6155354362488491, "learning_rate": 0.0001810269495865959, "loss": 12.4365, "step": 8203 }, { "epoch": 0.4467405879670281, "grad_norm": 0.5944106403284649, "learning_rate": 0.00018102178126559937, "loss": 12.4833, "step": 8204 }, { "epoch": 0.44679504196361114, "grad_norm": 0.6074043433035483, "learning_rate": 0.00018101661231456387, "loss": 12.4723, "step": 8205 }, { "epoch": 0.44684949596019413, "grad_norm": 0.6043312912035862, "learning_rate": 0.00018101144273352963, "loss": 12.3855, "step": 8206 }, { "epoch": 0.4469039499567771, "grad_norm": 0.6270599323257495, "learning_rate": 0.0001810062725225368, "loss": 12.4546, "step": 8207 }, { "epoch": 0.4469584039533602, "grad_norm": 0.6661394924846142, "learning_rate": 0.00018100110168162563, "loss": 12.3606, "step": 8208 }, { "epoch": 0.44701285794994317, "grad_norm": 0.7137124054573656, "learning_rate": 0.0001809959302108363, "loss": 12.589, "step": 8209 }, { "epoch": 0.44706731194652616, "grad_norm": 0.6665658000172924, "learning_rate": 0.00018099075811020904, "loss": 12.3141, "step": 8210 }, { "epoch": 0.4471217659431092, "grad_norm": 0.7127001415690272, "learning_rate": 0.00018098558537978405, "loss": 12.5324, "step": 8211 }, { "epoch": 0.4471762199396922, "grad_norm": 0.6345169385181538, "learning_rate": 0.00018098041201960156, "loss": 12.44, "step": 8212 }, { "epoch": 0.4472306739362752, "grad_norm": 0.6563976622677672, "learning_rate": 0.00018097523802970185, "loss": 12.3428, "step": 8213 }, { "epoch": 0.44728512793285824, "grad_norm": 0.6687606642611973, "learning_rate": 0.00018097006341012505, "loss": 12.4602, "step": 8214 }, { "epoch": 0.44733958192944123, "grad_norm": 0.7164323797540626, "learning_rate": 0.0001809648881609115, "loss": 12.5104, "step": 8215 }, { "epoch": 0.4473940359260242, "grad_norm": 0.6878977953960316, "learning_rate": 0.0001809597122821014, "loss": 12.4367, "step": 8216 }, { "epoch": 0.4474484899226073, "grad_norm": 0.5843306191896047, "learning_rate": 0.000180954535773735, "loss": 12.3788, "step": 8217 }, { "epoch": 0.44750294391919027, "grad_norm": 0.627590152184721, "learning_rate": 0.0001809493586358525, "loss": 12.335, "step": 8218 }, { "epoch": 0.44755739791577326, "grad_norm": 0.6730383060730796, "learning_rate": 0.00018094418086849428, "loss": 12.4502, "step": 8219 }, { "epoch": 0.4476118519123563, "grad_norm": 0.6622308298602019, "learning_rate": 0.0001809390024717005, "loss": 12.442, "step": 8220 }, { "epoch": 0.4476663059089393, "grad_norm": 0.5680820510071307, "learning_rate": 0.0001809338234455115, "loss": 12.5298, "step": 8221 }, { "epoch": 0.4477207599055223, "grad_norm": 0.6048364187339758, "learning_rate": 0.00018092864378996748, "loss": 12.4445, "step": 8222 }, { "epoch": 0.44777521390210534, "grad_norm": 0.7737050573569966, "learning_rate": 0.00018092346350510877, "loss": 12.5471, "step": 8223 }, { "epoch": 0.44782966789868833, "grad_norm": 0.6120992948950894, "learning_rate": 0.0001809182825909756, "loss": 12.4766, "step": 8224 }, { "epoch": 0.4478841218952713, "grad_norm": 0.7214950968754054, "learning_rate": 0.00018091310104760834, "loss": 12.4849, "step": 8225 }, { "epoch": 0.4479385758918544, "grad_norm": 0.6105295559177767, "learning_rate": 0.00018090791887504724, "loss": 12.4912, "step": 8226 }, { "epoch": 0.44799302988843737, "grad_norm": 0.5889511018642188, "learning_rate": 0.00018090273607333256, "loss": 12.5213, "step": 8227 }, { "epoch": 0.44804748388502036, "grad_norm": 0.6737149319485795, "learning_rate": 0.00018089755264250466, "loss": 12.3463, "step": 8228 }, { "epoch": 0.4481019378816034, "grad_norm": 0.6485216645092555, "learning_rate": 0.00018089236858260383, "loss": 12.4522, "step": 8229 }, { "epoch": 0.4481563918781864, "grad_norm": 0.6290966852441273, "learning_rate": 0.00018088718389367036, "loss": 12.479, "step": 8230 }, { "epoch": 0.44821084587476945, "grad_norm": 0.6435504811467402, "learning_rate": 0.00018088199857574456, "loss": 12.5209, "step": 8231 }, { "epoch": 0.44826529987135244, "grad_norm": 0.6477490585103859, "learning_rate": 0.00018087681262886682, "loss": 12.4887, "step": 8232 }, { "epoch": 0.44831975386793543, "grad_norm": 0.7152524441450054, "learning_rate": 0.0001808716260530774, "loss": 12.5928, "step": 8233 }, { "epoch": 0.4483742078645185, "grad_norm": 0.6169314782015952, "learning_rate": 0.00018086643884841666, "loss": 12.4819, "step": 8234 }, { "epoch": 0.4484286618611015, "grad_norm": 0.61995875690478, "learning_rate": 0.00018086125101492493, "loss": 12.5438, "step": 8235 }, { "epoch": 0.44848311585768447, "grad_norm": 0.6410424565512555, "learning_rate": 0.00018085606255264255, "loss": 12.3693, "step": 8236 }, { "epoch": 0.4485375698542675, "grad_norm": 0.675278433061094, "learning_rate": 0.00018085087346160988, "loss": 12.4955, "step": 8237 }, { "epoch": 0.4485920238508505, "grad_norm": 0.6860164489550601, "learning_rate": 0.00018084568374186722, "loss": 12.3339, "step": 8238 }, { "epoch": 0.4486464778474335, "grad_norm": 0.70350072710245, "learning_rate": 0.000180840493393455, "loss": 12.424, "step": 8239 }, { "epoch": 0.44870093184401655, "grad_norm": 0.6038702419154595, "learning_rate": 0.00018083530241641357, "loss": 12.5123, "step": 8240 }, { "epoch": 0.44875538584059954, "grad_norm": 0.6831009948912166, "learning_rate": 0.00018083011081078323, "loss": 12.4619, "step": 8241 }, { "epoch": 0.44880983983718253, "grad_norm": 0.6303418377962721, "learning_rate": 0.0001808249185766044, "loss": 12.4241, "step": 8242 }, { "epoch": 0.4488642938337656, "grad_norm": 0.6777704932279972, "learning_rate": 0.00018081972571391749, "loss": 12.538, "step": 8243 }, { "epoch": 0.4489187478303486, "grad_norm": 0.6216889573416485, "learning_rate": 0.00018081453222276278, "loss": 12.5019, "step": 8244 }, { "epoch": 0.44897320182693157, "grad_norm": 0.827652560719684, "learning_rate": 0.00018080933810318072, "loss": 12.473, "step": 8245 }, { "epoch": 0.4490276558235146, "grad_norm": 0.6836190786922434, "learning_rate": 0.00018080414335521172, "loss": 12.4538, "step": 8246 }, { "epoch": 0.4490821098200976, "grad_norm": 0.7056054941784216, "learning_rate": 0.00018079894797889615, "loss": 12.5438, "step": 8247 }, { "epoch": 0.4491365638166806, "grad_norm": 0.7109116869401612, "learning_rate": 0.00018079375197427438, "loss": 12.5076, "step": 8248 }, { "epoch": 0.44919101781326365, "grad_norm": 0.6384313871607461, "learning_rate": 0.00018078855534138687, "loss": 12.3335, "step": 8249 }, { "epoch": 0.44924547180984664, "grad_norm": 0.6014901136296119, "learning_rate": 0.00018078335808027402, "loss": 12.4224, "step": 8250 }, { "epoch": 0.44929992580642963, "grad_norm": 0.6995417512969619, "learning_rate": 0.00018077816019097622, "loss": 12.4247, "step": 8251 }, { "epoch": 0.4493543798030127, "grad_norm": 0.646866040388919, "learning_rate": 0.0001807729616735339, "loss": 12.3516, "step": 8252 }, { "epoch": 0.4494088337995957, "grad_norm": 0.6323119202761118, "learning_rate": 0.00018076776252798747, "loss": 12.466, "step": 8253 }, { "epoch": 0.44946328779617867, "grad_norm": 0.6573015764103591, "learning_rate": 0.0001807625627543774, "loss": 12.3854, "step": 8254 }, { "epoch": 0.4495177417927617, "grad_norm": 0.7660222464548213, "learning_rate": 0.0001807573623527441, "loss": 12.6872, "step": 8255 }, { "epoch": 0.4495721957893447, "grad_norm": 0.6917988160741411, "learning_rate": 0.00018075216132312796, "loss": 12.4, "step": 8256 }, { "epoch": 0.4496266497859277, "grad_norm": 0.6528921899765494, "learning_rate": 0.0001807469596655695, "loss": 12.4176, "step": 8257 }, { "epoch": 0.44968110378251075, "grad_norm": 0.6387928727990673, "learning_rate": 0.00018074175738010915, "loss": 12.553, "step": 8258 }, { "epoch": 0.44973555777909374, "grad_norm": 0.7032323497462504, "learning_rate": 0.00018073655446678734, "loss": 12.3964, "step": 8259 }, { "epoch": 0.44979001177567673, "grad_norm": 0.6201282183769875, "learning_rate": 0.00018073135092564456, "loss": 12.4337, "step": 8260 }, { "epoch": 0.4498444657722598, "grad_norm": 0.6546034000172873, "learning_rate": 0.00018072614675672123, "loss": 12.5565, "step": 8261 }, { "epoch": 0.4498989197688428, "grad_norm": 0.6167943409390654, "learning_rate": 0.00018072094196005788, "loss": 12.3856, "step": 8262 }, { "epoch": 0.4499533737654258, "grad_norm": 0.6411173044207982, "learning_rate": 0.0001807157365356949, "loss": 12.4758, "step": 8263 }, { "epoch": 0.4500078277620088, "grad_norm": 0.6660852274828428, "learning_rate": 0.00018071053048367283, "loss": 12.3944, "step": 8264 }, { "epoch": 0.4500622817585918, "grad_norm": 0.7313184201567402, "learning_rate": 0.00018070532380403217, "loss": 12.5177, "step": 8265 }, { "epoch": 0.45011673575517486, "grad_norm": 0.5888785217430297, "learning_rate": 0.00018070011649681336, "loss": 12.4226, "step": 8266 }, { "epoch": 0.45017118975175785, "grad_norm": 0.6271369849906314, "learning_rate": 0.00018069490856205693, "loss": 12.4255, "step": 8267 }, { "epoch": 0.45022564374834084, "grad_norm": 0.6877928172862263, "learning_rate": 0.00018068969999980334, "loss": 12.4457, "step": 8268 }, { "epoch": 0.4502800977449239, "grad_norm": 0.7126841747836358, "learning_rate": 0.00018068449081009308, "loss": 12.4507, "step": 8269 }, { "epoch": 0.4503345517415069, "grad_norm": 0.5935590021761236, "learning_rate": 0.00018067928099296676, "loss": 12.4129, "step": 8270 }, { "epoch": 0.4503890057380899, "grad_norm": 0.7332547699397844, "learning_rate": 0.00018067407054846477, "loss": 12.4179, "step": 8271 }, { "epoch": 0.4504434597346729, "grad_norm": 0.6439231739492874, "learning_rate": 0.00018066885947662768, "loss": 12.4201, "step": 8272 }, { "epoch": 0.4504979137312559, "grad_norm": 0.6404282685943609, "learning_rate": 0.00018066364777749602, "loss": 12.4724, "step": 8273 }, { "epoch": 0.4505523677278389, "grad_norm": 0.6757165721643505, "learning_rate": 0.00018065843545111033, "loss": 12.3736, "step": 8274 }, { "epoch": 0.45060682172442196, "grad_norm": 0.6436434529216677, "learning_rate": 0.00018065322249751108, "loss": 12.5005, "step": 8275 }, { "epoch": 0.45066127572100495, "grad_norm": 0.7705414807500481, "learning_rate": 0.00018064800891673887, "loss": 12.5556, "step": 8276 }, { "epoch": 0.45071572971758794, "grad_norm": 0.6253013831319388, "learning_rate": 0.0001806427947088342, "loss": 12.3188, "step": 8277 }, { "epoch": 0.450770183714171, "grad_norm": 0.6693402182742427, "learning_rate": 0.00018063757987383765, "loss": 12.4618, "step": 8278 }, { "epoch": 0.450824637710754, "grad_norm": 0.809093178489467, "learning_rate": 0.00018063236441178977, "loss": 12.3269, "step": 8279 }, { "epoch": 0.450879091707337, "grad_norm": 0.6960771578027255, "learning_rate": 0.0001806271483227311, "loss": 12.446, "step": 8280 }, { "epoch": 0.45093354570392, "grad_norm": 0.6836104620072424, "learning_rate": 0.00018062193160670216, "loss": 12.4956, "step": 8281 }, { "epoch": 0.450987999700503, "grad_norm": 0.6288308608485124, "learning_rate": 0.0001806167142637436, "loss": 12.4298, "step": 8282 }, { "epoch": 0.451042453697086, "grad_norm": 0.6590638845740401, "learning_rate": 0.00018061149629389594, "loss": 12.6313, "step": 8283 }, { "epoch": 0.45109690769366906, "grad_norm": 0.713252599730932, "learning_rate": 0.00018060627769719977, "loss": 12.5886, "step": 8284 }, { "epoch": 0.45115136169025205, "grad_norm": 0.6662160820965829, "learning_rate": 0.00018060105847369563, "loss": 12.4701, "step": 8285 }, { "epoch": 0.45120581568683504, "grad_norm": 0.8098651522302789, "learning_rate": 0.0001805958386234242, "loss": 12.5582, "step": 8286 }, { "epoch": 0.4512602696834181, "grad_norm": 0.5886671797308553, "learning_rate": 0.000180590618146426, "loss": 12.3877, "step": 8287 }, { "epoch": 0.4513147236800011, "grad_norm": 0.6010781282278521, "learning_rate": 0.00018058539704274163, "loss": 12.4755, "step": 8288 }, { "epoch": 0.4513691776765841, "grad_norm": 0.622368369814635, "learning_rate": 0.00018058017531241168, "loss": 12.4668, "step": 8289 }, { "epoch": 0.4514236316731671, "grad_norm": 0.5814870801804356, "learning_rate": 0.00018057495295547676, "loss": 12.3564, "step": 8290 }, { "epoch": 0.4514780856697501, "grad_norm": 0.6701604109652208, "learning_rate": 0.00018056972997197753, "loss": 12.5295, "step": 8291 }, { "epoch": 0.4515325396663331, "grad_norm": 0.6107798920489389, "learning_rate": 0.0001805645063619546, "loss": 12.4094, "step": 8292 }, { "epoch": 0.45158699366291616, "grad_norm": 0.6121065620221543, "learning_rate": 0.00018055928212544847, "loss": 12.5846, "step": 8293 }, { "epoch": 0.45164144765949915, "grad_norm": 0.6625822088074984, "learning_rate": 0.00018055405726249992, "loss": 12.3911, "step": 8294 }, { "epoch": 0.45169590165608214, "grad_norm": 0.6761019614906887, "learning_rate": 0.0001805488317731495, "loss": 12.5609, "step": 8295 }, { "epoch": 0.4517503556526652, "grad_norm": 0.6057534358071944, "learning_rate": 0.00018054360565743785, "loss": 12.5217, "step": 8296 }, { "epoch": 0.4518048096492482, "grad_norm": 0.5904779845769116, "learning_rate": 0.0001805383789154056, "loss": 12.506, "step": 8297 }, { "epoch": 0.45185926364583123, "grad_norm": 0.7203160399786142, "learning_rate": 0.00018053315154709342, "loss": 12.6001, "step": 8298 }, { "epoch": 0.4519137176424142, "grad_norm": 0.7114625436615308, "learning_rate": 0.00018052792355254197, "loss": 12.4177, "step": 8299 }, { "epoch": 0.4519681716389972, "grad_norm": 0.6171990688626064, "learning_rate": 0.00018052269493179184, "loss": 12.3536, "step": 8300 }, { "epoch": 0.45202262563558027, "grad_norm": 0.6358158470366418, "learning_rate": 0.00018051746568488377, "loss": 12.5709, "step": 8301 }, { "epoch": 0.45207707963216326, "grad_norm": 0.6387813901796725, "learning_rate": 0.00018051223581185837, "loss": 12.3938, "step": 8302 }, { "epoch": 0.45213153362874625, "grad_norm": 0.571110397702212, "learning_rate": 0.0001805070053127563, "loss": 12.4995, "step": 8303 }, { "epoch": 0.4521859876253293, "grad_norm": 0.6288736694560902, "learning_rate": 0.00018050177418761827, "loss": 12.2448, "step": 8304 }, { "epoch": 0.4522404416219123, "grad_norm": 0.6325902111994797, "learning_rate": 0.00018049654243648496, "loss": 12.3974, "step": 8305 }, { "epoch": 0.4522948956184953, "grad_norm": 0.6493021827097402, "learning_rate": 0.000180491310059397, "loss": 12.4227, "step": 8306 }, { "epoch": 0.45234934961507833, "grad_norm": 0.5846396599292224, "learning_rate": 0.00018048607705639517, "loss": 12.3472, "step": 8307 }, { "epoch": 0.4524038036116613, "grad_norm": 0.649321265011039, "learning_rate": 0.00018048084342752008, "loss": 12.5292, "step": 8308 }, { "epoch": 0.4524582576082443, "grad_norm": 0.6319409826716816, "learning_rate": 0.00018047560917281246, "loss": 12.5412, "step": 8309 }, { "epoch": 0.45251271160482737, "grad_norm": 0.7202103340396316, "learning_rate": 0.00018047037429231298, "loss": 12.538, "step": 8310 }, { "epoch": 0.45256716560141036, "grad_norm": 0.708175500993768, "learning_rate": 0.00018046513878606238, "loss": 12.6258, "step": 8311 }, { "epoch": 0.45262161959799335, "grad_norm": 0.5807181380628393, "learning_rate": 0.0001804599026541014, "loss": 12.3849, "step": 8312 }, { "epoch": 0.4526760735945764, "grad_norm": 0.6428716877562957, "learning_rate": 0.0001804546658964707, "loss": 12.4105, "step": 8313 }, { "epoch": 0.4527305275911594, "grad_norm": 0.6653068583401828, "learning_rate": 0.00018044942851321103, "loss": 12.3114, "step": 8314 }, { "epoch": 0.4527849815877424, "grad_norm": 0.6454680182879358, "learning_rate": 0.0001804441905043631, "loss": 12.532, "step": 8315 }, { "epoch": 0.45283943558432543, "grad_norm": 0.5780892948692522, "learning_rate": 0.00018043895186996766, "loss": 12.3504, "step": 8316 }, { "epoch": 0.4528938895809084, "grad_norm": 0.69638392730691, "learning_rate": 0.00018043371261006546, "loss": 12.4106, "step": 8317 }, { "epoch": 0.4529483435774914, "grad_norm": 0.635956986948982, "learning_rate": 0.0001804284727246972, "loss": 12.5283, "step": 8318 }, { "epoch": 0.45300279757407447, "grad_norm": 0.6313431542322339, "learning_rate": 0.00018042323221390368, "loss": 12.5512, "step": 8319 }, { "epoch": 0.45305725157065746, "grad_norm": 0.6489741452015877, "learning_rate": 0.0001804179910777256, "loss": 12.4485, "step": 8320 }, { "epoch": 0.45311170556724045, "grad_norm": 0.6315006712394421, "learning_rate": 0.00018041274931620372, "loss": 12.5214, "step": 8321 }, { "epoch": 0.4531661595638235, "grad_norm": 0.5990546472160445, "learning_rate": 0.00018040750692937881, "loss": 12.525, "step": 8322 }, { "epoch": 0.4532206135604065, "grad_norm": 0.5871850278370101, "learning_rate": 0.00018040226391729166, "loss": 12.3591, "step": 8323 }, { "epoch": 0.4532750675569895, "grad_norm": 0.658322856276941, "learning_rate": 0.00018039702027998304, "loss": 12.5054, "step": 8324 }, { "epoch": 0.45332952155357253, "grad_norm": 0.639803097756465, "learning_rate": 0.0001803917760174937, "loss": 12.5238, "step": 8325 }, { "epoch": 0.4533839755501555, "grad_norm": 0.8436479688153324, "learning_rate": 0.00018038653112986442, "loss": 12.4266, "step": 8326 }, { "epoch": 0.4534384295467385, "grad_norm": 0.662813388242909, "learning_rate": 0.00018038128561713597, "loss": 12.3991, "step": 8327 }, { "epoch": 0.45349288354332157, "grad_norm": 0.6802356628734042, "learning_rate": 0.00018037603947934917, "loss": 12.3814, "step": 8328 }, { "epoch": 0.45354733753990456, "grad_norm": 0.7191399813569995, "learning_rate": 0.00018037079271654484, "loss": 12.5178, "step": 8329 }, { "epoch": 0.4536017915364876, "grad_norm": 0.7728736862833322, "learning_rate": 0.00018036554532876374, "loss": 12.58, "step": 8330 }, { "epoch": 0.4536562455330706, "grad_norm": 0.7082727583696239, "learning_rate": 0.00018036029731604666, "loss": 12.3314, "step": 8331 }, { "epoch": 0.4537106995296536, "grad_norm": 0.63936538910493, "learning_rate": 0.00018035504867843446, "loss": 12.5019, "step": 8332 }, { "epoch": 0.45376515352623664, "grad_norm": 0.7309572506200637, "learning_rate": 0.0001803497994159679, "loss": 12.3034, "step": 8333 }, { "epoch": 0.45381960752281963, "grad_norm": 0.7346023329112903, "learning_rate": 0.00018034454952868783, "loss": 12.493, "step": 8334 }, { "epoch": 0.4538740615194026, "grad_norm": 0.6438331942903343, "learning_rate": 0.00018033929901663508, "loss": 12.4777, "step": 8335 }, { "epoch": 0.4539285155159857, "grad_norm": 0.6429282768622242, "learning_rate": 0.00018033404787985046, "loss": 12.5446, "step": 8336 }, { "epoch": 0.45398296951256867, "grad_norm": 0.6470283192872487, "learning_rate": 0.00018032879611837484, "loss": 12.4359, "step": 8337 }, { "epoch": 0.45403742350915166, "grad_norm": 0.6652689528162133, "learning_rate": 0.00018032354373224898, "loss": 12.4367, "step": 8338 }, { "epoch": 0.4540918775057347, "grad_norm": 0.7137004022132828, "learning_rate": 0.00018031829072151382, "loss": 12.4301, "step": 8339 }, { "epoch": 0.4541463315023177, "grad_norm": 0.6441967324262494, "learning_rate": 0.0001803130370862101, "loss": 12.4321, "step": 8340 }, { "epoch": 0.4542007854989007, "grad_norm": 0.7165948198137981, "learning_rate": 0.0001803077828263788, "loss": 12.3381, "step": 8341 }, { "epoch": 0.45425523949548374, "grad_norm": 0.6936232718909088, "learning_rate": 0.0001803025279420607, "loss": 12.4531, "step": 8342 }, { "epoch": 0.45430969349206674, "grad_norm": 0.6793974980558357, "learning_rate": 0.00018029727243329665, "loss": 12.4894, "step": 8343 }, { "epoch": 0.45436414748864973, "grad_norm": 0.7578080572478764, "learning_rate": 0.00018029201630012756, "loss": 12.4671, "step": 8344 }, { "epoch": 0.4544186014852328, "grad_norm": 0.6375758735624671, "learning_rate": 0.00018028675954259428, "loss": 12.4301, "step": 8345 }, { "epoch": 0.45447305548181577, "grad_norm": 0.6403854044238918, "learning_rate": 0.00018028150216073768, "loss": 12.5084, "step": 8346 }, { "epoch": 0.45452750947839876, "grad_norm": 0.744259474887641, "learning_rate": 0.00018027624415459869, "loss": 12.4074, "step": 8347 }, { "epoch": 0.4545819634749818, "grad_norm": 0.6532864185316472, "learning_rate": 0.00018027098552421812, "loss": 12.1952, "step": 8348 }, { "epoch": 0.4546364174715648, "grad_norm": 0.6709441299649114, "learning_rate": 0.00018026572626963697, "loss": 12.4102, "step": 8349 }, { "epoch": 0.4546908714681478, "grad_norm": 0.6527345750406884, "learning_rate": 0.000180260466390896, "loss": 12.5381, "step": 8350 }, { "epoch": 0.45474532546473084, "grad_norm": 0.5993932846023744, "learning_rate": 0.00018025520588803624, "loss": 12.3806, "step": 8351 }, { "epoch": 0.45479977946131384, "grad_norm": 0.6560079767341911, "learning_rate": 0.0001802499447610985, "loss": 12.426, "step": 8352 }, { "epoch": 0.45485423345789683, "grad_norm": 0.6208449905155884, "learning_rate": 0.00018024468301012375, "loss": 12.4476, "step": 8353 }, { "epoch": 0.4549086874544799, "grad_norm": 0.5938706449016852, "learning_rate": 0.00018023942063515288, "loss": 12.3896, "step": 8354 }, { "epoch": 0.45496314145106287, "grad_norm": 0.6840502037543044, "learning_rate": 0.00018023415763622684, "loss": 12.5287, "step": 8355 }, { "epoch": 0.45501759544764586, "grad_norm": 0.6484201237103069, "learning_rate": 0.0001802288940133865, "loss": 12.5393, "step": 8356 }, { "epoch": 0.4550720494442289, "grad_norm": 0.6536567142719277, "learning_rate": 0.00018022362976667286, "loss": 12.5158, "step": 8357 }, { "epoch": 0.4551265034408119, "grad_norm": 0.6495453175136698, "learning_rate": 0.00018021836489612682, "loss": 12.402, "step": 8358 }, { "epoch": 0.4551809574373949, "grad_norm": 0.6546075029096625, "learning_rate": 0.0001802130994017893, "loss": 12.4056, "step": 8359 }, { "epoch": 0.45523541143397794, "grad_norm": 0.6169108403795847, "learning_rate": 0.00018020783328370128, "loss": 12.3984, "step": 8360 }, { "epoch": 0.45528986543056094, "grad_norm": 0.7334837642317463, "learning_rate": 0.00018020256654190372, "loss": 12.5352, "step": 8361 }, { "epoch": 0.45534431942714393, "grad_norm": 0.6355316885852189, "learning_rate": 0.0001801972991764375, "loss": 12.4515, "step": 8362 }, { "epoch": 0.455398773423727, "grad_norm": 0.6762017192687889, "learning_rate": 0.00018019203118734364, "loss": 12.5213, "step": 8363 }, { "epoch": 0.45545322742030997, "grad_norm": 0.652040432310015, "learning_rate": 0.00018018676257466315, "loss": 12.3384, "step": 8364 }, { "epoch": 0.455507681416893, "grad_norm": 0.6052011344639239, "learning_rate": 0.0001801814933384369, "loss": 12.5054, "step": 8365 }, { "epoch": 0.455562135413476, "grad_norm": 0.7345528713970678, "learning_rate": 0.00018017622347870594, "loss": 12.2335, "step": 8366 }, { "epoch": 0.455616589410059, "grad_norm": 0.6469247135524236, "learning_rate": 0.0001801709529955112, "loss": 12.4192, "step": 8367 }, { "epoch": 0.45567104340664205, "grad_norm": 0.562462112853363, "learning_rate": 0.00018016568188889366, "loss": 12.3651, "step": 8368 }, { "epoch": 0.45572549740322504, "grad_norm": 0.7793939972367465, "learning_rate": 0.0001801604101588944, "loss": 12.5982, "step": 8369 }, { "epoch": 0.45577995139980804, "grad_norm": 0.7634582703853848, "learning_rate": 0.00018015513780555428, "loss": 12.4733, "step": 8370 }, { "epoch": 0.4558344053963911, "grad_norm": 0.7433426272281132, "learning_rate": 0.00018014986482891443, "loss": 12.3873, "step": 8371 }, { "epoch": 0.4558888593929741, "grad_norm": 0.7151847951356256, "learning_rate": 0.00018014459122901575, "loss": 12.4266, "step": 8372 }, { "epoch": 0.45594331338955707, "grad_norm": 0.6536945132481292, "learning_rate": 0.00018013931700589926, "loss": 12.4628, "step": 8373 }, { "epoch": 0.4559977673861401, "grad_norm": 0.6432829911573463, "learning_rate": 0.00018013404215960608, "loss": 12.5578, "step": 8374 }, { "epoch": 0.4560522213827231, "grad_norm": 0.7332230865995127, "learning_rate": 0.0001801287666901771, "loss": 12.4341, "step": 8375 }, { "epoch": 0.4561066753793061, "grad_norm": 0.6581836725868025, "learning_rate": 0.0001801234905976534, "loss": 12.4149, "step": 8376 }, { "epoch": 0.45616112937588915, "grad_norm": 0.7295789375243006, "learning_rate": 0.000180118213882076, "loss": 12.4868, "step": 8377 }, { "epoch": 0.45621558337247214, "grad_norm": 0.6554620231047964, "learning_rate": 0.00018011293654348592, "loss": 12.4085, "step": 8378 }, { "epoch": 0.45627003736905514, "grad_norm": 0.6970016512205633, "learning_rate": 0.00018010765858192425, "loss": 12.5611, "step": 8379 }, { "epoch": 0.4563244913656382, "grad_norm": 0.7069624152061406, "learning_rate": 0.00018010237999743195, "loss": 12.4065, "step": 8380 }, { "epoch": 0.4563789453622212, "grad_norm": 0.6534734284592699, "learning_rate": 0.00018009710079005013, "loss": 12.4033, "step": 8381 }, { "epoch": 0.45643339935880417, "grad_norm": 0.6733227018555901, "learning_rate": 0.00018009182095981983, "loss": 12.6301, "step": 8382 }, { "epoch": 0.4564878533553872, "grad_norm": 0.6883935790218004, "learning_rate": 0.00018008654050678208, "loss": 12.4693, "step": 8383 }, { "epoch": 0.4565423073519702, "grad_norm": 0.6744703967734476, "learning_rate": 0.000180081259430978, "loss": 12.3845, "step": 8384 }, { "epoch": 0.4565967613485532, "grad_norm": 0.7186218426749795, "learning_rate": 0.00018007597773244855, "loss": 12.3878, "step": 8385 }, { "epoch": 0.45665121534513625, "grad_norm": 0.6349234443623138, "learning_rate": 0.00018007069541123492, "loss": 12.4049, "step": 8386 }, { "epoch": 0.45670566934171924, "grad_norm": 0.5824037807653083, "learning_rate": 0.0001800654124673781, "loss": 12.3113, "step": 8387 }, { "epoch": 0.45676012333830224, "grad_norm": 0.6222886438219286, "learning_rate": 0.00018006012890091924, "loss": 12.4073, "step": 8388 }, { "epoch": 0.4568145773348853, "grad_norm": 0.7319420156442162, "learning_rate": 0.00018005484471189938, "loss": 12.5158, "step": 8389 }, { "epoch": 0.4568690313314683, "grad_norm": 0.7122592024847274, "learning_rate": 0.00018004955990035963, "loss": 12.6155, "step": 8390 }, { "epoch": 0.45692348532805127, "grad_norm": 0.8861540540847268, "learning_rate": 0.00018004427446634104, "loss": 12.4889, "step": 8391 }, { "epoch": 0.4569779393246343, "grad_norm": 0.5764384661397526, "learning_rate": 0.0001800389884098848, "loss": 12.3368, "step": 8392 }, { "epoch": 0.4570323933212173, "grad_norm": 0.6463869931658384, "learning_rate": 0.00018003370173103194, "loss": 12.4053, "step": 8393 }, { "epoch": 0.4570868473178003, "grad_norm": 0.7557404332112136, "learning_rate": 0.0001800284144298236, "loss": 12.4105, "step": 8394 }, { "epoch": 0.45714130131438335, "grad_norm": 0.6785861968861243, "learning_rate": 0.00018002312650630087, "loss": 12.5533, "step": 8395 }, { "epoch": 0.45719575531096635, "grad_norm": 0.5997132875084884, "learning_rate": 0.0001800178379605049, "loss": 12.3683, "step": 8396 }, { "epoch": 0.4572502093075494, "grad_norm": 0.6723666128208018, "learning_rate": 0.0001800125487924768, "loss": 12.4165, "step": 8397 }, { "epoch": 0.4573046633041324, "grad_norm": 0.604105015372839, "learning_rate": 0.00018000725900225767, "loss": 12.3035, "step": 8398 }, { "epoch": 0.4573591173007154, "grad_norm": 0.6976708969027182, "learning_rate": 0.00018000196858988874, "loss": 12.4663, "step": 8399 }, { "epoch": 0.4574135712972984, "grad_norm": 0.6360785426257929, "learning_rate": 0.00017999667755541106, "loss": 12.3827, "step": 8400 }, { "epoch": 0.4574680252938814, "grad_norm": 0.750727146607152, "learning_rate": 0.00017999138589886576, "loss": 12.4395, "step": 8401 }, { "epoch": 0.4575224792904644, "grad_norm": 0.6165410796762938, "learning_rate": 0.0001799860936202941, "loss": 12.4172, "step": 8402 }, { "epoch": 0.45757693328704746, "grad_norm": 0.664655318925348, "learning_rate": 0.00017998080071973712, "loss": 12.4702, "step": 8403 }, { "epoch": 0.45763138728363045, "grad_norm": 0.6866522603182079, "learning_rate": 0.00017997550719723603, "loss": 12.3881, "step": 8404 }, { "epoch": 0.45768584128021345, "grad_norm": 0.6207786327571557, "learning_rate": 0.000179970213052832, "loss": 12.5766, "step": 8405 }, { "epoch": 0.4577402952767965, "grad_norm": 0.6271054032970457, "learning_rate": 0.00017996491828656613, "loss": 12.4155, "step": 8406 }, { "epoch": 0.4577947492733795, "grad_norm": 0.788733076673143, "learning_rate": 0.00017995962289847972, "loss": 12.4865, "step": 8407 }, { "epoch": 0.4578492032699625, "grad_norm": 0.6451839502353852, "learning_rate": 0.00017995432688861383, "loss": 12.3857, "step": 8408 }, { "epoch": 0.4579036572665455, "grad_norm": 0.7922111769987087, "learning_rate": 0.00017994903025700967, "loss": 12.3343, "step": 8409 }, { "epoch": 0.4579581112631285, "grad_norm": 0.6291459222045038, "learning_rate": 0.00017994373300370847, "loss": 12.3404, "step": 8410 }, { "epoch": 0.4580125652597115, "grad_norm": 0.6257345055875672, "learning_rate": 0.0001799384351287514, "loss": 12.4469, "step": 8411 }, { "epoch": 0.45806701925629456, "grad_norm": 0.6765850397057749, "learning_rate": 0.00017993313663217962, "loss": 12.3578, "step": 8412 }, { "epoch": 0.45812147325287755, "grad_norm": 0.5731249213737087, "learning_rate": 0.0001799278375140344, "loss": 12.4651, "step": 8413 }, { "epoch": 0.45817592724946055, "grad_norm": 0.7520989664714446, "learning_rate": 0.00017992253777435687, "loss": 12.4347, "step": 8414 }, { "epoch": 0.4582303812460436, "grad_norm": 0.6548601863439822, "learning_rate": 0.00017991723741318834, "loss": 12.4428, "step": 8415 }, { "epoch": 0.4582848352426266, "grad_norm": 0.6742770381298215, "learning_rate": 0.0001799119364305699, "loss": 12.5476, "step": 8416 }, { "epoch": 0.4583392892392096, "grad_norm": 0.6649821067512534, "learning_rate": 0.00017990663482654292, "loss": 12.4915, "step": 8417 }, { "epoch": 0.45839374323579263, "grad_norm": 0.7172279380276158, "learning_rate": 0.0001799013326011485, "loss": 12.5574, "step": 8418 }, { "epoch": 0.4584481972323756, "grad_norm": 0.7547962084995847, "learning_rate": 0.00017989602975442793, "loss": 12.4675, "step": 8419 }, { "epoch": 0.4585026512289586, "grad_norm": 0.5859887915230573, "learning_rate": 0.00017989072628642243, "loss": 12.4824, "step": 8420 }, { "epoch": 0.45855710522554166, "grad_norm": 0.7088704213932157, "learning_rate": 0.00017988542219717326, "loss": 12.6085, "step": 8421 }, { "epoch": 0.45861155922212465, "grad_norm": 0.6530669807244944, "learning_rate": 0.00017988011748672162, "loss": 12.5474, "step": 8422 }, { "epoch": 0.45866601321870765, "grad_norm": 0.6012504516424516, "learning_rate": 0.00017987481215510882, "loss": 12.4466, "step": 8423 }, { "epoch": 0.4587204672152907, "grad_norm": 0.6321909432910274, "learning_rate": 0.00017986950620237609, "loss": 12.4394, "step": 8424 }, { "epoch": 0.4587749212118737, "grad_norm": 0.5955062332721126, "learning_rate": 0.00017986419962856464, "loss": 12.4106, "step": 8425 }, { "epoch": 0.4588293752084567, "grad_norm": 0.5834126337825684, "learning_rate": 0.00017985889243371582, "loss": 12.3952, "step": 8426 }, { "epoch": 0.45888382920503973, "grad_norm": 0.6306202539060862, "learning_rate": 0.00017985358461787082, "loss": 12.4875, "step": 8427 }, { "epoch": 0.4589382832016227, "grad_norm": 0.6098987214034494, "learning_rate": 0.00017984827618107102, "loss": 12.5203, "step": 8428 }, { "epoch": 0.4589927371982057, "grad_norm": 0.6298011928808441, "learning_rate": 0.00017984296712335757, "loss": 12.4491, "step": 8429 }, { "epoch": 0.45904719119478876, "grad_norm": 0.6316231234434623, "learning_rate": 0.00017983765744477185, "loss": 12.3603, "step": 8430 }, { "epoch": 0.45910164519137175, "grad_norm": 0.621224439891547, "learning_rate": 0.0001798323471453551, "loss": 12.5129, "step": 8431 }, { "epoch": 0.4591560991879548, "grad_norm": 0.6215508990689357, "learning_rate": 0.00017982703622514867, "loss": 12.4447, "step": 8432 }, { "epoch": 0.4592105531845378, "grad_norm": 0.6373085894914177, "learning_rate": 0.00017982172468419377, "loss": 12.3633, "step": 8433 }, { "epoch": 0.4592650071811208, "grad_norm": 0.5999470984578853, "learning_rate": 0.00017981641252253177, "loss": 12.3336, "step": 8434 }, { "epoch": 0.45931946117770384, "grad_norm": 0.6530057898257308, "learning_rate": 0.000179811099740204, "loss": 12.5767, "step": 8435 }, { "epoch": 0.45937391517428683, "grad_norm": 0.6462165261279232, "learning_rate": 0.00017980578633725166, "loss": 12.4114, "step": 8436 }, { "epoch": 0.4594283691708698, "grad_norm": 0.6665737763342021, "learning_rate": 0.0001798004723137162, "loss": 12.4862, "step": 8437 }, { "epoch": 0.45948282316745287, "grad_norm": 0.5854356255683455, "learning_rate": 0.00017979515766963888, "loss": 12.3757, "step": 8438 }, { "epoch": 0.45953727716403586, "grad_norm": 0.6429628084102862, "learning_rate": 0.00017978984240506101, "loss": 12.4527, "step": 8439 }, { "epoch": 0.45959173116061885, "grad_norm": 0.6224284086182738, "learning_rate": 0.00017978452652002392, "loss": 12.3866, "step": 8440 }, { "epoch": 0.4596461851572019, "grad_norm": 0.7085312267599722, "learning_rate": 0.00017977921001456902, "loss": 12.5016, "step": 8441 }, { "epoch": 0.4597006391537849, "grad_norm": 0.6625040975468844, "learning_rate": 0.0001797738928887376, "loss": 12.5766, "step": 8442 }, { "epoch": 0.4597550931503679, "grad_norm": 0.7112702932006277, "learning_rate": 0.000179768575142571, "loss": 12.4637, "step": 8443 }, { "epoch": 0.45980954714695094, "grad_norm": 0.6543475319952922, "learning_rate": 0.0001797632567761106, "loss": 12.4763, "step": 8444 }, { "epoch": 0.45986400114353393, "grad_norm": 0.6561221437463571, "learning_rate": 0.00017975793778939768, "loss": 12.2698, "step": 8445 }, { "epoch": 0.4599184551401169, "grad_norm": 0.6304476519096577, "learning_rate": 0.00017975261818247373, "loss": 12.4731, "step": 8446 }, { "epoch": 0.45997290913669997, "grad_norm": 0.6737388668545593, "learning_rate": 0.00017974729795538, "loss": 12.5866, "step": 8447 }, { "epoch": 0.46002736313328296, "grad_norm": 0.64445630023729, "learning_rate": 0.00017974197710815795, "loss": 12.3961, "step": 8448 }, { "epoch": 0.46008181712986596, "grad_norm": 0.570660478325581, "learning_rate": 0.0001797366556408489, "loss": 12.5089, "step": 8449 }, { "epoch": 0.460136271126449, "grad_norm": 0.7022380367884817, "learning_rate": 0.0001797313335534942, "loss": 12.3741, "step": 8450 }, { "epoch": 0.460190725123032, "grad_norm": 0.612890241562397, "learning_rate": 0.00017972601084613533, "loss": 12.4199, "step": 8451 }, { "epoch": 0.460245179119615, "grad_norm": 0.6156966291816607, "learning_rate": 0.0001797206875188136, "loss": 12.5195, "step": 8452 }, { "epoch": 0.46029963311619804, "grad_norm": 0.6462544029384523, "learning_rate": 0.00017971536357157044, "loss": 12.5192, "step": 8453 }, { "epoch": 0.46035408711278103, "grad_norm": 0.6231812162951743, "learning_rate": 0.00017971003900444727, "loss": 12.424, "step": 8454 }, { "epoch": 0.460408541109364, "grad_norm": 0.6855647949717323, "learning_rate": 0.00017970471381748544, "loss": 12.4334, "step": 8455 }, { "epoch": 0.46046299510594707, "grad_norm": 0.6311800182849413, "learning_rate": 0.0001796993880107264, "loss": 12.3238, "step": 8456 }, { "epoch": 0.46051744910253006, "grad_norm": 0.6337541676749251, "learning_rate": 0.00017969406158421157, "loss": 12.4585, "step": 8457 }, { "epoch": 0.46057190309911306, "grad_norm": 0.6208653234499459, "learning_rate": 0.0001796887345379823, "loss": 12.4279, "step": 8458 }, { "epoch": 0.4606263570956961, "grad_norm": 0.6330536996832912, "learning_rate": 0.00017968340687208012, "loss": 12.4568, "step": 8459 }, { "epoch": 0.4606808110922791, "grad_norm": 0.6090976625280496, "learning_rate": 0.0001796780785865464, "loss": 12.3771, "step": 8460 }, { "epoch": 0.4607352650888621, "grad_norm": 0.65006715249651, "learning_rate": 0.00017967274968142257, "loss": 12.5136, "step": 8461 }, { "epoch": 0.46078971908544514, "grad_norm": 0.6091210194797642, "learning_rate": 0.00017966742015675008, "loss": 12.4713, "step": 8462 }, { "epoch": 0.46084417308202813, "grad_norm": 0.5394330605868857, "learning_rate": 0.00017966209001257032, "loss": 12.3411, "step": 8463 }, { "epoch": 0.4608986270786112, "grad_norm": 0.6299230807970284, "learning_rate": 0.00017965675924892484, "loss": 12.3845, "step": 8464 }, { "epoch": 0.46095308107519417, "grad_norm": 0.6004734955181203, "learning_rate": 0.00017965142786585504, "loss": 12.2142, "step": 8465 }, { "epoch": 0.46100753507177716, "grad_norm": 0.6013644648463967, "learning_rate": 0.0001796460958634024, "loss": 12.4469, "step": 8466 }, { "epoch": 0.4610619890683602, "grad_norm": 0.6540201882122875, "learning_rate": 0.00017964076324160832, "loss": 12.4277, "step": 8467 }, { "epoch": 0.4611164430649432, "grad_norm": 0.6842417699779186, "learning_rate": 0.00017963543000051432, "loss": 12.4336, "step": 8468 }, { "epoch": 0.4611708970615262, "grad_norm": 0.6259991019467918, "learning_rate": 0.00017963009614016187, "loss": 12.4175, "step": 8469 }, { "epoch": 0.46122535105810925, "grad_norm": 0.7977560619398021, "learning_rate": 0.00017962476166059243, "loss": 12.6586, "step": 8470 }, { "epoch": 0.46127980505469224, "grad_norm": 0.7040812223013768, "learning_rate": 0.0001796194265618475, "loss": 12.4542, "step": 8471 }, { "epoch": 0.46133425905127523, "grad_norm": 0.6170972449417943, "learning_rate": 0.00017961409084396856, "loss": 12.2917, "step": 8472 }, { "epoch": 0.4613887130478583, "grad_norm": 0.5628385005146082, "learning_rate": 0.00017960875450699707, "loss": 12.4389, "step": 8473 }, { "epoch": 0.46144316704444127, "grad_norm": 0.7106466298776539, "learning_rate": 0.00017960341755097459, "loss": 12.4437, "step": 8474 }, { "epoch": 0.46149762104102426, "grad_norm": 0.6325480250324459, "learning_rate": 0.00017959807997594256, "loss": 12.3271, "step": 8475 }, { "epoch": 0.4615520750376073, "grad_norm": 0.6688376322815393, "learning_rate": 0.00017959274178194252, "loss": 12.5363, "step": 8476 }, { "epoch": 0.4616065290341903, "grad_norm": 0.6390089606861243, "learning_rate": 0.00017958740296901597, "loss": 12.5253, "step": 8477 }, { "epoch": 0.4616609830307733, "grad_norm": 0.6483652623580444, "learning_rate": 0.00017958206353720443, "loss": 12.3701, "step": 8478 }, { "epoch": 0.46171543702735635, "grad_norm": 0.5747024925879572, "learning_rate": 0.00017957672348654943, "loss": 12.3712, "step": 8479 }, { "epoch": 0.46176989102393934, "grad_norm": 0.6407691144277222, "learning_rate": 0.00017957138281709246, "loss": 12.5068, "step": 8480 }, { "epoch": 0.46182434502052233, "grad_norm": 0.5804922627948418, "learning_rate": 0.00017956604152887507, "loss": 12.4644, "step": 8481 }, { "epoch": 0.4618787990171054, "grad_norm": 0.7734467267607033, "learning_rate": 0.00017956069962193886, "loss": 12.5521, "step": 8482 }, { "epoch": 0.46193325301368837, "grad_norm": 0.5895896860218451, "learning_rate": 0.00017955535709632522, "loss": 12.441, "step": 8483 }, { "epoch": 0.46198770701027136, "grad_norm": 0.7151105273525564, "learning_rate": 0.00017955001395207585, "loss": 12.3842, "step": 8484 }, { "epoch": 0.4620421610068544, "grad_norm": 0.605451804966123, "learning_rate": 0.0001795446701892322, "loss": 12.3717, "step": 8485 }, { "epoch": 0.4620966150034374, "grad_norm": 0.6796432599907761, "learning_rate": 0.00017953932580783586, "loss": 12.5606, "step": 8486 }, { "epoch": 0.4621510690000204, "grad_norm": 0.6543516269832156, "learning_rate": 0.00017953398080792837, "loss": 12.3228, "step": 8487 }, { "epoch": 0.46220552299660345, "grad_norm": 0.7231693338943618, "learning_rate": 0.00017952863518955133, "loss": 12.561, "step": 8488 }, { "epoch": 0.46225997699318644, "grad_norm": 0.6967078738341969, "learning_rate": 0.0001795232889527463, "loss": 12.4842, "step": 8489 }, { "epoch": 0.46231443098976943, "grad_norm": 0.6900231716925654, "learning_rate": 0.0001795179420975548, "loss": 12.5378, "step": 8490 }, { "epoch": 0.4623688849863525, "grad_norm": 0.6565505110739019, "learning_rate": 0.00017951259462401845, "loss": 12.3519, "step": 8491 }, { "epoch": 0.4624233389829355, "grad_norm": 0.5804690618308558, "learning_rate": 0.00017950724653217884, "loss": 12.4108, "step": 8492 }, { "epoch": 0.46247779297951846, "grad_norm": 0.695013601610202, "learning_rate": 0.00017950189782207755, "loss": 12.4763, "step": 8493 }, { "epoch": 0.4625322469761015, "grad_norm": 0.6476402393977978, "learning_rate": 0.00017949654849375616, "loss": 12.5224, "step": 8494 }, { "epoch": 0.4625867009726845, "grad_norm": 0.650412323007607, "learning_rate": 0.00017949119854725632, "loss": 12.5714, "step": 8495 }, { "epoch": 0.4626411549692675, "grad_norm": 0.6177719603875415, "learning_rate": 0.00017948584798261954, "loss": 12.4933, "step": 8496 }, { "epoch": 0.46269560896585055, "grad_norm": 0.6140868763191671, "learning_rate": 0.0001794804967998875, "loss": 12.4619, "step": 8497 }, { "epoch": 0.46275006296243354, "grad_norm": 0.6042822927413228, "learning_rate": 0.00017947514499910177, "loss": 12.3968, "step": 8498 }, { "epoch": 0.4628045169590166, "grad_norm": 0.614738493564977, "learning_rate": 0.00017946979258030399, "loss": 12.4828, "step": 8499 }, { "epoch": 0.4628589709555996, "grad_norm": 0.5737063644687317, "learning_rate": 0.00017946443954353577, "loss": 12.3964, "step": 8500 }, { "epoch": 0.4629134249521826, "grad_norm": 0.6287436734117607, "learning_rate": 0.00017945908588883877, "loss": 12.5085, "step": 8501 }, { "epoch": 0.4629678789487656, "grad_norm": 0.6167967595743099, "learning_rate": 0.00017945373161625455, "loss": 12.3852, "step": 8502 }, { "epoch": 0.4630223329453486, "grad_norm": 0.6064207477081835, "learning_rate": 0.00017944837672582485, "loss": 12.3263, "step": 8503 }, { "epoch": 0.4630767869419316, "grad_norm": 0.6333852371143583, "learning_rate": 0.0001794430212175912, "loss": 12.3366, "step": 8504 }, { "epoch": 0.46313124093851465, "grad_norm": 0.6246520185592767, "learning_rate": 0.0001794376650915953, "loss": 12.3339, "step": 8505 }, { "epoch": 0.46318569493509765, "grad_norm": 0.6553154091068601, "learning_rate": 0.00017943230834787882, "loss": 12.3773, "step": 8506 }, { "epoch": 0.46324014893168064, "grad_norm": 0.6780757185364424, "learning_rate": 0.00017942695098648335, "loss": 12.4337, "step": 8507 }, { "epoch": 0.4632946029282637, "grad_norm": 0.5992151156206015, "learning_rate": 0.00017942159300745063, "loss": 12.4676, "step": 8508 }, { "epoch": 0.4633490569248467, "grad_norm": 0.7139367597885157, "learning_rate": 0.00017941623441082225, "loss": 12.4756, "step": 8509 }, { "epoch": 0.4634035109214297, "grad_norm": 0.6074562947215172, "learning_rate": 0.00017941087519663995, "loss": 12.3758, "step": 8510 }, { "epoch": 0.4634579649180127, "grad_norm": 0.5919073671557712, "learning_rate": 0.00017940551536494535, "loss": 12.4662, "step": 8511 }, { "epoch": 0.4635124189145957, "grad_norm": 0.6288349721285509, "learning_rate": 0.00017940015491578012, "loss": 12.4327, "step": 8512 }, { "epoch": 0.4635668729111787, "grad_norm": 0.6377686064725293, "learning_rate": 0.000179394793849186, "loss": 12.4457, "step": 8513 }, { "epoch": 0.46362132690776175, "grad_norm": 0.6331751229664899, "learning_rate": 0.00017938943216520462, "loss": 12.4591, "step": 8514 }, { "epoch": 0.46367578090434475, "grad_norm": 0.6205814254067495, "learning_rate": 0.00017938406986387775, "loss": 12.4356, "step": 8515 }, { "epoch": 0.46373023490092774, "grad_norm": 0.6120530025858775, "learning_rate": 0.000179378706945247, "loss": 12.465, "step": 8516 }, { "epoch": 0.4637846888975108, "grad_norm": 0.6576904060379765, "learning_rate": 0.00017937334340935413, "loss": 12.3719, "step": 8517 }, { "epoch": 0.4638391428940938, "grad_norm": 0.6302025635351302, "learning_rate": 0.0001793679792562408, "loss": 12.3565, "step": 8518 }, { "epoch": 0.4638935968906768, "grad_norm": 0.6226271009698168, "learning_rate": 0.00017936261448594877, "loss": 12.476, "step": 8519 }, { "epoch": 0.4639480508872598, "grad_norm": 0.7097366292174755, "learning_rate": 0.00017935724909851978, "loss": 12.4391, "step": 8520 }, { "epoch": 0.4640025048838428, "grad_norm": 0.6343089219332183, "learning_rate": 0.00017935188309399546, "loss": 12.411, "step": 8521 }, { "epoch": 0.4640569588804258, "grad_norm": 0.6863122806284503, "learning_rate": 0.00017934651647241764, "loss": 12.5285, "step": 8522 }, { "epoch": 0.46411141287700886, "grad_norm": 0.6400060317241383, "learning_rate": 0.00017934114923382798, "loss": 12.4462, "step": 8523 }, { "epoch": 0.46416586687359185, "grad_norm": 0.614577460142748, "learning_rate": 0.00017933578137826822, "loss": 12.4177, "step": 8524 }, { "epoch": 0.46422032087017484, "grad_norm": 0.6379323169224547, "learning_rate": 0.00017933041290578013, "loss": 12.45, "step": 8525 }, { "epoch": 0.4642747748667579, "grad_norm": 0.7842891853867016, "learning_rate": 0.00017932504381640545, "loss": 12.3224, "step": 8526 }, { "epoch": 0.4643292288633409, "grad_norm": 0.6253337426516982, "learning_rate": 0.0001793196741101859, "loss": 12.4774, "step": 8527 }, { "epoch": 0.4643836828599239, "grad_norm": 0.7597331992933967, "learning_rate": 0.00017931430378716328, "loss": 12.5435, "step": 8528 }, { "epoch": 0.4644381368565069, "grad_norm": 0.614380364429936, "learning_rate": 0.00017930893284737932, "loss": 12.3992, "step": 8529 }, { "epoch": 0.4644925908530899, "grad_norm": 0.6301733598215885, "learning_rate": 0.00017930356129087585, "loss": 12.4247, "step": 8530 }, { "epoch": 0.46454704484967296, "grad_norm": 0.7021075766021644, "learning_rate": 0.00017929818911769453, "loss": 12.5615, "step": 8531 }, { "epoch": 0.46460149884625596, "grad_norm": 0.6635152504671655, "learning_rate": 0.00017929281632787723, "loss": 12.4488, "step": 8532 }, { "epoch": 0.46465595284283895, "grad_norm": 0.6959204379962824, "learning_rate": 0.00017928744292146568, "loss": 12.3817, "step": 8533 }, { "epoch": 0.464710406839422, "grad_norm": 0.5937204435815098, "learning_rate": 0.00017928206889850169, "loss": 12.4555, "step": 8534 }, { "epoch": 0.464764860836005, "grad_norm": 0.7572250430526706, "learning_rate": 0.00017927669425902703, "loss": 12.4628, "step": 8535 }, { "epoch": 0.464819314832588, "grad_norm": 0.6405099518133247, "learning_rate": 0.00017927131900308347, "loss": 12.3633, "step": 8536 }, { "epoch": 0.46487376882917103, "grad_norm": 0.6410726229961785, "learning_rate": 0.0001792659431307129, "loss": 12.4437, "step": 8537 }, { "epoch": 0.464928222825754, "grad_norm": 0.6256073298072141, "learning_rate": 0.000179260566641957, "loss": 12.3884, "step": 8538 }, { "epoch": 0.464982676822337, "grad_norm": 0.620457959466047, "learning_rate": 0.00017925518953685766, "loss": 12.3345, "step": 8539 }, { "epoch": 0.46503713081892006, "grad_norm": 0.6852978455402273, "learning_rate": 0.0001792498118154567, "loss": 12.5612, "step": 8540 }, { "epoch": 0.46509158481550306, "grad_norm": 0.6724753812631421, "learning_rate": 0.0001792444334777959, "loss": 12.4961, "step": 8541 }, { "epoch": 0.46514603881208605, "grad_norm": 0.6879903993263202, "learning_rate": 0.0001792390545239171, "loss": 12.5168, "step": 8542 }, { "epoch": 0.4652004928086691, "grad_norm": 0.5520030988039244, "learning_rate": 0.0001792336749538621, "loss": 12.4215, "step": 8543 }, { "epoch": 0.4652549468052521, "grad_norm": 0.5474435428367739, "learning_rate": 0.00017922829476767278, "loss": 12.3678, "step": 8544 }, { "epoch": 0.4653094008018351, "grad_norm": 0.7329959158129233, "learning_rate": 0.00017922291396539093, "loss": 12.3445, "step": 8545 }, { "epoch": 0.46536385479841813, "grad_norm": 0.6512212767290448, "learning_rate": 0.00017921753254705844, "loss": 12.5902, "step": 8546 }, { "epoch": 0.4654183087950011, "grad_norm": 0.5896209592313086, "learning_rate": 0.0001792121505127171, "loss": 12.4124, "step": 8547 }, { "epoch": 0.4654727627915841, "grad_norm": 0.5841614441736784, "learning_rate": 0.00017920676786240883, "loss": 12.3758, "step": 8548 }, { "epoch": 0.46552721678816716, "grad_norm": 0.5898442729547501, "learning_rate": 0.00017920138459617544, "loss": 12.4799, "step": 8549 }, { "epoch": 0.46558167078475016, "grad_norm": 0.5748459449326601, "learning_rate": 0.00017919600071405881, "loss": 12.3012, "step": 8550 }, { "epoch": 0.46563612478133315, "grad_norm": 0.608117050757133, "learning_rate": 0.0001791906162161008, "loss": 12.4454, "step": 8551 }, { "epoch": 0.4656905787779162, "grad_norm": 0.6672970333611582, "learning_rate": 0.00017918523110234324, "loss": 12.3224, "step": 8552 }, { "epoch": 0.4657450327744992, "grad_norm": 0.7520661230221706, "learning_rate": 0.00017917984537282807, "loss": 12.354, "step": 8553 }, { "epoch": 0.4657994867710822, "grad_norm": 0.6302720683788045, "learning_rate": 0.00017917445902759714, "loss": 12.3604, "step": 8554 }, { "epoch": 0.46585394076766523, "grad_norm": 0.6579941205866369, "learning_rate": 0.00017916907206669237, "loss": 12.5587, "step": 8555 }, { "epoch": 0.4659083947642482, "grad_norm": 0.6165873606250319, "learning_rate": 0.00017916368449015556, "loss": 12.431, "step": 8556 }, { "epoch": 0.4659628487608312, "grad_norm": 0.7295377109985214, "learning_rate": 0.0001791582962980287, "loss": 12.3715, "step": 8557 }, { "epoch": 0.46601730275741426, "grad_norm": 0.6578045921999068, "learning_rate": 0.00017915290749035364, "loss": 12.3832, "step": 8558 }, { "epoch": 0.46607175675399726, "grad_norm": 0.6093581051436394, "learning_rate": 0.0001791475180671723, "loss": 12.5183, "step": 8559 }, { "epoch": 0.46612621075058025, "grad_norm": 0.6087113495662672, "learning_rate": 0.0001791421280285266, "loss": 12.5348, "step": 8560 }, { "epoch": 0.4661806647471633, "grad_norm": 0.6808914595686044, "learning_rate": 0.00017913673737445844, "loss": 12.5032, "step": 8561 }, { "epoch": 0.4662351187437463, "grad_norm": 0.6960835995102816, "learning_rate": 0.0001791313461050097, "loss": 12.5811, "step": 8562 }, { "epoch": 0.4662895727403293, "grad_norm": 0.7637939174979597, "learning_rate": 0.0001791259542202224, "loss": 12.4642, "step": 8563 }, { "epoch": 0.46634402673691233, "grad_norm": 0.6495379560707983, "learning_rate": 0.00017912056172013837, "loss": 12.4818, "step": 8564 }, { "epoch": 0.4663984807334953, "grad_norm": 0.644911863272382, "learning_rate": 0.0001791151686047996, "loss": 12.497, "step": 8565 }, { "epoch": 0.4664529347300784, "grad_norm": 0.7269018585601411, "learning_rate": 0.00017910977487424801, "loss": 12.5889, "step": 8566 }, { "epoch": 0.46650738872666137, "grad_norm": 0.5786874526945133, "learning_rate": 0.00017910438052852557, "loss": 12.2868, "step": 8567 }, { "epoch": 0.46656184272324436, "grad_norm": 0.6627592741179691, "learning_rate": 0.00017909898556767414, "loss": 12.4585, "step": 8568 }, { "epoch": 0.4666162967198274, "grad_norm": 0.693478438138713, "learning_rate": 0.00017909358999173577, "loss": 12.3521, "step": 8569 }, { "epoch": 0.4666707507164104, "grad_norm": 0.6801219655549972, "learning_rate": 0.0001790881938007524, "loss": 12.4614, "step": 8570 }, { "epoch": 0.4667252047129934, "grad_norm": 0.7301463752425134, "learning_rate": 0.00017908279699476592, "loss": 12.3549, "step": 8571 }, { "epoch": 0.46677965870957644, "grad_norm": 0.6346179560399473, "learning_rate": 0.00017907739957381838, "loss": 12.3989, "step": 8572 }, { "epoch": 0.46683411270615943, "grad_norm": 0.686851017492308, "learning_rate": 0.00017907200153795171, "loss": 12.3688, "step": 8573 }, { "epoch": 0.4668885667027424, "grad_norm": 0.6605494864320264, "learning_rate": 0.0001790666028872079, "loss": 12.4429, "step": 8574 }, { "epoch": 0.4669430206993255, "grad_norm": 0.7145770417283764, "learning_rate": 0.00017906120362162894, "loss": 12.3289, "step": 8575 }, { "epoch": 0.46699747469590847, "grad_norm": 0.6897110873551012, "learning_rate": 0.00017905580374125678, "loss": 12.4011, "step": 8576 }, { "epoch": 0.46705192869249146, "grad_norm": 0.7216304975696103, "learning_rate": 0.00017905040324613344, "loss": 12.472, "step": 8577 }, { "epoch": 0.4671063826890745, "grad_norm": 0.6970788524101713, "learning_rate": 0.00017904500213630092, "loss": 12.507, "step": 8578 }, { "epoch": 0.4671608366856575, "grad_norm": 0.631846296329978, "learning_rate": 0.0001790396004118012, "loss": 12.3616, "step": 8579 }, { "epoch": 0.4672152906822405, "grad_norm": 0.6871599189413196, "learning_rate": 0.0001790341980726763, "loss": 12.3542, "step": 8580 }, { "epoch": 0.46726974467882354, "grad_norm": 0.6227620922656812, "learning_rate": 0.0001790287951189682, "loss": 12.4218, "step": 8581 }, { "epoch": 0.46732419867540653, "grad_norm": 0.6327965646327595, "learning_rate": 0.00017902339155071896, "loss": 12.5839, "step": 8582 }, { "epoch": 0.4673786526719895, "grad_norm": 0.5943442388995044, "learning_rate": 0.00017901798736797054, "loss": 12.2484, "step": 8583 }, { "epoch": 0.4674331066685726, "grad_norm": 0.6691449075498682, "learning_rate": 0.00017901258257076503, "loss": 12.5749, "step": 8584 }, { "epoch": 0.46748756066515557, "grad_norm": 0.6469442060100173, "learning_rate": 0.00017900717715914444, "loss": 12.4268, "step": 8585 }, { "epoch": 0.46754201466173856, "grad_norm": 0.7164673653306719, "learning_rate": 0.00017900177113315075, "loss": 12.3637, "step": 8586 }, { "epoch": 0.4675964686583216, "grad_norm": 0.5329022400651572, "learning_rate": 0.00017899636449282606, "loss": 12.3726, "step": 8587 }, { "epoch": 0.4676509226549046, "grad_norm": 0.7268214005708902, "learning_rate": 0.0001789909572382124, "loss": 12.4756, "step": 8588 }, { "epoch": 0.4677053766514876, "grad_norm": 0.7438340959520051, "learning_rate": 0.0001789855493693518, "loss": 12.4556, "step": 8589 }, { "epoch": 0.46775983064807064, "grad_norm": 0.5711552451773573, "learning_rate": 0.00017898014088628633, "loss": 12.442, "step": 8590 }, { "epoch": 0.46781428464465363, "grad_norm": 0.760380130815662, "learning_rate": 0.00017897473178905805, "loss": 12.4894, "step": 8591 }, { "epoch": 0.4678687386412366, "grad_norm": 0.6237848382862727, "learning_rate": 0.00017896932207770895, "loss": 12.3818, "step": 8592 }, { "epoch": 0.4679231926378197, "grad_norm": 0.6191436468502493, "learning_rate": 0.00017896391175228123, "loss": 12.4273, "step": 8593 }, { "epoch": 0.46797764663440267, "grad_norm": 0.6690538751938576, "learning_rate": 0.00017895850081281687, "loss": 12.4312, "step": 8594 }, { "epoch": 0.46803210063098566, "grad_norm": 0.6807126771202414, "learning_rate": 0.00017895308925935794, "loss": 12.5251, "step": 8595 }, { "epoch": 0.4680865546275687, "grad_norm": 0.6639063805134303, "learning_rate": 0.00017894767709194658, "loss": 12.4868, "step": 8596 }, { "epoch": 0.4681410086241517, "grad_norm": 0.7908769442385336, "learning_rate": 0.00017894226431062481, "loss": 12.5073, "step": 8597 }, { "epoch": 0.46819546262073475, "grad_norm": 0.665810749723039, "learning_rate": 0.00017893685091543478, "loss": 12.502, "step": 8598 }, { "epoch": 0.46824991661731774, "grad_norm": 0.7095793243118013, "learning_rate": 0.00017893143690641855, "loss": 12.4091, "step": 8599 }, { "epoch": 0.46830437061390073, "grad_norm": 0.6582534562533506, "learning_rate": 0.00017892602228361824, "loss": 12.3877, "step": 8600 }, { "epoch": 0.4683588246104838, "grad_norm": 0.6099299580757082, "learning_rate": 0.0001789206070470759, "loss": 12.3216, "step": 8601 }, { "epoch": 0.4684132786070668, "grad_norm": 0.6673127073946039, "learning_rate": 0.00017891519119683376, "loss": 12.5892, "step": 8602 }, { "epoch": 0.46846773260364977, "grad_norm": 0.6467498612037779, "learning_rate": 0.0001789097747329338, "loss": 12.4686, "step": 8603 }, { "epoch": 0.4685221866002328, "grad_norm": 0.6261087762490277, "learning_rate": 0.00017890435765541821, "loss": 12.5421, "step": 8604 }, { "epoch": 0.4685766405968158, "grad_norm": 0.6264196217535764, "learning_rate": 0.00017889893996432914, "loss": 12.22, "step": 8605 }, { "epoch": 0.4686310945933988, "grad_norm": 0.6518593103011723, "learning_rate": 0.00017889352165970866, "loss": 12.4012, "step": 8606 }, { "epoch": 0.46868554858998185, "grad_norm": 0.5694633740086411, "learning_rate": 0.0001788881027415989, "loss": 12.45, "step": 8607 }, { "epoch": 0.46874000258656484, "grad_norm": 0.7154487883983486, "learning_rate": 0.00017888268321004203, "loss": 12.4673, "step": 8608 }, { "epoch": 0.46879445658314783, "grad_norm": 0.6391015557412403, "learning_rate": 0.00017887726306508022, "loss": 12.515, "step": 8609 }, { "epoch": 0.4688489105797309, "grad_norm": 0.6805762056289465, "learning_rate": 0.00017887184230675556, "loss": 12.6188, "step": 8610 }, { "epoch": 0.4689033645763139, "grad_norm": 0.6699372502313612, "learning_rate": 0.00017886642093511025, "loss": 12.5241, "step": 8611 }, { "epoch": 0.46895781857289687, "grad_norm": 0.6015103170519449, "learning_rate": 0.0001788609989501864, "loss": 12.4285, "step": 8612 }, { "epoch": 0.4690122725694799, "grad_norm": 0.6768216487927327, "learning_rate": 0.0001788555763520262, "loss": 12.3188, "step": 8613 }, { "epoch": 0.4690667265660629, "grad_norm": 0.6785633981073629, "learning_rate": 0.0001788501531406718, "loss": 12.3742, "step": 8614 }, { "epoch": 0.4691211805626459, "grad_norm": 0.6077029256627378, "learning_rate": 0.00017884472931616543, "loss": 12.3687, "step": 8615 }, { "epoch": 0.46917563455922895, "grad_norm": 0.6621690317050741, "learning_rate": 0.0001788393048785492, "loss": 12.3839, "step": 8616 }, { "epoch": 0.46923008855581194, "grad_norm": 0.6656474743344936, "learning_rate": 0.0001788338798278653, "loss": 12.3856, "step": 8617 }, { "epoch": 0.46928454255239493, "grad_norm": 0.66881792016721, "learning_rate": 0.00017882845416415595, "loss": 12.5676, "step": 8618 }, { "epoch": 0.469338996548978, "grad_norm": 0.767602290036045, "learning_rate": 0.0001788230278874633, "loss": 12.3283, "step": 8619 }, { "epoch": 0.469393450545561, "grad_norm": 0.7045073223901752, "learning_rate": 0.00017881760099782958, "loss": 12.5283, "step": 8620 }, { "epoch": 0.46944790454214397, "grad_norm": 0.6515931879334268, "learning_rate": 0.00017881217349529697, "loss": 12.4897, "step": 8621 }, { "epoch": 0.469502358538727, "grad_norm": 0.6473209646368532, "learning_rate": 0.0001788067453799077, "loss": 12.4732, "step": 8622 }, { "epoch": 0.46955681253531, "grad_norm": 0.7198939445665117, "learning_rate": 0.00017880131665170393, "loss": 12.465, "step": 8623 }, { "epoch": 0.469611266531893, "grad_norm": 0.6935062918663318, "learning_rate": 0.00017879588731072794, "loss": 12.4833, "step": 8624 }, { "epoch": 0.46966572052847605, "grad_norm": 0.6226308375788506, "learning_rate": 0.0001787904573570219, "loss": 12.5073, "step": 8625 }, { "epoch": 0.46972017452505904, "grad_norm": 0.6849934242523069, "learning_rate": 0.00017878502679062806, "loss": 12.3697, "step": 8626 }, { "epoch": 0.46977462852164203, "grad_norm": 0.6070916164445785, "learning_rate": 0.00017877959561158862, "loss": 12.4153, "step": 8627 }, { "epoch": 0.4698290825182251, "grad_norm": 0.6603216087072952, "learning_rate": 0.00017877416381994584, "loss": 12.4331, "step": 8628 }, { "epoch": 0.4698835365148081, "grad_norm": 0.5860755470881435, "learning_rate": 0.00017876873141574198, "loss": 12.4932, "step": 8629 }, { "epoch": 0.46993799051139107, "grad_norm": 0.607831027666431, "learning_rate": 0.00017876329839901922, "loss": 12.4657, "step": 8630 }, { "epoch": 0.4699924445079741, "grad_norm": 0.6506427431137368, "learning_rate": 0.00017875786476981986, "loss": 12.4483, "step": 8631 }, { "epoch": 0.4700468985045571, "grad_norm": 0.6116981851750112, "learning_rate": 0.0001787524305281861, "loss": 12.4156, "step": 8632 }, { "epoch": 0.47010135250114016, "grad_norm": 0.6386453334944827, "learning_rate": 0.00017874699567416028, "loss": 12.4169, "step": 8633 }, { "epoch": 0.47015580649772315, "grad_norm": 0.5990352000029187, "learning_rate": 0.0001787415602077846, "loss": 12.4613, "step": 8634 }, { "epoch": 0.47021026049430614, "grad_norm": 0.629306239232886, "learning_rate": 0.00017873612412910134, "loss": 12.3987, "step": 8635 }, { "epoch": 0.4702647144908892, "grad_norm": 0.6362931725753519, "learning_rate": 0.00017873068743815278, "loss": 12.3794, "step": 8636 }, { "epoch": 0.4703191684874722, "grad_norm": 0.6621394092121959, "learning_rate": 0.00017872525013498122, "loss": 12.4266, "step": 8637 }, { "epoch": 0.4703736224840552, "grad_norm": 0.6522150718394919, "learning_rate": 0.00017871981221962886, "loss": 12.4786, "step": 8638 }, { "epoch": 0.4704280764806382, "grad_norm": 0.6182797125326692, "learning_rate": 0.00017871437369213806, "loss": 12.3953, "step": 8639 }, { "epoch": 0.4704825304772212, "grad_norm": 0.741453587509592, "learning_rate": 0.0001787089345525511, "loss": 12.4706, "step": 8640 }, { "epoch": 0.4705369844738042, "grad_norm": 0.6616730918075981, "learning_rate": 0.00017870349480091023, "loss": 12.407, "step": 8641 }, { "epoch": 0.47059143847038726, "grad_norm": 0.6721799261869218, "learning_rate": 0.00017869805443725782, "loss": 12.4711, "step": 8642 }, { "epoch": 0.47064589246697025, "grad_norm": 0.6905179211399968, "learning_rate": 0.00017869261346163616, "loss": 12.4337, "step": 8643 }, { "epoch": 0.47070034646355324, "grad_norm": 0.5665557718261672, "learning_rate": 0.0001786871718740875, "loss": 12.3544, "step": 8644 }, { "epoch": 0.4707548004601363, "grad_norm": 0.6720406366448736, "learning_rate": 0.0001786817296746542, "loss": 12.3461, "step": 8645 }, { "epoch": 0.4708092544567193, "grad_norm": 0.7474814381158854, "learning_rate": 0.00017867628686337857, "loss": 12.5816, "step": 8646 }, { "epoch": 0.4708637084533023, "grad_norm": 0.6518981831942263, "learning_rate": 0.00017867084344030295, "loss": 12.4326, "step": 8647 }, { "epoch": 0.4709181624498853, "grad_norm": 0.6892336094441212, "learning_rate": 0.00017866539940546966, "loss": 12.4625, "step": 8648 }, { "epoch": 0.4709726164464683, "grad_norm": 0.68022474893785, "learning_rate": 0.00017865995475892105, "loss": 12.3368, "step": 8649 }, { "epoch": 0.4710270704430513, "grad_norm": 0.7638458776577514, "learning_rate": 0.00017865450950069943, "loss": 12.5662, "step": 8650 }, { "epoch": 0.47108152443963436, "grad_norm": 0.6359881616953548, "learning_rate": 0.00017864906363084714, "loss": 12.3063, "step": 8651 }, { "epoch": 0.47113597843621735, "grad_norm": 0.6350355813886267, "learning_rate": 0.00017864361714940653, "loss": 12.4142, "step": 8652 }, { "epoch": 0.47119043243280034, "grad_norm": 0.6276882384257606, "learning_rate": 0.00017863817005642002, "loss": 12.3826, "step": 8653 }, { "epoch": 0.4712448864293834, "grad_norm": 0.6387207175067731, "learning_rate": 0.00017863272235192987, "loss": 12.4255, "step": 8654 }, { "epoch": 0.4712993404259664, "grad_norm": 0.6642427666470622, "learning_rate": 0.00017862727403597848, "loss": 12.329, "step": 8655 }, { "epoch": 0.4713537944225494, "grad_norm": 0.7565854838138347, "learning_rate": 0.00017862182510860827, "loss": 12.6547, "step": 8656 }, { "epoch": 0.4714082484191324, "grad_norm": 0.655967108899123, "learning_rate": 0.00017861637556986152, "loss": 12.4853, "step": 8657 }, { "epoch": 0.4714627024157154, "grad_norm": 0.5841788295309343, "learning_rate": 0.00017861092541978063, "loss": 12.4258, "step": 8658 }, { "epoch": 0.4715171564122984, "grad_norm": 0.6802712400530733, "learning_rate": 0.000178605474658408, "loss": 12.164, "step": 8659 }, { "epoch": 0.47157161040888146, "grad_norm": 0.5812630193366193, "learning_rate": 0.00017860002328578606, "loss": 12.4212, "step": 8660 }, { "epoch": 0.47162606440546445, "grad_norm": 0.7439484017953842, "learning_rate": 0.00017859457130195715, "loss": 12.5639, "step": 8661 }, { "epoch": 0.47168051840204744, "grad_norm": 0.6444918782437785, "learning_rate": 0.00017858911870696366, "loss": 12.4413, "step": 8662 }, { "epoch": 0.4717349723986305, "grad_norm": 0.6042122509649913, "learning_rate": 0.00017858366550084801, "loss": 12.2887, "step": 8663 }, { "epoch": 0.4717894263952135, "grad_norm": 0.719636542138421, "learning_rate": 0.00017857821168365258, "loss": 12.3589, "step": 8664 }, { "epoch": 0.47184388039179653, "grad_norm": 0.6004943446157107, "learning_rate": 0.00017857275725541983, "loss": 12.55, "step": 8665 }, { "epoch": 0.4718983343883795, "grad_norm": 0.6690559070467175, "learning_rate": 0.00017856730221619212, "loss": 12.3805, "step": 8666 }, { "epoch": 0.4719527883849625, "grad_norm": 0.6520489856594532, "learning_rate": 0.00017856184656601189, "loss": 12.3617, "step": 8667 }, { "epoch": 0.47200724238154557, "grad_norm": 0.5842016736999017, "learning_rate": 0.00017855639030492156, "loss": 12.3109, "step": 8668 }, { "epoch": 0.47206169637812856, "grad_norm": 0.6875535840151342, "learning_rate": 0.0001785509334329636, "loss": 12.4851, "step": 8669 }, { "epoch": 0.47211615037471155, "grad_norm": 0.6728011912417281, "learning_rate": 0.0001785454759501804, "loss": 12.4484, "step": 8670 }, { "epoch": 0.4721706043712946, "grad_norm": 0.6394208019399653, "learning_rate": 0.0001785400178566144, "loss": 12.5169, "step": 8671 }, { "epoch": 0.4722250583678776, "grad_norm": 0.7233003380113386, "learning_rate": 0.00017853455915230803, "loss": 12.5152, "step": 8672 }, { "epoch": 0.4722795123644606, "grad_norm": 0.6577078659945517, "learning_rate": 0.00017852909983730376, "loss": 12.2267, "step": 8673 }, { "epoch": 0.47233396636104363, "grad_norm": 0.6681012762094728, "learning_rate": 0.00017852363991164406, "loss": 12.4857, "step": 8674 }, { "epoch": 0.4723884203576266, "grad_norm": 0.6492669412836646, "learning_rate": 0.00017851817937537137, "loss": 12.3342, "step": 8675 }, { "epoch": 0.4724428743542096, "grad_norm": 0.6258051004677605, "learning_rate": 0.00017851271822852817, "loss": 12.3255, "step": 8676 }, { "epoch": 0.47249732835079267, "grad_norm": 0.6177766546154925, "learning_rate": 0.00017850725647115684, "loss": 12.453, "step": 8677 }, { "epoch": 0.47255178234737566, "grad_norm": 0.5976114556341114, "learning_rate": 0.00017850179410329998, "loss": 12.354, "step": 8678 }, { "epoch": 0.47260623634395865, "grad_norm": 0.6943464826461088, "learning_rate": 0.00017849633112499997, "loss": 12.4133, "step": 8679 }, { "epoch": 0.4726606903405417, "grad_norm": 0.6629539463742055, "learning_rate": 0.00017849086753629934, "loss": 12.5159, "step": 8680 }, { "epoch": 0.4727151443371247, "grad_norm": 0.5857907651074736, "learning_rate": 0.00017848540333724054, "loss": 12.3979, "step": 8681 }, { "epoch": 0.4727695983337077, "grad_norm": 0.66682024503514, "learning_rate": 0.0001784799385278661, "loss": 12.3639, "step": 8682 }, { "epoch": 0.47282405233029073, "grad_norm": 0.6785937455203835, "learning_rate": 0.0001784744731082185, "loss": 12.7207, "step": 8683 }, { "epoch": 0.4728785063268737, "grad_norm": 0.6830103773165097, "learning_rate": 0.00017846900707834022, "loss": 12.4353, "step": 8684 }, { "epoch": 0.4729329603234567, "grad_norm": 0.6678547876212807, "learning_rate": 0.0001784635404382738, "loss": 12.5259, "step": 8685 }, { "epoch": 0.47298741432003977, "grad_norm": 0.6663746763710154, "learning_rate": 0.00017845807318806175, "loss": 12.3938, "step": 8686 }, { "epoch": 0.47304186831662276, "grad_norm": 0.6361131616374217, "learning_rate": 0.00017845260532774654, "loss": 12.271, "step": 8687 }, { "epoch": 0.47309632231320575, "grad_norm": 0.7273696714957184, "learning_rate": 0.00017844713685737069, "loss": 12.2901, "step": 8688 }, { "epoch": 0.4731507763097888, "grad_norm": 0.6327991357395276, "learning_rate": 0.00017844166777697678, "loss": 12.3317, "step": 8689 }, { "epoch": 0.4732052303063718, "grad_norm": 0.6234225324962389, "learning_rate": 0.0001784361980866073, "loss": 12.4677, "step": 8690 }, { "epoch": 0.4732596843029548, "grad_norm": 0.6428092940547633, "learning_rate": 0.00017843072778630478, "loss": 12.3023, "step": 8691 }, { "epoch": 0.47331413829953783, "grad_norm": 0.5903591289493301, "learning_rate": 0.00017842525687611179, "loss": 12.3877, "step": 8692 }, { "epoch": 0.4733685922961208, "grad_norm": 0.6341704384358905, "learning_rate": 0.0001784197853560708, "loss": 12.3853, "step": 8693 }, { "epoch": 0.4734230462927038, "grad_norm": 0.6473153367853635, "learning_rate": 0.00017841431322622447, "loss": 12.4951, "step": 8694 }, { "epoch": 0.47347750028928687, "grad_norm": 0.6562941393001934, "learning_rate": 0.00017840884048661527, "loss": 12.3385, "step": 8695 }, { "epoch": 0.47353195428586986, "grad_norm": 0.5803770674490643, "learning_rate": 0.0001784033671372858, "loss": 12.5054, "step": 8696 }, { "epoch": 0.47358640828245285, "grad_norm": 0.606714580039266, "learning_rate": 0.00017839789317827855, "loss": 12.4175, "step": 8697 }, { "epoch": 0.4736408622790359, "grad_norm": 0.6835740224685091, "learning_rate": 0.00017839241860963617, "loss": 12.3783, "step": 8698 }, { "epoch": 0.4736953162756189, "grad_norm": 0.6513242061593827, "learning_rate": 0.00017838694343140117, "loss": 12.4464, "step": 8699 }, { "epoch": 0.47374977027220194, "grad_norm": 0.7031548801620282, "learning_rate": 0.00017838146764361619, "loss": 12.3203, "step": 8700 }, { "epoch": 0.47380422426878493, "grad_norm": 0.6364147771234054, "learning_rate": 0.00017837599124632375, "loss": 12.3763, "step": 8701 }, { "epoch": 0.4738586782653679, "grad_norm": 0.6186036418075409, "learning_rate": 0.00017837051423956644, "loss": 12.3576, "step": 8702 }, { "epoch": 0.473913132261951, "grad_norm": 0.6176910401272563, "learning_rate": 0.00017836503662338688, "loss": 12.401, "step": 8703 }, { "epoch": 0.47396758625853397, "grad_norm": 0.6096101648023464, "learning_rate": 0.00017835955839782766, "loss": 12.3551, "step": 8704 }, { "epoch": 0.47402204025511696, "grad_norm": 0.5980960499670746, "learning_rate": 0.00017835407956293136, "loss": 12.3039, "step": 8705 }, { "epoch": 0.4740764942517, "grad_norm": 0.6500700845049932, "learning_rate": 0.00017834860011874064, "loss": 12.4876, "step": 8706 }, { "epoch": 0.474130948248283, "grad_norm": 0.6091034212568243, "learning_rate": 0.00017834312006529803, "loss": 12.4782, "step": 8707 }, { "epoch": 0.474185402244866, "grad_norm": 0.6550246652321426, "learning_rate": 0.00017833763940264618, "loss": 12.4359, "step": 8708 }, { "epoch": 0.47423985624144904, "grad_norm": 0.652718777215914, "learning_rate": 0.0001783321581308277, "loss": 12.5009, "step": 8709 }, { "epoch": 0.47429431023803204, "grad_norm": 0.6357131550301056, "learning_rate": 0.00017832667624988525, "loss": 12.3438, "step": 8710 }, { "epoch": 0.47434876423461503, "grad_norm": 0.6056142520938775, "learning_rate": 0.00017832119375986143, "loss": 12.3436, "step": 8711 }, { "epoch": 0.4744032182311981, "grad_norm": 0.6836617173901649, "learning_rate": 0.00017831571066079886, "loss": 12.4552, "step": 8712 }, { "epoch": 0.47445767222778107, "grad_norm": 0.6196554753439548, "learning_rate": 0.00017831022695274018, "loss": 12.4012, "step": 8713 }, { "epoch": 0.47451212622436406, "grad_norm": 0.5652293429605024, "learning_rate": 0.00017830474263572804, "loss": 12.3303, "step": 8714 }, { "epoch": 0.4745665802209471, "grad_norm": 0.6475256810886932, "learning_rate": 0.00017829925770980514, "loss": 12.5303, "step": 8715 }, { "epoch": 0.4746210342175301, "grad_norm": 0.6036166661171558, "learning_rate": 0.00017829377217501403, "loss": 12.383, "step": 8716 }, { "epoch": 0.4746754882141131, "grad_norm": 0.6985522299689985, "learning_rate": 0.00017828828603139743, "loss": 12.5098, "step": 8717 }, { "epoch": 0.47472994221069614, "grad_norm": 0.6070778484391904, "learning_rate": 0.00017828279927899798, "loss": 12.391, "step": 8718 }, { "epoch": 0.47478439620727914, "grad_norm": 0.5823182202407687, "learning_rate": 0.00017827731191785836, "loss": 12.3258, "step": 8719 }, { "epoch": 0.47483885020386213, "grad_norm": 0.5893513288995854, "learning_rate": 0.00017827182394802128, "loss": 12.2139, "step": 8720 }, { "epoch": 0.4748933042004452, "grad_norm": 0.7013688112671357, "learning_rate": 0.0001782663353695293, "loss": 12.5296, "step": 8721 }, { "epoch": 0.47494775819702817, "grad_norm": 0.5962121584629747, "learning_rate": 0.0001782608461824252, "loss": 12.4412, "step": 8722 }, { "epoch": 0.47500221219361116, "grad_norm": 0.658193756347851, "learning_rate": 0.00017825535638675165, "loss": 12.3744, "step": 8723 }, { "epoch": 0.4750566661901942, "grad_norm": 0.5829182717805368, "learning_rate": 0.00017824986598255133, "loss": 12.5226, "step": 8724 }, { "epoch": 0.4751111201867772, "grad_norm": 0.5963759135534142, "learning_rate": 0.0001782443749698669, "loss": 12.4221, "step": 8725 }, { "epoch": 0.4751655741833602, "grad_norm": 0.620977287497132, "learning_rate": 0.0001782388833487411, "loss": 12.3833, "step": 8726 }, { "epoch": 0.47522002817994324, "grad_norm": 0.6899744567652295, "learning_rate": 0.00017823339111921663, "loss": 12.4674, "step": 8727 }, { "epoch": 0.47527448217652624, "grad_norm": 0.6151290935196063, "learning_rate": 0.00017822789828133618, "loss": 12.5067, "step": 8728 }, { "epoch": 0.47532893617310923, "grad_norm": 0.569806095972506, "learning_rate": 0.0001782224048351425, "loss": 12.4047, "step": 8729 }, { "epoch": 0.4753833901696923, "grad_norm": 0.5995478910348392, "learning_rate": 0.00017821691078067823, "loss": 12.4148, "step": 8730 }, { "epoch": 0.47543784416627527, "grad_norm": 0.6286888717890325, "learning_rate": 0.00017821141611798618, "loss": 12.3511, "step": 8731 }, { "epoch": 0.4754922981628583, "grad_norm": 0.5648835797724097, "learning_rate": 0.00017820592084710906, "loss": 12.2943, "step": 8732 }, { "epoch": 0.4755467521594413, "grad_norm": 0.6112850266141724, "learning_rate": 0.00017820042496808955, "loss": 12.4158, "step": 8733 }, { "epoch": 0.4756012061560243, "grad_norm": 0.661691812372672, "learning_rate": 0.00017819492848097045, "loss": 12.4666, "step": 8734 }, { "epoch": 0.47565566015260735, "grad_norm": 0.594886371446287, "learning_rate": 0.00017818943138579445, "loss": 12.4039, "step": 8735 }, { "epoch": 0.47571011414919034, "grad_norm": 0.6531180638084092, "learning_rate": 0.00017818393368260432, "loss": 12.3811, "step": 8736 }, { "epoch": 0.47576456814577334, "grad_norm": 0.7078781658459506, "learning_rate": 0.0001781784353714428, "loss": 12.5008, "step": 8737 }, { "epoch": 0.4758190221423564, "grad_norm": 0.596332682579423, "learning_rate": 0.0001781729364523527, "loss": 12.466, "step": 8738 }, { "epoch": 0.4758734761389394, "grad_norm": 0.6160511904069175, "learning_rate": 0.0001781674369253767, "loss": 12.4099, "step": 8739 }, { "epoch": 0.47592793013552237, "grad_norm": 0.591545992505644, "learning_rate": 0.0001781619367905576, "loss": 12.3754, "step": 8740 }, { "epoch": 0.4759823841321054, "grad_norm": 0.7002224308550122, "learning_rate": 0.0001781564360479382, "loss": 12.3937, "step": 8741 }, { "epoch": 0.4760368381286884, "grad_norm": 0.6085928202462332, "learning_rate": 0.0001781509346975612, "loss": 12.3617, "step": 8742 }, { "epoch": 0.4760912921252714, "grad_norm": 0.6398433181849218, "learning_rate": 0.00017814543273946947, "loss": 12.4901, "step": 8743 }, { "epoch": 0.47614574612185445, "grad_norm": 0.7506931499875767, "learning_rate": 0.00017813993017370576, "loss": 12.495, "step": 8744 }, { "epoch": 0.47620020011843744, "grad_norm": 0.5799727249777821, "learning_rate": 0.00017813442700031283, "loss": 12.3573, "step": 8745 }, { "epoch": 0.47625465411502044, "grad_norm": 0.5869373549653786, "learning_rate": 0.00017812892321933345, "loss": 12.4455, "step": 8746 }, { "epoch": 0.4763091081116035, "grad_norm": 0.5965018552793068, "learning_rate": 0.00017812341883081053, "loss": 12.4557, "step": 8747 }, { "epoch": 0.4763635621081865, "grad_norm": 0.6541023786405988, "learning_rate": 0.00017811791383478675, "loss": 12.5107, "step": 8748 }, { "epoch": 0.47641801610476947, "grad_norm": 0.6206970523605138, "learning_rate": 0.000178112408231305, "loss": 12.5198, "step": 8749 }, { "epoch": 0.4764724701013525, "grad_norm": 0.6499952009932339, "learning_rate": 0.00017810690202040805, "loss": 12.4502, "step": 8750 }, { "epoch": 0.4765269240979355, "grad_norm": 0.6521865596139488, "learning_rate": 0.00017810139520213874, "loss": 12.4045, "step": 8751 }, { "epoch": 0.4765813780945185, "grad_norm": 0.5973388862310863, "learning_rate": 0.00017809588777653986, "loss": 12.4887, "step": 8752 }, { "epoch": 0.47663583209110155, "grad_norm": 0.5928298859565891, "learning_rate": 0.0001780903797436543, "loss": 12.376, "step": 8753 }, { "epoch": 0.47669028608768454, "grad_norm": 0.6457349298774065, "learning_rate": 0.00017808487110352483, "loss": 12.5605, "step": 8754 }, { "epoch": 0.47674474008426754, "grad_norm": 0.6119826626738841, "learning_rate": 0.00017807936185619433, "loss": 12.4986, "step": 8755 }, { "epoch": 0.4767991940808506, "grad_norm": 0.5863087139114767, "learning_rate": 0.0001780738520017056, "loss": 12.3536, "step": 8756 }, { "epoch": 0.4768536480774336, "grad_norm": 0.6428427397487143, "learning_rate": 0.0001780683415401015, "loss": 12.3435, "step": 8757 }, { "epoch": 0.47690810207401657, "grad_norm": 0.6967791103950431, "learning_rate": 0.00017806283047142488, "loss": 12.5013, "step": 8758 }, { "epoch": 0.4769625560705996, "grad_norm": 0.6705016723421287, "learning_rate": 0.00017805731879571858, "loss": 12.5209, "step": 8759 }, { "epoch": 0.4770170100671826, "grad_norm": 0.6148028932835333, "learning_rate": 0.00017805180651302553, "loss": 12.5144, "step": 8760 }, { "epoch": 0.4770714640637656, "grad_norm": 0.5768470841705992, "learning_rate": 0.00017804629362338852, "loss": 12.4492, "step": 8761 }, { "epoch": 0.47712591806034865, "grad_norm": 0.694438897567131, "learning_rate": 0.00017804078012685043, "loss": 12.4018, "step": 8762 }, { "epoch": 0.47718037205693165, "grad_norm": 0.6351675659575002, "learning_rate": 0.00017803526602345416, "loss": 12.5062, "step": 8763 }, { "epoch": 0.47723482605351464, "grad_norm": 0.6840557802934399, "learning_rate": 0.0001780297513132426, "loss": 12.4258, "step": 8764 }, { "epoch": 0.4772892800500977, "grad_norm": 0.6269476064332131, "learning_rate": 0.00017802423599625855, "loss": 12.3025, "step": 8765 }, { "epoch": 0.4773437340466807, "grad_norm": 0.748368075863479, "learning_rate": 0.000178018720072545, "loss": 12.45, "step": 8766 }, { "epoch": 0.4773981880432637, "grad_norm": 0.6079501771640944, "learning_rate": 0.00017801320354214476, "loss": 12.582, "step": 8767 }, { "epoch": 0.4774526420398467, "grad_norm": 0.6021377793201013, "learning_rate": 0.0001780076864051008, "loss": 12.3335, "step": 8768 }, { "epoch": 0.4775070960364297, "grad_norm": 0.6756093490145296, "learning_rate": 0.000178002168661456, "loss": 12.3745, "step": 8769 }, { "epoch": 0.47756155003301276, "grad_norm": 0.6197236151026202, "learning_rate": 0.0001779966503112532, "loss": 12.3967, "step": 8770 }, { "epoch": 0.47761600402959575, "grad_norm": 0.6525965658124591, "learning_rate": 0.00017799113135453541, "loss": 12.431, "step": 8771 }, { "epoch": 0.47767045802617875, "grad_norm": 0.7092955749743148, "learning_rate": 0.00017798561179134553, "loss": 12.3853, "step": 8772 }, { "epoch": 0.4777249120227618, "grad_norm": 0.6196543964995244, "learning_rate": 0.0001779800916217264, "loss": 12.4704, "step": 8773 }, { "epoch": 0.4777793660193448, "grad_norm": 0.7215102850249048, "learning_rate": 0.00017797457084572102, "loss": 12.4219, "step": 8774 }, { "epoch": 0.4778338200159278, "grad_norm": 0.6234504458117953, "learning_rate": 0.0001779690494633723, "loss": 12.4559, "step": 8775 }, { "epoch": 0.4778882740125108, "grad_norm": 0.6269716209072117, "learning_rate": 0.0001779635274747232, "loss": 12.4085, "step": 8776 }, { "epoch": 0.4779427280090938, "grad_norm": 0.7042413882289371, "learning_rate": 0.0001779580048798166, "loss": 12.2859, "step": 8777 }, { "epoch": 0.4779971820056768, "grad_norm": 0.5900419912112504, "learning_rate": 0.00017795248167869549, "loss": 12.3652, "step": 8778 }, { "epoch": 0.47805163600225986, "grad_norm": 0.6511384156373964, "learning_rate": 0.0001779469578714028, "loss": 12.3114, "step": 8779 }, { "epoch": 0.47810608999884285, "grad_norm": 0.6670601280040062, "learning_rate": 0.0001779414334579815, "loss": 12.3957, "step": 8780 }, { "epoch": 0.47816054399542585, "grad_norm": 0.5890634034622276, "learning_rate": 0.00017793590843847456, "loss": 12.3756, "step": 8781 }, { "epoch": 0.4782149979920089, "grad_norm": 0.6218526690363533, "learning_rate": 0.00017793038281292494, "loss": 12.4279, "step": 8782 }, { "epoch": 0.4782694519885919, "grad_norm": 0.6094031786819534, "learning_rate": 0.00017792485658137553, "loss": 12.3809, "step": 8783 }, { "epoch": 0.4783239059851749, "grad_norm": 0.6674236673610006, "learning_rate": 0.00017791932974386943, "loss": 12.3462, "step": 8784 }, { "epoch": 0.47837835998175793, "grad_norm": 0.8290033168357881, "learning_rate": 0.00017791380230044955, "loss": 12.4798, "step": 8785 }, { "epoch": 0.4784328139783409, "grad_norm": 0.5698319474926054, "learning_rate": 0.00017790827425115887, "loss": 12.3464, "step": 8786 }, { "epoch": 0.4784872679749239, "grad_norm": 0.7063867609709856, "learning_rate": 0.00017790274559604033, "loss": 12.4459, "step": 8787 }, { "epoch": 0.47854172197150696, "grad_norm": 0.6248814974218623, "learning_rate": 0.00017789721633513703, "loss": 12.3679, "step": 8788 }, { "epoch": 0.47859617596808995, "grad_norm": 0.6049202524282553, "learning_rate": 0.0001778916864684919, "loss": 12.4051, "step": 8789 }, { "epoch": 0.47865062996467295, "grad_norm": 0.6707745185056192, "learning_rate": 0.00017788615599614798, "loss": 12.3868, "step": 8790 }, { "epoch": 0.478705083961256, "grad_norm": 0.5512079569667026, "learning_rate": 0.0001778806249181482, "loss": 12.3534, "step": 8791 }, { "epoch": 0.478759537957839, "grad_norm": 0.6225053075193171, "learning_rate": 0.00017787509323453565, "loss": 12.4305, "step": 8792 }, { "epoch": 0.478813991954422, "grad_norm": 0.6709506145422299, "learning_rate": 0.00017786956094535333, "loss": 12.4308, "step": 8793 }, { "epoch": 0.47886844595100503, "grad_norm": 0.6751250487279195, "learning_rate": 0.0001778640280506442, "loss": 12.4982, "step": 8794 }, { "epoch": 0.478922899947588, "grad_norm": 0.6378585206417511, "learning_rate": 0.00017785849455045138, "loss": 12.4145, "step": 8795 }, { "epoch": 0.478977353944171, "grad_norm": 0.6245405864362273, "learning_rate": 0.0001778529604448178, "loss": 12.2687, "step": 8796 }, { "epoch": 0.47903180794075406, "grad_norm": 0.6113639663876627, "learning_rate": 0.0001778474257337866, "loss": 12.4223, "step": 8797 }, { "epoch": 0.47908626193733705, "grad_norm": 0.5721990527380328, "learning_rate": 0.0001778418904174007, "loss": 12.1567, "step": 8798 }, { "epoch": 0.4791407159339201, "grad_norm": 0.6724946226809764, "learning_rate": 0.00017783635449570326, "loss": 12.4662, "step": 8799 }, { "epoch": 0.4791951699305031, "grad_norm": 0.6178789728367964, "learning_rate": 0.00017783081796873725, "loss": 12.431, "step": 8800 }, { "epoch": 0.4792496239270861, "grad_norm": 0.6579330034517156, "learning_rate": 0.00017782528083654575, "loss": 12.2458, "step": 8801 }, { "epoch": 0.47930407792366914, "grad_norm": 0.6058708382556764, "learning_rate": 0.0001778197430991718, "loss": 12.442, "step": 8802 }, { "epoch": 0.47935853192025213, "grad_norm": 0.6377720679987039, "learning_rate": 0.0001778142047566585, "loss": 12.461, "step": 8803 }, { "epoch": 0.4794129859168351, "grad_norm": 0.5970891817149896, "learning_rate": 0.0001778086658090489, "loss": 12.4052, "step": 8804 }, { "epoch": 0.47946743991341817, "grad_norm": 0.6461955050830236, "learning_rate": 0.00017780312625638605, "loss": 12.3717, "step": 8805 }, { "epoch": 0.47952189391000116, "grad_norm": 0.6468828453737918, "learning_rate": 0.000177797586098713, "loss": 12.4399, "step": 8806 }, { "epoch": 0.47957634790658416, "grad_norm": 0.6621514434037283, "learning_rate": 0.00017779204533607297, "loss": 12.5993, "step": 8807 }, { "epoch": 0.4796308019031672, "grad_norm": 0.58841400655816, "learning_rate": 0.0001777865039685089, "loss": 12.4557, "step": 8808 }, { "epoch": 0.4796852558997502, "grad_norm": 0.6365515386699335, "learning_rate": 0.00017778096199606394, "loss": 12.3267, "step": 8809 }, { "epoch": 0.4797397098963332, "grad_norm": 0.6823179343753297, "learning_rate": 0.00017777541941878114, "loss": 12.4056, "step": 8810 }, { "epoch": 0.47979416389291624, "grad_norm": 0.5769821845021594, "learning_rate": 0.00017776987623670368, "loss": 12.4249, "step": 8811 }, { "epoch": 0.47984861788949923, "grad_norm": 0.6255846491013269, "learning_rate": 0.00017776433244987458, "loss": 12.4606, "step": 8812 }, { "epoch": 0.4799030718860822, "grad_norm": 0.6799133311882912, "learning_rate": 0.000177758788058337, "loss": 12.4782, "step": 8813 }, { "epoch": 0.47995752588266527, "grad_norm": 0.6058273235144871, "learning_rate": 0.00017775324306213406, "loss": 12.4778, "step": 8814 }, { "epoch": 0.48001197987924826, "grad_norm": 0.6519108191538442, "learning_rate": 0.00017774769746130886, "loss": 12.4488, "step": 8815 }, { "epoch": 0.48006643387583126, "grad_norm": 0.5844059895700789, "learning_rate": 0.00017774215125590455, "loss": 12.4286, "step": 8816 }, { "epoch": 0.4801208878724143, "grad_norm": 0.6788807574460705, "learning_rate": 0.00017773660444596418, "loss": 12.5376, "step": 8817 }, { "epoch": 0.4801753418689973, "grad_norm": 0.6717908703669483, "learning_rate": 0.00017773105703153096, "loss": 12.4666, "step": 8818 }, { "epoch": 0.4802297958655803, "grad_norm": 0.5889511648902014, "learning_rate": 0.00017772550901264803, "loss": 12.4461, "step": 8819 }, { "epoch": 0.48028424986216334, "grad_norm": 0.7001047237575522, "learning_rate": 0.00017771996038935846, "loss": 12.5566, "step": 8820 }, { "epoch": 0.48033870385874633, "grad_norm": 0.6073367192293863, "learning_rate": 0.0001777144111617055, "loss": 12.4075, "step": 8821 }, { "epoch": 0.4803931578553293, "grad_norm": 0.7157381884611609, "learning_rate": 0.0001777088613297322, "loss": 12.4349, "step": 8822 }, { "epoch": 0.48044761185191237, "grad_norm": 0.6451758187148338, "learning_rate": 0.0001777033108934818, "loss": 12.383, "step": 8823 }, { "epoch": 0.48050206584849536, "grad_norm": 0.6259662761328934, "learning_rate": 0.00017769775985299738, "loss": 12.3062, "step": 8824 }, { "epoch": 0.48055651984507836, "grad_norm": 0.5982251100488803, "learning_rate": 0.00017769220820832218, "loss": 12.2562, "step": 8825 }, { "epoch": 0.4806109738416614, "grad_norm": 0.5716512373091801, "learning_rate": 0.00017768665595949934, "loss": 12.3767, "step": 8826 }, { "epoch": 0.4806654278382444, "grad_norm": 0.6944768655221844, "learning_rate": 0.00017768110310657204, "loss": 12.5173, "step": 8827 }, { "epoch": 0.4807198818348274, "grad_norm": 0.7524092316030287, "learning_rate": 0.00017767554964958344, "loss": 12.2713, "step": 8828 }, { "epoch": 0.48077433583141044, "grad_norm": 0.6292076363016551, "learning_rate": 0.00017766999558857673, "loss": 12.3015, "step": 8829 }, { "epoch": 0.48082878982799343, "grad_norm": 0.6170640257155307, "learning_rate": 0.00017766444092359512, "loss": 12.3525, "step": 8830 }, { "epoch": 0.4808832438245764, "grad_norm": 0.63768550877622, "learning_rate": 0.00017765888565468178, "loss": 12.4197, "step": 8831 }, { "epoch": 0.48093769782115947, "grad_norm": 0.6341427907839081, "learning_rate": 0.00017765332978187997, "loss": 12.4103, "step": 8832 }, { "epoch": 0.48099215181774246, "grad_norm": 0.6629413848045236, "learning_rate": 0.00017764777330523283, "loss": 12.4884, "step": 8833 }, { "epoch": 0.4810466058143255, "grad_norm": 0.6164832896056446, "learning_rate": 0.00017764221622478354, "loss": 12.4355, "step": 8834 }, { "epoch": 0.4811010598109085, "grad_norm": 0.5901202811298281, "learning_rate": 0.00017763665854057537, "loss": 12.3688, "step": 8835 }, { "epoch": 0.4811555138074915, "grad_norm": 0.6196660565552202, "learning_rate": 0.00017763110025265154, "loss": 12.4374, "step": 8836 }, { "epoch": 0.48120996780407455, "grad_norm": 0.6734610187372511, "learning_rate": 0.00017762554136105524, "loss": 12.3679, "step": 8837 }, { "epoch": 0.48126442180065754, "grad_norm": 0.8116360626391834, "learning_rate": 0.00017761998186582972, "loss": 12.5266, "step": 8838 }, { "epoch": 0.48131887579724053, "grad_norm": 0.6981569929406607, "learning_rate": 0.00017761442176701824, "loss": 12.5177, "step": 8839 }, { "epoch": 0.4813733297938236, "grad_norm": 0.5768553293400086, "learning_rate": 0.00017760886106466396, "loss": 12.3916, "step": 8840 }, { "epoch": 0.48142778379040657, "grad_norm": 0.7085652244519366, "learning_rate": 0.00017760329975881017, "loss": 12.3696, "step": 8841 }, { "epoch": 0.48148223778698956, "grad_norm": 0.6562927526967182, "learning_rate": 0.0001775977378495001, "loss": 12.3849, "step": 8842 }, { "epoch": 0.4815366917835726, "grad_norm": 0.7220168778666222, "learning_rate": 0.00017759217533677702, "loss": 12.3901, "step": 8843 }, { "epoch": 0.4815911457801556, "grad_norm": 0.5920359098395669, "learning_rate": 0.00017758661222068415, "loss": 12.3168, "step": 8844 }, { "epoch": 0.4816455997767386, "grad_norm": 0.6872763879710608, "learning_rate": 0.0001775810485012648, "loss": 12.3874, "step": 8845 }, { "epoch": 0.48170005377332165, "grad_norm": 0.5996468591319443, "learning_rate": 0.00017757548417856217, "loss": 12.4146, "step": 8846 }, { "epoch": 0.48175450776990464, "grad_norm": 0.5938416569055235, "learning_rate": 0.00017756991925261962, "loss": 12.3266, "step": 8847 }, { "epoch": 0.48180896176648763, "grad_norm": 0.7101647878990114, "learning_rate": 0.00017756435372348034, "loss": 12.4614, "step": 8848 }, { "epoch": 0.4818634157630707, "grad_norm": 0.6568914198752358, "learning_rate": 0.00017755878759118763, "loss": 12.4564, "step": 8849 }, { "epoch": 0.48191786975965367, "grad_norm": 0.6646572763387945, "learning_rate": 0.00017755322085578478, "loss": 12.2684, "step": 8850 }, { "epoch": 0.48197232375623666, "grad_norm": 0.6122037385746242, "learning_rate": 0.0001775476535173151, "loss": 12.4584, "step": 8851 }, { "epoch": 0.4820267777528197, "grad_norm": 0.6091843939075892, "learning_rate": 0.00017754208557582186, "loss": 12.4766, "step": 8852 }, { "epoch": 0.4820812317494027, "grad_norm": 0.7753738208277444, "learning_rate": 0.00017753651703134832, "loss": 12.3508, "step": 8853 }, { "epoch": 0.4821356857459857, "grad_norm": 0.9314633806434001, "learning_rate": 0.00017753094788393786, "loss": 12.5979, "step": 8854 }, { "epoch": 0.48219013974256875, "grad_norm": 0.6731684795342191, "learning_rate": 0.00017752537813363372, "loss": 12.4653, "step": 8855 }, { "epoch": 0.48224459373915174, "grad_norm": 0.6428856940797861, "learning_rate": 0.00017751980778047928, "loss": 12.5004, "step": 8856 }, { "epoch": 0.48229904773573473, "grad_norm": 0.59991219571617, "learning_rate": 0.00017751423682451777, "loss": 12.3395, "step": 8857 }, { "epoch": 0.4823535017323178, "grad_norm": 0.6919336816693169, "learning_rate": 0.0001775086652657926, "loss": 12.4303, "step": 8858 }, { "epoch": 0.4824079557289008, "grad_norm": 0.7949891419946377, "learning_rate": 0.000177503093104347, "loss": 12.3935, "step": 8859 }, { "epoch": 0.48246240972548377, "grad_norm": 0.5947020100389356, "learning_rate": 0.0001774975203402244, "loss": 12.3886, "step": 8860 }, { "epoch": 0.4825168637220668, "grad_norm": 0.5988251917040165, "learning_rate": 0.00017749194697346804, "loss": 12.342, "step": 8861 }, { "epoch": 0.4825713177186498, "grad_norm": 0.6354760855965604, "learning_rate": 0.00017748637300412135, "loss": 12.4184, "step": 8862 }, { "epoch": 0.4826257717152328, "grad_norm": 0.6000525681944205, "learning_rate": 0.00017748079843222758, "loss": 12.4177, "step": 8863 }, { "epoch": 0.48268022571181585, "grad_norm": 0.7000608948503445, "learning_rate": 0.00017747522325783016, "loss": 12.4286, "step": 8864 }, { "epoch": 0.48273467970839884, "grad_norm": 0.6118907742416242, "learning_rate": 0.0001774696474809724, "loss": 12.3678, "step": 8865 }, { "epoch": 0.4827891337049819, "grad_norm": 0.5942633424266189, "learning_rate": 0.00017746407110169767, "loss": 12.4994, "step": 8866 }, { "epoch": 0.4828435877015649, "grad_norm": 0.6539654214633613, "learning_rate": 0.00017745849412004937, "loss": 12.3596, "step": 8867 }, { "epoch": 0.4828980416981479, "grad_norm": 0.6677852512885757, "learning_rate": 0.00017745291653607076, "loss": 12.3255, "step": 8868 }, { "epoch": 0.4829524956947309, "grad_norm": 0.651571708656775, "learning_rate": 0.00017744733834980532, "loss": 12.327, "step": 8869 }, { "epoch": 0.4830069496913139, "grad_norm": 0.6349770322266751, "learning_rate": 0.0001774417595612964, "loss": 12.487, "step": 8870 }, { "epoch": 0.4830614036878969, "grad_norm": 0.6019710222800663, "learning_rate": 0.00017743618017058735, "loss": 12.4126, "step": 8871 }, { "epoch": 0.48311585768447995, "grad_norm": 0.7285677786372385, "learning_rate": 0.0001774306001777216, "loss": 12.4012, "step": 8872 }, { "epoch": 0.48317031168106295, "grad_norm": 0.5809777718047995, "learning_rate": 0.00017742501958274248, "loss": 12.5225, "step": 8873 }, { "epoch": 0.48322476567764594, "grad_norm": 0.6272483646028337, "learning_rate": 0.00017741943838569347, "loss": 12.4226, "step": 8874 }, { "epoch": 0.483279219674229, "grad_norm": 0.6755453012097429, "learning_rate": 0.0001774138565866179, "loss": 12.3687, "step": 8875 }, { "epoch": 0.483333673670812, "grad_norm": 0.6564591893347475, "learning_rate": 0.0001774082741855592, "loss": 12.3911, "step": 8876 }, { "epoch": 0.483388127667395, "grad_norm": 0.6373866759242696, "learning_rate": 0.00017740269118256076, "loss": 12.5277, "step": 8877 }, { "epoch": 0.483442581663978, "grad_norm": 0.5944427936719368, "learning_rate": 0.00017739710757766605, "loss": 12.5127, "step": 8878 }, { "epoch": 0.483497035660561, "grad_norm": 0.6324204783412024, "learning_rate": 0.00017739152337091843, "loss": 12.4911, "step": 8879 }, { "epoch": 0.483551489657144, "grad_norm": 0.6001119109761424, "learning_rate": 0.00017738593856236133, "loss": 12.384, "step": 8880 }, { "epoch": 0.48360594365372706, "grad_norm": 0.6966066832215112, "learning_rate": 0.00017738035315203822, "loss": 12.3972, "step": 8881 }, { "epoch": 0.48366039765031005, "grad_norm": 0.6212816073096596, "learning_rate": 0.0001773747671399925, "loss": 12.4002, "step": 8882 }, { "epoch": 0.48371485164689304, "grad_norm": 0.5724581880947716, "learning_rate": 0.0001773691805262676, "loss": 12.427, "step": 8883 }, { "epoch": 0.4837693056434761, "grad_norm": 0.6334340981535187, "learning_rate": 0.000177363593310907, "loss": 12.4921, "step": 8884 }, { "epoch": 0.4838237596400591, "grad_norm": 0.5685247708491485, "learning_rate": 0.00017735800549395413, "loss": 12.4575, "step": 8885 }, { "epoch": 0.4838782136366421, "grad_norm": 0.5689435269396318, "learning_rate": 0.00017735241707545241, "loss": 12.3163, "step": 8886 }, { "epoch": 0.4839326676332251, "grad_norm": 0.6779432269703239, "learning_rate": 0.00017734682805544533, "loss": 12.3748, "step": 8887 }, { "epoch": 0.4839871216298081, "grad_norm": 0.6796854341083812, "learning_rate": 0.00017734123843397636, "loss": 12.3531, "step": 8888 }, { "epoch": 0.4840415756263911, "grad_norm": 0.5849010737268502, "learning_rate": 0.00017733564821108895, "loss": 12.5429, "step": 8889 }, { "epoch": 0.48409602962297416, "grad_norm": 0.6728716617375359, "learning_rate": 0.00017733005738682656, "loss": 12.4443, "step": 8890 }, { "epoch": 0.48415048361955715, "grad_norm": 0.6222616297984809, "learning_rate": 0.00017732446596123268, "loss": 12.3582, "step": 8891 }, { "epoch": 0.48420493761614014, "grad_norm": 0.6025245468972172, "learning_rate": 0.00017731887393435076, "loss": 12.3947, "step": 8892 }, { "epoch": 0.4842593916127232, "grad_norm": 0.6012661543715989, "learning_rate": 0.00017731328130622434, "loss": 12.33, "step": 8893 }, { "epoch": 0.4843138456093062, "grad_norm": 0.6221467903127814, "learning_rate": 0.00017730768807689687, "loss": 12.4557, "step": 8894 }, { "epoch": 0.4843682996058892, "grad_norm": 0.7082201340878812, "learning_rate": 0.00017730209424641187, "loss": 12.5036, "step": 8895 }, { "epoch": 0.4844227536024722, "grad_norm": 0.5989029302995673, "learning_rate": 0.00017729649981481277, "loss": 12.4931, "step": 8896 }, { "epoch": 0.4844772075990552, "grad_norm": 0.6217807482682669, "learning_rate": 0.00017729090478214317, "loss": 12.4975, "step": 8897 }, { "epoch": 0.4845316615956382, "grad_norm": 0.7228601677700004, "learning_rate": 0.00017728530914844654, "loss": 12.4275, "step": 8898 }, { "epoch": 0.48458611559222126, "grad_norm": 0.5895667014499496, "learning_rate": 0.00017727971291376635, "loss": 12.3745, "step": 8899 }, { "epoch": 0.48464056958880425, "grad_norm": 0.5981741009577994, "learning_rate": 0.0001772741160781462, "loss": 12.4311, "step": 8900 }, { "epoch": 0.4846950235853873, "grad_norm": 0.632341233499706, "learning_rate": 0.00017726851864162952, "loss": 12.5217, "step": 8901 }, { "epoch": 0.4847494775819703, "grad_norm": 0.5661173025837181, "learning_rate": 0.0001772629206042599, "loss": 12.2706, "step": 8902 }, { "epoch": 0.4848039315785533, "grad_norm": 0.5429235944245946, "learning_rate": 0.00017725732196608086, "loss": 12.3377, "step": 8903 }, { "epoch": 0.48485838557513633, "grad_norm": 0.5779148781970922, "learning_rate": 0.00017725172272713588, "loss": 12.4942, "step": 8904 }, { "epoch": 0.4849128395717193, "grad_norm": 0.5910247841671594, "learning_rate": 0.0001772461228874686, "loss": 12.4021, "step": 8905 }, { "epoch": 0.4849672935683023, "grad_norm": 0.5960390491561129, "learning_rate": 0.00017724052244712251, "loss": 12.476, "step": 8906 }, { "epoch": 0.48502174756488536, "grad_norm": 0.5979535906650704, "learning_rate": 0.00017723492140614115, "loss": 12.1532, "step": 8907 }, { "epoch": 0.48507620156146836, "grad_norm": 0.5835970639327157, "learning_rate": 0.0001772293197645681, "loss": 12.426, "step": 8908 }, { "epoch": 0.48513065555805135, "grad_norm": 0.578047193217132, "learning_rate": 0.00017722371752244687, "loss": 12.2607, "step": 8909 }, { "epoch": 0.4851851095546344, "grad_norm": 0.6360885630082748, "learning_rate": 0.00017721811467982112, "loss": 12.4493, "step": 8910 }, { "epoch": 0.4852395635512174, "grad_norm": 0.5881949679055104, "learning_rate": 0.0001772125112367343, "loss": 12.3724, "step": 8911 }, { "epoch": 0.4852940175478004, "grad_norm": 0.6163484189248782, "learning_rate": 0.0001772069071932301, "loss": 12.4753, "step": 8912 }, { "epoch": 0.48534847154438343, "grad_norm": 0.5804625073859097, "learning_rate": 0.000177201302549352, "loss": 12.4493, "step": 8913 }, { "epoch": 0.4854029255409664, "grad_norm": 0.5913429595420551, "learning_rate": 0.00017719569730514367, "loss": 12.4647, "step": 8914 }, { "epoch": 0.4854573795375494, "grad_norm": 0.6458546645180917, "learning_rate": 0.00017719009146064863, "loss": 12.402, "step": 8915 }, { "epoch": 0.48551183353413246, "grad_norm": 0.6266882741409003, "learning_rate": 0.00017718448501591048, "loss": 12.427, "step": 8916 }, { "epoch": 0.48556628753071546, "grad_norm": 0.7313824026603742, "learning_rate": 0.00017717887797097284, "loss": 12.3917, "step": 8917 }, { "epoch": 0.48562074152729845, "grad_norm": 0.6531470914819183, "learning_rate": 0.0001771732703258793, "loss": 12.4148, "step": 8918 }, { "epoch": 0.4856751955238815, "grad_norm": 0.5753462283529436, "learning_rate": 0.00017716766208067348, "loss": 12.2808, "step": 8919 }, { "epoch": 0.4857296495204645, "grad_norm": 0.6291815969960752, "learning_rate": 0.00017716205323539897, "loss": 12.4196, "step": 8920 }, { "epoch": 0.4857841035170475, "grad_norm": 0.6655582182857843, "learning_rate": 0.00017715644379009938, "loss": 12.6431, "step": 8921 }, { "epoch": 0.48583855751363053, "grad_norm": 0.6351500978892104, "learning_rate": 0.00017715083374481835, "loss": 12.1745, "step": 8922 }, { "epoch": 0.4858930115102135, "grad_norm": 0.7014680673123271, "learning_rate": 0.00017714522309959953, "loss": 12.5735, "step": 8923 }, { "epoch": 0.4859474655067965, "grad_norm": 0.5961403293878741, "learning_rate": 0.0001771396118544865, "loss": 12.3263, "step": 8924 }, { "epoch": 0.48600191950337956, "grad_norm": 0.6381696181668911, "learning_rate": 0.00017713400000952292, "loss": 12.5789, "step": 8925 }, { "epoch": 0.48605637349996256, "grad_norm": 0.5781356158913226, "learning_rate": 0.00017712838756475237, "loss": 12.3219, "step": 8926 }, { "epoch": 0.48611082749654555, "grad_norm": 0.6133175971860488, "learning_rate": 0.0001771227745202186, "loss": 12.5355, "step": 8927 }, { "epoch": 0.4861652814931286, "grad_norm": 0.6944077918723269, "learning_rate": 0.00017711716087596518, "loss": 12.4948, "step": 8928 }, { "epoch": 0.4862197354897116, "grad_norm": 0.6446646845681596, "learning_rate": 0.00017711154663203578, "loss": 12.5193, "step": 8929 }, { "epoch": 0.4862741894862946, "grad_norm": 0.6286728175168206, "learning_rate": 0.0001771059317884741, "loss": 12.5637, "step": 8930 }, { "epoch": 0.48632864348287763, "grad_norm": 0.6662026021201488, "learning_rate": 0.0001771003163453237, "loss": 12.4626, "step": 8931 }, { "epoch": 0.4863830974794606, "grad_norm": 0.6301674124609281, "learning_rate": 0.00017709470030262834, "loss": 12.5071, "step": 8932 }, { "epoch": 0.4864375514760437, "grad_norm": 0.5735200957479709, "learning_rate": 0.00017708908366043168, "loss": 12.4277, "step": 8933 }, { "epoch": 0.48649200547262667, "grad_norm": 0.616838235274588, "learning_rate": 0.00017708346641877735, "loss": 12.4254, "step": 8934 }, { "epoch": 0.48654645946920966, "grad_norm": 0.6986291067091521, "learning_rate": 0.00017707784857770906, "loss": 12.5306, "step": 8935 }, { "epoch": 0.4866009134657927, "grad_norm": 0.6414495094248273, "learning_rate": 0.00017707223013727053, "loss": 12.438, "step": 8936 }, { "epoch": 0.4866553674623757, "grad_norm": 0.7109706319146375, "learning_rate": 0.00017706661109750536, "loss": 12.3861, "step": 8937 }, { "epoch": 0.4867098214589587, "grad_norm": 0.6688809895812369, "learning_rate": 0.00017706099145845734, "loss": 12.3729, "step": 8938 }, { "epoch": 0.48676427545554174, "grad_norm": 0.6153038549431864, "learning_rate": 0.0001770553712201701, "loss": 12.4929, "step": 8939 }, { "epoch": 0.48681872945212473, "grad_norm": 0.7329807332996898, "learning_rate": 0.0001770497503826874, "loss": 12.4184, "step": 8940 }, { "epoch": 0.4868731834487077, "grad_norm": 0.7084924363855114, "learning_rate": 0.0001770441289460529, "loss": 12.3417, "step": 8941 }, { "epoch": 0.4869276374452908, "grad_norm": 0.6815236874363345, "learning_rate": 0.00017703850691031035, "loss": 12.4823, "step": 8942 }, { "epoch": 0.48698209144187377, "grad_norm": 0.577985264202812, "learning_rate": 0.00017703288427550342, "loss": 12.4893, "step": 8943 }, { "epoch": 0.48703654543845676, "grad_norm": 0.5997747016250154, "learning_rate": 0.0001770272610416759, "loss": 12.4236, "step": 8944 }, { "epoch": 0.4870909994350398, "grad_norm": 0.6303074713542345, "learning_rate": 0.00017702163720887144, "loss": 12.3766, "step": 8945 }, { "epoch": 0.4871454534316228, "grad_norm": 0.6148182246803541, "learning_rate": 0.00017701601277713382, "loss": 12.4035, "step": 8946 }, { "epoch": 0.4871999074282058, "grad_norm": 0.6129823580686586, "learning_rate": 0.0001770103877465068, "loss": 12.3825, "step": 8947 }, { "epoch": 0.48725436142478884, "grad_norm": 0.623778798045754, "learning_rate": 0.00017700476211703406, "loss": 12.4081, "step": 8948 }, { "epoch": 0.48730881542137183, "grad_norm": 0.6131756723701572, "learning_rate": 0.0001769991358887594, "loss": 12.346, "step": 8949 }, { "epoch": 0.4873632694179548, "grad_norm": 0.6497155540267017, "learning_rate": 0.00017699350906172655, "loss": 12.4192, "step": 8950 }, { "epoch": 0.4874177234145379, "grad_norm": 0.7017317517650884, "learning_rate": 0.00017698788163597923, "loss": 12.6197, "step": 8951 }, { "epoch": 0.48747217741112087, "grad_norm": 0.5495695820318853, "learning_rate": 0.00017698225361156126, "loss": 12.4219, "step": 8952 }, { "epoch": 0.48752663140770386, "grad_norm": 0.600004068120246, "learning_rate": 0.00017697662498851634, "loss": 12.4167, "step": 8953 }, { "epoch": 0.4875810854042869, "grad_norm": 0.5677519445784427, "learning_rate": 0.0001769709957668883, "loss": 12.4206, "step": 8954 }, { "epoch": 0.4876355394008699, "grad_norm": 0.6933956699220511, "learning_rate": 0.00017696536594672093, "loss": 12.4454, "step": 8955 }, { "epoch": 0.4876899933974529, "grad_norm": 0.5852886126317223, "learning_rate": 0.0001769597355280579, "loss": 12.4294, "step": 8956 }, { "epoch": 0.48774444739403594, "grad_norm": 0.6707874767566225, "learning_rate": 0.00017695410451094309, "loss": 12.3555, "step": 8957 }, { "epoch": 0.48779890139061893, "grad_norm": 0.6275576481656014, "learning_rate": 0.00017694847289542027, "loss": 12.3984, "step": 8958 }, { "epoch": 0.4878533553872019, "grad_norm": 0.7453159192884955, "learning_rate": 0.0001769428406815332, "loss": 12.3675, "step": 8959 }, { "epoch": 0.487907809383785, "grad_norm": 0.7149271919206965, "learning_rate": 0.0001769372078693257, "loss": 12.4132, "step": 8960 }, { "epoch": 0.48796226338036797, "grad_norm": 0.6762737105423414, "learning_rate": 0.00017693157445884157, "loss": 12.3941, "step": 8961 }, { "epoch": 0.48801671737695096, "grad_norm": 0.6419090179100578, "learning_rate": 0.00017692594045012463, "loss": 12.304, "step": 8962 }, { "epoch": 0.488071171373534, "grad_norm": 0.6168353578562813, "learning_rate": 0.00017692030584321862, "loss": 12.43, "step": 8963 }, { "epoch": 0.488125625370117, "grad_norm": 0.7407722298231637, "learning_rate": 0.0001769146706381675, "loss": 12.4277, "step": 8964 }, { "epoch": 0.4881800793667, "grad_norm": 0.6123927513441908, "learning_rate": 0.00017690903483501494, "loss": 12.3529, "step": 8965 }, { "epoch": 0.48823453336328304, "grad_norm": 0.6927569113790386, "learning_rate": 0.00017690339843380487, "loss": 12.3037, "step": 8966 }, { "epoch": 0.48828898735986603, "grad_norm": 0.7282325256842512, "learning_rate": 0.00017689776143458102, "loss": 12.4561, "step": 8967 }, { "epoch": 0.4883434413564491, "grad_norm": 0.5931461718947436, "learning_rate": 0.00017689212383738734, "loss": 12.5021, "step": 8968 }, { "epoch": 0.4883978953530321, "grad_norm": 0.6057001158655123, "learning_rate": 0.00017688648564226757, "loss": 12.3242, "step": 8969 }, { "epoch": 0.48845234934961507, "grad_norm": 0.6998284602710015, "learning_rate": 0.00017688084684926563, "loss": 12.3638, "step": 8970 }, { "epoch": 0.4885068033461981, "grad_norm": 0.6223628650538694, "learning_rate": 0.0001768752074584253, "loss": 12.3924, "step": 8971 }, { "epoch": 0.4885612573427811, "grad_norm": 0.6172249769850962, "learning_rate": 0.0001768695674697905, "loss": 12.3722, "step": 8972 }, { "epoch": 0.4886157113393641, "grad_norm": 0.6434240293370114, "learning_rate": 0.00017686392688340502, "loss": 12.4554, "step": 8973 }, { "epoch": 0.48867016533594715, "grad_norm": 0.6284914832648406, "learning_rate": 0.00017685828569931277, "loss": 12.3951, "step": 8974 }, { "epoch": 0.48872461933253014, "grad_norm": 0.6584633649303565, "learning_rate": 0.00017685264391755756, "loss": 12.4403, "step": 8975 }, { "epoch": 0.48877907332911313, "grad_norm": 0.6239764584426922, "learning_rate": 0.00017684700153818334, "loss": 12.445, "step": 8976 }, { "epoch": 0.4888335273256962, "grad_norm": 0.604252889534663, "learning_rate": 0.00017684135856123394, "loss": 12.3662, "step": 8977 }, { "epoch": 0.4888879813222792, "grad_norm": 0.6768630849815542, "learning_rate": 0.00017683571498675326, "loss": 12.4315, "step": 8978 }, { "epoch": 0.48894243531886217, "grad_norm": 0.561773748949832, "learning_rate": 0.0001768300708147852, "loss": 12.4046, "step": 8979 }, { "epoch": 0.4889968893154452, "grad_norm": 0.6191608849730387, "learning_rate": 0.00017682442604537358, "loss": 12.3913, "step": 8980 }, { "epoch": 0.4890513433120282, "grad_norm": 0.668467651031518, "learning_rate": 0.00017681878067856235, "loss": 12.427, "step": 8981 }, { "epoch": 0.4891057973086112, "grad_norm": 0.5852875868444413, "learning_rate": 0.0001768131347143954, "loss": 12.2236, "step": 8982 }, { "epoch": 0.48916025130519425, "grad_norm": 0.6495359491080023, "learning_rate": 0.00017680748815291662, "loss": 12.2845, "step": 8983 }, { "epoch": 0.48921470530177724, "grad_norm": 0.7043442632709124, "learning_rate": 0.00017680184099416995, "loss": 12.3416, "step": 8984 }, { "epoch": 0.48926915929836023, "grad_norm": 0.6821440893967539, "learning_rate": 0.0001767961932381993, "loss": 12.488, "step": 8985 }, { "epoch": 0.4893236132949433, "grad_norm": 0.7518808059202897, "learning_rate": 0.00017679054488504856, "loss": 12.4599, "step": 8986 }, { "epoch": 0.4893780672915263, "grad_norm": 0.6489067588764982, "learning_rate": 0.00017678489593476164, "loss": 12.3983, "step": 8987 }, { "epoch": 0.48943252128810927, "grad_norm": 0.5932349853729175, "learning_rate": 0.0001767792463873825, "loss": 12.3459, "step": 8988 }, { "epoch": 0.4894869752846923, "grad_norm": 0.6926889837411475, "learning_rate": 0.0001767735962429551, "loss": 12.4644, "step": 8989 }, { "epoch": 0.4895414292812753, "grad_norm": 0.6420778151456797, "learning_rate": 0.0001767679455015233, "loss": 12.3545, "step": 8990 }, { "epoch": 0.4895958832778583, "grad_norm": 0.6720821949354343, "learning_rate": 0.0001767622941631311, "loss": 12.4321, "step": 8991 }, { "epoch": 0.48965033727444135, "grad_norm": 0.6577540205055701, "learning_rate": 0.00017675664222782244, "loss": 12.503, "step": 8992 }, { "epoch": 0.48970479127102434, "grad_norm": 0.6245459109383175, "learning_rate": 0.00017675098969564124, "loss": 12.4008, "step": 8993 }, { "epoch": 0.48975924526760733, "grad_norm": 0.6799189419701791, "learning_rate": 0.0001767453365666315, "loss": 12.566, "step": 8994 }, { "epoch": 0.4898136992641904, "grad_norm": 0.6190799234397225, "learning_rate": 0.0001767396828408371, "loss": 12.4133, "step": 8995 }, { "epoch": 0.4898681532607734, "grad_norm": 0.5648051502723331, "learning_rate": 0.0001767340285183021, "loss": 12.3986, "step": 8996 }, { "epoch": 0.48992260725735637, "grad_norm": 0.7832200571204988, "learning_rate": 0.0001767283735990704, "loss": 12.4317, "step": 8997 }, { "epoch": 0.4899770612539394, "grad_norm": 0.5896019405323225, "learning_rate": 0.00017672271808318605, "loss": 12.3887, "step": 8998 }, { "epoch": 0.4900315152505224, "grad_norm": 0.5847485314578779, "learning_rate": 0.0001767170619706929, "loss": 12.2743, "step": 8999 }, { "epoch": 0.49008596924710546, "grad_norm": 0.6561473704564957, "learning_rate": 0.00017671140526163506, "loss": 12.3998, "step": 9000 }, { "epoch": 0.49014042324368845, "grad_norm": 0.6031748325909405, "learning_rate": 0.00017670574795605645, "loss": 12.3966, "step": 9001 }, { "epoch": 0.49019487724027144, "grad_norm": 0.5969040835414722, "learning_rate": 0.0001767000900540011, "loss": 12.4078, "step": 9002 }, { "epoch": 0.4902493312368545, "grad_norm": 0.7009410013108908, "learning_rate": 0.00017669443155551298, "loss": 12.3908, "step": 9003 }, { "epoch": 0.4903037852334375, "grad_norm": 0.6204813872425611, "learning_rate": 0.00017668877246063608, "loss": 12.3522, "step": 9004 }, { "epoch": 0.4903582392300205, "grad_norm": 0.6725773712069133, "learning_rate": 0.00017668311276941445, "loss": 12.4103, "step": 9005 }, { "epoch": 0.4904126932266035, "grad_norm": 0.5686997026451569, "learning_rate": 0.00017667745248189207, "loss": 12.1809, "step": 9006 }, { "epoch": 0.4904671472231865, "grad_norm": 0.6337705792607752, "learning_rate": 0.00017667179159811295, "loss": 12.3331, "step": 9007 }, { "epoch": 0.4905216012197695, "grad_norm": 0.6859319984556346, "learning_rate": 0.00017666613011812113, "loss": 12.4774, "step": 9008 }, { "epoch": 0.49057605521635256, "grad_norm": 0.68712970750493, "learning_rate": 0.0001766604680419606, "loss": 12.4661, "step": 9009 }, { "epoch": 0.49063050921293555, "grad_norm": 0.6337798565063828, "learning_rate": 0.00017665480536967546, "loss": 12.5433, "step": 9010 }, { "epoch": 0.49068496320951854, "grad_norm": 0.9691188287958652, "learning_rate": 0.00017664914210130966, "loss": 12.5172, "step": 9011 }, { "epoch": 0.4907394172061016, "grad_norm": 0.624849717001331, "learning_rate": 0.0001766434782369073, "loss": 12.4614, "step": 9012 }, { "epoch": 0.4907938712026846, "grad_norm": 0.6255963762613319, "learning_rate": 0.0001766378137765124, "loss": 12.4613, "step": 9013 }, { "epoch": 0.4908483251992676, "grad_norm": 0.6966420063072758, "learning_rate": 0.000176632148720169, "loss": 12.3903, "step": 9014 }, { "epoch": 0.4909027791958506, "grad_norm": 0.6181596567503763, "learning_rate": 0.00017662648306792118, "loss": 12.4234, "step": 9015 }, { "epoch": 0.4909572331924336, "grad_norm": 0.6472714093995697, "learning_rate": 0.00017662081681981296, "loss": 12.4579, "step": 9016 }, { "epoch": 0.4910116871890166, "grad_norm": 0.6307280492239105, "learning_rate": 0.0001766151499758884, "loss": 12.3809, "step": 9017 }, { "epoch": 0.49106614118559966, "grad_norm": 0.7145252730355572, "learning_rate": 0.0001766094825361916, "loss": 12.4365, "step": 9018 }, { "epoch": 0.49112059518218265, "grad_norm": 0.5779293889447368, "learning_rate": 0.0001766038145007666, "loss": 12.3208, "step": 9019 }, { "epoch": 0.49117504917876564, "grad_norm": 0.5970030202119183, "learning_rate": 0.00017659814586965753, "loss": 12.4748, "step": 9020 }, { "epoch": 0.4912295031753487, "grad_norm": 0.6713233594796585, "learning_rate": 0.00017659247664290843, "loss": 12.5403, "step": 9021 }, { "epoch": 0.4912839571719317, "grad_norm": 0.6389280094245742, "learning_rate": 0.00017658680682056336, "loss": 12.3417, "step": 9022 }, { "epoch": 0.4913384111685147, "grad_norm": 0.5997464523487829, "learning_rate": 0.00017658113640266646, "loss": 12.2971, "step": 9023 }, { "epoch": 0.4913928651650977, "grad_norm": 0.5998221515239658, "learning_rate": 0.00017657546538926176, "loss": 12.4103, "step": 9024 }, { "epoch": 0.4914473191616807, "grad_norm": 0.6621423715721438, "learning_rate": 0.00017656979378039345, "loss": 12.2766, "step": 9025 }, { "epoch": 0.4915017731582637, "grad_norm": 0.641780152243786, "learning_rate": 0.00017656412157610553, "loss": 12.4449, "step": 9026 }, { "epoch": 0.49155622715484676, "grad_norm": 0.630345320868909, "learning_rate": 0.00017655844877644222, "loss": 12.3306, "step": 9027 }, { "epoch": 0.49161068115142975, "grad_norm": 0.6483735385691813, "learning_rate": 0.00017655277538144754, "loss": 12.376, "step": 9028 }, { "epoch": 0.49166513514801274, "grad_norm": 0.6309651726102987, "learning_rate": 0.00017654710139116563, "loss": 12.5355, "step": 9029 }, { "epoch": 0.4917195891445958, "grad_norm": 0.600940677237466, "learning_rate": 0.00017654142680564064, "loss": 12.334, "step": 9030 }, { "epoch": 0.4917740431411788, "grad_norm": 0.5921764876537235, "learning_rate": 0.0001765357516249167, "loss": 12.2664, "step": 9031 }, { "epoch": 0.4918284971377618, "grad_norm": 0.544108968363809, "learning_rate": 0.0001765300758490379, "loss": 12.4143, "step": 9032 }, { "epoch": 0.4918829511343448, "grad_norm": 0.7068826009519629, "learning_rate": 0.00017652439947804838, "loss": 12.3461, "step": 9033 }, { "epoch": 0.4919374051309278, "grad_norm": 0.6469820503926543, "learning_rate": 0.00017651872251199229, "loss": 12.3202, "step": 9034 }, { "epoch": 0.49199185912751087, "grad_norm": 0.6601942119120693, "learning_rate": 0.0001765130449509138, "loss": 12.4268, "step": 9035 }, { "epoch": 0.49204631312409386, "grad_norm": 0.5950297306819458, "learning_rate": 0.00017650736679485706, "loss": 12.4843, "step": 9036 }, { "epoch": 0.49210076712067685, "grad_norm": 0.6617665756952725, "learning_rate": 0.00017650168804386618, "loss": 12.3563, "step": 9037 }, { "epoch": 0.4921552211172599, "grad_norm": 0.6127139296457449, "learning_rate": 0.00017649600869798534, "loss": 12.4384, "step": 9038 }, { "epoch": 0.4922096751138429, "grad_norm": 0.6596836507747746, "learning_rate": 0.00017649032875725873, "loss": 12.3869, "step": 9039 }, { "epoch": 0.4922641291104259, "grad_norm": 0.7322953833439002, "learning_rate": 0.00017648464822173047, "loss": 12.4327, "step": 9040 }, { "epoch": 0.49231858310700893, "grad_norm": 0.6495668511280798, "learning_rate": 0.0001764789670914448, "loss": 12.3549, "step": 9041 }, { "epoch": 0.4923730371035919, "grad_norm": 0.6118572627218264, "learning_rate": 0.00017647328536644582, "loss": 12.4796, "step": 9042 }, { "epoch": 0.4924274911001749, "grad_norm": 0.8118395035290029, "learning_rate": 0.00017646760304677775, "loss": 12.3732, "step": 9043 }, { "epoch": 0.49248194509675797, "grad_norm": 0.6024457061523186, "learning_rate": 0.0001764619201324848, "loss": 12.3939, "step": 9044 }, { "epoch": 0.49253639909334096, "grad_norm": 0.622607292398364, "learning_rate": 0.0001764562366236111, "loss": 12.4687, "step": 9045 }, { "epoch": 0.49259085308992395, "grad_norm": 0.6375419946487572, "learning_rate": 0.00017645055252020093, "loss": 12.4258, "step": 9046 }, { "epoch": 0.492645307086507, "grad_norm": 0.6396330427611137, "learning_rate": 0.0001764448678222984, "loss": 12.4937, "step": 9047 }, { "epoch": 0.49269976108309, "grad_norm": 0.6192244845607952, "learning_rate": 0.00017643918252994778, "loss": 12.1713, "step": 9048 }, { "epoch": 0.492754215079673, "grad_norm": 0.6223106593402868, "learning_rate": 0.00017643349664319327, "loss": 12.5063, "step": 9049 }, { "epoch": 0.49280866907625603, "grad_norm": 0.6008700229121132, "learning_rate": 0.00017642781016207902, "loss": 12.4324, "step": 9050 }, { "epoch": 0.492863123072839, "grad_norm": 0.6685584067925143, "learning_rate": 0.00017642212308664935, "loss": 12.4043, "step": 9051 }, { "epoch": 0.492917577069422, "grad_norm": 0.652765870070671, "learning_rate": 0.00017641643541694841, "loss": 12.2954, "step": 9052 }, { "epoch": 0.49297203106600507, "grad_norm": 0.5641445539183918, "learning_rate": 0.0001764107471530205, "loss": 12.2922, "step": 9053 }, { "epoch": 0.49302648506258806, "grad_norm": 0.7735158333144617, "learning_rate": 0.00017640505829490975, "loss": 12.4085, "step": 9054 }, { "epoch": 0.49308093905917105, "grad_norm": 0.7470430066736975, "learning_rate": 0.00017639936884266045, "loss": 12.349, "step": 9055 }, { "epoch": 0.4931353930557541, "grad_norm": 0.7206711098167883, "learning_rate": 0.00017639367879631687, "loss": 12.299, "step": 9056 }, { "epoch": 0.4931898470523371, "grad_norm": 0.7371347771651026, "learning_rate": 0.00017638798815592326, "loss": 12.4882, "step": 9057 }, { "epoch": 0.4932443010489201, "grad_norm": 0.595033333497349, "learning_rate": 0.0001763822969215238, "loss": 12.3871, "step": 9058 }, { "epoch": 0.49329875504550313, "grad_norm": 0.8054972406286517, "learning_rate": 0.0001763766050931628, "loss": 12.405, "step": 9059 }, { "epoch": 0.4933532090420861, "grad_norm": 0.6098101737336612, "learning_rate": 0.00017637091267088453, "loss": 12.4194, "step": 9060 }, { "epoch": 0.4934076630386691, "grad_norm": 0.838036559345574, "learning_rate": 0.00017636521965473323, "loss": 12.4134, "step": 9061 }, { "epoch": 0.49346211703525217, "grad_norm": 0.6159087356357568, "learning_rate": 0.00017635952604475318, "loss": 12.4425, "step": 9062 }, { "epoch": 0.49351657103183516, "grad_norm": 0.5772658512197129, "learning_rate": 0.0001763538318409886, "loss": 12.3848, "step": 9063 }, { "epoch": 0.49357102502841815, "grad_norm": 0.648307453696023, "learning_rate": 0.00017634813704348385, "loss": 12.4, "step": 9064 }, { "epoch": 0.4936254790250012, "grad_norm": 0.797999057121607, "learning_rate": 0.0001763424416522832, "loss": 12.4749, "step": 9065 }, { "epoch": 0.4936799330215842, "grad_norm": 0.6154663448292866, "learning_rate": 0.00017633674566743093, "loss": 12.4569, "step": 9066 }, { "epoch": 0.49373438701816724, "grad_norm": 0.6053841272700453, "learning_rate": 0.00017633104908897128, "loss": 12.4407, "step": 9067 }, { "epoch": 0.49378884101475023, "grad_norm": 0.6125434288709458, "learning_rate": 0.00017632535191694864, "loss": 12.3684, "step": 9068 }, { "epoch": 0.4938432950113332, "grad_norm": 0.5478148758765967, "learning_rate": 0.0001763196541514072, "loss": 12.466, "step": 9069 }, { "epoch": 0.4938977490079163, "grad_norm": 0.6767775722685464, "learning_rate": 0.00017631395579239138, "loss": 12.4683, "step": 9070 }, { "epoch": 0.49395220300449927, "grad_norm": 0.6565295047603227, "learning_rate": 0.00017630825683994546, "loss": 12.4402, "step": 9071 }, { "epoch": 0.49400665700108226, "grad_norm": 0.6186660536186829, "learning_rate": 0.0001763025572941137, "loss": 12.4184, "step": 9072 }, { "epoch": 0.4940611109976653, "grad_norm": 0.574512487793142, "learning_rate": 0.00017629685715494047, "loss": 12.3434, "step": 9073 }, { "epoch": 0.4941155649942483, "grad_norm": 0.5786501159465615, "learning_rate": 0.00017629115642247008, "loss": 12.2469, "step": 9074 }, { "epoch": 0.4941700189908313, "grad_norm": 0.6344422448863354, "learning_rate": 0.00017628545509674686, "loss": 12.3322, "step": 9075 }, { "epoch": 0.49422447298741434, "grad_norm": 0.5947827175109859, "learning_rate": 0.00017627975317781514, "loss": 12.3867, "step": 9076 }, { "epoch": 0.49427892698399734, "grad_norm": 0.7234156368573843, "learning_rate": 0.00017627405066571926, "loss": 12.4595, "step": 9077 }, { "epoch": 0.49433338098058033, "grad_norm": 0.5716608289086761, "learning_rate": 0.0001762683475605036, "loss": 12.3055, "step": 9078 }, { "epoch": 0.4943878349771634, "grad_norm": 0.6516898534652547, "learning_rate": 0.0001762626438622125, "loss": 12.2219, "step": 9079 }, { "epoch": 0.49444228897374637, "grad_norm": 0.5531591292155007, "learning_rate": 0.00017625693957089023, "loss": 12.2898, "step": 9080 }, { "epoch": 0.49449674297032936, "grad_norm": 0.6540053412369687, "learning_rate": 0.00017625123468658128, "loss": 12.3422, "step": 9081 }, { "epoch": 0.4945511969669124, "grad_norm": 0.5822114864652976, "learning_rate": 0.00017624552920932987, "loss": 12.3534, "step": 9082 }, { "epoch": 0.4946056509634954, "grad_norm": 0.647468952988309, "learning_rate": 0.00017623982313918048, "loss": 12.2724, "step": 9083 }, { "epoch": 0.4946601049600784, "grad_norm": 0.6538053463120066, "learning_rate": 0.00017623411647617744, "loss": 12.4656, "step": 9084 }, { "epoch": 0.49471455895666144, "grad_norm": 0.5912115377511528, "learning_rate": 0.00017622840922036512, "loss": 12.3684, "step": 9085 }, { "epoch": 0.49476901295324444, "grad_norm": 0.5638563271249786, "learning_rate": 0.00017622270137178793, "loss": 12.2539, "step": 9086 }, { "epoch": 0.49482346694982743, "grad_norm": 0.6885206221172212, "learning_rate": 0.00017621699293049017, "loss": 12.5263, "step": 9087 }, { "epoch": 0.4948779209464105, "grad_norm": 0.6695687649039862, "learning_rate": 0.00017621128389651633, "loss": 12.4807, "step": 9088 }, { "epoch": 0.49493237494299347, "grad_norm": 0.6267441366242752, "learning_rate": 0.00017620557426991078, "loss": 12.4484, "step": 9089 }, { "epoch": 0.49498682893957646, "grad_norm": 0.7064926132108872, "learning_rate": 0.00017619986405071787, "loss": 12.4488, "step": 9090 }, { "epoch": 0.4950412829361595, "grad_norm": 0.56968390851137, "learning_rate": 0.00017619415323898206, "loss": 12.3079, "step": 9091 }, { "epoch": 0.4950957369327425, "grad_norm": 0.6304071941633153, "learning_rate": 0.00017618844183474774, "loss": 12.512, "step": 9092 }, { "epoch": 0.4951501909293255, "grad_norm": 0.6627967334696621, "learning_rate": 0.0001761827298380593, "loss": 12.444, "step": 9093 }, { "epoch": 0.49520464492590854, "grad_norm": 0.6285211262951765, "learning_rate": 0.00017617701724896117, "loss": 12.3301, "step": 9094 }, { "epoch": 0.49525909892249154, "grad_norm": 0.6608859614262765, "learning_rate": 0.00017617130406749783, "loss": 12.4028, "step": 9095 }, { "epoch": 0.49531355291907453, "grad_norm": 0.6317947756451693, "learning_rate": 0.0001761655902937136, "loss": 12.4618, "step": 9096 }, { "epoch": 0.4953680069156576, "grad_norm": 0.7670429802659895, "learning_rate": 0.000176159875927653, "loss": 12.4435, "step": 9097 }, { "epoch": 0.49542246091224057, "grad_norm": 0.7483558764694811, "learning_rate": 0.00017615416096936043, "loss": 12.4271, "step": 9098 }, { "epoch": 0.49547691490882356, "grad_norm": 0.727556789374051, "learning_rate": 0.00017614844541888035, "loss": 12.2108, "step": 9099 }, { "epoch": 0.4955313689054066, "grad_norm": 0.9103992034491821, "learning_rate": 0.00017614272927625715, "loss": 12.4841, "step": 9100 }, { "epoch": 0.4955858229019896, "grad_norm": 0.593670948636598, "learning_rate": 0.0001761370125415353, "loss": 12.4133, "step": 9101 }, { "epoch": 0.49564027689857265, "grad_norm": 0.7023290626503857, "learning_rate": 0.00017613129521475932, "loss": 12.3761, "step": 9102 }, { "epoch": 0.49569473089515564, "grad_norm": 0.6374200651285543, "learning_rate": 0.0001761255772959736, "loss": 12.4001, "step": 9103 }, { "epoch": 0.49574918489173864, "grad_norm": 0.7084811765004031, "learning_rate": 0.00017611985878522265, "loss": 12.4626, "step": 9104 }, { "epoch": 0.4958036388883217, "grad_norm": 0.7987390974405981, "learning_rate": 0.00017611413968255087, "loss": 12.3393, "step": 9105 }, { "epoch": 0.4958580928849047, "grad_norm": 0.6722889300823013, "learning_rate": 0.00017610841998800283, "loss": 12.3658, "step": 9106 }, { "epoch": 0.49591254688148767, "grad_norm": 0.7000109463103993, "learning_rate": 0.0001761026997016229, "loss": 12.4182, "step": 9107 }, { "epoch": 0.4959670008780707, "grad_norm": 0.6721741169406789, "learning_rate": 0.00017609697882345565, "loss": 12.5067, "step": 9108 }, { "epoch": 0.4960214548746537, "grad_norm": 0.7401697808763813, "learning_rate": 0.0001760912573535455, "loss": 12.5111, "step": 9109 }, { "epoch": 0.4960759088712367, "grad_norm": 0.718441618807066, "learning_rate": 0.000176085535291937, "loss": 12.3193, "step": 9110 }, { "epoch": 0.49613036286781975, "grad_norm": 0.6037022793016471, "learning_rate": 0.0001760798126386746, "loss": 12.3863, "step": 9111 }, { "epoch": 0.49618481686440274, "grad_norm": 0.6269927356974456, "learning_rate": 0.00017607408939380282, "loss": 12.3418, "step": 9112 }, { "epoch": 0.49623927086098574, "grad_norm": 0.6458263636719211, "learning_rate": 0.00017606836555736618, "loss": 12.4122, "step": 9113 }, { "epoch": 0.4962937248575688, "grad_norm": 0.6077674815766129, "learning_rate": 0.0001760626411294092, "loss": 12.3997, "step": 9114 }, { "epoch": 0.4963481788541518, "grad_norm": 0.6827924582993539, "learning_rate": 0.00017605691610997633, "loss": 12.5016, "step": 9115 }, { "epoch": 0.49640263285073477, "grad_norm": 1.061163683682852, "learning_rate": 0.0001760511904991121, "loss": 12.4663, "step": 9116 }, { "epoch": 0.4964570868473178, "grad_norm": 0.6579332992213393, "learning_rate": 0.0001760454642968611, "loss": 12.4702, "step": 9117 }, { "epoch": 0.4965115408439008, "grad_norm": 0.6817706542101409, "learning_rate": 0.00017603973750326778, "loss": 12.4496, "step": 9118 }, { "epoch": 0.4965659948404838, "grad_norm": 0.6617212495815381, "learning_rate": 0.00017603401011837675, "loss": 12.4429, "step": 9119 }, { "epoch": 0.49662044883706685, "grad_norm": 0.7923159528782391, "learning_rate": 0.00017602828214223249, "loss": 12.459, "step": 9120 }, { "epoch": 0.49667490283364985, "grad_norm": 0.6616900559517679, "learning_rate": 0.00017602255357487953, "loss": 12.4547, "step": 9121 }, { "epoch": 0.49672935683023284, "grad_norm": 0.6740391179889882, "learning_rate": 0.00017601682441636246, "loss": 12.3776, "step": 9122 }, { "epoch": 0.4967838108268159, "grad_norm": 0.6543330850905006, "learning_rate": 0.00017601109466672585, "loss": 12.3831, "step": 9123 }, { "epoch": 0.4968382648233989, "grad_norm": 0.6138310059376589, "learning_rate": 0.00017600536432601417, "loss": 12.4033, "step": 9124 }, { "epoch": 0.49689271881998187, "grad_norm": 0.6241853205466411, "learning_rate": 0.00017599963339427207, "loss": 12.1249, "step": 9125 }, { "epoch": 0.4969471728165649, "grad_norm": 0.654784791879004, "learning_rate": 0.00017599390187154405, "loss": 12.34, "step": 9126 }, { "epoch": 0.4970016268131479, "grad_norm": 0.6626102053470619, "learning_rate": 0.00017598816975787475, "loss": 12.524, "step": 9127 }, { "epoch": 0.4970560808097309, "grad_norm": 0.7690448781237839, "learning_rate": 0.00017598243705330865, "loss": 12.3843, "step": 9128 }, { "epoch": 0.49711053480631395, "grad_norm": 0.5921755860108986, "learning_rate": 0.0001759767037578904, "loss": 12.3137, "step": 9129 }, { "epoch": 0.49716498880289695, "grad_norm": 0.6724512511904382, "learning_rate": 0.00017597096987166454, "loss": 12.4212, "step": 9130 }, { "epoch": 0.49721944279947994, "grad_norm": 0.6060506869183341, "learning_rate": 0.0001759652353946757, "loss": 12.4006, "step": 9131 }, { "epoch": 0.497273896796063, "grad_norm": 0.5649231816877845, "learning_rate": 0.00017595950032696844, "loss": 12.3956, "step": 9132 }, { "epoch": 0.497328350792646, "grad_norm": 0.7233811405518149, "learning_rate": 0.00017595376466858736, "loss": 12.4503, "step": 9133 }, { "epoch": 0.497382804789229, "grad_norm": 0.6628960967687958, "learning_rate": 0.0001759480284195771, "loss": 12.4771, "step": 9134 }, { "epoch": 0.497437258785812, "grad_norm": 0.68131829983016, "learning_rate": 0.00017594229157998222, "loss": 12.3811, "step": 9135 }, { "epoch": 0.497491712782395, "grad_norm": 0.6134912800656412, "learning_rate": 0.0001759365541498473, "loss": 12.3098, "step": 9136 }, { "epoch": 0.49754616677897806, "grad_norm": 0.6714589554358765, "learning_rate": 0.00017593081612921705, "loss": 12.411, "step": 9137 }, { "epoch": 0.49760062077556105, "grad_norm": 0.6379318436445025, "learning_rate": 0.00017592507751813601, "loss": 12.4893, "step": 9138 }, { "epoch": 0.49765507477214405, "grad_norm": 0.5473509143522587, "learning_rate": 0.00017591933831664886, "loss": 12.2698, "step": 9139 }, { "epoch": 0.4977095287687271, "grad_norm": 0.6009501607651135, "learning_rate": 0.0001759135985248002, "loss": 12.2894, "step": 9140 }, { "epoch": 0.4977639827653101, "grad_norm": 0.6799834772024416, "learning_rate": 0.00017590785814263466, "loss": 12.4481, "step": 9141 }, { "epoch": 0.4978184367618931, "grad_norm": 0.6630782242113525, "learning_rate": 0.00017590211717019688, "loss": 12.3452, "step": 9142 }, { "epoch": 0.4978728907584761, "grad_norm": 0.7008834707717136, "learning_rate": 0.00017589637560753153, "loss": 12.4784, "step": 9143 }, { "epoch": 0.4979273447550591, "grad_norm": 0.6515565727629259, "learning_rate": 0.0001758906334546832, "loss": 12.3688, "step": 9144 }, { "epoch": 0.4979817987516421, "grad_norm": 0.626932835931393, "learning_rate": 0.0001758848907116966, "loss": 12.3809, "step": 9145 }, { "epoch": 0.49803625274822516, "grad_norm": 0.6795840967085799, "learning_rate": 0.00017587914737861638, "loss": 12.5972, "step": 9146 }, { "epoch": 0.49809070674480815, "grad_norm": 0.6063465269047075, "learning_rate": 0.00017587340345548717, "loss": 12.4608, "step": 9147 }, { "epoch": 0.49814516074139115, "grad_norm": 0.5828488884039613, "learning_rate": 0.00017586765894235363, "loss": 12.3942, "step": 9148 }, { "epoch": 0.4981996147379742, "grad_norm": 0.5762418528638724, "learning_rate": 0.0001758619138392605, "loss": 12.3874, "step": 9149 }, { "epoch": 0.4982540687345572, "grad_norm": 0.7426995154173507, "learning_rate": 0.00017585616814625235, "loss": 12.3388, "step": 9150 }, { "epoch": 0.4983085227311402, "grad_norm": 0.6992365012527693, "learning_rate": 0.00017585042186337395, "loss": 12.4163, "step": 9151 }, { "epoch": 0.49836297672772323, "grad_norm": 0.6667318576146535, "learning_rate": 0.00017584467499066994, "loss": 12.486, "step": 9152 }, { "epoch": 0.4984174307243062, "grad_norm": 0.607362379750229, "learning_rate": 0.000175838927528185, "loss": 12.437, "step": 9153 }, { "epoch": 0.4984718847208892, "grad_norm": 0.6540776468373742, "learning_rate": 0.00017583317947596387, "loss": 12.5109, "step": 9154 }, { "epoch": 0.49852633871747226, "grad_norm": 0.6659725463738717, "learning_rate": 0.00017582743083405122, "loss": 12.3915, "step": 9155 }, { "epoch": 0.49858079271405525, "grad_norm": 0.6255060583128188, "learning_rate": 0.00017582168160249173, "loss": 12.2338, "step": 9156 }, { "epoch": 0.49863524671063825, "grad_norm": 0.5953756443739128, "learning_rate": 0.00017581593178133016, "loss": 12.3541, "step": 9157 }, { "epoch": 0.4986897007072213, "grad_norm": 0.6483309366016954, "learning_rate": 0.00017581018137061114, "loss": 12.2435, "step": 9158 }, { "epoch": 0.4987441547038043, "grad_norm": 0.5991782175764987, "learning_rate": 0.00017580443037037947, "loss": 12.4159, "step": 9159 }, { "epoch": 0.4987986087003873, "grad_norm": 0.6104988237916549, "learning_rate": 0.00017579867878067984, "loss": 12.4037, "step": 9160 }, { "epoch": 0.49885306269697033, "grad_norm": 0.6523693446003387, "learning_rate": 0.00017579292660155696, "loss": 12.4619, "step": 9161 }, { "epoch": 0.4989075166935533, "grad_norm": 0.5850126153423292, "learning_rate": 0.0001757871738330556, "loss": 12.3452, "step": 9162 }, { "epoch": 0.4989619706901363, "grad_norm": 0.6481344573786663, "learning_rate": 0.00017578142047522044, "loss": 12.4467, "step": 9163 }, { "epoch": 0.49901642468671936, "grad_norm": 0.6272323440376265, "learning_rate": 0.00017577566652809626, "loss": 12.3522, "step": 9164 }, { "epoch": 0.49907087868330235, "grad_norm": 0.6205903085328062, "learning_rate": 0.00017576991199172776, "loss": 12.5213, "step": 9165 }, { "epoch": 0.49912533267988535, "grad_norm": 0.6477408614756291, "learning_rate": 0.00017576415686615976, "loss": 12.4615, "step": 9166 }, { "epoch": 0.4991797866764684, "grad_norm": 0.7118857219939702, "learning_rate": 0.00017575840115143692, "loss": 12.5615, "step": 9167 }, { "epoch": 0.4992342406730514, "grad_norm": 0.5831572365350796, "learning_rate": 0.00017575264484760407, "loss": 12.4012, "step": 9168 }, { "epoch": 0.49928869466963444, "grad_norm": 0.6296968500770712, "learning_rate": 0.00017574688795470595, "loss": 12.5436, "step": 9169 }, { "epoch": 0.49934314866621743, "grad_norm": 0.6197900937902061, "learning_rate": 0.00017574113047278734, "loss": 12.3763, "step": 9170 }, { "epoch": 0.4993976026628004, "grad_norm": 0.695940103894005, "learning_rate": 0.00017573537240189298, "loss": 12.3472, "step": 9171 }, { "epoch": 0.49945205665938347, "grad_norm": 0.6499827974246789, "learning_rate": 0.00017572961374206768, "loss": 12.4526, "step": 9172 }, { "epoch": 0.49950651065596646, "grad_norm": 0.6880144057586078, "learning_rate": 0.0001757238544933562, "loss": 12.4985, "step": 9173 }, { "epoch": 0.49956096465254946, "grad_norm": 0.7149621700845834, "learning_rate": 0.00017571809465580332, "loss": 12.4272, "step": 9174 }, { "epoch": 0.4996154186491325, "grad_norm": 0.6413703173304228, "learning_rate": 0.00017571233422945386, "loss": 12.4654, "step": 9175 }, { "epoch": 0.4996698726457155, "grad_norm": 0.6444690707512226, "learning_rate": 0.00017570657321435256, "loss": 12.3721, "step": 9176 }, { "epoch": 0.4997243266422985, "grad_norm": 0.6632077405948433, "learning_rate": 0.00017570081161054425, "loss": 12.4597, "step": 9177 }, { "epoch": 0.49977878063888154, "grad_norm": 0.6920115258147176, "learning_rate": 0.00017569504941807376, "loss": 12.4457, "step": 9178 }, { "epoch": 0.49983323463546453, "grad_norm": 0.6958336434558215, "learning_rate": 0.00017568928663698588, "loss": 12.4001, "step": 9179 }, { "epoch": 0.4998876886320475, "grad_norm": 0.6470874095176246, "learning_rate": 0.00017568352326732538, "loss": 12.3732, "step": 9180 }, { "epoch": 0.49994214262863057, "grad_norm": 0.6266020668283614, "learning_rate": 0.00017567775930913714, "loss": 12.3153, "step": 9181 }, { "epoch": 0.49999659662521356, "grad_norm": 0.6116119982186802, "learning_rate": 0.00017567199476246591, "loss": 12.4759, "step": 9182 }, { "epoch": 0.5000510506217966, "grad_norm": 0.6341883591867439, "learning_rate": 0.00017566622962735662, "loss": 12.4495, "step": 9183 }, { "epoch": 0.5001055046183795, "grad_norm": 0.5901644660053987, "learning_rate": 0.00017566046390385398, "loss": 12.433, "step": 9184 }, { "epoch": 0.5001599586149627, "grad_norm": 0.6640999529367435, "learning_rate": 0.00017565469759200293, "loss": 12.2458, "step": 9185 }, { "epoch": 0.5002144126115456, "grad_norm": 0.6842580247261328, "learning_rate": 0.00017564893069184825, "loss": 12.5172, "step": 9186 }, { "epoch": 0.5002688666081286, "grad_norm": 0.5644833573755198, "learning_rate": 0.00017564316320343477, "loss": 12.3548, "step": 9187 }, { "epoch": 0.5003233206047116, "grad_norm": 0.6150274179512815, "learning_rate": 0.0001756373951268074, "loss": 12.2932, "step": 9188 }, { "epoch": 0.5003777746012946, "grad_norm": 0.5598091040693869, "learning_rate": 0.00017563162646201094, "loss": 12.4621, "step": 9189 }, { "epoch": 0.5004322285978776, "grad_norm": 0.6553818557013275, "learning_rate": 0.0001756258572090903, "loss": 12.5473, "step": 9190 }, { "epoch": 0.5004866825944607, "grad_norm": 0.8404702869084849, "learning_rate": 0.0001756200873680903, "loss": 12.4445, "step": 9191 }, { "epoch": 0.5005411365910437, "grad_norm": 0.5642230661996501, "learning_rate": 0.0001756143169390558, "loss": 12.4559, "step": 9192 }, { "epoch": 0.5005955905876267, "grad_norm": 0.6603679295883715, "learning_rate": 0.0001756085459220317, "loss": 12.4716, "step": 9193 }, { "epoch": 0.5006500445842097, "grad_norm": 0.7212196764091794, "learning_rate": 0.00017560277431706288, "loss": 12.5586, "step": 9194 }, { "epoch": 0.5007044985807927, "grad_norm": 0.6153404274085386, "learning_rate": 0.0001755970021241942, "loss": 12.3659, "step": 9195 }, { "epoch": 0.5007589525773757, "grad_norm": 0.7446022353173876, "learning_rate": 0.00017559122934347055, "loss": 12.5325, "step": 9196 }, { "epoch": 0.5008134065739588, "grad_norm": 0.6822957531520627, "learning_rate": 0.0001755854559749368, "loss": 12.284, "step": 9197 }, { "epoch": 0.5008678605705418, "grad_norm": 0.5839316572008065, "learning_rate": 0.00017557968201863792, "loss": 12.4315, "step": 9198 }, { "epoch": 0.5009223145671248, "grad_norm": 0.5920598310314543, "learning_rate": 0.00017557390747461872, "loss": 12.4369, "step": 9199 }, { "epoch": 0.5009767685637078, "grad_norm": 0.5746963240067243, "learning_rate": 0.0001755681323429242, "loss": 12.4049, "step": 9200 }, { "epoch": 0.5010312225602908, "grad_norm": 0.6387688032969914, "learning_rate": 0.00017556235662359915, "loss": 12.3825, "step": 9201 }, { "epoch": 0.5010856765568737, "grad_norm": 0.6220273180989577, "learning_rate": 0.00017555658031668853, "loss": 12.3686, "step": 9202 }, { "epoch": 0.5011401305534569, "grad_norm": 0.5955115588017998, "learning_rate": 0.0001755508034222373, "loss": 12.4406, "step": 9203 }, { "epoch": 0.5011945845500398, "grad_norm": 0.6381359428691479, "learning_rate": 0.00017554502594029038, "loss": 12.3493, "step": 9204 }, { "epoch": 0.5012490385466228, "grad_norm": 0.6268485567466849, "learning_rate": 0.00017553924787089262, "loss": 12.3149, "step": 9205 }, { "epoch": 0.5013034925432058, "grad_norm": 0.5994229081680599, "learning_rate": 0.000175533469214089, "loss": 12.3849, "step": 9206 }, { "epoch": 0.5013579465397888, "grad_norm": 0.6879692862918647, "learning_rate": 0.0001755276899699245, "loss": 12.3399, "step": 9207 }, { "epoch": 0.5014124005363718, "grad_norm": 0.6485348858208008, "learning_rate": 0.00017552191013844398, "loss": 12.37, "step": 9208 }, { "epoch": 0.5014668545329549, "grad_norm": 0.6290445813308777, "learning_rate": 0.00017551612971969247, "loss": 12.4224, "step": 9209 }, { "epoch": 0.5015213085295379, "grad_norm": 0.7839468838039373, "learning_rate": 0.0001755103487137148, "loss": 12.5299, "step": 9210 }, { "epoch": 0.5015757625261209, "grad_norm": 0.6271535640482853, "learning_rate": 0.00017550456712055605, "loss": 12.3695, "step": 9211 }, { "epoch": 0.5016302165227039, "grad_norm": 0.6818818059207238, "learning_rate": 0.00017549878494026112, "loss": 12.2653, "step": 9212 }, { "epoch": 0.5016846705192869, "grad_norm": 0.7178157128174575, "learning_rate": 0.00017549300217287495, "loss": 12.5114, "step": 9213 }, { "epoch": 0.5017391245158699, "grad_norm": 0.6376885520060535, "learning_rate": 0.00017548721881844255, "loss": 12.3963, "step": 9214 }, { "epoch": 0.501793578512453, "grad_norm": 0.7370977772095955, "learning_rate": 0.0001754814348770089, "loss": 12.5435, "step": 9215 }, { "epoch": 0.501848032509036, "grad_norm": 0.636511787689217, "learning_rate": 0.00017547565034861892, "loss": 12.396, "step": 9216 }, { "epoch": 0.501902486505619, "grad_norm": 0.8710913375149281, "learning_rate": 0.00017546986523331762, "loss": 12.4173, "step": 9217 }, { "epoch": 0.501956940502202, "grad_norm": 0.6260169853757912, "learning_rate": 0.00017546407953115002, "loss": 12.455, "step": 9218 }, { "epoch": 0.502011394498785, "grad_norm": 0.6306583641898258, "learning_rate": 0.00017545829324216108, "loss": 12.4029, "step": 9219 }, { "epoch": 0.5020658484953681, "grad_norm": 0.8238270270686283, "learning_rate": 0.0001754525063663958, "loss": 12.5189, "step": 9220 }, { "epoch": 0.502120302491951, "grad_norm": 0.5536911448623411, "learning_rate": 0.00017544671890389914, "loss": 12.3506, "step": 9221 }, { "epoch": 0.502174756488534, "grad_norm": 0.6744248023901999, "learning_rate": 0.00017544093085471616, "loss": 12.4723, "step": 9222 }, { "epoch": 0.502229210485117, "grad_norm": 0.6133959900954926, "learning_rate": 0.00017543514221889187, "loss": 12.4893, "step": 9223 }, { "epoch": 0.5022836644817, "grad_norm": 0.5929697847285679, "learning_rate": 0.00017542935299647122, "loss": 12.4983, "step": 9224 }, { "epoch": 0.502338118478283, "grad_norm": 0.6482710041361429, "learning_rate": 0.00017542356318749934, "loss": 12.287, "step": 9225 }, { "epoch": 0.5023925724748661, "grad_norm": 0.6812798436350104, "learning_rate": 0.0001754177727920211, "loss": 12.422, "step": 9226 }, { "epoch": 0.5024470264714491, "grad_norm": 0.5734535128505137, "learning_rate": 0.00017541198181008169, "loss": 12.3762, "step": 9227 }, { "epoch": 0.5025014804680321, "grad_norm": 0.7298704517473773, "learning_rate": 0.000175406190241726, "loss": 12.3909, "step": 9228 }, { "epoch": 0.5025559344646151, "grad_norm": 0.6211270568866112, "learning_rate": 0.0001754003980869992, "loss": 12.514, "step": 9229 }, { "epoch": 0.5026103884611981, "grad_norm": 0.6538190401861905, "learning_rate": 0.00017539460534594622, "loss": 12.4927, "step": 9230 }, { "epoch": 0.5026648424577811, "grad_norm": 0.649123256226189, "learning_rate": 0.00017538881201861214, "loss": 12.4245, "step": 9231 }, { "epoch": 0.5027192964543642, "grad_norm": 0.5442248840080184, "learning_rate": 0.00017538301810504203, "loss": 12.3251, "step": 9232 }, { "epoch": 0.5027737504509472, "grad_norm": 0.5847750120836829, "learning_rate": 0.0001753772236052809, "loss": 12.4215, "step": 9233 }, { "epoch": 0.5028282044475302, "grad_norm": 0.6489129014356729, "learning_rate": 0.00017537142851937386, "loss": 12.4467, "step": 9234 }, { "epoch": 0.5028826584441132, "grad_norm": 0.6464215828254797, "learning_rate": 0.00017536563284736593, "loss": 12.357, "step": 9235 }, { "epoch": 0.5029371124406962, "grad_norm": 0.5998675678204343, "learning_rate": 0.00017535983658930224, "loss": 12.4077, "step": 9236 }, { "epoch": 0.5029915664372792, "grad_norm": 0.6711699450059563, "learning_rate": 0.0001753540397452278, "loss": 12.4252, "step": 9237 }, { "epoch": 0.5030460204338623, "grad_norm": 0.6565266462253379, "learning_rate": 0.00017534824231518772, "loss": 12.5112, "step": 9238 }, { "epoch": 0.5031004744304453, "grad_norm": 0.7106207839496469, "learning_rate": 0.00017534244429922704, "loss": 12.3185, "step": 9239 }, { "epoch": 0.5031549284270282, "grad_norm": 0.6746786233771818, "learning_rate": 0.00017533664569739093, "loss": 12.4076, "step": 9240 }, { "epoch": 0.5032093824236112, "grad_norm": 0.7287877456392946, "learning_rate": 0.00017533084650972437, "loss": 12.4355, "step": 9241 }, { "epoch": 0.5032638364201942, "grad_norm": 0.7183288979000483, "learning_rate": 0.00017532504673627256, "loss": 12.3689, "step": 9242 }, { "epoch": 0.5033182904167772, "grad_norm": 0.5913008733418407, "learning_rate": 0.00017531924637708053, "loss": 12.3267, "step": 9243 }, { "epoch": 0.5033727444133603, "grad_norm": 0.7399657849568171, "learning_rate": 0.0001753134454321934, "loss": 12.3304, "step": 9244 }, { "epoch": 0.5034271984099433, "grad_norm": 0.6330603467608257, "learning_rate": 0.00017530764390165634, "loss": 12.3574, "step": 9245 }, { "epoch": 0.5034816524065263, "grad_norm": 0.6249736543216847, "learning_rate": 0.0001753018417855144, "loss": 12.4009, "step": 9246 }, { "epoch": 0.5035361064031093, "grad_norm": 0.6501824218490548, "learning_rate": 0.00017529603908381268, "loss": 12.4118, "step": 9247 }, { "epoch": 0.5035905603996923, "grad_norm": 0.5848558170314551, "learning_rate": 0.00017529023579659634, "loss": 12.3934, "step": 9248 }, { "epoch": 0.5036450143962753, "grad_norm": 0.6547297192921312, "learning_rate": 0.00017528443192391048, "loss": 12.4775, "step": 9249 }, { "epoch": 0.5036994683928584, "grad_norm": 0.9148181678227723, "learning_rate": 0.00017527862746580028, "loss": 12.502, "step": 9250 }, { "epoch": 0.5037539223894414, "grad_norm": 0.6406385901042989, "learning_rate": 0.00017527282242231083, "loss": 12.4692, "step": 9251 }, { "epoch": 0.5038083763860244, "grad_norm": 0.639199761303182, "learning_rate": 0.0001752670167934873, "loss": 12.4294, "step": 9252 }, { "epoch": 0.5038628303826074, "grad_norm": 0.6108791181730224, "learning_rate": 0.00017526121057937486, "loss": 12.566, "step": 9253 }, { "epoch": 0.5039172843791904, "grad_norm": 0.6462775981127912, "learning_rate": 0.00017525540378001856, "loss": 12.4095, "step": 9254 }, { "epoch": 0.5039717383757735, "grad_norm": 0.6117529957039444, "learning_rate": 0.00017524959639546368, "loss": 12.3417, "step": 9255 }, { "epoch": 0.5040261923723565, "grad_norm": 0.6653108100707444, "learning_rate": 0.0001752437884257553, "loss": 12.5033, "step": 9256 }, { "epoch": 0.5040806463689395, "grad_norm": 0.5761033208886187, "learning_rate": 0.00017523797987093858, "loss": 12.3672, "step": 9257 }, { "epoch": 0.5041351003655224, "grad_norm": 0.6018386004957551, "learning_rate": 0.00017523217073105873, "loss": 12.2592, "step": 9258 }, { "epoch": 0.5041895543621054, "grad_norm": 0.6977867384115372, "learning_rate": 0.0001752263610061609, "loss": 12.5126, "step": 9259 }, { "epoch": 0.5042440083586884, "grad_norm": 0.6070018997053899, "learning_rate": 0.00017522055069629027, "loss": 12.4393, "step": 9260 }, { "epoch": 0.5042984623552715, "grad_norm": 0.6458477465628019, "learning_rate": 0.00017521473980149202, "loss": 12.4836, "step": 9261 }, { "epoch": 0.5043529163518545, "grad_norm": 0.5913666593119621, "learning_rate": 0.00017520892832181134, "loss": 12.3034, "step": 9262 }, { "epoch": 0.5044073703484375, "grad_norm": 0.6387974436651321, "learning_rate": 0.00017520311625729345, "loss": 12.4383, "step": 9263 }, { "epoch": 0.5044618243450205, "grad_norm": 0.6663629819407951, "learning_rate": 0.00017519730360798346, "loss": 12.3579, "step": 9264 }, { "epoch": 0.5045162783416035, "grad_norm": 0.6743783218918941, "learning_rate": 0.00017519149037392668, "loss": 12.3556, "step": 9265 }, { "epoch": 0.5045707323381865, "grad_norm": 0.5934684892792085, "learning_rate": 0.0001751856765551682, "loss": 12.6217, "step": 9266 }, { "epoch": 0.5046251863347696, "grad_norm": 0.6542403394106991, "learning_rate": 0.00017517986215175336, "loss": 12.4348, "step": 9267 }, { "epoch": 0.5046796403313526, "grad_norm": 0.6140821063343879, "learning_rate": 0.00017517404716372727, "loss": 12.2976, "step": 9268 }, { "epoch": 0.5047340943279356, "grad_norm": 0.6302783978472125, "learning_rate": 0.0001751682315911352, "loss": 12.3735, "step": 9269 }, { "epoch": 0.5047885483245186, "grad_norm": 0.617713894873678, "learning_rate": 0.00017516241543402233, "loss": 12.4546, "step": 9270 }, { "epoch": 0.5048430023211016, "grad_norm": 0.6316766316871215, "learning_rate": 0.00017515659869243391, "loss": 12.3908, "step": 9271 }, { "epoch": 0.5048974563176846, "grad_norm": 0.6274417258446252, "learning_rate": 0.0001751507813664152, "loss": 12.4156, "step": 9272 }, { "epoch": 0.5049519103142677, "grad_norm": 0.6180856210866346, "learning_rate": 0.00017514496345601144, "loss": 12.4906, "step": 9273 }, { "epoch": 0.5050063643108507, "grad_norm": 0.6618825336368301, "learning_rate": 0.00017513914496126778, "loss": 12.4492, "step": 9274 }, { "epoch": 0.5050608183074337, "grad_norm": 0.6346775520196072, "learning_rate": 0.00017513332588222955, "loss": 12.4255, "step": 9275 }, { "epoch": 0.5051152723040166, "grad_norm": 0.5713866022890768, "learning_rate": 0.00017512750621894197, "loss": 12.3917, "step": 9276 }, { "epoch": 0.5051697263005996, "grad_norm": 0.6644568918871256, "learning_rate": 0.0001751216859714503, "loss": 12.5056, "step": 9277 }, { "epoch": 0.5052241802971826, "grad_norm": 0.6559421524036552, "learning_rate": 0.00017511586513979985, "loss": 12.4277, "step": 9278 }, { "epoch": 0.5052786342937657, "grad_norm": 0.5870845703090499, "learning_rate": 0.00017511004372403578, "loss": 12.5096, "step": 9279 }, { "epoch": 0.5053330882903487, "grad_norm": 0.706795464340835, "learning_rate": 0.00017510422172420343, "loss": 12.3108, "step": 9280 }, { "epoch": 0.5053875422869317, "grad_norm": 0.5700640206283255, "learning_rate": 0.00017509839914034808, "loss": 12.2902, "step": 9281 }, { "epoch": 0.5054419962835147, "grad_norm": 0.6128756126723002, "learning_rate": 0.00017509257597251497, "loss": 12.4242, "step": 9282 }, { "epoch": 0.5054964502800977, "grad_norm": 0.6653571480654208, "learning_rate": 0.00017508675222074936, "loss": 12.4248, "step": 9283 }, { "epoch": 0.5055509042766807, "grad_norm": 0.6167569171939026, "learning_rate": 0.00017508092788509662, "loss": 12.4268, "step": 9284 }, { "epoch": 0.5056053582732638, "grad_norm": 0.6094727507670129, "learning_rate": 0.00017507510296560196, "loss": 12.4102, "step": 9285 }, { "epoch": 0.5056598122698468, "grad_norm": 0.6261696978568257, "learning_rate": 0.00017506927746231075, "loss": 12.3727, "step": 9286 }, { "epoch": 0.5057142662664298, "grad_norm": 0.5853889054617658, "learning_rate": 0.00017506345137526823, "loss": 12.2828, "step": 9287 }, { "epoch": 0.5057687202630128, "grad_norm": 0.6989389592903248, "learning_rate": 0.00017505762470451972, "loss": 12.5096, "step": 9288 }, { "epoch": 0.5058231742595958, "grad_norm": 0.8410187354062965, "learning_rate": 0.00017505179745011055, "loss": 12.4025, "step": 9289 }, { "epoch": 0.5058776282561789, "grad_norm": 0.6356104259521266, "learning_rate": 0.000175045969612086, "loss": 12.4196, "step": 9290 }, { "epoch": 0.5059320822527619, "grad_norm": 0.8491671153494073, "learning_rate": 0.0001750401411904914, "loss": 12.3207, "step": 9291 }, { "epoch": 0.5059865362493449, "grad_norm": 0.6281540370395706, "learning_rate": 0.00017503431218537205, "loss": 12.4043, "step": 9292 }, { "epoch": 0.5060409902459279, "grad_norm": 0.7105148750537855, "learning_rate": 0.00017502848259677337, "loss": 12.4283, "step": 9293 }, { "epoch": 0.5060954442425109, "grad_norm": 0.7066195639781923, "learning_rate": 0.00017502265242474058, "loss": 12.4412, "step": 9294 }, { "epoch": 0.5061498982390938, "grad_norm": 0.7110921055568227, "learning_rate": 0.0001750168216693191, "loss": 12.4691, "step": 9295 }, { "epoch": 0.506204352235677, "grad_norm": 0.6143960464742542, "learning_rate": 0.00017501099033055418, "loss": 12.4342, "step": 9296 }, { "epoch": 0.5062588062322599, "grad_norm": 0.6211083975799827, "learning_rate": 0.00017500515840849126, "loss": 12.444, "step": 9297 }, { "epoch": 0.5063132602288429, "grad_norm": 0.6150695172377717, "learning_rate": 0.00017499932590317566, "loss": 12.4752, "step": 9298 }, { "epoch": 0.5063677142254259, "grad_norm": 0.5526792990362289, "learning_rate": 0.0001749934928146527, "loss": 12.4155, "step": 9299 }, { "epoch": 0.5064221682220089, "grad_norm": 0.6248669052704635, "learning_rate": 0.00017498765914296778, "loss": 12.4054, "step": 9300 }, { "epoch": 0.5064766222185919, "grad_norm": 0.5910774952254157, "learning_rate": 0.00017498182488816623, "loss": 12.3861, "step": 9301 }, { "epoch": 0.506531076215175, "grad_norm": 0.5865642470842327, "learning_rate": 0.00017497599005029344, "loss": 12.2948, "step": 9302 }, { "epoch": 0.506585530211758, "grad_norm": 0.6118141150631882, "learning_rate": 0.00017497015462939478, "loss": 12.4216, "step": 9303 }, { "epoch": 0.506639984208341, "grad_norm": 0.6419707275290791, "learning_rate": 0.00017496431862551562, "loss": 12.1743, "step": 9304 }, { "epoch": 0.506694438204924, "grad_norm": 0.6011774662281038, "learning_rate": 0.00017495848203870134, "loss": 12.3669, "step": 9305 }, { "epoch": 0.506748892201507, "grad_norm": 0.65660842217026, "learning_rate": 0.00017495264486899737, "loss": 12.3027, "step": 9306 }, { "epoch": 0.50680334619809, "grad_norm": 0.6817634176980636, "learning_rate": 0.00017494680711644902, "loss": 12.4326, "step": 9307 }, { "epoch": 0.5068578001946731, "grad_norm": 0.561583501971307, "learning_rate": 0.00017494096878110176, "loss": 12.4116, "step": 9308 }, { "epoch": 0.5069122541912561, "grad_norm": 0.5435703750824127, "learning_rate": 0.00017493512986300095, "loss": 12.3742, "step": 9309 }, { "epoch": 0.5069667081878391, "grad_norm": 0.6022867408072567, "learning_rate": 0.00017492929036219202, "loss": 12.3662, "step": 9310 }, { "epoch": 0.5070211621844221, "grad_norm": 0.7724953334573791, "learning_rate": 0.00017492345027872035, "loss": 12.5138, "step": 9311 }, { "epoch": 0.507075616181005, "grad_norm": 0.6585920328470743, "learning_rate": 0.00017491760961263136, "loss": 12.3403, "step": 9312 }, { "epoch": 0.507130070177588, "grad_norm": 0.6466273952386169, "learning_rate": 0.00017491176836397046, "loss": 12.4771, "step": 9313 }, { "epoch": 0.5071845241741711, "grad_norm": 0.6009781768170371, "learning_rate": 0.0001749059265327831, "loss": 12.4441, "step": 9314 }, { "epoch": 0.5072389781707541, "grad_norm": 0.6840403280864805, "learning_rate": 0.0001749000841191147, "loss": 12.5198, "step": 9315 }, { "epoch": 0.5072934321673371, "grad_norm": 0.6337536226529283, "learning_rate": 0.00017489424112301067, "loss": 12.4631, "step": 9316 }, { "epoch": 0.5073478861639201, "grad_norm": 0.6630045328127501, "learning_rate": 0.00017488839754451648, "loss": 12.4571, "step": 9317 }, { "epoch": 0.5074023401605031, "grad_norm": 0.674610953829062, "learning_rate": 0.00017488255338367754, "loss": 12.3755, "step": 9318 }, { "epoch": 0.5074567941570862, "grad_norm": 0.7346706962784108, "learning_rate": 0.0001748767086405393, "loss": 12.4546, "step": 9319 }, { "epoch": 0.5075112481536692, "grad_norm": 0.6197681874525446, "learning_rate": 0.00017487086331514725, "loss": 12.3235, "step": 9320 }, { "epoch": 0.5075657021502522, "grad_norm": 0.5401263427026369, "learning_rate": 0.00017486501740754677, "loss": 12.327, "step": 9321 }, { "epoch": 0.5076201561468352, "grad_norm": 0.6569790738085491, "learning_rate": 0.00017485917091778337, "loss": 12.2445, "step": 9322 }, { "epoch": 0.5076746101434182, "grad_norm": 0.6418331149863142, "learning_rate": 0.00017485332384590253, "loss": 12.3183, "step": 9323 }, { "epoch": 0.5077290641400012, "grad_norm": 0.6243981903237095, "learning_rate": 0.00017484747619194964, "loss": 12.3514, "step": 9324 }, { "epoch": 0.5077835181365843, "grad_norm": 0.714394137512393, "learning_rate": 0.00017484162795597028, "loss": 12.4041, "step": 9325 }, { "epoch": 0.5078379721331673, "grad_norm": 0.6336579308981977, "learning_rate": 0.00017483577913800984, "loss": 12.4572, "step": 9326 }, { "epoch": 0.5078924261297503, "grad_norm": 0.648306200933378, "learning_rate": 0.00017482992973811382, "loss": 12.4322, "step": 9327 }, { "epoch": 0.5079468801263333, "grad_norm": 0.6449632020500176, "learning_rate": 0.00017482407975632775, "loss": 12.2739, "step": 9328 }, { "epoch": 0.5080013341229163, "grad_norm": 0.560273711009082, "learning_rate": 0.00017481822919269705, "loss": 12.3498, "step": 9329 }, { "epoch": 0.5080557881194993, "grad_norm": 0.7294627898226387, "learning_rate": 0.00017481237804726728, "loss": 12.3475, "step": 9330 }, { "epoch": 0.5081102421160824, "grad_norm": 0.6666923064781625, "learning_rate": 0.00017480652632008393, "loss": 12.3618, "step": 9331 }, { "epoch": 0.5081646961126653, "grad_norm": 0.611908423405081, "learning_rate": 0.00017480067401119245, "loss": 12.3883, "step": 9332 }, { "epoch": 0.5082191501092483, "grad_norm": 0.5789056783313532, "learning_rate": 0.0001747948211206384, "loss": 12.3337, "step": 9333 }, { "epoch": 0.5082736041058313, "grad_norm": 0.6528298517402605, "learning_rate": 0.00017478896764846726, "loss": 12.3418, "step": 9334 }, { "epoch": 0.5083280581024143, "grad_norm": 0.586154787199463, "learning_rate": 0.00017478311359472458, "loss": 12.346, "step": 9335 }, { "epoch": 0.5083825120989973, "grad_norm": 0.7375603848486024, "learning_rate": 0.00017477725895945589, "loss": 12.4789, "step": 9336 }, { "epoch": 0.5084369660955804, "grad_norm": 0.5622036931182314, "learning_rate": 0.00017477140374270666, "loss": 12.3521, "step": 9337 }, { "epoch": 0.5084914200921634, "grad_norm": 0.7094470049438123, "learning_rate": 0.00017476554794452248, "loss": 12.3785, "step": 9338 }, { "epoch": 0.5085458740887464, "grad_norm": 0.5535802003127333, "learning_rate": 0.00017475969156494883, "loss": 12.3477, "step": 9339 }, { "epoch": 0.5086003280853294, "grad_norm": 0.6060587087426393, "learning_rate": 0.0001747538346040313, "loss": 12.4283, "step": 9340 }, { "epoch": 0.5086547820819124, "grad_norm": 0.6486830014576412, "learning_rate": 0.00017474797706181546, "loss": 12.5064, "step": 9341 }, { "epoch": 0.5087092360784954, "grad_norm": 0.650171933766708, "learning_rate": 0.00017474211893834672, "loss": 12.3925, "step": 9342 }, { "epoch": 0.5087636900750785, "grad_norm": 0.6178982347558384, "learning_rate": 0.0001747362602336708, "loss": 12.4251, "step": 9343 }, { "epoch": 0.5088181440716615, "grad_norm": 0.6098889569054531, "learning_rate": 0.00017473040094783318, "loss": 12.4814, "step": 9344 }, { "epoch": 0.5088725980682445, "grad_norm": 0.6397229844431023, "learning_rate": 0.00017472454108087942, "loss": 12.5545, "step": 9345 }, { "epoch": 0.5089270520648275, "grad_norm": 0.662132294090361, "learning_rate": 0.0001747186806328551, "loss": 12.5107, "step": 9346 }, { "epoch": 0.5089815060614105, "grad_norm": 0.7496901176361102, "learning_rate": 0.00017471281960380577, "loss": 12.4908, "step": 9347 }, { "epoch": 0.5090359600579935, "grad_norm": 0.5732062504922385, "learning_rate": 0.00017470695799377703, "loss": 12.3264, "step": 9348 }, { "epoch": 0.5090904140545766, "grad_norm": 0.6151031159507464, "learning_rate": 0.00017470109580281447, "loss": 12.304, "step": 9349 }, { "epoch": 0.5091448680511595, "grad_norm": 0.6959356182660499, "learning_rate": 0.00017469523303096363, "loss": 12.3683, "step": 9350 }, { "epoch": 0.5091993220477425, "grad_norm": 0.6226389570908005, "learning_rate": 0.00017468936967827018, "loss": 12.4791, "step": 9351 }, { "epoch": 0.5092537760443255, "grad_norm": 0.6946591122987917, "learning_rate": 0.00017468350574477965, "loss": 12.3415, "step": 9352 }, { "epoch": 0.5093082300409085, "grad_norm": 0.565289908611492, "learning_rate": 0.00017467764123053764, "loss": 12.3893, "step": 9353 }, { "epoch": 0.5093626840374916, "grad_norm": 0.589008060870395, "learning_rate": 0.0001746717761355898, "loss": 12.3961, "step": 9354 }, { "epoch": 0.5094171380340746, "grad_norm": 0.6423667999843898, "learning_rate": 0.00017466591045998167, "loss": 12.2483, "step": 9355 }, { "epoch": 0.5094715920306576, "grad_norm": 0.5878197206271527, "learning_rate": 0.0001746600442037589, "loss": 12.2345, "step": 9356 }, { "epoch": 0.5095260460272406, "grad_norm": 0.611952765287439, "learning_rate": 0.0001746541773669671, "loss": 12.3193, "step": 9357 }, { "epoch": 0.5095805000238236, "grad_norm": 0.7025299355851088, "learning_rate": 0.00017464830994965195, "loss": 12.6056, "step": 9358 }, { "epoch": 0.5096349540204066, "grad_norm": 0.5844492524972718, "learning_rate": 0.000174642441951859, "loss": 12.3313, "step": 9359 }, { "epoch": 0.5096894080169897, "grad_norm": 0.5870217567121213, "learning_rate": 0.00017463657337363388, "loss": 12.2468, "step": 9360 }, { "epoch": 0.5097438620135727, "grad_norm": 0.544716100069704, "learning_rate": 0.00017463070421502226, "loss": 12.4479, "step": 9361 }, { "epoch": 0.5097983160101557, "grad_norm": 0.709392125597355, "learning_rate": 0.00017462483447606977, "loss": 12.5405, "step": 9362 }, { "epoch": 0.5098527700067387, "grad_norm": 0.6876129323729496, "learning_rate": 0.00017461896415682206, "loss": 12.6349, "step": 9363 }, { "epoch": 0.5099072240033217, "grad_norm": 0.5646918644140049, "learning_rate": 0.00017461309325732474, "loss": 12.4339, "step": 9364 }, { "epoch": 0.5099616779999047, "grad_norm": 0.6483491971824025, "learning_rate": 0.00017460722177762352, "loss": 12.53, "step": 9365 }, { "epoch": 0.5100161319964878, "grad_norm": 0.5661502815396339, "learning_rate": 0.00017460134971776406, "loss": 12.3638, "step": 9366 }, { "epoch": 0.5100705859930708, "grad_norm": 0.6473770934053301, "learning_rate": 0.00017459547707779195, "loss": 12.4482, "step": 9367 }, { "epoch": 0.5101250399896538, "grad_norm": 0.5870198823363907, "learning_rate": 0.00017458960385775292, "loss": 12.3818, "step": 9368 }, { "epoch": 0.5101794939862367, "grad_norm": 0.622478533573811, "learning_rate": 0.0001745837300576926, "loss": 12.3057, "step": 9369 }, { "epoch": 0.5102339479828197, "grad_norm": 0.6348645706981006, "learning_rate": 0.00017457785567765672, "loss": 12.4243, "step": 9370 }, { "epoch": 0.5102884019794027, "grad_norm": 0.5427899853169943, "learning_rate": 0.00017457198071769091, "loss": 12.4867, "step": 9371 }, { "epoch": 0.5103428559759858, "grad_norm": 0.9573474443809805, "learning_rate": 0.00017456610517784088, "loss": 12.3764, "step": 9372 }, { "epoch": 0.5103973099725688, "grad_norm": 0.6348206524178484, "learning_rate": 0.00017456022905815228, "loss": 12.3435, "step": 9373 }, { "epoch": 0.5104517639691518, "grad_norm": 0.6032852796079554, "learning_rate": 0.00017455435235867088, "loss": 12.5172, "step": 9374 }, { "epoch": 0.5105062179657348, "grad_norm": 0.6296409528090846, "learning_rate": 0.0001745484750794423, "loss": 12.3565, "step": 9375 }, { "epoch": 0.5105606719623178, "grad_norm": 0.5653690358017915, "learning_rate": 0.0001745425972205123, "loss": 12.2817, "step": 9376 }, { "epoch": 0.5106151259589008, "grad_norm": 0.600763765793065, "learning_rate": 0.00017453671878192654, "loss": 12.3462, "step": 9377 }, { "epoch": 0.5106695799554839, "grad_norm": 0.5921711828910851, "learning_rate": 0.00017453083976373077, "loss": 12.4408, "step": 9378 }, { "epoch": 0.5107240339520669, "grad_norm": 0.653706867357765, "learning_rate": 0.0001745249601659707, "loss": 12.3856, "step": 9379 }, { "epoch": 0.5107784879486499, "grad_norm": 0.6498084542557725, "learning_rate": 0.00017451907998869204, "loss": 12.5023, "step": 9380 }, { "epoch": 0.5108329419452329, "grad_norm": 0.6621391340956829, "learning_rate": 0.00017451319923194046, "loss": 12.4081, "step": 9381 }, { "epoch": 0.5108873959418159, "grad_norm": 0.5955740821428355, "learning_rate": 0.0001745073178957618, "loss": 12.4427, "step": 9382 }, { "epoch": 0.5109418499383989, "grad_norm": 0.629955623693329, "learning_rate": 0.00017450143598020173, "loss": 12.4182, "step": 9383 }, { "epoch": 0.510996303934982, "grad_norm": 0.620450415710074, "learning_rate": 0.00017449555348530602, "loss": 12.4561, "step": 9384 }, { "epoch": 0.511050757931565, "grad_norm": 0.6694978927482998, "learning_rate": 0.0001744896704111204, "loss": 12.3695, "step": 9385 }, { "epoch": 0.511105211928148, "grad_norm": 0.5680924165840874, "learning_rate": 0.0001744837867576906, "loss": 12.3348, "step": 9386 }, { "epoch": 0.5111596659247309, "grad_norm": 0.6566945684404797, "learning_rate": 0.00017447790252506237, "loss": 12.3644, "step": 9387 }, { "epoch": 0.5112141199213139, "grad_norm": 0.6437455314024562, "learning_rate": 0.00017447201771328148, "loss": 12.4965, "step": 9388 }, { "epoch": 0.511268573917897, "grad_norm": 0.5732268946297673, "learning_rate": 0.00017446613232239368, "loss": 12.4202, "step": 9389 }, { "epoch": 0.51132302791448, "grad_norm": 0.5722288630732446, "learning_rate": 0.00017446024635244474, "loss": 12.3932, "step": 9390 }, { "epoch": 0.511377481911063, "grad_norm": 0.5989469527624901, "learning_rate": 0.00017445435980348046, "loss": 12.4469, "step": 9391 }, { "epoch": 0.511431935907646, "grad_norm": 0.5669576255239827, "learning_rate": 0.0001744484726755466, "loss": 12.2855, "step": 9392 }, { "epoch": 0.511486389904229, "grad_norm": 0.6190733787516781, "learning_rate": 0.00017444258496868896, "loss": 12.5061, "step": 9393 }, { "epoch": 0.511540843900812, "grad_norm": 0.5913540005436615, "learning_rate": 0.00017443669668295322, "loss": 12.4111, "step": 9394 }, { "epoch": 0.5115952978973951, "grad_norm": 0.6273924044684714, "learning_rate": 0.0001744308078183853, "loss": 12.4899, "step": 9395 }, { "epoch": 0.5116497518939781, "grad_norm": 0.7100710497105639, "learning_rate": 0.00017442491837503093, "loss": 12.4267, "step": 9396 }, { "epoch": 0.5117042058905611, "grad_norm": 0.6071059916931937, "learning_rate": 0.0001744190283529359, "loss": 12.342, "step": 9397 }, { "epoch": 0.5117586598871441, "grad_norm": 0.6198389253712389, "learning_rate": 0.000174413137752146, "loss": 12.345, "step": 9398 }, { "epoch": 0.5118131138837271, "grad_norm": 0.6068728098719426, "learning_rate": 0.0001744072465727071, "loss": 12.3248, "step": 9399 }, { "epoch": 0.5118675678803101, "grad_norm": 0.5786485279810574, "learning_rate": 0.00017440135481466495, "loss": 12.3396, "step": 9400 }, { "epoch": 0.5119220218768932, "grad_norm": 0.6021258048098241, "learning_rate": 0.0001743954624780654, "loss": 12.41, "step": 9401 }, { "epoch": 0.5119764758734762, "grad_norm": 0.5835711794474728, "learning_rate": 0.00017438956956295428, "loss": 12.3808, "step": 9402 }, { "epoch": 0.5120309298700592, "grad_norm": 0.6170318593463341, "learning_rate": 0.00017438367606937733, "loss": 12.4245, "step": 9403 }, { "epoch": 0.5120853838666422, "grad_norm": 0.6225798029500026, "learning_rate": 0.0001743777819973805, "loss": 12.2819, "step": 9404 }, { "epoch": 0.5121398378632251, "grad_norm": 0.6924692025557676, "learning_rate": 0.00017437188734700952, "loss": 12.3891, "step": 9405 }, { "epoch": 0.5121942918598081, "grad_norm": 0.6360638878142899, "learning_rate": 0.00017436599211831033, "loss": 12.5471, "step": 9406 }, { "epoch": 0.5122487458563912, "grad_norm": 0.6901262928698174, "learning_rate": 0.00017436009631132865, "loss": 12.4722, "step": 9407 }, { "epoch": 0.5123031998529742, "grad_norm": 0.5930470801184227, "learning_rate": 0.0001743541999261104, "loss": 12.3287, "step": 9408 }, { "epoch": 0.5123576538495572, "grad_norm": 0.6015812554557252, "learning_rate": 0.00017434830296270145, "loss": 12.4187, "step": 9409 }, { "epoch": 0.5124121078461402, "grad_norm": 0.6768925144012764, "learning_rate": 0.00017434240542114763, "loss": 12.3967, "step": 9410 }, { "epoch": 0.5124665618427232, "grad_norm": 0.5927749895779888, "learning_rate": 0.0001743365073014948, "loss": 12.329, "step": 9411 }, { "epoch": 0.5125210158393062, "grad_norm": 0.6560057717129133, "learning_rate": 0.0001743306086037888, "loss": 12.4162, "step": 9412 }, { "epoch": 0.5125754698358893, "grad_norm": 0.5679942053866345, "learning_rate": 0.00017432470932807553, "loss": 12.3746, "step": 9413 }, { "epoch": 0.5126299238324723, "grad_norm": 0.5901890825124537, "learning_rate": 0.00017431880947440086, "loss": 12.3553, "step": 9414 }, { "epoch": 0.5126843778290553, "grad_norm": 0.611893604121528, "learning_rate": 0.00017431290904281068, "loss": 12.4703, "step": 9415 }, { "epoch": 0.5127388318256383, "grad_norm": 0.7235144795413267, "learning_rate": 0.00017430700803335082, "loss": 12.3188, "step": 9416 }, { "epoch": 0.5127932858222213, "grad_norm": 0.5831544036131251, "learning_rate": 0.00017430110644606726, "loss": 12.4885, "step": 9417 }, { "epoch": 0.5128477398188043, "grad_norm": 0.6350100789777187, "learning_rate": 0.00017429520428100578, "loss": 12.289, "step": 9418 }, { "epoch": 0.5129021938153874, "grad_norm": 0.6235061151953825, "learning_rate": 0.00017428930153821237, "loss": 12.3618, "step": 9419 }, { "epoch": 0.5129566478119704, "grad_norm": 0.73164702015834, "learning_rate": 0.00017428339821773286, "loss": 12.4974, "step": 9420 }, { "epoch": 0.5130111018085534, "grad_norm": 0.5782408752389678, "learning_rate": 0.0001742774943196132, "loss": 12.2829, "step": 9421 }, { "epoch": 0.5130655558051364, "grad_norm": 0.6331879886470847, "learning_rate": 0.0001742715898438993, "loss": 12.3868, "step": 9422 }, { "epoch": 0.5131200098017193, "grad_norm": 0.667002434791157, "learning_rate": 0.00017426568479063706, "loss": 12.2425, "step": 9423 }, { "epoch": 0.5131744637983024, "grad_norm": 0.7599167814718416, "learning_rate": 0.0001742597791598724, "loss": 12.2946, "step": 9424 }, { "epoch": 0.5132289177948854, "grad_norm": 0.6900616178179273, "learning_rate": 0.00017425387295165123, "loss": 12.6106, "step": 9425 }, { "epoch": 0.5132833717914684, "grad_norm": 0.629354602395147, "learning_rate": 0.00017424796616601952, "loss": 12.4014, "step": 9426 }, { "epoch": 0.5133378257880514, "grad_norm": 0.6277706451765541, "learning_rate": 0.00017424205880302312, "loss": 12.4716, "step": 9427 }, { "epoch": 0.5133922797846344, "grad_norm": 0.7167530697844063, "learning_rate": 0.00017423615086270807, "loss": 12.4673, "step": 9428 }, { "epoch": 0.5134467337812174, "grad_norm": 0.5933066126575434, "learning_rate": 0.00017423024234512025, "loss": 12.3945, "step": 9429 }, { "epoch": 0.5135011877778005, "grad_norm": 0.6372901686720958, "learning_rate": 0.0001742243332503056, "loss": 12.522, "step": 9430 }, { "epoch": 0.5135556417743835, "grad_norm": 0.6803090489789331, "learning_rate": 0.0001742184235783101, "loss": 12.2965, "step": 9431 }, { "epoch": 0.5136100957709665, "grad_norm": 0.6042308174334214, "learning_rate": 0.0001742125133291797, "loss": 12.2807, "step": 9432 }, { "epoch": 0.5136645497675495, "grad_norm": 0.6633908205859076, "learning_rate": 0.00017420660250296033, "loss": 12.5998, "step": 9433 }, { "epoch": 0.5137190037641325, "grad_norm": 0.6149312761371097, "learning_rate": 0.000174200691099698, "loss": 12.4411, "step": 9434 }, { "epoch": 0.5137734577607155, "grad_norm": 0.5980568927740617, "learning_rate": 0.00017419477911943864, "loss": 12.2741, "step": 9435 }, { "epoch": 0.5138279117572986, "grad_norm": 0.7464389865428256, "learning_rate": 0.0001741888665622282, "loss": 12.4924, "step": 9436 }, { "epoch": 0.5138823657538816, "grad_norm": 0.6250246789630683, "learning_rate": 0.00017418295342811274, "loss": 12.4717, "step": 9437 }, { "epoch": 0.5139368197504646, "grad_norm": 0.7034476056047101, "learning_rate": 0.00017417703971713818, "loss": 12.499, "step": 9438 }, { "epoch": 0.5139912737470476, "grad_norm": 0.6775469200024059, "learning_rate": 0.00017417112542935054, "loss": 12.5894, "step": 9439 }, { "epoch": 0.5140457277436306, "grad_norm": 0.6495361024701459, "learning_rate": 0.00017416521056479577, "loss": 12.4821, "step": 9440 }, { "epoch": 0.5141001817402135, "grad_norm": 0.6612506125438407, "learning_rate": 0.0001741592951235199, "loss": 12.4193, "step": 9441 }, { "epoch": 0.5141546357367967, "grad_norm": 0.5411831783413918, "learning_rate": 0.00017415337910556888, "loss": 12.2917, "step": 9442 }, { "epoch": 0.5142090897333796, "grad_norm": 0.6008695783759355, "learning_rate": 0.00017414746251098878, "loss": 12.2273, "step": 9443 }, { "epoch": 0.5142635437299626, "grad_norm": 0.6741696018952588, "learning_rate": 0.00017414154533982554, "loss": 12.3734, "step": 9444 }, { "epoch": 0.5143179977265456, "grad_norm": 0.5877583006812053, "learning_rate": 0.00017413562759212525, "loss": 12.2825, "step": 9445 }, { "epoch": 0.5143724517231286, "grad_norm": 0.6357369551078746, "learning_rate": 0.00017412970926793388, "loss": 12.4142, "step": 9446 }, { "epoch": 0.5144269057197116, "grad_norm": 0.6243208273579955, "learning_rate": 0.00017412379036729745, "loss": 12.2237, "step": 9447 }, { "epoch": 0.5144813597162947, "grad_norm": 0.6491307238798837, "learning_rate": 0.00017411787089026198, "loss": 12.3881, "step": 9448 }, { "epoch": 0.5145358137128777, "grad_norm": 0.582036403334767, "learning_rate": 0.00017411195083687355, "loss": 12.3083, "step": 9449 }, { "epoch": 0.5145902677094607, "grad_norm": 0.6185441686619526, "learning_rate": 0.00017410603020717814, "loss": 12.4296, "step": 9450 }, { "epoch": 0.5146447217060437, "grad_norm": 0.557063880310617, "learning_rate": 0.00017410010900122182, "loss": 12.3116, "step": 9451 }, { "epoch": 0.5146991757026267, "grad_norm": 0.6471912907317706, "learning_rate": 0.0001740941872190506, "loss": 12.4337, "step": 9452 }, { "epoch": 0.5147536296992098, "grad_norm": 0.7343020253109421, "learning_rate": 0.0001740882648607106, "loss": 12.4328, "step": 9453 }, { "epoch": 0.5148080836957928, "grad_norm": 0.5955622730731929, "learning_rate": 0.0001740823419262478, "loss": 12.384, "step": 9454 }, { "epoch": 0.5148625376923758, "grad_norm": 0.6158589032229121, "learning_rate": 0.0001740764184157083, "loss": 12.3919, "step": 9455 }, { "epoch": 0.5149169916889588, "grad_norm": 0.5839526669803079, "learning_rate": 0.00017407049432913813, "loss": 12.3303, "step": 9456 }, { "epoch": 0.5149714456855418, "grad_norm": 0.6667413250980353, "learning_rate": 0.0001740645696665834, "loss": 12.4874, "step": 9457 }, { "epoch": 0.5150258996821248, "grad_norm": 0.5917080616601487, "learning_rate": 0.00017405864442809012, "loss": 12.4685, "step": 9458 }, { "epoch": 0.5150803536787079, "grad_norm": 0.6153657606574432, "learning_rate": 0.0001740527186137044, "loss": 12.4207, "step": 9459 }, { "epoch": 0.5151348076752909, "grad_norm": 0.6172502537558318, "learning_rate": 0.00017404679222347236, "loss": 12.4641, "step": 9460 }, { "epoch": 0.5151892616718738, "grad_norm": 0.5826857856034895, "learning_rate": 0.00017404086525744003, "loss": 12.2974, "step": 9461 }, { "epoch": 0.5152437156684568, "grad_norm": 0.676492290899336, "learning_rate": 0.00017403493771565352, "loss": 12.2323, "step": 9462 }, { "epoch": 0.5152981696650398, "grad_norm": 0.7872839686355796, "learning_rate": 0.0001740290095981589, "loss": 12.5144, "step": 9463 }, { "epoch": 0.5153526236616228, "grad_norm": 0.6425476955150632, "learning_rate": 0.0001740230809050023, "loss": 12.489, "step": 9464 }, { "epoch": 0.5154070776582059, "grad_norm": 0.5880650070662702, "learning_rate": 0.0001740171516362298, "loss": 12.3891, "step": 9465 }, { "epoch": 0.5154615316547889, "grad_norm": 0.6126832975083883, "learning_rate": 0.00017401122179188755, "loss": 12.3447, "step": 9466 }, { "epoch": 0.5155159856513719, "grad_norm": 0.6782429959572243, "learning_rate": 0.0001740052913720216, "loss": 12.3695, "step": 9467 }, { "epoch": 0.5155704396479549, "grad_norm": 0.5803531624554442, "learning_rate": 0.00017399936037667808, "loss": 12.3674, "step": 9468 }, { "epoch": 0.5156248936445379, "grad_norm": 0.5637358898329805, "learning_rate": 0.00017399342880590314, "loss": 12.4194, "step": 9469 }, { "epoch": 0.5156793476411209, "grad_norm": 0.6286820639773689, "learning_rate": 0.0001739874966597429, "loss": 12.4978, "step": 9470 }, { "epoch": 0.515733801637704, "grad_norm": 0.6761119351876996, "learning_rate": 0.00017398156393824346, "loss": 12.4509, "step": 9471 }, { "epoch": 0.515788255634287, "grad_norm": 0.6664367437684052, "learning_rate": 0.00017397563064145097, "loss": 12.3517, "step": 9472 }, { "epoch": 0.51584270963087, "grad_norm": 0.6423591597123725, "learning_rate": 0.0001739696967694116, "loss": 12.3267, "step": 9473 }, { "epoch": 0.515897163627453, "grad_norm": 0.5995208433571146, "learning_rate": 0.0001739637623221714, "loss": 12.5092, "step": 9474 }, { "epoch": 0.515951617624036, "grad_norm": 0.660880752926758, "learning_rate": 0.00017395782729977665, "loss": 12.4551, "step": 9475 }, { "epoch": 0.516006071620619, "grad_norm": 0.5987676654785823, "learning_rate": 0.0001739518917022734, "loss": 12.3006, "step": 9476 }, { "epoch": 0.5160605256172021, "grad_norm": 0.7086942101539682, "learning_rate": 0.00017394595552970782, "loss": 12.5718, "step": 9477 }, { "epoch": 0.516114979613785, "grad_norm": 0.7278895586541723, "learning_rate": 0.00017394001878212614, "loss": 12.6292, "step": 9478 }, { "epoch": 0.516169433610368, "grad_norm": 0.6225783838940412, "learning_rate": 0.00017393408145957443, "loss": 12.3287, "step": 9479 }, { "epoch": 0.516223887606951, "grad_norm": 0.6250124899804488, "learning_rate": 0.00017392814356209893, "loss": 12.3882, "step": 9480 }, { "epoch": 0.516278341603534, "grad_norm": 0.5823883657923915, "learning_rate": 0.00017392220508974577, "loss": 12.3334, "step": 9481 }, { "epoch": 0.516332795600117, "grad_norm": 0.6525559938701772, "learning_rate": 0.00017391626604256114, "loss": 12.4486, "step": 9482 }, { "epoch": 0.5163872495967001, "grad_norm": 0.6078119554876852, "learning_rate": 0.00017391032642059126, "loss": 12.4749, "step": 9483 }, { "epoch": 0.5164417035932831, "grad_norm": 0.6965773196307412, "learning_rate": 0.00017390438622388228, "loss": 12.3629, "step": 9484 }, { "epoch": 0.5164961575898661, "grad_norm": 0.648942053086012, "learning_rate": 0.0001738984454524804, "loss": 12.1682, "step": 9485 }, { "epoch": 0.5165506115864491, "grad_norm": 0.6131617496363042, "learning_rate": 0.0001738925041064318, "loss": 12.3335, "step": 9486 }, { "epoch": 0.5166050655830321, "grad_norm": 0.5930281381763447, "learning_rate": 0.00017388656218578268, "loss": 12.4831, "step": 9487 }, { "epoch": 0.5166595195796152, "grad_norm": 0.6726746559978551, "learning_rate": 0.0001738806196905793, "loss": 12.3556, "step": 9488 }, { "epoch": 0.5167139735761982, "grad_norm": 0.6729462439247543, "learning_rate": 0.00017387467662086784, "loss": 12.3627, "step": 9489 }, { "epoch": 0.5167684275727812, "grad_norm": 0.6840423787451996, "learning_rate": 0.00017386873297669446, "loss": 12.4291, "step": 9490 }, { "epoch": 0.5168228815693642, "grad_norm": 0.727027584770321, "learning_rate": 0.00017386278875810543, "loss": 12.3196, "step": 9491 }, { "epoch": 0.5168773355659472, "grad_norm": 0.618238532380933, "learning_rate": 0.000173856843965147, "loss": 12.3743, "step": 9492 }, { "epoch": 0.5169317895625302, "grad_norm": 0.633616462992701, "learning_rate": 0.00017385089859786533, "loss": 12.3825, "step": 9493 }, { "epoch": 0.5169862435591133, "grad_norm": 0.6852956095611265, "learning_rate": 0.00017384495265630672, "loss": 12.3855, "step": 9494 }, { "epoch": 0.5170406975556963, "grad_norm": 0.6353901270857567, "learning_rate": 0.00017383900614051733, "loss": 12.3742, "step": 9495 }, { "epoch": 0.5170951515522793, "grad_norm": 0.6365417873794621, "learning_rate": 0.00017383305905054348, "loss": 12.3855, "step": 9496 }, { "epoch": 0.5171496055488622, "grad_norm": 0.6392101507202402, "learning_rate": 0.0001738271113864314, "loss": 12.3915, "step": 9497 }, { "epoch": 0.5172040595454452, "grad_norm": 0.636169211044633, "learning_rate": 0.0001738211631482273, "loss": 12.364, "step": 9498 }, { "epoch": 0.5172585135420282, "grad_norm": 0.591556032814142, "learning_rate": 0.00017381521433597742, "loss": 12.3746, "step": 9499 }, { "epoch": 0.5173129675386113, "grad_norm": 0.5989499850668308, "learning_rate": 0.00017380926494972812, "loss": 12.4657, "step": 9500 }, { "epoch": 0.5173674215351943, "grad_norm": 0.5810893580636307, "learning_rate": 0.00017380331498952555, "loss": 12.3945, "step": 9501 }, { "epoch": 0.5174218755317773, "grad_norm": 0.6071717173521719, "learning_rate": 0.00017379736445541604, "loss": 12.4434, "step": 9502 }, { "epoch": 0.5174763295283603, "grad_norm": 0.5678552708006944, "learning_rate": 0.00017379141334744586, "loss": 12.3299, "step": 9503 }, { "epoch": 0.5175307835249433, "grad_norm": 0.5897790600042302, "learning_rate": 0.00017378546166566124, "loss": 12.3707, "step": 9504 }, { "epoch": 0.5175852375215263, "grad_norm": 0.722172830820239, "learning_rate": 0.00017377950941010854, "loss": 12.5973, "step": 9505 }, { "epoch": 0.5176396915181094, "grad_norm": 0.6169290540123696, "learning_rate": 0.000173773556580834, "loss": 12.4218, "step": 9506 }, { "epoch": 0.5176941455146924, "grad_norm": 0.5773241373097306, "learning_rate": 0.00017376760317788388, "loss": 12.5499, "step": 9507 }, { "epoch": 0.5177485995112754, "grad_norm": 0.7301449323538458, "learning_rate": 0.0001737616492013045, "loss": 12.3069, "step": 9508 }, { "epoch": 0.5178030535078584, "grad_norm": 0.5773852222233629, "learning_rate": 0.0001737556946511422, "loss": 12.3977, "step": 9509 }, { "epoch": 0.5178575075044414, "grad_norm": 0.6155233235574765, "learning_rate": 0.00017374973952744325, "loss": 12.4442, "step": 9510 }, { "epoch": 0.5179119615010244, "grad_norm": 0.6083803059234462, "learning_rate": 0.00017374378383025392, "loss": 12.4416, "step": 9511 }, { "epoch": 0.5179664154976075, "grad_norm": 0.6844408691725016, "learning_rate": 0.0001737378275596206, "loss": 12.3738, "step": 9512 }, { "epoch": 0.5180208694941905, "grad_norm": 0.6869586845702121, "learning_rate": 0.00017373187071558955, "loss": 12.5113, "step": 9513 }, { "epoch": 0.5180753234907735, "grad_norm": 0.6628685359190278, "learning_rate": 0.00017372591329820712, "loss": 12.3677, "step": 9514 }, { "epoch": 0.5181297774873564, "grad_norm": 0.6463424280414908, "learning_rate": 0.0001737199553075196, "loss": 12.6094, "step": 9515 }, { "epoch": 0.5181842314839394, "grad_norm": 0.7198397510770467, "learning_rate": 0.00017371399674357337, "loss": 12.4187, "step": 9516 }, { "epoch": 0.5182386854805224, "grad_norm": 0.5829815790571139, "learning_rate": 0.0001737080376064147, "loss": 12.4891, "step": 9517 }, { "epoch": 0.5182931394771055, "grad_norm": 0.6760100979816667, "learning_rate": 0.00017370207789609002, "loss": 12.3712, "step": 9518 }, { "epoch": 0.5183475934736885, "grad_norm": 0.6053151611373706, "learning_rate": 0.00017369611761264563, "loss": 12.3323, "step": 9519 }, { "epoch": 0.5184020474702715, "grad_norm": 0.6811088166907076, "learning_rate": 0.0001736901567561278, "loss": 12.5549, "step": 9520 }, { "epoch": 0.5184565014668545, "grad_norm": 0.6683517540045927, "learning_rate": 0.000173684195326583, "loss": 12.5542, "step": 9521 }, { "epoch": 0.5185109554634375, "grad_norm": 0.6680094107438264, "learning_rate": 0.00017367823332405756, "loss": 12.4991, "step": 9522 }, { "epoch": 0.5185654094600206, "grad_norm": 0.6365234439685792, "learning_rate": 0.00017367227074859776, "loss": 12.4027, "step": 9523 }, { "epoch": 0.5186198634566036, "grad_norm": 0.6213685256896416, "learning_rate": 0.0001736663076002501, "loss": 12.2578, "step": 9524 }, { "epoch": 0.5186743174531866, "grad_norm": 0.673525027278588, "learning_rate": 0.00017366034387906085, "loss": 12.6111, "step": 9525 }, { "epoch": 0.5187287714497696, "grad_norm": 0.6045280088778902, "learning_rate": 0.00017365437958507637, "loss": 12.2815, "step": 9526 }, { "epoch": 0.5187832254463526, "grad_norm": 0.6146377983697311, "learning_rate": 0.00017364841471834312, "loss": 12.3847, "step": 9527 }, { "epoch": 0.5188376794429356, "grad_norm": 0.6224125406584722, "learning_rate": 0.00017364244927890746, "loss": 12.4285, "step": 9528 }, { "epoch": 0.5188921334395187, "grad_norm": 0.6804900827370974, "learning_rate": 0.00017363648326681577, "loss": 12.3915, "step": 9529 }, { "epoch": 0.5189465874361017, "grad_norm": 0.6187006734875188, "learning_rate": 0.0001736305166821144, "loss": 12.3694, "step": 9530 }, { "epoch": 0.5190010414326847, "grad_norm": 0.6055164489750849, "learning_rate": 0.0001736245495248498, "loss": 12.5032, "step": 9531 }, { "epoch": 0.5190554954292677, "grad_norm": 0.6791913560434254, "learning_rate": 0.00017361858179506837, "loss": 12.4238, "step": 9532 }, { "epoch": 0.5191099494258506, "grad_norm": 0.6553159563366687, "learning_rate": 0.0001736126134928165, "loss": 12.3905, "step": 9533 }, { "epoch": 0.5191644034224336, "grad_norm": 0.659497625157703, "learning_rate": 0.00017360664461814058, "loss": 12.3904, "step": 9534 }, { "epoch": 0.5192188574190167, "grad_norm": 0.5837269312568923, "learning_rate": 0.00017360067517108705, "loss": 12.2439, "step": 9535 }, { "epoch": 0.5192733114155997, "grad_norm": 0.606262104777017, "learning_rate": 0.00017359470515170233, "loss": 12.3929, "step": 9536 }, { "epoch": 0.5193277654121827, "grad_norm": 0.6481663145224712, "learning_rate": 0.00017358873456003283, "loss": 12.3585, "step": 9537 }, { "epoch": 0.5193822194087657, "grad_norm": 0.5731228553672747, "learning_rate": 0.00017358276339612502, "loss": 12.2875, "step": 9538 }, { "epoch": 0.5194366734053487, "grad_norm": 0.641688670337266, "learning_rate": 0.00017357679166002526, "loss": 12.5662, "step": 9539 }, { "epoch": 0.5194911274019317, "grad_norm": 0.6234379087948, "learning_rate": 0.0001735708193517801, "loss": 12.3924, "step": 9540 }, { "epoch": 0.5195455813985148, "grad_norm": 0.6020449493619848, "learning_rate": 0.00017356484647143585, "loss": 12.6541, "step": 9541 }, { "epoch": 0.5196000353950978, "grad_norm": 0.5694624446011876, "learning_rate": 0.00017355887301903904, "loss": 12.3805, "step": 9542 }, { "epoch": 0.5196544893916808, "grad_norm": 0.66644121793506, "learning_rate": 0.00017355289899463608, "loss": 12.4674, "step": 9543 }, { "epoch": 0.5197089433882638, "grad_norm": 0.6037137741273337, "learning_rate": 0.00017354692439827346, "loss": 12.3857, "step": 9544 }, { "epoch": 0.5197633973848468, "grad_norm": 0.6096151676019201, "learning_rate": 0.0001735409492299976, "loss": 12.3534, "step": 9545 }, { "epoch": 0.5198178513814298, "grad_norm": 0.5819488973845875, "learning_rate": 0.00017353497348985503, "loss": 12.3819, "step": 9546 }, { "epoch": 0.5198723053780129, "grad_norm": 0.7652426584827898, "learning_rate": 0.00017352899717789212, "loss": 12.4079, "step": 9547 }, { "epoch": 0.5199267593745959, "grad_norm": 0.5772110029012527, "learning_rate": 0.00017352302029415543, "loss": 12.3658, "step": 9548 }, { "epoch": 0.5199812133711789, "grad_norm": 0.5710471850503702, "learning_rate": 0.0001735170428386914, "loss": 12.4582, "step": 9549 }, { "epoch": 0.5200356673677619, "grad_norm": 0.5670726519031118, "learning_rate": 0.00017351106481154654, "loss": 12.2941, "step": 9550 }, { "epoch": 0.5200901213643448, "grad_norm": 0.7440285830960065, "learning_rate": 0.00017350508621276727, "loss": 12.4657, "step": 9551 }, { "epoch": 0.5201445753609278, "grad_norm": 0.577867669932965, "learning_rate": 0.00017349910704240014, "loss": 12.2572, "step": 9552 }, { "epoch": 0.520199029357511, "grad_norm": 0.5761404019054847, "learning_rate": 0.00017349312730049164, "loss": 12.4117, "step": 9553 }, { "epoch": 0.5202534833540939, "grad_norm": 0.6203076031921424, "learning_rate": 0.00017348714698708825, "loss": 12.4216, "step": 9554 }, { "epoch": 0.5203079373506769, "grad_norm": 0.6088496568906002, "learning_rate": 0.00017348116610223647, "loss": 12.4522, "step": 9555 }, { "epoch": 0.5203623913472599, "grad_norm": 0.6520271374564597, "learning_rate": 0.00017347518464598283, "loss": 12.4652, "step": 9556 }, { "epoch": 0.5204168453438429, "grad_norm": 0.6710044566860749, "learning_rate": 0.00017346920261837384, "loss": 12.4546, "step": 9557 }, { "epoch": 0.520471299340426, "grad_norm": 0.6386795522228851, "learning_rate": 0.000173463220019456, "loss": 12.5289, "step": 9558 }, { "epoch": 0.520525753337009, "grad_norm": 0.5617419711380065, "learning_rate": 0.0001734572368492758, "loss": 12.3144, "step": 9559 }, { "epoch": 0.520580207333592, "grad_norm": 0.6464213236433154, "learning_rate": 0.00017345125310787987, "loss": 12.5115, "step": 9560 }, { "epoch": 0.520634661330175, "grad_norm": 0.5952176049191022, "learning_rate": 0.00017344526879531466, "loss": 12.3311, "step": 9561 }, { "epoch": 0.520689115326758, "grad_norm": 0.635195905401357, "learning_rate": 0.00017343928391162672, "loss": 12.4314, "step": 9562 }, { "epoch": 0.520743569323341, "grad_norm": 0.7401122077992383, "learning_rate": 0.0001734332984568626, "loss": 12.5615, "step": 9563 }, { "epoch": 0.5207980233199241, "grad_norm": 0.6328437476921704, "learning_rate": 0.0001734273124310688, "loss": 12.3782, "step": 9564 }, { "epoch": 0.5208524773165071, "grad_norm": 0.6492627760999075, "learning_rate": 0.00017342132583429197, "loss": 12.2496, "step": 9565 }, { "epoch": 0.5209069313130901, "grad_norm": 0.6882328974866949, "learning_rate": 0.00017341533866657854, "loss": 12.5301, "step": 9566 }, { "epoch": 0.5209613853096731, "grad_norm": 0.5539351239324724, "learning_rate": 0.00017340935092797515, "loss": 12.3704, "step": 9567 }, { "epoch": 0.5210158393062561, "grad_norm": 0.6120330624036531, "learning_rate": 0.0001734033626185283, "loss": 12.4241, "step": 9568 }, { "epoch": 0.521070293302839, "grad_norm": 0.6746644635526738, "learning_rate": 0.00017339737373828464, "loss": 12.3715, "step": 9569 }, { "epoch": 0.5211247472994222, "grad_norm": 0.6251845311684804, "learning_rate": 0.0001733913842872907, "loss": 12.3197, "step": 9570 }, { "epoch": 0.5211792012960051, "grad_norm": 0.6403476161548947, "learning_rate": 0.00017338539426559301, "loss": 12.4137, "step": 9571 }, { "epoch": 0.5212336552925881, "grad_norm": 0.6340704373689795, "learning_rate": 0.00017337940367323818, "loss": 12.3527, "step": 9572 }, { "epoch": 0.5212881092891711, "grad_norm": 0.693150477056537, "learning_rate": 0.00017337341251027284, "loss": 12.5483, "step": 9573 }, { "epoch": 0.5213425632857541, "grad_norm": 0.6887097515799869, "learning_rate": 0.00017336742077674354, "loss": 12.4556, "step": 9574 }, { "epoch": 0.5213970172823371, "grad_norm": 0.6742663211247635, "learning_rate": 0.00017336142847269685, "loss": 12.1687, "step": 9575 }, { "epoch": 0.5214514712789202, "grad_norm": 0.6037349611938911, "learning_rate": 0.00017335543559817936, "loss": 12.43, "step": 9576 }, { "epoch": 0.5215059252755032, "grad_norm": 0.7071535351856332, "learning_rate": 0.00017334944215323777, "loss": 12.3907, "step": 9577 }, { "epoch": 0.5215603792720862, "grad_norm": 0.6063097686747864, "learning_rate": 0.00017334344813791858, "loss": 12.4358, "step": 9578 }, { "epoch": 0.5216148332686692, "grad_norm": 0.6568445631972262, "learning_rate": 0.0001733374535522684, "loss": 12.3488, "step": 9579 }, { "epoch": 0.5216692872652522, "grad_norm": 0.6086562414750473, "learning_rate": 0.00017333145839633395, "loss": 12.3118, "step": 9580 }, { "epoch": 0.5217237412618352, "grad_norm": 0.6720034758914826, "learning_rate": 0.00017332546267016173, "loss": 12.3787, "step": 9581 }, { "epoch": 0.5217781952584183, "grad_norm": 0.6747441173462498, "learning_rate": 0.00017331946637379848, "loss": 12.3641, "step": 9582 }, { "epoch": 0.5218326492550013, "grad_norm": 0.6362744321253769, "learning_rate": 0.0001733134695072907, "loss": 12.3927, "step": 9583 }, { "epoch": 0.5218871032515843, "grad_norm": 0.584120914581976, "learning_rate": 0.00017330747207068513, "loss": 12.3041, "step": 9584 }, { "epoch": 0.5219415572481673, "grad_norm": 0.6898276146568693, "learning_rate": 0.00017330147406402835, "loss": 12.5067, "step": 9585 }, { "epoch": 0.5219960112447503, "grad_norm": 0.6092771119092754, "learning_rate": 0.000173295475487367, "loss": 12.3398, "step": 9586 }, { "epoch": 0.5220504652413334, "grad_norm": 0.6183883034009364, "learning_rate": 0.00017328947634074774, "loss": 12.514, "step": 9587 }, { "epoch": 0.5221049192379164, "grad_norm": 0.5953627715745956, "learning_rate": 0.00017328347662421724, "loss": 12.2748, "step": 9588 }, { "epoch": 0.5221593732344993, "grad_norm": 0.6705637092696672, "learning_rate": 0.00017327747633782213, "loss": 12.388, "step": 9589 }, { "epoch": 0.5222138272310823, "grad_norm": 0.5346323503006604, "learning_rate": 0.00017327147548160907, "loss": 12.384, "step": 9590 }, { "epoch": 0.5222682812276653, "grad_norm": 0.5853878712849345, "learning_rate": 0.00017326547405562473, "loss": 12.286, "step": 9591 }, { "epoch": 0.5223227352242483, "grad_norm": 0.5975763630320771, "learning_rate": 0.0001732594720599158, "loss": 12.4346, "step": 9592 }, { "epoch": 0.5223771892208314, "grad_norm": 0.6822777284872105, "learning_rate": 0.00017325346949452887, "loss": 12.3276, "step": 9593 }, { "epoch": 0.5224316432174144, "grad_norm": 0.5940144725337047, "learning_rate": 0.00017324746635951073, "loss": 12.4804, "step": 9594 }, { "epoch": 0.5224860972139974, "grad_norm": 0.6963264100080574, "learning_rate": 0.00017324146265490799, "loss": 12.4868, "step": 9595 }, { "epoch": 0.5225405512105804, "grad_norm": 0.6454399581047955, "learning_rate": 0.0001732354583807674, "loss": 12.4155, "step": 9596 }, { "epoch": 0.5225950052071634, "grad_norm": 0.5968822395243275, "learning_rate": 0.00017322945353713553, "loss": 12.4344, "step": 9597 }, { "epoch": 0.5226494592037464, "grad_norm": 0.5772180794144417, "learning_rate": 0.00017322344812405917, "loss": 12.4009, "step": 9598 }, { "epoch": 0.5227039132003295, "grad_norm": 0.6661396756168132, "learning_rate": 0.000173217442141585, "loss": 12.5368, "step": 9599 }, { "epoch": 0.5227583671969125, "grad_norm": 0.5916943710540453, "learning_rate": 0.00017321143558975973, "loss": 12.3441, "step": 9600 }, { "epoch": 0.5228128211934955, "grad_norm": 0.6980471395490951, "learning_rate": 0.00017320542846863005, "loss": 12.4568, "step": 9601 }, { "epoch": 0.5228672751900785, "grad_norm": 0.6134060077125584, "learning_rate": 0.00017319942077824273, "loss": 12.3968, "step": 9602 }, { "epoch": 0.5229217291866615, "grad_norm": 0.6653622553223292, "learning_rate": 0.00017319341251864439, "loss": 12.3345, "step": 9603 }, { "epoch": 0.5229761831832445, "grad_norm": 0.6385386789833732, "learning_rate": 0.00017318740368988178, "loss": 12.457, "step": 9604 }, { "epoch": 0.5230306371798276, "grad_norm": 0.6778974500953844, "learning_rate": 0.0001731813942920017, "loss": 12.3487, "step": 9605 }, { "epoch": 0.5230850911764106, "grad_norm": 0.6852089521188045, "learning_rate": 0.00017317538432505078, "loss": 12.576, "step": 9606 }, { "epoch": 0.5231395451729935, "grad_norm": 0.6892428008205174, "learning_rate": 0.00017316937378907582, "loss": 12.4506, "step": 9607 }, { "epoch": 0.5231939991695765, "grad_norm": 0.6417345906514075, "learning_rate": 0.00017316336268412353, "loss": 12.3974, "step": 9608 }, { "epoch": 0.5232484531661595, "grad_norm": 0.6643182481858203, "learning_rate": 0.00017315735101024066, "loss": 12.4555, "step": 9609 }, { "epoch": 0.5233029071627425, "grad_norm": 0.7006586374584878, "learning_rate": 0.000173151338767474, "loss": 12.3776, "step": 9610 }, { "epoch": 0.5233573611593256, "grad_norm": 0.6279718640944982, "learning_rate": 0.00017314532595587024, "loss": 12.4017, "step": 9611 }, { "epoch": 0.5234118151559086, "grad_norm": 0.7037251866782321, "learning_rate": 0.00017313931257547613, "loss": 12.4936, "step": 9612 }, { "epoch": 0.5234662691524916, "grad_norm": 0.6205506357515014, "learning_rate": 0.00017313329862633848, "loss": 12.4313, "step": 9613 }, { "epoch": 0.5235207231490746, "grad_norm": 0.6943185242804921, "learning_rate": 0.00017312728410850405, "loss": 12.4645, "step": 9614 }, { "epoch": 0.5235751771456576, "grad_norm": 0.6045707069949517, "learning_rate": 0.0001731212690220196, "loss": 12.4103, "step": 9615 }, { "epoch": 0.5236296311422406, "grad_norm": 0.5648009255297014, "learning_rate": 0.00017311525336693184, "loss": 12.1542, "step": 9616 }, { "epoch": 0.5236840851388237, "grad_norm": 0.6593904156040367, "learning_rate": 0.00017310923714328766, "loss": 12.3242, "step": 9617 }, { "epoch": 0.5237385391354067, "grad_norm": 0.7689015290127676, "learning_rate": 0.0001731032203511338, "loss": 12.3943, "step": 9618 }, { "epoch": 0.5237929931319897, "grad_norm": 0.6050682600697579, "learning_rate": 0.000173097202990517, "loss": 12.3524, "step": 9619 }, { "epoch": 0.5238474471285727, "grad_norm": 0.6355378601271892, "learning_rate": 0.00017309118506148412, "loss": 12.4862, "step": 9620 }, { "epoch": 0.5239019011251557, "grad_norm": 0.625236672043179, "learning_rate": 0.00017308516656408193, "loss": 12.3745, "step": 9621 }, { "epoch": 0.5239563551217388, "grad_norm": 0.6220329845669438, "learning_rate": 0.0001730791474983572, "loss": 12.4589, "step": 9622 }, { "epoch": 0.5240108091183218, "grad_norm": 0.6320089105516793, "learning_rate": 0.00017307312786435679, "loss": 12.5162, "step": 9623 }, { "epoch": 0.5240652631149048, "grad_norm": 0.617134722308437, "learning_rate": 0.00017306710766212748, "loss": 12.4909, "step": 9624 }, { "epoch": 0.5241197171114877, "grad_norm": 0.6240965637008747, "learning_rate": 0.00017306108689171606, "loss": 12.4501, "step": 9625 }, { "epoch": 0.5241741711080707, "grad_norm": 0.5989645747623337, "learning_rate": 0.00017305506555316942, "loss": 12.495, "step": 9626 }, { "epoch": 0.5242286251046537, "grad_norm": 0.6775237364987579, "learning_rate": 0.00017304904364653432, "loss": 12.372, "step": 9627 }, { "epoch": 0.5242830791012368, "grad_norm": 0.6764909063661962, "learning_rate": 0.0001730430211718576, "loss": 12.4157, "step": 9628 }, { "epoch": 0.5243375330978198, "grad_norm": 0.5941504029768336, "learning_rate": 0.0001730369981291861, "loss": 12.2141, "step": 9629 }, { "epoch": 0.5243919870944028, "grad_norm": 0.6937835193473201, "learning_rate": 0.00017303097451856666, "loss": 12.4662, "step": 9630 }, { "epoch": 0.5244464410909858, "grad_norm": 0.5794049013015833, "learning_rate": 0.0001730249503400461, "loss": 12.391, "step": 9631 }, { "epoch": 0.5245008950875688, "grad_norm": 0.5993816672770692, "learning_rate": 0.0001730189255936713, "loss": 12.4769, "step": 9632 }, { "epoch": 0.5245553490841518, "grad_norm": 0.6214200612444263, "learning_rate": 0.0001730129002794891, "loss": 12.3002, "step": 9633 }, { "epoch": 0.5246098030807349, "grad_norm": 0.6614838174941197, "learning_rate": 0.00017300687439754627, "loss": 12.4977, "step": 9634 }, { "epoch": 0.5246642570773179, "grad_norm": 0.6361890413636067, "learning_rate": 0.00017300084794788983, "loss": 12.432, "step": 9635 }, { "epoch": 0.5247187110739009, "grad_norm": 0.6870685161675998, "learning_rate": 0.00017299482093056652, "loss": 12.509, "step": 9636 }, { "epoch": 0.5247731650704839, "grad_norm": 0.6120446501238316, "learning_rate": 0.00017298879334562322, "loss": 12.3235, "step": 9637 }, { "epoch": 0.5248276190670669, "grad_norm": 0.782081122499058, "learning_rate": 0.00017298276519310687, "loss": 12.2507, "step": 9638 }, { "epoch": 0.5248820730636499, "grad_norm": 0.6666692624757277, "learning_rate": 0.00017297673647306426, "loss": 12.5445, "step": 9639 }, { "epoch": 0.524936527060233, "grad_norm": 0.5996083202027668, "learning_rate": 0.00017297070718554232, "loss": 12.3263, "step": 9640 }, { "epoch": 0.524990981056816, "grad_norm": 0.6918954093983186, "learning_rate": 0.00017296467733058792, "loss": 12.4189, "step": 9641 }, { "epoch": 0.525045435053399, "grad_norm": 0.5641966877474066, "learning_rate": 0.00017295864690824794, "loss": 12.4129, "step": 9642 }, { "epoch": 0.525099889049982, "grad_norm": 0.6507912778509167, "learning_rate": 0.0001729526159185693, "loss": 12.5057, "step": 9643 }, { "epoch": 0.5251543430465649, "grad_norm": 0.6194648051953271, "learning_rate": 0.0001729465843615989, "loss": 12.4627, "step": 9644 }, { "epoch": 0.5252087970431479, "grad_norm": 0.6426678578240965, "learning_rate": 0.00017294055223738362, "loss": 12.4603, "step": 9645 }, { "epoch": 0.525263251039731, "grad_norm": 0.7172559439221332, "learning_rate": 0.00017293451954597034, "loss": 12.3775, "step": 9646 }, { "epoch": 0.525317705036314, "grad_norm": 0.6184352607295612, "learning_rate": 0.00017292848628740605, "loss": 12.4496, "step": 9647 }, { "epoch": 0.525372159032897, "grad_norm": 0.66662565383955, "learning_rate": 0.00017292245246173761, "loss": 12.3351, "step": 9648 }, { "epoch": 0.52542661302948, "grad_norm": 0.6044453012503522, "learning_rate": 0.00017291641806901193, "loss": 12.3472, "step": 9649 }, { "epoch": 0.525481067026063, "grad_norm": 0.6094155769366099, "learning_rate": 0.000172910383109276, "loss": 12.2924, "step": 9650 }, { "epoch": 0.525535521022646, "grad_norm": 0.6619147532997315, "learning_rate": 0.00017290434758257666, "loss": 12.3786, "step": 9651 }, { "epoch": 0.5255899750192291, "grad_norm": 0.654740006199492, "learning_rate": 0.0001728983114889609, "loss": 12.5304, "step": 9652 }, { "epoch": 0.5256444290158121, "grad_norm": 0.6360731990373002, "learning_rate": 0.00017289227482847563, "loss": 12.4277, "step": 9653 }, { "epoch": 0.5256988830123951, "grad_norm": 0.7177684346359464, "learning_rate": 0.00017288623760116782, "loss": 12.2692, "step": 9654 }, { "epoch": 0.5257533370089781, "grad_norm": 0.594606411338279, "learning_rate": 0.0001728801998070844, "loss": 12.5538, "step": 9655 }, { "epoch": 0.5258077910055611, "grad_norm": 0.61978373942687, "learning_rate": 0.00017287416144627237, "loss": 12.4174, "step": 9656 }, { "epoch": 0.5258622450021442, "grad_norm": 0.6187951813746733, "learning_rate": 0.00017286812251877858, "loss": 12.34, "step": 9657 }, { "epoch": 0.5259166989987272, "grad_norm": 0.8047359145637732, "learning_rate": 0.0001728620830246501, "loss": 12.4556, "step": 9658 }, { "epoch": 0.5259711529953102, "grad_norm": 0.6399250732558073, "learning_rate": 0.00017285604296393377, "loss": 12.4849, "step": 9659 }, { "epoch": 0.5260256069918932, "grad_norm": 0.8625060189544411, "learning_rate": 0.00017285000233667667, "loss": 12.3901, "step": 9660 }, { "epoch": 0.5260800609884762, "grad_norm": 0.589516031343896, "learning_rate": 0.00017284396114292574, "loss": 12.2904, "step": 9661 }, { "epoch": 0.5261345149850591, "grad_norm": 0.6029145395554105, "learning_rate": 0.00017283791938272795, "loss": 12.2947, "step": 9662 }, { "epoch": 0.5261889689816422, "grad_norm": 0.6933086437955884, "learning_rate": 0.0001728318770561303, "loss": 12.4769, "step": 9663 }, { "epoch": 0.5262434229782252, "grad_norm": 0.7179526791292549, "learning_rate": 0.00017282583416317973, "loss": 12.5321, "step": 9664 }, { "epoch": 0.5262978769748082, "grad_norm": 0.5920105115456481, "learning_rate": 0.0001728197907039233, "loss": 12.358, "step": 9665 }, { "epoch": 0.5263523309713912, "grad_norm": 0.6865859863106162, "learning_rate": 0.00017281374667840792, "loss": 12.4423, "step": 9666 }, { "epoch": 0.5264067849679742, "grad_norm": 0.6678078695824575, "learning_rate": 0.00017280770208668065, "loss": 12.4424, "step": 9667 }, { "epoch": 0.5264612389645572, "grad_norm": 0.607173425284794, "learning_rate": 0.00017280165692878848, "loss": 12.378, "step": 9668 }, { "epoch": 0.5265156929611403, "grad_norm": 0.6363930188981803, "learning_rate": 0.00017279561120477844, "loss": 12.3906, "step": 9669 }, { "epoch": 0.5265701469577233, "grad_norm": 0.6234394296399584, "learning_rate": 0.0001727895649146975, "loss": 12.4683, "step": 9670 }, { "epoch": 0.5266246009543063, "grad_norm": 0.5749083886394103, "learning_rate": 0.0001727835180585927, "loss": 12.4641, "step": 9671 }, { "epoch": 0.5266790549508893, "grad_norm": 0.6452980338903855, "learning_rate": 0.00017277747063651106, "loss": 12.5147, "step": 9672 }, { "epoch": 0.5267335089474723, "grad_norm": 0.5563195642011151, "learning_rate": 0.0001727714226484996, "loss": 12.3238, "step": 9673 }, { "epoch": 0.5267879629440553, "grad_norm": 0.710159581261464, "learning_rate": 0.00017276537409460533, "loss": 12.4948, "step": 9674 }, { "epoch": 0.5268424169406384, "grad_norm": 0.588714729325823, "learning_rate": 0.00017275932497487536, "loss": 12.3773, "step": 9675 }, { "epoch": 0.5268968709372214, "grad_norm": 0.6465746286814458, "learning_rate": 0.00017275327528935662, "loss": 12.3818, "step": 9676 }, { "epoch": 0.5269513249338044, "grad_norm": 0.6322551956059813, "learning_rate": 0.00017274722503809624, "loss": 12.4437, "step": 9677 }, { "epoch": 0.5270057789303874, "grad_norm": 0.5644116231357436, "learning_rate": 0.00017274117422114125, "loss": 12.3734, "step": 9678 }, { "epoch": 0.5270602329269704, "grad_norm": 0.6304177061479034, "learning_rate": 0.0001727351228385387, "loss": 12.4745, "step": 9679 }, { "epoch": 0.5271146869235533, "grad_norm": 0.567406429966043, "learning_rate": 0.00017272907089033559, "loss": 12.4316, "step": 9680 }, { "epoch": 0.5271691409201364, "grad_norm": 0.5429050725645992, "learning_rate": 0.00017272301837657905, "loss": 12.3625, "step": 9681 }, { "epoch": 0.5272235949167194, "grad_norm": 0.6655180482040186, "learning_rate": 0.00017271696529731612, "loss": 12.5425, "step": 9682 }, { "epoch": 0.5272780489133024, "grad_norm": 0.6268527508329086, "learning_rate": 0.00017271091165259387, "loss": 12.5065, "step": 9683 }, { "epoch": 0.5273325029098854, "grad_norm": 0.5693725248078682, "learning_rate": 0.0001727048574424594, "loss": 12.3665, "step": 9684 }, { "epoch": 0.5273869569064684, "grad_norm": 0.6043402385345585, "learning_rate": 0.00017269880266695975, "loss": 12.4099, "step": 9685 }, { "epoch": 0.5274414109030514, "grad_norm": 0.6818348321281018, "learning_rate": 0.00017269274732614203, "loss": 12.2967, "step": 9686 }, { "epoch": 0.5274958648996345, "grad_norm": 0.7589572456632353, "learning_rate": 0.00017268669142005328, "loss": 12.3437, "step": 9687 }, { "epoch": 0.5275503188962175, "grad_norm": 0.701238628571967, "learning_rate": 0.0001726806349487407, "loss": 12.2364, "step": 9688 }, { "epoch": 0.5276047728928005, "grad_norm": 0.6545375754439466, "learning_rate": 0.00017267457791225125, "loss": 12.5199, "step": 9689 }, { "epoch": 0.5276592268893835, "grad_norm": 0.6524621393043788, "learning_rate": 0.0001726685203106321, "loss": 12.3886, "step": 9690 }, { "epoch": 0.5277136808859665, "grad_norm": 0.6160583806710476, "learning_rate": 0.00017266246214393038, "loss": 12.3516, "step": 9691 }, { "epoch": 0.5277681348825496, "grad_norm": 0.5785027843233695, "learning_rate": 0.00017265640341219314, "loss": 12.4351, "step": 9692 }, { "epoch": 0.5278225888791326, "grad_norm": 0.6324907448675565, "learning_rate": 0.00017265034411546753, "loss": 12.4376, "step": 9693 }, { "epoch": 0.5278770428757156, "grad_norm": 0.6337729746200753, "learning_rate": 0.00017264428425380068, "loss": 12.3728, "step": 9694 }, { "epoch": 0.5279314968722986, "grad_norm": 0.5697052599210165, "learning_rate": 0.0001726382238272397, "loss": 12.3349, "step": 9695 }, { "epoch": 0.5279859508688816, "grad_norm": 0.7966711094861342, "learning_rate": 0.00017263216283583166, "loss": 12.3451, "step": 9696 }, { "epoch": 0.5280404048654646, "grad_norm": 0.5574509545166038, "learning_rate": 0.0001726261012796238, "loss": 12.4332, "step": 9697 }, { "epoch": 0.5280948588620477, "grad_norm": 0.7417944850177249, "learning_rate": 0.00017262003915866317, "loss": 12.3422, "step": 9698 }, { "epoch": 0.5281493128586306, "grad_norm": 0.7367901278701788, "learning_rate": 0.0001726139764729969, "loss": 12.3471, "step": 9699 }, { "epoch": 0.5282037668552136, "grad_norm": 0.6468213805767332, "learning_rate": 0.00017260791322267223, "loss": 12.3907, "step": 9700 }, { "epoch": 0.5282582208517966, "grad_norm": 0.5935120222843647, "learning_rate": 0.00017260184940773621, "loss": 12.3381, "step": 9701 }, { "epoch": 0.5283126748483796, "grad_norm": 0.6189022965552061, "learning_rate": 0.00017259578502823604, "loss": 12.4009, "step": 9702 }, { "epoch": 0.5283671288449626, "grad_norm": 0.6342628207844949, "learning_rate": 0.0001725897200842189, "loss": 12.3978, "step": 9703 }, { "epoch": 0.5284215828415457, "grad_norm": 0.7407930942569653, "learning_rate": 0.0001725836545757319, "loss": 12.5439, "step": 9704 }, { "epoch": 0.5284760368381287, "grad_norm": 0.6286378879070083, "learning_rate": 0.00017257758850282225, "loss": 12.3976, "step": 9705 }, { "epoch": 0.5285304908347117, "grad_norm": 0.6695499155829157, "learning_rate": 0.00017257152186553708, "loss": 12.3466, "step": 9706 }, { "epoch": 0.5285849448312947, "grad_norm": 0.6015846987937623, "learning_rate": 0.00017256545466392357, "loss": 12.2866, "step": 9707 }, { "epoch": 0.5286393988278777, "grad_norm": 0.6078027280739121, "learning_rate": 0.00017255938689802894, "loss": 12.4379, "step": 9708 }, { "epoch": 0.5286938528244607, "grad_norm": 0.638874690111006, "learning_rate": 0.00017255331856790034, "loss": 12.4772, "step": 9709 }, { "epoch": 0.5287483068210438, "grad_norm": 0.672426844846369, "learning_rate": 0.00017254724967358497, "loss": 12.3671, "step": 9710 }, { "epoch": 0.5288027608176268, "grad_norm": 0.6827308364374988, "learning_rate": 0.00017254118021513005, "loss": 12.405, "step": 9711 }, { "epoch": 0.5288572148142098, "grad_norm": 0.6486871442666722, "learning_rate": 0.00017253511019258273, "loss": 12.4867, "step": 9712 }, { "epoch": 0.5289116688107928, "grad_norm": 0.7229718641019813, "learning_rate": 0.00017252903960599021, "loss": 12.6646, "step": 9713 }, { "epoch": 0.5289661228073758, "grad_norm": 0.5710992592647909, "learning_rate": 0.00017252296845539974, "loss": 12.3733, "step": 9714 }, { "epoch": 0.5290205768039588, "grad_norm": 0.5865725811347708, "learning_rate": 0.00017251689674085846, "loss": 12.4057, "step": 9715 }, { "epoch": 0.5290750308005419, "grad_norm": 0.574147815297626, "learning_rate": 0.00017251082446241366, "loss": 12.3963, "step": 9716 }, { "epoch": 0.5291294847971248, "grad_norm": 0.5596637567178822, "learning_rate": 0.00017250475162011256, "loss": 12.31, "step": 9717 }, { "epoch": 0.5291839387937078, "grad_norm": 0.83964741392583, "learning_rate": 0.0001724986782140023, "loss": 12.428, "step": 9718 }, { "epoch": 0.5292383927902908, "grad_norm": 0.5941991496411796, "learning_rate": 0.00017249260424413018, "loss": 12.2539, "step": 9719 }, { "epoch": 0.5292928467868738, "grad_norm": 0.6831774494563473, "learning_rate": 0.0001724865297105434, "loss": 12.4378, "step": 9720 }, { "epoch": 0.5293473007834569, "grad_norm": 0.6542378395756087, "learning_rate": 0.00017248045461328926, "loss": 12.4948, "step": 9721 }, { "epoch": 0.5294017547800399, "grad_norm": 0.8125360337002615, "learning_rate": 0.0001724743789524149, "loss": 12.5094, "step": 9722 }, { "epoch": 0.5294562087766229, "grad_norm": 0.6991273418397774, "learning_rate": 0.00017246830272796762, "loss": 12.5143, "step": 9723 }, { "epoch": 0.5295106627732059, "grad_norm": 0.5880633336171539, "learning_rate": 0.00017246222593999468, "loss": 12.3835, "step": 9724 }, { "epoch": 0.5295651167697889, "grad_norm": 0.5804574850995645, "learning_rate": 0.0001724561485885433, "loss": 12.2713, "step": 9725 }, { "epoch": 0.5296195707663719, "grad_norm": 0.7335910759132532, "learning_rate": 0.0001724500706736608, "loss": 12.3935, "step": 9726 }, { "epoch": 0.529674024762955, "grad_norm": 0.5965335798603199, "learning_rate": 0.00017244399219539436, "loss": 12.3415, "step": 9727 }, { "epoch": 0.529728478759538, "grad_norm": 0.5773773691831027, "learning_rate": 0.0001724379131537913, "loss": 12.3071, "step": 9728 }, { "epoch": 0.529782932756121, "grad_norm": 0.5851148451591887, "learning_rate": 0.00017243183354889887, "loss": 12.4117, "step": 9729 }, { "epoch": 0.529837386752704, "grad_norm": 0.8309450756095572, "learning_rate": 0.00017242575338076435, "loss": 12.4456, "step": 9730 }, { "epoch": 0.529891840749287, "grad_norm": 0.6938917740148158, "learning_rate": 0.00017241967264943506, "loss": 12.3174, "step": 9731 }, { "epoch": 0.52994629474587, "grad_norm": 0.5908224767547462, "learning_rate": 0.00017241359135495822, "loss": 12.4635, "step": 9732 }, { "epoch": 0.5300007487424531, "grad_norm": 0.7278377385476976, "learning_rate": 0.00017240750949738115, "loss": 12.5118, "step": 9733 }, { "epoch": 0.5300552027390361, "grad_norm": 0.6470128783211144, "learning_rate": 0.00017240142707675117, "loss": 12.4494, "step": 9734 }, { "epoch": 0.530109656735619, "grad_norm": 0.6891827007678933, "learning_rate": 0.00017239534409311554, "loss": 12.4469, "step": 9735 }, { "epoch": 0.530164110732202, "grad_norm": 0.6342013943166225, "learning_rate": 0.00017238926054652157, "loss": 12.3601, "step": 9736 }, { "epoch": 0.530218564728785, "grad_norm": 0.6399841926597989, "learning_rate": 0.00017238317643701655, "loss": 12.425, "step": 9737 }, { "epoch": 0.530273018725368, "grad_norm": 0.6107766450079387, "learning_rate": 0.00017237709176464783, "loss": 12.4542, "step": 9738 }, { "epoch": 0.5303274727219511, "grad_norm": 0.5742461087396051, "learning_rate": 0.00017237100652946273, "loss": 12.3553, "step": 9739 }, { "epoch": 0.5303819267185341, "grad_norm": 0.645047834726115, "learning_rate": 0.00017236492073150852, "loss": 12.306, "step": 9740 }, { "epoch": 0.5304363807151171, "grad_norm": 1.1717071985382235, "learning_rate": 0.00017235883437083252, "loss": 12.4642, "step": 9741 }, { "epoch": 0.5304908347117001, "grad_norm": 0.5822341358166635, "learning_rate": 0.00017235274744748216, "loss": 12.3577, "step": 9742 }, { "epoch": 0.5305452887082831, "grad_norm": 0.6196614983883354, "learning_rate": 0.00017234665996150464, "loss": 12.4747, "step": 9743 }, { "epoch": 0.5305997427048661, "grad_norm": 0.7803176794795792, "learning_rate": 0.00017234057191294739, "loss": 12.3551, "step": 9744 }, { "epoch": 0.5306541967014492, "grad_norm": 0.5862024454149802, "learning_rate": 0.00017233448330185774, "loss": 12.2299, "step": 9745 }, { "epoch": 0.5307086506980322, "grad_norm": 0.7186574421139408, "learning_rate": 0.00017232839412828298, "loss": 12.3433, "step": 9746 }, { "epoch": 0.5307631046946152, "grad_norm": 0.7315895849141418, "learning_rate": 0.0001723223043922705, "loss": 12.3404, "step": 9747 }, { "epoch": 0.5308175586911982, "grad_norm": 0.667927166615274, "learning_rate": 0.00017231621409386766, "loss": 12.5153, "step": 9748 }, { "epoch": 0.5308720126877812, "grad_norm": 0.7132958588280709, "learning_rate": 0.00017231012323312182, "loss": 12.4401, "step": 9749 }, { "epoch": 0.5309264666843642, "grad_norm": 0.9607096123762794, "learning_rate": 0.00017230403181008034, "loss": 12.4612, "step": 9750 }, { "epoch": 0.5309809206809473, "grad_norm": 0.5842123349197016, "learning_rate": 0.00017229793982479058, "loss": 12.3212, "step": 9751 }, { "epoch": 0.5310353746775303, "grad_norm": 0.6038032722093865, "learning_rate": 0.0001722918472772999, "loss": 12.3025, "step": 9752 }, { "epoch": 0.5310898286741133, "grad_norm": 0.6729744395491271, "learning_rate": 0.0001722857541676557, "loss": 12.4765, "step": 9753 }, { "epoch": 0.5311442826706962, "grad_norm": 0.6006912560201526, "learning_rate": 0.00017227966049590535, "loss": 12.4064, "step": 9754 }, { "epoch": 0.5311987366672792, "grad_norm": 0.6781132871835124, "learning_rate": 0.00017227356626209626, "loss": 12.3, "step": 9755 }, { "epoch": 0.5312531906638623, "grad_norm": 0.6127591248377919, "learning_rate": 0.0001722674714662758, "loss": 12.4081, "step": 9756 }, { "epoch": 0.5313076446604453, "grad_norm": 0.5918857358398967, "learning_rate": 0.00017226137610849133, "loss": 12.3187, "step": 9757 }, { "epoch": 0.5313620986570283, "grad_norm": 0.6277227353304781, "learning_rate": 0.00017225528018879028, "loss": 12.3859, "step": 9758 }, { "epoch": 0.5314165526536113, "grad_norm": 0.634504921022194, "learning_rate": 0.0001722491837072201, "loss": 12.3973, "step": 9759 }, { "epoch": 0.5314710066501943, "grad_norm": 0.662465830818154, "learning_rate": 0.0001722430866638281, "loss": 12.3916, "step": 9760 }, { "epoch": 0.5315254606467773, "grad_norm": 0.5891039439024768, "learning_rate": 0.00017223698905866178, "loss": 12.3908, "step": 9761 }, { "epoch": 0.5315799146433604, "grad_norm": 0.6549544766014891, "learning_rate": 0.0001722308908917685, "loss": 12.4351, "step": 9762 }, { "epoch": 0.5316343686399434, "grad_norm": 0.6505523821972475, "learning_rate": 0.0001722247921631957, "loss": 12.3605, "step": 9763 }, { "epoch": 0.5316888226365264, "grad_norm": 0.6311228049331078, "learning_rate": 0.0001722186928729908, "loss": 12.342, "step": 9764 }, { "epoch": 0.5317432766331094, "grad_norm": 1.0094269914095666, "learning_rate": 0.00017221259302120125, "loss": 12.3917, "step": 9765 }, { "epoch": 0.5317977306296924, "grad_norm": 0.6350641093722831, "learning_rate": 0.00017220649260787444, "loss": 12.3297, "step": 9766 }, { "epoch": 0.5318521846262754, "grad_norm": 0.5972895412440087, "learning_rate": 0.00017220039163305786, "loss": 12.3677, "step": 9767 }, { "epoch": 0.5319066386228585, "grad_norm": 0.5975628558195795, "learning_rate": 0.0001721942900967989, "loss": 12.3943, "step": 9768 }, { "epoch": 0.5319610926194415, "grad_norm": 0.6235024665581594, "learning_rate": 0.00017218818799914507, "loss": 12.4072, "step": 9769 }, { "epoch": 0.5320155466160245, "grad_norm": 0.5994943679323929, "learning_rate": 0.00017218208534014378, "loss": 12.4792, "step": 9770 }, { "epoch": 0.5320700006126075, "grad_norm": 0.663856021291008, "learning_rate": 0.00017217598211984248, "loss": 12.1824, "step": 9771 }, { "epoch": 0.5321244546091904, "grad_norm": 0.6260830279897032, "learning_rate": 0.00017216987833828861, "loss": 12.2323, "step": 9772 }, { "epoch": 0.5321789086057734, "grad_norm": 0.6039866668740309, "learning_rate": 0.0001721637739955297, "loss": 12.4352, "step": 9773 }, { "epoch": 0.5322333626023565, "grad_norm": 0.6712571763891201, "learning_rate": 0.00017215766909161318, "loss": 12.4429, "step": 9774 }, { "epoch": 0.5322878165989395, "grad_norm": 0.5770603415893366, "learning_rate": 0.0001721515636265865, "loss": 12.2967, "step": 9775 }, { "epoch": 0.5323422705955225, "grad_norm": 0.6857828943633366, "learning_rate": 0.0001721454576004972, "loss": 12.5149, "step": 9776 }, { "epoch": 0.5323967245921055, "grad_norm": 0.6672425660206375, "learning_rate": 0.00017213935101339267, "loss": 12.4305, "step": 9777 }, { "epoch": 0.5324511785886885, "grad_norm": 0.7012997332857596, "learning_rate": 0.00017213324386532047, "loss": 12.259, "step": 9778 }, { "epoch": 0.5325056325852715, "grad_norm": 0.6089271523873652, "learning_rate": 0.0001721271361563281, "loss": 12.3164, "step": 9779 }, { "epoch": 0.5325600865818546, "grad_norm": 0.6629686692249517, "learning_rate": 0.00017212102788646298, "loss": 12.4173, "step": 9780 }, { "epoch": 0.5326145405784376, "grad_norm": 0.5903774965381847, "learning_rate": 0.00017211491905577266, "loss": 12.2993, "step": 9781 }, { "epoch": 0.5326689945750206, "grad_norm": 0.6902349289697002, "learning_rate": 0.00017210880966430465, "loss": 12.3484, "step": 9782 }, { "epoch": 0.5327234485716036, "grad_norm": 0.6697846969405893, "learning_rate": 0.00017210269971210644, "loss": 12.4456, "step": 9783 }, { "epoch": 0.5327779025681866, "grad_norm": 0.6201544903874386, "learning_rate": 0.00017209658919922554, "loss": 12.4618, "step": 9784 }, { "epoch": 0.5328323565647696, "grad_norm": 0.6438666747424316, "learning_rate": 0.00017209047812570948, "loss": 12.3211, "step": 9785 }, { "epoch": 0.5328868105613527, "grad_norm": 0.7061246526826793, "learning_rate": 0.00017208436649160578, "loss": 12.5599, "step": 9786 }, { "epoch": 0.5329412645579357, "grad_norm": 0.6512120724465694, "learning_rate": 0.00017207825429696195, "loss": 12.3956, "step": 9787 }, { "epoch": 0.5329957185545187, "grad_norm": 0.6008826656968721, "learning_rate": 0.00017207214154182552, "loss": 12.3315, "step": 9788 }, { "epoch": 0.5330501725511017, "grad_norm": 0.6151984110148345, "learning_rate": 0.00017206602822624403, "loss": 12.3331, "step": 9789 }, { "epoch": 0.5331046265476846, "grad_norm": 0.7381432926540967, "learning_rate": 0.00017205991435026503, "loss": 12.2305, "step": 9790 }, { "epoch": 0.5331590805442677, "grad_norm": 0.5831377313507006, "learning_rate": 0.00017205379991393603, "loss": 12.4046, "step": 9791 }, { "epoch": 0.5332135345408507, "grad_norm": 0.7278580392286879, "learning_rate": 0.00017204768491730464, "loss": 12.3752, "step": 9792 }, { "epoch": 0.5332679885374337, "grad_norm": 0.6868404243386019, "learning_rate": 0.00017204156936041832, "loss": 12.475, "step": 9793 }, { "epoch": 0.5333224425340167, "grad_norm": 0.6386140397717891, "learning_rate": 0.00017203545324332472, "loss": 12.3795, "step": 9794 }, { "epoch": 0.5333768965305997, "grad_norm": 0.741678294977336, "learning_rate": 0.00017202933656607132, "loss": 12.3816, "step": 9795 }, { "epoch": 0.5334313505271827, "grad_norm": 0.6622029228786622, "learning_rate": 0.00017202321932870577, "loss": 12.5115, "step": 9796 }, { "epoch": 0.5334858045237658, "grad_norm": 0.6693469010395893, "learning_rate": 0.00017201710153127552, "loss": 12.2848, "step": 9797 }, { "epoch": 0.5335402585203488, "grad_norm": 0.6947314114981221, "learning_rate": 0.00017201098317382824, "loss": 12.4751, "step": 9798 }, { "epoch": 0.5335947125169318, "grad_norm": 0.6436666397792562, "learning_rate": 0.0001720048642564115, "loss": 12.4354, "step": 9799 }, { "epoch": 0.5336491665135148, "grad_norm": 0.587771081254023, "learning_rate": 0.00017199874477907283, "loss": 12.3793, "step": 9800 }, { "epoch": 0.5337036205100978, "grad_norm": 0.6547024454832397, "learning_rate": 0.00017199262474185988, "loss": 12.4499, "step": 9801 }, { "epoch": 0.5337580745066808, "grad_norm": 0.6429260236613371, "learning_rate": 0.00017198650414482019, "loss": 12.3718, "step": 9802 }, { "epoch": 0.5338125285032639, "grad_norm": 0.6317710786618224, "learning_rate": 0.00017198038298800136, "loss": 12.3614, "step": 9803 }, { "epoch": 0.5338669824998469, "grad_norm": 0.6850677083619697, "learning_rate": 0.000171974261271451, "loss": 12.3093, "step": 9804 }, { "epoch": 0.5339214364964299, "grad_norm": 0.6419486047294934, "learning_rate": 0.00017196813899521672, "loss": 12.3757, "step": 9805 }, { "epoch": 0.5339758904930129, "grad_norm": 0.6135614860091514, "learning_rate": 0.00017196201615934614, "loss": 12.3681, "step": 9806 }, { "epoch": 0.5340303444895959, "grad_norm": 0.7531851068565291, "learning_rate": 0.00017195589276388683, "loss": 12.5129, "step": 9807 }, { "epoch": 0.5340847984861788, "grad_norm": 0.6420256498410256, "learning_rate": 0.00017194976880888642, "loss": 12.4711, "step": 9808 }, { "epoch": 0.534139252482762, "grad_norm": 0.5705655579112456, "learning_rate": 0.00017194364429439258, "loss": 12.3657, "step": 9809 }, { "epoch": 0.5341937064793449, "grad_norm": 0.6295191539717218, "learning_rate": 0.00017193751922045286, "loss": 12.4479, "step": 9810 }, { "epoch": 0.5342481604759279, "grad_norm": 0.6177632784310391, "learning_rate": 0.00017193139358711497, "loss": 12.477, "step": 9811 }, { "epoch": 0.5343026144725109, "grad_norm": 0.6854705931839781, "learning_rate": 0.00017192526739442647, "loss": 12.4645, "step": 9812 }, { "epoch": 0.5343570684690939, "grad_norm": 0.613644425046222, "learning_rate": 0.000171919140642435, "loss": 12.4764, "step": 9813 }, { "epoch": 0.5344115224656769, "grad_norm": 0.5929606787046219, "learning_rate": 0.0001719130133311883, "loss": 12.4075, "step": 9814 }, { "epoch": 0.53446597646226, "grad_norm": 0.6068627512682941, "learning_rate": 0.0001719068854607339, "loss": 12.3778, "step": 9815 }, { "epoch": 0.534520430458843, "grad_norm": 0.6383825000704191, "learning_rate": 0.00017190075703111952, "loss": 12.4257, "step": 9816 }, { "epoch": 0.534574884455426, "grad_norm": 0.630913310539644, "learning_rate": 0.0001718946280423928, "loss": 12.3683, "step": 9817 }, { "epoch": 0.534629338452009, "grad_norm": 0.6614522218218793, "learning_rate": 0.00017188849849460137, "loss": 12.3796, "step": 9818 }, { "epoch": 0.534683792448592, "grad_norm": 0.6034520952779118, "learning_rate": 0.00017188236838779295, "loss": 12.3671, "step": 9819 }, { "epoch": 0.534738246445175, "grad_norm": 0.6531549455633217, "learning_rate": 0.00017187623772201515, "loss": 12.368, "step": 9820 }, { "epoch": 0.5347927004417581, "grad_norm": 0.5538706811573829, "learning_rate": 0.0001718701064973157, "loss": 12.3199, "step": 9821 }, { "epoch": 0.5348471544383411, "grad_norm": 0.6744276782703781, "learning_rate": 0.00017186397471374222, "loss": 12.5405, "step": 9822 }, { "epoch": 0.5349016084349241, "grad_norm": 0.6165108152495673, "learning_rate": 0.00017185784237134244, "loss": 12.4017, "step": 9823 }, { "epoch": 0.5349560624315071, "grad_norm": 0.6047927725391143, "learning_rate": 0.00017185170947016403, "loss": 12.4021, "step": 9824 }, { "epoch": 0.53501051642809, "grad_norm": 0.677679534013877, "learning_rate": 0.0001718455760102547, "loss": 12.3044, "step": 9825 }, { "epoch": 0.5350649704246732, "grad_norm": 0.5815455964482686, "learning_rate": 0.00017183944199166207, "loss": 12.4053, "step": 9826 }, { "epoch": 0.5351194244212562, "grad_norm": 0.6780133563415122, "learning_rate": 0.00017183330741443392, "loss": 12.285, "step": 9827 }, { "epoch": 0.5351738784178391, "grad_norm": 0.662949576246597, "learning_rate": 0.0001718271722786179, "loss": 12.4557, "step": 9828 }, { "epoch": 0.5352283324144221, "grad_norm": 0.6269948079958946, "learning_rate": 0.00017182103658426175, "loss": 12.3465, "step": 9829 }, { "epoch": 0.5352827864110051, "grad_norm": 0.846084103696285, "learning_rate": 0.00017181490033141322, "loss": 12.31, "step": 9830 }, { "epoch": 0.5353372404075881, "grad_norm": 0.54992669181176, "learning_rate": 0.00017180876352011995, "loss": 12.2473, "step": 9831 }, { "epoch": 0.5353916944041712, "grad_norm": 0.7478928913354632, "learning_rate": 0.0001718026261504297, "loss": 12.3651, "step": 9832 }, { "epoch": 0.5354461484007542, "grad_norm": 0.7367492449815537, "learning_rate": 0.00017179648822239016, "loss": 12.4024, "step": 9833 }, { "epoch": 0.5355006023973372, "grad_norm": 0.5685099530599846, "learning_rate": 0.00017179034973604913, "loss": 12.4241, "step": 9834 }, { "epoch": 0.5355550563939202, "grad_norm": 0.849747866660649, "learning_rate": 0.00017178421069145427, "loss": 12.4344, "step": 9835 }, { "epoch": 0.5356095103905032, "grad_norm": 0.6246212097436993, "learning_rate": 0.00017177807108865336, "loss": 12.2952, "step": 9836 }, { "epoch": 0.5356639643870862, "grad_norm": 0.6160026522839797, "learning_rate": 0.00017177193092769412, "loss": 12.3952, "step": 9837 }, { "epoch": 0.5357184183836693, "grad_norm": 0.6626744999205778, "learning_rate": 0.0001717657902086243, "loss": 12.23, "step": 9838 }, { "epoch": 0.5357728723802523, "grad_norm": 0.6631853663477942, "learning_rate": 0.0001717596489314917, "loss": 12.4028, "step": 9839 }, { "epoch": 0.5358273263768353, "grad_norm": 0.603045212597613, "learning_rate": 0.00017175350709634402, "loss": 12.3879, "step": 9840 }, { "epoch": 0.5358817803734183, "grad_norm": 0.7324693325573454, "learning_rate": 0.000171747364703229, "loss": 12.345, "step": 9841 }, { "epoch": 0.5359362343700013, "grad_norm": 0.6550016515664254, "learning_rate": 0.00017174122175219448, "loss": 12.4589, "step": 9842 }, { "epoch": 0.5359906883665843, "grad_norm": 0.641150171504333, "learning_rate": 0.00017173507824328819, "loss": 12.3711, "step": 9843 }, { "epoch": 0.5360451423631674, "grad_norm": 0.686298156521777, "learning_rate": 0.00017172893417655792, "loss": 12.2025, "step": 9844 }, { "epoch": 0.5360995963597504, "grad_norm": 0.7136610616206259, "learning_rate": 0.00017172278955205136, "loss": 12.6081, "step": 9845 }, { "epoch": 0.5361540503563333, "grad_norm": 0.600625392297172, "learning_rate": 0.00017171664436981644, "loss": 12.3953, "step": 9846 }, { "epoch": 0.5362085043529163, "grad_norm": 0.6301836378631536, "learning_rate": 0.00017171049862990082, "loss": 12.0426, "step": 9847 }, { "epoch": 0.5362629583494993, "grad_norm": 0.6714064351021483, "learning_rate": 0.00017170435233235235, "loss": 12.2334, "step": 9848 }, { "epoch": 0.5363174123460823, "grad_norm": 0.706595530166399, "learning_rate": 0.0001716982054772188, "loss": 12.3248, "step": 9849 }, { "epoch": 0.5363718663426654, "grad_norm": 0.9192217290029557, "learning_rate": 0.00017169205806454797, "loss": 12.5167, "step": 9850 }, { "epoch": 0.5364263203392484, "grad_norm": 0.6517521190636777, "learning_rate": 0.0001716859100943877, "loss": 12.416, "step": 9851 }, { "epoch": 0.5364807743358314, "grad_norm": 0.5578077998753249, "learning_rate": 0.00017167976156678576, "loss": 12.3878, "step": 9852 }, { "epoch": 0.5365352283324144, "grad_norm": 0.5882604834303083, "learning_rate": 0.00017167361248178996, "loss": 12.3207, "step": 9853 }, { "epoch": 0.5365896823289974, "grad_norm": 0.609667657721763, "learning_rate": 0.00017166746283944816, "loss": 12.4551, "step": 9854 }, { "epoch": 0.5366441363255805, "grad_norm": 0.582794617796146, "learning_rate": 0.00017166131263980812, "loss": 12.4657, "step": 9855 }, { "epoch": 0.5366985903221635, "grad_norm": 0.6743278602101175, "learning_rate": 0.00017165516188291774, "loss": 12.4754, "step": 9856 }, { "epoch": 0.5367530443187465, "grad_norm": 0.615568670619058, "learning_rate": 0.00017164901056882474, "loss": 12.4393, "step": 9857 }, { "epoch": 0.5368074983153295, "grad_norm": 0.6096022612379464, "learning_rate": 0.00017164285869757705, "loss": 12.3968, "step": 9858 }, { "epoch": 0.5368619523119125, "grad_norm": 0.6849749417805655, "learning_rate": 0.0001716367062692225, "loss": 12.3788, "step": 9859 }, { "epoch": 0.5369164063084955, "grad_norm": 0.6798822650795383, "learning_rate": 0.0001716305532838089, "loss": 12.3282, "step": 9860 }, { "epoch": 0.5369708603050786, "grad_norm": 0.6290777902183159, "learning_rate": 0.00017162439974138406, "loss": 12.2592, "step": 9861 }, { "epoch": 0.5370253143016616, "grad_norm": 0.6034768067187262, "learning_rate": 0.0001716182456419959, "loss": 12.2475, "step": 9862 }, { "epoch": 0.5370797682982446, "grad_norm": 0.5760830659366515, "learning_rate": 0.00017161209098569228, "loss": 12.2946, "step": 9863 }, { "epoch": 0.5371342222948275, "grad_norm": 0.7272255772367627, "learning_rate": 0.00017160593577252102, "loss": 12.2768, "step": 9864 }, { "epoch": 0.5371886762914105, "grad_norm": 0.6931591166323151, "learning_rate": 0.00017159978000252997, "loss": 12.4305, "step": 9865 }, { "epoch": 0.5372431302879935, "grad_norm": 0.5948832222548013, "learning_rate": 0.00017159362367576706, "loss": 12.4153, "step": 9866 }, { "epoch": 0.5372975842845766, "grad_norm": 0.5757398920114913, "learning_rate": 0.0001715874667922801, "loss": 12.4143, "step": 9867 }, { "epoch": 0.5373520382811596, "grad_norm": 0.6099368336620453, "learning_rate": 0.00017158130935211697, "loss": 12.4317, "step": 9868 }, { "epoch": 0.5374064922777426, "grad_norm": 0.577534146116071, "learning_rate": 0.00017157515135532563, "loss": 12.4152, "step": 9869 }, { "epoch": 0.5374609462743256, "grad_norm": 0.6264027513040179, "learning_rate": 0.00017156899280195388, "loss": 12.4539, "step": 9870 }, { "epoch": 0.5375154002709086, "grad_norm": 0.5972610104177685, "learning_rate": 0.00017156283369204964, "loss": 12.4466, "step": 9871 }, { "epoch": 0.5375698542674916, "grad_norm": 0.6230837192559272, "learning_rate": 0.00017155667402566081, "loss": 12.3657, "step": 9872 }, { "epoch": 0.5376243082640747, "grad_norm": 0.587195318824459, "learning_rate": 0.00017155051380283526, "loss": 12.4126, "step": 9873 }, { "epoch": 0.5376787622606577, "grad_norm": 0.6135653893384941, "learning_rate": 0.00017154435302362098, "loss": 12.5026, "step": 9874 }, { "epoch": 0.5377332162572407, "grad_norm": 0.7244856558646927, "learning_rate": 0.00017153819168806575, "loss": 12.3259, "step": 9875 }, { "epoch": 0.5377876702538237, "grad_norm": 0.6456028771473306, "learning_rate": 0.00017153202979621756, "loss": 12.4228, "step": 9876 }, { "epoch": 0.5378421242504067, "grad_norm": 0.6730816084709884, "learning_rate": 0.00017152586734812432, "loss": 12.3757, "step": 9877 }, { "epoch": 0.5378965782469897, "grad_norm": 0.6794971975431958, "learning_rate": 0.00017151970434383393, "loss": 12.2723, "step": 9878 }, { "epoch": 0.5379510322435728, "grad_norm": 0.6763756419411008, "learning_rate": 0.0001715135407833943, "loss": 12.2668, "step": 9879 }, { "epoch": 0.5380054862401558, "grad_norm": 0.6493172433037002, "learning_rate": 0.00017150737666685344, "loss": 12.2866, "step": 9880 }, { "epoch": 0.5380599402367388, "grad_norm": 0.59240170327952, "learning_rate": 0.0001715012119942592, "loss": 12.3289, "step": 9881 }, { "epoch": 0.5381143942333217, "grad_norm": 0.7180125834990535, "learning_rate": 0.00017149504676565954, "loss": 12.4702, "step": 9882 }, { "epoch": 0.5381688482299047, "grad_norm": 0.6399415374368775, "learning_rate": 0.00017148888098110244, "loss": 12.333, "step": 9883 }, { "epoch": 0.5382233022264877, "grad_norm": 0.6806379391970908, "learning_rate": 0.00017148271464063574, "loss": 12.4301, "step": 9884 }, { "epoch": 0.5382777562230708, "grad_norm": 0.687678562545243, "learning_rate": 0.00017147654774430753, "loss": 12.289, "step": 9885 }, { "epoch": 0.5383322102196538, "grad_norm": 0.7406076855620836, "learning_rate": 0.00017147038029216566, "loss": 12.5046, "step": 9886 }, { "epoch": 0.5383866642162368, "grad_norm": 0.7058845572303966, "learning_rate": 0.00017146421228425815, "loss": 12.3471, "step": 9887 }, { "epoch": 0.5384411182128198, "grad_norm": 0.6568949054306508, "learning_rate": 0.00017145804372063295, "loss": 12.4707, "step": 9888 }, { "epoch": 0.5384955722094028, "grad_norm": 0.8311696208643002, "learning_rate": 0.000171451874601338, "loss": 12.5002, "step": 9889 }, { "epoch": 0.5385500262059859, "grad_norm": 0.7688399949416185, "learning_rate": 0.00017144570492642127, "loss": 12.1362, "step": 9890 }, { "epoch": 0.5386044802025689, "grad_norm": 0.7953307230082599, "learning_rate": 0.0001714395346959308, "loss": 12.5111, "step": 9891 }, { "epoch": 0.5386589341991519, "grad_norm": 0.6708137368403178, "learning_rate": 0.00017143336390991451, "loss": 12.3523, "step": 9892 }, { "epoch": 0.5387133881957349, "grad_norm": 0.6617697777443491, "learning_rate": 0.0001714271925684204, "loss": 12.3565, "step": 9893 }, { "epoch": 0.5387678421923179, "grad_norm": 0.6677088180209257, "learning_rate": 0.00017142102067149647, "loss": 12.4431, "step": 9894 }, { "epoch": 0.5388222961889009, "grad_norm": 0.583972439542916, "learning_rate": 0.00017141484821919068, "loss": 12.4952, "step": 9895 }, { "epoch": 0.538876750185484, "grad_norm": 0.717347607231282, "learning_rate": 0.0001714086752115511, "loss": 12.3808, "step": 9896 }, { "epoch": 0.538931204182067, "grad_norm": 0.6595704544058548, "learning_rate": 0.00017140250164862563, "loss": 12.4887, "step": 9897 }, { "epoch": 0.53898565817865, "grad_norm": 0.6045318794148249, "learning_rate": 0.00017139632753046237, "loss": 12.3262, "step": 9898 }, { "epoch": 0.539040112175233, "grad_norm": 0.6248683294042436, "learning_rate": 0.0001713901528571093, "loss": 12.3629, "step": 9899 }, { "epoch": 0.539094566171816, "grad_norm": 0.6558946098725765, "learning_rate": 0.0001713839776286144, "loss": 12.5053, "step": 9900 }, { "epoch": 0.5391490201683989, "grad_norm": 0.6491145532761589, "learning_rate": 0.00017137780184502574, "loss": 12.4372, "step": 9901 }, { "epoch": 0.539203474164982, "grad_norm": 0.6929592259968642, "learning_rate": 0.0001713716255063913, "loss": 12.3385, "step": 9902 }, { "epoch": 0.539257928161565, "grad_norm": 0.6085672631263342, "learning_rate": 0.00017136544861275917, "loss": 12.3886, "step": 9903 }, { "epoch": 0.539312382158148, "grad_norm": 0.5951696288345016, "learning_rate": 0.0001713592711641773, "loss": 12.2177, "step": 9904 }, { "epoch": 0.539366836154731, "grad_norm": 0.6179769387240053, "learning_rate": 0.00017135309316069382, "loss": 12.3241, "step": 9905 }, { "epoch": 0.539421290151314, "grad_norm": 0.6229249331386542, "learning_rate": 0.00017134691460235667, "loss": 12.386, "step": 9906 }, { "epoch": 0.539475744147897, "grad_norm": 0.6238149637276051, "learning_rate": 0.000171340735489214, "loss": 12.4721, "step": 9907 }, { "epoch": 0.5395301981444801, "grad_norm": 0.6983604951623149, "learning_rate": 0.00017133455582131374, "loss": 12.4289, "step": 9908 }, { "epoch": 0.5395846521410631, "grad_norm": 0.6441007491976519, "learning_rate": 0.00017132837559870407, "loss": 12.4204, "step": 9909 }, { "epoch": 0.5396391061376461, "grad_norm": 0.5910950723368182, "learning_rate": 0.00017132219482143298, "loss": 12.5011, "step": 9910 }, { "epoch": 0.5396935601342291, "grad_norm": 0.6216212968450316, "learning_rate": 0.00017131601348954853, "loss": 12.4437, "step": 9911 }, { "epoch": 0.5397480141308121, "grad_norm": 0.5852352225909889, "learning_rate": 0.0001713098316030988, "loss": 12.2003, "step": 9912 }, { "epoch": 0.5398024681273951, "grad_norm": 0.6166938464575757, "learning_rate": 0.00017130364916213186, "loss": 12.3618, "step": 9913 }, { "epoch": 0.5398569221239782, "grad_norm": 0.6585778033684443, "learning_rate": 0.00017129746616669576, "loss": 12.4934, "step": 9914 }, { "epoch": 0.5399113761205612, "grad_norm": 0.5912480969666162, "learning_rate": 0.00017129128261683863, "loss": 12.3631, "step": 9915 }, { "epoch": 0.5399658301171442, "grad_norm": 0.580616927395157, "learning_rate": 0.00017128509851260858, "loss": 12.4729, "step": 9916 }, { "epoch": 0.5400202841137272, "grad_norm": 0.6584369629337092, "learning_rate": 0.0001712789138540536, "loss": 12.4848, "step": 9917 }, { "epoch": 0.5400747381103101, "grad_norm": 0.6410516271365128, "learning_rate": 0.0001712727286412218, "loss": 12.1675, "step": 9918 }, { "epoch": 0.5401291921068931, "grad_norm": 0.6732803345789796, "learning_rate": 0.00017126654287416137, "loss": 12.3771, "step": 9919 }, { "epoch": 0.5401836461034762, "grad_norm": 0.733088340455232, "learning_rate": 0.0001712603565529203, "loss": 12.5004, "step": 9920 }, { "epoch": 0.5402381001000592, "grad_norm": 0.580679752904814, "learning_rate": 0.0001712541696775468, "loss": 12.3632, "step": 9921 }, { "epoch": 0.5402925540966422, "grad_norm": 0.6136950209253756, "learning_rate": 0.00017124798224808888, "loss": 12.3828, "step": 9922 }, { "epoch": 0.5403470080932252, "grad_norm": 0.666839386087224, "learning_rate": 0.00017124179426459475, "loss": 12.3126, "step": 9923 }, { "epoch": 0.5404014620898082, "grad_norm": 0.5709082525476437, "learning_rate": 0.00017123560572711245, "loss": 12.3103, "step": 9924 }, { "epoch": 0.5404559160863913, "grad_norm": 0.7116277072509676, "learning_rate": 0.00017122941663569012, "loss": 12.4852, "step": 9925 }, { "epoch": 0.5405103700829743, "grad_norm": 0.6497855971920484, "learning_rate": 0.00017122322699037593, "loss": 12.441, "step": 9926 }, { "epoch": 0.5405648240795573, "grad_norm": 0.671279944463193, "learning_rate": 0.00017121703679121798, "loss": 12.3126, "step": 9927 }, { "epoch": 0.5406192780761403, "grad_norm": 0.5639881985662691, "learning_rate": 0.00017121084603826438, "loss": 12.3626, "step": 9928 }, { "epoch": 0.5406737320727233, "grad_norm": 0.6615971952124131, "learning_rate": 0.00017120465473156334, "loss": 12.501, "step": 9929 }, { "epoch": 0.5407281860693063, "grad_norm": 0.617198644141775, "learning_rate": 0.00017119846287116296, "loss": 12.466, "step": 9930 }, { "epoch": 0.5407826400658894, "grad_norm": 0.6471490533856712, "learning_rate": 0.00017119227045711135, "loss": 12.3989, "step": 9931 }, { "epoch": 0.5408370940624724, "grad_norm": 0.5967197680845255, "learning_rate": 0.00017118607748945673, "loss": 12.3919, "step": 9932 }, { "epoch": 0.5408915480590554, "grad_norm": 0.6255433894022774, "learning_rate": 0.00017117988396824724, "loss": 12.3807, "step": 9933 }, { "epoch": 0.5409460020556384, "grad_norm": 0.6593193505343585, "learning_rate": 0.00017117368989353105, "loss": 12.4509, "step": 9934 }, { "epoch": 0.5410004560522214, "grad_norm": 0.6377169358835266, "learning_rate": 0.00017116749526535627, "loss": 12.3728, "step": 9935 }, { "epoch": 0.5410549100488043, "grad_norm": 0.5641378918184573, "learning_rate": 0.00017116130008377117, "loss": 12.3931, "step": 9936 }, { "epoch": 0.5411093640453875, "grad_norm": 0.5968616645753267, "learning_rate": 0.0001711551043488238, "loss": 12.4199, "step": 9937 }, { "epoch": 0.5411638180419704, "grad_norm": 0.6743990900177875, "learning_rate": 0.00017114890806056243, "loss": 12.4669, "step": 9938 }, { "epoch": 0.5412182720385534, "grad_norm": 0.6191577597038501, "learning_rate": 0.0001711427112190352, "loss": 12.5016, "step": 9939 }, { "epoch": 0.5412727260351364, "grad_norm": 0.5782102735631727, "learning_rate": 0.0001711365138242904, "loss": 12.2758, "step": 9940 }, { "epoch": 0.5413271800317194, "grad_norm": 0.6275492666648163, "learning_rate": 0.00017113031587637608, "loss": 12.3231, "step": 9941 }, { "epoch": 0.5413816340283024, "grad_norm": 0.5737239024137321, "learning_rate": 0.0001711241173753405, "loss": 12.2454, "step": 9942 }, { "epoch": 0.5414360880248855, "grad_norm": 0.5841077817920884, "learning_rate": 0.00017111791832123184, "loss": 12.3617, "step": 9943 }, { "epoch": 0.5414905420214685, "grad_norm": 0.5125432900399025, "learning_rate": 0.00017111171871409835, "loss": 12.2638, "step": 9944 }, { "epoch": 0.5415449960180515, "grad_norm": 0.627035765756675, "learning_rate": 0.00017110551855398817, "loss": 12.427, "step": 9945 }, { "epoch": 0.5415994500146345, "grad_norm": 0.6375482313018308, "learning_rate": 0.0001710993178409496, "loss": 12.319, "step": 9946 }, { "epoch": 0.5416539040112175, "grad_norm": 0.6194514620473593, "learning_rate": 0.00017109311657503078, "loss": 12.3979, "step": 9947 }, { "epoch": 0.5417083580078005, "grad_norm": 0.6710169185222208, "learning_rate": 0.00017108691475627996, "loss": 12.3065, "step": 9948 }, { "epoch": 0.5417628120043836, "grad_norm": 0.6494699821871501, "learning_rate": 0.00017108071238474537, "loss": 12.4168, "step": 9949 }, { "epoch": 0.5418172660009666, "grad_norm": 0.6433398470683385, "learning_rate": 0.00017107450946047528, "loss": 12.3035, "step": 9950 }, { "epoch": 0.5418717199975496, "grad_norm": 0.5649414899964764, "learning_rate": 0.00017106830598351784, "loss": 12.3674, "step": 9951 }, { "epoch": 0.5419261739941326, "grad_norm": 0.5838873388720432, "learning_rate": 0.00017106210195392136, "loss": 12.4877, "step": 9952 }, { "epoch": 0.5419806279907156, "grad_norm": 0.6215618900230296, "learning_rate": 0.00017105589737173403, "loss": 12.3699, "step": 9953 }, { "epoch": 0.5420350819872986, "grad_norm": 0.5427453094602321, "learning_rate": 0.00017104969223700415, "loss": 12.2128, "step": 9954 }, { "epoch": 0.5420895359838817, "grad_norm": 0.5825640076004421, "learning_rate": 0.00017104348654977994, "loss": 12.3704, "step": 9955 }, { "epoch": 0.5421439899804646, "grad_norm": 0.611865041734815, "learning_rate": 0.00017103728031010967, "loss": 12.3571, "step": 9956 }, { "epoch": 0.5421984439770476, "grad_norm": 0.6584718062782783, "learning_rate": 0.0001710310735180416, "loss": 12.4224, "step": 9957 }, { "epoch": 0.5422528979736306, "grad_norm": 0.5394590262995481, "learning_rate": 0.000171024866173624, "loss": 12.3904, "step": 9958 }, { "epoch": 0.5423073519702136, "grad_norm": 0.6251349184276233, "learning_rate": 0.00017101865827690512, "loss": 12.4797, "step": 9959 }, { "epoch": 0.5423618059667967, "grad_norm": 0.6043745031335845, "learning_rate": 0.0001710124498279332, "loss": 12.4913, "step": 9960 }, { "epoch": 0.5424162599633797, "grad_norm": 0.6416028422300966, "learning_rate": 0.00017100624082675662, "loss": 12.3101, "step": 9961 }, { "epoch": 0.5424707139599627, "grad_norm": 0.586956971175586, "learning_rate": 0.00017100003127342358, "loss": 12.4128, "step": 9962 }, { "epoch": 0.5425251679565457, "grad_norm": 0.5895692580881068, "learning_rate": 0.0001709938211679824, "loss": 12.3444, "step": 9963 }, { "epoch": 0.5425796219531287, "grad_norm": 0.6892529704657986, "learning_rate": 0.00017098761051048133, "loss": 12.6901, "step": 9964 }, { "epoch": 0.5426340759497117, "grad_norm": 0.5922876095576688, "learning_rate": 0.00017098139930096874, "loss": 12.1998, "step": 9965 }, { "epoch": 0.5426885299462948, "grad_norm": 0.6353639822693112, "learning_rate": 0.00017097518753949286, "loss": 12.3397, "step": 9966 }, { "epoch": 0.5427429839428778, "grad_norm": 0.6014554204328489, "learning_rate": 0.00017096897522610202, "loss": 12.4212, "step": 9967 }, { "epoch": 0.5427974379394608, "grad_norm": 0.6129355783996963, "learning_rate": 0.00017096276236084452, "loss": 12.3241, "step": 9968 }, { "epoch": 0.5428518919360438, "grad_norm": 0.6168784082239562, "learning_rate": 0.0001709565489437687, "loss": 12.3898, "step": 9969 }, { "epoch": 0.5429063459326268, "grad_norm": 0.5455036782520887, "learning_rate": 0.00017095033497492286, "loss": 12.2893, "step": 9970 }, { "epoch": 0.5429607999292098, "grad_norm": 0.6710022912122282, "learning_rate": 0.00017094412045435528, "loss": 12.3584, "step": 9971 }, { "epoch": 0.5430152539257929, "grad_norm": 0.6179083424353565, "learning_rate": 0.00017093790538211433, "loss": 12.579, "step": 9972 }, { "epoch": 0.5430697079223759, "grad_norm": 0.5654658232435088, "learning_rate": 0.00017093168975824838, "loss": 12.3976, "step": 9973 }, { "epoch": 0.5431241619189588, "grad_norm": 0.5869301776598929, "learning_rate": 0.0001709254735828057, "loss": 12.4018, "step": 9974 }, { "epoch": 0.5431786159155418, "grad_norm": 0.5699624764075849, "learning_rate": 0.0001709192568558346, "loss": 12.3251, "step": 9975 }, { "epoch": 0.5432330699121248, "grad_norm": 0.6164089422163238, "learning_rate": 0.00017091303957738347, "loss": 12.4561, "step": 9976 }, { "epoch": 0.5432875239087078, "grad_norm": 0.6110377746277542, "learning_rate": 0.00017090682174750069, "loss": 12.3636, "step": 9977 }, { "epoch": 0.5433419779052909, "grad_norm": 0.6089179595565541, "learning_rate": 0.00017090060336623456, "loss": 12.4271, "step": 9978 }, { "epoch": 0.5433964319018739, "grad_norm": 0.6118550994671074, "learning_rate": 0.00017089438443363344, "loss": 12.4179, "step": 9979 }, { "epoch": 0.5434508858984569, "grad_norm": 0.7076959617147793, "learning_rate": 0.0001708881649497457, "loss": 12.4697, "step": 9980 }, { "epoch": 0.5435053398950399, "grad_norm": 0.6383711008143893, "learning_rate": 0.00017088194491461967, "loss": 12.1758, "step": 9981 }, { "epoch": 0.5435597938916229, "grad_norm": 0.6280069425682963, "learning_rate": 0.00017087572432830382, "loss": 12.3434, "step": 9982 }, { "epoch": 0.5436142478882059, "grad_norm": 0.6599043049497079, "learning_rate": 0.0001708695031908464, "loss": 12.4343, "step": 9983 }, { "epoch": 0.543668701884789, "grad_norm": 0.5897655562410326, "learning_rate": 0.00017086328150229586, "loss": 12.3205, "step": 9984 }, { "epoch": 0.543723155881372, "grad_norm": 0.7437039719219413, "learning_rate": 0.00017085705926270058, "loss": 12.4402, "step": 9985 }, { "epoch": 0.543777609877955, "grad_norm": 0.6357530106706325, "learning_rate": 0.00017085083647210887, "loss": 12.3601, "step": 9986 }, { "epoch": 0.543832063874538, "grad_norm": 0.6374285771150914, "learning_rate": 0.0001708446131305692, "loss": 12.5205, "step": 9987 }, { "epoch": 0.543886517871121, "grad_norm": 0.5896956056677658, "learning_rate": 0.00017083838923812993, "loss": 12.3106, "step": 9988 }, { "epoch": 0.5439409718677041, "grad_norm": 0.6115372665262078, "learning_rate": 0.00017083216479483947, "loss": 12.3934, "step": 9989 }, { "epoch": 0.5439954258642871, "grad_norm": 0.6755730245071239, "learning_rate": 0.00017082593980074625, "loss": 12.3121, "step": 9990 }, { "epoch": 0.5440498798608701, "grad_norm": 0.6178738428597439, "learning_rate": 0.00017081971425589857, "loss": 12.3926, "step": 9991 }, { "epoch": 0.544104333857453, "grad_norm": 0.6346592570491943, "learning_rate": 0.00017081348816034496, "loss": 12.5108, "step": 9992 }, { "epoch": 0.544158787854036, "grad_norm": 0.8524667556741318, "learning_rate": 0.00017080726151413381, "loss": 12.514, "step": 9993 }, { "epoch": 0.544213241850619, "grad_norm": 0.6071086314951164, "learning_rate": 0.00017080103431731352, "loss": 12.494, "step": 9994 }, { "epoch": 0.5442676958472021, "grad_norm": 0.5870626705245917, "learning_rate": 0.00017079480656993247, "loss": 12.3616, "step": 9995 }, { "epoch": 0.5443221498437851, "grad_norm": 0.6119188183797124, "learning_rate": 0.00017078857827203917, "loss": 12.5749, "step": 9996 }, { "epoch": 0.5443766038403681, "grad_norm": 0.6068760617708026, "learning_rate": 0.00017078234942368198, "loss": 12.4392, "step": 9997 }, { "epoch": 0.5444310578369511, "grad_norm": 0.6126603120868624, "learning_rate": 0.00017077612002490942, "loss": 12.3884, "step": 9998 }, { "epoch": 0.5444855118335341, "grad_norm": 0.5868375210438797, "learning_rate": 0.00017076989007576985, "loss": 12.3587, "step": 9999 }, { "epoch": 0.5445399658301171, "grad_norm": 0.6864597119491456, "learning_rate": 0.00017076365957631174, "loss": 12.4161, "step": 10000 }, { "epoch": 0.5445944198267002, "grad_norm": 0.622011095040674, "learning_rate": 0.00017075742852658355, "loss": 12.3495, "step": 10001 }, { "epoch": 0.5446488738232832, "grad_norm": 0.5766789005686187, "learning_rate": 0.00017075119692663374, "loss": 12.4083, "step": 10002 }, { "epoch": 0.5447033278198662, "grad_norm": 0.5888590620506866, "learning_rate": 0.0001707449647765108, "loss": 12.2743, "step": 10003 }, { "epoch": 0.5447577818164492, "grad_norm": 0.5877421821137575, "learning_rate": 0.00017073873207626309, "loss": 12.3029, "step": 10004 }, { "epoch": 0.5448122358130322, "grad_norm": 0.618434215481345, "learning_rate": 0.00017073249882593912, "loss": 12.2655, "step": 10005 }, { "epoch": 0.5448666898096152, "grad_norm": 0.6291436904487675, "learning_rate": 0.00017072626502558742, "loss": 12.3931, "step": 10006 }, { "epoch": 0.5449211438061983, "grad_norm": 0.5865487556526245, "learning_rate": 0.0001707200306752564, "loss": 12.2635, "step": 10007 }, { "epoch": 0.5449755978027813, "grad_norm": 0.6079297100272073, "learning_rate": 0.00017071379577499458, "loss": 12.4051, "step": 10008 }, { "epoch": 0.5450300517993643, "grad_norm": 0.6016245121971452, "learning_rate": 0.00017070756032485043, "loss": 12.3416, "step": 10009 }, { "epoch": 0.5450845057959472, "grad_norm": 0.5856170250429272, "learning_rate": 0.00017070132432487242, "loss": 12.3265, "step": 10010 }, { "epoch": 0.5451389597925302, "grad_norm": 0.6551742421540866, "learning_rate": 0.00017069508777510904, "loss": 12.4072, "step": 10011 }, { "epoch": 0.5451934137891132, "grad_norm": 0.666724847008867, "learning_rate": 0.00017068885067560884, "loss": 12.453, "step": 10012 }, { "epoch": 0.5452478677856963, "grad_norm": 0.5980244266505121, "learning_rate": 0.00017068261302642025, "loss": 12.4869, "step": 10013 }, { "epoch": 0.5453023217822793, "grad_norm": 0.6442562753056685, "learning_rate": 0.00017067637482759182, "loss": 12.4074, "step": 10014 }, { "epoch": 0.5453567757788623, "grad_norm": 0.5781895247879832, "learning_rate": 0.00017067013607917204, "loss": 12.2852, "step": 10015 }, { "epoch": 0.5454112297754453, "grad_norm": 0.5659964634909223, "learning_rate": 0.00017066389678120942, "loss": 12.3117, "step": 10016 }, { "epoch": 0.5454656837720283, "grad_norm": 0.5984647462305562, "learning_rate": 0.00017065765693375254, "loss": 12.2896, "step": 10017 }, { "epoch": 0.5455201377686113, "grad_norm": 0.5927320661463614, "learning_rate": 0.0001706514165368498, "loss": 12.3072, "step": 10018 }, { "epoch": 0.5455745917651944, "grad_norm": 0.6037064365778668, "learning_rate": 0.00017064517559054983, "loss": 12.3729, "step": 10019 }, { "epoch": 0.5456290457617774, "grad_norm": 0.5755367621481371, "learning_rate": 0.00017063893409490115, "loss": 12.409, "step": 10020 }, { "epoch": 0.5456834997583604, "grad_norm": 0.6320289142883861, "learning_rate": 0.00017063269204995222, "loss": 12.3564, "step": 10021 }, { "epoch": 0.5457379537549434, "grad_norm": 0.6002473882539896, "learning_rate": 0.00017062644945575167, "loss": 12.3511, "step": 10022 }, { "epoch": 0.5457924077515264, "grad_norm": 0.5886482899613877, "learning_rate": 0.000170620206312348, "loss": 12.3213, "step": 10023 }, { "epoch": 0.5458468617481095, "grad_norm": 0.5881266992555014, "learning_rate": 0.00017061396261978978, "loss": 12.3273, "step": 10024 }, { "epoch": 0.5459013157446925, "grad_norm": 0.6677865513934675, "learning_rate": 0.00017060771837812552, "loss": 12.448, "step": 10025 }, { "epoch": 0.5459557697412755, "grad_norm": 0.5722455480945151, "learning_rate": 0.0001706014735874038, "loss": 12.4596, "step": 10026 }, { "epoch": 0.5460102237378585, "grad_norm": 0.657568952206474, "learning_rate": 0.00017059522824767318, "loss": 12.4349, "step": 10027 }, { "epoch": 0.5460646777344415, "grad_norm": 0.6101702886040364, "learning_rate": 0.00017058898235898225, "loss": 12.1885, "step": 10028 }, { "epoch": 0.5461191317310244, "grad_norm": 0.6164690188553941, "learning_rate": 0.00017058273592137954, "loss": 12.3377, "step": 10029 }, { "epoch": 0.5461735857276075, "grad_norm": 0.5743311102393344, "learning_rate": 0.00017057648893491363, "loss": 12.3184, "step": 10030 }, { "epoch": 0.5462280397241905, "grad_norm": 0.6036523761968633, "learning_rate": 0.0001705702413996331, "loss": 12.3641, "step": 10031 }, { "epoch": 0.5462824937207735, "grad_norm": 0.6380111248095689, "learning_rate": 0.00017056399331558656, "loss": 12.3799, "step": 10032 }, { "epoch": 0.5463369477173565, "grad_norm": 0.7449245804750874, "learning_rate": 0.00017055774468282257, "loss": 12.4631, "step": 10033 }, { "epoch": 0.5463914017139395, "grad_norm": 0.602894598301536, "learning_rate": 0.00017055149550138974, "loss": 12.4042, "step": 10034 }, { "epoch": 0.5464458557105225, "grad_norm": 0.5657734285602912, "learning_rate": 0.0001705452457713366, "loss": 12.451, "step": 10035 }, { "epoch": 0.5465003097071056, "grad_norm": 0.576113903088083, "learning_rate": 0.0001705389954927118, "loss": 12.3255, "step": 10036 }, { "epoch": 0.5465547637036886, "grad_norm": 0.6616302304908845, "learning_rate": 0.000170532744665564, "loss": 12.4435, "step": 10037 }, { "epoch": 0.5466092177002716, "grad_norm": 0.6821592199096708, "learning_rate": 0.00017052649328994174, "loss": 12.5157, "step": 10038 }, { "epoch": 0.5466636716968546, "grad_norm": 0.6096340643614813, "learning_rate": 0.0001705202413658936, "loss": 12.4992, "step": 10039 }, { "epoch": 0.5467181256934376, "grad_norm": 0.6176713315553298, "learning_rate": 0.0001705139888934683, "loss": 12.2644, "step": 10040 }, { "epoch": 0.5467725796900206, "grad_norm": 0.5861837007873087, "learning_rate": 0.00017050773587271433, "loss": 12.2729, "step": 10041 }, { "epoch": 0.5468270336866037, "grad_norm": 0.5764941831254127, "learning_rate": 0.0001705014823036804, "loss": 12.3468, "step": 10042 }, { "epoch": 0.5468814876831867, "grad_norm": 0.6265747671891215, "learning_rate": 0.00017049522818641513, "loss": 12.3629, "step": 10043 }, { "epoch": 0.5469359416797697, "grad_norm": 0.8012125745132999, "learning_rate": 0.00017048897352096713, "loss": 12.3787, "step": 10044 }, { "epoch": 0.5469903956763527, "grad_norm": 0.6351309622415717, "learning_rate": 0.00017048271830738507, "loss": 12.4865, "step": 10045 }, { "epoch": 0.5470448496729357, "grad_norm": 0.5905862443496168, "learning_rate": 0.00017047646254571755, "loss": 12.322, "step": 10046 }, { "epoch": 0.5470993036695186, "grad_norm": 0.6815468712793522, "learning_rate": 0.00017047020623601328, "loss": 12.4611, "step": 10047 }, { "epoch": 0.5471537576661017, "grad_norm": 0.6174565560323156, "learning_rate": 0.00017046394937832084, "loss": 12.329, "step": 10048 }, { "epoch": 0.5472082116626847, "grad_norm": 0.5276648368815748, "learning_rate": 0.00017045769197268892, "loss": 12.2771, "step": 10049 }, { "epoch": 0.5472626656592677, "grad_norm": 0.6482574075836264, "learning_rate": 0.00017045143401916613, "loss": 12.3731, "step": 10050 }, { "epoch": 0.5473171196558507, "grad_norm": 0.7609587337450041, "learning_rate": 0.00017044517551780125, "loss": 12.3264, "step": 10051 }, { "epoch": 0.5473715736524337, "grad_norm": 0.5531086997696827, "learning_rate": 0.0001704389164686428, "loss": 12.3699, "step": 10052 }, { "epoch": 0.5474260276490167, "grad_norm": 0.6909288910934123, "learning_rate": 0.00017043265687173955, "loss": 12.4163, "step": 10053 }, { "epoch": 0.5474804816455998, "grad_norm": 0.6600937201127297, "learning_rate": 0.00017042639672714015, "loss": 12.3825, "step": 10054 }, { "epoch": 0.5475349356421828, "grad_norm": 0.69648958811755, "learning_rate": 0.0001704201360348933, "loss": 12.2439, "step": 10055 }, { "epoch": 0.5475893896387658, "grad_norm": 0.5877556274036734, "learning_rate": 0.00017041387479504764, "loss": 12.2162, "step": 10056 }, { "epoch": 0.5476438436353488, "grad_norm": 0.5761911274527789, "learning_rate": 0.00017040761300765188, "loss": 12.3922, "step": 10057 }, { "epoch": 0.5476982976319318, "grad_norm": 0.7246599447399307, "learning_rate": 0.00017040135067275473, "loss": 12.3671, "step": 10058 }, { "epoch": 0.5477527516285149, "grad_norm": 0.5770976085961329, "learning_rate": 0.00017039508779040485, "loss": 12.3825, "step": 10059 }, { "epoch": 0.5478072056250979, "grad_norm": 0.6303142158584125, "learning_rate": 0.00017038882436065097, "loss": 12.4369, "step": 10060 }, { "epoch": 0.5478616596216809, "grad_norm": 0.6858517319715612, "learning_rate": 0.0001703825603835418, "loss": 12.349, "step": 10061 }, { "epoch": 0.5479161136182639, "grad_norm": 0.6793237037550006, "learning_rate": 0.000170376295859126, "loss": 12.3091, "step": 10062 }, { "epoch": 0.5479705676148469, "grad_norm": 0.6272479682486032, "learning_rate": 0.00017037003078745238, "loss": 12.3884, "step": 10063 }, { "epoch": 0.5480250216114299, "grad_norm": 0.6504731696059936, "learning_rate": 0.00017036376516856955, "loss": 12.387, "step": 10064 }, { "epoch": 0.548079475608013, "grad_norm": 0.6217625065902543, "learning_rate": 0.00017035749900252628, "loss": 12.4143, "step": 10065 }, { "epoch": 0.548133929604596, "grad_norm": 0.5958225152012777, "learning_rate": 0.00017035123228937134, "loss": 12.2557, "step": 10066 }, { "epoch": 0.5481883836011789, "grad_norm": 0.6090956331991192, "learning_rate": 0.0001703449650291534, "loss": 12.3631, "step": 10067 }, { "epoch": 0.5482428375977619, "grad_norm": 0.6552031333626795, "learning_rate": 0.00017033869722192122, "loss": 12.4323, "step": 10068 }, { "epoch": 0.5482972915943449, "grad_norm": 0.6393619229917588, "learning_rate": 0.00017033242886772354, "loss": 12.3953, "step": 10069 }, { "epoch": 0.5483517455909279, "grad_norm": 0.5917431234863446, "learning_rate": 0.00017032615996660905, "loss": 12.4018, "step": 10070 }, { "epoch": 0.548406199587511, "grad_norm": 0.6115700758668392, "learning_rate": 0.0001703198905186266, "loss": 12.4389, "step": 10071 }, { "epoch": 0.548460653584094, "grad_norm": 0.5654829808206024, "learning_rate": 0.0001703136205238249, "loss": 12.3738, "step": 10072 }, { "epoch": 0.548515107580677, "grad_norm": 0.628121296976426, "learning_rate": 0.00017030734998225265, "loss": 12.5239, "step": 10073 }, { "epoch": 0.54856956157726, "grad_norm": 0.721825207992574, "learning_rate": 0.00017030107889395865, "loss": 12.4119, "step": 10074 }, { "epoch": 0.548624015573843, "grad_norm": 0.5710087081755535, "learning_rate": 0.0001702948072589917, "loss": 12.3945, "step": 10075 }, { "epoch": 0.548678469570426, "grad_norm": 0.5846170878386958, "learning_rate": 0.0001702885350774005, "loss": 12.3839, "step": 10076 }, { "epoch": 0.5487329235670091, "grad_norm": 0.6058575657945217, "learning_rate": 0.00017028226234923395, "loss": 12.3056, "step": 10077 }, { "epoch": 0.5487873775635921, "grad_norm": 0.5312676469096749, "learning_rate": 0.00017027598907454067, "loss": 12.4184, "step": 10078 }, { "epoch": 0.5488418315601751, "grad_norm": 0.6177746184293285, "learning_rate": 0.00017026971525336952, "loss": 12.4248, "step": 10079 }, { "epoch": 0.5488962855567581, "grad_norm": 0.7212320189974394, "learning_rate": 0.0001702634408857693, "loss": 12.3873, "step": 10080 }, { "epoch": 0.5489507395533411, "grad_norm": 0.6057541329016711, "learning_rate": 0.00017025716597178877, "loss": 12.4775, "step": 10081 }, { "epoch": 0.549005193549924, "grad_norm": 0.5892684255084142, "learning_rate": 0.00017025089051147675, "loss": 12.3358, "step": 10082 }, { "epoch": 0.5490596475465072, "grad_norm": 0.5561580135163213, "learning_rate": 0.00017024461450488202, "loss": 12.3502, "step": 10083 }, { "epoch": 0.5491141015430901, "grad_norm": 0.6613960500018959, "learning_rate": 0.00017023833795205338, "loss": 12.3686, "step": 10084 }, { "epoch": 0.5491685555396731, "grad_norm": 0.7155502582204928, "learning_rate": 0.00017023206085303965, "loss": 12.461, "step": 10085 }, { "epoch": 0.5492230095362561, "grad_norm": 0.6151275535683852, "learning_rate": 0.00017022578320788963, "loss": 12.3676, "step": 10086 }, { "epoch": 0.5492774635328391, "grad_norm": 0.6465807638215435, "learning_rate": 0.00017021950501665213, "loss": 12.3349, "step": 10087 }, { "epoch": 0.5493319175294221, "grad_norm": 0.6165783500464499, "learning_rate": 0.00017021322627937602, "loss": 12.4243, "step": 10088 }, { "epoch": 0.5493863715260052, "grad_norm": 0.6234214096780826, "learning_rate": 0.00017020694699611006, "loss": 12.4481, "step": 10089 }, { "epoch": 0.5494408255225882, "grad_norm": 0.6739255625363676, "learning_rate": 0.0001702006671669031, "loss": 12.4595, "step": 10090 }, { "epoch": 0.5494952795191712, "grad_norm": 0.6137959527599097, "learning_rate": 0.000170194386791804, "loss": 12.4325, "step": 10091 }, { "epoch": 0.5495497335157542, "grad_norm": 0.6374066951523512, "learning_rate": 0.00017018810587086155, "loss": 12.3797, "step": 10092 }, { "epoch": 0.5496041875123372, "grad_norm": 0.6517927348980738, "learning_rate": 0.00017018182440412468, "loss": 12.3723, "step": 10093 }, { "epoch": 0.5496586415089203, "grad_norm": 0.6960033350251431, "learning_rate": 0.0001701755423916421, "loss": 12.429, "step": 10094 }, { "epoch": 0.5497130955055033, "grad_norm": 0.6500400376490462, "learning_rate": 0.00017016925983346276, "loss": 12.4318, "step": 10095 }, { "epoch": 0.5497675495020863, "grad_norm": 0.6493288079349868, "learning_rate": 0.0001701629767296355, "loss": 12.3368, "step": 10096 }, { "epoch": 0.5498220034986693, "grad_norm": 0.6466650608087391, "learning_rate": 0.00017015669308020917, "loss": 12.3259, "step": 10097 }, { "epoch": 0.5498764574952523, "grad_norm": 0.620918020415698, "learning_rate": 0.00017015040888523263, "loss": 12.3392, "step": 10098 }, { "epoch": 0.5499309114918353, "grad_norm": 0.603558345767907, "learning_rate": 0.00017014412414475473, "loss": 12.3562, "step": 10099 }, { "epoch": 0.5499853654884184, "grad_norm": 0.688925327145977, "learning_rate": 0.00017013783885882434, "loss": 12.4577, "step": 10100 }, { "epoch": 0.5500398194850014, "grad_norm": 0.6370679160685081, "learning_rate": 0.00017013155302749038, "loss": 12.3625, "step": 10101 }, { "epoch": 0.5500942734815844, "grad_norm": 0.6302329868740851, "learning_rate": 0.0001701252666508017, "loss": 12.3363, "step": 10102 }, { "epoch": 0.5501487274781673, "grad_norm": 0.6859771904180949, "learning_rate": 0.0001701189797288072, "loss": 12.521, "step": 10103 }, { "epoch": 0.5502031814747503, "grad_norm": 0.5725597900784214, "learning_rate": 0.00017011269226155574, "loss": 12.2956, "step": 10104 }, { "epoch": 0.5502576354713333, "grad_norm": 0.6190597219118882, "learning_rate": 0.00017010640424909622, "loss": 12.4949, "step": 10105 }, { "epoch": 0.5503120894679164, "grad_norm": 0.6656002874448648, "learning_rate": 0.00017010011569147754, "loss": 12.3395, "step": 10106 }, { "epoch": 0.5503665434644994, "grad_norm": 0.6106957375460383, "learning_rate": 0.0001700938265887486, "loss": 12.2956, "step": 10107 }, { "epoch": 0.5504209974610824, "grad_norm": 0.6678328208838216, "learning_rate": 0.00017008753694095836, "loss": 12.2999, "step": 10108 }, { "epoch": 0.5504754514576654, "grad_norm": 0.6389549113603158, "learning_rate": 0.0001700812467481556, "loss": 12.3789, "step": 10109 }, { "epoch": 0.5505299054542484, "grad_norm": 0.6767434786700843, "learning_rate": 0.00017007495601038938, "loss": 12.4813, "step": 10110 }, { "epoch": 0.5505843594508314, "grad_norm": 0.6668464446484444, "learning_rate": 0.00017006866472770856, "loss": 12.3595, "step": 10111 }, { "epoch": 0.5506388134474145, "grad_norm": 0.5833039778043949, "learning_rate": 0.00017006237290016201, "loss": 12.3746, "step": 10112 }, { "epoch": 0.5506932674439975, "grad_norm": 0.6248730751987213, "learning_rate": 0.00017005608052779868, "loss": 12.3588, "step": 10113 }, { "epoch": 0.5507477214405805, "grad_norm": 0.6230806633482726, "learning_rate": 0.00017004978761066757, "loss": 12.4175, "step": 10114 }, { "epoch": 0.5508021754371635, "grad_norm": 0.6442464098858275, "learning_rate": 0.00017004349414881753, "loss": 12.3379, "step": 10115 }, { "epoch": 0.5508566294337465, "grad_norm": 0.6266003331634916, "learning_rate": 0.00017003720014229754, "loss": 12.2762, "step": 10116 }, { "epoch": 0.5509110834303295, "grad_norm": 0.552814280799876, "learning_rate": 0.00017003090559115656, "loss": 12.4019, "step": 10117 }, { "epoch": 0.5509655374269126, "grad_norm": 0.5373811486501685, "learning_rate": 0.0001700246104954435, "loss": 12.3237, "step": 10118 }, { "epoch": 0.5510199914234956, "grad_norm": 0.5684336237386249, "learning_rate": 0.0001700183148552073, "loss": 12.2904, "step": 10119 }, { "epoch": 0.5510744454200786, "grad_norm": 0.5888264690377857, "learning_rate": 0.00017001201867049696, "loss": 12.1247, "step": 10120 }, { "epoch": 0.5511288994166615, "grad_norm": 0.5779840814679416, "learning_rate": 0.0001700057219413614, "loss": 12.3402, "step": 10121 }, { "epoch": 0.5511833534132445, "grad_norm": 0.588683708129219, "learning_rate": 0.00016999942466784966, "loss": 12.4069, "step": 10122 }, { "epoch": 0.5512378074098276, "grad_norm": 0.678480509040718, "learning_rate": 0.00016999312685001062, "loss": 12.2873, "step": 10123 }, { "epoch": 0.5512922614064106, "grad_norm": 0.5976064724968893, "learning_rate": 0.0001699868284878933, "loss": 12.2787, "step": 10124 }, { "epoch": 0.5513467154029936, "grad_norm": 0.5916676144071182, "learning_rate": 0.00016998052958154666, "loss": 12.3731, "step": 10125 }, { "epoch": 0.5514011693995766, "grad_norm": 0.6197923034996309, "learning_rate": 0.00016997423013101966, "loss": 12.39, "step": 10126 }, { "epoch": 0.5514556233961596, "grad_norm": 0.6549741234780643, "learning_rate": 0.00016996793013636136, "loss": 12.4356, "step": 10127 }, { "epoch": 0.5515100773927426, "grad_norm": 0.6488966161442786, "learning_rate": 0.00016996162959762067, "loss": 12.4121, "step": 10128 }, { "epoch": 0.5515645313893257, "grad_norm": 0.5858223558061898, "learning_rate": 0.00016995532851484663, "loss": 12.4635, "step": 10129 }, { "epoch": 0.5516189853859087, "grad_norm": 0.6451824416075728, "learning_rate": 0.00016994902688808821, "loss": 12.5343, "step": 10130 }, { "epoch": 0.5516734393824917, "grad_norm": 0.6601972702580284, "learning_rate": 0.00016994272471739443, "loss": 12.3874, "step": 10131 }, { "epoch": 0.5517278933790747, "grad_norm": 0.6182476910168957, "learning_rate": 0.00016993642200281432, "loss": 12.1093, "step": 10132 }, { "epoch": 0.5517823473756577, "grad_norm": 0.6032874164440537, "learning_rate": 0.00016993011874439682, "loss": 12.5125, "step": 10133 }, { "epoch": 0.5518368013722407, "grad_norm": 0.6843307644197107, "learning_rate": 0.00016992381494219103, "loss": 12.4988, "step": 10134 }, { "epoch": 0.5518912553688238, "grad_norm": 0.6136490984502141, "learning_rate": 0.0001699175105962459, "loss": 12.4561, "step": 10135 }, { "epoch": 0.5519457093654068, "grad_norm": 0.5347265528309477, "learning_rate": 0.00016991120570661048, "loss": 12.365, "step": 10136 }, { "epoch": 0.5520001633619898, "grad_norm": 0.8540603107055513, "learning_rate": 0.00016990490027333385, "loss": 12.4968, "step": 10137 }, { "epoch": 0.5520546173585728, "grad_norm": 0.576818131022261, "learning_rate": 0.00016989859429646496, "loss": 12.3099, "step": 10138 }, { "epoch": 0.5521090713551557, "grad_norm": 0.6561478050780211, "learning_rate": 0.00016989228777605284, "loss": 12.3842, "step": 10139 }, { "epoch": 0.5521635253517387, "grad_norm": 0.655042351179101, "learning_rate": 0.0001698859807121466, "loss": 12.3027, "step": 10140 }, { "epoch": 0.5522179793483218, "grad_norm": 0.6798724729283924, "learning_rate": 0.00016987967310479527, "loss": 12.3973, "step": 10141 }, { "epoch": 0.5522724333449048, "grad_norm": 0.6240649516322961, "learning_rate": 0.00016987336495404788, "loss": 12.2627, "step": 10142 }, { "epoch": 0.5523268873414878, "grad_norm": 0.6839594413179446, "learning_rate": 0.00016986705625995346, "loss": 12.253, "step": 10143 }, { "epoch": 0.5523813413380708, "grad_norm": 0.7110332810078518, "learning_rate": 0.00016986074702256108, "loss": 12.3136, "step": 10144 }, { "epoch": 0.5524357953346538, "grad_norm": 0.6476071616334153, "learning_rate": 0.00016985443724191988, "loss": 12.2806, "step": 10145 }, { "epoch": 0.5524902493312368, "grad_norm": 0.6209525263190081, "learning_rate": 0.0001698481269180788, "loss": 12.3907, "step": 10146 }, { "epoch": 0.5525447033278199, "grad_norm": 0.7060291692093761, "learning_rate": 0.00016984181605108703, "loss": 12.3933, "step": 10147 }, { "epoch": 0.5525991573244029, "grad_norm": 0.6365440049787343, "learning_rate": 0.00016983550464099353, "loss": 12.3701, "step": 10148 }, { "epoch": 0.5526536113209859, "grad_norm": 0.6457145627912881, "learning_rate": 0.00016982919268784748, "loss": 12.3692, "step": 10149 }, { "epoch": 0.5527080653175689, "grad_norm": 0.5874194707290666, "learning_rate": 0.0001698228801916979, "loss": 12.339, "step": 10150 }, { "epoch": 0.5527625193141519, "grad_norm": 0.6827235132202331, "learning_rate": 0.0001698165671525939, "loss": 12.4536, "step": 10151 }, { "epoch": 0.5528169733107349, "grad_norm": 0.603995056239589, "learning_rate": 0.00016981025357058456, "loss": 12.2051, "step": 10152 }, { "epoch": 0.552871427307318, "grad_norm": 0.5555255216750008, "learning_rate": 0.00016980393944571897, "loss": 12.4311, "step": 10153 }, { "epoch": 0.552925881303901, "grad_norm": 0.6700662946116988, "learning_rate": 0.00016979762477804623, "loss": 12.2479, "step": 10154 }, { "epoch": 0.552980335300484, "grad_norm": 0.6441931767950304, "learning_rate": 0.0001697913095676155, "loss": 12.4213, "step": 10155 }, { "epoch": 0.553034789297067, "grad_norm": 0.6226291914939351, "learning_rate": 0.00016978499381447578, "loss": 12.5313, "step": 10156 }, { "epoch": 0.55308924329365, "grad_norm": 0.6237855423231016, "learning_rate": 0.0001697786775186763, "loss": 12.3455, "step": 10157 }, { "epoch": 0.553143697290233, "grad_norm": 0.650542570178941, "learning_rate": 0.0001697723606802661, "loss": 12.3525, "step": 10158 }, { "epoch": 0.553198151286816, "grad_norm": 0.650553829713924, "learning_rate": 0.00016976604329929434, "loss": 12.3732, "step": 10159 }, { "epoch": 0.553252605283399, "grad_norm": 0.6459488683075125, "learning_rate": 0.00016975972537581008, "loss": 12.3562, "step": 10160 }, { "epoch": 0.553307059279982, "grad_norm": 0.6128540893647996, "learning_rate": 0.00016975340690986252, "loss": 12.4002, "step": 10161 }, { "epoch": 0.553361513276565, "grad_norm": 0.5602485489371666, "learning_rate": 0.0001697470879015008, "loss": 12.2805, "step": 10162 }, { "epoch": 0.553415967273148, "grad_norm": 0.625146438344186, "learning_rate": 0.000169740768350774, "loss": 12.349, "step": 10163 }, { "epoch": 0.5534704212697311, "grad_norm": 0.6917484957140339, "learning_rate": 0.0001697344482577313, "loss": 12.3946, "step": 10164 }, { "epoch": 0.5535248752663141, "grad_norm": 0.5665765845757563, "learning_rate": 0.00016972812762242184, "loss": 12.3455, "step": 10165 }, { "epoch": 0.5535793292628971, "grad_norm": 0.7588018958176995, "learning_rate": 0.00016972180644489476, "loss": 12.3631, "step": 10166 }, { "epoch": 0.5536337832594801, "grad_norm": 0.6318823657954354, "learning_rate": 0.0001697154847251992, "loss": 12.3629, "step": 10167 }, { "epoch": 0.5536882372560631, "grad_norm": 0.6292295929223287, "learning_rate": 0.00016970916246338436, "loss": 12.3089, "step": 10168 }, { "epoch": 0.5537426912526461, "grad_norm": 0.6476934125782788, "learning_rate": 0.00016970283965949938, "loss": 12.3641, "step": 10169 }, { "epoch": 0.5537971452492292, "grad_norm": 0.5997142351922357, "learning_rate": 0.00016969651631359344, "loss": 12.3802, "step": 10170 }, { "epoch": 0.5538515992458122, "grad_norm": 0.6729543585756947, "learning_rate": 0.0001696901924257157, "loss": 12.3821, "step": 10171 }, { "epoch": 0.5539060532423952, "grad_norm": 0.6407345390836635, "learning_rate": 0.0001696838679959153, "loss": 12.3207, "step": 10172 }, { "epoch": 0.5539605072389782, "grad_norm": 0.5957211650140359, "learning_rate": 0.00016967754302424153, "loss": 12.3158, "step": 10173 }, { "epoch": 0.5540149612355612, "grad_norm": 0.6822451915992077, "learning_rate": 0.00016967121751074345, "loss": 12.4724, "step": 10174 }, { "epoch": 0.5540694152321441, "grad_norm": 0.6309367173751734, "learning_rate": 0.0001696648914554703, "loss": 12.2831, "step": 10175 }, { "epoch": 0.5541238692287273, "grad_norm": 0.6642919979889967, "learning_rate": 0.00016965856485847127, "loss": 12.4086, "step": 10176 }, { "epoch": 0.5541783232253102, "grad_norm": 0.6122523256520281, "learning_rate": 0.00016965223771979554, "loss": 12.3491, "step": 10177 }, { "epoch": 0.5542327772218932, "grad_norm": 0.5613470115381749, "learning_rate": 0.0001696459100394924, "loss": 12.2773, "step": 10178 }, { "epoch": 0.5542872312184762, "grad_norm": 0.667435452999538, "learning_rate": 0.0001696395818176109, "loss": 12.4032, "step": 10179 }, { "epoch": 0.5543416852150592, "grad_norm": 0.5723821218990318, "learning_rate": 0.00016963325305420038, "loss": 12.1993, "step": 10180 }, { "epoch": 0.5543961392116422, "grad_norm": 0.6263000496737242, "learning_rate": 0.00016962692374931, "loss": 12.4251, "step": 10181 }, { "epoch": 0.5544505932082253, "grad_norm": 0.5787263181251557, "learning_rate": 0.00016962059390298898, "loss": 12.2304, "step": 10182 }, { "epoch": 0.5545050472048083, "grad_norm": 0.6270003142827943, "learning_rate": 0.00016961426351528656, "loss": 12.3031, "step": 10183 }, { "epoch": 0.5545595012013913, "grad_norm": 0.654321820603327, "learning_rate": 0.00016960793258625193, "loss": 12.3821, "step": 10184 }, { "epoch": 0.5546139551979743, "grad_norm": 0.5579974955006948, "learning_rate": 0.00016960160111593434, "loss": 12.2972, "step": 10185 }, { "epoch": 0.5546684091945573, "grad_norm": 0.6189528881881449, "learning_rate": 0.00016959526910438304, "loss": 12.3706, "step": 10186 }, { "epoch": 0.5547228631911403, "grad_norm": 0.6198609688250306, "learning_rate": 0.00016958893655164725, "loss": 12.2323, "step": 10187 }, { "epoch": 0.5547773171877234, "grad_norm": 0.6120967966749081, "learning_rate": 0.00016958260345777623, "loss": 12.3337, "step": 10188 }, { "epoch": 0.5548317711843064, "grad_norm": 0.6552890680808466, "learning_rate": 0.00016957626982281926, "loss": 12.3121, "step": 10189 }, { "epoch": 0.5548862251808894, "grad_norm": 0.6545837701604202, "learning_rate": 0.00016956993564682548, "loss": 12.4834, "step": 10190 }, { "epoch": 0.5549406791774724, "grad_norm": 0.5906345025180717, "learning_rate": 0.0001695636009298442, "loss": 12.3531, "step": 10191 }, { "epoch": 0.5549951331740554, "grad_norm": 0.6952886347113761, "learning_rate": 0.00016955726567192473, "loss": 12.3136, "step": 10192 }, { "epoch": 0.5550495871706385, "grad_norm": 0.7026250302498258, "learning_rate": 0.0001695509298731163, "loss": 12.4446, "step": 10193 }, { "epoch": 0.5551040411672215, "grad_norm": 0.5808109078704449, "learning_rate": 0.00016954459353346818, "loss": 12.408, "step": 10194 }, { "epoch": 0.5551584951638044, "grad_norm": 0.5422649522193064, "learning_rate": 0.00016953825665302964, "loss": 12.4269, "step": 10195 }, { "epoch": 0.5552129491603874, "grad_norm": 0.5874935703337653, "learning_rate": 0.00016953191923184995, "loss": 12.3086, "step": 10196 }, { "epoch": 0.5552674031569704, "grad_norm": 0.7222743809111872, "learning_rate": 0.00016952558126997837, "loss": 12.4497, "step": 10197 }, { "epoch": 0.5553218571535534, "grad_norm": 0.61007669020962, "learning_rate": 0.00016951924276746425, "loss": 12.4958, "step": 10198 }, { "epoch": 0.5553763111501365, "grad_norm": 0.6786102403273151, "learning_rate": 0.0001695129037243568, "loss": 12.3535, "step": 10199 }, { "epoch": 0.5554307651467195, "grad_norm": 0.6785427181748215, "learning_rate": 0.00016950656414070538, "loss": 12.3854, "step": 10200 }, { "epoch": 0.5554852191433025, "grad_norm": 0.6132769542909614, "learning_rate": 0.00016950022401655926, "loss": 12.3423, "step": 10201 }, { "epoch": 0.5555396731398855, "grad_norm": 0.6373404879138806, "learning_rate": 0.00016949388335196774, "loss": 12.3078, "step": 10202 }, { "epoch": 0.5555941271364685, "grad_norm": 0.6435052463794952, "learning_rate": 0.0001694875421469801, "loss": 12.4221, "step": 10203 }, { "epoch": 0.5556485811330515, "grad_norm": 0.5621073972746748, "learning_rate": 0.00016948120040164572, "loss": 12.2795, "step": 10204 }, { "epoch": 0.5557030351296346, "grad_norm": 0.7960388659184452, "learning_rate": 0.00016947485811601384, "loss": 12.5329, "step": 10205 }, { "epoch": 0.5557574891262176, "grad_norm": 0.7107737798884084, "learning_rate": 0.00016946851529013384, "loss": 12.3655, "step": 10206 }, { "epoch": 0.5558119431228006, "grad_norm": 0.8029325573958841, "learning_rate": 0.00016946217192405501, "loss": 12.4019, "step": 10207 }, { "epoch": 0.5558663971193836, "grad_norm": 0.7149271685846226, "learning_rate": 0.0001694558280178267, "loss": 12.5062, "step": 10208 }, { "epoch": 0.5559208511159666, "grad_norm": 0.615736615125051, "learning_rate": 0.0001694494835714982, "loss": 12.2179, "step": 10209 }, { "epoch": 0.5559753051125496, "grad_norm": 0.7058656536912001, "learning_rate": 0.00016944313858511886, "loss": 12.3396, "step": 10210 }, { "epoch": 0.5560297591091327, "grad_norm": 0.7412585447567388, "learning_rate": 0.00016943679305873803, "loss": 12.343, "step": 10211 }, { "epoch": 0.5560842131057157, "grad_norm": 0.6208830350613329, "learning_rate": 0.00016943044699240507, "loss": 12.3917, "step": 10212 }, { "epoch": 0.5561386671022986, "grad_norm": 0.7483707455695057, "learning_rate": 0.00016942410038616932, "loss": 12.3829, "step": 10213 }, { "epoch": 0.5561931210988816, "grad_norm": 0.6240932002378885, "learning_rate": 0.00016941775324008009, "loss": 12.3216, "step": 10214 }, { "epoch": 0.5562475750954646, "grad_norm": 0.654778145902865, "learning_rate": 0.00016941140555418679, "loss": 12.314, "step": 10215 }, { "epoch": 0.5563020290920476, "grad_norm": 0.7072083853390658, "learning_rate": 0.00016940505732853875, "loss": 12.3792, "step": 10216 }, { "epoch": 0.5563564830886307, "grad_norm": 0.6776859696620402, "learning_rate": 0.00016939870856318533, "loss": 12.5402, "step": 10217 }, { "epoch": 0.5564109370852137, "grad_norm": 0.7617841105325746, "learning_rate": 0.00016939235925817595, "loss": 12.5152, "step": 10218 }, { "epoch": 0.5564653910817967, "grad_norm": 0.560249323418678, "learning_rate": 0.00016938600941355993, "loss": 12.3686, "step": 10219 }, { "epoch": 0.5565198450783797, "grad_norm": 0.6307457001537511, "learning_rate": 0.00016937965902938666, "loss": 12.3672, "step": 10220 }, { "epoch": 0.5565742990749627, "grad_norm": 0.6787027309352848, "learning_rate": 0.00016937330810570553, "loss": 12.4397, "step": 10221 }, { "epoch": 0.5566287530715457, "grad_norm": 0.5921244360742721, "learning_rate": 0.00016936695664256592, "loss": 12.3569, "step": 10222 }, { "epoch": 0.5566832070681288, "grad_norm": 0.6766229603763056, "learning_rate": 0.00016936060464001724, "loss": 12.2169, "step": 10223 }, { "epoch": 0.5567376610647118, "grad_norm": 0.592326425822779, "learning_rate": 0.00016935425209810883, "loss": 12.3279, "step": 10224 }, { "epoch": 0.5567921150612948, "grad_norm": 0.6994820038562872, "learning_rate": 0.00016934789901689018, "loss": 12.3537, "step": 10225 }, { "epoch": 0.5568465690578778, "grad_norm": 0.6581250385024046, "learning_rate": 0.00016934154539641061, "loss": 12.1664, "step": 10226 }, { "epoch": 0.5569010230544608, "grad_norm": 0.6607535805023356, "learning_rate": 0.00016933519123671955, "loss": 12.3211, "step": 10227 }, { "epoch": 0.5569554770510439, "grad_norm": 0.6100695309818525, "learning_rate": 0.00016932883653786643, "loss": 12.4688, "step": 10228 }, { "epoch": 0.5570099310476269, "grad_norm": 0.6388569267528083, "learning_rate": 0.0001693224812999006, "loss": 12.3869, "step": 10229 }, { "epoch": 0.5570643850442099, "grad_norm": 0.5718089310683809, "learning_rate": 0.0001693161255228716, "loss": 12.3244, "step": 10230 }, { "epoch": 0.5571188390407928, "grad_norm": 0.644449510908533, "learning_rate": 0.00016930976920682874, "loss": 12.4063, "step": 10231 }, { "epoch": 0.5571732930373758, "grad_norm": 0.5847832011670807, "learning_rate": 0.0001693034123518215, "loss": 12.2008, "step": 10232 }, { "epoch": 0.5572277470339588, "grad_norm": 0.6134128450599079, "learning_rate": 0.0001692970549578993, "loss": 12.2552, "step": 10233 }, { "epoch": 0.5572822010305419, "grad_norm": 0.627849807475507, "learning_rate": 0.00016929069702511157, "loss": 12.488, "step": 10234 }, { "epoch": 0.5573366550271249, "grad_norm": 0.6765909156462427, "learning_rate": 0.0001692843385535078, "loss": 12.4191, "step": 10235 }, { "epoch": 0.5573911090237079, "grad_norm": 0.6289812044691674, "learning_rate": 0.00016927797954313734, "loss": 12.3557, "step": 10236 }, { "epoch": 0.5574455630202909, "grad_norm": 0.6431241847241467, "learning_rate": 0.00016927161999404975, "loss": 12.267, "step": 10237 }, { "epoch": 0.5575000170168739, "grad_norm": 0.6980001507327088, "learning_rate": 0.00016926525990629442, "loss": 12.423, "step": 10238 }, { "epoch": 0.5575544710134569, "grad_norm": 0.6329606744797434, "learning_rate": 0.00016925889927992075, "loss": 12.228, "step": 10239 }, { "epoch": 0.55760892501004, "grad_norm": 0.7344798136238765, "learning_rate": 0.00016925253811497833, "loss": 12.4051, "step": 10240 }, { "epoch": 0.557663379006623, "grad_norm": 0.6263600033987291, "learning_rate": 0.0001692461764115165, "loss": 12.2633, "step": 10241 }, { "epoch": 0.557717833003206, "grad_norm": 0.5513225969056479, "learning_rate": 0.00016923981416958484, "loss": 12.2928, "step": 10242 }, { "epoch": 0.557772286999789, "grad_norm": 0.5850290222487247, "learning_rate": 0.00016923345138923277, "loss": 12.2904, "step": 10243 }, { "epoch": 0.557826740996372, "grad_norm": 0.6890330863275668, "learning_rate": 0.00016922708807050975, "loss": 12.3438, "step": 10244 }, { "epoch": 0.557881194992955, "grad_norm": 0.6781093042843492, "learning_rate": 0.0001692207242134653, "loss": 12.5031, "step": 10245 }, { "epoch": 0.5579356489895381, "grad_norm": 0.668948770477723, "learning_rate": 0.00016921435981814888, "loss": 12.405, "step": 10246 }, { "epoch": 0.5579901029861211, "grad_norm": 0.677180028277028, "learning_rate": 0.00016920799488461002, "loss": 12.4159, "step": 10247 }, { "epoch": 0.558044556982704, "grad_norm": 0.6147996023247381, "learning_rate": 0.00016920162941289814, "loss": 12.4116, "step": 10248 }, { "epoch": 0.558099010979287, "grad_norm": 0.6424965673205608, "learning_rate": 0.0001691952634030628, "loss": 12.4225, "step": 10249 }, { "epoch": 0.55815346497587, "grad_norm": 0.5620499261722912, "learning_rate": 0.0001691888968551535, "loss": 12.2864, "step": 10250 }, { "epoch": 0.558207918972453, "grad_norm": 0.6026066590718929, "learning_rate": 0.00016918252976921974, "loss": 12.2664, "step": 10251 }, { "epoch": 0.5582623729690361, "grad_norm": 0.6285570429220287, "learning_rate": 0.000169176162145311, "loss": 12.395, "step": 10252 }, { "epoch": 0.5583168269656191, "grad_norm": 0.5369568931221449, "learning_rate": 0.00016916979398347686, "loss": 12.2386, "step": 10253 }, { "epoch": 0.5583712809622021, "grad_norm": 0.6300205811992972, "learning_rate": 0.00016916342528376676, "loss": 12.3813, "step": 10254 }, { "epoch": 0.5584257349587851, "grad_norm": 0.5810627694012088, "learning_rate": 0.00016915705604623029, "loss": 12.3711, "step": 10255 }, { "epoch": 0.5584801889553681, "grad_norm": 0.6655122858572802, "learning_rate": 0.00016915068627091696, "loss": 12.2979, "step": 10256 }, { "epoch": 0.5585346429519512, "grad_norm": 0.6426723429091911, "learning_rate": 0.00016914431595787627, "loss": 12.349, "step": 10257 }, { "epoch": 0.5585890969485342, "grad_norm": 0.5969986660433423, "learning_rate": 0.00016913794510715785, "loss": 12.1996, "step": 10258 }, { "epoch": 0.5586435509451172, "grad_norm": 0.6376400979426741, "learning_rate": 0.0001691315737188111, "loss": 12.2946, "step": 10259 }, { "epoch": 0.5586980049417002, "grad_norm": 0.6622662726743559, "learning_rate": 0.00016912520179288566, "loss": 12.4308, "step": 10260 }, { "epoch": 0.5587524589382832, "grad_norm": 0.611903090619256, "learning_rate": 0.00016911882932943106, "loss": 12.3932, "step": 10261 }, { "epoch": 0.5588069129348662, "grad_norm": 0.6023841459818996, "learning_rate": 0.00016911245632849684, "loss": 12.3412, "step": 10262 }, { "epoch": 0.5588613669314493, "grad_norm": 0.5844063374490647, "learning_rate": 0.0001691060827901326, "loss": 12.2498, "step": 10263 }, { "epoch": 0.5589158209280323, "grad_norm": 0.599319382728331, "learning_rate": 0.00016909970871438788, "loss": 12.3185, "step": 10264 }, { "epoch": 0.5589702749246153, "grad_norm": 0.5960458771528314, "learning_rate": 0.0001690933341013122, "loss": 12.4048, "step": 10265 }, { "epoch": 0.5590247289211983, "grad_norm": 0.64571759754071, "learning_rate": 0.00016908695895095517, "loss": 12.4689, "step": 10266 }, { "epoch": 0.5590791829177812, "grad_norm": 0.8047723957217814, "learning_rate": 0.00016908058326336634, "loss": 12.4611, "step": 10267 }, { "epoch": 0.5591336369143642, "grad_norm": 0.683586842980497, "learning_rate": 0.00016907420703859538, "loss": 12.2063, "step": 10268 }, { "epoch": 0.5591880909109473, "grad_norm": 0.7147507808213387, "learning_rate": 0.00016906783027669176, "loss": 12.4311, "step": 10269 }, { "epoch": 0.5592425449075303, "grad_norm": 0.5674940897992446, "learning_rate": 0.0001690614529777051, "loss": 12.4471, "step": 10270 }, { "epoch": 0.5592969989041133, "grad_norm": 0.557602325062582, "learning_rate": 0.00016905507514168502, "loss": 12.4233, "step": 10271 }, { "epoch": 0.5593514529006963, "grad_norm": 0.6316895645364624, "learning_rate": 0.00016904869676868107, "loss": 12.4648, "step": 10272 }, { "epoch": 0.5594059068972793, "grad_norm": 0.6515018637341894, "learning_rate": 0.0001690423178587429, "loss": 12.4008, "step": 10273 }, { "epoch": 0.5594603608938623, "grad_norm": 0.5898901503061371, "learning_rate": 0.00016903593841192008, "loss": 12.3275, "step": 10274 }, { "epoch": 0.5595148148904454, "grad_norm": 0.5662931590043602, "learning_rate": 0.00016902955842826222, "loss": 12.3504, "step": 10275 }, { "epoch": 0.5595692688870284, "grad_norm": 0.6778051602663335, "learning_rate": 0.00016902317790781895, "loss": 12.25, "step": 10276 }, { "epoch": 0.5596237228836114, "grad_norm": 0.6092788463144142, "learning_rate": 0.00016901679685063986, "loss": 12.3418, "step": 10277 }, { "epoch": 0.5596781768801944, "grad_norm": 0.618180036489811, "learning_rate": 0.0001690104152567746, "loss": 12.3607, "step": 10278 }, { "epoch": 0.5597326308767774, "grad_norm": 0.5860970509812605, "learning_rate": 0.00016900403312627277, "loss": 12.3366, "step": 10279 }, { "epoch": 0.5597870848733604, "grad_norm": 0.601893208709934, "learning_rate": 0.00016899765045918401, "loss": 12.458, "step": 10280 }, { "epoch": 0.5598415388699435, "grad_norm": 0.5861072547051973, "learning_rate": 0.00016899126725555794, "loss": 12.4812, "step": 10281 }, { "epoch": 0.5598959928665265, "grad_norm": 0.5875897748674739, "learning_rate": 0.0001689848835154442, "loss": 12.1096, "step": 10282 }, { "epoch": 0.5599504468631095, "grad_norm": 0.6160324753910715, "learning_rate": 0.00016897849923889246, "loss": 12.3096, "step": 10283 }, { "epoch": 0.5600049008596925, "grad_norm": 0.6142925610254328, "learning_rate": 0.0001689721144259523, "loss": 12.3904, "step": 10284 }, { "epoch": 0.5600593548562754, "grad_norm": 0.5637627348753075, "learning_rate": 0.00016896572907667347, "loss": 12.2946, "step": 10285 }, { "epoch": 0.5601138088528584, "grad_norm": 0.5518503687523599, "learning_rate": 0.00016895934319110555, "loss": 12.3553, "step": 10286 }, { "epoch": 0.5601682628494415, "grad_norm": 0.6328617298624295, "learning_rate": 0.00016895295676929817, "loss": 12.4334, "step": 10287 }, { "epoch": 0.5602227168460245, "grad_norm": 0.620085531994173, "learning_rate": 0.0001689465698113011, "loss": 12.3402, "step": 10288 }, { "epoch": 0.5602771708426075, "grad_norm": 0.5559505540958086, "learning_rate": 0.00016894018231716385, "loss": 12.2649, "step": 10289 }, { "epoch": 0.5603316248391905, "grad_norm": 0.7008838030079539, "learning_rate": 0.00016893379428693626, "loss": 12.4094, "step": 10290 }, { "epoch": 0.5603860788357735, "grad_norm": 0.605239972915833, "learning_rate": 0.0001689274057206679, "loss": 12.3578, "step": 10291 }, { "epoch": 0.5604405328323566, "grad_norm": 0.6596637595277397, "learning_rate": 0.00016892101661840846, "loss": 12.2733, "step": 10292 }, { "epoch": 0.5604949868289396, "grad_norm": 0.7951158901442011, "learning_rate": 0.00016891462698020768, "loss": 12.4856, "step": 10293 }, { "epoch": 0.5605494408255226, "grad_norm": 0.6574492755937013, "learning_rate": 0.00016890823680611517, "loss": 12.2087, "step": 10294 }, { "epoch": 0.5606038948221056, "grad_norm": 0.653813488593268, "learning_rate": 0.00016890184609618064, "loss": 12.1862, "step": 10295 }, { "epoch": 0.5606583488186886, "grad_norm": 0.7244385612792106, "learning_rate": 0.0001688954548504538, "loss": 12.3884, "step": 10296 }, { "epoch": 0.5607128028152716, "grad_norm": 0.6141729334360738, "learning_rate": 0.00016888906306898436, "loss": 12.3005, "step": 10297 }, { "epoch": 0.5607672568118547, "grad_norm": 0.7246938177635958, "learning_rate": 0.00016888267075182206, "loss": 12.5213, "step": 10298 }, { "epoch": 0.5608217108084377, "grad_norm": 0.7038881657945165, "learning_rate": 0.00016887627789901653, "loss": 12.5174, "step": 10299 }, { "epoch": 0.5608761648050207, "grad_norm": 0.6321702896975392, "learning_rate": 0.00016886988451061749, "loss": 12.2754, "step": 10300 }, { "epoch": 0.5609306188016037, "grad_norm": 0.6103387747847123, "learning_rate": 0.0001688634905866747, "loss": 12.5386, "step": 10301 }, { "epoch": 0.5609850727981867, "grad_norm": 0.732025469317435, "learning_rate": 0.00016885709612723783, "loss": 12.4871, "step": 10302 }, { "epoch": 0.5610395267947696, "grad_norm": 0.6325210977683443, "learning_rate": 0.00016885070113235667, "loss": 12.4183, "step": 10303 }, { "epoch": 0.5610939807913528, "grad_norm": 0.6393350500913199, "learning_rate": 0.00016884430560208088, "loss": 12.4113, "step": 10304 }, { "epoch": 0.5611484347879357, "grad_norm": 0.5909162202743099, "learning_rate": 0.00016883790953646025, "loss": 12.3757, "step": 10305 }, { "epoch": 0.5612028887845187, "grad_norm": 0.7084515897215775, "learning_rate": 0.0001688315129355445, "loss": 12.3812, "step": 10306 }, { "epoch": 0.5612573427811017, "grad_norm": 0.6225796371499448, "learning_rate": 0.00016882511579938334, "loss": 12.1873, "step": 10307 }, { "epoch": 0.5613117967776847, "grad_norm": 0.5897419700844331, "learning_rate": 0.00016881871812802652, "loss": 12.2328, "step": 10308 }, { "epoch": 0.5613662507742677, "grad_norm": 0.626257819909121, "learning_rate": 0.00016881231992152385, "loss": 12.2687, "step": 10309 }, { "epoch": 0.5614207047708508, "grad_norm": 0.6783609424573152, "learning_rate": 0.00016880592117992502, "loss": 12.4556, "step": 10310 }, { "epoch": 0.5614751587674338, "grad_norm": 0.5433012075072357, "learning_rate": 0.0001687995219032798, "loss": 12.3478, "step": 10311 }, { "epoch": 0.5615296127640168, "grad_norm": 0.6673882896234298, "learning_rate": 0.00016879312209163797, "loss": 12.5079, "step": 10312 }, { "epoch": 0.5615840667605998, "grad_norm": 0.636024906690236, "learning_rate": 0.00016878672174504926, "loss": 12.3692, "step": 10313 }, { "epoch": 0.5616385207571828, "grad_norm": 0.5898485469427649, "learning_rate": 0.00016878032086356352, "loss": 12.4541, "step": 10314 }, { "epoch": 0.5616929747537658, "grad_norm": 0.5545109457042696, "learning_rate": 0.00016877391944723042, "loss": 12.3289, "step": 10315 }, { "epoch": 0.5617474287503489, "grad_norm": 0.6048015573465777, "learning_rate": 0.00016876751749609982, "loss": 12.043, "step": 10316 }, { "epoch": 0.5618018827469319, "grad_norm": 0.6191032621169081, "learning_rate": 0.00016876111501022147, "loss": 12.4121, "step": 10317 }, { "epoch": 0.5618563367435149, "grad_norm": 0.6512499912882861, "learning_rate": 0.00016875471198964513, "loss": 12.4435, "step": 10318 }, { "epoch": 0.5619107907400979, "grad_norm": 0.5871615324615288, "learning_rate": 0.00016874830843442067, "loss": 12.3567, "step": 10319 }, { "epoch": 0.5619652447366809, "grad_norm": 0.5919840115805446, "learning_rate": 0.00016874190434459777, "loss": 12.4784, "step": 10320 }, { "epoch": 0.5620196987332639, "grad_norm": 0.5943262467755525, "learning_rate": 0.00016873549972022634, "loss": 12.2569, "step": 10321 }, { "epoch": 0.562074152729847, "grad_norm": 0.6056333912357738, "learning_rate": 0.00016872909456135612, "loss": 12.2407, "step": 10322 }, { "epoch": 0.56212860672643, "grad_norm": 0.6412606462162442, "learning_rate": 0.00016872268886803692, "loss": 12.4993, "step": 10323 }, { "epoch": 0.5621830607230129, "grad_norm": 0.5951322402046102, "learning_rate": 0.00016871628264031864, "loss": 12.2904, "step": 10324 }, { "epoch": 0.5622375147195959, "grad_norm": 0.5957303297392885, "learning_rate": 0.00016870987587825094, "loss": 12.2501, "step": 10325 }, { "epoch": 0.5622919687161789, "grad_norm": 0.6737073150568589, "learning_rate": 0.00016870346858188374, "loss": 12.3033, "step": 10326 }, { "epoch": 0.562346422712762, "grad_norm": 0.5901110815473725, "learning_rate": 0.00016869706075126688, "loss": 12.3207, "step": 10327 }, { "epoch": 0.562400876709345, "grad_norm": 0.6913805237600109, "learning_rate": 0.00016869065238645013, "loss": 12.2452, "step": 10328 }, { "epoch": 0.562455330705928, "grad_norm": 0.597764961757747, "learning_rate": 0.00016868424348748335, "loss": 12.3216, "step": 10329 }, { "epoch": 0.562509784702511, "grad_norm": 0.5970006749445501, "learning_rate": 0.00016867783405441638, "loss": 12.1318, "step": 10330 }, { "epoch": 0.562564238699094, "grad_norm": 0.5930412764100742, "learning_rate": 0.00016867142408729904, "loss": 12.3634, "step": 10331 }, { "epoch": 0.562618692695677, "grad_norm": 0.6212977251363766, "learning_rate": 0.0001686650135861812, "loss": 12.3131, "step": 10332 }, { "epoch": 0.5626731466922601, "grad_norm": 0.5688540545730704, "learning_rate": 0.00016865860255111268, "loss": 12.3945, "step": 10333 }, { "epoch": 0.5627276006888431, "grad_norm": 0.5789013058650888, "learning_rate": 0.0001686521909821434, "loss": 12.2789, "step": 10334 }, { "epoch": 0.5627820546854261, "grad_norm": 0.6335481871931581, "learning_rate": 0.00016864577887932313, "loss": 12.4302, "step": 10335 }, { "epoch": 0.5628365086820091, "grad_norm": 0.5939013797957428, "learning_rate": 0.00016863936624270177, "loss": 12.3346, "step": 10336 }, { "epoch": 0.5628909626785921, "grad_norm": 0.6006353953254291, "learning_rate": 0.0001686329530723292, "loss": 12.2658, "step": 10337 }, { "epoch": 0.5629454166751751, "grad_norm": 0.6047800477910621, "learning_rate": 0.0001686265393682553, "loss": 12.3286, "step": 10338 }, { "epoch": 0.5629998706717582, "grad_norm": 0.6373248187535643, "learning_rate": 0.0001686201251305299, "loss": 12.3815, "step": 10339 }, { "epoch": 0.5630543246683412, "grad_norm": 0.6356603420763361, "learning_rate": 0.00016861371035920288, "loss": 12.3143, "step": 10340 }, { "epoch": 0.5631087786649241, "grad_norm": 0.5582684519094009, "learning_rate": 0.0001686072950543242, "loss": 12.3037, "step": 10341 }, { "epoch": 0.5631632326615071, "grad_norm": 0.5789571181814924, "learning_rate": 0.0001686008792159436, "loss": 12.317, "step": 10342 }, { "epoch": 0.5632176866580901, "grad_norm": 0.6739099289032652, "learning_rate": 0.00016859446284411112, "loss": 12.3593, "step": 10343 }, { "epoch": 0.5632721406546731, "grad_norm": 0.5921743437082129, "learning_rate": 0.00016858804593887657, "loss": 12.274, "step": 10344 }, { "epoch": 0.5633265946512562, "grad_norm": 0.5708854669236476, "learning_rate": 0.0001685816285002899, "loss": 12.2073, "step": 10345 }, { "epoch": 0.5633810486478392, "grad_norm": 0.6308677383448402, "learning_rate": 0.00016857521052840096, "loss": 12.4676, "step": 10346 }, { "epoch": 0.5634355026444222, "grad_norm": 0.6795680317404507, "learning_rate": 0.0001685687920232597, "loss": 12.4565, "step": 10347 }, { "epoch": 0.5634899566410052, "grad_norm": 0.5569416277182532, "learning_rate": 0.000168562372984916, "loss": 12.347, "step": 10348 }, { "epoch": 0.5635444106375882, "grad_norm": 0.562407880823792, "learning_rate": 0.00016855595341341977, "loss": 12.3733, "step": 10349 }, { "epoch": 0.5635988646341712, "grad_norm": 0.5202134266489101, "learning_rate": 0.000168549533308821, "loss": 12.2996, "step": 10350 }, { "epoch": 0.5636533186307543, "grad_norm": 0.6628255911322798, "learning_rate": 0.00016854311267116954, "loss": 12.4203, "step": 10351 }, { "epoch": 0.5637077726273373, "grad_norm": 0.596815972075363, "learning_rate": 0.00016853669150051535, "loss": 12.3696, "step": 10352 }, { "epoch": 0.5637622266239203, "grad_norm": 0.5586849210176513, "learning_rate": 0.00016853026979690833, "loss": 12.2975, "step": 10353 }, { "epoch": 0.5638166806205033, "grad_norm": 0.6823568075738485, "learning_rate": 0.00016852384756039848, "loss": 12.443, "step": 10354 }, { "epoch": 0.5638711346170863, "grad_norm": 0.5933142978854868, "learning_rate": 0.00016851742479103563, "loss": 12.2016, "step": 10355 }, { "epoch": 0.5639255886136693, "grad_norm": 0.597479194997447, "learning_rate": 0.00016851100148886986, "loss": 12.3549, "step": 10356 }, { "epoch": 0.5639800426102524, "grad_norm": 0.6636350663771243, "learning_rate": 0.00016850457765395102, "loss": 12.4796, "step": 10357 }, { "epoch": 0.5640344966068354, "grad_norm": 0.6607380152702863, "learning_rate": 0.0001684981532863291, "loss": 12.2784, "step": 10358 }, { "epoch": 0.5640889506034183, "grad_norm": 0.6877667224482812, "learning_rate": 0.00016849172838605408, "loss": 12.403, "step": 10359 }, { "epoch": 0.5641434046000013, "grad_norm": 0.5659503162516681, "learning_rate": 0.00016848530295317585, "loss": 12.44, "step": 10360 }, { "epoch": 0.5641978585965843, "grad_norm": 0.685369008360853, "learning_rate": 0.00016847887698774445, "loss": 12.4309, "step": 10361 }, { "epoch": 0.5642523125931674, "grad_norm": 0.6320646119966852, "learning_rate": 0.0001684724504898098, "loss": 12.339, "step": 10362 }, { "epoch": 0.5643067665897504, "grad_norm": 0.6212202368714012, "learning_rate": 0.00016846602345942191, "loss": 12.4303, "step": 10363 }, { "epoch": 0.5643612205863334, "grad_norm": 0.6367487206819554, "learning_rate": 0.00016845959589663074, "loss": 12.421, "step": 10364 }, { "epoch": 0.5644156745829164, "grad_norm": 0.681183679084473, "learning_rate": 0.00016845316780148627, "loss": 12.3895, "step": 10365 }, { "epoch": 0.5644701285794994, "grad_norm": 0.6529961642623131, "learning_rate": 0.00016844673917403849, "loss": 12.4642, "step": 10366 }, { "epoch": 0.5645245825760824, "grad_norm": 0.621415406056903, "learning_rate": 0.00016844031001433734, "loss": 12.2171, "step": 10367 }, { "epoch": 0.5645790365726655, "grad_norm": 0.5613170596312487, "learning_rate": 0.00016843388032243293, "loss": 12.2744, "step": 10368 }, { "epoch": 0.5646334905692485, "grad_norm": 0.7287747104215444, "learning_rate": 0.00016842745009837515, "loss": 12.4659, "step": 10369 }, { "epoch": 0.5646879445658315, "grad_norm": 0.544141330334673, "learning_rate": 0.00016842101934221406, "loss": 12.2132, "step": 10370 }, { "epoch": 0.5647423985624145, "grad_norm": 0.6041199910559678, "learning_rate": 0.00016841458805399965, "loss": 12.3917, "step": 10371 }, { "epoch": 0.5647968525589975, "grad_norm": 0.6225477911605025, "learning_rate": 0.0001684081562337819, "loss": 12.4877, "step": 10372 }, { "epoch": 0.5648513065555805, "grad_norm": 0.589768904637465, "learning_rate": 0.0001684017238816109, "loss": 12.3144, "step": 10373 }, { "epoch": 0.5649057605521636, "grad_norm": 0.6221244535619938, "learning_rate": 0.0001683952909975366, "loss": 12.4835, "step": 10374 }, { "epoch": 0.5649602145487466, "grad_norm": 0.6425525815174361, "learning_rate": 0.00016838885758160905, "loss": 12.4115, "step": 10375 }, { "epoch": 0.5650146685453296, "grad_norm": 0.6606842162669248, "learning_rate": 0.0001683824236338783, "loss": 12.3993, "step": 10376 }, { "epoch": 0.5650691225419125, "grad_norm": 0.6084374603777637, "learning_rate": 0.0001683759891543943, "loss": 12.3677, "step": 10377 }, { "epoch": 0.5651235765384955, "grad_norm": 0.6991656978018909, "learning_rate": 0.00016836955414320715, "loss": 12.3793, "step": 10378 }, { "epoch": 0.5651780305350785, "grad_norm": 0.6138426062310326, "learning_rate": 0.00016836311860036693, "loss": 12.3313, "step": 10379 }, { "epoch": 0.5652324845316616, "grad_norm": 0.6275506547891637, "learning_rate": 0.0001683566825259236, "loss": 12.2722, "step": 10380 }, { "epoch": 0.5652869385282446, "grad_norm": 0.9161209021743207, "learning_rate": 0.00016835024591992724, "loss": 12.3421, "step": 10381 }, { "epoch": 0.5653413925248276, "grad_norm": 0.6321254761895913, "learning_rate": 0.00016834380878242794, "loss": 12.2944, "step": 10382 }, { "epoch": 0.5653958465214106, "grad_norm": 0.6345049944779382, "learning_rate": 0.00016833737111347568, "loss": 12.484, "step": 10383 }, { "epoch": 0.5654503005179936, "grad_norm": 0.623691170627115, "learning_rate": 0.0001683309329131206, "loss": 12.3452, "step": 10384 }, { "epoch": 0.5655047545145766, "grad_norm": 0.6169359428690868, "learning_rate": 0.0001683244941814127, "loss": 12.2734, "step": 10385 }, { "epoch": 0.5655592085111597, "grad_norm": 0.5911971693461668, "learning_rate": 0.00016831805491840208, "loss": 12.4244, "step": 10386 }, { "epoch": 0.5656136625077427, "grad_norm": 0.6138794288462133, "learning_rate": 0.0001683116151241388, "loss": 12.3843, "step": 10387 }, { "epoch": 0.5656681165043257, "grad_norm": 0.6087658301053654, "learning_rate": 0.00016830517479867298, "loss": 12.2409, "step": 10388 }, { "epoch": 0.5657225705009087, "grad_norm": 0.5051577573517587, "learning_rate": 0.00016829873394205464, "loss": 12.2384, "step": 10389 }, { "epoch": 0.5657770244974917, "grad_norm": 0.5880845343996741, "learning_rate": 0.0001682922925543339, "loss": 12.3321, "step": 10390 }, { "epoch": 0.5658314784940748, "grad_norm": 0.5928845999091579, "learning_rate": 0.00016828585063556083, "loss": 12.2001, "step": 10391 }, { "epoch": 0.5658859324906578, "grad_norm": 0.6827691259284938, "learning_rate": 0.0001682794081857855, "loss": 12.5109, "step": 10392 }, { "epoch": 0.5659403864872408, "grad_norm": 0.5642687647023201, "learning_rate": 0.0001682729652050581, "loss": 12.3254, "step": 10393 }, { "epoch": 0.5659948404838238, "grad_norm": 0.6148651923790508, "learning_rate": 0.00016826652169342867, "loss": 12.3495, "step": 10394 }, { "epoch": 0.5660492944804068, "grad_norm": 0.6452765348559973, "learning_rate": 0.00016826007765094732, "loss": 12.3754, "step": 10395 }, { "epoch": 0.5661037484769897, "grad_norm": 0.5965890572179519, "learning_rate": 0.00016825363307766412, "loss": 12.418, "step": 10396 }, { "epoch": 0.5661582024735728, "grad_norm": 0.5471346501778647, "learning_rate": 0.00016824718797362923, "loss": 12.3318, "step": 10397 }, { "epoch": 0.5662126564701558, "grad_norm": 0.6751846501375386, "learning_rate": 0.00016824074233889278, "loss": 12.5202, "step": 10398 }, { "epoch": 0.5662671104667388, "grad_norm": 0.6218888957514784, "learning_rate": 0.00016823429617350487, "loss": 12.276, "step": 10399 }, { "epoch": 0.5663215644633218, "grad_norm": 0.6492663612660368, "learning_rate": 0.00016822784947751563, "loss": 12.391, "step": 10400 }, { "epoch": 0.5663760184599048, "grad_norm": 0.6232618935944115, "learning_rate": 0.0001682214022509752, "loss": 12.3163, "step": 10401 }, { "epoch": 0.5664304724564878, "grad_norm": 0.649271965491751, "learning_rate": 0.00016821495449393368, "loss": 12.4013, "step": 10402 }, { "epoch": 0.5664849264530709, "grad_norm": 0.66748998807262, "learning_rate": 0.00016820850620644125, "loss": 12.2451, "step": 10403 }, { "epoch": 0.5665393804496539, "grad_norm": 0.7082320217361083, "learning_rate": 0.00016820205738854804, "loss": 12.2669, "step": 10404 }, { "epoch": 0.5665938344462369, "grad_norm": 0.6083087122633456, "learning_rate": 0.0001681956080403042, "loss": 12.344, "step": 10405 }, { "epoch": 0.5666482884428199, "grad_norm": 0.5845446637283482, "learning_rate": 0.00016818915816175985, "loss": 12.3627, "step": 10406 }, { "epoch": 0.5667027424394029, "grad_norm": 0.6304622759286626, "learning_rate": 0.00016818270775296519, "loss": 12.3687, "step": 10407 }, { "epoch": 0.5667571964359859, "grad_norm": 0.5473269293115471, "learning_rate": 0.00016817625681397034, "loss": 12.213, "step": 10408 }, { "epoch": 0.566811650432569, "grad_norm": 0.6984369382628332, "learning_rate": 0.00016816980534482552, "loss": 12.2926, "step": 10409 }, { "epoch": 0.566866104429152, "grad_norm": 0.6662395723038156, "learning_rate": 0.00016816335334558083, "loss": 12.4141, "step": 10410 }, { "epoch": 0.566920558425735, "grad_norm": 0.6210259725286218, "learning_rate": 0.0001681569008162865, "loss": 12.3193, "step": 10411 }, { "epoch": 0.566975012422318, "grad_norm": 0.5429922620912881, "learning_rate": 0.00016815044775699266, "loss": 12.2855, "step": 10412 }, { "epoch": 0.567029466418901, "grad_norm": 0.5977332368552712, "learning_rate": 0.0001681439941677495, "loss": 12.2268, "step": 10413 }, { "epoch": 0.5670839204154839, "grad_norm": 0.6159050614116562, "learning_rate": 0.00016813754004860724, "loss": 12.1776, "step": 10414 }, { "epoch": 0.567138374412067, "grad_norm": 0.582597585497368, "learning_rate": 0.00016813108539961603, "loss": 12.3413, "step": 10415 }, { "epoch": 0.56719282840865, "grad_norm": 0.6513474497024757, "learning_rate": 0.00016812463022082607, "loss": 12.394, "step": 10416 }, { "epoch": 0.567247282405233, "grad_norm": 0.5895717171760396, "learning_rate": 0.00016811817451228757, "loss": 12.3976, "step": 10417 }, { "epoch": 0.567301736401816, "grad_norm": 0.6145716743427717, "learning_rate": 0.00016811171827405073, "loss": 12.4249, "step": 10418 }, { "epoch": 0.567356190398399, "grad_norm": 0.6429917849423017, "learning_rate": 0.00016810526150616572, "loss": 12.3861, "step": 10419 }, { "epoch": 0.567410644394982, "grad_norm": 0.5341869197548992, "learning_rate": 0.0001680988042086828, "loss": 12.2854, "step": 10420 }, { "epoch": 0.5674650983915651, "grad_norm": 0.6370657440114558, "learning_rate": 0.00016809234638165212, "loss": 12.3632, "step": 10421 }, { "epoch": 0.5675195523881481, "grad_norm": 0.6345636872655567, "learning_rate": 0.00016808588802512398, "loss": 12.3265, "step": 10422 }, { "epoch": 0.5675740063847311, "grad_norm": 0.5308037534905589, "learning_rate": 0.00016807942913914855, "loss": 12.2471, "step": 10423 }, { "epoch": 0.5676284603813141, "grad_norm": 0.7217080141177963, "learning_rate": 0.00016807296972377604, "loss": 12.3981, "step": 10424 }, { "epoch": 0.5676829143778971, "grad_norm": 0.6042083497364779, "learning_rate": 0.00016806650977905672, "loss": 12.2527, "step": 10425 }, { "epoch": 0.5677373683744802, "grad_norm": 0.5745092248979259, "learning_rate": 0.00016806004930504078, "loss": 12.1766, "step": 10426 }, { "epoch": 0.5677918223710632, "grad_norm": 0.5705259495585121, "learning_rate": 0.0001680535883017785, "loss": 12.2859, "step": 10427 }, { "epoch": 0.5678462763676462, "grad_norm": 0.5417193186976931, "learning_rate": 0.0001680471267693201, "loss": 12.4, "step": 10428 }, { "epoch": 0.5679007303642292, "grad_norm": 0.5834722615703474, "learning_rate": 0.00016804066470771584, "loss": 12.3393, "step": 10429 }, { "epoch": 0.5679551843608122, "grad_norm": 0.7036143280210932, "learning_rate": 0.00016803420211701598, "loss": 12.5283, "step": 10430 }, { "epoch": 0.5680096383573952, "grad_norm": 0.5979312547772383, "learning_rate": 0.00016802773899727072, "loss": 12.4094, "step": 10431 }, { "epoch": 0.5680640923539783, "grad_norm": 0.6090176540758172, "learning_rate": 0.00016802127534853035, "loss": 12.285, "step": 10432 }, { "epoch": 0.5681185463505612, "grad_norm": 0.6339843877396358, "learning_rate": 0.00016801481117084514, "loss": 12.3795, "step": 10433 }, { "epoch": 0.5681730003471442, "grad_norm": 0.6647505095307242, "learning_rate": 0.0001680083464642654, "loss": 12.4322, "step": 10434 }, { "epoch": 0.5682274543437272, "grad_norm": 0.5818159565641814, "learning_rate": 0.00016800188122884132, "loss": 12.4275, "step": 10435 }, { "epoch": 0.5682819083403102, "grad_norm": 0.6228052874764259, "learning_rate": 0.00016799541546462317, "loss": 12.3025, "step": 10436 }, { "epoch": 0.5683363623368932, "grad_norm": 0.6335954949628722, "learning_rate": 0.0001679889491716613, "loss": 12.4529, "step": 10437 }, { "epoch": 0.5683908163334763, "grad_norm": 0.6038600388041353, "learning_rate": 0.00016798248235000597, "loss": 12.3209, "step": 10438 }, { "epoch": 0.5684452703300593, "grad_norm": 0.6117645541689094, "learning_rate": 0.00016797601499970744, "loss": 12.3349, "step": 10439 }, { "epoch": 0.5684997243266423, "grad_norm": 0.7230541776819218, "learning_rate": 0.00016796954712081604, "loss": 12.4097, "step": 10440 }, { "epoch": 0.5685541783232253, "grad_norm": 0.6428774400716817, "learning_rate": 0.000167963078713382, "loss": 12.394, "step": 10441 }, { "epoch": 0.5686086323198083, "grad_norm": 0.6505580888411074, "learning_rate": 0.00016795660977745572, "loss": 12.1937, "step": 10442 }, { "epoch": 0.5686630863163913, "grad_norm": 0.6437749858847259, "learning_rate": 0.0001679501403130874, "loss": 12.2522, "step": 10443 }, { "epoch": 0.5687175403129744, "grad_norm": 0.5941359930214059, "learning_rate": 0.00016794367032032742, "loss": 12.3814, "step": 10444 }, { "epoch": 0.5687719943095574, "grad_norm": 0.6410446086869066, "learning_rate": 0.00016793719979922605, "loss": 12.4891, "step": 10445 }, { "epoch": 0.5688264483061404, "grad_norm": 0.5759340350528791, "learning_rate": 0.00016793072874983362, "loss": 12.3136, "step": 10446 }, { "epoch": 0.5688809023027234, "grad_norm": 0.6219826681075947, "learning_rate": 0.00016792425717220046, "loss": 12.4172, "step": 10447 }, { "epoch": 0.5689353562993064, "grad_norm": 0.5902531241211377, "learning_rate": 0.00016791778506637688, "loss": 12.3866, "step": 10448 }, { "epoch": 0.5689898102958894, "grad_norm": 0.5902582942414536, "learning_rate": 0.0001679113124324132, "loss": 12.3021, "step": 10449 }, { "epoch": 0.5690442642924725, "grad_norm": 0.5751312773984891, "learning_rate": 0.0001679048392703598, "loss": 12.3701, "step": 10450 }, { "epoch": 0.5690987182890554, "grad_norm": 0.6790559083330199, "learning_rate": 0.00016789836558026697, "loss": 12.3923, "step": 10451 }, { "epoch": 0.5691531722856384, "grad_norm": 0.6452000054378788, "learning_rate": 0.00016789189136218505, "loss": 12.3022, "step": 10452 }, { "epoch": 0.5692076262822214, "grad_norm": 0.5691936565874609, "learning_rate": 0.0001678854166161644, "loss": 12.3396, "step": 10453 }, { "epoch": 0.5692620802788044, "grad_norm": 0.5693653034216821, "learning_rate": 0.00016787894134225536, "loss": 12.386, "step": 10454 }, { "epoch": 0.5693165342753874, "grad_norm": 0.6508692340480876, "learning_rate": 0.0001678724655405083, "loss": 12.2845, "step": 10455 }, { "epoch": 0.5693709882719705, "grad_norm": 0.6186699806695385, "learning_rate": 0.00016786598921097358, "loss": 12.3824, "step": 10456 }, { "epoch": 0.5694254422685535, "grad_norm": 0.7158082493266653, "learning_rate": 0.00016785951235370153, "loss": 12.4277, "step": 10457 }, { "epoch": 0.5694798962651365, "grad_norm": 0.656346164357275, "learning_rate": 0.0001678530349687425, "loss": 12.3382, "step": 10458 }, { "epoch": 0.5695343502617195, "grad_norm": 0.7203813696149984, "learning_rate": 0.00016784655705614693, "loss": 12.3475, "step": 10459 }, { "epoch": 0.5695888042583025, "grad_norm": 0.6609864561370712, "learning_rate": 0.00016784007861596518, "loss": 12.4706, "step": 10460 }, { "epoch": 0.5696432582548856, "grad_norm": 0.6048227744145742, "learning_rate": 0.00016783359964824755, "loss": 12.3333, "step": 10461 }, { "epoch": 0.5696977122514686, "grad_norm": 0.7298358242850399, "learning_rate": 0.0001678271201530445, "loss": 12.3152, "step": 10462 }, { "epoch": 0.5697521662480516, "grad_norm": 0.6721553190851784, "learning_rate": 0.00016782064013040637, "loss": 12.3147, "step": 10463 }, { "epoch": 0.5698066202446346, "grad_norm": 0.6208164219046698, "learning_rate": 0.0001678141595803836, "loss": 12.3959, "step": 10464 }, { "epoch": 0.5698610742412176, "grad_norm": 0.7267473495148199, "learning_rate": 0.00016780767850302654, "loss": 12.2043, "step": 10465 }, { "epoch": 0.5699155282378006, "grad_norm": 0.5815895018152115, "learning_rate": 0.0001678011968983856, "loss": 12.3113, "step": 10466 }, { "epoch": 0.5699699822343837, "grad_norm": 0.6407164249371863, "learning_rate": 0.0001677947147665112, "loss": 12.335, "step": 10467 }, { "epoch": 0.5700244362309667, "grad_norm": 0.7096471186991803, "learning_rate": 0.00016778823210745366, "loss": 12.314, "step": 10468 }, { "epoch": 0.5700788902275497, "grad_norm": 0.6327973720906753, "learning_rate": 0.0001677817489212635, "loss": 12.3507, "step": 10469 }, { "epoch": 0.5701333442241326, "grad_norm": 0.5987693162255612, "learning_rate": 0.00016777526520799115, "loss": 12.2983, "step": 10470 }, { "epoch": 0.5701877982207156, "grad_norm": 0.6552558142622182, "learning_rate": 0.0001677687809676869, "loss": 12.3778, "step": 10471 }, { "epoch": 0.5702422522172986, "grad_norm": 0.5967141861925086, "learning_rate": 0.00016776229620040124, "loss": 12.3893, "step": 10472 }, { "epoch": 0.5702967062138817, "grad_norm": 0.6176664295765997, "learning_rate": 0.00016775581090618463, "loss": 12.4305, "step": 10473 }, { "epoch": 0.5703511602104647, "grad_norm": 0.6591993654093905, "learning_rate": 0.00016774932508508748, "loss": 12.3443, "step": 10474 }, { "epoch": 0.5704056142070477, "grad_norm": 0.584863376348845, "learning_rate": 0.0001677428387371602, "loss": 12.3306, "step": 10475 }, { "epoch": 0.5704600682036307, "grad_norm": 0.5963330913592126, "learning_rate": 0.00016773635186245324, "loss": 12.4846, "step": 10476 }, { "epoch": 0.5705145222002137, "grad_norm": 0.6438054090971593, "learning_rate": 0.00016772986446101707, "loss": 12.3858, "step": 10477 }, { "epoch": 0.5705689761967967, "grad_norm": 0.6064314578781214, "learning_rate": 0.0001677233765329021, "loss": 12.3667, "step": 10478 }, { "epoch": 0.5706234301933798, "grad_norm": 0.5348696070350466, "learning_rate": 0.00016771688807815883, "loss": 12.3922, "step": 10479 }, { "epoch": 0.5706778841899628, "grad_norm": 0.6136181266951442, "learning_rate": 0.00016771039909683767, "loss": 12.3923, "step": 10480 }, { "epoch": 0.5707323381865458, "grad_norm": 0.6356924851395139, "learning_rate": 0.00016770390958898904, "loss": 12.4682, "step": 10481 }, { "epoch": 0.5707867921831288, "grad_norm": 0.5611718815605147, "learning_rate": 0.0001676974195546635, "loss": 12.4042, "step": 10482 }, { "epoch": 0.5708412461797118, "grad_norm": 0.5599668471880284, "learning_rate": 0.00016769092899391146, "loss": 12.3262, "step": 10483 }, { "epoch": 0.5708957001762948, "grad_norm": 0.6186802999347444, "learning_rate": 0.0001676844379067834, "loss": 12.3928, "step": 10484 }, { "epoch": 0.5709501541728779, "grad_norm": 0.5786533893527239, "learning_rate": 0.00016767794629332983, "loss": 12.331, "step": 10485 }, { "epoch": 0.5710046081694609, "grad_norm": 0.5934158335680023, "learning_rate": 0.00016767145415360116, "loss": 12.1695, "step": 10486 }, { "epoch": 0.5710590621660439, "grad_norm": 0.6016145692911932, "learning_rate": 0.00016766496148764792, "loss": 12.2989, "step": 10487 }, { "epoch": 0.5711135161626268, "grad_norm": 0.6239976307642991, "learning_rate": 0.0001676584682955206, "loss": 12.3911, "step": 10488 }, { "epoch": 0.5711679701592098, "grad_norm": 0.6082768551019266, "learning_rate": 0.0001676519745772697, "loss": 12.0827, "step": 10489 }, { "epoch": 0.5712224241557928, "grad_norm": 0.5871346045320904, "learning_rate": 0.00016764548033294568, "loss": 12.4464, "step": 10490 }, { "epoch": 0.5712768781523759, "grad_norm": 0.613933828774959, "learning_rate": 0.00016763898556259907, "loss": 12.3568, "step": 10491 }, { "epoch": 0.5713313321489589, "grad_norm": 0.5991384426015036, "learning_rate": 0.00016763249026628037, "loss": 12.4625, "step": 10492 }, { "epoch": 0.5713857861455419, "grad_norm": 0.5514872373075452, "learning_rate": 0.00016762599444404005, "loss": 12.383, "step": 10493 }, { "epoch": 0.5714402401421249, "grad_norm": 0.5531371468233106, "learning_rate": 0.0001676194980959287, "loss": 12.3299, "step": 10494 }, { "epoch": 0.5714946941387079, "grad_norm": 0.5941667125298707, "learning_rate": 0.00016761300122199678, "loss": 12.2687, "step": 10495 }, { "epoch": 0.571549148135291, "grad_norm": 0.574816716156372, "learning_rate": 0.00016760650382229483, "loss": 12.1507, "step": 10496 }, { "epoch": 0.571603602131874, "grad_norm": 0.5661023144426395, "learning_rate": 0.00016760000589687336, "loss": 12.3588, "step": 10497 }, { "epoch": 0.571658056128457, "grad_norm": 0.5950806459266346, "learning_rate": 0.00016759350744578288, "loss": 12.3072, "step": 10498 }, { "epoch": 0.57171251012504, "grad_norm": 0.5788836627410617, "learning_rate": 0.00016758700846907401, "loss": 12.3097, "step": 10499 }, { "epoch": 0.571766964121623, "grad_norm": 0.5946412198769391, "learning_rate": 0.00016758050896679717, "loss": 12.3366, "step": 10500 }, { "epoch": 0.571821418118206, "grad_norm": 0.6167651308416774, "learning_rate": 0.000167574008939003, "loss": 12.3193, "step": 10501 }, { "epoch": 0.5718758721147891, "grad_norm": 0.6110161549280384, "learning_rate": 0.00016756750838574198, "loss": 12.1554, "step": 10502 }, { "epoch": 0.5719303261113721, "grad_norm": 0.596606633093422, "learning_rate": 0.00016756100730706467, "loss": 12.4674, "step": 10503 }, { "epoch": 0.5719847801079551, "grad_norm": 0.626567890368138, "learning_rate": 0.00016755450570302166, "loss": 12.3227, "step": 10504 }, { "epoch": 0.572039234104538, "grad_norm": 0.57946182580484, "learning_rate": 0.0001675480035736635, "loss": 12.2348, "step": 10505 }, { "epoch": 0.572093688101121, "grad_norm": 0.5888367222286589, "learning_rate": 0.00016754150091904073, "loss": 12.3907, "step": 10506 }, { "epoch": 0.572148142097704, "grad_norm": 0.5917082862865675, "learning_rate": 0.00016753499773920392, "loss": 12.4763, "step": 10507 }, { "epoch": 0.5722025960942871, "grad_norm": 0.5822949981872415, "learning_rate": 0.00016752849403420364, "loss": 12.4209, "step": 10508 }, { "epoch": 0.5722570500908701, "grad_norm": 0.6384365923747648, "learning_rate": 0.00016752198980409045, "loss": 12.578, "step": 10509 }, { "epoch": 0.5723115040874531, "grad_norm": 0.563623509323039, "learning_rate": 0.00016751548504891496, "loss": 12.2381, "step": 10510 }, { "epoch": 0.5723659580840361, "grad_norm": 0.5695499848134334, "learning_rate": 0.00016750897976872773, "loss": 12.3816, "step": 10511 }, { "epoch": 0.5724204120806191, "grad_norm": 0.5884756822216776, "learning_rate": 0.00016750247396357936, "loss": 12.4354, "step": 10512 }, { "epoch": 0.5724748660772021, "grad_norm": 0.6719139096999517, "learning_rate": 0.0001674959676335204, "loss": 12.3141, "step": 10513 }, { "epoch": 0.5725293200737852, "grad_norm": 0.6136542932337656, "learning_rate": 0.0001674894607786015, "loss": 12.1397, "step": 10514 }, { "epoch": 0.5725837740703682, "grad_norm": 0.6274914061291436, "learning_rate": 0.00016748295339887327, "loss": 12.3897, "step": 10515 }, { "epoch": 0.5726382280669512, "grad_norm": 0.7877708430325404, "learning_rate": 0.00016747644549438623, "loss": 12.4755, "step": 10516 }, { "epoch": 0.5726926820635342, "grad_norm": 0.6202086631901177, "learning_rate": 0.00016746993706519105, "loss": 12.4147, "step": 10517 }, { "epoch": 0.5727471360601172, "grad_norm": 0.5956267922455494, "learning_rate": 0.0001674634281113383, "loss": 12.3638, "step": 10518 }, { "epoch": 0.5728015900567002, "grad_norm": 0.6043432463810966, "learning_rate": 0.00016745691863287866, "loss": 12.3319, "step": 10519 }, { "epoch": 0.5728560440532833, "grad_norm": 0.6873759483732338, "learning_rate": 0.0001674504086298627, "loss": 12.5279, "step": 10520 }, { "epoch": 0.5729104980498663, "grad_norm": 0.6071589858995201, "learning_rate": 0.00016744389810234103, "loss": 12.1313, "step": 10521 }, { "epoch": 0.5729649520464493, "grad_norm": 0.6503637091496456, "learning_rate": 0.00016743738705036432, "loss": 12.3438, "step": 10522 }, { "epoch": 0.5730194060430323, "grad_norm": 0.6086095458823582, "learning_rate": 0.00016743087547398315, "loss": 12.3626, "step": 10523 }, { "epoch": 0.5730738600396152, "grad_norm": 0.5756750286388267, "learning_rate": 0.00016742436337324821, "loss": 12.4315, "step": 10524 }, { "epoch": 0.5731283140361983, "grad_norm": 0.5641999601089314, "learning_rate": 0.00016741785074821013, "loss": 12.1776, "step": 10525 }, { "epoch": 0.5731827680327813, "grad_norm": 0.5547702884056563, "learning_rate": 0.00016741133759891948, "loss": 12.1544, "step": 10526 }, { "epoch": 0.5732372220293643, "grad_norm": 0.542306466498164, "learning_rate": 0.00016740482392542703, "loss": 12.4514, "step": 10527 }, { "epoch": 0.5732916760259473, "grad_norm": 0.6474642132436456, "learning_rate": 0.00016739830972778332, "loss": 12.4235, "step": 10528 }, { "epoch": 0.5733461300225303, "grad_norm": 0.6199669347079713, "learning_rate": 0.00016739179500603902, "loss": 12.2876, "step": 10529 }, { "epoch": 0.5734005840191133, "grad_norm": 0.5828210115421902, "learning_rate": 0.0001673852797602449, "loss": 12.2469, "step": 10530 }, { "epoch": 0.5734550380156964, "grad_norm": 0.6730918250509988, "learning_rate": 0.0001673787639904515, "loss": 12.4733, "step": 10531 }, { "epoch": 0.5735094920122794, "grad_norm": 0.6046063478226098, "learning_rate": 0.00016737224769670955, "loss": 12.3557, "step": 10532 }, { "epoch": 0.5735639460088624, "grad_norm": 0.6125427393316394, "learning_rate": 0.0001673657308790697, "loss": 12.3181, "step": 10533 }, { "epoch": 0.5736184000054454, "grad_norm": 0.6463772232187843, "learning_rate": 0.0001673592135375826, "loss": 12.4027, "step": 10534 }, { "epoch": 0.5736728540020284, "grad_norm": 0.6079419740833918, "learning_rate": 0.00016735269567229902, "loss": 12.5024, "step": 10535 }, { "epoch": 0.5737273079986114, "grad_norm": 0.6303049403018266, "learning_rate": 0.00016734617728326952, "loss": 12.3737, "step": 10536 }, { "epoch": 0.5737817619951945, "grad_norm": 0.6191188706847653, "learning_rate": 0.0001673396583705449, "loss": 12.4352, "step": 10537 }, { "epoch": 0.5738362159917775, "grad_norm": 0.64668220420118, "learning_rate": 0.00016733313893417575, "loss": 12.3668, "step": 10538 }, { "epoch": 0.5738906699883605, "grad_norm": 0.59202145795084, "learning_rate": 0.00016732661897421284, "loss": 12.4155, "step": 10539 }, { "epoch": 0.5739451239849435, "grad_norm": 0.6330526863126181, "learning_rate": 0.0001673200984907069, "loss": 12.3955, "step": 10540 }, { "epoch": 0.5739995779815265, "grad_norm": 0.6282798030727328, "learning_rate": 0.00016731357748370852, "loss": 12.2411, "step": 10541 }, { "epoch": 0.5740540319781094, "grad_norm": 0.5847016640651334, "learning_rate": 0.00016730705595326847, "loss": 12.2756, "step": 10542 }, { "epoch": 0.5741084859746926, "grad_norm": 0.6828398564940577, "learning_rate": 0.00016730053389943752, "loss": 12.3235, "step": 10543 }, { "epoch": 0.5741629399712755, "grad_norm": 0.6756906716831694, "learning_rate": 0.0001672940113222663, "loss": 12.3097, "step": 10544 }, { "epoch": 0.5742173939678585, "grad_norm": 0.5620571029291483, "learning_rate": 0.00016728748822180558, "loss": 12.4714, "step": 10545 }, { "epoch": 0.5742718479644415, "grad_norm": 0.763273675651076, "learning_rate": 0.00016728096459810605, "loss": 12.3871, "step": 10546 }, { "epoch": 0.5743263019610245, "grad_norm": 0.6695252802059574, "learning_rate": 0.00016727444045121844, "loss": 12.4432, "step": 10547 }, { "epoch": 0.5743807559576075, "grad_norm": 0.5481293077792526, "learning_rate": 0.00016726791578119352, "loss": 12.352, "step": 10548 }, { "epoch": 0.5744352099541906, "grad_norm": 0.7292923862086983, "learning_rate": 0.00016726139058808204, "loss": 12.3642, "step": 10549 }, { "epoch": 0.5744896639507736, "grad_norm": 0.6413924900299116, "learning_rate": 0.00016725486487193466, "loss": 12.4177, "step": 10550 }, { "epoch": 0.5745441179473566, "grad_norm": 0.6287623470629832, "learning_rate": 0.0001672483386328022, "loss": 12.4007, "step": 10551 }, { "epoch": 0.5745985719439396, "grad_norm": 0.6708372301718295, "learning_rate": 0.00016724181187073532, "loss": 12.4079, "step": 10552 }, { "epoch": 0.5746530259405226, "grad_norm": 0.6457982109614273, "learning_rate": 0.0001672352845857849, "loss": 12.4311, "step": 10553 }, { "epoch": 0.5747074799371056, "grad_norm": 0.612411900516974, "learning_rate": 0.00016722875677800163, "loss": 12.4046, "step": 10554 }, { "epoch": 0.5747619339336887, "grad_norm": 0.719502918505751, "learning_rate": 0.00016722222844743625, "loss": 12.3984, "step": 10555 }, { "epoch": 0.5748163879302717, "grad_norm": 0.5937007029444668, "learning_rate": 0.00016721569959413955, "loss": 12.3199, "step": 10556 }, { "epoch": 0.5748708419268547, "grad_norm": 0.6120740818602591, "learning_rate": 0.00016720917021816232, "loss": 12.175, "step": 10557 }, { "epoch": 0.5749252959234377, "grad_norm": 0.5793545511529767, "learning_rate": 0.00016720264031955528, "loss": 12.3855, "step": 10558 }, { "epoch": 0.5749797499200207, "grad_norm": 0.611501589018356, "learning_rate": 0.00016719610989836926, "loss": 12.4145, "step": 10559 }, { "epoch": 0.5750342039166038, "grad_norm": 0.5726434094961663, "learning_rate": 0.000167189578954655, "loss": 12.3545, "step": 10560 }, { "epoch": 0.5750886579131868, "grad_norm": 0.6006267957757423, "learning_rate": 0.00016718304748846333, "loss": 12.3245, "step": 10561 }, { "epoch": 0.5751431119097697, "grad_norm": 0.6673176404464749, "learning_rate": 0.000167176515499845, "loss": 12.3836, "step": 10562 }, { "epoch": 0.5751975659063527, "grad_norm": 0.5485759701750896, "learning_rate": 0.00016716998298885082, "loss": 12.3111, "step": 10563 }, { "epoch": 0.5752520199029357, "grad_norm": 0.6530685793868931, "learning_rate": 0.00016716344995553159, "loss": 12.2943, "step": 10564 }, { "epoch": 0.5753064738995187, "grad_norm": 0.6586676988577265, "learning_rate": 0.00016715691639993812, "loss": 12.2898, "step": 10565 }, { "epoch": 0.5753609278961018, "grad_norm": 0.6148634157182052, "learning_rate": 0.00016715038232212122, "loss": 12.4235, "step": 10566 }, { "epoch": 0.5754153818926848, "grad_norm": 0.597136346674799, "learning_rate": 0.00016714384772213166, "loss": 12.3168, "step": 10567 }, { "epoch": 0.5754698358892678, "grad_norm": 0.6297749668051609, "learning_rate": 0.0001671373126000203, "loss": 12.3847, "step": 10568 }, { "epoch": 0.5755242898858508, "grad_norm": 0.5814828821447569, "learning_rate": 0.0001671307769558379, "loss": 12.3572, "step": 10569 }, { "epoch": 0.5755787438824338, "grad_norm": 0.5795019959416221, "learning_rate": 0.00016712424078963535, "loss": 12.3285, "step": 10570 }, { "epoch": 0.5756331978790168, "grad_norm": 0.6514041703969438, "learning_rate": 0.00016711770410146343, "loss": 12.3869, "step": 10571 }, { "epoch": 0.5756876518755999, "grad_norm": 0.5718901998179207, "learning_rate": 0.00016711116689137302, "loss": 12.3213, "step": 10572 }, { "epoch": 0.5757421058721829, "grad_norm": 0.6235036946951503, "learning_rate": 0.0001671046291594149, "loss": 12.3728, "step": 10573 }, { "epoch": 0.5757965598687659, "grad_norm": 0.49537043109391665, "learning_rate": 0.00016709809090563991, "loss": 12.1873, "step": 10574 }, { "epoch": 0.5758510138653489, "grad_norm": 0.5739977173419183, "learning_rate": 0.00016709155213009895, "loss": 12.4136, "step": 10575 }, { "epoch": 0.5759054678619319, "grad_norm": 0.5843541261690129, "learning_rate": 0.0001670850128328428, "loss": 12.3284, "step": 10576 }, { "epoch": 0.5759599218585149, "grad_norm": 0.6166488364641032, "learning_rate": 0.00016707847301392236, "loss": 12.3386, "step": 10577 }, { "epoch": 0.576014375855098, "grad_norm": 0.6460908741074868, "learning_rate": 0.00016707193267338844, "loss": 12.4182, "step": 10578 }, { "epoch": 0.576068829851681, "grad_norm": 0.6129110230694653, "learning_rate": 0.00016706539181129195, "loss": 12.2851, "step": 10579 }, { "epoch": 0.576123283848264, "grad_norm": 0.6352271764388896, "learning_rate": 0.00016705885042768372, "loss": 12.3625, "step": 10580 }, { "epoch": 0.5761777378448469, "grad_norm": 0.7409091782385997, "learning_rate": 0.00016705230852261465, "loss": 12.4322, "step": 10581 }, { "epoch": 0.5762321918414299, "grad_norm": 0.5790484078560002, "learning_rate": 0.00016704576609613553, "loss": 12.3396, "step": 10582 }, { "epoch": 0.5762866458380129, "grad_norm": 0.5480996478430834, "learning_rate": 0.0001670392231482973, "loss": 12.2522, "step": 10583 }, { "epoch": 0.576341099834596, "grad_norm": 0.70971966257255, "learning_rate": 0.00016703267967915086, "loss": 12.3536, "step": 10584 }, { "epoch": 0.576395553831179, "grad_norm": 0.6783815309016988, "learning_rate": 0.00016702613568874702, "loss": 12.3734, "step": 10585 }, { "epoch": 0.576450007827762, "grad_norm": 0.6505384157020722, "learning_rate": 0.00016701959117713675, "loss": 12.4639, "step": 10586 }, { "epoch": 0.576504461824345, "grad_norm": 0.5584833546030031, "learning_rate": 0.0001670130461443709, "loss": 12.245, "step": 10587 }, { "epoch": 0.576558915820928, "grad_norm": 0.6178251227046433, "learning_rate": 0.00016700650059050035, "loss": 12.3666, "step": 10588 }, { "epoch": 0.576613369817511, "grad_norm": 0.6046084629237625, "learning_rate": 0.00016699995451557599, "loss": 12.3203, "step": 10589 }, { "epoch": 0.5766678238140941, "grad_norm": 0.6602116593538877, "learning_rate": 0.00016699340791964876, "loss": 12.3469, "step": 10590 }, { "epoch": 0.5767222778106771, "grad_norm": 0.5750978231757151, "learning_rate": 0.0001669868608027696, "loss": 12.2704, "step": 10591 }, { "epoch": 0.5767767318072601, "grad_norm": 0.713198247391486, "learning_rate": 0.00016698031316498933, "loss": 12.4092, "step": 10592 }, { "epoch": 0.5768311858038431, "grad_norm": 0.5734036929963306, "learning_rate": 0.00016697376500635894, "loss": 12.3519, "step": 10593 }, { "epoch": 0.5768856398004261, "grad_norm": 0.6086445680995939, "learning_rate": 0.00016696721632692926, "loss": 12.3711, "step": 10594 }, { "epoch": 0.5769400937970092, "grad_norm": 0.5706324360010867, "learning_rate": 0.00016696066712675134, "loss": 12.3824, "step": 10595 }, { "epoch": 0.5769945477935922, "grad_norm": 0.5712190011034245, "learning_rate": 0.000166954117405876, "loss": 12.1989, "step": 10596 }, { "epoch": 0.5770490017901752, "grad_norm": 0.597188553842159, "learning_rate": 0.00016694756716435427, "loss": 12.224, "step": 10597 }, { "epoch": 0.5771034557867581, "grad_norm": 0.5742012347288625, "learning_rate": 0.000166941016402237, "loss": 12.2663, "step": 10598 }, { "epoch": 0.5771579097833411, "grad_norm": 0.5841098572750677, "learning_rate": 0.00016693446511957514, "loss": 12.3919, "step": 10599 }, { "epoch": 0.5772123637799241, "grad_norm": 0.5455079817642827, "learning_rate": 0.00016692791331641968, "loss": 12.1867, "step": 10600 }, { "epoch": 0.5772668177765072, "grad_norm": 0.6024082007021208, "learning_rate": 0.00016692136099282154, "loss": 12.2927, "step": 10601 }, { "epoch": 0.5773212717730902, "grad_norm": 0.5426123737996211, "learning_rate": 0.0001669148081488317, "loss": 12.3577, "step": 10602 }, { "epoch": 0.5773757257696732, "grad_norm": 0.6663180089415239, "learning_rate": 0.00016690825478450104, "loss": 12.3949, "step": 10603 }, { "epoch": 0.5774301797662562, "grad_norm": 0.6024385898428207, "learning_rate": 0.0001669017008998806, "loss": 12.2587, "step": 10604 }, { "epoch": 0.5774846337628392, "grad_norm": 0.5818281069112449, "learning_rate": 0.0001668951464950213, "loss": 12.261, "step": 10605 }, { "epoch": 0.5775390877594222, "grad_norm": 0.5903345499037228, "learning_rate": 0.00016688859156997415, "loss": 12.2928, "step": 10606 }, { "epoch": 0.5775935417560053, "grad_norm": 0.5794801873580113, "learning_rate": 0.00016688203612479004, "loss": 12.4412, "step": 10607 }, { "epoch": 0.5776479957525883, "grad_norm": 0.6805587694466598, "learning_rate": 0.00016687548015952003, "loss": 12.3809, "step": 10608 }, { "epoch": 0.5777024497491713, "grad_norm": 0.6843568923556025, "learning_rate": 0.0001668689236742151, "loss": 12.3297, "step": 10609 }, { "epoch": 0.5777569037457543, "grad_norm": 0.6010521056935886, "learning_rate": 0.00016686236666892617, "loss": 12.3508, "step": 10610 }, { "epoch": 0.5778113577423373, "grad_norm": 0.5713679177009463, "learning_rate": 0.00016685580914370428, "loss": 12.4091, "step": 10611 }, { "epoch": 0.5778658117389203, "grad_norm": 0.6063980630363172, "learning_rate": 0.00016684925109860038, "loss": 12.3177, "step": 10612 }, { "epoch": 0.5779202657355034, "grad_norm": 0.613071108821596, "learning_rate": 0.0001668426925336655, "loss": 12.3598, "step": 10613 }, { "epoch": 0.5779747197320864, "grad_norm": 0.6988243533468197, "learning_rate": 0.0001668361334489507, "loss": 12.3623, "step": 10614 }, { "epoch": 0.5780291737286694, "grad_norm": 0.6237317929685072, "learning_rate": 0.00016682957384450684, "loss": 12.3544, "step": 10615 }, { "epoch": 0.5780836277252523, "grad_norm": 0.5961809283619698, "learning_rate": 0.00016682301372038504, "loss": 12.4062, "step": 10616 }, { "epoch": 0.5781380817218353, "grad_norm": 0.6010875834008084, "learning_rate": 0.00016681645307663626, "loss": 12.2882, "step": 10617 }, { "epoch": 0.5781925357184183, "grad_norm": 0.5755892550319603, "learning_rate": 0.00016680989191331157, "loss": 12.2412, "step": 10618 }, { "epoch": 0.5782469897150014, "grad_norm": 0.6412664803443131, "learning_rate": 0.0001668033302304619, "loss": 12.2868, "step": 10619 }, { "epoch": 0.5783014437115844, "grad_norm": 0.5556221992873542, "learning_rate": 0.00016679676802813838, "loss": 12.3292, "step": 10620 }, { "epoch": 0.5783558977081674, "grad_norm": 0.6912225305689517, "learning_rate": 0.00016679020530639197, "loss": 12.4867, "step": 10621 }, { "epoch": 0.5784103517047504, "grad_norm": 0.5984882630019411, "learning_rate": 0.00016678364206527372, "loss": 12.3977, "step": 10622 }, { "epoch": 0.5784648057013334, "grad_norm": 0.561838444976537, "learning_rate": 0.00016677707830483468, "loss": 12.3208, "step": 10623 }, { "epoch": 0.5785192596979164, "grad_norm": 0.6207495383056233, "learning_rate": 0.00016677051402512588, "loss": 12.3599, "step": 10624 }, { "epoch": 0.5785737136944995, "grad_norm": 0.5758976293841622, "learning_rate": 0.00016676394922619835, "loss": 12.2409, "step": 10625 }, { "epoch": 0.5786281676910825, "grad_norm": 0.5909502850088595, "learning_rate": 0.00016675738390810319, "loss": 12.3915, "step": 10626 }, { "epoch": 0.5786826216876655, "grad_norm": 0.6320800577473589, "learning_rate": 0.0001667508180708914, "loss": 12.3689, "step": 10627 }, { "epoch": 0.5787370756842485, "grad_norm": 0.5729497199492393, "learning_rate": 0.00016674425171461402, "loss": 12.2811, "step": 10628 }, { "epoch": 0.5787915296808315, "grad_norm": 0.6089854746204572, "learning_rate": 0.0001667376848393222, "loss": 12.3424, "step": 10629 }, { "epoch": 0.5788459836774146, "grad_norm": 0.6253594955335714, "learning_rate": 0.0001667311174450669, "loss": 12.3144, "step": 10630 }, { "epoch": 0.5789004376739976, "grad_norm": 0.5667608081147807, "learning_rate": 0.0001667245495318993, "loss": 12.3947, "step": 10631 }, { "epoch": 0.5789548916705806, "grad_norm": 0.6135287869538242, "learning_rate": 0.00016671798109987035, "loss": 12.3181, "step": 10632 }, { "epoch": 0.5790093456671636, "grad_norm": 0.5553515736538324, "learning_rate": 0.00016671141214903124, "loss": 12.2642, "step": 10633 }, { "epoch": 0.5790637996637465, "grad_norm": 0.5392317028369967, "learning_rate": 0.00016670484267943296, "loss": 12.43, "step": 10634 }, { "epoch": 0.5791182536603295, "grad_norm": 0.6236632127739922, "learning_rate": 0.00016669827269112666, "loss": 12.3473, "step": 10635 }, { "epoch": 0.5791727076569126, "grad_norm": 0.5955810877538578, "learning_rate": 0.00016669170218416342, "loss": 12.1647, "step": 10636 }, { "epoch": 0.5792271616534956, "grad_norm": 0.683404071019631, "learning_rate": 0.0001666851311585943, "loss": 12.4758, "step": 10637 }, { "epoch": 0.5792816156500786, "grad_norm": 0.5430831866083706, "learning_rate": 0.0001666785596144704, "loss": 12.1772, "step": 10638 }, { "epoch": 0.5793360696466616, "grad_norm": 0.6560979890502256, "learning_rate": 0.00016667198755184286, "loss": 12.3382, "step": 10639 }, { "epoch": 0.5793905236432446, "grad_norm": 0.5806297471494171, "learning_rate": 0.00016666541497076278, "loss": 12.28, "step": 10640 }, { "epoch": 0.5794449776398276, "grad_norm": 0.613292184104423, "learning_rate": 0.00016665884187128124, "loss": 12.4239, "step": 10641 }, { "epoch": 0.5794994316364107, "grad_norm": 0.7204491078949676, "learning_rate": 0.00016665226825344936, "loss": 12.3856, "step": 10642 }, { "epoch": 0.5795538856329937, "grad_norm": 0.5956727900025345, "learning_rate": 0.00016664569411731827, "loss": 12.248, "step": 10643 }, { "epoch": 0.5796083396295767, "grad_norm": 0.6538876553886454, "learning_rate": 0.00016663911946293908, "loss": 12.261, "step": 10644 }, { "epoch": 0.5796627936261597, "grad_norm": 0.6123826693547904, "learning_rate": 0.00016663254429036292, "loss": 12.3832, "step": 10645 }, { "epoch": 0.5797172476227427, "grad_norm": 0.6421627005585591, "learning_rate": 0.00016662596859964092, "loss": 12.3551, "step": 10646 }, { "epoch": 0.5797717016193257, "grad_norm": 0.6798698187958897, "learning_rate": 0.00016661939239082422, "loss": 12.4644, "step": 10647 }, { "epoch": 0.5798261556159088, "grad_norm": 0.620210117669651, "learning_rate": 0.00016661281566396395, "loss": 12.2705, "step": 10648 }, { "epoch": 0.5798806096124918, "grad_norm": 0.6167602633822272, "learning_rate": 0.00016660623841911127, "loss": 12.3554, "step": 10649 }, { "epoch": 0.5799350636090748, "grad_norm": 0.7079619021600241, "learning_rate": 0.0001665996606563173, "loss": 12.37, "step": 10650 }, { "epoch": 0.5799895176056578, "grad_norm": 0.6471379288128399, "learning_rate": 0.0001665930823756332, "loss": 12.2851, "step": 10651 }, { "epoch": 0.5800439716022407, "grad_norm": 0.6250926694948886, "learning_rate": 0.00016658650357711014, "loss": 12.5016, "step": 10652 }, { "epoch": 0.5800984255988237, "grad_norm": 0.7285422010970602, "learning_rate": 0.00016657992426079922, "loss": 12.3769, "step": 10653 }, { "epoch": 0.5801528795954068, "grad_norm": 0.5863535987053579, "learning_rate": 0.00016657334442675168, "loss": 12.2994, "step": 10654 }, { "epoch": 0.5802073335919898, "grad_norm": 1.0460052189248172, "learning_rate": 0.00016656676407501863, "loss": 12.3731, "step": 10655 }, { "epoch": 0.5802617875885728, "grad_norm": 0.6388253747460793, "learning_rate": 0.00016656018320565128, "loss": 12.3517, "step": 10656 }, { "epoch": 0.5803162415851558, "grad_norm": 0.6482402051013342, "learning_rate": 0.00016655360181870078, "loss": 12.1586, "step": 10657 }, { "epoch": 0.5803706955817388, "grad_norm": 0.6075242808500345, "learning_rate": 0.0001665470199142183, "loss": 12.2407, "step": 10658 }, { "epoch": 0.5804251495783219, "grad_norm": 0.5845619181963486, "learning_rate": 0.000166540437492255, "loss": 12.2608, "step": 10659 }, { "epoch": 0.5804796035749049, "grad_norm": 0.6034154716906985, "learning_rate": 0.00016653385455286213, "loss": 12.3674, "step": 10660 }, { "epoch": 0.5805340575714879, "grad_norm": 0.5829797356728805, "learning_rate": 0.0001665272710960909, "loss": 12.3774, "step": 10661 }, { "epoch": 0.5805885115680709, "grad_norm": 0.6855172700573546, "learning_rate": 0.00016652068712199239, "loss": 12.4455, "step": 10662 }, { "epoch": 0.5806429655646539, "grad_norm": 0.6075926608862877, "learning_rate": 0.00016651410263061786, "loss": 12.4299, "step": 10663 }, { "epoch": 0.5806974195612369, "grad_norm": 0.5648326355226941, "learning_rate": 0.00016650751762201855, "loss": 12.3492, "step": 10664 }, { "epoch": 0.58075187355782, "grad_norm": 0.6824578992264828, "learning_rate": 0.00016650093209624557, "loss": 12.3532, "step": 10665 }, { "epoch": 0.580806327554403, "grad_norm": 0.5804425579415122, "learning_rate": 0.00016649434605335025, "loss": 12.2374, "step": 10666 }, { "epoch": 0.580860781550986, "grad_norm": 0.6508938636771904, "learning_rate": 0.00016648775949338373, "loss": 12.4581, "step": 10667 }, { "epoch": 0.580915235547569, "grad_norm": 0.6252040919432668, "learning_rate": 0.00016648117241639722, "loss": 12.3138, "step": 10668 }, { "epoch": 0.580969689544152, "grad_norm": 0.6052652126208161, "learning_rate": 0.000166474584822442, "loss": 12.4502, "step": 10669 }, { "epoch": 0.581024143540735, "grad_norm": 0.6035779320123137, "learning_rate": 0.00016646799671156924, "loss": 12.2472, "step": 10670 }, { "epoch": 0.581078597537318, "grad_norm": 0.6325234440978983, "learning_rate": 0.00016646140808383023, "loss": 12.2745, "step": 10671 }, { "epoch": 0.581133051533901, "grad_norm": 0.5833686290661749, "learning_rate": 0.0001664548189392761, "loss": 12.3776, "step": 10672 }, { "epoch": 0.581187505530484, "grad_norm": 0.5924299333881107, "learning_rate": 0.00016644822927795817, "loss": 12.3912, "step": 10673 }, { "epoch": 0.581241959527067, "grad_norm": 0.5966865121409107, "learning_rate": 0.00016644163909992768, "loss": 12.2941, "step": 10674 }, { "epoch": 0.58129641352365, "grad_norm": 0.571458474944623, "learning_rate": 0.00016643504840523586, "loss": 12.2669, "step": 10675 }, { "epoch": 0.581350867520233, "grad_norm": 0.6325490664211092, "learning_rate": 0.00016642845719393398, "loss": 12.2097, "step": 10676 }, { "epoch": 0.5814053215168161, "grad_norm": 0.5714605500093208, "learning_rate": 0.00016642186546607322, "loss": 12.2973, "step": 10677 }, { "epoch": 0.5814597755133991, "grad_norm": 0.5435175392981482, "learning_rate": 0.00016641527322170494, "loss": 12.315, "step": 10678 }, { "epoch": 0.5815142295099821, "grad_norm": 0.5737258722843944, "learning_rate": 0.00016640868046088037, "loss": 12.325, "step": 10679 }, { "epoch": 0.5815686835065651, "grad_norm": 0.7006617008446661, "learning_rate": 0.00016640208718365074, "loss": 12.5486, "step": 10680 }, { "epoch": 0.5816231375031481, "grad_norm": 0.6174886267902199, "learning_rate": 0.00016639549339006736, "loss": 12.3535, "step": 10681 }, { "epoch": 0.5816775914997311, "grad_norm": 0.6928596384395789, "learning_rate": 0.00016638889908018146, "loss": 12.2791, "step": 10682 }, { "epoch": 0.5817320454963142, "grad_norm": 0.6084022066172773, "learning_rate": 0.00016638230425404437, "loss": 12.3526, "step": 10683 }, { "epoch": 0.5817864994928972, "grad_norm": 0.5687893948538565, "learning_rate": 0.00016637570891170732, "loss": 12.4278, "step": 10684 }, { "epoch": 0.5818409534894802, "grad_norm": 0.6759339144459159, "learning_rate": 0.00016636911305322168, "loss": 12.366, "step": 10685 }, { "epoch": 0.5818954074860632, "grad_norm": 0.6087122523927804, "learning_rate": 0.00016636251667863868, "loss": 12.3715, "step": 10686 }, { "epoch": 0.5819498614826462, "grad_norm": 0.6395203659277144, "learning_rate": 0.00016635591978800957, "loss": 12.2675, "step": 10687 }, { "epoch": 0.5820043154792292, "grad_norm": 0.7568721198329359, "learning_rate": 0.00016634932238138574, "loss": 12.435, "step": 10688 }, { "epoch": 0.5820587694758123, "grad_norm": 0.6359199886095321, "learning_rate": 0.00016634272445881844, "loss": 12.3296, "step": 10689 }, { "epoch": 0.5821132234723952, "grad_norm": 0.6054270859888692, "learning_rate": 0.000166336126020359, "loss": 12.3969, "step": 10690 }, { "epoch": 0.5821676774689782, "grad_norm": 0.6476557577065587, "learning_rate": 0.0001663295270660587, "loss": 12.3727, "step": 10691 }, { "epoch": 0.5822221314655612, "grad_norm": 0.548446163354268, "learning_rate": 0.0001663229275959689, "loss": 12.1942, "step": 10692 }, { "epoch": 0.5822765854621442, "grad_norm": 0.5874516079699055, "learning_rate": 0.00016631632761014088, "loss": 12.294, "step": 10693 }, { "epoch": 0.5823310394587273, "grad_norm": 0.7223823835601816, "learning_rate": 0.00016630972710862595, "loss": 12.3554, "step": 10694 }, { "epoch": 0.5823854934553103, "grad_norm": 0.6923158838651609, "learning_rate": 0.0001663031260914755, "loss": 12.4702, "step": 10695 }, { "epoch": 0.5824399474518933, "grad_norm": 0.6265117547125986, "learning_rate": 0.0001662965245587408, "loss": 12.266, "step": 10696 }, { "epoch": 0.5824944014484763, "grad_norm": 0.6029729478880793, "learning_rate": 0.00016628992251047322, "loss": 12.3511, "step": 10697 }, { "epoch": 0.5825488554450593, "grad_norm": 0.5424235117126971, "learning_rate": 0.00016628331994672407, "loss": 12.2765, "step": 10698 }, { "epoch": 0.5826033094416423, "grad_norm": 0.6365971163700855, "learning_rate": 0.00016627671686754471, "loss": 12.447, "step": 10699 }, { "epoch": 0.5826577634382254, "grad_norm": 0.6047554713915011, "learning_rate": 0.00016627011327298652, "loss": 12.2762, "step": 10700 }, { "epoch": 0.5827122174348084, "grad_norm": 0.635240192156098, "learning_rate": 0.00016626350916310078, "loss": 12.3235, "step": 10701 }, { "epoch": 0.5827666714313914, "grad_norm": 0.6115231624219086, "learning_rate": 0.00016625690453793887, "loss": 12.3012, "step": 10702 }, { "epoch": 0.5828211254279744, "grad_norm": 0.6807057914114546, "learning_rate": 0.0001662502993975522, "loss": 12.2399, "step": 10703 }, { "epoch": 0.5828755794245574, "grad_norm": 0.6231743295966256, "learning_rate": 0.00016624369374199205, "loss": 12.2065, "step": 10704 }, { "epoch": 0.5829300334211404, "grad_norm": 0.620544586110963, "learning_rate": 0.00016623708757130986, "loss": 12.3695, "step": 10705 }, { "epoch": 0.5829844874177235, "grad_norm": 0.5689139742984867, "learning_rate": 0.00016623048088555695, "loss": 12.3346, "step": 10706 }, { "epoch": 0.5830389414143065, "grad_norm": 0.5860319651455662, "learning_rate": 0.0001662238736847847, "loss": 12.4899, "step": 10707 }, { "epoch": 0.5830933954108894, "grad_norm": 0.6495902268166277, "learning_rate": 0.00016621726596904456, "loss": 12.3605, "step": 10708 }, { "epoch": 0.5831478494074724, "grad_norm": 0.6258170212278469, "learning_rate": 0.00016621065773838779, "loss": 12.1947, "step": 10709 }, { "epoch": 0.5832023034040554, "grad_norm": 0.6346930520938242, "learning_rate": 0.00016620404899286587, "loss": 12.4736, "step": 10710 }, { "epoch": 0.5832567574006384, "grad_norm": 0.6556064557353416, "learning_rate": 0.00016619743973253018, "loss": 12.3694, "step": 10711 }, { "epoch": 0.5833112113972215, "grad_norm": 0.6348183258851631, "learning_rate": 0.0001661908299574321, "loss": 12.3759, "step": 10712 }, { "epoch": 0.5833656653938045, "grad_norm": 0.5517331420369923, "learning_rate": 0.00016618421966762298, "loss": 12.2347, "step": 10713 }, { "epoch": 0.5834201193903875, "grad_norm": 0.6066742407789687, "learning_rate": 0.0001661776088631543, "loss": 12.5373, "step": 10714 }, { "epoch": 0.5834745733869705, "grad_norm": 0.5705601878893244, "learning_rate": 0.00016617099754407744, "loss": 12.3859, "step": 10715 }, { "epoch": 0.5835290273835535, "grad_norm": 0.6543925268915343, "learning_rate": 0.00016616438571044379, "loss": 12.2492, "step": 10716 }, { "epoch": 0.5835834813801365, "grad_norm": 0.559740718934576, "learning_rate": 0.0001661577733623048, "loss": 12.3315, "step": 10717 }, { "epoch": 0.5836379353767196, "grad_norm": 0.7204271917294793, "learning_rate": 0.00016615116049971184, "loss": 12.3104, "step": 10718 }, { "epoch": 0.5836923893733026, "grad_norm": 0.5570670501316027, "learning_rate": 0.0001661445471227164, "loss": 12.2308, "step": 10719 }, { "epoch": 0.5837468433698856, "grad_norm": 0.9876212452291068, "learning_rate": 0.00016613793323136983, "loss": 12.3422, "step": 10720 }, { "epoch": 0.5838012973664686, "grad_norm": 0.5470216742140983, "learning_rate": 0.00016613131882572363, "loss": 12.2743, "step": 10721 }, { "epoch": 0.5838557513630516, "grad_norm": 0.6491742554543132, "learning_rate": 0.00016612470390582918, "loss": 12.4555, "step": 10722 }, { "epoch": 0.5839102053596346, "grad_norm": 0.5823548501056763, "learning_rate": 0.00016611808847173798, "loss": 12.31, "step": 10723 }, { "epoch": 0.5839646593562177, "grad_norm": 0.6165231576326428, "learning_rate": 0.00016611147252350137, "loss": 12.2579, "step": 10724 }, { "epoch": 0.5840191133528007, "grad_norm": 0.6820426398894804, "learning_rate": 0.00016610485606117093, "loss": 12.3685, "step": 10725 }, { "epoch": 0.5840735673493836, "grad_norm": 0.5495775000325852, "learning_rate": 0.00016609823908479804, "loss": 12.3707, "step": 10726 }, { "epoch": 0.5841280213459666, "grad_norm": 0.7082474221272369, "learning_rate": 0.00016609162159443412, "loss": 12.3602, "step": 10727 }, { "epoch": 0.5841824753425496, "grad_norm": 0.5726077459516642, "learning_rate": 0.0001660850035901307, "loss": 12.3536, "step": 10728 }, { "epoch": 0.5842369293391327, "grad_norm": 0.6416664026749477, "learning_rate": 0.00016607838507193918, "loss": 12.3802, "step": 10729 }, { "epoch": 0.5842913833357157, "grad_norm": 0.5706655013876935, "learning_rate": 0.00016607176603991106, "loss": 12.3543, "step": 10730 }, { "epoch": 0.5843458373322987, "grad_norm": 0.6420489842793392, "learning_rate": 0.00016606514649409782, "loss": 12.3892, "step": 10731 }, { "epoch": 0.5844002913288817, "grad_norm": 0.5834620300852785, "learning_rate": 0.00016605852643455094, "loss": 12.275, "step": 10732 }, { "epoch": 0.5844547453254647, "grad_norm": 0.5570349802673065, "learning_rate": 0.00016605190586132184, "loss": 12.4122, "step": 10733 }, { "epoch": 0.5845091993220477, "grad_norm": 0.6062036403931096, "learning_rate": 0.00016604528477446207, "loss": 12.2577, "step": 10734 }, { "epoch": 0.5845636533186308, "grad_norm": 0.6008188923462245, "learning_rate": 0.00016603866317402307, "loss": 12.4129, "step": 10735 }, { "epoch": 0.5846181073152138, "grad_norm": 0.5870953040476993, "learning_rate": 0.00016603204106005638, "loss": 12.3131, "step": 10736 }, { "epoch": 0.5846725613117968, "grad_norm": 0.5775654571884085, "learning_rate": 0.00016602541843261347, "loss": 12.1748, "step": 10737 }, { "epoch": 0.5847270153083798, "grad_norm": 0.6057726613370205, "learning_rate": 0.0001660187952917458, "loss": 12.3621, "step": 10738 }, { "epoch": 0.5847814693049628, "grad_norm": 0.6902081706570492, "learning_rate": 0.00016601217163750488, "loss": 12.5363, "step": 10739 }, { "epoch": 0.5848359233015458, "grad_norm": 0.577736725073556, "learning_rate": 0.0001660055474699423, "loss": 12.3637, "step": 10740 }, { "epoch": 0.5848903772981289, "grad_norm": 0.5566594113188544, "learning_rate": 0.0001659989227891095, "loss": 12.3016, "step": 10741 }, { "epoch": 0.5849448312947119, "grad_norm": 0.5903814894869801, "learning_rate": 0.000165992297595058, "loss": 12.2161, "step": 10742 }, { "epoch": 0.5849992852912949, "grad_norm": 0.6212620436447776, "learning_rate": 0.00016598567188783934, "loss": 12.2501, "step": 10743 }, { "epoch": 0.5850537392878778, "grad_norm": 0.6641309249568489, "learning_rate": 0.000165979045667505, "loss": 12.3462, "step": 10744 }, { "epoch": 0.5851081932844608, "grad_norm": 0.6597269040271564, "learning_rate": 0.00016597241893410658, "loss": 12.4123, "step": 10745 }, { "epoch": 0.5851626472810438, "grad_norm": 0.6040301077382447, "learning_rate": 0.00016596579168769553, "loss": 12.328, "step": 10746 }, { "epoch": 0.5852171012776269, "grad_norm": 0.6346788135306696, "learning_rate": 0.00016595916392832342, "loss": 12.4054, "step": 10747 }, { "epoch": 0.5852715552742099, "grad_norm": 0.6213996954737067, "learning_rate": 0.00016595253565604182, "loss": 12.2539, "step": 10748 }, { "epoch": 0.5853260092707929, "grad_norm": 0.7113686564804004, "learning_rate": 0.00016594590687090224, "loss": 12.3629, "step": 10749 }, { "epoch": 0.5853804632673759, "grad_norm": 0.6576936109864955, "learning_rate": 0.0001659392775729562, "loss": 12.3767, "step": 10750 }, { "epoch": 0.5854349172639589, "grad_norm": 0.6351268245810511, "learning_rate": 0.00016593264776225528, "loss": 12.4524, "step": 10751 }, { "epoch": 0.5854893712605419, "grad_norm": 0.7243956423960791, "learning_rate": 0.00016592601743885106, "loss": 12.2262, "step": 10752 }, { "epoch": 0.585543825257125, "grad_norm": 0.5824637034568017, "learning_rate": 0.00016591938660279507, "loss": 12.1616, "step": 10753 }, { "epoch": 0.585598279253708, "grad_norm": 0.6756771306162614, "learning_rate": 0.00016591275525413887, "loss": 12.3993, "step": 10754 }, { "epoch": 0.585652733250291, "grad_norm": 0.6143731101255684, "learning_rate": 0.00016590612339293403, "loss": 12.2452, "step": 10755 }, { "epoch": 0.585707187246874, "grad_norm": 0.6759993024361596, "learning_rate": 0.0001658994910192321, "loss": 12.3465, "step": 10756 }, { "epoch": 0.585761641243457, "grad_norm": 0.5906562704215874, "learning_rate": 0.0001658928581330847, "loss": 12.3051, "step": 10757 }, { "epoch": 0.58581609524004, "grad_norm": 0.7311117839177587, "learning_rate": 0.0001658862247345434, "loss": 12.3553, "step": 10758 }, { "epoch": 0.5858705492366231, "grad_norm": 0.5858251134557313, "learning_rate": 0.00016587959082365976, "loss": 12.4093, "step": 10759 }, { "epoch": 0.5859250032332061, "grad_norm": 0.6897485784356513, "learning_rate": 0.00016587295640048535, "loss": 12.2952, "step": 10760 }, { "epoch": 0.5859794572297891, "grad_norm": 0.6207767171834111, "learning_rate": 0.0001658663214650718, "loss": 12.1458, "step": 10761 }, { "epoch": 0.586033911226372, "grad_norm": 0.6302702106318713, "learning_rate": 0.0001658596860174707, "loss": 12.352, "step": 10762 }, { "epoch": 0.586088365222955, "grad_norm": 0.6097753042029566, "learning_rate": 0.0001658530500577336, "loss": 12.4807, "step": 10763 }, { "epoch": 0.5861428192195381, "grad_norm": 0.691173923513651, "learning_rate": 0.00016584641358591217, "loss": 12.3892, "step": 10764 }, { "epoch": 0.5861972732161211, "grad_norm": 0.6443847707271058, "learning_rate": 0.00016583977660205798, "loss": 12.4726, "step": 10765 }, { "epoch": 0.5862517272127041, "grad_norm": 0.5835580112949047, "learning_rate": 0.00016583313910622263, "loss": 12.3632, "step": 10766 }, { "epoch": 0.5863061812092871, "grad_norm": 0.5751536494120262, "learning_rate": 0.00016582650109845778, "loss": 12.5108, "step": 10767 }, { "epoch": 0.5863606352058701, "grad_norm": 0.6089091834016841, "learning_rate": 0.00016581986257881498, "loss": 12.2119, "step": 10768 }, { "epoch": 0.5864150892024531, "grad_norm": 0.5850101068908325, "learning_rate": 0.00016581322354734592, "loss": 12.4388, "step": 10769 }, { "epoch": 0.5864695431990362, "grad_norm": 0.6138029272911438, "learning_rate": 0.0001658065840041022, "loss": 12.3174, "step": 10770 }, { "epoch": 0.5865239971956192, "grad_norm": 0.5908410281167666, "learning_rate": 0.00016579994394913544, "loss": 12.4531, "step": 10771 }, { "epoch": 0.5865784511922022, "grad_norm": 0.5352941946472402, "learning_rate": 0.00016579330338249728, "loss": 12.3342, "step": 10772 }, { "epoch": 0.5866329051887852, "grad_norm": 0.5648190561349754, "learning_rate": 0.00016578666230423935, "loss": 12.2958, "step": 10773 }, { "epoch": 0.5866873591853682, "grad_norm": 0.6392221609775258, "learning_rate": 0.0001657800207144133, "loss": 12.4991, "step": 10774 }, { "epoch": 0.5867418131819512, "grad_norm": 0.6129857611687894, "learning_rate": 0.00016577337861307077, "loss": 12.4036, "step": 10775 }, { "epoch": 0.5867962671785343, "grad_norm": 0.627119067624158, "learning_rate": 0.00016576673600026345, "loss": 12.3447, "step": 10776 }, { "epoch": 0.5868507211751173, "grad_norm": 0.5847458260011068, "learning_rate": 0.00016576009287604294, "loss": 12.3191, "step": 10777 }, { "epoch": 0.5869051751717003, "grad_norm": 0.6644982665189451, "learning_rate": 0.00016575344924046092, "loss": 12.3171, "step": 10778 }, { "epoch": 0.5869596291682833, "grad_norm": 0.6030409735792923, "learning_rate": 0.00016574680509356908, "loss": 12.3177, "step": 10779 }, { "epoch": 0.5870140831648663, "grad_norm": 0.639646300876133, "learning_rate": 0.00016574016043541901, "loss": 12.3706, "step": 10780 }, { "epoch": 0.5870685371614492, "grad_norm": 0.6364669286753462, "learning_rate": 0.00016573351526606248, "loss": 12.3207, "step": 10781 }, { "epoch": 0.5871229911580323, "grad_norm": 0.6027203188028022, "learning_rate": 0.00016572686958555107, "loss": 12.3687, "step": 10782 }, { "epoch": 0.5871774451546153, "grad_norm": 0.586665394642215, "learning_rate": 0.00016572022339393652, "loss": 12.1757, "step": 10783 }, { "epoch": 0.5872318991511983, "grad_norm": 0.5734703142469153, "learning_rate": 0.00016571357669127048, "loss": 12.3066, "step": 10784 }, { "epoch": 0.5872863531477813, "grad_norm": 0.6073111293302085, "learning_rate": 0.00016570692947760464, "loss": 12.3468, "step": 10785 }, { "epoch": 0.5873408071443643, "grad_norm": 0.6017266465861099, "learning_rate": 0.00016570028175299072, "loss": 12.0879, "step": 10786 }, { "epoch": 0.5873952611409473, "grad_norm": 0.6343969140689849, "learning_rate": 0.0001656936335174804, "loss": 12.3714, "step": 10787 }, { "epoch": 0.5874497151375304, "grad_norm": 0.6209890963122027, "learning_rate": 0.00016568698477112533, "loss": 12.4377, "step": 10788 }, { "epoch": 0.5875041691341134, "grad_norm": 0.5831684736009015, "learning_rate": 0.00016568033551397728, "loss": 12.3306, "step": 10789 }, { "epoch": 0.5875586231306964, "grad_norm": 0.6591646439527126, "learning_rate": 0.00016567368574608792, "loss": 12.4348, "step": 10790 }, { "epoch": 0.5876130771272794, "grad_norm": 0.6084255226376905, "learning_rate": 0.00016566703546750896, "loss": 12.2603, "step": 10791 }, { "epoch": 0.5876675311238624, "grad_norm": 0.5709180561818189, "learning_rate": 0.00016566038467829213, "loss": 12.384, "step": 10792 }, { "epoch": 0.5877219851204454, "grad_norm": 0.6491470129190081, "learning_rate": 0.00016565373337848913, "loss": 12.4071, "step": 10793 }, { "epoch": 0.5877764391170285, "grad_norm": 0.6136482109577118, "learning_rate": 0.00016564708156815167, "loss": 12.2885, "step": 10794 }, { "epoch": 0.5878308931136115, "grad_norm": 0.5805410890498897, "learning_rate": 0.00016564042924733152, "loss": 12.3627, "step": 10795 }, { "epoch": 0.5878853471101945, "grad_norm": 0.5737420997967682, "learning_rate": 0.00016563377641608037, "loss": 12.336, "step": 10796 }, { "epoch": 0.5879398011067775, "grad_norm": 0.6520150115053743, "learning_rate": 0.00016562712307445, "loss": 12.3845, "step": 10797 }, { "epoch": 0.5879942551033605, "grad_norm": 0.5972841491680021, "learning_rate": 0.0001656204692224921, "loss": 12.3759, "step": 10798 }, { "epoch": 0.5880487090999436, "grad_norm": 0.5493871853747994, "learning_rate": 0.0001656138148602584, "loss": 12.1783, "step": 10799 }, { "epoch": 0.5881031630965265, "grad_norm": 0.5822890201774393, "learning_rate": 0.0001656071599878007, "loss": 12.2892, "step": 10800 }, { "epoch": 0.5881576170931095, "grad_norm": 0.6324818215286, "learning_rate": 0.0001656005046051707, "loss": 12.3577, "step": 10801 }, { "epoch": 0.5882120710896925, "grad_norm": 0.5642795838039929, "learning_rate": 0.0001655938487124202, "loss": 12.3569, "step": 10802 }, { "epoch": 0.5882665250862755, "grad_norm": 0.6910574345507834, "learning_rate": 0.00016558719230960094, "loss": 12.4795, "step": 10803 }, { "epoch": 0.5883209790828585, "grad_norm": 0.542802476435009, "learning_rate": 0.00016558053539676463, "loss": 12.2123, "step": 10804 }, { "epoch": 0.5883754330794416, "grad_norm": 0.6043617230353788, "learning_rate": 0.0001655738779739631, "loss": 12.3511, "step": 10805 }, { "epoch": 0.5884298870760246, "grad_norm": 0.5661938081811106, "learning_rate": 0.0001655672200412481, "loss": 12.3841, "step": 10806 }, { "epoch": 0.5884843410726076, "grad_norm": 0.5513840276449579, "learning_rate": 0.0001655605615986714, "loss": 12.3578, "step": 10807 }, { "epoch": 0.5885387950691906, "grad_norm": 0.5935567757194304, "learning_rate": 0.00016555390264628482, "loss": 12.2641, "step": 10808 }, { "epoch": 0.5885932490657736, "grad_norm": 0.6094467186140181, "learning_rate": 0.00016554724318414005, "loss": 12.3186, "step": 10809 }, { "epoch": 0.5886477030623566, "grad_norm": 0.587326969918712, "learning_rate": 0.00016554058321228892, "loss": 12.3582, "step": 10810 }, { "epoch": 0.5887021570589397, "grad_norm": 0.605656258922736, "learning_rate": 0.00016553392273078324, "loss": 12.3631, "step": 10811 }, { "epoch": 0.5887566110555227, "grad_norm": 0.5416599548400733, "learning_rate": 0.00016552726173967478, "loss": 12.1228, "step": 10812 }, { "epoch": 0.5888110650521057, "grad_norm": 0.6324066438312699, "learning_rate": 0.00016552060023901537, "loss": 12.368, "step": 10813 }, { "epoch": 0.5888655190486887, "grad_norm": 0.5809986823631331, "learning_rate": 0.00016551393822885675, "loss": 12.251, "step": 10814 }, { "epoch": 0.5889199730452717, "grad_norm": 0.5715275065882698, "learning_rate": 0.00016550727570925077, "loss": 12.3118, "step": 10815 }, { "epoch": 0.5889744270418547, "grad_norm": 0.5556233171285592, "learning_rate": 0.00016550061268024924, "loss": 12.3867, "step": 10816 }, { "epoch": 0.5890288810384378, "grad_norm": 0.551949562675175, "learning_rate": 0.00016549394914190394, "loss": 12.2687, "step": 10817 }, { "epoch": 0.5890833350350207, "grad_norm": 0.6604225135995097, "learning_rate": 0.0001654872850942667, "loss": 12.2866, "step": 10818 }, { "epoch": 0.5891377890316037, "grad_norm": 0.6122826288090935, "learning_rate": 0.00016548062053738943, "loss": 12.3139, "step": 10819 }, { "epoch": 0.5891922430281867, "grad_norm": 0.6336015057735573, "learning_rate": 0.00016547395547132377, "loss": 12.3545, "step": 10820 }, { "epoch": 0.5892466970247697, "grad_norm": 0.6077660242831809, "learning_rate": 0.0001654672898961217, "loss": 12.4284, "step": 10821 }, { "epoch": 0.5893011510213527, "grad_norm": 0.5933924734135819, "learning_rate": 0.00016546062381183504, "loss": 12.3477, "step": 10822 }, { "epoch": 0.5893556050179358, "grad_norm": 0.5704821578368983, "learning_rate": 0.00016545395721851553, "loss": 12.3658, "step": 10823 }, { "epoch": 0.5894100590145188, "grad_norm": 0.5531890981277237, "learning_rate": 0.00016544729011621509, "loss": 12.1956, "step": 10824 }, { "epoch": 0.5894645130111018, "grad_norm": 0.6078577362006647, "learning_rate": 0.00016544062250498556, "loss": 12.1014, "step": 10825 }, { "epoch": 0.5895189670076848, "grad_norm": 0.6041162891948126, "learning_rate": 0.00016543395438487876, "loss": 12.3498, "step": 10826 }, { "epoch": 0.5895734210042678, "grad_norm": 0.59779517633144, "learning_rate": 0.00016542728575594657, "loss": 12.3556, "step": 10827 }, { "epoch": 0.5896278750008509, "grad_norm": 0.6802641767919028, "learning_rate": 0.00016542061661824081, "loss": 12.3733, "step": 10828 }, { "epoch": 0.5896823289974339, "grad_norm": 0.6133795427116091, "learning_rate": 0.0001654139469718134, "loss": 12.3098, "step": 10829 }, { "epoch": 0.5897367829940169, "grad_norm": 0.625457286881729, "learning_rate": 0.00016540727681671613, "loss": 12.326, "step": 10830 }, { "epoch": 0.5897912369905999, "grad_norm": 0.5949393486441156, "learning_rate": 0.00016540060615300096, "loss": 12.2548, "step": 10831 }, { "epoch": 0.5898456909871829, "grad_norm": 0.5972619721684153, "learning_rate": 0.00016539393498071967, "loss": 12.2434, "step": 10832 }, { "epoch": 0.5899001449837659, "grad_norm": 0.6253120159387654, "learning_rate": 0.00016538726329992418, "loss": 12.392, "step": 10833 }, { "epoch": 0.589954598980349, "grad_norm": 0.6160790096062935, "learning_rate": 0.00016538059111066635, "loss": 12.4191, "step": 10834 }, { "epoch": 0.590009052976932, "grad_norm": 0.6596655883939287, "learning_rate": 0.0001653739184129981, "loss": 12.4099, "step": 10835 }, { "epoch": 0.590063506973515, "grad_norm": 0.5650329151645308, "learning_rate": 0.0001653672452069713, "loss": 12.2878, "step": 10836 }, { "epoch": 0.5901179609700979, "grad_norm": 0.582767331234623, "learning_rate": 0.00016536057149263783, "loss": 12.3784, "step": 10837 }, { "epoch": 0.5901724149666809, "grad_norm": 0.6578672810819257, "learning_rate": 0.0001653538972700496, "loss": 12.3418, "step": 10838 }, { "epoch": 0.5902268689632639, "grad_norm": 0.5438566298833007, "learning_rate": 0.0001653472225392585, "loss": 12.3361, "step": 10839 }, { "epoch": 0.590281322959847, "grad_norm": 0.5609747831008461, "learning_rate": 0.00016534054730031643, "loss": 12.4159, "step": 10840 }, { "epoch": 0.59033577695643, "grad_norm": 0.5772256672982281, "learning_rate": 0.00016533387155327533, "loss": 12.3209, "step": 10841 }, { "epoch": 0.590390230953013, "grad_norm": 0.564930919863431, "learning_rate": 0.00016532719529818704, "loss": 12.1613, "step": 10842 }, { "epoch": 0.590444684949596, "grad_norm": 0.5629067263774307, "learning_rate": 0.00016532051853510358, "loss": 12.4296, "step": 10843 }, { "epoch": 0.590499138946179, "grad_norm": 0.5442203642637816, "learning_rate": 0.00016531384126407681, "loss": 12.3441, "step": 10844 }, { "epoch": 0.590553592942762, "grad_norm": 0.6046789600876962, "learning_rate": 0.00016530716348515863, "loss": 12.3517, "step": 10845 }, { "epoch": 0.5906080469393451, "grad_norm": 0.580830860122665, "learning_rate": 0.000165300485198401, "loss": 12.2095, "step": 10846 }, { "epoch": 0.5906625009359281, "grad_norm": 0.6466629974220863, "learning_rate": 0.00016529380640385589, "loss": 12.4153, "step": 10847 }, { "epoch": 0.5907169549325111, "grad_norm": 0.6339639913605484, "learning_rate": 0.00016528712710157513, "loss": 12.4205, "step": 10848 }, { "epoch": 0.5907714089290941, "grad_norm": 0.6195441409327394, "learning_rate": 0.00016528044729161075, "loss": 12.2604, "step": 10849 }, { "epoch": 0.5908258629256771, "grad_norm": 0.6118504980387967, "learning_rate": 0.00016527376697401464, "loss": 12.3924, "step": 10850 }, { "epoch": 0.5908803169222601, "grad_norm": 0.5882431282833214, "learning_rate": 0.0001652670861488388, "loss": 12.2959, "step": 10851 }, { "epoch": 0.5909347709188432, "grad_norm": 0.6832713314144646, "learning_rate": 0.00016526040481613515, "loss": 12.3463, "step": 10852 }, { "epoch": 0.5909892249154262, "grad_norm": 0.6671222847117588, "learning_rate": 0.00016525372297595563, "loss": 12.3169, "step": 10853 }, { "epoch": 0.5910436789120092, "grad_norm": 0.5630347120264059, "learning_rate": 0.0001652470406283522, "loss": 12.3274, "step": 10854 }, { "epoch": 0.5910981329085921, "grad_norm": 0.69419863059305, "learning_rate": 0.0001652403577733769, "loss": 12.4147, "step": 10855 }, { "epoch": 0.5911525869051751, "grad_norm": 0.6071255184021517, "learning_rate": 0.0001652336744110816, "loss": 12.2184, "step": 10856 }, { "epoch": 0.5912070409017581, "grad_norm": 0.6381170907717836, "learning_rate": 0.0001652269905415183, "loss": 12.4493, "step": 10857 }, { "epoch": 0.5912614948983412, "grad_norm": 0.6156674336316467, "learning_rate": 0.000165220306164739, "loss": 12.4876, "step": 10858 }, { "epoch": 0.5913159488949242, "grad_norm": 0.6300556219877623, "learning_rate": 0.00016521362128079568, "loss": 12.4234, "step": 10859 }, { "epoch": 0.5913704028915072, "grad_norm": 0.5903640259607501, "learning_rate": 0.00016520693588974026, "loss": 12.3806, "step": 10860 }, { "epoch": 0.5914248568880902, "grad_norm": 0.6030768851262162, "learning_rate": 0.0001652002499916248, "loss": 12.2749, "step": 10861 }, { "epoch": 0.5914793108846732, "grad_norm": 0.5589972880187812, "learning_rate": 0.00016519356358650125, "loss": 12.2358, "step": 10862 }, { "epoch": 0.5915337648812563, "grad_norm": 0.6032709667447022, "learning_rate": 0.00016518687667442164, "loss": 12.352, "step": 10863 }, { "epoch": 0.5915882188778393, "grad_norm": 0.5754656477262209, "learning_rate": 0.00016518018925543791, "loss": 12.3834, "step": 10864 }, { "epoch": 0.5916426728744223, "grad_norm": 0.7367348500947619, "learning_rate": 0.0001651735013296021, "loss": 12.4114, "step": 10865 }, { "epoch": 0.5916971268710053, "grad_norm": 0.5771815214137505, "learning_rate": 0.00016516681289696625, "loss": 12.2369, "step": 10866 }, { "epoch": 0.5917515808675883, "grad_norm": 0.5986111921661764, "learning_rate": 0.00016516012395758231, "loss": 12.2583, "step": 10867 }, { "epoch": 0.5918060348641713, "grad_norm": 0.7461476212901754, "learning_rate": 0.00016515343451150232, "loss": 12.3905, "step": 10868 }, { "epoch": 0.5918604888607544, "grad_norm": 0.6240151613156143, "learning_rate": 0.0001651467445587783, "loss": 12.3152, "step": 10869 }, { "epoch": 0.5919149428573374, "grad_norm": 0.6227323246109967, "learning_rate": 0.00016514005409946228, "loss": 12.2702, "step": 10870 }, { "epoch": 0.5919693968539204, "grad_norm": 0.6066607791261394, "learning_rate": 0.00016513336313360623, "loss": 12.4799, "step": 10871 }, { "epoch": 0.5920238508505034, "grad_norm": 0.621186986997314, "learning_rate": 0.00016512667166126228, "loss": 12.3654, "step": 10872 }, { "epoch": 0.5920783048470863, "grad_norm": 0.6127640166956305, "learning_rate": 0.0001651199796824824, "loss": 12.3064, "step": 10873 }, { "epoch": 0.5921327588436693, "grad_norm": 0.5862571393568207, "learning_rate": 0.00016511328719731862, "loss": 12.268, "step": 10874 }, { "epoch": 0.5921872128402524, "grad_norm": 0.5395592050833669, "learning_rate": 0.00016510659420582302, "loss": 12.2713, "step": 10875 }, { "epoch": 0.5922416668368354, "grad_norm": 0.540435062260064, "learning_rate": 0.0001650999007080476, "loss": 12.3872, "step": 10876 }, { "epoch": 0.5922961208334184, "grad_norm": 0.5877623442475959, "learning_rate": 0.00016509320670404445, "loss": 12.2217, "step": 10877 }, { "epoch": 0.5923505748300014, "grad_norm": 0.5951671429030703, "learning_rate": 0.0001650865121938656, "loss": 12.4027, "step": 10878 }, { "epoch": 0.5924050288265844, "grad_norm": 0.6337323510468316, "learning_rate": 0.0001650798171775631, "loss": 12.3653, "step": 10879 }, { "epoch": 0.5924594828231674, "grad_norm": 0.6740988540522351, "learning_rate": 0.00016507312165518908, "loss": 12.3495, "step": 10880 }, { "epoch": 0.5925139368197505, "grad_norm": 0.5834673840846617, "learning_rate": 0.00016506642562679548, "loss": 12.0696, "step": 10881 }, { "epoch": 0.5925683908163335, "grad_norm": 0.595216213228313, "learning_rate": 0.0001650597290924345, "loss": 12.3565, "step": 10882 }, { "epoch": 0.5926228448129165, "grad_norm": 0.6432560364263551, "learning_rate": 0.00016505303205215815, "loss": 12.3607, "step": 10883 }, { "epoch": 0.5926772988094995, "grad_norm": 0.586856815044308, "learning_rate": 0.0001650463345060185, "loss": 12.312, "step": 10884 }, { "epoch": 0.5927317528060825, "grad_norm": 0.5832360201559386, "learning_rate": 0.00016503963645406763, "loss": 12.2108, "step": 10885 }, { "epoch": 0.5927862068026655, "grad_norm": 0.6363677337191588, "learning_rate": 0.00016503293789635768, "loss": 12.2894, "step": 10886 }, { "epoch": 0.5928406607992486, "grad_norm": 0.6366862094278207, "learning_rate": 0.00016502623883294065, "loss": 12.2471, "step": 10887 }, { "epoch": 0.5928951147958316, "grad_norm": 0.6290404501627677, "learning_rate": 0.00016501953926386872, "loss": 12.3175, "step": 10888 }, { "epoch": 0.5929495687924146, "grad_norm": 0.6032367742735714, "learning_rate": 0.00016501283918919394, "loss": 12.303, "step": 10889 }, { "epoch": 0.5930040227889976, "grad_norm": 0.5908359572494107, "learning_rate": 0.00016500613860896842, "loss": 12.3907, "step": 10890 }, { "epoch": 0.5930584767855805, "grad_norm": 0.6263770904868716, "learning_rate": 0.00016499943752324426, "loss": 12.3837, "step": 10891 }, { "epoch": 0.5931129307821635, "grad_norm": 0.5627338226718853, "learning_rate": 0.0001649927359320736, "loss": 12.3537, "step": 10892 }, { "epoch": 0.5931673847787466, "grad_norm": 0.5863381767298946, "learning_rate": 0.00016498603383550847, "loss": 12.311, "step": 10893 }, { "epoch": 0.5932218387753296, "grad_norm": 0.5987115449104812, "learning_rate": 0.0001649793312336011, "loss": 12.4243, "step": 10894 }, { "epoch": 0.5932762927719126, "grad_norm": 0.6024755309453307, "learning_rate": 0.0001649726281264035, "loss": 12.3362, "step": 10895 }, { "epoch": 0.5933307467684956, "grad_norm": 0.5197680717807689, "learning_rate": 0.00016496592451396787, "loss": 12.3468, "step": 10896 }, { "epoch": 0.5933852007650786, "grad_norm": 0.6111589305287626, "learning_rate": 0.00016495922039634633, "loss": 12.2591, "step": 10897 }, { "epoch": 0.5934396547616617, "grad_norm": 0.6600374756617288, "learning_rate": 0.000164952515773591, "loss": 12.4693, "step": 10898 }, { "epoch": 0.5934941087582447, "grad_norm": 0.6008571027135967, "learning_rate": 0.00016494581064575397, "loss": 12.2133, "step": 10899 }, { "epoch": 0.5935485627548277, "grad_norm": 0.6174842596701852, "learning_rate": 0.0001649391050128875, "loss": 12.3995, "step": 10900 }, { "epoch": 0.5936030167514107, "grad_norm": 0.5966629019054353, "learning_rate": 0.0001649323988750436, "loss": 12.3835, "step": 10901 }, { "epoch": 0.5936574707479937, "grad_norm": 0.6241955185191029, "learning_rate": 0.0001649256922322745, "loss": 12.3144, "step": 10902 }, { "epoch": 0.5937119247445767, "grad_norm": 0.6195522331438866, "learning_rate": 0.00016491898508463234, "loss": 12.3529, "step": 10903 }, { "epoch": 0.5937663787411598, "grad_norm": 0.6498619538274419, "learning_rate": 0.00016491227743216925, "loss": 12.346, "step": 10904 }, { "epoch": 0.5938208327377428, "grad_norm": 0.6874857310664496, "learning_rate": 0.00016490556927493738, "loss": 12.3812, "step": 10905 }, { "epoch": 0.5938752867343258, "grad_norm": 0.5901940883933078, "learning_rate": 0.00016489886061298896, "loss": 12.3213, "step": 10906 }, { "epoch": 0.5939297407309088, "grad_norm": 0.686173498986228, "learning_rate": 0.0001648921514463761, "loss": 12.3013, "step": 10907 }, { "epoch": 0.5939841947274918, "grad_norm": 0.6804757445573253, "learning_rate": 0.000164885441775151, "loss": 12.4066, "step": 10908 }, { "epoch": 0.5940386487240747, "grad_norm": 0.5947407757059022, "learning_rate": 0.0001648787315993658, "loss": 12.3664, "step": 10909 }, { "epoch": 0.5940931027206579, "grad_norm": 0.5568712242564401, "learning_rate": 0.0001648720209190727, "loss": 12.3068, "step": 10910 }, { "epoch": 0.5941475567172408, "grad_norm": 0.6673244286932989, "learning_rate": 0.00016486530973432387, "loss": 12.4117, "step": 10911 }, { "epoch": 0.5942020107138238, "grad_norm": 0.5962148224888308, "learning_rate": 0.00016485859804517156, "loss": 12.3541, "step": 10912 }, { "epoch": 0.5942564647104068, "grad_norm": 0.7011669751181724, "learning_rate": 0.00016485188585166794, "loss": 12.3054, "step": 10913 }, { "epoch": 0.5943109187069898, "grad_norm": 0.5983241513440197, "learning_rate": 0.0001648451731538651, "loss": 12.3304, "step": 10914 }, { "epoch": 0.5943653727035728, "grad_norm": 0.6500449585705957, "learning_rate": 0.00016483845995181537, "loss": 12.3092, "step": 10915 }, { "epoch": 0.5944198267001559, "grad_norm": 0.7336462821120124, "learning_rate": 0.0001648317462455709, "loss": 12.2935, "step": 10916 }, { "epoch": 0.5944742806967389, "grad_norm": 0.6561148282831947, "learning_rate": 0.00016482503203518387, "loss": 12.3112, "step": 10917 }, { "epoch": 0.5945287346933219, "grad_norm": 0.5695500651013421, "learning_rate": 0.00016481831732070656, "loss": 12.4201, "step": 10918 }, { "epoch": 0.5945831886899049, "grad_norm": 0.5918640574477187, "learning_rate": 0.0001648116021021911, "loss": 12.185, "step": 10919 }, { "epoch": 0.5946376426864879, "grad_norm": 0.5833610791625058, "learning_rate": 0.00016480488637968978, "loss": 12.256, "step": 10920 }, { "epoch": 0.5946920966830709, "grad_norm": 0.6358894046455806, "learning_rate": 0.00016479817015325478, "loss": 12.2758, "step": 10921 }, { "epoch": 0.594746550679654, "grad_norm": 0.6786451896587979, "learning_rate": 0.00016479145342293837, "loss": 12.3409, "step": 10922 }, { "epoch": 0.594801004676237, "grad_norm": 0.6305781227330443, "learning_rate": 0.00016478473618879272, "loss": 12.2793, "step": 10923 }, { "epoch": 0.59485545867282, "grad_norm": 0.6153787188546451, "learning_rate": 0.00016477801845087012, "loss": 12.2978, "step": 10924 }, { "epoch": 0.594909912669403, "grad_norm": 0.6843363709563547, "learning_rate": 0.00016477130020922277, "loss": 12.4037, "step": 10925 }, { "epoch": 0.594964366665986, "grad_norm": 0.6054378314241865, "learning_rate": 0.00016476458146390296, "loss": 12.3749, "step": 10926 }, { "epoch": 0.595018820662569, "grad_norm": 0.6777685328611128, "learning_rate": 0.0001647578622149629, "loss": 12.3875, "step": 10927 }, { "epoch": 0.595073274659152, "grad_norm": 0.6999698137431501, "learning_rate": 0.00016475114246245482, "loss": 12.2763, "step": 10928 }, { "epoch": 0.595127728655735, "grad_norm": 0.6153828446300298, "learning_rate": 0.00016474442220643098, "loss": 12.386, "step": 10929 }, { "epoch": 0.595182182652318, "grad_norm": 0.6144697499687396, "learning_rate": 0.00016473770144694367, "loss": 12.4144, "step": 10930 }, { "epoch": 0.595236636648901, "grad_norm": 0.6330746485334776, "learning_rate": 0.00016473098018404513, "loss": 12.3863, "step": 10931 }, { "epoch": 0.595291090645484, "grad_norm": 0.5948147818482774, "learning_rate": 0.00016472425841778767, "loss": 12.3491, "step": 10932 }, { "epoch": 0.5953455446420671, "grad_norm": 0.5672408621231471, "learning_rate": 0.0001647175361482235, "loss": 12.1104, "step": 10933 }, { "epoch": 0.5953999986386501, "grad_norm": 0.6833874407205437, "learning_rate": 0.0001647108133754049, "loss": 12.4, "step": 10934 }, { "epoch": 0.5954544526352331, "grad_norm": 0.6494403309332063, "learning_rate": 0.0001647040900993842, "loss": 12.31, "step": 10935 }, { "epoch": 0.5955089066318161, "grad_norm": 0.6642420865825933, "learning_rate": 0.0001646973663202136, "loss": 12.435, "step": 10936 }, { "epoch": 0.5955633606283991, "grad_norm": 0.7412025529100646, "learning_rate": 0.00016469064203794543, "loss": 12.5043, "step": 10937 }, { "epoch": 0.5956178146249821, "grad_norm": 0.6756310812150244, "learning_rate": 0.00016468391725263203, "loss": 12.4486, "step": 10938 }, { "epoch": 0.5956722686215652, "grad_norm": 0.6006988925370298, "learning_rate": 0.0001646771919643256, "loss": 12.2226, "step": 10939 }, { "epoch": 0.5957267226181482, "grad_norm": 0.7039158770908783, "learning_rate": 0.0001646704661730785, "loss": 12.4274, "step": 10940 }, { "epoch": 0.5957811766147312, "grad_norm": 0.6309968178306992, "learning_rate": 0.000164663739878943, "loss": 12.4284, "step": 10941 }, { "epoch": 0.5958356306113142, "grad_norm": 0.5875586723598385, "learning_rate": 0.00016465701308197143, "loss": 12.3632, "step": 10942 }, { "epoch": 0.5958900846078972, "grad_norm": 0.6376103253963531, "learning_rate": 0.00016465028578221605, "loss": 12.3814, "step": 10943 }, { "epoch": 0.5959445386044802, "grad_norm": 0.5810749677416577, "learning_rate": 0.00016464355797972922, "loss": 12.3421, "step": 10944 }, { "epoch": 0.5959989926010633, "grad_norm": 0.6630727468617987, "learning_rate": 0.00016463682967456325, "loss": 12.3919, "step": 10945 }, { "epoch": 0.5960534465976463, "grad_norm": 0.633931614301666, "learning_rate": 0.00016463010086677048, "loss": 12.36, "step": 10946 }, { "epoch": 0.5961079005942292, "grad_norm": 0.58915438361807, "learning_rate": 0.00016462337155640316, "loss": 12.3195, "step": 10947 }, { "epoch": 0.5961623545908122, "grad_norm": 0.5674771043498464, "learning_rate": 0.0001646166417435137, "loss": 12.3314, "step": 10948 }, { "epoch": 0.5962168085873952, "grad_norm": 0.6278043046671282, "learning_rate": 0.00016460991142815435, "loss": 12.4202, "step": 10949 }, { "epoch": 0.5962712625839782, "grad_norm": 0.6091194463995885, "learning_rate": 0.00016460318061037752, "loss": 12.2741, "step": 10950 }, { "epoch": 0.5963257165805613, "grad_norm": 0.5929091072302531, "learning_rate": 0.00016459644929023553, "loss": 12.215, "step": 10951 }, { "epoch": 0.5963801705771443, "grad_norm": 0.539969046068889, "learning_rate": 0.00016458971746778072, "loss": 12.2799, "step": 10952 }, { "epoch": 0.5964346245737273, "grad_norm": 0.5477239819241904, "learning_rate": 0.00016458298514306546, "loss": 12.2254, "step": 10953 }, { "epoch": 0.5964890785703103, "grad_norm": 0.5945626403678087, "learning_rate": 0.000164576252316142, "loss": 12.2452, "step": 10954 }, { "epoch": 0.5965435325668933, "grad_norm": 0.6720891255289737, "learning_rate": 0.00016456951898706284, "loss": 12.3979, "step": 10955 }, { "epoch": 0.5965979865634763, "grad_norm": 0.6707997944507423, "learning_rate": 0.00016456278515588024, "loss": 12.4544, "step": 10956 }, { "epoch": 0.5966524405600594, "grad_norm": 0.6431423071095793, "learning_rate": 0.0001645560508226466, "loss": 12.301, "step": 10957 }, { "epoch": 0.5967068945566424, "grad_norm": 0.6959504961207419, "learning_rate": 0.0001645493159874143, "loss": 12.3559, "step": 10958 }, { "epoch": 0.5967613485532254, "grad_norm": 0.5793628168767085, "learning_rate": 0.00016454258065023568, "loss": 12.3758, "step": 10959 }, { "epoch": 0.5968158025498084, "grad_norm": 0.6424349375411575, "learning_rate": 0.00016453584481116313, "loss": 12.3487, "step": 10960 }, { "epoch": 0.5968702565463914, "grad_norm": 0.6275970493399082, "learning_rate": 0.00016452910847024901, "loss": 12.4393, "step": 10961 }, { "epoch": 0.5969247105429745, "grad_norm": 0.6446562906094953, "learning_rate": 0.00016452237162754577, "loss": 12.4605, "step": 10962 }, { "epoch": 0.5969791645395575, "grad_norm": 0.5735543134471323, "learning_rate": 0.00016451563428310575, "loss": 12.2989, "step": 10963 }, { "epoch": 0.5970336185361405, "grad_norm": 0.6222944111973252, "learning_rate": 0.0001645088964369813, "loss": 12.2292, "step": 10964 }, { "epoch": 0.5970880725327234, "grad_norm": 0.5985903452256347, "learning_rate": 0.00016450215808922483, "loss": 12.3215, "step": 10965 }, { "epoch": 0.5971425265293064, "grad_norm": 0.7412074618453982, "learning_rate": 0.0001644954192398888, "loss": 12.264, "step": 10966 }, { "epoch": 0.5971969805258894, "grad_norm": 0.6439435344918585, "learning_rate": 0.00016448867988902557, "loss": 12.3645, "step": 10967 }, { "epoch": 0.5972514345224725, "grad_norm": 0.6429930022822522, "learning_rate": 0.00016448194003668756, "loss": 12.1924, "step": 10968 }, { "epoch": 0.5973058885190555, "grad_norm": 0.6067151013037995, "learning_rate": 0.00016447519968292715, "loss": 12.2995, "step": 10969 }, { "epoch": 0.5973603425156385, "grad_norm": 0.6413785659924183, "learning_rate": 0.0001644684588277968, "loss": 12.2482, "step": 10970 }, { "epoch": 0.5974147965122215, "grad_norm": 0.6230928886773899, "learning_rate": 0.00016446171747134888, "loss": 12.3143, "step": 10971 }, { "epoch": 0.5974692505088045, "grad_norm": 0.6399022650982504, "learning_rate": 0.0001644549756136358, "loss": 12.2518, "step": 10972 }, { "epoch": 0.5975237045053875, "grad_norm": 0.6035692650993121, "learning_rate": 0.0001644482332547101, "loss": 12.3228, "step": 10973 }, { "epoch": 0.5975781585019706, "grad_norm": 0.5990368630408056, "learning_rate": 0.00016444149039462409, "loss": 12.218, "step": 10974 }, { "epoch": 0.5976326124985536, "grad_norm": 0.6111786788841953, "learning_rate": 0.00016443474703343024, "loss": 12.3642, "step": 10975 }, { "epoch": 0.5976870664951366, "grad_norm": 0.6189012848292096, "learning_rate": 0.000164428003171181, "loss": 12.3183, "step": 10976 }, { "epoch": 0.5977415204917196, "grad_norm": 0.5916709873890514, "learning_rate": 0.0001644212588079288, "loss": 12.3817, "step": 10977 }, { "epoch": 0.5977959744883026, "grad_norm": 0.7836661075155655, "learning_rate": 0.00016441451394372608, "loss": 12.2071, "step": 10978 }, { "epoch": 0.5978504284848856, "grad_norm": 0.5987173941667159, "learning_rate": 0.0001644077685786253, "loss": 12.5297, "step": 10979 }, { "epoch": 0.5979048824814687, "grad_norm": 0.5824740328749117, "learning_rate": 0.00016440102271267893, "loss": 12.3024, "step": 10980 }, { "epoch": 0.5979593364780517, "grad_norm": 0.5981170336986796, "learning_rate": 0.00016439427634593938, "loss": 12.1627, "step": 10981 }, { "epoch": 0.5980137904746347, "grad_norm": 0.6307947757447596, "learning_rate": 0.00016438752947845915, "loss": 12.3911, "step": 10982 }, { "epoch": 0.5980682444712176, "grad_norm": 0.6119378461428637, "learning_rate": 0.0001643807821102907, "loss": 12.4517, "step": 10983 }, { "epoch": 0.5981226984678006, "grad_norm": 0.5535905143981861, "learning_rate": 0.00016437403424148647, "loss": 12.1956, "step": 10984 }, { "epoch": 0.5981771524643836, "grad_norm": 0.6197162454189239, "learning_rate": 0.00016436728587209898, "loss": 12.4103, "step": 10985 }, { "epoch": 0.5982316064609667, "grad_norm": 0.6043613316867456, "learning_rate": 0.00016436053700218066, "loss": 12.3657, "step": 10986 }, { "epoch": 0.5982860604575497, "grad_norm": 0.6080395119756771, "learning_rate": 0.00016435378763178404, "loss": 12.3862, "step": 10987 }, { "epoch": 0.5983405144541327, "grad_norm": 0.6295366320303318, "learning_rate": 0.00016434703776096153, "loss": 12.3586, "step": 10988 }, { "epoch": 0.5983949684507157, "grad_norm": 0.6145068345615352, "learning_rate": 0.0001643402873897657, "loss": 12.3016, "step": 10989 }, { "epoch": 0.5984494224472987, "grad_norm": 0.5839673827473969, "learning_rate": 0.000164333536518249, "loss": 12.4692, "step": 10990 }, { "epoch": 0.5985038764438817, "grad_norm": 0.6179508354992466, "learning_rate": 0.00016432678514646392, "loss": 12.321, "step": 10991 }, { "epoch": 0.5985583304404648, "grad_norm": 0.5806189956726392, "learning_rate": 0.000164320033274463, "loss": 12.4132, "step": 10992 }, { "epoch": 0.5986127844370478, "grad_norm": 0.6172660230088375, "learning_rate": 0.0001643132809022987, "loss": 12.3561, "step": 10993 }, { "epoch": 0.5986672384336308, "grad_norm": 0.574460998452746, "learning_rate": 0.0001643065280300235, "loss": 12.3413, "step": 10994 }, { "epoch": 0.5987216924302138, "grad_norm": 0.5811058668708309, "learning_rate": 0.00016429977465769, "loss": 12.3356, "step": 10995 }, { "epoch": 0.5987761464267968, "grad_norm": 0.61660924334426, "learning_rate": 0.00016429302078535066, "loss": 12.1396, "step": 10996 }, { "epoch": 0.5988306004233799, "grad_norm": 0.5950546465610835, "learning_rate": 0.000164286266413058, "loss": 12.2852, "step": 10997 }, { "epoch": 0.5988850544199629, "grad_norm": 0.576631418146556, "learning_rate": 0.00016427951154086455, "loss": 12.3239, "step": 10998 }, { "epoch": 0.5989395084165459, "grad_norm": 0.5844309618486992, "learning_rate": 0.00016427275616882287, "loss": 12.3428, "step": 10999 }, { "epoch": 0.5989939624131289, "grad_norm": 0.6278048459137753, "learning_rate": 0.00016426600029698543, "loss": 12.3291, "step": 11000 }, { "epoch": 0.5990484164097118, "grad_norm": 0.5475127016067086, "learning_rate": 0.0001642592439254048, "loss": 12.2299, "step": 11001 }, { "epoch": 0.5991028704062948, "grad_norm": 0.6725176166321495, "learning_rate": 0.0001642524870541335, "loss": 12.2709, "step": 11002 }, { "epoch": 0.5991573244028779, "grad_norm": 0.6494389238030606, "learning_rate": 0.00016424572968322412, "loss": 12.3964, "step": 11003 }, { "epoch": 0.5992117783994609, "grad_norm": 0.6780755193103549, "learning_rate": 0.00016423897181272915, "loss": 12.3481, "step": 11004 }, { "epoch": 0.5992662323960439, "grad_norm": 0.5891529447522418, "learning_rate": 0.00016423221344270114, "loss": 12.4465, "step": 11005 }, { "epoch": 0.5993206863926269, "grad_norm": 0.6440257797821093, "learning_rate": 0.0001642254545731927, "loss": 12.4303, "step": 11006 }, { "epoch": 0.5993751403892099, "grad_norm": 0.6291431127412522, "learning_rate": 0.00016421869520425632, "loss": 12.3692, "step": 11007 }, { "epoch": 0.5994295943857929, "grad_norm": 0.5696472824113517, "learning_rate": 0.00016421193533594465, "loss": 12.2237, "step": 11008 }, { "epoch": 0.599484048382376, "grad_norm": 0.6722822267146954, "learning_rate": 0.00016420517496831016, "loss": 12.3112, "step": 11009 }, { "epoch": 0.599538502378959, "grad_norm": 0.6023223915088669, "learning_rate": 0.00016419841410140546, "loss": 12.2093, "step": 11010 }, { "epoch": 0.599592956375542, "grad_norm": 0.6379524481212994, "learning_rate": 0.00016419165273528317, "loss": 12.233, "step": 11011 }, { "epoch": 0.599647410372125, "grad_norm": 0.5829972476929912, "learning_rate": 0.00016418489086999577, "loss": 12.3742, "step": 11012 }, { "epoch": 0.599701864368708, "grad_norm": 0.6268520971080569, "learning_rate": 0.00016417812850559593, "loss": 12.3992, "step": 11013 }, { "epoch": 0.599756318365291, "grad_norm": 0.6059742211114041, "learning_rate": 0.0001641713656421362, "loss": 12.2391, "step": 11014 }, { "epoch": 0.5998107723618741, "grad_norm": 0.5977241627542447, "learning_rate": 0.00016416460227966915, "loss": 12.3261, "step": 11015 }, { "epoch": 0.5998652263584571, "grad_norm": 0.6257018673855346, "learning_rate": 0.00016415783841824738, "loss": 12.3806, "step": 11016 }, { "epoch": 0.5999196803550401, "grad_norm": 0.7731443352708557, "learning_rate": 0.00016415107405792352, "loss": 12.4871, "step": 11017 }, { "epoch": 0.5999741343516231, "grad_norm": 0.5933405099016545, "learning_rate": 0.0001641443091987502, "loss": 12.2664, "step": 11018 }, { "epoch": 0.600028588348206, "grad_norm": 0.6599217013753389, "learning_rate": 0.0001641375438407799, "loss": 12.399, "step": 11019 }, { "epoch": 0.600083042344789, "grad_norm": 0.732712892593169, "learning_rate": 0.00016413077798406534, "loss": 12.4574, "step": 11020 }, { "epoch": 0.6001374963413721, "grad_norm": 0.618903863653971, "learning_rate": 0.00016412401162865906, "loss": 12.3222, "step": 11021 }, { "epoch": 0.6001919503379551, "grad_norm": 0.5803114091083403, "learning_rate": 0.00016411724477461372, "loss": 12.2771, "step": 11022 }, { "epoch": 0.6002464043345381, "grad_norm": 0.6900362551315334, "learning_rate": 0.00016411047742198197, "loss": 12.3792, "step": 11023 }, { "epoch": 0.6003008583311211, "grad_norm": 0.6273452602392071, "learning_rate": 0.00016410370957081638, "loss": 12.1368, "step": 11024 }, { "epoch": 0.6003553123277041, "grad_norm": 0.6024275706084519, "learning_rate": 0.00016409694122116958, "loss": 12.4001, "step": 11025 }, { "epoch": 0.6004097663242871, "grad_norm": 0.5853978343141101, "learning_rate": 0.00016409017237309424, "loss": 12.2973, "step": 11026 }, { "epoch": 0.6004642203208702, "grad_norm": 0.6306740379428283, "learning_rate": 0.00016408340302664294, "loss": 12.4726, "step": 11027 }, { "epoch": 0.6005186743174532, "grad_norm": 0.582678644761307, "learning_rate": 0.00016407663318186838, "loss": 12.3409, "step": 11028 }, { "epoch": 0.6005731283140362, "grad_norm": 0.6438980879627172, "learning_rate": 0.00016406986283882315, "loss": 12.4626, "step": 11029 }, { "epoch": 0.6006275823106192, "grad_norm": 0.6516172706708898, "learning_rate": 0.00016406309199755992, "loss": 12.3803, "step": 11030 }, { "epoch": 0.6006820363072022, "grad_norm": 0.6589363085519371, "learning_rate": 0.00016405632065813136, "loss": 12.3313, "step": 11031 }, { "epoch": 0.6007364903037853, "grad_norm": 0.6892402550538832, "learning_rate": 0.00016404954882059012, "loss": 12.4779, "step": 11032 }, { "epoch": 0.6007909443003683, "grad_norm": 0.5614826899245277, "learning_rate": 0.00016404277648498881, "loss": 12.2103, "step": 11033 }, { "epoch": 0.6008453982969513, "grad_norm": 0.5673320871722737, "learning_rate": 0.00016403600365138017, "loss": 12.3491, "step": 11034 }, { "epoch": 0.6008998522935343, "grad_norm": 0.6451029222343776, "learning_rate": 0.00016402923031981682, "loss": 12.3691, "step": 11035 }, { "epoch": 0.6009543062901173, "grad_norm": 0.5993129509634276, "learning_rate": 0.00016402245649035143, "loss": 12.3083, "step": 11036 }, { "epoch": 0.6010087602867002, "grad_norm": 0.8425288989468626, "learning_rate": 0.00016401568216303666, "loss": 12.2698, "step": 11037 }, { "epoch": 0.6010632142832834, "grad_norm": 0.6080488327074888, "learning_rate": 0.00016400890733792522, "loss": 12.3828, "step": 11038 }, { "epoch": 0.6011176682798663, "grad_norm": 0.6149323405989047, "learning_rate": 0.0001640021320150698, "loss": 12.29, "step": 11039 }, { "epoch": 0.6011721222764493, "grad_norm": 0.6503595167012923, "learning_rate": 0.00016399535619452307, "loss": 12.4554, "step": 11040 }, { "epoch": 0.6012265762730323, "grad_norm": 0.6144374874038958, "learning_rate": 0.00016398857987633768, "loss": 12.3855, "step": 11041 }, { "epoch": 0.6012810302696153, "grad_norm": 0.5659336071137602, "learning_rate": 0.0001639818030605664, "loss": 12.3596, "step": 11042 }, { "epoch": 0.6013354842661983, "grad_norm": 0.5376304352006531, "learning_rate": 0.00016397502574726187, "loss": 12.2335, "step": 11043 }, { "epoch": 0.6013899382627814, "grad_norm": 0.5924159115387776, "learning_rate": 0.00016396824793647684, "loss": 12.2906, "step": 11044 }, { "epoch": 0.6014443922593644, "grad_norm": 0.6760554560177521, "learning_rate": 0.00016396146962826394, "loss": 12.3811, "step": 11045 }, { "epoch": 0.6014988462559474, "grad_norm": 0.6041717706336399, "learning_rate": 0.00016395469082267598, "loss": 12.4151, "step": 11046 }, { "epoch": 0.6015533002525304, "grad_norm": 0.6416209500189765, "learning_rate": 0.00016394791151976557, "loss": 12.458, "step": 11047 }, { "epoch": 0.6016077542491134, "grad_norm": 0.622185400302005, "learning_rate": 0.0001639411317195855, "loss": 12.3202, "step": 11048 }, { "epoch": 0.6016622082456964, "grad_norm": 0.6290605003665506, "learning_rate": 0.0001639343514221885, "loss": 12.4226, "step": 11049 }, { "epoch": 0.6017166622422795, "grad_norm": 0.6200775111861807, "learning_rate": 0.00016392757062762723, "loss": 12.3675, "step": 11050 }, { "epoch": 0.6017711162388625, "grad_norm": 0.6044956149656243, "learning_rate": 0.00016392078933595442, "loss": 12.4052, "step": 11051 }, { "epoch": 0.6018255702354455, "grad_norm": 0.5622836150915892, "learning_rate": 0.00016391400754722286, "loss": 12.2558, "step": 11052 }, { "epoch": 0.6018800242320285, "grad_norm": 0.7140968733839776, "learning_rate": 0.00016390722526148525, "loss": 12.4013, "step": 11053 }, { "epoch": 0.6019344782286115, "grad_norm": 0.6898733093538983, "learning_rate": 0.00016390044247879435, "loss": 12.3683, "step": 11054 }, { "epoch": 0.6019889322251945, "grad_norm": 0.6503239657646119, "learning_rate": 0.0001638936591992029, "loss": 12.3846, "step": 11055 }, { "epoch": 0.6020433862217776, "grad_norm": 0.5894666821242075, "learning_rate": 0.00016388687542276363, "loss": 12.3637, "step": 11056 }, { "epoch": 0.6020978402183605, "grad_norm": 0.5934108932985701, "learning_rate": 0.00016388009114952929, "loss": 12.3284, "step": 11057 }, { "epoch": 0.6021522942149435, "grad_norm": 0.7523011319008442, "learning_rate": 0.00016387330637955265, "loss": 12.2661, "step": 11058 }, { "epoch": 0.6022067482115265, "grad_norm": 0.647101465243184, "learning_rate": 0.00016386652111288652, "loss": 12.4377, "step": 11059 }, { "epoch": 0.6022612022081095, "grad_norm": 0.5512134794367408, "learning_rate": 0.00016385973534958356, "loss": 12.3801, "step": 11060 }, { "epoch": 0.6023156562046925, "grad_norm": 0.5902503412478513, "learning_rate": 0.0001638529490896966, "loss": 12.3326, "step": 11061 }, { "epoch": 0.6023701102012756, "grad_norm": 0.633910662052685, "learning_rate": 0.00016384616233327837, "loss": 12.4507, "step": 11062 }, { "epoch": 0.6024245641978586, "grad_norm": 0.5754040976327168, "learning_rate": 0.00016383937508038173, "loss": 12.3825, "step": 11063 }, { "epoch": 0.6024790181944416, "grad_norm": 0.6043470862631053, "learning_rate": 0.00016383258733105937, "loss": 12.3094, "step": 11064 }, { "epoch": 0.6025334721910246, "grad_norm": 0.6048248874531785, "learning_rate": 0.00016382579908536413, "loss": 12.2771, "step": 11065 }, { "epoch": 0.6025879261876076, "grad_norm": 0.5452843984000415, "learning_rate": 0.00016381901034334873, "loss": 12.2863, "step": 11066 }, { "epoch": 0.6026423801841907, "grad_norm": 0.6076571075716745, "learning_rate": 0.00016381222110506603, "loss": 12.3473, "step": 11067 }, { "epoch": 0.6026968341807737, "grad_norm": 0.5652508244979979, "learning_rate": 0.00016380543137056882, "loss": 12.4141, "step": 11068 }, { "epoch": 0.6027512881773567, "grad_norm": 0.585225819813063, "learning_rate": 0.00016379864113990985, "loss": 12.305, "step": 11069 }, { "epoch": 0.6028057421739397, "grad_norm": 0.5795408250187182, "learning_rate": 0.00016379185041314194, "loss": 12.2756, "step": 11070 }, { "epoch": 0.6028601961705227, "grad_norm": 0.5995362149946898, "learning_rate": 0.00016378505919031794, "loss": 12.3073, "step": 11071 }, { "epoch": 0.6029146501671057, "grad_norm": 0.5508610806139238, "learning_rate": 0.0001637782674714906, "loss": 12.174, "step": 11072 }, { "epoch": 0.6029691041636888, "grad_norm": 0.5783209754285752, "learning_rate": 0.00016377147525671273, "loss": 12.3101, "step": 11073 }, { "epoch": 0.6030235581602718, "grad_norm": 0.5626050721429561, "learning_rate": 0.0001637646825460372, "loss": 12.2853, "step": 11074 }, { "epoch": 0.6030780121568547, "grad_norm": 0.6027064270819751, "learning_rate": 0.00016375788933951682, "loss": 12.3111, "step": 11075 }, { "epoch": 0.6031324661534377, "grad_norm": 0.6690295265995301, "learning_rate": 0.00016375109563720436, "loss": 12.3696, "step": 11076 }, { "epoch": 0.6031869201500207, "grad_norm": 0.5837871491600592, "learning_rate": 0.0001637443014391527, "loss": 12.2745, "step": 11077 }, { "epoch": 0.6032413741466037, "grad_norm": 0.5733628207555487, "learning_rate": 0.00016373750674541468, "loss": 12.2662, "step": 11078 }, { "epoch": 0.6032958281431868, "grad_norm": 0.6237040428548752, "learning_rate": 0.00016373071155604312, "loss": 12.3948, "step": 11079 }, { "epoch": 0.6033502821397698, "grad_norm": 0.6324278402150281, "learning_rate": 0.00016372391587109086, "loss": 12.4646, "step": 11080 }, { "epoch": 0.6034047361363528, "grad_norm": 0.5900512368765062, "learning_rate": 0.0001637171196906107, "loss": 12.293, "step": 11081 }, { "epoch": 0.6034591901329358, "grad_norm": 0.6510319242318467, "learning_rate": 0.0001637103230146556, "loss": 12.3156, "step": 11082 }, { "epoch": 0.6035136441295188, "grad_norm": 0.603809943286475, "learning_rate": 0.0001637035258432783, "loss": 12.425, "step": 11083 }, { "epoch": 0.6035680981261018, "grad_norm": 0.5907690636784768, "learning_rate": 0.00016369672817653173, "loss": 12.3971, "step": 11084 }, { "epoch": 0.6036225521226849, "grad_norm": 0.6478519079380884, "learning_rate": 0.0001636899300144687, "loss": 12.3261, "step": 11085 }, { "epoch": 0.6036770061192679, "grad_norm": 0.5575314006519871, "learning_rate": 0.0001636831313571421, "loss": 12.3727, "step": 11086 }, { "epoch": 0.6037314601158509, "grad_norm": 0.6231567452468517, "learning_rate": 0.00016367633220460478, "loss": 12.4183, "step": 11087 }, { "epoch": 0.6037859141124339, "grad_norm": 0.5987386128539546, "learning_rate": 0.00016366953255690962, "loss": 12.3108, "step": 11088 }, { "epoch": 0.6038403681090169, "grad_norm": 0.5557211914285582, "learning_rate": 0.00016366273241410952, "loss": 12.1499, "step": 11089 }, { "epoch": 0.6038948221055999, "grad_norm": 0.676169499379147, "learning_rate": 0.0001636559317762573, "loss": 12.4032, "step": 11090 }, { "epoch": 0.603949276102183, "grad_norm": 0.5760384995265102, "learning_rate": 0.0001636491306434059, "loss": 12.2913, "step": 11091 }, { "epoch": 0.604003730098766, "grad_norm": 0.5933103287988173, "learning_rate": 0.0001636423290156082, "loss": 12.428, "step": 11092 }, { "epoch": 0.604058184095349, "grad_norm": 0.7065631146403468, "learning_rate": 0.00016363552689291705, "loss": 12.2464, "step": 11093 }, { "epoch": 0.6041126380919319, "grad_norm": 0.610605509954838, "learning_rate": 0.0001636287242753854, "loss": 12.333, "step": 11094 }, { "epoch": 0.6041670920885149, "grad_norm": 0.5975864259914512, "learning_rate": 0.00016362192116306612, "loss": 12.2726, "step": 11095 }, { "epoch": 0.604221546085098, "grad_norm": 0.6099596482197528, "learning_rate": 0.00016361511755601206, "loss": 12.3904, "step": 11096 }, { "epoch": 0.604276000081681, "grad_norm": 0.5486189081414413, "learning_rate": 0.00016360831345427622, "loss": 12.3003, "step": 11097 }, { "epoch": 0.604330454078264, "grad_norm": 0.6590723996025494, "learning_rate": 0.0001636015088579115, "loss": 12.3686, "step": 11098 }, { "epoch": 0.604384908074847, "grad_norm": 0.5965197006105573, "learning_rate": 0.00016359470376697073, "loss": 12.3768, "step": 11099 }, { "epoch": 0.60443936207143, "grad_norm": 0.6000795522191682, "learning_rate": 0.00016358789818150688, "loss": 12.3006, "step": 11100 }, { "epoch": 0.604493816068013, "grad_norm": 0.5966457566072743, "learning_rate": 0.0001635810921015729, "loss": 12.3902, "step": 11101 }, { "epoch": 0.6045482700645961, "grad_norm": 0.5723338160767066, "learning_rate": 0.00016357428552722165, "loss": 12.2545, "step": 11102 }, { "epoch": 0.6046027240611791, "grad_norm": 0.5917006614386284, "learning_rate": 0.0001635674784585061, "loss": 12.3173, "step": 11103 }, { "epoch": 0.6046571780577621, "grad_norm": 0.6300577844312848, "learning_rate": 0.00016356067089547919, "loss": 12.4326, "step": 11104 }, { "epoch": 0.6047116320543451, "grad_norm": 0.5941836003594154, "learning_rate": 0.00016355386283819386, "loss": 12.3251, "step": 11105 }, { "epoch": 0.6047660860509281, "grad_norm": 0.6859987389573252, "learning_rate": 0.000163547054286703, "loss": 12.3612, "step": 11106 }, { "epoch": 0.6048205400475111, "grad_norm": 0.7555113851041918, "learning_rate": 0.0001635402452410596, "loss": 12.3068, "step": 11107 }, { "epoch": 0.6048749940440942, "grad_norm": 0.7143705006050696, "learning_rate": 0.0001635334357013166, "loss": 12.3739, "step": 11108 }, { "epoch": 0.6049294480406772, "grad_norm": 0.6897982973385011, "learning_rate": 0.00016352662566752698, "loss": 12.384, "step": 11109 }, { "epoch": 0.6049839020372602, "grad_norm": 0.6588045553449117, "learning_rate": 0.00016351981513974365, "loss": 12.4431, "step": 11110 }, { "epoch": 0.6050383560338431, "grad_norm": 0.5990230565766719, "learning_rate": 0.00016351300411801954, "loss": 12.3734, "step": 11111 }, { "epoch": 0.6050928100304261, "grad_norm": 0.6978925280375029, "learning_rate": 0.00016350619260240769, "loss": 12.3558, "step": 11112 }, { "epoch": 0.6051472640270091, "grad_norm": 0.5959688891627724, "learning_rate": 0.00016349938059296104, "loss": 12.2814, "step": 11113 }, { "epoch": 0.6052017180235922, "grad_norm": 0.6048401762930573, "learning_rate": 0.00016349256808973256, "loss": 12.2942, "step": 11114 }, { "epoch": 0.6052561720201752, "grad_norm": 0.6237154293696561, "learning_rate": 0.00016348575509277522, "loss": 12.3011, "step": 11115 }, { "epoch": 0.6053106260167582, "grad_norm": 0.5894504081881295, "learning_rate": 0.000163478941602142, "loss": 12.4032, "step": 11116 }, { "epoch": 0.6053650800133412, "grad_norm": 0.6673670271952441, "learning_rate": 0.0001634721276178859, "loss": 12.3395, "step": 11117 }, { "epoch": 0.6054195340099242, "grad_norm": 0.5951869230383666, "learning_rate": 0.00016346531314005987, "loss": 12.3577, "step": 11118 }, { "epoch": 0.6054739880065072, "grad_norm": 0.6334316531072363, "learning_rate": 0.00016345849816871692, "loss": 12.3382, "step": 11119 }, { "epoch": 0.6055284420030903, "grad_norm": 0.6162645161211552, "learning_rate": 0.00016345168270391004, "loss": 12.2919, "step": 11120 }, { "epoch": 0.6055828959996733, "grad_norm": 0.5205672866936765, "learning_rate": 0.00016344486674569226, "loss": 12.327, "step": 11121 }, { "epoch": 0.6056373499962563, "grad_norm": 0.5930119109328588, "learning_rate": 0.00016343805029411654, "loss": 12.3136, "step": 11122 }, { "epoch": 0.6056918039928393, "grad_norm": 0.5917678881795895, "learning_rate": 0.00016343123334923592, "loss": 12.3358, "step": 11123 }, { "epoch": 0.6057462579894223, "grad_norm": 0.6278136764825851, "learning_rate": 0.0001634244159111034, "loss": 12.4578, "step": 11124 }, { "epoch": 0.6058007119860053, "grad_norm": 0.5941650915734099, "learning_rate": 0.00016341759797977194, "loss": 12.4215, "step": 11125 }, { "epoch": 0.6058551659825884, "grad_norm": 0.6487586325937854, "learning_rate": 0.00016341077955529465, "loss": 12.4542, "step": 11126 }, { "epoch": 0.6059096199791714, "grad_norm": 0.6134015359441436, "learning_rate": 0.00016340396063772449, "loss": 12.3903, "step": 11127 }, { "epoch": 0.6059640739757544, "grad_norm": 0.6836090618022848, "learning_rate": 0.00016339714122711447, "loss": 12.36, "step": 11128 }, { "epoch": 0.6060185279723374, "grad_norm": 0.6845856838845686, "learning_rate": 0.00016339032132351767, "loss": 12.2901, "step": 11129 }, { "epoch": 0.6060729819689203, "grad_norm": 0.5640102141813625, "learning_rate": 0.0001633835009269871, "loss": 12.3379, "step": 11130 }, { "epoch": 0.6061274359655034, "grad_norm": 0.5737426198031063, "learning_rate": 0.0001633766800375758, "loss": 12.3716, "step": 11131 }, { "epoch": 0.6061818899620864, "grad_norm": 0.5824786880831666, "learning_rate": 0.00016336985865533682, "loss": 12.4523, "step": 11132 }, { "epoch": 0.6062363439586694, "grad_norm": 0.5329751071521219, "learning_rate": 0.00016336303678032317, "loss": 12.1134, "step": 11133 }, { "epoch": 0.6062907979552524, "grad_norm": 0.6714939419589115, "learning_rate": 0.00016335621441258792, "loss": 12.3629, "step": 11134 }, { "epoch": 0.6063452519518354, "grad_norm": 0.7119477262638965, "learning_rate": 0.00016334939155218415, "loss": 12.4187, "step": 11135 }, { "epoch": 0.6063997059484184, "grad_norm": 0.6086057695680839, "learning_rate": 0.00016334256819916485, "loss": 12.368, "step": 11136 }, { "epoch": 0.6064541599450015, "grad_norm": 0.6078255268695479, "learning_rate": 0.00016333574435358313, "loss": 12.4414, "step": 11137 }, { "epoch": 0.6065086139415845, "grad_norm": 0.6020225475285034, "learning_rate": 0.00016332892001549206, "loss": 12.2573, "step": 11138 }, { "epoch": 0.6065630679381675, "grad_norm": 0.7082157627835164, "learning_rate": 0.00016332209518494468, "loss": 12.4737, "step": 11139 }, { "epoch": 0.6066175219347505, "grad_norm": 0.5873016676995043, "learning_rate": 0.00016331526986199406, "loss": 12.4236, "step": 11140 }, { "epoch": 0.6066719759313335, "grad_norm": 0.631074673123381, "learning_rate": 0.00016330844404669327, "loss": 12.4155, "step": 11141 }, { "epoch": 0.6067264299279165, "grad_norm": 0.5926052711170497, "learning_rate": 0.00016330161773909542, "loss": 12.3943, "step": 11142 }, { "epoch": 0.6067808839244996, "grad_norm": 0.7595425087844859, "learning_rate": 0.00016329479093925357, "loss": 12.4255, "step": 11143 }, { "epoch": 0.6068353379210826, "grad_norm": 0.6165882148007943, "learning_rate": 0.00016328796364722083, "loss": 12.3621, "step": 11144 }, { "epoch": 0.6068897919176656, "grad_norm": 0.5348979092332994, "learning_rate": 0.00016328113586305026, "loss": 12.3285, "step": 11145 }, { "epoch": 0.6069442459142486, "grad_norm": 0.5812507187291274, "learning_rate": 0.00016327430758679494, "loss": 12.3238, "step": 11146 }, { "epoch": 0.6069986999108316, "grad_norm": 0.57940112965112, "learning_rate": 0.00016326747881850802, "loss": 12.3121, "step": 11147 }, { "epoch": 0.6070531539074145, "grad_norm": 0.5617843893592939, "learning_rate": 0.00016326064955824257, "loss": 12.3478, "step": 11148 }, { "epoch": 0.6071076079039976, "grad_norm": 0.5391895305756338, "learning_rate": 0.0001632538198060517, "loss": 12.3156, "step": 11149 }, { "epoch": 0.6071620619005806, "grad_norm": 0.5712370867105543, "learning_rate": 0.0001632469895619885, "loss": 12.2493, "step": 11150 }, { "epoch": 0.6072165158971636, "grad_norm": 0.613671361377228, "learning_rate": 0.00016324015882610615, "loss": 12.3102, "step": 11151 }, { "epoch": 0.6072709698937466, "grad_norm": 0.6108727049954086, "learning_rate": 0.00016323332759845765, "loss": 12.3195, "step": 11152 }, { "epoch": 0.6073254238903296, "grad_norm": 0.5904626345424183, "learning_rate": 0.00016322649587909623, "loss": 12.3891, "step": 11153 }, { "epoch": 0.6073798778869126, "grad_norm": 0.5562410258535025, "learning_rate": 0.000163219663668075, "loss": 12.258, "step": 11154 }, { "epoch": 0.6074343318834957, "grad_norm": 0.5573520133764401, "learning_rate": 0.00016321283096544704, "loss": 12.3414, "step": 11155 }, { "epoch": 0.6074887858800787, "grad_norm": 0.6867050860705599, "learning_rate": 0.00016320599777126552, "loss": 12.3879, "step": 11156 }, { "epoch": 0.6075432398766617, "grad_norm": 0.6148388888394638, "learning_rate": 0.00016319916408558352, "loss": 12.2203, "step": 11157 }, { "epoch": 0.6075976938732447, "grad_norm": 0.6316122300461814, "learning_rate": 0.00016319232990845425, "loss": 12.3192, "step": 11158 }, { "epoch": 0.6076521478698277, "grad_norm": 0.5381826224839812, "learning_rate": 0.00016318549523993084, "loss": 12.1295, "step": 11159 }, { "epoch": 0.6077066018664107, "grad_norm": 0.639597752296276, "learning_rate": 0.00016317866008006639, "loss": 12.3341, "step": 11160 }, { "epoch": 0.6077610558629938, "grad_norm": 0.5850046410800002, "learning_rate": 0.0001631718244289141, "loss": 12.3696, "step": 11161 }, { "epoch": 0.6078155098595768, "grad_norm": 0.5409904898717542, "learning_rate": 0.00016316498828652712, "loss": 12.2983, "step": 11162 }, { "epoch": 0.6078699638561598, "grad_norm": 0.6560920448327437, "learning_rate": 0.00016315815165295855, "loss": 12.3445, "step": 11163 }, { "epoch": 0.6079244178527428, "grad_norm": 0.5881682714143308, "learning_rate": 0.00016315131452826167, "loss": 12.2529, "step": 11164 }, { "epoch": 0.6079788718493258, "grad_norm": 0.6556078342042584, "learning_rate": 0.00016314447691248956, "loss": 12.4407, "step": 11165 }, { "epoch": 0.6080333258459089, "grad_norm": 0.5998152193241213, "learning_rate": 0.00016313763880569537, "loss": 12.1111, "step": 11166 }, { "epoch": 0.6080877798424918, "grad_norm": 0.6040623193833304, "learning_rate": 0.00016313080020793235, "loss": 12.2528, "step": 11167 }, { "epoch": 0.6081422338390748, "grad_norm": 0.565811403827649, "learning_rate": 0.00016312396111925362, "loss": 12.3451, "step": 11168 }, { "epoch": 0.6081966878356578, "grad_norm": 0.6303795224857877, "learning_rate": 0.00016311712153971238, "loss": 12.3896, "step": 11169 }, { "epoch": 0.6082511418322408, "grad_norm": 0.6103204271982208, "learning_rate": 0.00016311028146936184, "loss": 12.3071, "step": 11170 }, { "epoch": 0.6083055958288238, "grad_norm": 0.5907392547073242, "learning_rate": 0.00016310344090825516, "loss": 12.3801, "step": 11171 }, { "epoch": 0.6083600498254069, "grad_norm": 0.5898616925852171, "learning_rate": 0.00016309659985644555, "loss": 12.2173, "step": 11172 }, { "epoch": 0.6084145038219899, "grad_norm": 0.6721267658389909, "learning_rate": 0.00016308975831398617, "loss": 12.4195, "step": 11173 }, { "epoch": 0.6084689578185729, "grad_norm": 0.5737286379187838, "learning_rate": 0.00016308291628093025, "loss": 12.2979, "step": 11174 }, { "epoch": 0.6085234118151559, "grad_norm": 0.5948908660886684, "learning_rate": 0.00016307607375733103, "loss": 12.4614, "step": 11175 }, { "epoch": 0.6085778658117389, "grad_norm": 0.5847183241982178, "learning_rate": 0.00016306923074324166, "loss": 12.1829, "step": 11176 }, { "epoch": 0.6086323198083219, "grad_norm": 0.5348038205117897, "learning_rate": 0.00016306238723871536, "loss": 12.2261, "step": 11177 }, { "epoch": 0.608686773804905, "grad_norm": 0.7186846714099959, "learning_rate": 0.00016305554324380536, "loss": 12.3023, "step": 11178 }, { "epoch": 0.608741227801488, "grad_norm": 0.656779753977201, "learning_rate": 0.00016304869875856493, "loss": 12.4631, "step": 11179 }, { "epoch": 0.608795681798071, "grad_norm": 0.6612372166326377, "learning_rate": 0.0001630418537830472, "loss": 12.3458, "step": 11180 }, { "epoch": 0.608850135794654, "grad_norm": 0.6116698754329289, "learning_rate": 0.00016303500831730546, "loss": 12.2474, "step": 11181 }, { "epoch": 0.608904589791237, "grad_norm": 0.6175693550944324, "learning_rate": 0.00016302816236139292, "loss": 12.3383, "step": 11182 }, { "epoch": 0.60895904378782, "grad_norm": 0.6838026604965294, "learning_rate": 0.0001630213159153628, "loss": 12.3036, "step": 11183 }, { "epoch": 0.6090134977844031, "grad_norm": 0.6209752444596405, "learning_rate": 0.0001630144689792684, "loss": 12.2806, "step": 11184 }, { "epoch": 0.609067951780986, "grad_norm": 0.6992091942059789, "learning_rate": 0.0001630076215531629, "loss": 12.3381, "step": 11185 }, { "epoch": 0.609122405777569, "grad_norm": 0.6464572432173553, "learning_rate": 0.0001630007736370996, "loss": 12.3463, "step": 11186 }, { "epoch": 0.609176859774152, "grad_norm": 0.6038931960005121, "learning_rate": 0.00016299392523113165, "loss": 12.2283, "step": 11187 }, { "epoch": 0.609231313770735, "grad_norm": 0.6692542463517684, "learning_rate": 0.00016298707633531244, "loss": 12.1545, "step": 11188 }, { "epoch": 0.609285767767318, "grad_norm": 0.5934411500079831, "learning_rate": 0.0001629802269496951, "loss": 12.279, "step": 11189 }, { "epoch": 0.6093402217639011, "grad_norm": 0.6520832855912216, "learning_rate": 0.000162973377074333, "loss": 12.4605, "step": 11190 }, { "epoch": 0.6093946757604841, "grad_norm": 0.6356145983590724, "learning_rate": 0.00016296652670927934, "loss": 12.3357, "step": 11191 }, { "epoch": 0.6094491297570671, "grad_norm": 0.6684675532985174, "learning_rate": 0.00016295967585458742, "loss": 12.3771, "step": 11192 }, { "epoch": 0.6095035837536501, "grad_norm": 0.5676216790776536, "learning_rate": 0.00016295282451031048, "loss": 12.313, "step": 11193 }, { "epoch": 0.6095580377502331, "grad_norm": 0.6318426817865253, "learning_rate": 0.00016294597267650185, "loss": 12.2556, "step": 11194 }, { "epoch": 0.6096124917468161, "grad_norm": 0.546346474512078, "learning_rate": 0.00016293912035321477, "loss": 12.3492, "step": 11195 }, { "epoch": 0.6096669457433992, "grad_norm": 0.5597620710316814, "learning_rate": 0.00016293226754050252, "loss": 12.2942, "step": 11196 }, { "epoch": 0.6097213997399822, "grad_norm": 0.6019576331614028, "learning_rate": 0.00016292541423841843, "loss": 12.4731, "step": 11197 }, { "epoch": 0.6097758537365652, "grad_norm": 0.6087530645396827, "learning_rate": 0.00016291856044701574, "loss": 12.414, "step": 11198 }, { "epoch": 0.6098303077331482, "grad_norm": 0.5511451580189743, "learning_rate": 0.0001629117061663478, "loss": 12.3977, "step": 11199 }, { "epoch": 0.6098847617297312, "grad_norm": 0.5673407108230473, "learning_rate": 0.00016290485139646788, "loss": 12.3911, "step": 11200 }, { "epoch": 0.6099392157263143, "grad_norm": 0.568847143885813, "learning_rate": 0.00016289799613742925, "loss": 12.2846, "step": 11201 }, { "epoch": 0.6099936697228973, "grad_norm": 0.7944651261831513, "learning_rate": 0.0001628911403892853, "loss": 12.4732, "step": 11202 }, { "epoch": 0.6100481237194803, "grad_norm": 0.646189949260018, "learning_rate": 0.00016288428415208925, "loss": 12.3322, "step": 11203 }, { "epoch": 0.6101025777160632, "grad_norm": 0.5732514537043685, "learning_rate": 0.0001628774274258945, "loss": 12.3178, "step": 11204 }, { "epoch": 0.6101570317126462, "grad_norm": 0.654862812790952, "learning_rate": 0.00016287057021075428, "loss": 12.3292, "step": 11205 }, { "epoch": 0.6102114857092292, "grad_norm": 0.6568052904002022, "learning_rate": 0.00016286371250672201, "loss": 12.4505, "step": 11206 }, { "epoch": 0.6102659397058123, "grad_norm": 0.6089929626588377, "learning_rate": 0.00016285685431385096, "loss": 12.2601, "step": 11207 }, { "epoch": 0.6103203937023953, "grad_norm": 0.6441744367395216, "learning_rate": 0.00016284999563219446, "loss": 12.2596, "step": 11208 }, { "epoch": 0.6103748476989783, "grad_norm": 0.5886980647496863, "learning_rate": 0.00016284313646180586, "loss": 12.4004, "step": 11209 }, { "epoch": 0.6104293016955613, "grad_norm": 0.6567455919150536, "learning_rate": 0.0001628362768027385, "loss": 12.4423, "step": 11210 }, { "epoch": 0.6104837556921443, "grad_norm": 0.6434126784891697, "learning_rate": 0.0001628294166550457, "loss": 12.3281, "step": 11211 }, { "epoch": 0.6105382096887273, "grad_norm": 0.5550889914663049, "learning_rate": 0.00016282255601878082, "loss": 12.2448, "step": 11212 }, { "epoch": 0.6105926636853104, "grad_norm": 0.658643938541344, "learning_rate": 0.0001628156948939972, "loss": 12.1808, "step": 11213 }, { "epoch": 0.6106471176818934, "grad_norm": 0.5953183504399878, "learning_rate": 0.00016280883328074824, "loss": 12.3536, "step": 11214 }, { "epoch": 0.6107015716784764, "grad_norm": 0.6197836107074511, "learning_rate": 0.00016280197117908723, "loss": 12.3475, "step": 11215 }, { "epoch": 0.6107560256750594, "grad_norm": 0.6536735721454394, "learning_rate": 0.00016279510858906756, "loss": 12.3812, "step": 11216 }, { "epoch": 0.6108104796716424, "grad_norm": 0.5736370991155088, "learning_rate": 0.00016278824551074262, "loss": 12.3155, "step": 11217 }, { "epoch": 0.6108649336682254, "grad_norm": 0.650879421311335, "learning_rate": 0.0001627813819441657, "loss": 12.3652, "step": 11218 }, { "epoch": 0.6109193876648085, "grad_norm": 0.640251354561717, "learning_rate": 0.00016277451788939028, "loss": 12.1254, "step": 11219 }, { "epoch": 0.6109738416613915, "grad_norm": 0.5670359796552973, "learning_rate": 0.00016276765334646967, "loss": 12.3489, "step": 11220 }, { "epoch": 0.6110282956579745, "grad_norm": 0.6384343722609144, "learning_rate": 0.00016276078831545726, "loss": 12.2302, "step": 11221 }, { "epoch": 0.6110827496545574, "grad_norm": 0.6596286383025379, "learning_rate": 0.00016275392279640642, "loss": 12.3865, "step": 11222 }, { "epoch": 0.6111372036511404, "grad_norm": 0.5891279826921046, "learning_rate": 0.00016274705678937057, "loss": 12.2326, "step": 11223 }, { "epoch": 0.6111916576477234, "grad_norm": 0.6565771235725542, "learning_rate": 0.00016274019029440307, "loss": 12.3797, "step": 11224 }, { "epoch": 0.6112461116443065, "grad_norm": 0.5606421552391951, "learning_rate": 0.00016273332331155732, "loss": 12.2621, "step": 11225 }, { "epoch": 0.6113005656408895, "grad_norm": 0.5888887080072942, "learning_rate": 0.00016272645584088674, "loss": 12.3646, "step": 11226 }, { "epoch": 0.6113550196374725, "grad_norm": 0.5660003854592005, "learning_rate": 0.00016271958788244474, "loss": 12.3718, "step": 11227 }, { "epoch": 0.6114094736340555, "grad_norm": 0.5565818029336428, "learning_rate": 0.00016271271943628466, "loss": 12.3108, "step": 11228 }, { "epoch": 0.6114639276306385, "grad_norm": 0.6250573083091369, "learning_rate": 0.00016270585050245999, "loss": 12.414, "step": 11229 }, { "epoch": 0.6115183816272216, "grad_norm": 0.5887684866794202, "learning_rate": 0.00016269898108102414, "loss": 12.4102, "step": 11230 }, { "epoch": 0.6115728356238046, "grad_norm": 0.6040457465405051, "learning_rate": 0.00016269211117203044, "loss": 12.4095, "step": 11231 }, { "epoch": 0.6116272896203876, "grad_norm": 0.6114239519382482, "learning_rate": 0.00016268524077553238, "loss": 12.2274, "step": 11232 }, { "epoch": 0.6116817436169706, "grad_norm": 0.5965745055703814, "learning_rate": 0.00016267836989158338, "loss": 12.2036, "step": 11233 }, { "epoch": 0.6117361976135536, "grad_norm": 0.6143799913864648, "learning_rate": 0.0001626714985202369, "loss": 12.4022, "step": 11234 }, { "epoch": 0.6117906516101366, "grad_norm": 0.6054444200965224, "learning_rate": 0.0001626646266615463, "loss": 12.2948, "step": 11235 }, { "epoch": 0.6118451056067197, "grad_norm": 0.5873370076509535, "learning_rate": 0.00016265775431556506, "loss": 12.3566, "step": 11236 }, { "epoch": 0.6118995596033027, "grad_norm": 0.6543434983368583, "learning_rate": 0.0001626508814823466, "loss": 12.2912, "step": 11237 }, { "epoch": 0.6119540135998857, "grad_norm": 0.622245962456289, "learning_rate": 0.0001626440081619444, "loss": 12.3253, "step": 11238 }, { "epoch": 0.6120084675964687, "grad_norm": 0.5760824372813074, "learning_rate": 0.00016263713435441188, "loss": 12.3145, "step": 11239 }, { "epoch": 0.6120629215930516, "grad_norm": 0.6597670651926038, "learning_rate": 0.00016263026005980253, "loss": 12.3367, "step": 11240 }, { "epoch": 0.6121173755896346, "grad_norm": 0.67169983719772, "learning_rate": 0.00016262338527816972, "loss": 12.0397, "step": 11241 }, { "epoch": 0.6121718295862177, "grad_norm": 0.6221931850437608, "learning_rate": 0.00016261651000956703, "loss": 12.2584, "step": 11242 }, { "epoch": 0.6122262835828007, "grad_norm": 0.6342830763189989, "learning_rate": 0.0001626096342540478, "loss": 12.3914, "step": 11243 }, { "epoch": 0.6122807375793837, "grad_norm": 0.567870951485226, "learning_rate": 0.0001626027580116656, "loss": 12.2923, "step": 11244 }, { "epoch": 0.6123351915759667, "grad_norm": 0.5671950134759478, "learning_rate": 0.0001625958812824738, "loss": 12.3838, "step": 11245 }, { "epoch": 0.6123896455725497, "grad_norm": 0.6211012286514693, "learning_rate": 0.00016258900406652596, "loss": 12.4002, "step": 11246 }, { "epoch": 0.6124440995691327, "grad_norm": 0.619630309091858, "learning_rate": 0.00016258212636387556, "loss": 12.3923, "step": 11247 }, { "epoch": 0.6124985535657158, "grad_norm": 0.6709943689223554, "learning_rate": 0.00016257524817457598, "loss": 12.1034, "step": 11248 }, { "epoch": 0.6125530075622988, "grad_norm": 0.5728013482326856, "learning_rate": 0.00016256836949868082, "loss": 12.2234, "step": 11249 }, { "epoch": 0.6126074615588818, "grad_norm": 0.5378034627203535, "learning_rate": 0.00016256149033624354, "loss": 12.3361, "step": 11250 }, { "epoch": 0.6126619155554648, "grad_norm": 0.5506895229670732, "learning_rate": 0.0001625546106873176, "loss": 12.1849, "step": 11251 }, { "epoch": 0.6127163695520478, "grad_norm": 0.6064870841906332, "learning_rate": 0.00016254773055195652, "loss": 12.3334, "step": 11252 }, { "epoch": 0.6127708235486308, "grad_norm": 0.6667096903245954, "learning_rate": 0.0001625408499302138, "loss": 12.4349, "step": 11253 }, { "epoch": 0.6128252775452139, "grad_norm": 0.6123349006803258, "learning_rate": 0.00016253396882214292, "loss": 12.415, "step": 11254 }, { "epoch": 0.6128797315417969, "grad_norm": 0.5828800827037145, "learning_rate": 0.00016252708722779742, "loss": 12.2713, "step": 11255 }, { "epoch": 0.6129341855383799, "grad_norm": 0.5810626939893756, "learning_rate": 0.00016252020514723084, "loss": 12.2411, "step": 11256 }, { "epoch": 0.6129886395349629, "grad_norm": 0.610484695656065, "learning_rate": 0.00016251332258049664, "loss": 12.3313, "step": 11257 }, { "epoch": 0.6130430935315458, "grad_norm": 0.5960070240782448, "learning_rate": 0.00016250643952764833, "loss": 12.379, "step": 11258 }, { "epoch": 0.6130975475281288, "grad_norm": 0.6448646995063299, "learning_rate": 0.00016249955598873948, "loss": 12.3597, "step": 11259 }, { "epoch": 0.6131520015247119, "grad_norm": 0.5791912269801887, "learning_rate": 0.00016249267196382362, "loss": 12.2723, "step": 11260 }, { "epoch": 0.6132064555212949, "grad_norm": 0.5642981121650137, "learning_rate": 0.00016248578745295427, "loss": 12.3564, "step": 11261 }, { "epoch": 0.6132609095178779, "grad_norm": 0.6165409779113825, "learning_rate": 0.00016247890245618492, "loss": 12.2522, "step": 11262 }, { "epoch": 0.6133153635144609, "grad_norm": 0.627470761262432, "learning_rate": 0.00016247201697356918, "loss": 12.3381, "step": 11263 }, { "epoch": 0.6133698175110439, "grad_norm": 0.6276131776598461, "learning_rate": 0.00016246513100516053, "loss": 12.2903, "step": 11264 }, { "epoch": 0.613424271507627, "grad_norm": 0.6311633542362459, "learning_rate": 0.00016245824455101256, "loss": 12.385, "step": 11265 }, { "epoch": 0.61347872550421, "grad_norm": 0.6036525845880586, "learning_rate": 0.00016245135761117882, "loss": 12.3985, "step": 11266 }, { "epoch": 0.613533179500793, "grad_norm": 0.6372653976777525, "learning_rate": 0.00016244447018571285, "loss": 12.3874, "step": 11267 }, { "epoch": 0.613587633497376, "grad_norm": 0.6120876586946324, "learning_rate": 0.00016243758227466816, "loss": 12.3954, "step": 11268 }, { "epoch": 0.613642087493959, "grad_norm": 0.5670362688938073, "learning_rate": 0.00016243069387809843, "loss": 12.2247, "step": 11269 }, { "epoch": 0.613696541490542, "grad_norm": 0.6061437844694163, "learning_rate": 0.00016242380499605712, "loss": 12.3352, "step": 11270 }, { "epoch": 0.6137509954871251, "grad_norm": 0.6213968616718148, "learning_rate": 0.00016241691562859782, "loss": 12.2934, "step": 11271 }, { "epoch": 0.6138054494837081, "grad_norm": 0.572983281791859, "learning_rate": 0.00016241002577577413, "loss": 12.296, "step": 11272 }, { "epoch": 0.6138599034802911, "grad_norm": 0.565001252538547, "learning_rate": 0.0001624031354376396, "loss": 12.3484, "step": 11273 }, { "epoch": 0.6139143574768741, "grad_norm": 0.6102422361467659, "learning_rate": 0.00016239624461424784, "loss": 12.5153, "step": 11274 }, { "epoch": 0.613968811473457, "grad_norm": 0.791363759356964, "learning_rate": 0.00016238935330565243, "loss": 12.4972, "step": 11275 }, { "epoch": 0.61402326547004, "grad_norm": 0.5937446958082347, "learning_rate": 0.00016238246151190696, "loss": 12.1792, "step": 11276 }, { "epoch": 0.6140777194666232, "grad_norm": 0.6128242185531629, "learning_rate": 0.00016237556923306496, "loss": 12.4127, "step": 11277 }, { "epoch": 0.6141321734632061, "grad_norm": 0.5133072704141306, "learning_rate": 0.00016236867646918007, "loss": 12.2986, "step": 11278 }, { "epoch": 0.6141866274597891, "grad_norm": 0.616740205009282, "learning_rate": 0.00016236178322030594, "loss": 12.4195, "step": 11279 }, { "epoch": 0.6142410814563721, "grad_norm": 0.6902749010715484, "learning_rate": 0.0001623548894864961, "loss": 12.2448, "step": 11280 }, { "epoch": 0.6142955354529551, "grad_norm": 0.5800100682633578, "learning_rate": 0.00016234799526780418, "loss": 12.3209, "step": 11281 }, { "epoch": 0.6143499894495381, "grad_norm": 0.5863122766710325, "learning_rate": 0.0001623411005642838, "loss": 12.2777, "step": 11282 }, { "epoch": 0.6144044434461212, "grad_norm": 0.5681102531243815, "learning_rate": 0.00016233420537598855, "loss": 12.4094, "step": 11283 }, { "epoch": 0.6144588974427042, "grad_norm": 0.5431159450349899, "learning_rate": 0.0001623273097029721, "loss": 12.3014, "step": 11284 }, { "epoch": 0.6145133514392872, "grad_norm": 0.5311266917053576, "learning_rate": 0.00016232041354528802, "loss": 12.2448, "step": 11285 }, { "epoch": 0.6145678054358702, "grad_norm": 0.610874460094281, "learning_rate": 0.00016231351690298995, "loss": 12.4282, "step": 11286 }, { "epoch": 0.6146222594324532, "grad_norm": 0.6480890556816988, "learning_rate": 0.0001623066197761315, "loss": 12.4236, "step": 11287 }, { "epoch": 0.6146767134290362, "grad_norm": 0.6069702570834702, "learning_rate": 0.00016229972216476635, "loss": 12.4945, "step": 11288 }, { "epoch": 0.6147311674256193, "grad_norm": 0.5934019147160708, "learning_rate": 0.00016229282406894811, "loss": 12.3161, "step": 11289 }, { "epoch": 0.6147856214222023, "grad_norm": 0.5672283273861385, "learning_rate": 0.00016228592548873043, "loss": 12.2918, "step": 11290 }, { "epoch": 0.6148400754187853, "grad_norm": 0.7098678134884511, "learning_rate": 0.00016227902642416695, "loss": 12.3796, "step": 11291 }, { "epoch": 0.6148945294153683, "grad_norm": 0.5864296809404334, "learning_rate": 0.0001622721268753113, "loss": 12.2628, "step": 11292 }, { "epoch": 0.6149489834119513, "grad_norm": 0.5438277220093632, "learning_rate": 0.00016226522684221716, "loss": 12.2331, "step": 11293 }, { "epoch": 0.6150034374085342, "grad_norm": 0.6313422982743069, "learning_rate": 0.00016225832632493819, "loss": 12.194, "step": 11294 }, { "epoch": 0.6150578914051174, "grad_norm": 0.5614566345902298, "learning_rate": 0.00016225142532352803, "loss": 12.4039, "step": 11295 }, { "epoch": 0.6151123454017003, "grad_norm": 0.5955111725120733, "learning_rate": 0.0001622445238380403, "loss": 12.3284, "step": 11296 }, { "epoch": 0.6151667993982833, "grad_norm": 0.5981568208880793, "learning_rate": 0.00016223762186852876, "loss": 12.386, "step": 11297 }, { "epoch": 0.6152212533948663, "grad_norm": 0.5779828111185735, "learning_rate": 0.000162230719415047, "loss": 12.3549, "step": 11298 }, { "epoch": 0.6152757073914493, "grad_norm": 0.5988882027190688, "learning_rate": 0.00016222381647764875, "loss": 12.2865, "step": 11299 }, { "epoch": 0.6153301613880324, "grad_norm": 0.5598859235243391, "learning_rate": 0.0001622169130563877, "loss": 12.3623, "step": 11300 }, { "epoch": 0.6153846153846154, "grad_norm": 0.6025884901045108, "learning_rate": 0.00016221000915131746, "loss": 12.3875, "step": 11301 }, { "epoch": 0.6154390693811984, "grad_norm": 0.5788006219062364, "learning_rate": 0.00016220310476249176, "loss": 12.275, "step": 11302 }, { "epoch": 0.6154935233777814, "grad_norm": 0.6248198295930657, "learning_rate": 0.00016219619988996428, "loss": 12.1747, "step": 11303 }, { "epoch": 0.6155479773743644, "grad_norm": 0.6097860819021905, "learning_rate": 0.00016218929453378874, "loss": 12.3079, "step": 11304 }, { "epoch": 0.6156024313709474, "grad_norm": 0.6942998620702879, "learning_rate": 0.0001621823886940188, "loss": 12.4143, "step": 11305 }, { "epoch": 0.6156568853675305, "grad_norm": 0.6263141335439183, "learning_rate": 0.0001621754823707082, "loss": 12.361, "step": 11306 }, { "epoch": 0.6157113393641135, "grad_norm": 0.6863996262057079, "learning_rate": 0.00016216857556391063, "loss": 12.3674, "step": 11307 }, { "epoch": 0.6157657933606965, "grad_norm": 0.616314884930882, "learning_rate": 0.00016216166827367974, "loss": 12.2604, "step": 11308 }, { "epoch": 0.6158202473572795, "grad_norm": 0.5983392937109377, "learning_rate": 0.00016215476050006932, "loss": 12.2909, "step": 11309 }, { "epoch": 0.6158747013538625, "grad_norm": 0.6468327132530235, "learning_rate": 0.0001621478522431331, "loss": 12.3449, "step": 11310 }, { "epoch": 0.6159291553504455, "grad_norm": 0.6584847095297925, "learning_rate": 0.00016214094350292468, "loss": 12.4412, "step": 11311 }, { "epoch": 0.6159836093470286, "grad_norm": 0.5925212894375936, "learning_rate": 0.00016213403427949792, "loss": 12.3087, "step": 11312 }, { "epoch": 0.6160380633436116, "grad_norm": 0.6141912148999293, "learning_rate": 0.00016212712457290646, "loss": 12.2135, "step": 11313 }, { "epoch": 0.6160925173401945, "grad_norm": 0.5730557449185941, "learning_rate": 0.0001621202143832041, "loss": 12.2448, "step": 11314 }, { "epoch": 0.6161469713367775, "grad_norm": 0.6319410511618868, "learning_rate": 0.0001621133037104445, "loss": 12.2875, "step": 11315 }, { "epoch": 0.6162014253333605, "grad_norm": 0.566548677650518, "learning_rate": 0.00016210639255468144, "loss": 12.3506, "step": 11316 }, { "epoch": 0.6162558793299435, "grad_norm": 0.571999101358985, "learning_rate": 0.00016209948091596864, "loss": 12.3217, "step": 11317 }, { "epoch": 0.6163103333265266, "grad_norm": 0.5840680525297499, "learning_rate": 0.00016209256879435988, "loss": 12.2879, "step": 11318 }, { "epoch": 0.6163647873231096, "grad_norm": 0.6271115358651692, "learning_rate": 0.0001620856561899089, "loss": 12.3066, "step": 11319 }, { "epoch": 0.6164192413196926, "grad_norm": 0.5674867522352245, "learning_rate": 0.00016207874310266945, "loss": 12.404, "step": 11320 }, { "epoch": 0.6164736953162756, "grad_norm": 0.5963678282919692, "learning_rate": 0.00016207182953269523, "loss": 12.3059, "step": 11321 }, { "epoch": 0.6165281493128586, "grad_norm": 0.6584712900680062, "learning_rate": 0.00016206491548004012, "loss": 12.1358, "step": 11322 }, { "epoch": 0.6165826033094416, "grad_norm": 0.6232001423449859, "learning_rate": 0.00016205800094475778, "loss": 12.2733, "step": 11323 }, { "epoch": 0.6166370573060247, "grad_norm": 0.5961936391155271, "learning_rate": 0.00016205108592690204, "loss": 12.267, "step": 11324 }, { "epoch": 0.6166915113026077, "grad_norm": 0.6233694372694509, "learning_rate": 0.00016204417042652665, "loss": 12.3168, "step": 11325 }, { "epoch": 0.6167459652991907, "grad_norm": 0.5967553232670233, "learning_rate": 0.00016203725444368538, "loss": 12.2894, "step": 11326 }, { "epoch": 0.6168004192957737, "grad_norm": 0.6328880831888363, "learning_rate": 0.000162030337978432, "loss": 12.4589, "step": 11327 }, { "epoch": 0.6168548732923567, "grad_norm": 0.6599660413066143, "learning_rate": 0.00016202342103082033, "loss": 12.133, "step": 11328 }, { "epoch": 0.6169093272889397, "grad_norm": 0.6050448734616868, "learning_rate": 0.00016201650360090413, "loss": 12.3368, "step": 11329 }, { "epoch": 0.6169637812855228, "grad_norm": 0.616854598836734, "learning_rate": 0.0001620095856887372, "loss": 12.2232, "step": 11330 }, { "epoch": 0.6170182352821058, "grad_norm": 0.6680516335137024, "learning_rate": 0.00016200266729437333, "loss": 12.3242, "step": 11331 }, { "epoch": 0.6170726892786887, "grad_norm": 0.5497639299706962, "learning_rate": 0.00016199574841786635, "loss": 12.3369, "step": 11332 }, { "epoch": 0.6171271432752717, "grad_norm": 0.6708234481169313, "learning_rate": 0.00016198882905926997, "loss": 12.3544, "step": 11333 }, { "epoch": 0.6171815972718547, "grad_norm": 0.6315538192135355, "learning_rate": 0.0001619819092186381, "loss": 12.1928, "step": 11334 }, { "epoch": 0.6172360512684378, "grad_norm": 0.6490092063174805, "learning_rate": 0.00016197498889602448, "loss": 12.1877, "step": 11335 }, { "epoch": 0.6172905052650208, "grad_norm": 0.5577690708953591, "learning_rate": 0.00016196806809148302, "loss": 12.4101, "step": 11336 }, { "epoch": 0.6173449592616038, "grad_norm": 0.6033656557328602, "learning_rate": 0.00016196114680506741, "loss": 12.3735, "step": 11337 }, { "epoch": 0.6173994132581868, "grad_norm": 0.6226698585246981, "learning_rate": 0.00016195422503683155, "loss": 12.3514, "step": 11338 }, { "epoch": 0.6174538672547698, "grad_norm": 0.6146688034799889, "learning_rate": 0.00016194730278682923, "loss": 12.3374, "step": 11339 }, { "epoch": 0.6175083212513528, "grad_norm": 0.6202871794806031, "learning_rate": 0.00016194038005511432, "loss": 12.3491, "step": 11340 }, { "epoch": 0.6175627752479359, "grad_norm": 0.654543386433988, "learning_rate": 0.0001619334568417406, "loss": 12.4703, "step": 11341 }, { "epoch": 0.6176172292445189, "grad_norm": 0.6079034969424639, "learning_rate": 0.00016192653314676196, "loss": 12.2333, "step": 11342 }, { "epoch": 0.6176716832411019, "grad_norm": 0.666143872758608, "learning_rate": 0.0001619196089702322, "loss": 12.3581, "step": 11343 }, { "epoch": 0.6177261372376849, "grad_norm": 0.5102477274485845, "learning_rate": 0.00016191268431220519, "loss": 12.3265, "step": 11344 }, { "epoch": 0.6177805912342679, "grad_norm": 0.5784737794499895, "learning_rate": 0.00016190575917273474, "loss": 12.3641, "step": 11345 }, { "epoch": 0.6178350452308509, "grad_norm": 0.684633075833952, "learning_rate": 0.00016189883355187477, "loss": 12.3834, "step": 11346 }, { "epoch": 0.617889499227434, "grad_norm": 0.6336297914409275, "learning_rate": 0.00016189190744967906, "loss": 12.3893, "step": 11347 }, { "epoch": 0.617943953224017, "grad_norm": 0.6382634194907015, "learning_rate": 0.00016188498086620146, "loss": 12.4647, "step": 11348 }, { "epoch": 0.6179984072206, "grad_norm": 0.6638108620704917, "learning_rate": 0.00016187805380149596, "loss": 12.4085, "step": 11349 }, { "epoch": 0.618052861217183, "grad_norm": 0.5642107833644697, "learning_rate": 0.00016187112625561624, "loss": 12.2775, "step": 11350 }, { "epoch": 0.6181073152137659, "grad_norm": 0.618448690303876, "learning_rate": 0.00016186419822861634, "loss": 12.323, "step": 11351 }, { "epoch": 0.6181617692103489, "grad_norm": 0.6144504495899537, "learning_rate": 0.00016185726972055002, "loss": 12.2607, "step": 11352 }, { "epoch": 0.618216223206932, "grad_norm": 0.5653022098286621, "learning_rate": 0.0001618503407314712, "loss": 12.2345, "step": 11353 }, { "epoch": 0.618270677203515, "grad_norm": 0.5923636217015734, "learning_rate": 0.00016184341126143376, "loss": 12.3753, "step": 11354 }, { "epoch": 0.618325131200098, "grad_norm": 0.6090629767174827, "learning_rate": 0.0001618364813104916, "loss": 12.3978, "step": 11355 }, { "epoch": 0.618379585196681, "grad_norm": 0.760239146374173, "learning_rate": 0.00016182955087869859, "loss": 12.3513, "step": 11356 }, { "epoch": 0.618434039193264, "grad_norm": 0.5499640804080805, "learning_rate": 0.0001618226199661086, "loss": 12.2744, "step": 11357 }, { "epoch": 0.618488493189847, "grad_norm": 0.5413087096896169, "learning_rate": 0.0001618156885727756, "loss": 12.2719, "step": 11358 }, { "epoch": 0.6185429471864301, "grad_norm": 0.6448601280327934, "learning_rate": 0.0001618087566987534, "loss": 12.3079, "step": 11359 }, { "epoch": 0.6185974011830131, "grad_norm": 0.6204740814101944, "learning_rate": 0.00016180182434409593, "loss": 12.3334, "step": 11360 }, { "epoch": 0.6186518551795961, "grad_norm": 0.6521050686806655, "learning_rate": 0.00016179489150885715, "loss": 12.1476, "step": 11361 }, { "epoch": 0.6187063091761791, "grad_norm": 0.5789250892511003, "learning_rate": 0.00016178795819309086, "loss": 12.3211, "step": 11362 }, { "epoch": 0.6187607631727621, "grad_norm": 0.6427843582992568, "learning_rate": 0.00016178102439685113, "loss": 12.4807, "step": 11363 }, { "epoch": 0.6188152171693452, "grad_norm": 0.7076880164255421, "learning_rate": 0.00016177409012019175, "loss": 12.3935, "step": 11364 }, { "epoch": 0.6188696711659282, "grad_norm": 0.5605514128844377, "learning_rate": 0.0001617671553631667, "loss": 12.2769, "step": 11365 }, { "epoch": 0.6189241251625112, "grad_norm": 0.6799978308680839, "learning_rate": 0.0001617602201258299, "loss": 12.2532, "step": 11366 }, { "epoch": 0.6189785791590942, "grad_norm": 0.7108657629179751, "learning_rate": 0.00016175328440823524, "loss": 12.2893, "step": 11367 }, { "epoch": 0.6190330331556771, "grad_norm": 0.6701568668267008, "learning_rate": 0.00016174634821043666, "loss": 12.331, "step": 11368 }, { "epoch": 0.6190874871522601, "grad_norm": 0.6167861448581061, "learning_rate": 0.00016173941153248817, "loss": 12.1841, "step": 11369 }, { "epoch": 0.6191419411488432, "grad_norm": 0.5542796556543904, "learning_rate": 0.00016173247437444366, "loss": 12.2144, "step": 11370 }, { "epoch": 0.6191963951454262, "grad_norm": 0.5561406263549451, "learning_rate": 0.00016172553673635706, "loss": 12.3912, "step": 11371 }, { "epoch": 0.6192508491420092, "grad_norm": 0.6768993477226342, "learning_rate": 0.00016171859861828237, "loss": 12.3638, "step": 11372 }, { "epoch": 0.6193053031385922, "grad_norm": 0.5643467859039376, "learning_rate": 0.00016171166002027344, "loss": 12.2933, "step": 11373 }, { "epoch": 0.6193597571351752, "grad_norm": 0.6870602034201745, "learning_rate": 0.00016170472094238436, "loss": 12.4109, "step": 11374 }, { "epoch": 0.6194142111317582, "grad_norm": 0.6432492313425489, "learning_rate": 0.00016169778138466897, "loss": 12.3329, "step": 11375 }, { "epoch": 0.6194686651283413, "grad_norm": 0.5871531262125809, "learning_rate": 0.00016169084134718133, "loss": 12.3689, "step": 11376 }, { "epoch": 0.6195231191249243, "grad_norm": 0.581973602589214, "learning_rate": 0.00016168390082997534, "loss": 12.3192, "step": 11377 }, { "epoch": 0.6195775731215073, "grad_norm": 0.7273779095746546, "learning_rate": 0.000161676959833105, "loss": 12.4788, "step": 11378 }, { "epoch": 0.6196320271180903, "grad_norm": 0.6365631002352293, "learning_rate": 0.0001616700183566243, "loss": 12.4043, "step": 11379 }, { "epoch": 0.6196864811146733, "grad_norm": 0.5885970626918695, "learning_rate": 0.00016166307640058712, "loss": 12.3859, "step": 11380 }, { "epoch": 0.6197409351112563, "grad_norm": 0.6437308372453692, "learning_rate": 0.0001616561339650476, "loss": 12.3077, "step": 11381 }, { "epoch": 0.6197953891078394, "grad_norm": 0.7012567621229076, "learning_rate": 0.00016164919105005957, "loss": 12.2332, "step": 11382 }, { "epoch": 0.6198498431044224, "grad_norm": 0.5809889464584606, "learning_rate": 0.00016164224765567714, "loss": 12.2526, "step": 11383 }, { "epoch": 0.6199042971010054, "grad_norm": 0.6078813911317118, "learning_rate": 0.00016163530378195424, "loss": 12.47, "step": 11384 }, { "epoch": 0.6199587510975884, "grad_norm": 0.5687762963315507, "learning_rate": 0.0001616283594289449, "loss": 12.4204, "step": 11385 }, { "epoch": 0.6200132050941713, "grad_norm": 0.6699596697271144, "learning_rate": 0.00016162141459670308, "loss": 12.3298, "step": 11386 }, { "epoch": 0.6200676590907543, "grad_norm": 0.688363027506524, "learning_rate": 0.00016161446928528284, "loss": 12.3479, "step": 11387 }, { "epoch": 0.6201221130873374, "grad_norm": 0.5737921994052604, "learning_rate": 0.00016160752349473812, "loss": 12.3075, "step": 11388 }, { "epoch": 0.6201765670839204, "grad_norm": 0.5286850608693549, "learning_rate": 0.00016160057722512295, "loss": 12.1068, "step": 11389 }, { "epoch": 0.6202310210805034, "grad_norm": 0.5964030221046523, "learning_rate": 0.00016159363047649138, "loss": 12.3793, "step": 11390 }, { "epoch": 0.6202854750770864, "grad_norm": 0.6197530079107472, "learning_rate": 0.00016158668324889742, "loss": 12.4887, "step": 11391 }, { "epoch": 0.6203399290736694, "grad_norm": 0.5881884732597834, "learning_rate": 0.0001615797355423951, "loss": 12.336, "step": 11392 }, { "epoch": 0.6203943830702524, "grad_norm": 0.6180850844308952, "learning_rate": 0.0001615727873570384, "loss": 12.3021, "step": 11393 }, { "epoch": 0.6204488370668355, "grad_norm": 0.5862481019080754, "learning_rate": 0.00016156583869288138, "loss": 12.4791, "step": 11394 }, { "epoch": 0.6205032910634185, "grad_norm": 0.5721204858048597, "learning_rate": 0.0001615588895499781, "loss": 12.3929, "step": 11395 }, { "epoch": 0.6205577450600015, "grad_norm": 0.6527465480459093, "learning_rate": 0.00016155193992838253, "loss": 12.4634, "step": 11396 }, { "epoch": 0.6206121990565845, "grad_norm": 0.5626923759738245, "learning_rate": 0.0001615449898281488, "loss": 12.2249, "step": 11397 }, { "epoch": 0.6206666530531675, "grad_norm": 0.5240694474890504, "learning_rate": 0.00016153803924933086, "loss": 12.268, "step": 11398 }, { "epoch": 0.6207211070497506, "grad_norm": 0.6662649597332017, "learning_rate": 0.00016153108819198285, "loss": 12.3597, "step": 11399 }, { "epoch": 0.6207755610463336, "grad_norm": 0.6755992426640863, "learning_rate": 0.00016152413665615874, "loss": 12.333, "step": 11400 }, { "epoch": 0.6208300150429166, "grad_norm": 0.5522321110889248, "learning_rate": 0.00016151718464191265, "loss": 12.2361, "step": 11401 }, { "epoch": 0.6208844690394996, "grad_norm": 0.6126956189989169, "learning_rate": 0.0001615102321492986, "loss": 12.4911, "step": 11402 }, { "epoch": 0.6209389230360826, "grad_norm": 0.6273283474136503, "learning_rate": 0.0001615032791783707, "loss": 12.4404, "step": 11403 }, { "epoch": 0.6209933770326655, "grad_norm": 0.5857263956973275, "learning_rate": 0.00016149632572918295, "loss": 12.3055, "step": 11404 }, { "epoch": 0.6210478310292487, "grad_norm": 0.6970385254814665, "learning_rate": 0.00016148937180178948, "loss": 12.4064, "step": 11405 }, { "epoch": 0.6211022850258316, "grad_norm": 0.6333024467892219, "learning_rate": 0.00016148241739624431, "loss": 12.2791, "step": 11406 }, { "epoch": 0.6211567390224146, "grad_norm": 0.6105156550817371, "learning_rate": 0.0001614754625126016, "loss": 12.2001, "step": 11407 }, { "epoch": 0.6212111930189976, "grad_norm": 0.5839111937695641, "learning_rate": 0.00016146850715091537, "loss": 12.3525, "step": 11408 }, { "epoch": 0.6212656470155806, "grad_norm": 0.5676464285172782, "learning_rate": 0.00016146155131123972, "loss": 12.2968, "step": 11409 }, { "epoch": 0.6213201010121636, "grad_norm": 0.6103215468950094, "learning_rate": 0.00016145459499362872, "loss": 12.3521, "step": 11410 }, { "epoch": 0.6213745550087467, "grad_norm": 0.5476910263333569, "learning_rate": 0.0001614476381981365, "loss": 12.3984, "step": 11411 }, { "epoch": 0.6214290090053297, "grad_norm": 0.6216564371614643, "learning_rate": 0.00016144068092481715, "loss": 12.4056, "step": 11412 }, { "epoch": 0.6214834630019127, "grad_norm": 0.5650350514742664, "learning_rate": 0.00016143372317372476, "loss": 12.3514, "step": 11413 }, { "epoch": 0.6215379169984957, "grad_norm": 0.5629443200885589, "learning_rate": 0.0001614267649449134, "loss": 12.2287, "step": 11414 }, { "epoch": 0.6215923709950787, "grad_norm": 0.6506262615831745, "learning_rate": 0.00016141980623843725, "loss": 12.3663, "step": 11415 }, { "epoch": 0.6216468249916617, "grad_norm": 0.5682975024890445, "learning_rate": 0.00016141284705435037, "loss": 12.3697, "step": 11416 }, { "epoch": 0.6217012789882448, "grad_norm": 0.7033118716417303, "learning_rate": 0.0001614058873927069, "loss": 12.4198, "step": 11417 }, { "epoch": 0.6217557329848278, "grad_norm": 0.5432031850109923, "learning_rate": 0.00016139892725356095, "loss": 12.1988, "step": 11418 }, { "epoch": 0.6218101869814108, "grad_norm": 0.6199558312329202, "learning_rate": 0.00016139196663696666, "loss": 12.3618, "step": 11419 }, { "epoch": 0.6218646409779938, "grad_norm": 0.6876465073353317, "learning_rate": 0.0001613850055429781, "loss": 12.3008, "step": 11420 }, { "epoch": 0.6219190949745768, "grad_norm": 0.5512823343568609, "learning_rate": 0.0001613780439716495, "loss": 12.3898, "step": 11421 }, { "epoch": 0.6219735489711598, "grad_norm": 0.5895349045147323, "learning_rate": 0.00016137108192303492, "loss": 12.3176, "step": 11422 }, { "epoch": 0.6220280029677429, "grad_norm": 0.629941616548519, "learning_rate": 0.00016136411939718847, "loss": 12.1728, "step": 11423 }, { "epoch": 0.6220824569643258, "grad_norm": 0.610110213177622, "learning_rate": 0.00016135715639416438, "loss": 12.3168, "step": 11424 }, { "epoch": 0.6221369109609088, "grad_norm": 0.6671360522586627, "learning_rate": 0.00016135019291401673, "loss": 12.3129, "step": 11425 }, { "epoch": 0.6221913649574918, "grad_norm": 0.630821330701, "learning_rate": 0.00016134322895679972, "loss": 12.3731, "step": 11426 }, { "epoch": 0.6222458189540748, "grad_norm": 0.6827847510004765, "learning_rate": 0.00016133626452256747, "loss": 12.3675, "step": 11427 }, { "epoch": 0.6223002729506578, "grad_norm": 0.638241364269164, "learning_rate": 0.00016132929961137414, "loss": 12.3356, "step": 11428 }, { "epoch": 0.6223547269472409, "grad_norm": 0.7731307743750341, "learning_rate": 0.00016132233422327385, "loss": 12.44, "step": 11429 }, { "epoch": 0.6224091809438239, "grad_norm": 0.6614816238411625, "learning_rate": 0.00016131536835832085, "loss": 12.2131, "step": 11430 }, { "epoch": 0.6224636349404069, "grad_norm": 0.6298317845000152, "learning_rate": 0.00016130840201656924, "loss": 12.3037, "step": 11431 }, { "epoch": 0.6225180889369899, "grad_norm": 0.6875551830205758, "learning_rate": 0.00016130143519807322, "loss": 12.3649, "step": 11432 }, { "epoch": 0.6225725429335729, "grad_norm": 0.6613920426623728, "learning_rate": 0.00016129446790288699, "loss": 12.231, "step": 11433 }, { "epoch": 0.622626996930156, "grad_norm": 0.5661040079677665, "learning_rate": 0.00016128750013106463, "loss": 12.3363, "step": 11434 }, { "epoch": 0.622681450926739, "grad_norm": 0.6117659979479873, "learning_rate": 0.00016128053188266045, "loss": 12.3404, "step": 11435 }, { "epoch": 0.622735904923322, "grad_norm": 0.6470145341604084, "learning_rate": 0.00016127356315772857, "loss": 12.4804, "step": 11436 }, { "epoch": 0.622790358919905, "grad_norm": 0.5893466214077081, "learning_rate": 0.00016126659395632317, "loss": 12.3519, "step": 11437 }, { "epoch": 0.622844812916488, "grad_norm": 0.6130572078926236, "learning_rate": 0.00016125962427849847, "loss": 12.2067, "step": 11438 }, { "epoch": 0.622899266913071, "grad_norm": 0.649299424382659, "learning_rate": 0.00016125265412430867, "loss": 12.3978, "step": 11439 }, { "epoch": 0.6229537209096541, "grad_norm": 0.6643560544717511, "learning_rate": 0.0001612456834938079, "loss": 12.2235, "step": 11440 }, { "epoch": 0.6230081749062371, "grad_norm": 0.6577597286908099, "learning_rate": 0.00016123871238705052, "loss": 12.2824, "step": 11441 }, { "epoch": 0.62306262890282, "grad_norm": 0.6010533868191522, "learning_rate": 0.00016123174080409056, "loss": 12.2624, "step": 11442 }, { "epoch": 0.623117082899403, "grad_norm": 0.5642128036208448, "learning_rate": 0.00016122476874498234, "loss": 12.2395, "step": 11443 }, { "epoch": 0.623171536895986, "grad_norm": 0.820910878340875, "learning_rate": 0.00016121779620978009, "loss": 12.3242, "step": 11444 }, { "epoch": 0.623225990892569, "grad_norm": 0.5726982445133031, "learning_rate": 0.00016121082319853796, "loss": 12.1313, "step": 11445 }, { "epoch": 0.6232804448891521, "grad_norm": 0.684093458108693, "learning_rate": 0.0001612038497113102, "loss": 12.177, "step": 11446 }, { "epoch": 0.6233348988857351, "grad_norm": 0.7989837270669965, "learning_rate": 0.00016119687574815103, "loss": 12.4761, "step": 11447 }, { "epoch": 0.6233893528823181, "grad_norm": 0.6048684246884773, "learning_rate": 0.00016118990130911472, "loss": 12.3019, "step": 11448 }, { "epoch": 0.6234438068789011, "grad_norm": 0.6627434098596703, "learning_rate": 0.00016118292639425545, "loss": 12.3217, "step": 11449 }, { "epoch": 0.6234982608754841, "grad_norm": 0.5933997333423316, "learning_rate": 0.0001611759510036275, "loss": 12.346, "step": 11450 }, { "epoch": 0.6235527148720671, "grad_norm": 0.6311707370727244, "learning_rate": 0.00016116897513728507, "loss": 12.2266, "step": 11451 }, { "epoch": 0.6236071688686502, "grad_norm": 0.5682200494836679, "learning_rate": 0.00016116199879528245, "loss": 12.2795, "step": 11452 }, { "epoch": 0.6236616228652332, "grad_norm": 0.5491493618190088, "learning_rate": 0.0001611550219776739, "loss": 12.3042, "step": 11453 }, { "epoch": 0.6237160768618162, "grad_norm": 0.5664628758150743, "learning_rate": 0.00016114804468451359, "loss": 12.3853, "step": 11454 }, { "epoch": 0.6237705308583992, "grad_norm": 0.5770704099528122, "learning_rate": 0.00016114106691585587, "loss": 12.3271, "step": 11455 }, { "epoch": 0.6238249848549822, "grad_norm": 0.5652321308945724, "learning_rate": 0.00016113408867175495, "loss": 12.2696, "step": 11456 }, { "epoch": 0.6238794388515652, "grad_norm": 0.5560296538187035, "learning_rate": 0.0001611271099522651, "loss": 12.2529, "step": 11457 }, { "epoch": 0.6239338928481483, "grad_norm": 0.530748673775166, "learning_rate": 0.0001611201307574406, "loss": 12.3456, "step": 11458 }, { "epoch": 0.6239883468447313, "grad_norm": 0.5964895425684653, "learning_rate": 0.00016111315108733568, "loss": 12.2588, "step": 11459 }, { "epoch": 0.6240428008413142, "grad_norm": 0.5661140090484537, "learning_rate": 0.0001611061709420047, "loss": 12.3302, "step": 11460 }, { "epoch": 0.6240972548378972, "grad_norm": 0.5239071385889489, "learning_rate": 0.00016109919032150186, "loss": 12.2311, "step": 11461 }, { "epoch": 0.6241517088344802, "grad_norm": 0.6031453262958488, "learning_rate": 0.00016109220922588146, "loss": 12.3783, "step": 11462 }, { "epoch": 0.6242061628310632, "grad_norm": 0.5879324423494298, "learning_rate": 0.00016108522765519783, "loss": 12.2452, "step": 11463 }, { "epoch": 0.6242606168276463, "grad_norm": 0.5270952355855081, "learning_rate": 0.00016107824560950516, "loss": 12.1846, "step": 11464 }, { "epoch": 0.6243150708242293, "grad_norm": 0.5950726560960155, "learning_rate": 0.00016107126308885787, "loss": 12.2707, "step": 11465 }, { "epoch": 0.6243695248208123, "grad_norm": 0.6100586106298427, "learning_rate": 0.00016106428009331016, "loss": 12.3043, "step": 11466 }, { "epoch": 0.6244239788173953, "grad_norm": 0.6366884073275398, "learning_rate": 0.00016105729662291643, "loss": 12.4719, "step": 11467 }, { "epoch": 0.6244784328139783, "grad_norm": 0.5748578837822204, "learning_rate": 0.00016105031267773086, "loss": 12.313, "step": 11468 }, { "epoch": 0.6245328868105614, "grad_norm": 0.6744817511043489, "learning_rate": 0.00016104332825780783, "loss": 12.4107, "step": 11469 }, { "epoch": 0.6245873408071444, "grad_norm": 0.5967398349761409, "learning_rate": 0.00016103634336320165, "loss": 12.3625, "step": 11470 }, { "epoch": 0.6246417948037274, "grad_norm": 0.6167695078489145, "learning_rate": 0.00016102935799396662, "loss": 12.3634, "step": 11471 }, { "epoch": 0.6246962488003104, "grad_norm": 0.5803870896505192, "learning_rate": 0.0001610223721501571, "loss": 12.2731, "step": 11472 }, { "epoch": 0.6247507027968934, "grad_norm": 0.6282871396517364, "learning_rate": 0.00016101538583182735, "loss": 12.4319, "step": 11473 }, { "epoch": 0.6248051567934764, "grad_norm": 0.5646362615407534, "learning_rate": 0.00016100839903903174, "loss": 12.299, "step": 11474 }, { "epoch": 0.6248596107900595, "grad_norm": 0.5939754539784448, "learning_rate": 0.00016100141177182456, "loss": 12.3805, "step": 11475 }, { "epoch": 0.6249140647866425, "grad_norm": 0.5399769811511669, "learning_rate": 0.0001609944240302602, "loss": 12.3223, "step": 11476 }, { "epoch": 0.6249685187832255, "grad_norm": 0.5462233913359226, "learning_rate": 0.00016098743581439298, "loss": 12.2263, "step": 11477 }, { "epoch": 0.6250229727798085, "grad_norm": 0.5445448329430717, "learning_rate": 0.0001609804471242772, "loss": 12.3775, "step": 11478 }, { "epoch": 0.6250774267763914, "grad_norm": 0.5906469011451747, "learning_rate": 0.00016097345795996728, "loss": 12.4571, "step": 11479 }, { "epoch": 0.6251318807729744, "grad_norm": 0.569176070603741, "learning_rate": 0.00016096646832151746, "loss": 12.3415, "step": 11480 }, { "epoch": 0.6251863347695575, "grad_norm": 0.6055177138206724, "learning_rate": 0.00016095947820898222, "loss": 12.3101, "step": 11481 }, { "epoch": 0.6252407887661405, "grad_norm": 0.5858300768306324, "learning_rate": 0.00016095248762241585, "loss": 12.2721, "step": 11482 }, { "epoch": 0.6252952427627235, "grad_norm": 0.6811483796041748, "learning_rate": 0.0001609454965618727, "loss": 12.3685, "step": 11483 }, { "epoch": 0.6253496967593065, "grad_norm": 0.5647578694435895, "learning_rate": 0.00016093850502740714, "loss": 12.254, "step": 11484 }, { "epoch": 0.6254041507558895, "grad_norm": 0.7362467140997495, "learning_rate": 0.00016093151301907352, "loss": 12.3546, "step": 11485 }, { "epoch": 0.6254586047524725, "grad_norm": 0.6211658326926465, "learning_rate": 0.00016092452053692629, "loss": 12.3764, "step": 11486 }, { "epoch": 0.6255130587490556, "grad_norm": 0.5776185249749698, "learning_rate": 0.00016091752758101976, "loss": 12.3521, "step": 11487 }, { "epoch": 0.6255675127456386, "grad_norm": 0.6806993333104564, "learning_rate": 0.00016091053415140827, "loss": 12.2411, "step": 11488 }, { "epoch": 0.6256219667422216, "grad_norm": 0.5323216271189978, "learning_rate": 0.00016090354024814632, "loss": 12.3461, "step": 11489 }, { "epoch": 0.6256764207388046, "grad_norm": 0.5230959798871765, "learning_rate": 0.0001608965458712882, "loss": 12.2264, "step": 11490 }, { "epoch": 0.6257308747353876, "grad_norm": 0.6505345111176285, "learning_rate": 0.0001608895510208883, "loss": 12.3135, "step": 11491 }, { "epoch": 0.6257853287319706, "grad_norm": 0.6001563708139264, "learning_rate": 0.00016088255569700108, "loss": 12.2745, "step": 11492 }, { "epoch": 0.6258397827285537, "grad_norm": 0.5530352308068975, "learning_rate": 0.0001608755598996809, "loss": 12.3888, "step": 11493 }, { "epoch": 0.6258942367251367, "grad_norm": 0.5921117482992737, "learning_rate": 0.0001608685636289821, "loss": 12.3241, "step": 11494 }, { "epoch": 0.6259486907217197, "grad_norm": 0.5998253973463721, "learning_rate": 0.00016086156688495918, "loss": 12.3406, "step": 11495 }, { "epoch": 0.6260031447183027, "grad_norm": 0.6089749393227019, "learning_rate": 0.00016085456966766652, "loss": 12.4072, "step": 11496 }, { "epoch": 0.6260575987148856, "grad_norm": 0.6446561830580213, "learning_rate": 0.00016084757197715852, "loss": 12.3605, "step": 11497 }, { "epoch": 0.6261120527114687, "grad_norm": 0.6811082040294554, "learning_rate": 0.00016084057381348957, "loss": 12.1462, "step": 11498 }, { "epoch": 0.6261665067080517, "grad_norm": 0.6105997784239102, "learning_rate": 0.00016083357517671413, "loss": 12.3169, "step": 11499 }, { "epoch": 0.6262209607046347, "grad_norm": 0.5720422689859095, "learning_rate": 0.0001608265760668866, "loss": 12.3158, "step": 11500 }, { "epoch": 0.6262754147012177, "grad_norm": 0.5999332881450222, "learning_rate": 0.00016081957648406142, "loss": 12.3959, "step": 11501 }, { "epoch": 0.6263298686978007, "grad_norm": 0.6285254125685592, "learning_rate": 0.00016081257642829304, "loss": 12.1985, "step": 11502 }, { "epoch": 0.6263843226943837, "grad_norm": 0.5574983213776983, "learning_rate": 0.00016080557589963584, "loss": 12.4112, "step": 11503 }, { "epoch": 0.6264387766909668, "grad_norm": 0.5640310694002958, "learning_rate": 0.00016079857489814428, "loss": 12.1416, "step": 11504 }, { "epoch": 0.6264932306875498, "grad_norm": 0.5408204607168074, "learning_rate": 0.00016079157342387284, "loss": 12.3197, "step": 11505 }, { "epoch": 0.6265476846841328, "grad_norm": 0.7008687453180122, "learning_rate": 0.00016078457147687588, "loss": 12.3588, "step": 11506 }, { "epoch": 0.6266021386807158, "grad_norm": 0.6096810692741483, "learning_rate": 0.00016077756905720793, "loss": 12.3563, "step": 11507 }, { "epoch": 0.6266565926772988, "grad_norm": 0.6437700499832963, "learning_rate": 0.0001607705661649234, "loss": 12.4718, "step": 11508 }, { "epoch": 0.6267110466738818, "grad_norm": 0.6347956814809077, "learning_rate": 0.00016076356280007677, "loss": 12.4226, "step": 11509 }, { "epoch": 0.6267655006704649, "grad_norm": 0.5734954095097526, "learning_rate": 0.00016075655896272248, "loss": 12.2312, "step": 11510 }, { "epoch": 0.6268199546670479, "grad_norm": 0.6054178017659978, "learning_rate": 0.00016074955465291498, "loss": 12.2454, "step": 11511 }, { "epoch": 0.6268744086636309, "grad_norm": 0.5455242527056521, "learning_rate": 0.0001607425498707088, "loss": 12.2335, "step": 11512 }, { "epoch": 0.6269288626602139, "grad_norm": 0.6139827424512838, "learning_rate": 0.0001607355446161583, "loss": 12.2324, "step": 11513 }, { "epoch": 0.6269833166567969, "grad_norm": 0.604926511533672, "learning_rate": 0.00016072853888931808, "loss": 12.3358, "step": 11514 }, { "epoch": 0.6270377706533798, "grad_norm": 0.6322046476901865, "learning_rate": 0.00016072153269024254, "loss": 12.3039, "step": 11515 }, { "epoch": 0.627092224649963, "grad_norm": 0.5496567982649935, "learning_rate": 0.00016071452601898616, "loss": 12.3524, "step": 11516 }, { "epoch": 0.6271466786465459, "grad_norm": 0.6463799631048145, "learning_rate": 0.00016070751887560346, "loss": 12.3817, "step": 11517 }, { "epoch": 0.6272011326431289, "grad_norm": 0.554577880732961, "learning_rate": 0.0001607005112601489, "loss": 12.2941, "step": 11518 }, { "epoch": 0.6272555866397119, "grad_norm": 0.6290277561324147, "learning_rate": 0.00016069350317267697, "loss": 12.4027, "step": 11519 }, { "epoch": 0.6273100406362949, "grad_norm": 0.6559922146220869, "learning_rate": 0.0001606864946132422, "loss": 12.1408, "step": 11520 }, { "epoch": 0.6273644946328779, "grad_norm": 0.565898840105582, "learning_rate": 0.0001606794855818991, "loss": 12.313, "step": 11521 }, { "epoch": 0.627418948629461, "grad_norm": 0.6088149405508227, "learning_rate": 0.00016067247607870212, "loss": 12.4226, "step": 11522 }, { "epoch": 0.627473402626044, "grad_norm": 0.6121159921496703, "learning_rate": 0.00016066546610370578, "loss": 12.4291, "step": 11523 }, { "epoch": 0.627527856622627, "grad_norm": 0.6382194354578349, "learning_rate": 0.00016065845565696463, "loss": 12.3131, "step": 11524 }, { "epoch": 0.62758231061921, "grad_norm": 0.5665618795827646, "learning_rate": 0.00016065144473853313, "loss": 12.298, "step": 11525 }, { "epoch": 0.627636764615793, "grad_norm": 0.6252068636147795, "learning_rate": 0.00016064443334846585, "loss": 12.3406, "step": 11526 }, { "epoch": 0.627691218612376, "grad_norm": 0.6200796722836409, "learning_rate": 0.00016063742148681725, "loss": 12.2552, "step": 11527 }, { "epoch": 0.6277456726089591, "grad_norm": 0.6278916891273529, "learning_rate": 0.00016063040915364191, "loss": 12.3177, "step": 11528 }, { "epoch": 0.6278001266055421, "grad_norm": 0.7228648431792107, "learning_rate": 0.00016062339634899435, "loss": 12.28, "step": 11529 }, { "epoch": 0.6278545806021251, "grad_norm": 0.6419605641087148, "learning_rate": 0.00016061638307292907, "loss": 12.3749, "step": 11530 }, { "epoch": 0.6279090345987081, "grad_norm": 0.654333567110793, "learning_rate": 0.00016060936932550064, "loss": 12.4443, "step": 11531 }, { "epoch": 0.627963488595291, "grad_norm": 0.6178585671956981, "learning_rate": 0.0001606023551067636, "loss": 12.4102, "step": 11532 }, { "epoch": 0.6280179425918742, "grad_norm": 0.6668895511152603, "learning_rate": 0.0001605953404167725, "loss": 12.3039, "step": 11533 }, { "epoch": 0.6280723965884571, "grad_norm": 0.6314002451567844, "learning_rate": 0.00016058832525558186, "loss": 12.3504, "step": 11534 }, { "epoch": 0.6281268505850401, "grad_norm": 0.6920156557036148, "learning_rate": 0.0001605813096232462, "loss": 12.2574, "step": 11535 }, { "epoch": 0.6281813045816231, "grad_norm": 0.6072329811421321, "learning_rate": 0.00016057429351982013, "loss": 12.3304, "step": 11536 }, { "epoch": 0.6282357585782061, "grad_norm": 0.6583826115350645, "learning_rate": 0.00016056727694535824, "loss": 12.3934, "step": 11537 }, { "epoch": 0.6282902125747891, "grad_norm": 0.6324447219536025, "learning_rate": 0.000160560259899915, "loss": 12.3431, "step": 11538 }, { "epoch": 0.6283446665713722, "grad_norm": 0.6568949683285629, "learning_rate": 0.00016055324238354506, "loss": 12.2794, "step": 11539 }, { "epoch": 0.6283991205679552, "grad_norm": 0.6226100446959809, "learning_rate": 0.00016054622439630293, "loss": 12.3236, "step": 11540 }, { "epoch": 0.6284535745645382, "grad_norm": 0.5624371013426458, "learning_rate": 0.0001605392059382432, "loss": 12.3061, "step": 11541 }, { "epoch": 0.6285080285611212, "grad_norm": 0.7739453888229896, "learning_rate": 0.0001605321870094205, "loss": 12.2432, "step": 11542 }, { "epoch": 0.6285624825577042, "grad_norm": 0.6571712760851335, "learning_rate": 0.0001605251676098893, "loss": 12.3277, "step": 11543 }, { "epoch": 0.6286169365542872, "grad_norm": 0.659169789649659, "learning_rate": 0.00016051814773970427, "loss": 12.3759, "step": 11544 }, { "epoch": 0.6286713905508703, "grad_norm": 0.567263752452329, "learning_rate": 0.00016051112739891998, "loss": 12.1465, "step": 11545 }, { "epoch": 0.6287258445474533, "grad_norm": 0.584308563991564, "learning_rate": 0.00016050410658759103, "loss": 12.2037, "step": 11546 }, { "epoch": 0.6287802985440363, "grad_norm": 0.6845359198235154, "learning_rate": 0.000160497085305772, "loss": 12.4059, "step": 11547 }, { "epoch": 0.6288347525406193, "grad_norm": 0.5190582578308734, "learning_rate": 0.00016049006355351746, "loss": 12.2964, "step": 11548 }, { "epoch": 0.6288892065372023, "grad_norm": 0.6111561586009826, "learning_rate": 0.00016048304133088202, "loss": 12.2567, "step": 11549 }, { "epoch": 0.6289436605337853, "grad_norm": 0.6995756765034283, "learning_rate": 0.00016047601863792036, "loss": 12.0985, "step": 11550 }, { "epoch": 0.6289981145303684, "grad_norm": 0.6538510687733482, "learning_rate": 0.000160468995474687, "loss": 12.2661, "step": 11551 }, { "epoch": 0.6290525685269514, "grad_norm": 0.6233392511529413, "learning_rate": 0.00016046197184123667, "loss": 12.3554, "step": 11552 }, { "epoch": 0.6291070225235343, "grad_norm": 0.6012815233663004, "learning_rate": 0.00016045494773762382, "loss": 12.2265, "step": 11553 }, { "epoch": 0.6291614765201173, "grad_norm": 0.5931580882714241, "learning_rate": 0.0001604479231639032, "loss": 12.396, "step": 11554 }, { "epoch": 0.6292159305167003, "grad_norm": 0.6316027311980901, "learning_rate": 0.00016044089812012935, "loss": 12.2889, "step": 11555 }, { "epoch": 0.6292703845132833, "grad_norm": 0.7595009750544222, "learning_rate": 0.00016043387260635696, "loss": 12.5283, "step": 11556 }, { "epoch": 0.6293248385098664, "grad_norm": 0.5545253224480045, "learning_rate": 0.00016042684662264067, "loss": 12.2653, "step": 11557 }, { "epoch": 0.6293792925064494, "grad_norm": 0.598868192716635, "learning_rate": 0.00016041982016903506, "loss": 12.2521, "step": 11558 }, { "epoch": 0.6294337465030324, "grad_norm": 0.6504218467817201, "learning_rate": 0.00016041279324559483, "loss": 12.3431, "step": 11559 }, { "epoch": 0.6294882004996154, "grad_norm": 0.5507058063814501, "learning_rate": 0.00016040576585237454, "loss": 12.3622, "step": 11560 }, { "epoch": 0.6295426544961984, "grad_norm": 0.5483976461810933, "learning_rate": 0.00016039873798942892, "loss": 12.2512, "step": 11561 }, { "epoch": 0.6295971084927814, "grad_norm": 0.5704138722303417, "learning_rate": 0.00016039170965681255, "loss": 12.3039, "step": 11562 }, { "epoch": 0.6296515624893645, "grad_norm": 0.5907463783061986, "learning_rate": 0.00016038468085458014, "loss": 12.262, "step": 11563 }, { "epoch": 0.6297060164859475, "grad_norm": 0.6172033679213188, "learning_rate": 0.00016037765158278636, "loss": 12.3686, "step": 11564 }, { "epoch": 0.6297604704825305, "grad_norm": 0.6291212721767191, "learning_rate": 0.00016037062184148576, "loss": 12.2868, "step": 11565 }, { "epoch": 0.6298149244791135, "grad_norm": 0.5517508471945034, "learning_rate": 0.0001603635916307331, "loss": 12.1767, "step": 11566 }, { "epoch": 0.6298693784756965, "grad_norm": 0.5935989273602583, "learning_rate": 0.00016035656095058308, "loss": 12.4345, "step": 11567 }, { "epoch": 0.6299238324722796, "grad_norm": 0.6190845781659723, "learning_rate": 0.0001603495298010903, "loss": 12.3404, "step": 11568 }, { "epoch": 0.6299782864688626, "grad_norm": 0.5894594749346714, "learning_rate": 0.00016034249818230943, "loss": 12.2778, "step": 11569 }, { "epoch": 0.6300327404654456, "grad_norm": 0.70588711193054, "learning_rate": 0.0001603354660942952, "loss": 12.291, "step": 11570 }, { "epoch": 0.6300871944620285, "grad_norm": 0.6115013551554997, "learning_rate": 0.00016032843353710224, "loss": 12.2606, "step": 11571 }, { "epoch": 0.6301416484586115, "grad_norm": 0.681756775481514, "learning_rate": 0.00016032140051078532, "loss": 12.402, "step": 11572 }, { "epoch": 0.6301961024551945, "grad_norm": 0.7152374060110404, "learning_rate": 0.000160314367015399, "loss": 12.3076, "step": 11573 }, { "epoch": 0.6302505564517776, "grad_norm": 0.6401385865783593, "learning_rate": 0.0001603073330509981, "loss": 12.2946, "step": 11574 }, { "epoch": 0.6303050104483606, "grad_norm": 0.7067366584131319, "learning_rate": 0.00016030029861763723, "loss": 12.3269, "step": 11575 }, { "epoch": 0.6303594644449436, "grad_norm": 0.5837445863525274, "learning_rate": 0.00016029326371537115, "loss": 12.2874, "step": 11576 }, { "epoch": 0.6304139184415266, "grad_norm": 0.6664881012459636, "learning_rate": 0.00016028622834425455, "loss": 12.4066, "step": 11577 }, { "epoch": 0.6304683724381096, "grad_norm": 0.5732167094095257, "learning_rate": 0.0001602791925043421, "loss": 12.2736, "step": 11578 }, { "epoch": 0.6305228264346926, "grad_norm": 0.5701728735843571, "learning_rate": 0.00016027215619568853, "loss": 12.3236, "step": 11579 }, { "epoch": 0.6305772804312757, "grad_norm": 0.6263392417410668, "learning_rate": 0.00016026511941834862, "loss": 12.3113, "step": 11580 }, { "epoch": 0.6306317344278587, "grad_norm": 0.5986858779759879, "learning_rate": 0.00016025808217237696, "loss": 12.4141, "step": 11581 }, { "epoch": 0.6306861884244417, "grad_norm": 0.6120558822405074, "learning_rate": 0.0001602510444578284, "loss": 12.3433, "step": 11582 }, { "epoch": 0.6307406424210247, "grad_norm": 0.5997141772225506, "learning_rate": 0.00016024400627475763, "loss": 12.2342, "step": 11583 }, { "epoch": 0.6307950964176077, "grad_norm": 0.6046918603331474, "learning_rate": 0.00016023696762321933, "loss": 12.4326, "step": 11584 }, { "epoch": 0.6308495504141907, "grad_norm": 0.7145511224984784, "learning_rate": 0.0001602299285032683, "loss": 12.4418, "step": 11585 }, { "epoch": 0.6309040044107738, "grad_norm": 0.5674460638373898, "learning_rate": 0.00016022288891495918, "loss": 12.3899, "step": 11586 }, { "epoch": 0.6309584584073568, "grad_norm": 0.6078663393707878, "learning_rate": 0.00016021584885834682, "loss": 12.3836, "step": 11587 }, { "epoch": 0.6310129124039398, "grad_norm": 0.6731437267177017, "learning_rate": 0.00016020880833348593, "loss": 12.4028, "step": 11588 }, { "epoch": 0.6310673664005227, "grad_norm": 0.7464619177043753, "learning_rate": 0.00016020176734043125, "loss": 12.3038, "step": 11589 }, { "epoch": 0.6311218203971057, "grad_norm": 0.6604754928634954, "learning_rate": 0.0001601947258792375, "loss": 12.3084, "step": 11590 }, { "epoch": 0.6311762743936887, "grad_norm": 0.625782172031097, "learning_rate": 0.00016018768394995947, "loss": 12.319, "step": 11591 }, { "epoch": 0.6312307283902718, "grad_norm": 0.6305290661457754, "learning_rate": 0.00016018064155265196, "loss": 12.3992, "step": 11592 }, { "epoch": 0.6312851823868548, "grad_norm": 0.6876046735716144, "learning_rate": 0.00016017359868736964, "loss": 12.3389, "step": 11593 }, { "epoch": 0.6313396363834378, "grad_norm": 0.6017411391397692, "learning_rate": 0.00016016655535416735, "loss": 12.3124, "step": 11594 }, { "epoch": 0.6313940903800208, "grad_norm": 0.5489052272328483, "learning_rate": 0.00016015951155309982, "loss": 12.4313, "step": 11595 }, { "epoch": 0.6314485443766038, "grad_norm": 0.5643980944234432, "learning_rate": 0.00016015246728422186, "loss": 12.3021, "step": 11596 }, { "epoch": 0.6315029983731868, "grad_norm": 0.6283739932043341, "learning_rate": 0.00016014542254758825, "loss": 12.2458, "step": 11597 }, { "epoch": 0.6315574523697699, "grad_norm": 0.6559159408597222, "learning_rate": 0.0001601383773432537, "loss": 12.3632, "step": 11598 }, { "epoch": 0.6316119063663529, "grad_norm": 0.5743836786873061, "learning_rate": 0.00016013133167127306, "loss": 12.28, "step": 11599 }, { "epoch": 0.6316663603629359, "grad_norm": 0.6037395609902132, "learning_rate": 0.0001601242855317011, "loss": 12.2813, "step": 11600 }, { "epoch": 0.6317208143595189, "grad_norm": 0.8138665820981366, "learning_rate": 0.0001601172389245926, "loss": 12.3117, "step": 11601 }, { "epoch": 0.6317752683561019, "grad_norm": 0.6117045153426184, "learning_rate": 0.00016011019185000237, "loss": 12.277, "step": 11602 }, { "epoch": 0.631829722352685, "grad_norm": 0.6571968896939481, "learning_rate": 0.00016010314430798522, "loss": 12.3114, "step": 11603 }, { "epoch": 0.631884176349268, "grad_norm": 0.5819354329428992, "learning_rate": 0.00016009609629859598, "loss": 12.3988, "step": 11604 }, { "epoch": 0.631938630345851, "grad_norm": 0.6130792154075189, "learning_rate": 0.00016008904782188936, "loss": 12.3442, "step": 11605 }, { "epoch": 0.631993084342434, "grad_norm": 0.6161087981605953, "learning_rate": 0.00016008199887792022, "loss": 12.1447, "step": 11606 }, { "epoch": 0.632047538339017, "grad_norm": 0.59213108698951, "learning_rate": 0.0001600749494667434, "loss": 12.4355, "step": 11607 }, { "epoch": 0.6321019923355999, "grad_norm": 0.5643485490047897, "learning_rate": 0.00016006789958841373, "loss": 12.3072, "step": 11608 }, { "epoch": 0.632156446332183, "grad_norm": 0.6552253773174634, "learning_rate": 0.00016006084924298597, "loss": 12.29, "step": 11609 }, { "epoch": 0.632210900328766, "grad_norm": 0.5629019501817991, "learning_rate": 0.000160053798430515, "loss": 12.3558, "step": 11610 }, { "epoch": 0.632265354325349, "grad_norm": 0.6248217978673832, "learning_rate": 0.00016004674715105558, "loss": 12.1634, "step": 11611 }, { "epoch": 0.632319808321932, "grad_norm": 0.7052249240995686, "learning_rate": 0.0001600396954046626, "loss": 12.4626, "step": 11612 }, { "epoch": 0.632374262318515, "grad_norm": 0.6229537066548735, "learning_rate": 0.00016003264319139088, "loss": 12.2556, "step": 11613 }, { "epoch": 0.632428716315098, "grad_norm": 0.6232155182387574, "learning_rate": 0.0001600255905112953, "loss": 12.3656, "step": 11614 }, { "epoch": 0.6324831703116811, "grad_norm": 0.5985385901002135, "learning_rate": 0.0001600185373644306, "loss": 12.2761, "step": 11615 }, { "epoch": 0.6325376243082641, "grad_norm": 0.5804199678921629, "learning_rate": 0.00016001148375085168, "loss": 12.335, "step": 11616 }, { "epoch": 0.6325920783048471, "grad_norm": 0.6273430482799724, "learning_rate": 0.00016000442967061346, "loss": 12.2817, "step": 11617 }, { "epoch": 0.6326465323014301, "grad_norm": 0.62391369595692, "learning_rate": 0.00015999737512377072, "loss": 12.1001, "step": 11618 }, { "epoch": 0.6327009862980131, "grad_norm": 0.6135274564031256, "learning_rate": 0.0001599903201103783, "loss": 12.3653, "step": 11619 }, { "epoch": 0.6327554402945961, "grad_norm": 0.5769543408166947, "learning_rate": 0.0001599832646304911, "loss": 12.2763, "step": 11620 }, { "epoch": 0.6328098942911792, "grad_norm": 0.6553316798350617, "learning_rate": 0.00015997620868416396, "loss": 12.3955, "step": 11621 }, { "epoch": 0.6328643482877622, "grad_norm": 0.6165252378482168, "learning_rate": 0.00015996915227145178, "loss": 12.3882, "step": 11622 }, { "epoch": 0.6329188022843452, "grad_norm": 0.578197074875526, "learning_rate": 0.00015996209539240942, "loss": 12.2141, "step": 11623 }, { "epoch": 0.6329732562809282, "grad_norm": 0.6546997218876552, "learning_rate": 0.00015995503804709175, "loss": 12.2659, "step": 11624 }, { "epoch": 0.6330277102775111, "grad_norm": 0.5346845777961047, "learning_rate": 0.00015994798023555363, "loss": 12.4347, "step": 11625 }, { "epoch": 0.6330821642740941, "grad_norm": 0.7272621472840548, "learning_rate": 0.00015994092195785, "loss": 12.3033, "step": 11626 }, { "epoch": 0.6331366182706772, "grad_norm": 0.5912965473134938, "learning_rate": 0.0001599338632140357, "loss": 12.3621, "step": 11627 }, { "epoch": 0.6331910722672602, "grad_norm": 0.6084544466595607, "learning_rate": 0.0001599268040041656, "loss": 12.3431, "step": 11628 }, { "epoch": 0.6332455262638432, "grad_norm": 0.6195207017613764, "learning_rate": 0.00015991974432829468, "loss": 12.2912, "step": 11629 }, { "epoch": 0.6332999802604262, "grad_norm": 0.6689588330255158, "learning_rate": 0.00015991268418647772, "loss": 12.2959, "step": 11630 }, { "epoch": 0.6333544342570092, "grad_norm": 0.566247369536983, "learning_rate": 0.00015990562357876968, "loss": 12.2669, "step": 11631 }, { "epoch": 0.6334088882535923, "grad_norm": 0.7277889233888325, "learning_rate": 0.00015989856250522548, "loss": 12.4804, "step": 11632 }, { "epoch": 0.6334633422501753, "grad_norm": 0.6384352082662375, "learning_rate": 0.00015989150096590003, "loss": 12.3755, "step": 11633 }, { "epoch": 0.6335177962467583, "grad_norm": 0.7476197942467356, "learning_rate": 0.00015988443896084822, "loss": 12.3098, "step": 11634 }, { "epoch": 0.6335722502433413, "grad_norm": 0.6117565640338275, "learning_rate": 0.00015987737649012497, "loss": 12.2888, "step": 11635 }, { "epoch": 0.6336267042399243, "grad_norm": 0.5877500335920101, "learning_rate": 0.00015987031355378518, "loss": 12.3271, "step": 11636 }, { "epoch": 0.6336811582365073, "grad_norm": 0.7537103433883942, "learning_rate": 0.0001598632501518838, "loss": 12.3379, "step": 11637 }, { "epoch": 0.6337356122330904, "grad_norm": 0.562326931604982, "learning_rate": 0.00015985618628447577, "loss": 12.1521, "step": 11638 }, { "epoch": 0.6337900662296734, "grad_norm": 0.6720970545937142, "learning_rate": 0.00015984912195161595, "loss": 12.2289, "step": 11639 }, { "epoch": 0.6338445202262564, "grad_norm": 0.7437459848399414, "learning_rate": 0.00015984205715335935, "loss": 12.2812, "step": 11640 }, { "epoch": 0.6338989742228394, "grad_norm": 0.5977958341585544, "learning_rate": 0.00015983499188976087, "loss": 12.3845, "step": 11641 }, { "epoch": 0.6339534282194224, "grad_norm": 0.6642266351088731, "learning_rate": 0.00015982792616087545, "loss": 12.3436, "step": 11642 }, { "epoch": 0.6340078822160053, "grad_norm": 0.6253891353430321, "learning_rate": 0.0001598208599667581, "loss": 12.271, "step": 11643 }, { "epoch": 0.6340623362125885, "grad_norm": 0.6099370956862907, "learning_rate": 0.00015981379330746363, "loss": 12.3552, "step": 11644 }, { "epoch": 0.6341167902091714, "grad_norm": 0.6318726619121243, "learning_rate": 0.0001598067261830471, "loss": 12.2443, "step": 11645 }, { "epoch": 0.6341712442057544, "grad_norm": 0.5609364407721689, "learning_rate": 0.00015979965859356347, "loss": 12.4605, "step": 11646 }, { "epoch": 0.6342256982023374, "grad_norm": 0.702645292105402, "learning_rate": 0.00015979259053906764, "loss": 12.3523, "step": 11647 }, { "epoch": 0.6342801521989204, "grad_norm": 0.6312822853043644, "learning_rate": 0.00015978552201961464, "loss": 12.3942, "step": 11648 }, { "epoch": 0.6343346061955034, "grad_norm": 0.6827895428823186, "learning_rate": 0.00015977845303525933, "loss": 12.3318, "step": 11649 }, { "epoch": 0.6343890601920865, "grad_norm": 0.5633296696619046, "learning_rate": 0.00015977138358605676, "loss": 12.3435, "step": 11650 }, { "epoch": 0.6344435141886695, "grad_norm": 0.6803882086394497, "learning_rate": 0.00015976431367206191, "loss": 12.2223, "step": 11651 }, { "epoch": 0.6344979681852525, "grad_norm": 0.5938162328387466, "learning_rate": 0.00015975724329332972, "loss": 12.2128, "step": 11652 }, { "epoch": 0.6345524221818355, "grad_norm": 0.6310709849247171, "learning_rate": 0.00015975017244991522, "loss": 12.3975, "step": 11653 }, { "epoch": 0.6346068761784185, "grad_norm": 0.6423715628698327, "learning_rate": 0.0001597431011418733, "loss": 12.3256, "step": 11654 }, { "epoch": 0.6346613301750015, "grad_norm": 0.6112767294965891, "learning_rate": 0.00015973602936925904, "loss": 12.2323, "step": 11655 }, { "epoch": 0.6347157841715846, "grad_norm": 0.63268138600798, "learning_rate": 0.0001597289571321274, "loss": 12.2258, "step": 11656 }, { "epoch": 0.6347702381681676, "grad_norm": 0.6132891422810992, "learning_rate": 0.00015972188443053335, "loss": 12.2712, "step": 11657 }, { "epoch": 0.6348246921647506, "grad_norm": 0.6007827090687096, "learning_rate": 0.00015971481126453196, "loss": 12.3272, "step": 11658 }, { "epoch": 0.6348791461613336, "grad_norm": 0.5534845093934893, "learning_rate": 0.00015970773763417814, "loss": 12.3964, "step": 11659 }, { "epoch": 0.6349336001579166, "grad_norm": 0.5471382906969506, "learning_rate": 0.00015970066353952696, "loss": 12.2745, "step": 11660 }, { "epoch": 0.6349880541544995, "grad_norm": 0.5633804601387051, "learning_rate": 0.0001596935889806334, "loss": 12.3712, "step": 11661 }, { "epoch": 0.6350425081510827, "grad_norm": 0.5419783900118207, "learning_rate": 0.00015968651395755246, "loss": 12.2674, "step": 11662 }, { "epoch": 0.6350969621476656, "grad_norm": 0.6110092220859774, "learning_rate": 0.00015967943847033922, "loss": 12.2362, "step": 11663 }, { "epoch": 0.6351514161442486, "grad_norm": 0.5929826979767274, "learning_rate": 0.00015967236251904863, "loss": 12.3111, "step": 11664 }, { "epoch": 0.6352058701408316, "grad_norm": 0.6319140215443649, "learning_rate": 0.00015966528610373572, "loss": 12.2552, "step": 11665 }, { "epoch": 0.6352603241374146, "grad_norm": 0.6250575551924299, "learning_rate": 0.00015965820922445557, "loss": 12.1944, "step": 11666 }, { "epoch": 0.6353147781339977, "grad_norm": 0.634596741902867, "learning_rate": 0.0001596511318812632, "loss": 12.4088, "step": 11667 }, { "epoch": 0.6353692321305807, "grad_norm": 0.5708759498126883, "learning_rate": 0.00015964405407421358, "loss": 12.4004, "step": 11668 }, { "epoch": 0.6354236861271637, "grad_norm": 0.6274291085104535, "learning_rate": 0.00015963697580336183, "loss": 12.3656, "step": 11669 }, { "epoch": 0.6354781401237467, "grad_norm": 0.6381275977567078, "learning_rate": 0.00015962989706876294, "loss": 12.282, "step": 11670 }, { "epoch": 0.6355325941203297, "grad_norm": 0.5877263890706275, "learning_rate": 0.00015962281787047197, "loss": 12.4272, "step": 11671 }, { "epoch": 0.6355870481169127, "grad_norm": 0.5476695346740104, "learning_rate": 0.00015961573820854396, "loss": 12.2424, "step": 11672 }, { "epoch": 0.6356415021134958, "grad_norm": 0.5647625962262167, "learning_rate": 0.000159608658083034, "loss": 12.2057, "step": 11673 }, { "epoch": 0.6356959561100788, "grad_norm": 0.6079880681478522, "learning_rate": 0.0001596015774939971, "loss": 12.4301, "step": 11674 }, { "epoch": 0.6357504101066618, "grad_norm": 0.627898353169781, "learning_rate": 0.00015959449644148833, "loss": 12.2848, "step": 11675 }, { "epoch": 0.6358048641032448, "grad_norm": 0.6000947735191888, "learning_rate": 0.00015958741492556278, "loss": 12.5301, "step": 11676 }, { "epoch": 0.6358593180998278, "grad_norm": 0.6383110683869572, "learning_rate": 0.0001595803329462755, "loss": 12.3761, "step": 11677 }, { "epoch": 0.6359137720964108, "grad_norm": 0.6226714343986417, "learning_rate": 0.00015957325050368156, "loss": 12.2989, "step": 11678 }, { "epoch": 0.6359682260929939, "grad_norm": 0.5994882158830673, "learning_rate": 0.00015956616759783604, "loss": 12.4552, "step": 11679 }, { "epoch": 0.6360226800895769, "grad_norm": 0.6428334737207511, "learning_rate": 0.00015955908422879395, "loss": 12.2438, "step": 11680 }, { "epoch": 0.6360771340861598, "grad_norm": 0.6423154220996011, "learning_rate": 0.00015955200039661049, "loss": 12.4005, "step": 11681 }, { "epoch": 0.6361315880827428, "grad_norm": 0.5486200532788184, "learning_rate": 0.00015954491610134066, "loss": 12.3216, "step": 11682 }, { "epoch": 0.6361860420793258, "grad_norm": 0.6309935704318574, "learning_rate": 0.00015953783134303962, "loss": 12.2927, "step": 11683 }, { "epoch": 0.6362404960759088, "grad_norm": 0.5824870916214382, "learning_rate": 0.00015953074612176237, "loss": 12.3833, "step": 11684 }, { "epoch": 0.6362949500724919, "grad_norm": 0.6877518652979272, "learning_rate": 0.0001595236604375641, "loss": 12.3747, "step": 11685 }, { "epoch": 0.6363494040690749, "grad_norm": 0.6248540007950087, "learning_rate": 0.00015951657429049982, "loss": 12.4178, "step": 11686 }, { "epoch": 0.6364038580656579, "grad_norm": 0.5595550579826128, "learning_rate": 0.0001595094876806247, "loss": 12.3002, "step": 11687 }, { "epoch": 0.6364583120622409, "grad_norm": 0.5609830813350578, "learning_rate": 0.00015950240060799383, "loss": 12.1729, "step": 11688 }, { "epoch": 0.6365127660588239, "grad_norm": 0.6840906942224464, "learning_rate": 0.00015949531307266233, "loss": 12.2164, "step": 11689 }, { "epoch": 0.6365672200554069, "grad_norm": 0.6102936461713946, "learning_rate": 0.00015948822507468526, "loss": 12.2205, "step": 11690 }, { "epoch": 0.63662167405199, "grad_norm": 0.7109447118428419, "learning_rate": 0.00015948113661411778, "loss": 12.2895, "step": 11691 }, { "epoch": 0.636676128048573, "grad_norm": 0.5232082210855122, "learning_rate": 0.000159474047691015, "loss": 12.3388, "step": 11692 }, { "epoch": 0.636730582045156, "grad_norm": 0.6181267829552438, "learning_rate": 0.0001594669583054321, "loss": 12.4013, "step": 11693 }, { "epoch": 0.636785036041739, "grad_norm": 0.5853241566666262, "learning_rate": 0.00015945986845742414, "loss": 12.2416, "step": 11694 }, { "epoch": 0.636839490038322, "grad_norm": 0.6440508892552266, "learning_rate": 0.00015945277814704623, "loss": 12.3312, "step": 11695 }, { "epoch": 0.636893944034905, "grad_norm": 0.5764079526694624, "learning_rate": 0.0001594456873743536, "loss": 12.3378, "step": 11696 }, { "epoch": 0.6369483980314881, "grad_norm": 0.5912901229404963, "learning_rate": 0.0001594385961394013, "loss": 12.2194, "step": 11697 }, { "epoch": 0.637002852028071, "grad_norm": 0.6094323269680411, "learning_rate": 0.00015943150444224454, "loss": 12.319, "step": 11698 }, { "epoch": 0.637057306024654, "grad_norm": 0.5635457642014866, "learning_rate": 0.0001594244122829384, "loss": 12.3567, "step": 11699 }, { "epoch": 0.637111760021237, "grad_norm": 0.587726048800709, "learning_rate": 0.00015941731966153807, "loss": 12.3092, "step": 11700 }, { "epoch": 0.63716621401782, "grad_norm": 0.6048198304309254, "learning_rate": 0.0001594102265780987, "loss": 12.3443, "step": 11701 }, { "epoch": 0.6372206680144031, "grad_norm": 0.5849478044884568, "learning_rate": 0.00015940313303267547, "loss": 12.3281, "step": 11702 }, { "epoch": 0.6372751220109861, "grad_norm": 0.6299700554928337, "learning_rate": 0.0001593960390253235, "loss": 12.3386, "step": 11703 }, { "epoch": 0.6373295760075691, "grad_norm": 0.7091301677395995, "learning_rate": 0.00015938894455609797, "loss": 12.2443, "step": 11704 }, { "epoch": 0.6373840300041521, "grad_norm": 0.6079507577811608, "learning_rate": 0.00015938184962505404, "loss": 12.2513, "step": 11705 }, { "epoch": 0.6374384840007351, "grad_norm": 0.6279045948807624, "learning_rate": 0.00015937475423224688, "loss": 12.3791, "step": 11706 }, { "epoch": 0.6374929379973181, "grad_norm": 0.5422081747141756, "learning_rate": 0.00015936765837773172, "loss": 12.2061, "step": 11707 }, { "epoch": 0.6375473919939012, "grad_norm": 0.5599573109726491, "learning_rate": 0.00015936056206156365, "loss": 12.1982, "step": 11708 }, { "epoch": 0.6376018459904842, "grad_norm": 0.6439053391485484, "learning_rate": 0.00015935346528379792, "loss": 12.3223, "step": 11709 }, { "epoch": 0.6376562999870672, "grad_norm": 0.6076777657170431, "learning_rate": 0.00015934636804448968, "loss": 12.2359, "step": 11710 }, { "epoch": 0.6377107539836502, "grad_norm": 0.6062940962482165, "learning_rate": 0.0001593392703436941, "loss": 12.2789, "step": 11711 }, { "epoch": 0.6377652079802332, "grad_norm": 0.6090446598951548, "learning_rate": 0.00015933217218146644, "loss": 12.3359, "step": 11712 }, { "epoch": 0.6378196619768162, "grad_norm": 0.5609615308756994, "learning_rate": 0.00015932507355786182, "loss": 12.3445, "step": 11713 }, { "epoch": 0.6378741159733993, "grad_norm": 0.6507111609328309, "learning_rate": 0.00015931797447293552, "loss": 12.255, "step": 11714 }, { "epoch": 0.6379285699699823, "grad_norm": 0.683631714011919, "learning_rate": 0.0001593108749267427, "loss": 12.21, "step": 11715 }, { "epoch": 0.6379830239665653, "grad_norm": 0.5426813755368318, "learning_rate": 0.00015930377491933854, "loss": 12.2444, "step": 11716 }, { "epoch": 0.6380374779631482, "grad_norm": 0.6285851116986505, "learning_rate": 0.0001592966744507783, "loss": 12.4862, "step": 11717 }, { "epoch": 0.6380919319597312, "grad_norm": 0.6077576905377188, "learning_rate": 0.0001592895735211172, "loss": 12.3795, "step": 11718 }, { "epoch": 0.6381463859563142, "grad_norm": 0.641708820048512, "learning_rate": 0.0001592824721304104, "loss": 12.2624, "step": 11719 }, { "epoch": 0.6382008399528973, "grad_norm": 0.5832512250990588, "learning_rate": 0.00015927537027871316, "loss": 12.4107, "step": 11720 }, { "epoch": 0.6382552939494803, "grad_norm": 0.5560870907638484, "learning_rate": 0.00015926826796608073, "loss": 12.2068, "step": 11721 }, { "epoch": 0.6383097479460633, "grad_norm": 0.621569606432972, "learning_rate": 0.00015926116519256826, "loss": 12.1528, "step": 11722 }, { "epoch": 0.6383642019426463, "grad_norm": 0.6470496143775933, "learning_rate": 0.00015925406195823108, "loss": 12.2376, "step": 11723 }, { "epoch": 0.6384186559392293, "grad_norm": 0.5339169789036717, "learning_rate": 0.00015924695826312435, "loss": 12.3241, "step": 11724 }, { "epoch": 0.6384731099358123, "grad_norm": 0.5973303793387309, "learning_rate": 0.00015923985410730334, "loss": 12.3649, "step": 11725 }, { "epoch": 0.6385275639323954, "grad_norm": 0.6828561520479584, "learning_rate": 0.00015923274949082328, "loss": 12.4293, "step": 11726 }, { "epoch": 0.6385820179289784, "grad_norm": 0.6080382653576513, "learning_rate": 0.00015922564441373945, "loss": 12.3078, "step": 11727 }, { "epoch": 0.6386364719255614, "grad_norm": 0.6970886252419852, "learning_rate": 0.00015921853887610707, "loss": 12.3816, "step": 11728 }, { "epoch": 0.6386909259221444, "grad_norm": 0.5930463408717045, "learning_rate": 0.00015921143287798138, "loss": 12.4402, "step": 11729 }, { "epoch": 0.6387453799187274, "grad_norm": 0.5680002960969729, "learning_rate": 0.00015920432641941768, "loss": 12.455, "step": 11730 }, { "epoch": 0.6387998339153104, "grad_norm": 0.5601685294026225, "learning_rate": 0.00015919721950047119, "loss": 12.3696, "step": 11731 }, { "epoch": 0.6388542879118935, "grad_norm": 0.5913415111135867, "learning_rate": 0.0001591901121211972, "loss": 12.2751, "step": 11732 }, { "epoch": 0.6389087419084765, "grad_norm": 0.559035357315235, "learning_rate": 0.00015918300428165099, "loss": 12.3311, "step": 11733 }, { "epoch": 0.6389631959050595, "grad_norm": 0.7960160680407351, "learning_rate": 0.0001591758959818878, "loss": 12.561, "step": 11734 }, { "epoch": 0.6390176499016424, "grad_norm": 0.5580073249870375, "learning_rate": 0.00015916878722196291, "loss": 12.3125, "step": 11735 }, { "epoch": 0.6390721038982254, "grad_norm": 0.5980153973018723, "learning_rate": 0.00015916167800193162, "loss": 12.2887, "step": 11736 }, { "epoch": 0.6391265578948085, "grad_norm": 0.6145853706411524, "learning_rate": 0.00015915456832184922, "loss": 12.3097, "step": 11737 }, { "epoch": 0.6391810118913915, "grad_norm": 0.5761368251670567, "learning_rate": 0.00015914745818177095, "loss": 12.1488, "step": 11738 }, { "epoch": 0.6392354658879745, "grad_norm": 0.5720363711923273, "learning_rate": 0.00015914034758175211, "loss": 12.3468, "step": 11739 }, { "epoch": 0.6392899198845575, "grad_norm": 0.5730666501611782, "learning_rate": 0.00015913323652184803, "loss": 12.2901, "step": 11740 }, { "epoch": 0.6393443738811405, "grad_norm": 0.5967192597957882, "learning_rate": 0.000159126125002114, "loss": 12.133, "step": 11741 }, { "epoch": 0.6393988278777235, "grad_norm": 0.5847137465942532, "learning_rate": 0.00015911901302260528, "loss": 12.2619, "step": 11742 }, { "epoch": 0.6394532818743066, "grad_norm": 0.6527293239756213, "learning_rate": 0.00015911190058337722, "loss": 12.3478, "step": 11743 }, { "epoch": 0.6395077358708896, "grad_norm": 0.6833711071046793, "learning_rate": 0.0001591047876844851, "loss": 12.3742, "step": 11744 }, { "epoch": 0.6395621898674726, "grad_norm": 0.6587852445363322, "learning_rate": 0.00015909767432598426, "loss": 12.274, "step": 11745 }, { "epoch": 0.6396166438640556, "grad_norm": 0.6486099259602204, "learning_rate": 0.00015909056050792994, "loss": 12.3702, "step": 11746 }, { "epoch": 0.6396710978606386, "grad_norm": 0.7265266626848722, "learning_rate": 0.00015908344623037757, "loss": 12.1387, "step": 11747 }, { "epoch": 0.6397255518572216, "grad_norm": 0.6899028392303771, "learning_rate": 0.00015907633149338238, "loss": 12.334, "step": 11748 }, { "epoch": 0.6397800058538047, "grad_norm": 0.5360899758896915, "learning_rate": 0.00015906921629699975, "loss": 12.3807, "step": 11749 }, { "epoch": 0.6398344598503877, "grad_norm": 0.5463456124183731, "learning_rate": 0.00015906210064128498, "loss": 12.3499, "step": 11750 }, { "epoch": 0.6398889138469707, "grad_norm": 0.6538197908044291, "learning_rate": 0.00015905498452629342, "loss": 12.2644, "step": 11751 }, { "epoch": 0.6399433678435537, "grad_norm": 0.6847980090091363, "learning_rate": 0.00015904786795208038, "loss": 12.4953, "step": 11752 }, { "epoch": 0.6399978218401366, "grad_norm": 0.5717753214335817, "learning_rate": 0.0001590407509187012, "loss": 12.3944, "step": 11753 }, { "epoch": 0.6400522758367196, "grad_norm": 0.5897375896614131, "learning_rate": 0.0001590336334262113, "loss": 12.3208, "step": 11754 }, { "epoch": 0.6401067298333027, "grad_norm": 0.6509711952414429, "learning_rate": 0.00015902651547466593, "loss": 12.3059, "step": 11755 }, { "epoch": 0.6401611838298857, "grad_norm": 0.5686185105629006, "learning_rate": 0.00015901939706412045, "loss": 12.3686, "step": 11756 }, { "epoch": 0.6402156378264687, "grad_norm": 0.5761598035037799, "learning_rate": 0.00015901227819463024, "loss": 12.292, "step": 11757 }, { "epoch": 0.6402700918230517, "grad_norm": 0.6233562948043391, "learning_rate": 0.0001590051588662507, "loss": 12.2645, "step": 11758 }, { "epoch": 0.6403245458196347, "grad_norm": 0.5431735823508531, "learning_rate": 0.00015899803907903714, "loss": 12.3071, "step": 11759 }, { "epoch": 0.6403789998162177, "grad_norm": 0.5844067916605888, "learning_rate": 0.00015899091883304494, "loss": 12.3562, "step": 11760 }, { "epoch": 0.6404334538128008, "grad_norm": 0.5775938645263473, "learning_rate": 0.00015898379812832945, "loss": 12.2704, "step": 11761 }, { "epoch": 0.6404879078093838, "grad_norm": 0.6114526535574534, "learning_rate": 0.00015897667696494606, "loss": 12.2708, "step": 11762 }, { "epoch": 0.6405423618059668, "grad_norm": 0.543502985810955, "learning_rate": 0.0001589695553429501, "loss": 12.2512, "step": 11763 }, { "epoch": 0.6405968158025498, "grad_norm": 0.5491499746554386, "learning_rate": 0.00015896243326239703, "loss": 12.1462, "step": 11764 }, { "epoch": 0.6406512697991328, "grad_norm": 0.659161048417712, "learning_rate": 0.00015895531072334217, "loss": 12.2656, "step": 11765 }, { "epoch": 0.6407057237957159, "grad_norm": 0.5578738711061708, "learning_rate": 0.00015894818772584096, "loss": 12.2665, "step": 11766 }, { "epoch": 0.6407601777922989, "grad_norm": 0.5485848619593658, "learning_rate": 0.00015894106426994875, "loss": 12.3885, "step": 11767 }, { "epoch": 0.6408146317888819, "grad_norm": 0.6042684554813096, "learning_rate": 0.0001589339403557209, "loss": 12.268, "step": 11768 }, { "epoch": 0.6408690857854649, "grad_norm": 0.5603475640961582, "learning_rate": 0.0001589268159832129, "loss": 12.2199, "step": 11769 }, { "epoch": 0.6409235397820479, "grad_norm": 0.6086400504391121, "learning_rate": 0.00015891969115248007, "loss": 12.3038, "step": 11770 }, { "epoch": 0.6409779937786309, "grad_norm": 0.7496642611061374, "learning_rate": 0.00015891256586357782, "loss": 12.3797, "step": 11771 }, { "epoch": 0.641032447775214, "grad_norm": 0.6043326573937473, "learning_rate": 0.00015890544011656161, "loss": 12.3197, "step": 11772 }, { "epoch": 0.641086901771797, "grad_norm": 0.5879429958227689, "learning_rate": 0.0001588983139114868, "loss": 12.3328, "step": 11773 }, { "epoch": 0.6411413557683799, "grad_norm": 0.6431041182157121, "learning_rate": 0.00015889118724840887, "loss": 12.5443, "step": 11774 }, { "epoch": 0.6411958097649629, "grad_norm": 0.6387769615888651, "learning_rate": 0.00015888406012738314, "loss": 12.2873, "step": 11775 }, { "epoch": 0.6412502637615459, "grad_norm": 0.5907844038891188, "learning_rate": 0.00015887693254846509, "loss": 12.1766, "step": 11776 }, { "epoch": 0.6413047177581289, "grad_norm": 0.8089658170344911, "learning_rate": 0.0001588698045117101, "loss": 12.319, "step": 11777 }, { "epoch": 0.641359171754712, "grad_norm": 0.5795627418817415, "learning_rate": 0.00015886267601717373, "loss": 12.1944, "step": 11778 }, { "epoch": 0.641413625751295, "grad_norm": 0.6330996063985966, "learning_rate": 0.00015885554706491126, "loss": 12.3079, "step": 11779 }, { "epoch": 0.641468079747878, "grad_norm": 0.6312142988227856, "learning_rate": 0.0001588484176549782, "loss": 12.2919, "step": 11780 }, { "epoch": 0.641522533744461, "grad_norm": 0.7992335536619127, "learning_rate": 0.00015884128778743, "loss": 12.3403, "step": 11781 }, { "epoch": 0.641576987741044, "grad_norm": 0.6054780153306412, "learning_rate": 0.00015883415746232205, "loss": 12.1981, "step": 11782 }, { "epoch": 0.641631441737627, "grad_norm": 0.6121608821672163, "learning_rate": 0.00015882702667970982, "loss": 12.237, "step": 11783 }, { "epoch": 0.6416858957342101, "grad_norm": 0.6458800184805136, "learning_rate": 0.00015881989543964877, "loss": 12.3485, "step": 11784 }, { "epoch": 0.6417403497307931, "grad_norm": 0.610389351156267, "learning_rate": 0.00015881276374219436, "loss": 12.2533, "step": 11785 }, { "epoch": 0.6417948037273761, "grad_norm": 0.6011654744945086, "learning_rate": 0.00015880563158740202, "loss": 12.2968, "step": 11786 }, { "epoch": 0.6418492577239591, "grad_norm": 0.6489037502758117, "learning_rate": 0.00015879849897532724, "loss": 12.4367, "step": 11787 }, { "epoch": 0.6419037117205421, "grad_norm": 0.6295543987341068, "learning_rate": 0.00015879136590602545, "loss": 12.2218, "step": 11788 }, { "epoch": 0.641958165717125, "grad_norm": 0.7691534351517384, "learning_rate": 0.00015878423237955218, "loss": 12.3219, "step": 11789 }, { "epoch": 0.6420126197137082, "grad_norm": 0.7059532026340123, "learning_rate": 0.00015877709839596285, "loss": 12.2999, "step": 11790 }, { "epoch": 0.6420670737102911, "grad_norm": 0.6641858468829318, "learning_rate": 0.0001587699639553129, "loss": 12.3139, "step": 11791 }, { "epoch": 0.6421215277068741, "grad_norm": 0.6673144977966753, "learning_rate": 0.0001587628290576579, "loss": 12.4529, "step": 11792 }, { "epoch": 0.6421759817034571, "grad_norm": 0.7522695067144315, "learning_rate": 0.0001587556937030533, "loss": 12.2677, "step": 11793 }, { "epoch": 0.6422304357000401, "grad_norm": 0.533954295083735, "learning_rate": 0.00015874855789155455, "loss": 12.2353, "step": 11794 }, { "epoch": 0.6422848896966231, "grad_norm": 0.6863388740679423, "learning_rate": 0.0001587414216232171, "loss": 12.3066, "step": 11795 }, { "epoch": 0.6423393436932062, "grad_norm": 0.7028548606498654, "learning_rate": 0.0001587342848980966, "loss": 12.3073, "step": 11796 }, { "epoch": 0.6423937976897892, "grad_norm": 0.5838235714771344, "learning_rate": 0.0001587271477162484, "loss": 12.3099, "step": 11797 }, { "epoch": 0.6424482516863722, "grad_norm": 0.5491865981727515, "learning_rate": 0.0001587200100777281, "loss": 12.3046, "step": 11798 }, { "epoch": 0.6425027056829552, "grad_norm": 0.6396741653719359, "learning_rate": 0.00015871287198259112, "loss": 12.4255, "step": 11799 }, { "epoch": 0.6425571596795382, "grad_norm": 0.5941077393260389, "learning_rate": 0.00015870573343089298, "loss": 12.3175, "step": 11800 }, { "epoch": 0.6426116136761213, "grad_norm": 0.5365098578871422, "learning_rate": 0.00015869859442268924, "loss": 12.2355, "step": 11801 }, { "epoch": 0.6426660676727043, "grad_norm": 0.629599542239981, "learning_rate": 0.00015869145495803539, "loss": 12.2843, "step": 11802 }, { "epoch": 0.6427205216692873, "grad_norm": 0.6976645290629956, "learning_rate": 0.0001586843150369869, "loss": 12.533, "step": 11803 }, { "epoch": 0.6427749756658703, "grad_norm": 0.6257102830411845, "learning_rate": 0.0001586771746595994, "loss": 12.4957, "step": 11804 }, { "epoch": 0.6428294296624533, "grad_norm": 0.5139680368557052, "learning_rate": 0.0001586700338259283, "loss": 12.3099, "step": 11805 }, { "epoch": 0.6428838836590363, "grad_norm": 0.6795158134138467, "learning_rate": 0.0001586628925360292, "loss": 12.3531, "step": 11806 }, { "epoch": 0.6429383376556194, "grad_norm": 0.6336616387516697, "learning_rate": 0.0001586557507899576, "loss": 12.2245, "step": 11807 }, { "epoch": 0.6429927916522024, "grad_norm": 0.5827707567392567, "learning_rate": 0.00015864860858776908, "loss": 12.2562, "step": 11808 }, { "epoch": 0.6430472456487853, "grad_norm": 0.6793117933085399, "learning_rate": 0.0001586414659295191, "loss": 12.4026, "step": 11809 }, { "epoch": 0.6431016996453683, "grad_norm": 0.7724808780527022, "learning_rate": 0.00015863432281526326, "loss": 12.2154, "step": 11810 }, { "epoch": 0.6431561536419513, "grad_norm": 0.669858522966245, "learning_rate": 0.0001586271792450571, "loss": 12.2635, "step": 11811 }, { "epoch": 0.6432106076385343, "grad_norm": 0.602202572719811, "learning_rate": 0.00015862003521895614, "loss": 12.1333, "step": 11812 }, { "epoch": 0.6432650616351174, "grad_norm": 0.5983408313173444, "learning_rate": 0.00015861289073701597, "loss": 12.3779, "step": 11813 }, { "epoch": 0.6433195156317004, "grad_norm": 0.6113563849314245, "learning_rate": 0.00015860574579929215, "loss": 12.3404, "step": 11814 }, { "epoch": 0.6433739696282834, "grad_norm": 0.6883609075478769, "learning_rate": 0.0001585986004058402, "loss": 12.4151, "step": 11815 }, { "epoch": 0.6434284236248664, "grad_norm": 0.6060410474299993, "learning_rate": 0.0001585914545567157, "loss": 12.3261, "step": 11816 }, { "epoch": 0.6434828776214494, "grad_norm": 0.549030679214863, "learning_rate": 0.00015858430825197426, "loss": 12.3239, "step": 11817 }, { "epoch": 0.6435373316180324, "grad_norm": 0.5995797386872938, "learning_rate": 0.00015857716149167138, "loss": 12.2988, "step": 11818 }, { "epoch": 0.6435917856146155, "grad_norm": 0.6321642957582062, "learning_rate": 0.00015857001427586269, "loss": 12.3957, "step": 11819 }, { "epoch": 0.6436462396111985, "grad_norm": 0.551558625463365, "learning_rate": 0.00015856286660460373, "loss": 12.3422, "step": 11820 }, { "epoch": 0.6437006936077815, "grad_norm": 0.5490497130441678, "learning_rate": 0.00015855571847795012, "loss": 12.2613, "step": 11821 }, { "epoch": 0.6437551476043645, "grad_norm": 0.5985946306848401, "learning_rate": 0.0001585485698959574, "loss": 12.3177, "step": 11822 }, { "epoch": 0.6438096016009475, "grad_norm": 0.5911531978371243, "learning_rate": 0.00015854142085868118, "loss": 12.2837, "step": 11823 }, { "epoch": 0.6438640555975305, "grad_norm": 0.5405399476121963, "learning_rate": 0.00015853427136617708, "loss": 12.3125, "step": 11824 }, { "epoch": 0.6439185095941136, "grad_norm": 0.6308899194181132, "learning_rate": 0.00015852712141850063, "loss": 12.32, "step": 11825 }, { "epoch": 0.6439729635906966, "grad_norm": 0.5272000889908651, "learning_rate": 0.00015851997101570752, "loss": 12.2391, "step": 11826 }, { "epoch": 0.6440274175872795, "grad_norm": 0.6978034099378577, "learning_rate": 0.00015851282015785328, "loss": 12.2693, "step": 11827 }, { "epoch": 0.6440818715838625, "grad_norm": 0.6859417143713247, "learning_rate": 0.00015850566884499352, "loss": 12.1768, "step": 11828 }, { "epoch": 0.6441363255804455, "grad_norm": 0.6287347167833705, "learning_rate": 0.00015849851707718389, "loss": 12.2717, "step": 11829 }, { "epoch": 0.6441907795770285, "grad_norm": 0.6238801089136621, "learning_rate": 0.00015849136485447996, "loss": 12.3766, "step": 11830 }, { "epoch": 0.6442452335736116, "grad_norm": 0.5472991333501801, "learning_rate": 0.0001584842121769374, "loss": 12.2784, "step": 11831 }, { "epoch": 0.6442996875701946, "grad_norm": 0.6514650993952252, "learning_rate": 0.00015847705904461178, "loss": 12.3264, "step": 11832 }, { "epoch": 0.6443541415667776, "grad_norm": 0.5794851645266091, "learning_rate": 0.0001584699054575587, "loss": 12.4061, "step": 11833 }, { "epoch": 0.6444085955633606, "grad_norm": 0.6213116335233053, "learning_rate": 0.00015846275141583388, "loss": 12.3477, "step": 11834 }, { "epoch": 0.6444630495599436, "grad_norm": 0.5722335188355683, "learning_rate": 0.0001584555969194929, "loss": 12.306, "step": 11835 }, { "epoch": 0.6445175035565267, "grad_norm": 0.6736011912847758, "learning_rate": 0.0001584484419685914, "loss": 12.1462, "step": 11836 }, { "epoch": 0.6445719575531097, "grad_norm": 0.5385332998875926, "learning_rate": 0.000158441286563185, "loss": 12.3111, "step": 11837 }, { "epoch": 0.6446264115496927, "grad_norm": 0.6159916819535322, "learning_rate": 0.00015843413070332934, "loss": 12.4208, "step": 11838 }, { "epoch": 0.6446808655462757, "grad_norm": 0.5688143212500484, "learning_rate": 0.0001584269743890801, "loss": 12.0938, "step": 11839 }, { "epoch": 0.6447353195428587, "grad_norm": 0.5963157827362754, "learning_rate": 0.0001584198176204929, "loss": 12.339, "step": 11840 }, { "epoch": 0.6447897735394417, "grad_norm": 0.6757611489483027, "learning_rate": 0.0001584126603976234, "loss": 12.3657, "step": 11841 }, { "epoch": 0.6448442275360248, "grad_norm": 0.5805181095282763, "learning_rate": 0.00015840550272052726, "loss": 12.3187, "step": 11842 }, { "epoch": 0.6448986815326078, "grad_norm": 0.635858992716586, "learning_rate": 0.00015839834458926012, "loss": 12.4277, "step": 11843 }, { "epoch": 0.6449531355291908, "grad_norm": 0.68439571005598, "learning_rate": 0.00015839118600387771, "loss": 12.373, "step": 11844 }, { "epoch": 0.6450075895257738, "grad_norm": 0.6642118909612381, "learning_rate": 0.0001583840269644356, "loss": 12.2769, "step": 11845 }, { "epoch": 0.6450620435223567, "grad_norm": 0.7855157673961042, "learning_rate": 0.00015837686747098952, "loss": 12.4094, "step": 11846 }, { "epoch": 0.6451164975189397, "grad_norm": 0.7055329344152202, "learning_rate": 0.00015836970752359513, "loss": 12.4626, "step": 11847 }, { "epoch": 0.6451709515155228, "grad_norm": 0.6876268336548789, "learning_rate": 0.00015836254712230807, "loss": 12.4605, "step": 11848 }, { "epoch": 0.6452254055121058, "grad_norm": 0.6088296851825628, "learning_rate": 0.00015835538626718412, "loss": 12.3367, "step": 11849 }, { "epoch": 0.6452798595086888, "grad_norm": 0.6061418913585683, "learning_rate": 0.00015834822495827886, "loss": 12.2388, "step": 11850 }, { "epoch": 0.6453343135052718, "grad_norm": 0.5872816859150357, "learning_rate": 0.00015834106319564804, "loss": 12.1829, "step": 11851 }, { "epoch": 0.6453887675018548, "grad_norm": 0.5548108060461536, "learning_rate": 0.00015833390097934728, "loss": 12.1922, "step": 11852 }, { "epoch": 0.6454432214984378, "grad_norm": 0.62230989731747, "learning_rate": 0.00015832673830943236, "loss": 12.3993, "step": 11853 }, { "epoch": 0.6454976754950209, "grad_norm": 0.6812107135336923, "learning_rate": 0.00015831957518595897, "loss": 12.3115, "step": 11854 }, { "epoch": 0.6455521294916039, "grad_norm": 0.6203214207289377, "learning_rate": 0.00015831241160898274, "loss": 12.3399, "step": 11855 }, { "epoch": 0.6456065834881869, "grad_norm": 0.6639827570460414, "learning_rate": 0.00015830524757855943, "loss": 12.2929, "step": 11856 }, { "epoch": 0.6456610374847699, "grad_norm": 0.6651863119017111, "learning_rate": 0.00015829808309474473, "loss": 12.3096, "step": 11857 }, { "epoch": 0.6457154914813529, "grad_norm": 0.5703054855457829, "learning_rate": 0.00015829091815759436, "loss": 12.2785, "step": 11858 }, { "epoch": 0.6457699454779359, "grad_norm": 0.6143567571433828, "learning_rate": 0.00015828375276716406, "loss": 12.3569, "step": 11859 }, { "epoch": 0.645824399474519, "grad_norm": 0.6088561935498821, "learning_rate": 0.0001582765869235095, "loss": 12.3654, "step": 11860 }, { "epoch": 0.645878853471102, "grad_norm": 0.6353000312500281, "learning_rate": 0.00015826942062668645, "loss": 12.2322, "step": 11861 }, { "epoch": 0.645933307467685, "grad_norm": 0.6765711476284719, "learning_rate": 0.0001582622538767506, "loss": 12.3561, "step": 11862 }, { "epoch": 0.645987761464268, "grad_norm": 0.585489756512397, "learning_rate": 0.00015825508667375768, "loss": 12.1992, "step": 11863 }, { "epoch": 0.6460422154608509, "grad_norm": 0.6253853276677027, "learning_rate": 0.00015824791901776342, "loss": 12.3809, "step": 11864 }, { "epoch": 0.6460966694574339, "grad_norm": 0.6279991291887276, "learning_rate": 0.00015824075090882365, "loss": 12.3028, "step": 11865 }, { "epoch": 0.646151123454017, "grad_norm": 0.6155440485579453, "learning_rate": 0.00015823358234699398, "loss": 12.3539, "step": 11866 }, { "epoch": 0.6462055774506, "grad_norm": 0.6762972682778122, "learning_rate": 0.0001582264133323302, "loss": 12.3892, "step": 11867 }, { "epoch": 0.646260031447183, "grad_norm": 0.5950789371522833, "learning_rate": 0.00015821924386488808, "loss": 12.2143, "step": 11868 }, { "epoch": 0.646314485443766, "grad_norm": 0.606593652587477, "learning_rate": 0.00015821207394472336, "loss": 12.2222, "step": 11869 }, { "epoch": 0.646368939440349, "grad_norm": 0.5798466789294091, "learning_rate": 0.00015820490357189177, "loss": 12.2193, "step": 11870 }, { "epoch": 0.6464233934369321, "grad_norm": 0.598060235563983, "learning_rate": 0.0001581977327464491, "loss": 12.3152, "step": 11871 }, { "epoch": 0.6464778474335151, "grad_norm": 0.6380850115231458, "learning_rate": 0.0001581905614684511, "loss": 12.3271, "step": 11872 }, { "epoch": 0.6465323014300981, "grad_norm": 0.5917032614504739, "learning_rate": 0.00015818338973795355, "loss": 12.3361, "step": 11873 }, { "epoch": 0.6465867554266811, "grad_norm": 0.6309372172799924, "learning_rate": 0.0001581762175550122, "loss": 12.343, "step": 11874 }, { "epoch": 0.6466412094232641, "grad_norm": 0.6536200778124828, "learning_rate": 0.00015816904491968282, "loss": 12.4174, "step": 11875 }, { "epoch": 0.6466956634198471, "grad_norm": 0.6009748805400857, "learning_rate": 0.00015816187183202121, "loss": 12.4067, "step": 11876 }, { "epoch": 0.6467501174164302, "grad_norm": 0.5543701890670989, "learning_rate": 0.0001581546982920831, "loss": 12.3837, "step": 11877 }, { "epoch": 0.6468045714130132, "grad_norm": 0.5888343705499574, "learning_rate": 0.0001581475242999243, "loss": 12.2352, "step": 11878 }, { "epoch": 0.6468590254095962, "grad_norm": 0.6293347929653329, "learning_rate": 0.00015814034985560063, "loss": 12.1845, "step": 11879 }, { "epoch": 0.6469134794061792, "grad_norm": 0.6254066864745504, "learning_rate": 0.0001581331749591678, "loss": 12.1629, "step": 11880 }, { "epoch": 0.6469679334027622, "grad_norm": 0.5798924916404218, "learning_rate": 0.0001581259996106817, "loss": 12.2957, "step": 11881 }, { "epoch": 0.6470223873993451, "grad_norm": 0.6548652982042967, "learning_rate": 0.0001581188238101981, "loss": 12.2919, "step": 11882 }, { "epoch": 0.6470768413959282, "grad_norm": 0.6470119826591115, "learning_rate": 0.00015811164755777274, "loss": 12.4236, "step": 11883 }, { "epoch": 0.6471312953925112, "grad_norm": 0.7396324813529676, "learning_rate": 0.00015810447085346145, "loss": 12.3413, "step": 11884 }, { "epoch": 0.6471857493890942, "grad_norm": 0.659380387570783, "learning_rate": 0.0001580972936973201, "loss": 12.2496, "step": 11885 }, { "epoch": 0.6472402033856772, "grad_norm": 0.6407359324597263, "learning_rate": 0.0001580901160894044, "loss": 12.348, "step": 11886 }, { "epoch": 0.6472946573822602, "grad_norm": 0.5541698793238401, "learning_rate": 0.00015808293802977024, "loss": 12.1559, "step": 11887 }, { "epoch": 0.6473491113788432, "grad_norm": 0.6298551123169205, "learning_rate": 0.00015807575951847343, "loss": 12.4117, "step": 11888 }, { "epoch": 0.6474035653754263, "grad_norm": 0.696425727106263, "learning_rate": 0.00015806858055556971, "loss": 12.3259, "step": 11889 }, { "epoch": 0.6474580193720093, "grad_norm": 0.6215328446217157, "learning_rate": 0.00015806140114111504, "loss": 12.298, "step": 11890 }, { "epoch": 0.6475124733685923, "grad_norm": 0.6811713311025708, "learning_rate": 0.00015805422127516513, "loss": 12.416, "step": 11891 }, { "epoch": 0.6475669273651753, "grad_norm": 0.5875299931350033, "learning_rate": 0.00015804704095777588, "loss": 12.2622, "step": 11892 }, { "epoch": 0.6476213813617583, "grad_norm": 0.6182923010880244, "learning_rate": 0.00015803986018900315, "loss": 12.4197, "step": 11893 }, { "epoch": 0.6476758353583413, "grad_norm": 0.7301611412615433, "learning_rate": 0.00015803267896890265, "loss": 12.3345, "step": 11894 }, { "epoch": 0.6477302893549244, "grad_norm": 0.59758392143012, "learning_rate": 0.0001580254972975304, "loss": 12.3508, "step": 11895 }, { "epoch": 0.6477847433515074, "grad_norm": 0.7039951589381491, "learning_rate": 0.00015801831517494208, "loss": 12.3217, "step": 11896 }, { "epoch": 0.6478391973480904, "grad_norm": 0.6196817665670296, "learning_rate": 0.00015801113260119362, "loss": 12.138, "step": 11897 }, { "epoch": 0.6478936513446734, "grad_norm": 0.6676907346235882, "learning_rate": 0.00015800394957634088, "loss": 12.3381, "step": 11898 }, { "epoch": 0.6479481053412564, "grad_norm": 0.7448106654204392, "learning_rate": 0.0001579967661004397, "loss": 12.3667, "step": 11899 }, { "epoch": 0.6480025593378395, "grad_norm": 0.6139973688894776, "learning_rate": 0.00015798958217354592, "loss": 12.2898, "step": 11900 }, { "epoch": 0.6480570133344224, "grad_norm": 0.6219380327775433, "learning_rate": 0.00015798239779571546, "loss": 12.3213, "step": 11901 }, { "epoch": 0.6481114673310054, "grad_norm": 0.624283925317137, "learning_rate": 0.00015797521296700412, "loss": 12.3334, "step": 11902 }, { "epoch": 0.6481659213275884, "grad_norm": 0.6852416421453453, "learning_rate": 0.0001579680276874678, "loss": 12.4149, "step": 11903 }, { "epoch": 0.6482203753241714, "grad_norm": 0.588462029793721, "learning_rate": 0.00015796084195716242, "loss": 12.3818, "step": 11904 }, { "epoch": 0.6482748293207544, "grad_norm": 0.622781245012364, "learning_rate": 0.00015795365577614377, "loss": 12.3759, "step": 11905 }, { "epoch": 0.6483292833173375, "grad_norm": 0.7072158092814134, "learning_rate": 0.00015794646914446778, "loss": 12.4398, "step": 11906 }, { "epoch": 0.6483837373139205, "grad_norm": 0.610219828969978, "learning_rate": 0.00015793928206219034, "loss": 12.3247, "step": 11907 }, { "epoch": 0.6484381913105035, "grad_norm": 0.592606307805623, "learning_rate": 0.00015793209452936733, "loss": 12.371, "step": 11908 }, { "epoch": 0.6484926453070865, "grad_norm": 0.6109845084666096, "learning_rate": 0.0001579249065460546, "loss": 12.3556, "step": 11909 }, { "epoch": 0.6485470993036695, "grad_norm": 0.5691135905588112, "learning_rate": 0.00015791771811230813, "loss": 12.3765, "step": 11910 }, { "epoch": 0.6486015533002525, "grad_norm": 0.627849729098813, "learning_rate": 0.00015791052922818375, "loss": 12.4136, "step": 11911 }, { "epoch": 0.6486560072968356, "grad_norm": 0.5720576719145197, "learning_rate": 0.00015790333989373738, "loss": 12.187, "step": 11912 }, { "epoch": 0.6487104612934186, "grad_norm": 0.5913965852750814, "learning_rate": 0.00015789615010902494, "loss": 12.22, "step": 11913 }, { "epoch": 0.6487649152900016, "grad_norm": 0.6458783596766352, "learning_rate": 0.00015788895987410234, "loss": 12.4376, "step": 11914 }, { "epoch": 0.6488193692865846, "grad_norm": 0.5357031183400054, "learning_rate": 0.00015788176918902545, "loss": 12.3014, "step": 11915 }, { "epoch": 0.6488738232831676, "grad_norm": 0.5878185119517014, "learning_rate": 0.00015787457805385022, "loss": 12.2597, "step": 11916 }, { "epoch": 0.6489282772797506, "grad_norm": 0.5408649338255814, "learning_rate": 0.00015786738646863258, "loss": 12.1758, "step": 11917 }, { "epoch": 0.6489827312763337, "grad_norm": 0.5962386022181083, "learning_rate": 0.0001578601944334284, "loss": 12.3954, "step": 11918 }, { "epoch": 0.6490371852729167, "grad_norm": 0.6894379242956311, "learning_rate": 0.0001578530019482937, "loss": 12.4568, "step": 11919 }, { "epoch": 0.6490916392694996, "grad_norm": 0.6447570054291695, "learning_rate": 0.00015784580901328433, "loss": 12.3826, "step": 11920 }, { "epoch": 0.6491460932660826, "grad_norm": 0.6538136842300999, "learning_rate": 0.00015783861562845624, "loss": 12.1867, "step": 11921 }, { "epoch": 0.6492005472626656, "grad_norm": 0.6588957923748714, "learning_rate": 0.00015783142179386542, "loss": 12.3532, "step": 11922 }, { "epoch": 0.6492550012592486, "grad_norm": 0.609883320307594, "learning_rate": 0.0001578242275095677, "loss": 12.2295, "step": 11923 }, { "epoch": 0.6493094552558317, "grad_norm": 0.5832732508989199, "learning_rate": 0.00015781703277561912, "loss": 12.3784, "step": 11924 }, { "epoch": 0.6493639092524147, "grad_norm": 0.581475044581891, "learning_rate": 0.0001578098375920756, "loss": 12.1825, "step": 11925 }, { "epoch": 0.6494183632489977, "grad_norm": 0.5271469409272188, "learning_rate": 0.0001578026419589931, "loss": 12.2623, "step": 11926 }, { "epoch": 0.6494728172455807, "grad_norm": 0.5873360893022631, "learning_rate": 0.00015779544587642754, "loss": 12.3341, "step": 11927 }, { "epoch": 0.6495272712421637, "grad_norm": 0.7344385378568903, "learning_rate": 0.0001577882493444349, "loss": 12.0677, "step": 11928 }, { "epoch": 0.6495817252387467, "grad_norm": 0.5793826632935817, "learning_rate": 0.00015778105236307117, "loss": 12.444, "step": 11929 }, { "epoch": 0.6496361792353298, "grad_norm": 0.5508804313879363, "learning_rate": 0.00015777385493239226, "loss": 12.343, "step": 11930 }, { "epoch": 0.6496906332319128, "grad_norm": 0.5574879850197885, "learning_rate": 0.00015776665705245416, "loss": 12.3612, "step": 11931 }, { "epoch": 0.6497450872284958, "grad_norm": 0.6029465954215187, "learning_rate": 0.00015775945872331288, "loss": 12.3669, "step": 11932 }, { "epoch": 0.6497995412250788, "grad_norm": 0.621974590243019, "learning_rate": 0.00015775225994502434, "loss": 12.2378, "step": 11933 }, { "epoch": 0.6498539952216618, "grad_norm": 0.5669301302625296, "learning_rate": 0.00015774506071764455, "loss": 12.3814, "step": 11934 }, { "epoch": 0.6499084492182449, "grad_norm": 0.6069993807753911, "learning_rate": 0.00015773786104122947, "loss": 12.31, "step": 11935 }, { "epoch": 0.6499629032148279, "grad_norm": 0.6790511836398206, "learning_rate": 0.0001577306609158351, "loss": 12.4608, "step": 11936 }, { "epoch": 0.6500173572114109, "grad_norm": 0.5829094050295218, "learning_rate": 0.00015772346034151745, "loss": 12.3586, "step": 11937 }, { "epoch": 0.6500718112079938, "grad_norm": 0.6722397218659423, "learning_rate": 0.00015771625931833248, "loss": 12.2938, "step": 11938 }, { "epoch": 0.6501262652045768, "grad_norm": 0.5748677087191401, "learning_rate": 0.0001577090578463362, "loss": 12.3666, "step": 11939 }, { "epoch": 0.6501807192011598, "grad_norm": 0.5524163198640157, "learning_rate": 0.00015770185592558459, "loss": 12.1791, "step": 11940 }, { "epoch": 0.6502351731977429, "grad_norm": 0.64979857567962, "learning_rate": 0.00015769465355613372, "loss": 12.2564, "step": 11941 }, { "epoch": 0.6502896271943259, "grad_norm": 0.6487747620285252, "learning_rate": 0.0001576874507380395, "loss": 12.2984, "step": 11942 }, { "epoch": 0.6503440811909089, "grad_norm": 0.5831819737136476, "learning_rate": 0.00015768024747135802, "loss": 12.3315, "step": 11943 }, { "epoch": 0.6503985351874919, "grad_norm": 0.726194846556106, "learning_rate": 0.00015767304375614524, "loss": 12.373, "step": 11944 }, { "epoch": 0.6504529891840749, "grad_norm": 0.7572436990405581, "learning_rate": 0.00015766583959245722, "loss": 12.3561, "step": 11945 }, { "epoch": 0.6505074431806579, "grad_norm": 0.8388816417572426, "learning_rate": 0.00015765863498034993, "loss": 12.3658, "step": 11946 }, { "epoch": 0.650561897177241, "grad_norm": 0.6220669970386754, "learning_rate": 0.00015765142991987948, "loss": 12.3762, "step": 11947 }, { "epoch": 0.650616351173824, "grad_norm": 0.5850096096207479, "learning_rate": 0.00015764422441110179, "loss": 12.3122, "step": 11948 }, { "epoch": 0.650670805170407, "grad_norm": 0.5960887585380966, "learning_rate": 0.00015763701845407293, "loss": 12.2445, "step": 11949 }, { "epoch": 0.65072525916699, "grad_norm": 0.6286614967255224, "learning_rate": 0.000157629812048849, "loss": 12.3693, "step": 11950 }, { "epoch": 0.650779713163573, "grad_norm": 0.5755252327414278, "learning_rate": 0.00015762260519548596, "loss": 12.3737, "step": 11951 }, { "epoch": 0.650834167160156, "grad_norm": 0.6015708782836013, "learning_rate": 0.0001576153978940399, "loss": 12.3598, "step": 11952 }, { "epoch": 0.6508886211567391, "grad_norm": 0.5503492430025636, "learning_rate": 0.0001576081901445668, "loss": 12.147, "step": 11953 }, { "epoch": 0.6509430751533221, "grad_norm": 0.6174251701802627, "learning_rate": 0.0001576009819471228, "loss": 12.3472, "step": 11954 }, { "epoch": 0.650997529149905, "grad_norm": 0.6617496217312884, "learning_rate": 0.0001575937733017639, "loss": 12.335, "step": 11955 }, { "epoch": 0.651051983146488, "grad_norm": 0.6619382754910553, "learning_rate": 0.00015758656420854615, "loss": 12.297, "step": 11956 }, { "epoch": 0.651106437143071, "grad_norm": 0.6114447904475865, "learning_rate": 0.00015757935466752563, "loss": 12.2679, "step": 11957 }, { "epoch": 0.651160891139654, "grad_norm": 0.5867492114594228, "learning_rate": 0.00015757214467875837, "loss": 12.3383, "step": 11958 }, { "epoch": 0.6512153451362371, "grad_norm": 0.5545629658830868, "learning_rate": 0.00015756493424230045, "loss": 12.3276, "step": 11959 }, { "epoch": 0.6512697991328201, "grad_norm": 0.6228513492480882, "learning_rate": 0.00015755772335820798, "loss": 12.2693, "step": 11960 }, { "epoch": 0.6513242531294031, "grad_norm": 0.5538829374813401, "learning_rate": 0.00015755051202653698, "loss": 12.1914, "step": 11961 }, { "epoch": 0.6513787071259861, "grad_norm": 0.6761415228572771, "learning_rate": 0.00015754330024734357, "loss": 12.2367, "step": 11962 }, { "epoch": 0.6514331611225691, "grad_norm": 0.5784702625909043, "learning_rate": 0.00015753608802068376, "loss": 12.3423, "step": 11963 }, { "epoch": 0.6514876151191521, "grad_norm": 0.5735770424793095, "learning_rate": 0.0001575288753466137, "loss": 12.2221, "step": 11964 }, { "epoch": 0.6515420691157352, "grad_norm": 0.7926214136031734, "learning_rate": 0.0001575216622251895, "loss": 12.3241, "step": 11965 }, { "epoch": 0.6515965231123182, "grad_norm": 0.48699907812999516, "learning_rate": 0.00015751444865646716, "loss": 12.2729, "step": 11966 }, { "epoch": 0.6516509771089012, "grad_norm": 0.64560719180413, "learning_rate": 0.00015750723464050286, "loss": 12.3966, "step": 11967 }, { "epoch": 0.6517054311054842, "grad_norm": 0.6850859013375421, "learning_rate": 0.0001575000201773526, "loss": 12.3712, "step": 11968 }, { "epoch": 0.6517598851020672, "grad_norm": 0.6505676416890495, "learning_rate": 0.0001574928052670726, "loss": 12.352, "step": 11969 }, { "epoch": 0.6518143390986503, "grad_norm": 0.619633594619558, "learning_rate": 0.00015748558990971888, "loss": 12.4747, "step": 11970 }, { "epoch": 0.6518687930952333, "grad_norm": 0.6128752373979734, "learning_rate": 0.00015747837410534757, "loss": 12.3141, "step": 11971 }, { "epoch": 0.6519232470918163, "grad_norm": 0.5500144202818116, "learning_rate": 0.0001574711578540148, "loss": 12.3213, "step": 11972 }, { "epoch": 0.6519777010883993, "grad_norm": 0.5148268142700347, "learning_rate": 0.00015746394115577665, "loss": 12.3108, "step": 11973 }, { "epoch": 0.6520321550849822, "grad_norm": 0.6139723632259834, "learning_rate": 0.00015745672401068928, "loss": 12.2607, "step": 11974 }, { "epoch": 0.6520866090815652, "grad_norm": 0.5864930516835105, "learning_rate": 0.0001574495064188088, "loss": 12.0589, "step": 11975 }, { "epoch": 0.6521410630781483, "grad_norm": 0.5365876166550633, "learning_rate": 0.00015744228838019127, "loss": 12.3051, "step": 11976 }, { "epoch": 0.6521955170747313, "grad_norm": 0.5499501950636869, "learning_rate": 0.0001574350698948929, "loss": 12.3038, "step": 11977 }, { "epoch": 0.6522499710713143, "grad_norm": 0.6472124705907247, "learning_rate": 0.0001574278509629698, "loss": 12.3738, "step": 11978 }, { "epoch": 0.6523044250678973, "grad_norm": 0.5453405892615903, "learning_rate": 0.0001574206315844781, "loss": 12.3297, "step": 11979 }, { "epoch": 0.6523588790644803, "grad_norm": 0.6329484377503383, "learning_rate": 0.00015741341175947392, "loss": 12.2167, "step": 11980 }, { "epoch": 0.6524133330610633, "grad_norm": 0.5707382286572685, "learning_rate": 0.00015740619148801342, "loss": 12.1752, "step": 11981 }, { "epoch": 0.6524677870576464, "grad_norm": 0.5732738756007905, "learning_rate": 0.00015739897077015277, "loss": 12.3096, "step": 11982 }, { "epoch": 0.6525222410542294, "grad_norm": 0.7108703247791771, "learning_rate": 0.00015739174960594809, "loss": 12.3917, "step": 11983 }, { "epoch": 0.6525766950508124, "grad_norm": 0.5927231949777018, "learning_rate": 0.00015738452799545557, "loss": 12.4086, "step": 11984 }, { "epoch": 0.6526311490473954, "grad_norm": 0.5632680185066232, "learning_rate": 0.0001573773059387313, "loss": 12.3268, "step": 11985 }, { "epoch": 0.6526856030439784, "grad_norm": 0.5575083758906307, "learning_rate": 0.00015737008343583148, "loss": 12.3571, "step": 11986 }, { "epoch": 0.6527400570405614, "grad_norm": 0.5772371140502875, "learning_rate": 0.00015736286048681229, "loss": 12.3336, "step": 11987 }, { "epoch": 0.6527945110371445, "grad_norm": 0.6479961373639044, "learning_rate": 0.00015735563709172985, "loss": 12.2698, "step": 11988 }, { "epoch": 0.6528489650337275, "grad_norm": 0.6505984500018023, "learning_rate": 0.00015734841325064038, "loss": 12.3785, "step": 11989 }, { "epoch": 0.6529034190303105, "grad_norm": 0.5773942885565875, "learning_rate": 0.00015734118896360003, "loss": 12.3468, "step": 11990 }, { "epoch": 0.6529578730268935, "grad_norm": 0.5825216433858514, "learning_rate": 0.00015733396423066496, "loss": 12.3984, "step": 11991 }, { "epoch": 0.6530123270234764, "grad_norm": 0.6430205692159725, "learning_rate": 0.00015732673905189136, "loss": 12.3095, "step": 11992 }, { "epoch": 0.6530667810200594, "grad_norm": 0.6068657586227032, "learning_rate": 0.00015731951342733545, "loss": 12.2339, "step": 11993 }, { "epoch": 0.6531212350166425, "grad_norm": 0.5679813776155134, "learning_rate": 0.00015731228735705338, "loss": 12.3382, "step": 11994 }, { "epoch": 0.6531756890132255, "grad_norm": 0.611364306339243, "learning_rate": 0.00015730506084110136, "loss": 12.2155, "step": 11995 }, { "epoch": 0.6532301430098085, "grad_norm": 0.5722300818041792, "learning_rate": 0.00015729783387953558, "loss": 12.3042, "step": 11996 }, { "epoch": 0.6532845970063915, "grad_norm": 0.6272152796074103, "learning_rate": 0.00015729060647241223, "loss": 12.3985, "step": 11997 }, { "epoch": 0.6533390510029745, "grad_norm": 0.5878780174505018, "learning_rate": 0.0001572833786197875, "loss": 12.2693, "step": 11998 }, { "epoch": 0.6533935049995575, "grad_norm": 0.5765005193129974, "learning_rate": 0.00015727615032171764, "loss": 12.2288, "step": 11999 }, { "epoch": 0.6534479589961406, "grad_norm": 0.5794060488531453, "learning_rate": 0.00015726892157825884, "loss": 12.2515, "step": 12000 }, { "epoch": 0.6535024129927236, "grad_norm": 0.5959742103943618, "learning_rate": 0.00015726169238946725, "loss": 12.3568, "step": 12001 }, { "epoch": 0.6535568669893066, "grad_norm": 0.5656155306898348, "learning_rate": 0.00015725446275539917, "loss": 12.3548, "step": 12002 }, { "epoch": 0.6536113209858896, "grad_norm": 0.5809509610831306, "learning_rate": 0.00015724723267611078, "loss": 12.2434, "step": 12003 }, { "epoch": 0.6536657749824726, "grad_norm": 0.5723505321533915, "learning_rate": 0.0001572400021516583, "loss": 12.3567, "step": 12004 }, { "epoch": 0.6537202289790557, "grad_norm": 0.5640139894343855, "learning_rate": 0.000157232771182098, "loss": 12.4136, "step": 12005 }, { "epoch": 0.6537746829756387, "grad_norm": 0.6097832305343643, "learning_rate": 0.00015722553976748604, "loss": 12.316, "step": 12006 }, { "epoch": 0.6538291369722217, "grad_norm": 0.6075626242319075, "learning_rate": 0.00015721830790787868, "loss": 12.448, "step": 12007 }, { "epoch": 0.6538835909688047, "grad_norm": 0.6135216935236425, "learning_rate": 0.00015721107560333217, "loss": 12.2811, "step": 12008 }, { "epoch": 0.6539380449653877, "grad_norm": 0.5900328630403623, "learning_rate": 0.00015720384285390274, "loss": 12.3143, "step": 12009 }, { "epoch": 0.6539924989619706, "grad_norm": 0.5593576082224303, "learning_rate": 0.00015719660965964668, "loss": 12.2963, "step": 12010 }, { "epoch": 0.6540469529585538, "grad_norm": 0.6323270839974057, "learning_rate": 0.00015718937602062015, "loss": 12.3916, "step": 12011 }, { "epoch": 0.6541014069551367, "grad_norm": 0.634713507031242, "learning_rate": 0.00015718214193687945, "loss": 12.4702, "step": 12012 }, { "epoch": 0.6541558609517197, "grad_norm": 0.5918466121827748, "learning_rate": 0.0001571749074084808, "loss": 12.3848, "step": 12013 }, { "epoch": 0.6542103149483027, "grad_norm": 0.5930684196620623, "learning_rate": 0.0001571676724354805, "loss": 12.4133, "step": 12014 }, { "epoch": 0.6542647689448857, "grad_norm": 0.5565132695659449, "learning_rate": 0.0001571604370179348, "loss": 12.2225, "step": 12015 }, { "epoch": 0.6543192229414687, "grad_norm": 0.571361595467882, "learning_rate": 0.00015715320115589995, "loss": 12.248, "step": 12016 }, { "epoch": 0.6543736769380518, "grad_norm": 0.5981817106947294, "learning_rate": 0.0001571459648494322, "loss": 12.407, "step": 12017 }, { "epoch": 0.6544281309346348, "grad_norm": 0.5843807488937806, "learning_rate": 0.00015713872809858788, "loss": 12.299, "step": 12018 }, { "epoch": 0.6544825849312178, "grad_norm": 0.5713110865905602, "learning_rate": 0.00015713149090342321, "loss": 12.2149, "step": 12019 }, { "epoch": 0.6545370389278008, "grad_norm": 0.6267794263402403, "learning_rate": 0.0001571242532639945, "loss": 12.4155, "step": 12020 }, { "epoch": 0.6545914929243838, "grad_norm": 0.6539645542446826, "learning_rate": 0.000157117015180358, "loss": 12.3244, "step": 12021 }, { "epoch": 0.6546459469209668, "grad_norm": 0.6833958466531187, "learning_rate": 0.00015710977665257003, "loss": 12.4647, "step": 12022 }, { "epoch": 0.6547004009175499, "grad_norm": 0.5835734191459407, "learning_rate": 0.0001571025376806868, "loss": 12.4632, "step": 12023 }, { "epoch": 0.6547548549141329, "grad_norm": 0.5560923116448497, "learning_rate": 0.00015709529826476475, "loss": 12.2651, "step": 12024 }, { "epoch": 0.6548093089107159, "grad_norm": 0.6115189866766454, "learning_rate": 0.00015708805840486005, "loss": 12.4087, "step": 12025 }, { "epoch": 0.6548637629072989, "grad_norm": 0.5607442964773205, "learning_rate": 0.000157080818101029, "loss": 12.2975, "step": 12026 }, { "epoch": 0.6549182169038819, "grad_norm": 0.5778224307829855, "learning_rate": 0.00015707357735332797, "loss": 12.4361, "step": 12027 }, { "epoch": 0.6549726709004648, "grad_norm": 0.6330981188570893, "learning_rate": 0.00015706633616181323, "loss": 12.3908, "step": 12028 }, { "epoch": 0.655027124897048, "grad_norm": 0.6278392162178607, "learning_rate": 0.00015705909452654108, "loss": 12.3544, "step": 12029 }, { "epoch": 0.655081578893631, "grad_norm": 0.5846662559335308, "learning_rate": 0.00015705185244756787, "loss": 12.2941, "step": 12030 }, { "epoch": 0.6551360328902139, "grad_norm": 0.6050336247846226, "learning_rate": 0.00015704460992494986, "loss": 12.2861, "step": 12031 }, { "epoch": 0.6551904868867969, "grad_norm": 0.6321047953668683, "learning_rate": 0.0001570373669587434, "loss": 12.3586, "step": 12032 }, { "epoch": 0.6552449408833799, "grad_norm": 0.608849734510545, "learning_rate": 0.00015703012354900483, "loss": 12.3521, "step": 12033 }, { "epoch": 0.655299394879963, "grad_norm": 0.617231457089484, "learning_rate": 0.0001570228796957904, "loss": 12.2676, "step": 12034 }, { "epoch": 0.655353848876546, "grad_norm": 0.6180049647494912, "learning_rate": 0.00015701563539915656, "loss": 12.2132, "step": 12035 }, { "epoch": 0.655408302873129, "grad_norm": 0.6431172833908424, "learning_rate": 0.00015700839065915955, "loss": 12.2558, "step": 12036 }, { "epoch": 0.655462756869712, "grad_norm": 0.6019823191469429, "learning_rate": 0.0001570011454758557, "loss": 12.2704, "step": 12037 }, { "epoch": 0.655517210866295, "grad_norm": 0.5671224077287408, "learning_rate": 0.00015699389984930143, "loss": 12.3156, "step": 12038 }, { "epoch": 0.655571664862878, "grad_norm": 0.7012576025712719, "learning_rate": 0.00015698665377955303, "loss": 12.43, "step": 12039 }, { "epoch": 0.6556261188594611, "grad_norm": 0.6061221280777732, "learning_rate": 0.00015697940726666683, "loss": 12.2937, "step": 12040 }, { "epoch": 0.6556805728560441, "grad_norm": 0.7156210816199511, "learning_rate": 0.00015697216031069925, "loss": 12.3705, "step": 12041 }, { "epoch": 0.6557350268526271, "grad_norm": 0.6125638351685208, "learning_rate": 0.00015696491291170657, "loss": 12.3893, "step": 12042 }, { "epoch": 0.6557894808492101, "grad_norm": 0.6318843500494461, "learning_rate": 0.00015695766506974517, "loss": 12.4039, "step": 12043 }, { "epoch": 0.6558439348457931, "grad_norm": 0.6600868298812459, "learning_rate": 0.00015695041678487142, "loss": 12.2972, "step": 12044 }, { "epoch": 0.6558983888423761, "grad_norm": 0.542910202250795, "learning_rate": 0.00015694316805714165, "loss": 12.2664, "step": 12045 }, { "epoch": 0.6559528428389592, "grad_norm": 0.5845324019516077, "learning_rate": 0.0001569359188866123, "loss": 12.3188, "step": 12046 }, { "epoch": 0.6560072968355422, "grad_norm": 0.7004534797723055, "learning_rate": 0.00015692866927333968, "loss": 12.41, "step": 12047 }, { "epoch": 0.6560617508321251, "grad_norm": 0.6112982977934652, "learning_rate": 0.00015692141921738018, "loss": 12.3632, "step": 12048 }, { "epoch": 0.6561162048287081, "grad_norm": 0.6132310081784185, "learning_rate": 0.00015691416871879018, "loss": 12.3798, "step": 12049 }, { "epoch": 0.6561706588252911, "grad_norm": 0.6721167651882155, "learning_rate": 0.00015690691777762604, "loss": 12.2113, "step": 12050 }, { "epoch": 0.6562251128218741, "grad_norm": 0.6058156618250249, "learning_rate": 0.0001568996663939442, "loss": 12.4447, "step": 12051 }, { "epoch": 0.6562795668184572, "grad_norm": 0.6217678395993428, "learning_rate": 0.000156892414567801, "loss": 12.4583, "step": 12052 }, { "epoch": 0.6563340208150402, "grad_norm": 0.8360757048261092, "learning_rate": 0.00015688516229925284, "loss": 12.3777, "step": 12053 }, { "epoch": 0.6563884748116232, "grad_norm": 0.5595752466357918, "learning_rate": 0.0001568779095883561, "loss": 12.2718, "step": 12054 }, { "epoch": 0.6564429288082062, "grad_norm": 0.5796818253610839, "learning_rate": 0.00015687065643516722, "loss": 12.1915, "step": 12055 }, { "epoch": 0.6564973828047892, "grad_norm": 0.6068369376036751, "learning_rate": 0.00015686340283974258, "loss": 12.1375, "step": 12056 }, { "epoch": 0.6565518368013722, "grad_norm": 0.5964555552588784, "learning_rate": 0.00015685614880213856, "loss": 12.385, "step": 12057 }, { "epoch": 0.6566062907979553, "grad_norm": 0.5893076488767527, "learning_rate": 0.0001568488943224116, "loss": 12.3554, "step": 12058 }, { "epoch": 0.6566607447945383, "grad_norm": 0.6133767840778955, "learning_rate": 0.0001568416394006181, "loss": 12.3736, "step": 12059 }, { "epoch": 0.6567151987911213, "grad_norm": 0.6707278629224666, "learning_rate": 0.00015683438403681448, "loss": 12.4436, "step": 12060 }, { "epoch": 0.6567696527877043, "grad_norm": 0.5560126577532122, "learning_rate": 0.0001568271282310572, "loss": 12.1057, "step": 12061 }, { "epoch": 0.6568241067842873, "grad_norm": 0.6215607526503611, "learning_rate": 0.0001568198719834026, "loss": 12.1294, "step": 12062 }, { "epoch": 0.6568785607808703, "grad_norm": 0.6286325445732135, "learning_rate": 0.00015681261529390715, "loss": 12.2769, "step": 12063 }, { "epoch": 0.6569330147774534, "grad_norm": 0.5312290249893128, "learning_rate": 0.00015680535816262728, "loss": 12.2747, "step": 12064 }, { "epoch": 0.6569874687740364, "grad_norm": 0.5738259013012558, "learning_rate": 0.00015679810058961944, "loss": 12.3717, "step": 12065 }, { "epoch": 0.6570419227706193, "grad_norm": 0.5940529983518259, "learning_rate": 0.00015679084257494002, "loss": 12.4212, "step": 12066 }, { "epoch": 0.6570963767672023, "grad_norm": 0.583664293392366, "learning_rate": 0.00015678358411864548, "loss": 12.2232, "step": 12067 }, { "epoch": 0.6571508307637853, "grad_norm": 0.5896927482492248, "learning_rate": 0.0001567763252207923, "loss": 12.4077, "step": 12068 }, { "epoch": 0.6572052847603684, "grad_norm": 0.6049544554721654, "learning_rate": 0.00015676906588143685, "loss": 12.4348, "step": 12069 }, { "epoch": 0.6572597387569514, "grad_norm": 0.5725497093698602, "learning_rate": 0.00015676180610063565, "loss": 12.4809, "step": 12070 }, { "epoch": 0.6573141927535344, "grad_norm": 0.7405786368932967, "learning_rate": 0.00015675454587844515, "loss": 12.2816, "step": 12071 }, { "epoch": 0.6573686467501174, "grad_norm": 0.600552284188916, "learning_rate": 0.00015674728521492172, "loss": 12.2328, "step": 12072 }, { "epoch": 0.6574231007467004, "grad_norm": 0.5390920058221059, "learning_rate": 0.0001567400241101219, "loss": 12.2939, "step": 12073 }, { "epoch": 0.6574775547432834, "grad_norm": 0.5996252240800342, "learning_rate": 0.0001567327625641022, "loss": 12.2745, "step": 12074 }, { "epoch": 0.6575320087398665, "grad_norm": 0.6203639181018388, "learning_rate": 0.00015672550057691895, "loss": 12.2983, "step": 12075 }, { "epoch": 0.6575864627364495, "grad_norm": 0.5831791648502749, "learning_rate": 0.00015671823814862875, "loss": 12.2538, "step": 12076 }, { "epoch": 0.6576409167330325, "grad_norm": 0.6456117615006848, "learning_rate": 0.00015671097527928795, "loss": 12.2313, "step": 12077 }, { "epoch": 0.6576953707296155, "grad_norm": 0.6120533649848845, "learning_rate": 0.00015670371196895317, "loss": 12.3152, "step": 12078 }, { "epoch": 0.6577498247261985, "grad_norm": 0.5909407598412671, "learning_rate": 0.00015669644821768078, "loss": 12.4194, "step": 12079 }, { "epoch": 0.6578042787227815, "grad_norm": 0.5897918822232837, "learning_rate": 0.0001566891840255273, "loss": 12.3486, "step": 12080 }, { "epoch": 0.6578587327193646, "grad_norm": 0.5691125296830504, "learning_rate": 0.00015668191939254925, "loss": 12.2627, "step": 12081 }, { "epoch": 0.6579131867159476, "grad_norm": 0.6035693166090563, "learning_rate": 0.00015667465431880304, "loss": 12.2438, "step": 12082 }, { "epoch": 0.6579676407125306, "grad_norm": 0.6274229351147108, "learning_rate": 0.00015666738880434523, "loss": 12.2951, "step": 12083 }, { "epoch": 0.6580220947091135, "grad_norm": 0.5970422016358655, "learning_rate": 0.00015666012284923231, "loss": 12.1996, "step": 12084 }, { "epoch": 0.6580765487056965, "grad_norm": 0.613287883262713, "learning_rate": 0.00015665285645352078, "loss": 12.2262, "step": 12085 }, { "epoch": 0.6581310027022795, "grad_norm": 0.8218009396210703, "learning_rate": 0.0001566455896172671, "loss": 12.6371, "step": 12086 }, { "epoch": 0.6581854566988626, "grad_norm": 0.6123225370787431, "learning_rate": 0.00015663832234052787, "loss": 12.2022, "step": 12087 }, { "epoch": 0.6582399106954456, "grad_norm": 0.5879275100204007, "learning_rate": 0.0001566310546233595, "loss": 12.4095, "step": 12088 }, { "epoch": 0.6582943646920286, "grad_norm": 0.5717092565134987, "learning_rate": 0.0001566237864658186, "loss": 12.3095, "step": 12089 }, { "epoch": 0.6583488186886116, "grad_norm": 0.5784460652054834, "learning_rate": 0.0001566165178679616, "loss": 12.219, "step": 12090 }, { "epoch": 0.6584032726851946, "grad_norm": 0.643811496962863, "learning_rate": 0.0001566092488298451, "loss": 12.3034, "step": 12091 }, { "epoch": 0.6584577266817776, "grad_norm": 0.5680520389238057, "learning_rate": 0.00015660197935152555, "loss": 12.2748, "step": 12092 }, { "epoch": 0.6585121806783607, "grad_norm": 0.5790373555853809, "learning_rate": 0.00015659470943305955, "loss": 12.3464, "step": 12093 }, { "epoch": 0.6585666346749437, "grad_norm": 0.6087938649370235, "learning_rate": 0.00015658743907450356, "loss": 12.3078, "step": 12094 }, { "epoch": 0.6586210886715267, "grad_norm": 0.6270072053969875, "learning_rate": 0.00015658016827591417, "loss": 12.1714, "step": 12095 }, { "epoch": 0.6586755426681097, "grad_norm": 0.5861641476491468, "learning_rate": 0.0001565728970373479, "loss": 12.3368, "step": 12096 }, { "epoch": 0.6587299966646927, "grad_norm": 0.5804427884989929, "learning_rate": 0.0001565656253588613, "loss": 12.1728, "step": 12097 }, { "epoch": 0.6587844506612757, "grad_norm": 0.5704907114632229, "learning_rate": 0.00015655835324051093, "loss": 12.053, "step": 12098 }, { "epoch": 0.6588389046578588, "grad_norm": 0.591580493014265, "learning_rate": 0.0001565510806823533, "loss": 12.3192, "step": 12099 }, { "epoch": 0.6588933586544418, "grad_norm": 0.5779853695319296, "learning_rate": 0.000156543807684445, "loss": 12.2956, "step": 12100 }, { "epoch": 0.6589478126510248, "grad_norm": 0.6231852103374036, "learning_rate": 0.00015653653424684255, "loss": 12.2494, "step": 12101 }, { "epoch": 0.6590022666476077, "grad_norm": 0.6667754409013602, "learning_rate": 0.0001565292603696025, "loss": 12.3361, "step": 12102 }, { "epoch": 0.6590567206441907, "grad_norm": 0.5401924422265222, "learning_rate": 0.0001565219860527815, "loss": 12.2276, "step": 12103 }, { "epoch": 0.6591111746407738, "grad_norm": 0.6918791259186888, "learning_rate": 0.00015651471129643602, "loss": 12.4344, "step": 12104 }, { "epoch": 0.6591656286373568, "grad_norm": 0.6604993076391991, "learning_rate": 0.0001565074361006227, "loss": 12.134, "step": 12105 }, { "epoch": 0.6592200826339398, "grad_norm": 0.5515241712824276, "learning_rate": 0.00015650016046539806, "loss": 12.3196, "step": 12106 }, { "epoch": 0.6592745366305228, "grad_norm": 0.6419301595352817, "learning_rate": 0.00015649288439081868, "loss": 12.0543, "step": 12107 }, { "epoch": 0.6593289906271058, "grad_norm": 0.6839742489781926, "learning_rate": 0.00015648560787694118, "loss": 12.337, "step": 12108 }, { "epoch": 0.6593834446236888, "grad_norm": 0.5489400548407735, "learning_rate": 0.0001564783309238221, "loss": 12.3295, "step": 12109 }, { "epoch": 0.6594378986202719, "grad_norm": 0.5588116094373012, "learning_rate": 0.0001564710535315181, "loss": 12.3351, "step": 12110 }, { "epoch": 0.6594923526168549, "grad_norm": 0.6258296469166352, "learning_rate": 0.00015646377570008565, "loss": 12.3593, "step": 12111 }, { "epoch": 0.6595468066134379, "grad_norm": 0.5714303544699514, "learning_rate": 0.00015645649742958146, "loss": 12.2158, "step": 12112 }, { "epoch": 0.6596012606100209, "grad_norm": 0.5690123486052074, "learning_rate": 0.00015644921872006205, "loss": 12.3059, "step": 12113 }, { "epoch": 0.6596557146066039, "grad_norm": 0.5998350963031981, "learning_rate": 0.00015644193957158406, "loss": 12.2973, "step": 12114 }, { "epoch": 0.6597101686031869, "grad_norm": 0.6126527081622569, "learning_rate": 0.0001564346599842041, "loss": 12.3787, "step": 12115 }, { "epoch": 0.65976462259977, "grad_norm": 0.6175210777858374, "learning_rate": 0.00015642737995797873, "loss": 12.3521, "step": 12116 }, { "epoch": 0.659819076596353, "grad_norm": 0.6616911257059388, "learning_rate": 0.0001564200994929646, "loss": 12.1806, "step": 12117 }, { "epoch": 0.659873530592936, "grad_norm": 0.62720735050687, "learning_rate": 0.00015641281858921833, "loss": 12.34, "step": 12118 }, { "epoch": 0.659927984589519, "grad_norm": 0.5898731792627565, "learning_rate": 0.00015640553724679648, "loss": 12.2155, "step": 12119 }, { "epoch": 0.659982438586102, "grad_norm": 0.6070751981197668, "learning_rate": 0.00015639825546575576, "loss": 12.1565, "step": 12120 }, { "epoch": 0.6600368925826849, "grad_norm": 0.6412845745278387, "learning_rate": 0.00015639097324615273, "loss": 12.5173, "step": 12121 }, { "epoch": 0.660091346579268, "grad_norm": 0.6269784738662666, "learning_rate": 0.00015638369058804404, "loss": 12.2585, "step": 12122 }, { "epoch": 0.660145800575851, "grad_norm": 0.5692248201202506, "learning_rate": 0.0001563764074914863, "loss": 12.1797, "step": 12123 }, { "epoch": 0.660200254572434, "grad_norm": 0.5412511925083626, "learning_rate": 0.00015636912395653618, "loss": 12.3225, "step": 12124 }, { "epoch": 0.660254708569017, "grad_norm": 0.5678289684620178, "learning_rate": 0.00015636183998325026, "loss": 12.332, "step": 12125 }, { "epoch": 0.6603091625656, "grad_norm": 0.6016993154089291, "learning_rate": 0.00015635455557168527, "loss": 12.3073, "step": 12126 }, { "epoch": 0.660363616562183, "grad_norm": 0.6483298042378929, "learning_rate": 0.00015634727072189782, "loss": 12.2413, "step": 12127 }, { "epoch": 0.6604180705587661, "grad_norm": 0.5863492923591763, "learning_rate": 0.00015633998543394448, "loss": 12.2328, "step": 12128 }, { "epoch": 0.6604725245553491, "grad_norm": 0.5801873293209294, "learning_rate": 0.00015633269970788201, "loss": 12.3545, "step": 12129 }, { "epoch": 0.6605269785519321, "grad_norm": 0.5478890482606156, "learning_rate": 0.00015632541354376698, "loss": 12.3379, "step": 12130 }, { "epoch": 0.6605814325485151, "grad_norm": 0.5774737062533749, "learning_rate": 0.00015631812694165612, "loss": 12.3044, "step": 12131 }, { "epoch": 0.6606358865450981, "grad_norm": 0.5952639698733143, "learning_rate": 0.00015631083990160605, "loss": 12.2476, "step": 12132 }, { "epoch": 0.6606903405416811, "grad_norm": 0.523753329481436, "learning_rate": 0.00015630355242367348, "loss": 12.1817, "step": 12133 }, { "epoch": 0.6607447945382642, "grad_norm": 0.5474573088759221, "learning_rate": 0.000156296264507915, "loss": 12.2149, "step": 12134 }, { "epoch": 0.6607992485348472, "grad_norm": 0.6304369889579475, "learning_rate": 0.0001562889761543873, "loss": 12.4091, "step": 12135 }, { "epoch": 0.6608537025314302, "grad_norm": 0.6699170088289395, "learning_rate": 0.00015628168736314717, "loss": 12.4412, "step": 12136 }, { "epoch": 0.6609081565280132, "grad_norm": 0.5538506368520014, "learning_rate": 0.00015627439813425115, "loss": 12.282, "step": 12137 }, { "epoch": 0.6609626105245962, "grad_norm": 0.5978091609740449, "learning_rate": 0.00015626710846775596, "loss": 12.3531, "step": 12138 }, { "epoch": 0.6610170645211793, "grad_norm": 0.5623515777462447, "learning_rate": 0.00015625981836371833, "loss": 12.3091, "step": 12139 }, { "epoch": 0.6610715185177622, "grad_norm": 0.5864998538286688, "learning_rate": 0.0001562525278221949, "loss": 12.1195, "step": 12140 }, { "epoch": 0.6611259725143452, "grad_norm": 0.5896243382799916, "learning_rate": 0.00015624523684324237, "loss": 12.2116, "step": 12141 }, { "epoch": 0.6611804265109282, "grad_norm": 0.5491996186435824, "learning_rate": 0.0001562379454269175, "loss": 12.2354, "step": 12142 }, { "epoch": 0.6612348805075112, "grad_norm": 0.6777309207922793, "learning_rate": 0.00015623065357327684, "loss": 12.2775, "step": 12143 }, { "epoch": 0.6612893345040942, "grad_norm": 0.6302362156115535, "learning_rate": 0.00015622336128237726, "loss": 12.1918, "step": 12144 }, { "epoch": 0.6613437885006773, "grad_norm": 0.650374195038451, "learning_rate": 0.00015621606855427538, "loss": 12.3677, "step": 12145 }, { "epoch": 0.6613982424972603, "grad_norm": 0.5932691651104086, "learning_rate": 0.0001562087753890279, "loss": 12.3541, "step": 12146 }, { "epoch": 0.6614526964938433, "grad_norm": 0.5680699252633874, "learning_rate": 0.00015620148178669161, "loss": 12.3498, "step": 12147 }, { "epoch": 0.6615071504904263, "grad_norm": 0.6155619498335252, "learning_rate": 0.0001561941877473231, "loss": 12.195, "step": 12148 }, { "epoch": 0.6615616044870093, "grad_norm": 0.6051882680021209, "learning_rate": 0.0001561868932709792, "loss": 12.1634, "step": 12149 }, { "epoch": 0.6616160584835923, "grad_norm": 0.5986268325000902, "learning_rate": 0.00015617959835771662, "loss": 12.3676, "step": 12150 }, { "epoch": 0.6616705124801754, "grad_norm": 0.5727966647803312, "learning_rate": 0.00015617230300759202, "loss": 12.2913, "step": 12151 }, { "epoch": 0.6617249664767584, "grad_norm": 0.5545194798519972, "learning_rate": 0.00015616500722066219, "loss": 12.377, "step": 12152 }, { "epoch": 0.6617794204733414, "grad_norm": 0.5641449445635649, "learning_rate": 0.00015615771099698384, "loss": 12.2211, "step": 12153 }, { "epoch": 0.6618338744699244, "grad_norm": 0.60254720612745, "learning_rate": 0.0001561504143366137, "loss": 12.2442, "step": 12154 }, { "epoch": 0.6618883284665074, "grad_norm": 0.5907651025492713, "learning_rate": 0.00015614311723960854, "loss": 12.35, "step": 12155 }, { "epoch": 0.6619427824630904, "grad_norm": 0.708172101181777, "learning_rate": 0.0001561358197060251, "loss": 12.3802, "step": 12156 }, { "epoch": 0.6619972364596735, "grad_norm": 0.5675398599531235, "learning_rate": 0.0001561285217359201, "loss": 12.3175, "step": 12157 }, { "epoch": 0.6620516904562564, "grad_norm": 0.6494733439949993, "learning_rate": 0.0001561212233293503, "loss": 12.2998, "step": 12158 }, { "epoch": 0.6621061444528394, "grad_norm": 0.5949033966930486, "learning_rate": 0.00015611392448637243, "loss": 12.2403, "step": 12159 }, { "epoch": 0.6621605984494224, "grad_norm": 0.597294651572877, "learning_rate": 0.0001561066252070433, "loss": 12.3803, "step": 12160 }, { "epoch": 0.6622150524460054, "grad_norm": 0.6428094862347965, "learning_rate": 0.00015609932549141966, "loss": 12.4209, "step": 12161 }, { "epoch": 0.6622695064425884, "grad_norm": 0.6673986827425881, "learning_rate": 0.00015609202533955823, "loss": 12.3467, "step": 12162 }, { "epoch": 0.6623239604391715, "grad_norm": 0.5701563550210614, "learning_rate": 0.00015608472475151582, "loss": 12.1723, "step": 12163 }, { "epoch": 0.6623784144357545, "grad_norm": 0.627956929952825, "learning_rate": 0.00015607742372734915, "loss": 12.3973, "step": 12164 }, { "epoch": 0.6624328684323375, "grad_norm": 0.6038635446801585, "learning_rate": 0.00015607012226711507, "loss": 12.3268, "step": 12165 }, { "epoch": 0.6624873224289205, "grad_norm": 0.5473757396652543, "learning_rate": 0.0001560628203708703, "loss": 12.2702, "step": 12166 }, { "epoch": 0.6625417764255035, "grad_norm": 0.554151613594465, "learning_rate": 0.00015605551803867163, "loss": 12.2511, "step": 12167 }, { "epoch": 0.6625962304220866, "grad_norm": 0.6729198964343531, "learning_rate": 0.00015604821527057588, "loss": 12.4947, "step": 12168 }, { "epoch": 0.6626506844186696, "grad_norm": 0.6180742038416029, "learning_rate": 0.00015604091206663977, "loss": 12.301, "step": 12169 }, { "epoch": 0.6627051384152526, "grad_norm": 0.5713033910454256, "learning_rate": 0.00015603360842692015, "loss": 12.3176, "step": 12170 }, { "epoch": 0.6627595924118356, "grad_norm": 0.599816381001002, "learning_rate": 0.0001560263043514738, "loss": 12.2241, "step": 12171 }, { "epoch": 0.6628140464084186, "grad_norm": 0.5693130389885763, "learning_rate": 0.0001560189998403575, "loss": 12.353, "step": 12172 }, { "epoch": 0.6628685004050016, "grad_norm": 0.5702316741912086, "learning_rate": 0.00015601169489362805, "loss": 12.1861, "step": 12173 }, { "epoch": 0.6629229544015847, "grad_norm": 0.5621961876678578, "learning_rate": 0.0001560043895113423, "loss": 12.1976, "step": 12174 }, { "epoch": 0.6629774083981677, "grad_norm": 0.6012569929692483, "learning_rate": 0.000155997083693557, "loss": 12.238, "step": 12175 }, { "epoch": 0.6630318623947506, "grad_norm": 0.5302971343374439, "learning_rate": 0.00015598977744032898, "loss": 12.2912, "step": 12176 }, { "epoch": 0.6630863163913336, "grad_norm": 0.6054268327809826, "learning_rate": 0.00015598247075171507, "loss": 12.3391, "step": 12177 }, { "epoch": 0.6631407703879166, "grad_norm": 0.6130891768269731, "learning_rate": 0.00015597516362777212, "loss": 12.3841, "step": 12178 }, { "epoch": 0.6631952243844996, "grad_norm": 0.595933876472692, "learning_rate": 0.00015596785606855686, "loss": 12.3136, "step": 12179 }, { "epoch": 0.6632496783810827, "grad_norm": 0.5843406076220462, "learning_rate": 0.00015596054807412617, "loss": 12.1744, "step": 12180 }, { "epoch": 0.6633041323776657, "grad_norm": 0.6031736206179926, "learning_rate": 0.00015595323964453687, "loss": 12.287, "step": 12181 }, { "epoch": 0.6633585863742487, "grad_norm": 0.599656465734208, "learning_rate": 0.00015594593077984583, "loss": 12.2659, "step": 12182 }, { "epoch": 0.6634130403708317, "grad_norm": 0.5738100710286648, "learning_rate": 0.00015593862148010983, "loss": 12.417, "step": 12183 }, { "epoch": 0.6634674943674147, "grad_norm": 0.6222520183921914, "learning_rate": 0.0001559313117453857, "loss": 12.3374, "step": 12184 }, { "epoch": 0.6635219483639977, "grad_norm": 0.543589456981991, "learning_rate": 0.00015592400157573034, "loss": 12.2794, "step": 12185 }, { "epoch": 0.6635764023605808, "grad_norm": 0.6115018237920168, "learning_rate": 0.00015591669097120056, "loss": 12.2219, "step": 12186 }, { "epoch": 0.6636308563571638, "grad_norm": 0.5458482281292727, "learning_rate": 0.00015590937993185323, "loss": 12.2878, "step": 12187 }, { "epoch": 0.6636853103537468, "grad_norm": 0.5843729540859018, "learning_rate": 0.0001559020684577452, "loss": 12.3356, "step": 12188 }, { "epoch": 0.6637397643503298, "grad_norm": 0.6558099047970367, "learning_rate": 0.00015589475654893326, "loss": 12.1897, "step": 12189 }, { "epoch": 0.6637942183469128, "grad_norm": 0.5952426948130463, "learning_rate": 0.00015588744420547433, "loss": 12.3942, "step": 12190 }, { "epoch": 0.6638486723434958, "grad_norm": 0.5839362546215011, "learning_rate": 0.0001558801314274253, "loss": 12.4069, "step": 12191 }, { "epoch": 0.6639031263400789, "grad_norm": 0.8111332595455524, "learning_rate": 0.00015587281821484295, "loss": 12.4592, "step": 12192 }, { "epoch": 0.6639575803366619, "grad_norm": 0.5717171049299086, "learning_rate": 0.00015586550456778424, "loss": 12.2548, "step": 12193 }, { "epoch": 0.6640120343332448, "grad_norm": 0.6587477339077662, "learning_rate": 0.00015585819048630597, "loss": 12.3692, "step": 12194 }, { "epoch": 0.6640664883298278, "grad_norm": 0.7716794105304356, "learning_rate": 0.00015585087597046505, "loss": 12.1079, "step": 12195 }, { "epoch": 0.6641209423264108, "grad_norm": 0.6176557578909904, "learning_rate": 0.00015584356102031833, "loss": 12.402, "step": 12196 }, { "epoch": 0.6641753963229938, "grad_norm": 0.860535049891481, "learning_rate": 0.00015583624563592275, "loss": 12.201, "step": 12197 }, { "epoch": 0.6642298503195769, "grad_norm": 0.6206160971273634, "learning_rate": 0.00015582892981733513, "loss": 12.32, "step": 12198 }, { "epoch": 0.6642843043161599, "grad_norm": 0.5984237246171267, "learning_rate": 0.00015582161356461242, "loss": 12.2633, "step": 12199 }, { "epoch": 0.6643387583127429, "grad_norm": 0.7078818570229009, "learning_rate": 0.00015581429687781147, "loss": 12.3191, "step": 12200 }, { "epoch": 0.6643932123093259, "grad_norm": 0.6105722808230988, "learning_rate": 0.00015580697975698917, "loss": 12.2233, "step": 12201 }, { "epoch": 0.6644476663059089, "grad_norm": 0.6439423699459289, "learning_rate": 0.00015579966220220247, "loss": 12.3645, "step": 12202 }, { "epoch": 0.664502120302492, "grad_norm": 0.6489363836579027, "learning_rate": 0.0001557923442135082, "loss": 12.3112, "step": 12203 }, { "epoch": 0.664556574299075, "grad_norm": 0.6137942271046831, "learning_rate": 0.00015578502579096336, "loss": 12.32, "step": 12204 }, { "epoch": 0.664611028295658, "grad_norm": 0.5813939230459977, "learning_rate": 0.00015577770693462475, "loss": 12.3516, "step": 12205 }, { "epoch": 0.664665482292241, "grad_norm": 0.7177450492577477, "learning_rate": 0.00015577038764454936, "loss": 12.2759, "step": 12206 }, { "epoch": 0.664719936288824, "grad_norm": 0.5908558121589046, "learning_rate": 0.00015576306792079408, "loss": 12.264, "step": 12207 }, { "epoch": 0.664774390285407, "grad_norm": 0.5941370823923019, "learning_rate": 0.00015575574776341582, "loss": 12.3181, "step": 12208 }, { "epoch": 0.6648288442819901, "grad_norm": 0.6088651213946604, "learning_rate": 0.00015574842717247154, "loss": 12.2413, "step": 12209 }, { "epoch": 0.6648832982785731, "grad_norm": 0.6926069129225368, "learning_rate": 0.00015574110614801812, "loss": 12.2957, "step": 12210 }, { "epoch": 0.6649377522751561, "grad_norm": 0.5973284373065706, "learning_rate": 0.00015573378469011252, "loss": 12.4042, "step": 12211 }, { "epoch": 0.664992206271739, "grad_norm": 0.6332900499532489, "learning_rate": 0.00015572646279881166, "loss": 12.3268, "step": 12212 }, { "epoch": 0.665046660268322, "grad_norm": 0.5964173395692406, "learning_rate": 0.0001557191404741725, "loss": 12.3616, "step": 12213 }, { "epoch": 0.665101114264905, "grad_norm": 0.5895920125238943, "learning_rate": 0.00015571181771625192, "loss": 12.3699, "step": 12214 }, { "epoch": 0.6651555682614881, "grad_norm": 0.6704097046230486, "learning_rate": 0.00015570449452510693, "loss": 12.502, "step": 12215 }, { "epoch": 0.6652100222580711, "grad_norm": 0.5794962651301736, "learning_rate": 0.00015569717090079444, "loss": 12.1452, "step": 12216 }, { "epoch": 0.6652644762546541, "grad_norm": 0.5920778321000353, "learning_rate": 0.0001556898468433714, "loss": 12.1568, "step": 12217 }, { "epoch": 0.6653189302512371, "grad_norm": 0.6403214063903778, "learning_rate": 0.0001556825223528948, "loss": 12.2718, "step": 12218 }, { "epoch": 0.6653733842478201, "grad_norm": 0.6007841160667045, "learning_rate": 0.00015567519742942153, "loss": 12.3545, "step": 12219 }, { "epoch": 0.6654278382444031, "grad_norm": 0.6186117578106466, "learning_rate": 0.00015566787207300863, "loss": 12.3221, "step": 12220 }, { "epoch": 0.6654822922409862, "grad_norm": 0.6571254360260127, "learning_rate": 0.000155660546283713, "loss": 12.4017, "step": 12221 }, { "epoch": 0.6655367462375692, "grad_norm": 0.7613889115243458, "learning_rate": 0.00015565322006159163, "loss": 12.3841, "step": 12222 }, { "epoch": 0.6655912002341522, "grad_norm": 0.6264136875700904, "learning_rate": 0.00015564589340670147, "loss": 12.3541, "step": 12223 }, { "epoch": 0.6656456542307352, "grad_norm": 0.5637844084922261, "learning_rate": 0.00015563856631909954, "loss": 12.1914, "step": 12224 }, { "epoch": 0.6657001082273182, "grad_norm": 0.5824889512283538, "learning_rate": 0.00015563123879884278, "loss": 12.3026, "step": 12225 }, { "epoch": 0.6657545622239012, "grad_norm": 0.6220760352001354, "learning_rate": 0.00015562391084598818, "loss": 12.3276, "step": 12226 }, { "epoch": 0.6658090162204843, "grad_norm": 0.5298688423843939, "learning_rate": 0.00015561658246059272, "loss": 12.2705, "step": 12227 }, { "epoch": 0.6658634702170673, "grad_norm": 0.5625501538555766, "learning_rate": 0.00015560925364271338, "loss": 12.4409, "step": 12228 }, { "epoch": 0.6659179242136503, "grad_norm": 0.6663848890519213, "learning_rate": 0.00015560192439240716, "loss": 12.2582, "step": 12229 }, { "epoch": 0.6659723782102333, "grad_norm": 0.5904031545317446, "learning_rate": 0.00015559459470973103, "loss": 12.1851, "step": 12230 }, { "epoch": 0.6660268322068162, "grad_norm": 0.5849650349945908, "learning_rate": 0.00015558726459474203, "loss": 12.2091, "step": 12231 }, { "epoch": 0.6660812862033992, "grad_norm": 0.5324786902705425, "learning_rate": 0.00015557993404749715, "loss": 12.2008, "step": 12232 }, { "epoch": 0.6661357401999823, "grad_norm": 0.5968945370069183, "learning_rate": 0.00015557260306805337, "loss": 12.4381, "step": 12233 }, { "epoch": 0.6661901941965653, "grad_norm": 0.5838181459219236, "learning_rate": 0.0001555652716564677, "loss": 12.2553, "step": 12234 }, { "epoch": 0.6662446481931483, "grad_norm": 0.5742301697411183, "learning_rate": 0.00015555793981279718, "loss": 12.2566, "step": 12235 }, { "epoch": 0.6662991021897313, "grad_norm": 0.5914965836401413, "learning_rate": 0.0001555506075370988, "loss": 12.3349, "step": 12236 }, { "epoch": 0.6663535561863143, "grad_norm": 0.6420592487637078, "learning_rate": 0.00015554327482942957, "loss": 12.2938, "step": 12237 }, { "epoch": 0.6664080101828974, "grad_norm": 0.5607050708073679, "learning_rate": 0.00015553594168984654, "loss": 12.2412, "step": 12238 }, { "epoch": 0.6664624641794804, "grad_norm": 0.5609018375686214, "learning_rate": 0.00015552860811840667, "loss": 12.2956, "step": 12239 }, { "epoch": 0.6665169181760634, "grad_norm": 0.5809352702800051, "learning_rate": 0.00015552127411516702, "loss": 12.2465, "step": 12240 }, { "epoch": 0.6665713721726464, "grad_norm": 0.5842398929846088, "learning_rate": 0.0001555139396801847, "loss": 12.2001, "step": 12241 }, { "epoch": 0.6666258261692294, "grad_norm": 0.5639530150176731, "learning_rate": 0.00015550660481351668, "loss": 12.3483, "step": 12242 }, { "epoch": 0.6666802801658124, "grad_norm": 0.5409246664466759, "learning_rate": 0.00015549926951521993, "loss": 12.306, "step": 12243 }, { "epoch": 0.6667347341623955, "grad_norm": 0.6340991968931381, "learning_rate": 0.0001554919337853516, "loss": 12.4283, "step": 12244 }, { "epoch": 0.6667891881589785, "grad_norm": 0.6306748542962477, "learning_rate": 0.00015548459762396863, "loss": 12.3561, "step": 12245 }, { "epoch": 0.6668436421555615, "grad_norm": 0.6224388651294798, "learning_rate": 0.00015547726103112817, "loss": 12.407, "step": 12246 }, { "epoch": 0.6668980961521445, "grad_norm": 0.524635684961415, "learning_rate": 0.00015546992400688724, "loss": 12.3067, "step": 12247 }, { "epoch": 0.6669525501487275, "grad_norm": 0.589540221009856, "learning_rate": 0.00015546258655130285, "loss": 12.2554, "step": 12248 }, { "epoch": 0.6670070041453104, "grad_norm": 0.5622070308010497, "learning_rate": 0.00015545524866443207, "loss": 12.1965, "step": 12249 }, { "epoch": 0.6670614581418935, "grad_norm": 0.5203199740512348, "learning_rate": 0.000155447910346332, "loss": 12.2916, "step": 12250 }, { "epoch": 0.6671159121384765, "grad_norm": 0.5798442999382576, "learning_rate": 0.00015544057159705966, "loss": 12.3144, "step": 12251 }, { "epoch": 0.6671703661350595, "grad_norm": 0.5390908971582956, "learning_rate": 0.00015543323241667216, "loss": 12.2795, "step": 12252 }, { "epoch": 0.6672248201316425, "grad_norm": 0.5963423017492206, "learning_rate": 0.00015542589280522658, "loss": 12.2141, "step": 12253 }, { "epoch": 0.6672792741282255, "grad_norm": 0.5845700390262353, "learning_rate": 0.0001554185527627799, "loss": 12.3106, "step": 12254 }, { "epoch": 0.6673337281248085, "grad_norm": 0.6140335245300805, "learning_rate": 0.0001554112122893893, "loss": 12.3444, "step": 12255 }, { "epoch": 0.6673881821213916, "grad_norm": 0.6129360063082528, "learning_rate": 0.00015540387138511177, "loss": 12.3694, "step": 12256 }, { "epoch": 0.6674426361179746, "grad_norm": 0.5833285955444938, "learning_rate": 0.0001553965300500045, "loss": 12.2542, "step": 12257 }, { "epoch": 0.6674970901145576, "grad_norm": 0.6353115649335048, "learning_rate": 0.0001553891882841245, "loss": 12.4273, "step": 12258 }, { "epoch": 0.6675515441111406, "grad_norm": 0.5726108332932701, "learning_rate": 0.00015538184608752888, "loss": 12.1541, "step": 12259 }, { "epoch": 0.6676059981077236, "grad_norm": 0.6688286875161022, "learning_rate": 0.00015537450346027475, "loss": 12.3407, "step": 12260 }, { "epoch": 0.6676604521043066, "grad_norm": 0.6635128458029128, "learning_rate": 0.0001553671604024192, "loss": 12.3662, "step": 12261 }, { "epoch": 0.6677149061008897, "grad_norm": 0.616898015401075, "learning_rate": 0.0001553598169140193, "loss": 12.2972, "step": 12262 }, { "epoch": 0.6677693600974727, "grad_norm": 0.6165516831846334, "learning_rate": 0.0001553524729951322, "loss": 12.1998, "step": 12263 }, { "epoch": 0.6678238140940557, "grad_norm": 0.5950312797869388, "learning_rate": 0.00015534512864581499, "loss": 12.1871, "step": 12264 }, { "epoch": 0.6678782680906387, "grad_norm": 0.5886345418949827, "learning_rate": 0.00015533778386612478, "loss": 12.3738, "step": 12265 }, { "epoch": 0.6679327220872217, "grad_norm": 0.5797917759980304, "learning_rate": 0.00015533043865611864, "loss": 12.259, "step": 12266 }, { "epoch": 0.6679871760838046, "grad_norm": 0.6897782017418148, "learning_rate": 0.0001553230930158538, "loss": 12.3921, "step": 12267 }, { "epoch": 0.6680416300803877, "grad_norm": 0.5643376226552791, "learning_rate": 0.00015531574694538727, "loss": 12.3513, "step": 12268 }, { "epoch": 0.6680960840769707, "grad_norm": 0.5793628655866306, "learning_rate": 0.00015530840044477619, "loss": 12.2912, "step": 12269 }, { "epoch": 0.6681505380735537, "grad_norm": 0.524965459789386, "learning_rate": 0.00015530105351407777, "loss": 12.2108, "step": 12270 }, { "epoch": 0.6682049920701367, "grad_norm": 0.588021490527018, "learning_rate": 0.00015529370615334904, "loss": 12.2733, "step": 12271 }, { "epoch": 0.6682594460667197, "grad_norm": 0.5918043083770076, "learning_rate": 0.0001552863583626472, "loss": 12.3017, "step": 12272 }, { "epoch": 0.6683139000633028, "grad_norm": 0.5349154769643079, "learning_rate": 0.00015527901014202936, "loss": 12.1857, "step": 12273 }, { "epoch": 0.6683683540598858, "grad_norm": 0.5826956440854895, "learning_rate": 0.00015527166149155268, "loss": 12.1369, "step": 12274 }, { "epoch": 0.6684228080564688, "grad_norm": 0.6401619839037013, "learning_rate": 0.00015526431241127427, "loss": 12.3254, "step": 12275 }, { "epoch": 0.6684772620530518, "grad_norm": 0.6293240232898998, "learning_rate": 0.0001552569629012513, "loss": 12.3404, "step": 12276 }, { "epoch": 0.6685317160496348, "grad_norm": 0.5878958409192327, "learning_rate": 0.00015524961296154092, "loss": 12.2896, "step": 12277 }, { "epoch": 0.6685861700462178, "grad_norm": 0.5434146624970057, "learning_rate": 0.0001552422625922003, "loss": 12.2185, "step": 12278 }, { "epoch": 0.6686406240428009, "grad_norm": 0.6564827281971444, "learning_rate": 0.00015523491179328657, "loss": 12.3515, "step": 12279 }, { "epoch": 0.6686950780393839, "grad_norm": 0.6563677584404193, "learning_rate": 0.0001552275605648569, "loss": 12.3971, "step": 12280 }, { "epoch": 0.6687495320359669, "grad_norm": 0.639262551518245, "learning_rate": 0.00015522020890696846, "loss": 12.2402, "step": 12281 }, { "epoch": 0.6688039860325499, "grad_norm": 0.7951500546832201, "learning_rate": 0.00015521285681967844, "loss": 12.2807, "step": 12282 }, { "epoch": 0.6688584400291329, "grad_norm": 0.599505447244044, "learning_rate": 0.00015520550430304393, "loss": 12.3153, "step": 12283 }, { "epoch": 0.6689128940257159, "grad_norm": 0.6209193433879819, "learning_rate": 0.0001551981513571222, "loss": 12.2535, "step": 12284 }, { "epoch": 0.668967348022299, "grad_norm": 0.7257855828873232, "learning_rate": 0.00015519079798197042, "loss": 12.321, "step": 12285 }, { "epoch": 0.669021802018882, "grad_norm": 0.5920719331977042, "learning_rate": 0.00015518344417764568, "loss": 12.2257, "step": 12286 }, { "epoch": 0.6690762560154649, "grad_norm": 0.6424559920639874, "learning_rate": 0.00015517608994420527, "loss": 12.2562, "step": 12287 }, { "epoch": 0.6691307100120479, "grad_norm": 0.7367637717941627, "learning_rate": 0.0001551687352817063, "loss": 12.4215, "step": 12288 }, { "epoch": 0.6691851640086309, "grad_norm": 0.6389986361996636, "learning_rate": 0.00015516138019020602, "loss": 12.3891, "step": 12289 }, { "epoch": 0.6692396180052139, "grad_norm": 0.7193207098366835, "learning_rate": 0.00015515402466976155, "loss": 12.4082, "step": 12290 }, { "epoch": 0.669294072001797, "grad_norm": 0.7145208288974416, "learning_rate": 0.00015514666872043015, "loss": 12.3602, "step": 12291 }, { "epoch": 0.66934852599838, "grad_norm": 0.6881150012113569, "learning_rate": 0.00015513931234226904, "loss": 12.4423, "step": 12292 }, { "epoch": 0.669402979994963, "grad_norm": 0.745635936605013, "learning_rate": 0.00015513195553533537, "loss": 12.3203, "step": 12293 }, { "epoch": 0.669457433991546, "grad_norm": 0.6162984481069275, "learning_rate": 0.00015512459829968638, "loss": 12.2663, "step": 12294 }, { "epoch": 0.669511887988129, "grad_norm": 0.6281275905245626, "learning_rate": 0.00015511724063537922, "loss": 12.3963, "step": 12295 }, { "epoch": 0.669566341984712, "grad_norm": 0.7733149049652096, "learning_rate": 0.00015510988254247121, "loss": 12.3302, "step": 12296 }, { "epoch": 0.6696207959812951, "grad_norm": 0.5780855891143521, "learning_rate": 0.0001551025240210195, "loss": 12.282, "step": 12297 }, { "epoch": 0.6696752499778781, "grad_norm": 0.6068608143213838, "learning_rate": 0.00015509516507108132, "loss": 12.212, "step": 12298 }, { "epoch": 0.6697297039744611, "grad_norm": 0.7063211921350746, "learning_rate": 0.00015508780569271387, "loss": 12.3415, "step": 12299 }, { "epoch": 0.6697841579710441, "grad_norm": 0.5802194256618435, "learning_rate": 0.00015508044588597442, "loss": 12.3357, "step": 12300 }, { "epoch": 0.6698386119676271, "grad_norm": 0.6195286163771041, "learning_rate": 0.0001550730856509202, "loss": 12.3546, "step": 12301 }, { "epoch": 0.6698930659642102, "grad_norm": 0.826825832697903, "learning_rate": 0.00015506572498760843, "loss": 12.3014, "step": 12302 }, { "epoch": 0.6699475199607932, "grad_norm": 0.6368084289094414, "learning_rate": 0.00015505836389609633, "loss": 12.3103, "step": 12303 }, { "epoch": 0.6700019739573762, "grad_norm": 0.6280506242277251, "learning_rate": 0.00015505100237644118, "loss": 12.1298, "step": 12304 }, { "epoch": 0.6700564279539591, "grad_norm": 0.6526406731752941, "learning_rate": 0.0001550436404287002, "loss": 12.4316, "step": 12305 }, { "epoch": 0.6701108819505421, "grad_norm": 0.6004196297894798, "learning_rate": 0.0001550362780529306, "loss": 12.3352, "step": 12306 }, { "epoch": 0.6701653359471251, "grad_norm": 0.6262109885265384, "learning_rate": 0.00015502891524918973, "loss": 12.215, "step": 12307 }, { "epoch": 0.6702197899437082, "grad_norm": 0.6828665022831916, "learning_rate": 0.00015502155201753477, "loss": 12.3009, "step": 12308 }, { "epoch": 0.6702742439402912, "grad_norm": 0.5860601334328157, "learning_rate": 0.000155014188358023, "loss": 12.3318, "step": 12309 }, { "epoch": 0.6703286979368742, "grad_norm": 0.6511973526521048, "learning_rate": 0.00015500682427071162, "loss": 12.4775, "step": 12310 }, { "epoch": 0.6703831519334572, "grad_norm": 0.5563454571783759, "learning_rate": 0.000154999459755658, "loss": 12.2955, "step": 12311 }, { "epoch": 0.6704376059300402, "grad_norm": 0.5392087483489257, "learning_rate": 0.00015499209481291935, "loss": 12.2512, "step": 12312 }, { "epoch": 0.6704920599266232, "grad_norm": 0.5933980380777396, "learning_rate": 0.00015498472944255296, "loss": 12.2228, "step": 12313 }, { "epoch": 0.6705465139232063, "grad_norm": 0.578197621670821, "learning_rate": 0.0001549773636446161, "loss": 12.4391, "step": 12314 }, { "epoch": 0.6706009679197893, "grad_norm": 0.5833579940210534, "learning_rate": 0.000154969997419166, "loss": 12.443, "step": 12315 }, { "epoch": 0.6706554219163723, "grad_norm": 0.5603252629323376, "learning_rate": 0.00015496263076626, "loss": 12.1525, "step": 12316 }, { "epoch": 0.6707098759129553, "grad_norm": 0.5888360644780639, "learning_rate": 0.0001549552636859554, "loss": 12.2737, "step": 12317 }, { "epoch": 0.6707643299095383, "grad_norm": 0.6644092677788832, "learning_rate": 0.00015494789617830938, "loss": 12.1984, "step": 12318 }, { "epoch": 0.6708187839061213, "grad_norm": 0.5836970411791585, "learning_rate": 0.00015494052824337938, "loss": 12.3877, "step": 12319 }, { "epoch": 0.6708732379027044, "grad_norm": 0.6424242091880643, "learning_rate": 0.00015493315988122258, "loss": 12.3825, "step": 12320 }, { "epoch": 0.6709276918992874, "grad_norm": 0.6002102631739418, "learning_rate": 0.00015492579109189632, "loss": 12.1623, "step": 12321 }, { "epoch": 0.6709821458958704, "grad_norm": 0.6137470092392461, "learning_rate": 0.0001549184218754579, "loss": 12.3687, "step": 12322 }, { "epoch": 0.6710365998924533, "grad_norm": 0.5601924573585031, "learning_rate": 0.00015491105223196464, "loss": 12.2654, "step": 12323 }, { "epoch": 0.6710910538890363, "grad_norm": 0.6533100680065654, "learning_rate": 0.00015490368216147378, "loss": 12.3328, "step": 12324 }, { "epoch": 0.6711455078856193, "grad_norm": 0.5571867758560064, "learning_rate": 0.00015489631166404273, "loss": 12.2228, "step": 12325 }, { "epoch": 0.6711999618822024, "grad_norm": 0.6720500084489055, "learning_rate": 0.0001548889407397287, "loss": 12.3046, "step": 12326 }, { "epoch": 0.6712544158787854, "grad_norm": 0.5338742799875239, "learning_rate": 0.00015488156938858912, "loss": 12.2962, "step": 12327 }, { "epoch": 0.6713088698753684, "grad_norm": 0.6713263288393614, "learning_rate": 0.00015487419761068126, "loss": 12.3337, "step": 12328 }, { "epoch": 0.6713633238719514, "grad_norm": 0.6455710139817895, "learning_rate": 0.00015486682540606238, "loss": 12.364, "step": 12329 }, { "epoch": 0.6714177778685344, "grad_norm": 0.535391671053815, "learning_rate": 0.00015485945277478989, "loss": 12.3494, "step": 12330 }, { "epoch": 0.6714722318651174, "grad_norm": 0.6048302286197477, "learning_rate": 0.0001548520797169211, "loss": 12.2211, "step": 12331 }, { "epoch": 0.6715266858617005, "grad_norm": 0.5674125889055247, "learning_rate": 0.00015484470623251332, "loss": 12.1535, "step": 12332 }, { "epoch": 0.6715811398582835, "grad_norm": 0.5262918147403375, "learning_rate": 0.00015483733232162395, "loss": 12.1773, "step": 12333 }, { "epoch": 0.6716355938548665, "grad_norm": 0.5512608317557834, "learning_rate": 0.00015482995798431025, "loss": 12.2709, "step": 12334 }, { "epoch": 0.6716900478514495, "grad_norm": 0.5518915582374262, "learning_rate": 0.0001548225832206296, "loss": 12.2212, "step": 12335 }, { "epoch": 0.6717445018480325, "grad_norm": 0.563258202916151, "learning_rate": 0.00015481520803063936, "loss": 12.3136, "step": 12336 }, { "epoch": 0.6717989558446156, "grad_norm": 0.5866718369655832, "learning_rate": 0.0001548078324143969, "loss": 12.3299, "step": 12337 }, { "epoch": 0.6718534098411986, "grad_norm": 0.6200280583867798, "learning_rate": 0.0001548004563719595, "loss": 12.2475, "step": 12338 }, { "epoch": 0.6719078638377816, "grad_norm": 0.6078855752892216, "learning_rate": 0.00015479307990338457, "loss": 12.2136, "step": 12339 }, { "epoch": 0.6719623178343646, "grad_norm": 0.6314598056079747, "learning_rate": 0.00015478570300872947, "loss": 12.2619, "step": 12340 }, { "epoch": 0.6720167718309475, "grad_norm": 0.5881620181870542, "learning_rate": 0.00015477832568805156, "loss": 12.3938, "step": 12341 }, { "epoch": 0.6720712258275305, "grad_norm": 0.6373593538818688, "learning_rate": 0.00015477094794140821, "loss": 12.2626, "step": 12342 }, { "epoch": 0.6721256798241136, "grad_norm": 0.5922390028924963, "learning_rate": 0.00015476356976885675, "loss": 12.3762, "step": 12343 }, { "epoch": 0.6721801338206966, "grad_norm": 0.6446399456565183, "learning_rate": 0.0001547561911704546, "loss": 12.358, "step": 12344 }, { "epoch": 0.6722345878172796, "grad_norm": 0.627724909475266, "learning_rate": 0.00015474881214625917, "loss": 12.3067, "step": 12345 }, { "epoch": 0.6722890418138626, "grad_norm": 0.6242450041274252, "learning_rate": 0.00015474143269632773, "loss": 12.3418, "step": 12346 }, { "epoch": 0.6723434958104456, "grad_norm": 0.5695089624131952, "learning_rate": 0.00015473405282071776, "loss": 12.315, "step": 12347 }, { "epoch": 0.6723979498070286, "grad_norm": 0.6649335811814701, "learning_rate": 0.00015472667251948663, "loss": 12.4619, "step": 12348 }, { "epoch": 0.6724524038036117, "grad_norm": 0.6151808744087764, "learning_rate": 0.0001547192917926917, "loss": 12.2973, "step": 12349 }, { "epoch": 0.6725068578001947, "grad_norm": 0.5903380108730035, "learning_rate": 0.00015471191064039038, "loss": 12.2254, "step": 12350 }, { "epoch": 0.6725613117967777, "grad_norm": 0.5857314014547613, "learning_rate": 0.00015470452906264005, "loss": 12.3142, "step": 12351 }, { "epoch": 0.6726157657933607, "grad_norm": 0.5376113152177633, "learning_rate": 0.00015469714705949815, "loss": 12.2964, "step": 12352 }, { "epoch": 0.6726702197899437, "grad_norm": 0.5985240941302591, "learning_rate": 0.00015468976463102208, "loss": 12.323, "step": 12353 }, { "epoch": 0.6727246737865267, "grad_norm": 0.5841038750145867, "learning_rate": 0.0001546823817772692, "loss": 12.221, "step": 12354 }, { "epoch": 0.6727791277831098, "grad_norm": 0.5958397562184997, "learning_rate": 0.00015467499849829697, "loss": 12.3587, "step": 12355 }, { "epoch": 0.6728335817796928, "grad_norm": 0.6544013573600018, "learning_rate": 0.00015466761479416275, "loss": 12.2337, "step": 12356 }, { "epoch": 0.6728880357762758, "grad_norm": 0.7357460786214085, "learning_rate": 0.00015466023066492402, "loss": 12.3293, "step": 12357 }, { "epoch": 0.6729424897728588, "grad_norm": 0.53074594411693, "learning_rate": 0.00015465284611063815, "loss": 12.2764, "step": 12358 }, { "epoch": 0.6729969437694417, "grad_norm": 0.6640114712340902, "learning_rate": 0.0001546454611313626, "loss": 12.2647, "step": 12359 }, { "epoch": 0.6730513977660247, "grad_norm": 0.5867100703731516, "learning_rate": 0.0001546380757271548, "loss": 12.3141, "step": 12360 }, { "epoch": 0.6731058517626078, "grad_norm": 0.6114819001091513, "learning_rate": 0.0001546306898980721, "loss": 12.4673, "step": 12361 }, { "epoch": 0.6731603057591908, "grad_norm": 0.5919784984722599, "learning_rate": 0.00015462330364417203, "loss": 12.3433, "step": 12362 }, { "epoch": 0.6732147597557738, "grad_norm": 0.6407954340101385, "learning_rate": 0.00015461591696551202, "loss": 12.1382, "step": 12363 }, { "epoch": 0.6732692137523568, "grad_norm": 0.569920810338462, "learning_rate": 0.00015460852986214942, "loss": 12.3783, "step": 12364 }, { "epoch": 0.6733236677489398, "grad_norm": 0.5870295077894265, "learning_rate": 0.00015460114233414177, "loss": 12.3657, "step": 12365 }, { "epoch": 0.6733781217455228, "grad_norm": 0.6636458321676164, "learning_rate": 0.00015459375438154645, "loss": 12.3468, "step": 12366 }, { "epoch": 0.6734325757421059, "grad_norm": 0.6058518127129257, "learning_rate": 0.00015458636600442098, "loss": 12.3653, "step": 12367 }, { "epoch": 0.6734870297386889, "grad_norm": 0.5852369575195829, "learning_rate": 0.00015457897720282278, "loss": 12.31, "step": 12368 }, { "epoch": 0.6735414837352719, "grad_norm": 0.6882324799735526, "learning_rate": 0.00015457158797680927, "loss": 12.2676, "step": 12369 }, { "epoch": 0.6735959377318549, "grad_norm": 0.6390571986339338, "learning_rate": 0.00015456419832643795, "loss": 12.289, "step": 12370 }, { "epoch": 0.6736503917284379, "grad_norm": 0.5658026855278877, "learning_rate": 0.00015455680825176624, "loss": 12.2175, "step": 12371 }, { "epoch": 0.673704845725021, "grad_norm": 0.6516117326196103, "learning_rate": 0.00015454941775285168, "loss": 12.3273, "step": 12372 }, { "epoch": 0.673759299721604, "grad_norm": 0.6616815047039033, "learning_rate": 0.0001545420268297517, "loss": 12.3157, "step": 12373 }, { "epoch": 0.673813753718187, "grad_norm": 0.5321409149239987, "learning_rate": 0.00015453463548252376, "loss": 12.3777, "step": 12374 }, { "epoch": 0.67386820771477, "grad_norm": 0.5706921321363255, "learning_rate": 0.00015452724371122536, "loss": 12.3715, "step": 12375 }, { "epoch": 0.673922661711353, "grad_norm": 0.6419548750079106, "learning_rate": 0.00015451985151591395, "loss": 12.4302, "step": 12376 }, { "epoch": 0.673977115707936, "grad_norm": 0.6001667031937706, "learning_rate": 0.00015451245889664701, "loss": 12.3792, "step": 12377 }, { "epoch": 0.674031569704519, "grad_norm": 0.6269987634290967, "learning_rate": 0.00015450506585348213, "loss": 12.2882, "step": 12378 }, { "epoch": 0.674086023701102, "grad_norm": 0.6149717814168466, "learning_rate": 0.00015449767238647665, "loss": 12.458, "step": 12379 }, { "epoch": 0.674140477697685, "grad_norm": 0.6305814591670665, "learning_rate": 0.00015449027849568815, "loss": 12.1985, "step": 12380 }, { "epoch": 0.674194931694268, "grad_norm": 0.6317839199734646, "learning_rate": 0.0001544828841811741, "loss": 12.3741, "step": 12381 }, { "epoch": 0.674249385690851, "grad_norm": 0.7296820767208071, "learning_rate": 0.00015447548944299202, "loss": 12.4499, "step": 12382 }, { "epoch": 0.674303839687434, "grad_norm": 0.760502440648826, "learning_rate": 0.00015446809428119938, "loss": 12.4208, "step": 12383 }, { "epoch": 0.6743582936840171, "grad_norm": 0.6032206376300737, "learning_rate": 0.0001544606986958537, "loss": 12.3193, "step": 12384 }, { "epoch": 0.6744127476806001, "grad_norm": 0.651116057178426, "learning_rate": 0.00015445330268701253, "loss": 12.3101, "step": 12385 }, { "epoch": 0.6744672016771831, "grad_norm": 0.6649409317668227, "learning_rate": 0.00015444590625473333, "loss": 12.3424, "step": 12386 }, { "epoch": 0.6745216556737661, "grad_norm": 0.6269635487637647, "learning_rate": 0.0001544385093990736, "loss": 12.3125, "step": 12387 }, { "epoch": 0.6745761096703491, "grad_norm": 0.6004238287073058, "learning_rate": 0.00015443111212009095, "loss": 12.2203, "step": 12388 }, { "epoch": 0.6746305636669321, "grad_norm": 0.636585741622858, "learning_rate": 0.00015442371441784278, "loss": 12.3283, "step": 12389 }, { "epoch": 0.6746850176635152, "grad_norm": 0.6314743975811515, "learning_rate": 0.0001544163162923867, "loss": 12.3442, "step": 12390 }, { "epoch": 0.6747394716600982, "grad_norm": 0.5799282392051064, "learning_rate": 0.00015440891774378025, "loss": 12.4539, "step": 12391 }, { "epoch": 0.6747939256566812, "grad_norm": 0.7255747829870278, "learning_rate": 0.0001544015187720809, "loss": 12.2881, "step": 12392 }, { "epoch": 0.6748483796532642, "grad_norm": 0.5439656129914802, "learning_rate": 0.00015439411937734625, "loss": 12.124, "step": 12393 }, { "epoch": 0.6749028336498472, "grad_norm": 0.6537594353864004, "learning_rate": 0.0001543867195596338, "loss": 12.1749, "step": 12394 }, { "epoch": 0.6749572876464301, "grad_norm": 0.6631492345005819, "learning_rate": 0.00015437931931900107, "loss": 12.2417, "step": 12395 }, { "epoch": 0.6750117416430133, "grad_norm": 0.561066732269436, "learning_rate": 0.00015437191865550563, "loss": 12.2644, "step": 12396 }, { "epoch": 0.6750661956395962, "grad_norm": 0.5838921948641176, "learning_rate": 0.00015436451756920504, "loss": 12.29, "step": 12397 }, { "epoch": 0.6751206496361792, "grad_norm": 0.6094728089312846, "learning_rate": 0.0001543571160601569, "loss": 12.3605, "step": 12398 }, { "epoch": 0.6751751036327622, "grad_norm": 0.5575270120681592, "learning_rate": 0.00015434971412841867, "loss": 12.2974, "step": 12399 }, { "epoch": 0.6752295576293452, "grad_norm": 0.6325187505651708, "learning_rate": 0.0001543423117740479, "loss": 12.2801, "step": 12400 }, { "epoch": 0.6752840116259282, "grad_norm": 0.6472180003870803, "learning_rate": 0.00015433490899710223, "loss": 12.3826, "step": 12401 }, { "epoch": 0.6753384656225113, "grad_norm": 0.5868907719763805, "learning_rate": 0.0001543275057976392, "loss": 12.2332, "step": 12402 }, { "epoch": 0.6753929196190943, "grad_norm": 0.6427595911096574, "learning_rate": 0.0001543201021757164, "loss": 12.4348, "step": 12403 }, { "epoch": 0.6754473736156773, "grad_norm": 0.6668338028229127, "learning_rate": 0.00015431269813139138, "loss": 12.3388, "step": 12404 }, { "epoch": 0.6755018276122603, "grad_norm": 0.5775714167901217, "learning_rate": 0.00015430529366472167, "loss": 12.3564, "step": 12405 }, { "epoch": 0.6755562816088433, "grad_norm": 0.5332194175715848, "learning_rate": 0.0001542978887757649, "loss": 12.267, "step": 12406 }, { "epoch": 0.6756107356054264, "grad_norm": 0.5890769707710252, "learning_rate": 0.00015429048346457863, "loss": 12.3213, "step": 12407 }, { "epoch": 0.6756651896020094, "grad_norm": 0.6323650116011595, "learning_rate": 0.0001542830777312205, "loss": 12.289, "step": 12408 }, { "epoch": 0.6757196435985924, "grad_norm": 0.6041259921360344, "learning_rate": 0.00015427567157574802, "loss": 12.172, "step": 12409 }, { "epoch": 0.6757740975951754, "grad_norm": 0.6035886166991528, "learning_rate": 0.00015426826499821884, "loss": 12.1674, "step": 12410 }, { "epoch": 0.6758285515917584, "grad_norm": 0.5608004708899101, "learning_rate": 0.00015426085799869052, "loss": 12.308, "step": 12411 }, { "epoch": 0.6758830055883414, "grad_norm": 0.7775669709518196, "learning_rate": 0.00015425345057722064, "loss": 12.446, "step": 12412 }, { "epoch": 0.6759374595849245, "grad_norm": 0.598766215338868, "learning_rate": 0.0001542460427338669, "loss": 12.3271, "step": 12413 }, { "epoch": 0.6759919135815075, "grad_norm": 0.5941778258183403, "learning_rate": 0.00015423863446868677, "loss": 12.2538, "step": 12414 }, { "epoch": 0.6760463675780904, "grad_norm": 0.5730630987233022, "learning_rate": 0.00015423122578173793, "loss": 12.2467, "step": 12415 }, { "epoch": 0.6761008215746734, "grad_norm": 0.6393883467564998, "learning_rate": 0.000154223816673078, "loss": 12.3639, "step": 12416 }, { "epoch": 0.6761552755712564, "grad_norm": 0.5958537515290336, "learning_rate": 0.00015421640714276459, "loss": 12.2933, "step": 12417 }, { "epoch": 0.6762097295678394, "grad_norm": 0.5473205329362438, "learning_rate": 0.00015420899719085528, "loss": 12.1924, "step": 12418 }, { "epoch": 0.6762641835644225, "grad_norm": 0.6135898548407824, "learning_rate": 0.00015420158681740773, "loss": 12.3284, "step": 12419 }, { "epoch": 0.6763186375610055, "grad_norm": 0.6937169434880651, "learning_rate": 0.00015419417602247955, "loss": 12.3112, "step": 12420 }, { "epoch": 0.6763730915575885, "grad_norm": 0.5813587951963771, "learning_rate": 0.00015418676480612837, "loss": 12.2987, "step": 12421 }, { "epoch": 0.6764275455541715, "grad_norm": 0.5634384409012385, "learning_rate": 0.00015417935316841178, "loss": 12.2539, "step": 12422 }, { "epoch": 0.6764819995507545, "grad_norm": 0.6025612760466049, "learning_rate": 0.0001541719411093875, "loss": 12.1718, "step": 12423 }, { "epoch": 0.6765364535473375, "grad_norm": 0.5833255778249352, "learning_rate": 0.00015416452862911312, "loss": 12.2218, "step": 12424 }, { "epoch": 0.6765909075439206, "grad_norm": 0.5830101697665969, "learning_rate": 0.00015415711572764628, "loss": 12.2757, "step": 12425 }, { "epoch": 0.6766453615405036, "grad_norm": 0.6382187491294317, "learning_rate": 0.00015414970240504463, "loss": 12.1939, "step": 12426 }, { "epoch": 0.6766998155370866, "grad_norm": 0.6245430074794555, "learning_rate": 0.0001541422886613658, "loss": 12.3249, "step": 12427 }, { "epoch": 0.6767542695336696, "grad_norm": 0.5902115680820347, "learning_rate": 0.00015413487449666746, "loss": 12.3674, "step": 12428 }, { "epoch": 0.6768087235302526, "grad_norm": 0.5595368427708826, "learning_rate": 0.00015412745991100722, "loss": 12.2548, "step": 12429 }, { "epoch": 0.6768631775268356, "grad_norm": 0.7746748718973765, "learning_rate": 0.00015412004490444282, "loss": 12.4776, "step": 12430 }, { "epoch": 0.6769176315234187, "grad_norm": 0.5850132877825888, "learning_rate": 0.00015411262947703186, "loss": 12.2565, "step": 12431 }, { "epoch": 0.6769720855200017, "grad_norm": 0.5774462724432337, "learning_rate": 0.00015410521362883205, "loss": 12.3022, "step": 12432 }, { "epoch": 0.6770265395165846, "grad_norm": 0.5587997789708538, "learning_rate": 0.00015409779735990096, "loss": 12.3016, "step": 12433 }, { "epoch": 0.6770809935131676, "grad_norm": 0.7234506787324182, "learning_rate": 0.0001540903806702964, "loss": 12.2563, "step": 12434 }, { "epoch": 0.6771354475097506, "grad_norm": 0.6431977615477494, "learning_rate": 0.00015408296356007593, "loss": 12.3079, "step": 12435 }, { "epoch": 0.6771899015063337, "grad_norm": 0.6397691285259051, "learning_rate": 0.00015407554602929726, "loss": 12.3851, "step": 12436 }, { "epoch": 0.6772443555029167, "grad_norm": 0.5613075305208957, "learning_rate": 0.00015406812807801807, "loss": 12.274, "step": 12437 }, { "epoch": 0.6772988094994997, "grad_norm": 0.5978847918724727, "learning_rate": 0.0001540607097062961, "loss": 12.2493, "step": 12438 }, { "epoch": 0.6773532634960827, "grad_norm": 0.6838034584535284, "learning_rate": 0.00015405329091418892, "loss": 12.4762, "step": 12439 }, { "epoch": 0.6774077174926657, "grad_norm": 0.6147624053315099, "learning_rate": 0.00015404587170175432, "loss": 12.2885, "step": 12440 }, { "epoch": 0.6774621714892487, "grad_norm": 0.566160152720444, "learning_rate": 0.00015403845206904995, "loss": 12.2939, "step": 12441 }, { "epoch": 0.6775166254858318, "grad_norm": 0.6071692350273591, "learning_rate": 0.0001540310320161335, "loss": 12.2652, "step": 12442 }, { "epoch": 0.6775710794824148, "grad_norm": 0.6200172147901956, "learning_rate": 0.0001540236115430627, "loss": 12.3072, "step": 12443 }, { "epoch": 0.6776255334789978, "grad_norm": 0.5818622382070244, "learning_rate": 0.00015401619064989524, "loss": 12.2654, "step": 12444 }, { "epoch": 0.6776799874755808, "grad_norm": 0.669309561790791, "learning_rate": 0.00015400876933668883, "loss": 12.3663, "step": 12445 }, { "epoch": 0.6777344414721638, "grad_norm": 0.5745993725924108, "learning_rate": 0.00015400134760350115, "loss": 12.2131, "step": 12446 }, { "epoch": 0.6777888954687468, "grad_norm": 0.6416078009550086, "learning_rate": 0.00015399392545038993, "loss": 12.2781, "step": 12447 }, { "epoch": 0.6778433494653299, "grad_norm": 0.6656096813777037, "learning_rate": 0.00015398650287741294, "loss": 12.3451, "step": 12448 }, { "epoch": 0.6778978034619129, "grad_norm": 0.5414612288002892, "learning_rate": 0.0001539790798846278, "loss": 12.2011, "step": 12449 }, { "epoch": 0.6779522574584959, "grad_norm": 0.6820187506744111, "learning_rate": 0.00015397165647209232, "loss": 12.1749, "step": 12450 }, { "epoch": 0.6780067114550788, "grad_norm": 0.568131185561711, "learning_rate": 0.00015396423263986414, "loss": 12.2077, "step": 12451 }, { "epoch": 0.6780611654516618, "grad_norm": 0.6201515312515274, "learning_rate": 0.00015395680838800106, "loss": 12.3235, "step": 12452 }, { "epoch": 0.6781156194482448, "grad_norm": 0.576863601249074, "learning_rate": 0.00015394938371656082, "loss": 12.3372, "step": 12453 }, { "epoch": 0.6781700734448279, "grad_norm": 0.6012291927573601, "learning_rate": 0.0001539419586256011, "loss": 12.4355, "step": 12454 }, { "epoch": 0.6782245274414109, "grad_norm": 0.6164387649292629, "learning_rate": 0.00015393453311517968, "loss": 12.346, "step": 12455 }, { "epoch": 0.6782789814379939, "grad_norm": 0.9865572568672095, "learning_rate": 0.00015392710718535425, "loss": 12.302, "step": 12456 }, { "epoch": 0.6783334354345769, "grad_norm": 0.5986802333369561, "learning_rate": 0.0001539196808361826, "loss": 12.3104, "step": 12457 }, { "epoch": 0.6783878894311599, "grad_norm": 0.5640829111191706, "learning_rate": 0.0001539122540677225, "loss": 12.2395, "step": 12458 }, { "epoch": 0.6784423434277429, "grad_norm": 0.556879327150156, "learning_rate": 0.00015390482688003166, "loss": 12.3582, "step": 12459 }, { "epoch": 0.678496797424326, "grad_norm": 0.5968851843906607, "learning_rate": 0.0001538973992731678, "loss": 12.2607, "step": 12460 }, { "epoch": 0.678551251420909, "grad_norm": 0.5580397132881889, "learning_rate": 0.00015388997124718878, "loss": 12.2164, "step": 12461 }, { "epoch": 0.678605705417492, "grad_norm": 0.6033343613214533, "learning_rate": 0.00015388254280215228, "loss": 12.4007, "step": 12462 }, { "epoch": 0.678660159414075, "grad_norm": 0.6286973816578141, "learning_rate": 0.0001538751139381161, "loss": 12.2404, "step": 12463 }, { "epoch": 0.678714613410658, "grad_norm": 0.659599719190087, "learning_rate": 0.000153867684655138, "loss": 12.5315, "step": 12464 }, { "epoch": 0.678769067407241, "grad_norm": 0.5985044281923901, "learning_rate": 0.00015386025495327577, "loss": 12.1016, "step": 12465 }, { "epoch": 0.6788235214038241, "grad_norm": 0.5894161418965819, "learning_rate": 0.00015385282483258714, "loss": 12.2652, "step": 12466 }, { "epoch": 0.6788779754004071, "grad_norm": 0.5674643925713331, "learning_rate": 0.00015384539429312989, "loss": 12.2766, "step": 12467 }, { "epoch": 0.6789324293969901, "grad_norm": 0.6379808068456474, "learning_rate": 0.00015383796333496186, "loss": 12.2333, "step": 12468 }, { "epoch": 0.678986883393573, "grad_norm": 0.5935111162733151, "learning_rate": 0.0001538305319581408, "loss": 12.3503, "step": 12469 }, { "epoch": 0.679041337390156, "grad_norm": 0.6086340132949898, "learning_rate": 0.00015382310016272448, "loss": 12.2272, "step": 12470 }, { "epoch": 0.6790957913867391, "grad_norm": 0.6248968129012789, "learning_rate": 0.0001538156679487707, "loss": 12.3787, "step": 12471 }, { "epoch": 0.6791502453833221, "grad_norm": 0.6803043130004992, "learning_rate": 0.00015380823531633729, "loss": 12.2579, "step": 12472 }, { "epoch": 0.6792046993799051, "grad_norm": 0.656370341075035, "learning_rate": 0.000153800802265482, "loss": 12.3432, "step": 12473 }, { "epoch": 0.6792591533764881, "grad_norm": 0.5485034232369759, "learning_rate": 0.00015379336879626262, "loss": 12.253, "step": 12474 }, { "epoch": 0.6793136073730711, "grad_norm": 0.5865073594312507, "learning_rate": 0.00015378593490873702, "loss": 12.3455, "step": 12475 }, { "epoch": 0.6793680613696541, "grad_norm": 0.6106625314250173, "learning_rate": 0.00015377850060296298, "loss": 12.25, "step": 12476 }, { "epoch": 0.6794225153662372, "grad_norm": 0.6703990987672004, "learning_rate": 0.00015377106587899828, "loss": 12.3259, "step": 12477 }, { "epoch": 0.6794769693628202, "grad_norm": 0.6749449370571579, "learning_rate": 0.00015376363073690076, "loss": 12.4298, "step": 12478 }, { "epoch": 0.6795314233594032, "grad_norm": 0.5267898950623957, "learning_rate": 0.00015375619517672822, "loss": 12.2494, "step": 12479 }, { "epoch": 0.6795858773559862, "grad_norm": 0.5813391501366415, "learning_rate": 0.00015374875919853853, "loss": 12.2722, "step": 12480 }, { "epoch": 0.6796403313525692, "grad_norm": 0.5691942155847314, "learning_rate": 0.00015374132280238944, "loss": 12.3343, "step": 12481 }, { "epoch": 0.6796947853491522, "grad_norm": 0.5597577928871845, "learning_rate": 0.00015373388598833882, "loss": 12.2779, "step": 12482 }, { "epoch": 0.6797492393457353, "grad_norm": 0.5600179115697138, "learning_rate": 0.0001537264487564445, "loss": 12.3019, "step": 12483 }, { "epoch": 0.6798036933423183, "grad_norm": 0.6609106539019437, "learning_rate": 0.00015371901110676427, "loss": 12.3928, "step": 12484 }, { "epoch": 0.6798581473389013, "grad_norm": 0.6196194038765365, "learning_rate": 0.00015371157303935604, "loss": 12.4834, "step": 12485 }, { "epoch": 0.6799126013354843, "grad_norm": 0.7024144124020267, "learning_rate": 0.00015370413455427758, "loss": 12.3145, "step": 12486 }, { "epoch": 0.6799670553320672, "grad_norm": 0.5335364578058331, "learning_rate": 0.0001536966956515868, "loss": 12.1881, "step": 12487 }, { "epoch": 0.6800215093286502, "grad_norm": 0.6308656762616506, "learning_rate": 0.00015368925633134145, "loss": 12.3981, "step": 12488 }, { "epoch": 0.6800759633252333, "grad_norm": 0.5609199684595741, "learning_rate": 0.0001536818165935995, "loss": 12.1629, "step": 12489 }, { "epoch": 0.6801304173218163, "grad_norm": 0.5882023506431455, "learning_rate": 0.0001536743764384187, "loss": 12.217, "step": 12490 }, { "epoch": 0.6801848713183993, "grad_norm": 0.5830886102468975, "learning_rate": 0.00015366693586585698, "loss": 12.3887, "step": 12491 }, { "epoch": 0.6802393253149823, "grad_norm": 0.5933736821972734, "learning_rate": 0.00015365949487597215, "loss": 12.2738, "step": 12492 }, { "epoch": 0.6802937793115653, "grad_norm": 0.6270327180411331, "learning_rate": 0.00015365205346882211, "loss": 12.4265, "step": 12493 }, { "epoch": 0.6803482333081483, "grad_norm": 0.6123050154368799, "learning_rate": 0.0001536446116444647, "loss": 12.2808, "step": 12494 }, { "epoch": 0.6804026873047314, "grad_norm": 0.6149944772394959, "learning_rate": 0.00015363716940295776, "loss": 12.283, "step": 12495 }, { "epoch": 0.6804571413013144, "grad_norm": 0.5767155457877045, "learning_rate": 0.00015362972674435923, "loss": 12.3108, "step": 12496 }, { "epoch": 0.6805115952978974, "grad_norm": 0.6095755670317139, "learning_rate": 0.00015362228366872692, "loss": 12.3838, "step": 12497 }, { "epoch": 0.6805660492944804, "grad_norm": 0.5932684303723733, "learning_rate": 0.00015361484017611878, "loss": 12.3602, "step": 12498 }, { "epoch": 0.6806205032910634, "grad_norm": 0.557865255664125, "learning_rate": 0.00015360739626659264, "loss": 12.2614, "step": 12499 }, { "epoch": 0.6806749572876464, "grad_norm": 0.5466275613168837, "learning_rate": 0.00015359995194020635, "loss": 12.1963, "step": 12500 }, { "epoch": 0.6807294112842295, "grad_norm": 0.5722638998858818, "learning_rate": 0.0001535925071970179, "loss": 12.3277, "step": 12501 }, { "epoch": 0.6807838652808125, "grad_norm": 0.5719742566065302, "learning_rate": 0.0001535850620370851, "loss": 12.301, "step": 12502 }, { "epoch": 0.6808383192773955, "grad_norm": 0.5978564215852396, "learning_rate": 0.00015357761646046586, "loss": 12.4602, "step": 12503 }, { "epoch": 0.6808927732739785, "grad_norm": 0.6845621944319514, "learning_rate": 0.00015357017046721815, "loss": 12.2861, "step": 12504 }, { "epoch": 0.6809472272705615, "grad_norm": 0.5446505254122316, "learning_rate": 0.00015356272405739975, "loss": 12.2506, "step": 12505 }, { "epoch": 0.6810016812671446, "grad_norm": 0.5764664656405027, "learning_rate": 0.00015355527723106866, "loss": 12.1196, "step": 12506 }, { "epoch": 0.6810561352637275, "grad_norm": 0.5824158968217736, "learning_rate": 0.00015354782998828272, "loss": 12.1073, "step": 12507 }, { "epoch": 0.6811105892603105, "grad_norm": 0.5951660676019116, "learning_rate": 0.0001535403823290999, "loss": 12.1546, "step": 12508 }, { "epoch": 0.6811650432568935, "grad_norm": 0.633650384302766, "learning_rate": 0.0001535329342535781, "loss": 12.3471, "step": 12509 }, { "epoch": 0.6812194972534765, "grad_norm": 0.7103507550363984, "learning_rate": 0.0001535254857617752, "loss": 12.4524, "step": 12510 }, { "epoch": 0.6812739512500595, "grad_norm": 0.57847764762371, "learning_rate": 0.00015351803685374914, "loss": 12.2622, "step": 12511 }, { "epoch": 0.6813284052466426, "grad_norm": 0.6403836667625455, "learning_rate": 0.00015351058752955784, "loss": 12.3535, "step": 12512 }, { "epoch": 0.6813828592432256, "grad_norm": 0.7088629683482663, "learning_rate": 0.0001535031377892593, "loss": 12.3262, "step": 12513 }, { "epoch": 0.6814373132398086, "grad_norm": 0.6193844866774646, "learning_rate": 0.00015349568763291135, "loss": 12.3267, "step": 12514 }, { "epoch": 0.6814917672363916, "grad_norm": 0.5998515129373875, "learning_rate": 0.00015348823706057196, "loss": 12.3904, "step": 12515 }, { "epoch": 0.6815462212329746, "grad_norm": 0.6448018105822136, "learning_rate": 0.00015348078607229905, "loss": 12.4076, "step": 12516 }, { "epoch": 0.6816006752295576, "grad_norm": 0.6148133586569113, "learning_rate": 0.00015347333466815059, "loss": 12.2451, "step": 12517 }, { "epoch": 0.6816551292261407, "grad_norm": 0.5678300825860817, "learning_rate": 0.00015346588284818454, "loss": 12.2692, "step": 12518 }, { "epoch": 0.6817095832227237, "grad_norm": 0.5768317706320972, "learning_rate": 0.0001534584306124588, "loss": 12.3386, "step": 12519 }, { "epoch": 0.6817640372193067, "grad_norm": 0.5331376057490201, "learning_rate": 0.00015345097796103135, "loss": 12.1922, "step": 12520 }, { "epoch": 0.6818184912158897, "grad_norm": 0.5544525547019814, "learning_rate": 0.0001534435248939601, "loss": 12.269, "step": 12521 }, { "epoch": 0.6818729452124727, "grad_norm": 0.6193524364573895, "learning_rate": 0.00015343607141130304, "loss": 12.2663, "step": 12522 }, { "epoch": 0.6819273992090557, "grad_norm": 0.5619608059312059, "learning_rate": 0.00015342861751311814, "loss": 12.3329, "step": 12523 }, { "epoch": 0.6819818532056388, "grad_norm": 0.6552963097235771, "learning_rate": 0.00015342116319946338, "loss": 12.3595, "step": 12524 }, { "epoch": 0.6820363072022217, "grad_norm": 0.570274795582616, "learning_rate": 0.00015341370847039666, "loss": 12.2273, "step": 12525 }, { "epoch": 0.6820907611988047, "grad_norm": 0.6239227052665182, "learning_rate": 0.000153406253325976, "loss": 12.3095, "step": 12526 }, { "epoch": 0.6821452151953877, "grad_norm": 0.5745369017605514, "learning_rate": 0.00015339879776625937, "loss": 12.2778, "step": 12527 }, { "epoch": 0.6821996691919707, "grad_norm": 0.6025091065614796, "learning_rate": 0.00015339134179130469, "loss": 12.248, "step": 12528 }, { "epoch": 0.6822541231885537, "grad_norm": 0.6598333268101045, "learning_rate": 0.00015338388540117004, "loss": 12.1671, "step": 12529 }, { "epoch": 0.6823085771851368, "grad_norm": 0.6316947537516462, "learning_rate": 0.0001533764285959133, "loss": 12.3167, "step": 12530 }, { "epoch": 0.6823630311817198, "grad_norm": 0.6344343759849481, "learning_rate": 0.0001533689713755925, "loss": 12.2439, "step": 12531 }, { "epoch": 0.6824174851783028, "grad_norm": 0.6129540352843904, "learning_rate": 0.00015336151374026566, "loss": 12.1051, "step": 12532 }, { "epoch": 0.6824719391748858, "grad_norm": 0.6456101389818156, "learning_rate": 0.00015335405568999073, "loss": 12.3736, "step": 12533 }, { "epoch": 0.6825263931714688, "grad_norm": 0.60867641200438, "learning_rate": 0.00015334659722482572, "loss": 12.2544, "step": 12534 }, { "epoch": 0.6825808471680518, "grad_norm": 0.6173784654404763, "learning_rate": 0.0001533391383448286, "loss": 12.2428, "step": 12535 }, { "epoch": 0.6826353011646349, "grad_norm": 0.6030950147532718, "learning_rate": 0.0001533316790500574, "loss": 12.282, "step": 12536 }, { "epoch": 0.6826897551612179, "grad_norm": 0.6110934537663895, "learning_rate": 0.00015332421934057013, "loss": 12.304, "step": 12537 }, { "epoch": 0.6827442091578009, "grad_norm": 0.6925727134598422, "learning_rate": 0.00015331675921642478, "loss": 12.3138, "step": 12538 }, { "epoch": 0.6827986631543839, "grad_norm": 0.5930059362458303, "learning_rate": 0.00015330929867767937, "loss": 12.3151, "step": 12539 }, { "epoch": 0.6828531171509669, "grad_norm": 0.7085795714441075, "learning_rate": 0.00015330183772439195, "loss": 12.3222, "step": 12540 }, { "epoch": 0.68290757114755, "grad_norm": 0.5369805349876322, "learning_rate": 0.00015329437635662046, "loss": 12.2943, "step": 12541 }, { "epoch": 0.682962025144133, "grad_norm": 0.544593423692748, "learning_rate": 0.00015328691457442296, "loss": 12.257, "step": 12542 }, { "epoch": 0.683016479140716, "grad_norm": 0.5884016852779346, "learning_rate": 0.00015327945237785748, "loss": 12.261, "step": 12543 }, { "epoch": 0.6830709331372989, "grad_norm": 0.5874520382845972, "learning_rate": 0.00015327198976698204, "loss": 12.3641, "step": 12544 }, { "epoch": 0.6831253871338819, "grad_norm": 0.5936986434307064, "learning_rate": 0.00015326452674185468, "loss": 12.3483, "step": 12545 }, { "epoch": 0.6831798411304649, "grad_norm": 0.5729493782893559, "learning_rate": 0.00015325706330253348, "loss": 12.3482, "step": 12546 }, { "epoch": 0.683234295127048, "grad_norm": 0.5379559808268314, "learning_rate": 0.00015324959944907636, "loss": 12.38, "step": 12547 }, { "epoch": 0.683288749123631, "grad_norm": 0.5508562129553998, "learning_rate": 0.00015324213518154144, "loss": 12.2574, "step": 12548 }, { "epoch": 0.683343203120214, "grad_norm": 0.5528210383541169, "learning_rate": 0.00015323467049998675, "loss": 12.1128, "step": 12549 }, { "epoch": 0.683397657116797, "grad_norm": 0.5690346282873607, "learning_rate": 0.00015322720540447034, "loss": 12.0657, "step": 12550 }, { "epoch": 0.68345211111338, "grad_norm": 0.6207858687490257, "learning_rate": 0.00015321973989505024, "loss": 12.1423, "step": 12551 }, { "epoch": 0.683506565109963, "grad_norm": 0.5401235764199944, "learning_rate": 0.00015321227397178456, "loss": 12.1383, "step": 12552 }, { "epoch": 0.6835610191065461, "grad_norm": 0.5639866531793937, "learning_rate": 0.00015320480763473127, "loss": 12.3375, "step": 12553 }, { "epoch": 0.6836154731031291, "grad_norm": 0.5569695080226263, "learning_rate": 0.00015319734088394847, "loss": 12.2254, "step": 12554 }, { "epoch": 0.6836699270997121, "grad_norm": 0.5823886376253445, "learning_rate": 0.00015318987371949424, "loss": 12.1081, "step": 12555 }, { "epoch": 0.6837243810962951, "grad_norm": 0.5831867835423197, "learning_rate": 0.0001531824061414266, "loss": 12.4263, "step": 12556 }, { "epoch": 0.6837788350928781, "grad_norm": 0.6368773183461842, "learning_rate": 0.00015317493814980373, "loss": 12.3931, "step": 12557 }, { "epoch": 0.6838332890894611, "grad_norm": 0.5967235892556965, "learning_rate": 0.00015316746974468356, "loss": 12.3436, "step": 12558 }, { "epoch": 0.6838877430860442, "grad_norm": 0.5611941804067937, "learning_rate": 0.00015316000092612425, "loss": 12.2935, "step": 12559 }, { "epoch": 0.6839421970826272, "grad_norm": 0.5821625925593092, "learning_rate": 0.00015315253169418384, "loss": 12.3281, "step": 12560 }, { "epoch": 0.6839966510792101, "grad_norm": 0.58929587624123, "learning_rate": 0.00015314506204892046, "loss": 12.2427, "step": 12561 }, { "epoch": 0.6840511050757931, "grad_norm": 0.9656728003292488, "learning_rate": 0.00015313759199039214, "loss": 12.3526, "step": 12562 }, { "epoch": 0.6841055590723761, "grad_norm": 0.6415621689917624, "learning_rate": 0.000153130121518657, "loss": 12.2039, "step": 12563 }, { "epoch": 0.6841600130689591, "grad_norm": 0.6300005979679029, "learning_rate": 0.00015312265063377315, "loss": 12.3009, "step": 12564 }, { "epoch": 0.6842144670655422, "grad_norm": 0.5685462441014264, "learning_rate": 0.00015311517933579865, "loss": 12.2691, "step": 12565 }, { "epoch": 0.6842689210621252, "grad_norm": 0.5643867251692329, "learning_rate": 0.00015310770762479156, "loss": 12.2821, "step": 12566 }, { "epoch": 0.6843233750587082, "grad_norm": 0.5868894551224721, "learning_rate": 0.00015310023550081008, "loss": 12.2581, "step": 12567 }, { "epoch": 0.6843778290552912, "grad_norm": 0.5726335847878873, "learning_rate": 0.00015309276296391223, "loss": 12.2945, "step": 12568 }, { "epoch": 0.6844322830518742, "grad_norm": 0.6777200529938219, "learning_rate": 0.0001530852900141562, "loss": 12.3371, "step": 12569 }, { "epoch": 0.6844867370484573, "grad_norm": 0.6001064109162652, "learning_rate": 0.00015307781665160005, "loss": 12.3353, "step": 12570 }, { "epoch": 0.6845411910450403, "grad_norm": 0.5606026837695675, "learning_rate": 0.00015307034287630184, "loss": 12.3618, "step": 12571 }, { "epoch": 0.6845956450416233, "grad_norm": 0.5898496714352334, "learning_rate": 0.00015306286868831977, "loss": 12.2474, "step": 12572 }, { "epoch": 0.6846500990382063, "grad_norm": 0.5889931142568797, "learning_rate": 0.00015305539408771193, "loss": 12.3571, "step": 12573 }, { "epoch": 0.6847045530347893, "grad_norm": 0.6156435547693119, "learning_rate": 0.00015304791907453646, "loss": 12.2613, "step": 12574 }, { "epoch": 0.6847590070313723, "grad_norm": 0.6230231332692246, "learning_rate": 0.00015304044364885147, "loss": 12.4422, "step": 12575 }, { "epoch": 0.6848134610279554, "grad_norm": 0.6871335752250709, "learning_rate": 0.0001530329678107151, "loss": 12.3385, "step": 12576 }, { "epoch": 0.6848679150245384, "grad_norm": 0.623158174408285, "learning_rate": 0.00015302549156018545, "loss": 12.2042, "step": 12577 }, { "epoch": 0.6849223690211214, "grad_norm": 0.5706635418917007, "learning_rate": 0.00015301801489732073, "loss": 12.1545, "step": 12578 }, { "epoch": 0.6849768230177044, "grad_norm": 0.55862280847443, "learning_rate": 0.00015301053782217902, "loss": 12.2803, "step": 12579 }, { "epoch": 0.6850312770142873, "grad_norm": 0.5992289622696629, "learning_rate": 0.0001530030603348185, "loss": 12.2505, "step": 12580 }, { "epoch": 0.6850857310108703, "grad_norm": 0.587073117141727, "learning_rate": 0.00015299558243529724, "loss": 12.2734, "step": 12581 }, { "epoch": 0.6851401850074534, "grad_norm": 0.5766027769184066, "learning_rate": 0.00015298810412367348, "loss": 12.3116, "step": 12582 }, { "epoch": 0.6851946390040364, "grad_norm": 0.6304471982153476, "learning_rate": 0.00015298062540000532, "loss": 12.3564, "step": 12583 }, { "epoch": 0.6852490930006194, "grad_norm": 0.6097819467526905, "learning_rate": 0.00015297314626435093, "loss": 12.3909, "step": 12584 }, { "epoch": 0.6853035469972024, "grad_norm": 0.5925291505144655, "learning_rate": 0.0001529656667167685, "loss": 12.2947, "step": 12585 }, { "epoch": 0.6853580009937854, "grad_norm": 0.5807666138404012, "learning_rate": 0.00015295818675731612, "loss": 12.2874, "step": 12586 }, { "epoch": 0.6854124549903684, "grad_norm": 0.6025571608827972, "learning_rate": 0.00015295070638605203, "loss": 12.2434, "step": 12587 }, { "epoch": 0.6854669089869515, "grad_norm": 0.6237484803353083, "learning_rate": 0.00015294322560303436, "loss": 12.3145, "step": 12588 }, { "epoch": 0.6855213629835345, "grad_norm": 0.5948647440033049, "learning_rate": 0.0001529357444083213, "loss": 12.2099, "step": 12589 }, { "epoch": 0.6855758169801175, "grad_norm": 0.605663960635657, "learning_rate": 0.00015292826280197098, "loss": 12.2644, "step": 12590 }, { "epoch": 0.6856302709767005, "grad_norm": 0.5990931213074819, "learning_rate": 0.00015292078078404166, "loss": 12.2598, "step": 12591 }, { "epoch": 0.6856847249732835, "grad_norm": 0.6073467998974551, "learning_rate": 0.00015291329835459146, "loss": 12.3719, "step": 12592 }, { "epoch": 0.6857391789698665, "grad_norm": 0.614944758172525, "learning_rate": 0.00015290581551367856, "loss": 12.2056, "step": 12593 }, { "epoch": 0.6857936329664496, "grad_norm": 0.6222266110368851, "learning_rate": 0.00015289833226136116, "loss": 12.2476, "step": 12594 }, { "epoch": 0.6858480869630326, "grad_norm": 0.639785126460829, "learning_rate": 0.00015289084859769746, "loss": 12.4306, "step": 12595 }, { "epoch": 0.6859025409596156, "grad_norm": 0.5938562698228961, "learning_rate": 0.0001528833645227457, "loss": 12.2332, "step": 12596 }, { "epoch": 0.6859569949561986, "grad_norm": 0.6253395712765962, "learning_rate": 0.00015287588003656398, "loss": 12.2702, "step": 12597 }, { "epoch": 0.6860114489527815, "grad_norm": 0.7806519859510992, "learning_rate": 0.00015286839513921056, "loss": 12.2199, "step": 12598 }, { "epoch": 0.6860659029493645, "grad_norm": 0.6383790912822903, "learning_rate": 0.00015286090983074365, "loss": 12.3867, "step": 12599 }, { "epoch": 0.6861203569459476, "grad_norm": 0.6354619200163265, "learning_rate": 0.0001528534241112214, "loss": 12.2405, "step": 12600 }, { "epoch": 0.6861748109425306, "grad_norm": 0.7198083446997495, "learning_rate": 0.0001528459379807021, "loss": 12.1971, "step": 12601 }, { "epoch": 0.6862292649391136, "grad_norm": 0.5609639402030633, "learning_rate": 0.0001528384514392439, "loss": 12.2583, "step": 12602 }, { "epoch": 0.6862837189356966, "grad_norm": 0.6590477541121527, "learning_rate": 0.00015283096448690504, "loss": 12.4317, "step": 12603 }, { "epoch": 0.6863381729322796, "grad_norm": 0.6169043747317672, "learning_rate": 0.00015282347712374376, "loss": 12.2422, "step": 12604 }, { "epoch": 0.6863926269288627, "grad_norm": 0.5924528998422541, "learning_rate": 0.00015281598934981826, "loss": 12.3041, "step": 12605 }, { "epoch": 0.6864470809254457, "grad_norm": 0.6317948636202078, "learning_rate": 0.00015280850116518673, "loss": 12.3189, "step": 12606 }, { "epoch": 0.6865015349220287, "grad_norm": 0.6003665146103847, "learning_rate": 0.00015280101256990748, "loss": 12.2639, "step": 12607 }, { "epoch": 0.6865559889186117, "grad_norm": 0.4923224569887765, "learning_rate": 0.00015279352356403872, "loss": 11.9896, "step": 12608 }, { "epoch": 0.6866104429151947, "grad_norm": 0.6174994382831572, "learning_rate": 0.00015278603414763863, "loss": 12.2954, "step": 12609 }, { "epoch": 0.6866648969117777, "grad_norm": 0.5857052679957093, "learning_rate": 0.0001527785443207655, "loss": 12.3063, "step": 12610 }, { "epoch": 0.6867193509083608, "grad_norm": 0.5743478781291048, "learning_rate": 0.00015277105408347756, "loss": 12.1504, "step": 12611 }, { "epoch": 0.6867738049049438, "grad_norm": 0.5559483050548155, "learning_rate": 0.00015276356343583305, "loss": 12.2322, "step": 12612 }, { "epoch": 0.6868282589015268, "grad_norm": 0.6352063878137918, "learning_rate": 0.00015275607237789023, "loss": 12.338, "step": 12613 }, { "epoch": 0.6868827128981098, "grad_norm": 0.561237471566255, "learning_rate": 0.00015274858090970735, "loss": 12.1346, "step": 12614 }, { "epoch": 0.6869371668946928, "grad_norm": 0.6444034241684865, "learning_rate": 0.00015274108903134262, "loss": 12.4475, "step": 12615 }, { "epoch": 0.6869916208912757, "grad_norm": 0.6399563424531881, "learning_rate": 0.00015273359674285438, "loss": 12.1246, "step": 12616 }, { "epoch": 0.6870460748878588, "grad_norm": 0.557675000921174, "learning_rate": 0.00015272610404430082, "loss": 12.1814, "step": 12617 }, { "epoch": 0.6871005288844418, "grad_norm": 0.6547315754685608, "learning_rate": 0.0001527186109357402, "loss": 12.3489, "step": 12618 }, { "epoch": 0.6871549828810248, "grad_norm": 0.6158957717630221, "learning_rate": 0.0001527111174172309, "loss": 12.2577, "step": 12619 }, { "epoch": 0.6872094368776078, "grad_norm": 0.6405001217155041, "learning_rate": 0.00015270362348883108, "loss": 12.3835, "step": 12620 }, { "epoch": 0.6872638908741908, "grad_norm": 0.7138640749459229, "learning_rate": 0.00015269612915059903, "loss": 12.4221, "step": 12621 }, { "epoch": 0.6873183448707738, "grad_norm": 0.5418849717601474, "learning_rate": 0.00015268863440259307, "loss": 12.3072, "step": 12622 }, { "epoch": 0.6873727988673569, "grad_norm": 0.6338427053423075, "learning_rate": 0.0001526811392448714, "loss": 12.316, "step": 12623 }, { "epoch": 0.6874272528639399, "grad_norm": 0.6424556749417951, "learning_rate": 0.00015267364367749242, "loss": 12.2569, "step": 12624 }, { "epoch": 0.6874817068605229, "grad_norm": 0.5318951779941986, "learning_rate": 0.00015266614770051434, "loss": 12.301, "step": 12625 }, { "epoch": 0.6875361608571059, "grad_norm": 0.5824150194263151, "learning_rate": 0.00015265865131399543, "loss": 12.4017, "step": 12626 }, { "epoch": 0.6875906148536889, "grad_norm": 0.6239848450959841, "learning_rate": 0.00015265115451799403, "loss": 12.3879, "step": 12627 }, { "epoch": 0.6876450688502719, "grad_norm": 0.5886551546118839, "learning_rate": 0.0001526436573125684, "loss": 12.349, "step": 12628 }, { "epoch": 0.687699522846855, "grad_norm": 0.6041463662522617, "learning_rate": 0.00015263615969777691, "loss": 12.2759, "step": 12629 }, { "epoch": 0.687753976843438, "grad_norm": 0.6335787004896745, "learning_rate": 0.00015262866167367785, "loss": 12.2605, "step": 12630 }, { "epoch": 0.687808430840021, "grad_norm": 0.5616758429928841, "learning_rate": 0.00015262116324032943, "loss": 12.2135, "step": 12631 }, { "epoch": 0.687862884836604, "grad_norm": 0.5568859215137671, "learning_rate": 0.00015261366439779, "loss": 12.1521, "step": 12632 }, { "epoch": 0.687917338833187, "grad_norm": 0.5301411883490512, "learning_rate": 0.00015260616514611793, "loss": 12.1824, "step": 12633 }, { "epoch": 0.68797179282977, "grad_norm": 0.6112030855999007, "learning_rate": 0.00015259866548537144, "loss": 12.3687, "step": 12634 }, { "epoch": 0.688026246826353, "grad_norm": 0.638658439137997, "learning_rate": 0.00015259116541560902, "loss": 12.3406, "step": 12635 }, { "epoch": 0.688080700822936, "grad_norm": 0.5849601548744854, "learning_rate": 0.0001525836649368888, "loss": 12.2362, "step": 12636 }, { "epoch": 0.688135154819519, "grad_norm": 0.5890796896219497, "learning_rate": 0.00015257616404926915, "loss": 12.2527, "step": 12637 }, { "epoch": 0.688189608816102, "grad_norm": 0.5799865995323636, "learning_rate": 0.0001525686627528085, "loss": 12.3209, "step": 12638 }, { "epoch": 0.688244062812685, "grad_norm": 0.6151652030859852, "learning_rate": 0.00015256116104756507, "loss": 12.3799, "step": 12639 }, { "epoch": 0.6882985168092681, "grad_norm": 0.5264196989976043, "learning_rate": 0.0001525536589335973, "loss": 12.2559, "step": 12640 }, { "epoch": 0.6883529708058511, "grad_norm": 0.5980748544120343, "learning_rate": 0.0001525461564109634, "loss": 12.2105, "step": 12641 }, { "epoch": 0.6884074248024341, "grad_norm": 0.5726667071252022, "learning_rate": 0.0001525386534797218, "loss": 12.2894, "step": 12642 }, { "epoch": 0.6884618787990171, "grad_norm": 0.5679190932449971, "learning_rate": 0.0001525311501399308, "loss": 12.3174, "step": 12643 }, { "epoch": 0.6885163327956001, "grad_norm": 0.5482384017862806, "learning_rate": 0.0001525236463916488, "loss": 12.2429, "step": 12644 }, { "epoch": 0.6885707867921831, "grad_norm": 0.547876635201763, "learning_rate": 0.00015251614223493412, "loss": 12.3051, "step": 12645 }, { "epoch": 0.6886252407887662, "grad_norm": 0.6059517008941783, "learning_rate": 0.00015250863766984508, "loss": 12.2157, "step": 12646 }, { "epoch": 0.6886796947853492, "grad_norm": 0.5977206310025369, "learning_rate": 0.00015250113269644005, "loss": 12.4078, "step": 12647 }, { "epoch": 0.6887341487819322, "grad_norm": 0.5568252330295134, "learning_rate": 0.00015249362731477745, "loss": 12.3534, "step": 12648 }, { "epoch": 0.6887886027785152, "grad_norm": 0.6075483860753537, "learning_rate": 0.00015248612152491558, "loss": 12.3157, "step": 12649 }, { "epoch": 0.6888430567750982, "grad_norm": 0.5951457195496563, "learning_rate": 0.00015247861532691285, "loss": 12.3323, "step": 12650 }, { "epoch": 0.6888975107716812, "grad_norm": 0.6261098231237463, "learning_rate": 0.00015247110872082759, "loss": 12.37, "step": 12651 }, { "epoch": 0.6889519647682643, "grad_norm": 0.651710583732914, "learning_rate": 0.0001524636017067182, "loss": 12.3662, "step": 12652 }, { "epoch": 0.6890064187648473, "grad_norm": 0.6357382069552527, "learning_rate": 0.00015245609428464306, "loss": 12.4347, "step": 12653 }, { "epoch": 0.6890608727614302, "grad_norm": 0.6587152131477861, "learning_rate": 0.00015244858645466046, "loss": 12.3483, "step": 12654 }, { "epoch": 0.6891153267580132, "grad_norm": 0.5647572288398478, "learning_rate": 0.00015244107821682894, "loss": 12.2754, "step": 12655 }, { "epoch": 0.6891697807545962, "grad_norm": 0.5548134170905816, "learning_rate": 0.00015243356957120676, "loss": 12.2864, "step": 12656 }, { "epoch": 0.6892242347511792, "grad_norm": 0.6079633115581555, "learning_rate": 0.00015242606051785236, "loss": 12.3156, "step": 12657 }, { "epoch": 0.6892786887477623, "grad_norm": 0.6751531247949729, "learning_rate": 0.00015241855105682412, "loss": 12.2497, "step": 12658 }, { "epoch": 0.6893331427443453, "grad_norm": 0.5337554194327473, "learning_rate": 0.00015241104118818042, "loss": 12.311, "step": 12659 }, { "epoch": 0.6893875967409283, "grad_norm": 0.6036338919681892, "learning_rate": 0.0001524035309119797, "loss": 12.2545, "step": 12660 }, { "epoch": 0.6894420507375113, "grad_norm": 0.5690388964265504, "learning_rate": 0.00015239602022828032, "loss": 12.2702, "step": 12661 }, { "epoch": 0.6894965047340943, "grad_norm": 0.6315982376221074, "learning_rate": 0.0001523885091371407, "loss": 12.1872, "step": 12662 }, { "epoch": 0.6895509587306773, "grad_norm": 0.560581222380461, "learning_rate": 0.00015238099763861926, "loss": 12.1801, "step": 12663 }, { "epoch": 0.6896054127272604, "grad_norm": 0.5862186620249155, "learning_rate": 0.0001523734857327744, "loss": 12.1854, "step": 12664 }, { "epoch": 0.6896598667238434, "grad_norm": 0.5748225751008038, "learning_rate": 0.00015236597341966452, "loss": 12.3736, "step": 12665 }, { "epoch": 0.6897143207204264, "grad_norm": 0.5517267261087246, "learning_rate": 0.00015235846069934804, "loss": 12.1587, "step": 12666 }, { "epoch": 0.6897687747170094, "grad_norm": 0.7096481690724898, "learning_rate": 0.0001523509475718834, "loss": 12.2512, "step": 12667 }, { "epoch": 0.6898232287135924, "grad_norm": 0.6735552712041467, "learning_rate": 0.00015234343403732902, "loss": 12.1401, "step": 12668 }, { "epoch": 0.6898776827101754, "grad_norm": 0.9097540623374685, "learning_rate": 0.0001523359200957433, "loss": 12.3809, "step": 12669 }, { "epoch": 0.6899321367067585, "grad_norm": 0.6413803080548649, "learning_rate": 0.00015232840574718473, "loss": 12.2742, "step": 12670 }, { "epoch": 0.6899865907033415, "grad_norm": 0.7383597525167909, "learning_rate": 0.00015232089099171165, "loss": 12.2784, "step": 12671 }, { "epoch": 0.6900410446999244, "grad_norm": 0.7636786695259324, "learning_rate": 0.00015231337582938257, "loss": 12.4001, "step": 12672 }, { "epoch": 0.6900954986965074, "grad_norm": 0.6394640382256751, "learning_rate": 0.0001523058602602559, "loss": 12.2575, "step": 12673 }, { "epoch": 0.6901499526930904, "grad_norm": 0.6564638150277223, "learning_rate": 0.0001522983442843901, "loss": 12.2666, "step": 12674 }, { "epoch": 0.6902044066896735, "grad_norm": 0.6819497811738562, "learning_rate": 0.0001522908279018436, "loss": 12.276, "step": 12675 }, { "epoch": 0.6902588606862565, "grad_norm": 0.6309976102996402, "learning_rate": 0.00015228331111267487, "loss": 12.3654, "step": 12676 }, { "epoch": 0.6903133146828395, "grad_norm": 0.632177406445353, "learning_rate": 0.0001522757939169423, "loss": 12.2867, "step": 12677 }, { "epoch": 0.6903677686794225, "grad_norm": 0.6023323477054283, "learning_rate": 0.00015226827631470443, "loss": 12.3492, "step": 12678 }, { "epoch": 0.6904222226760055, "grad_norm": 0.7334268999145962, "learning_rate": 0.00015226075830601966, "loss": 12.4541, "step": 12679 }, { "epoch": 0.6904766766725885, "grad_norm": 0.6600399118868495, "learning_rate": 0.0001522532398909465, "loss": 12.3805, "step": 12680 }, { "epoch": 0.6905311306691716, "grad_norm": 0.6266545126408947, "learning_rate": 0.00015224572106954334, "loss": 12.1993, "step": 12681 }, { "epoch": 0.6905855846657546, "grad_norm": 0.6731096915303995, "learning_rate": 0.0001522382018418687, "loss": 12.1659, "step": 12682 }, { "epoch": 0.6906400386623376, "grad_norm": 0.5633882992088448, "learning_rate": 0.00015223068220798105, "loss": 12.1641, "step": 12683 }, { "epoch": 0.6906944926589206, "grad_norm": 0.6724408702456605, "learning_rate": 0.00015222316216793885, "loss": 12.3575, "step": 12684 }, { "epoch": 0.6907489466555036, "grad_norm": 0.9751431943544033, "learning_rate": 0.00015221564172180062, "loss": 12.2214, "step": 12685 }, { "epoch": 0.6908034006520866, "grad_norm": 0.5675514721348829, "learning_rate": 0.00015220812086962475, "loss": 12.2358, "step": 12686 }, { "epoch": 0.6908578546486697, "grad_norm": 0.5828034922884001, "learning_rate": 0.00015220059961146978, "loss": 12.2934, "step": 12687 }, { "epoch": 0.6909123086452527, "grad_norm": 0.7084947121379525, "learning_rate": 0.00015219307794739424, "loss": 12.2746, "step": 12688 }, { "epoch": 0.6909667626418357, "grad_norm": 0.6315651900269331, "learning_rate": 0.00015218555587745653, "loss": 12.1951, "step": 12689 }, { "epoch": 0.6910212166384186, "grad_norm": 0.6377830658396804, "learning_rate": 0.00015217803340171522, "loss": 12.2809, "step": 12690 }, { "epoch": 0.6910756706350016, "grad_norm": 0.641576023705419, "learning_rate": 0.00015217051052022877, "loss": 12.2517, "step": 12691 }, { "epoch": 0.6911301246315846, "grad_norm": 0.6469535141067794, "learning_rate": 0.00015216298723305565, "loss": 12.3938, "step": 12692 }, { "epoch": 0.6911845786281677, "grad_norm": 0.5669266511751133, "learning_rate": 0.0001521554635402544, "loss": 12.2574, "step": 12693 }, { "epoch": 0.6912390326247507, "grad_norm": 0.6420864976150247, "learning_rate": 0.00015214793944188352, "loss": 12.2383, "step": 12694 }, { "epoch": 0.6912934866213337, "grad_norm": 0.581599782572865, "learning_rate": 0.00015214041493800156, "loss": 12.1808, "step": 12695 }, { "epoch": 0.6913479406179167, "grad_norm": 0.5739199733755143, "learning_rate": 0.00015213289002866698, "loss": 12.2491, "step": 12696 }, { "epoch": 0.6914023946144997, "grad_norm": 0.619162803547792, "learning_rate": 0.00015212536471393825, "loss": 12.35, "step": 12697 }, { "epoch": 0.6914568486110827, "grad_norm": 0.5759863171225407, "learning_rate": 0.00015211783899387395, "loss": 12.2603, "step": 12698 }, { "epoch": 0.6915113026076658, "grad_norm": 0.6136205391072797, "learning_rate": 0.00015211031286853263, "loss": 12.4047, "step": 12699 }, { "epoch": 0.6915657566042488, "grad_norm": 0.5961564416118947, "learning_rate": 0.00015210278633797278, "loss": 12.22, "step": 12700 }, { "epoch": 0.6916202106008318, "grad_norm": 0.5430192216745389, "learning_rate": 0.00015209525940225292, "loss": 12.2475, "step": 12701 }, { "epoch": 0.6916746645974148, "grad_norm": 0.5534574797960371, "learning_rate": 0.00015208773206143157, "loss": 12.2389, "step": 12702 }, { "epoch": 0.6917291185939978, "grad_norm": 0.5240599579137478, "learning_rate": 0.00015208020431556725, "loss": 12.2449, "step": 12703 }, { "epoch": 0.6917835725905809, "grad_norm": 0.5372184729126631, "learning_rate": 0.0001520726761647186, "loss": 12.1546, "step": 12704 }, { "epoch": 0.6918380265871639, "grad_norm": 0.580728450841165, "learning_rate": 0.00015206514760894406, "loss": 12.3278, "step": 12705 }, { "epoch": 0.6918924805837469, "grad_norm": 0.6056883740245131, "learning_rate": 0.0001520576186483022, "loss": 12.2519, "step": 12706 }, { "epoch": 0.6919469345803299, "grad_norm": 0.58052840996791, "learning_rate": 0.00015205008928285155, "loss": 12.1894, "step": 12707 }, { "epoch": 0.6920013885769128, "grad_norm": 0.6161732779139788, "learning_rate": 0.0001520425595126507, "loss": 12.4466, "step": 12708 }, { "epoch": 0.6920558425734958, "grad_norm": 0.590558757040953, "learning_rate": 0.00015203502933775815, "loss": 12.2084, "step": 12709 }, { "epoch": 0.6921102965700789, "grad_norm": 0.6009086221721797, "learning_rate": 0.0001520274987582325, "loss": 12.2238, "step": 12710 }, { "epoch": 0.6921647505666619, "grad_norm": 0.5674633245904497, "learning_rate": 0.0001520199677741323, "loss": 12.2568, "step": 12711 }, { "epoch": 0.6922192045632449, "grad_norm": 0.5529595321026778, "learning_rate": 0.0001520124363855161, "loss": 12.2639, "step": 12712 }, { "epoch": 0.6922736585598279, "grad_norm": 0.6156867194157509, "learning_rate": 0.0001520049045924425, "loss": 12.4344, "step": 12713 }, { "epoch": 0.6923281125564109, "grad_norm": 0.6080044554590114, "learning_rate": 0.00015199737239497, "loss": 12.335, "step": 12714 }, { "epoch": 0.6923825665529939, "grad_norm": 0.5941132220538231, "learning_rate": 0.00015198983979315724, "loss": 12.3197, "step": 12715 }, { "epoch": 0.692437020549577, "grad_norm": 0.5653598233702917, "learning_rate": 0.00015198230678706276, "loss": 12.1885, "step": 12716 }, { "epoch": 0.69249147454616, "grad_norm": 0.5466950124603381, "learning_rate": 0.00015197477337674514, "loss": 12.3421, "step": 12717 }, { "epoch": 0.692545928542743, "grad_norm": 0.6162963914126968, "learning_rate": 0.000151967239562263, "loss": 12.3732, "step": 12718 }, { "epoch": 0.692600382539326, "grad_norm": 0.5681428110322244, "learning_rate": 0.00015195970534367484, "loss": 12.1208, "step": 12719 }, { "epoch": 0.692654836535909, "grad_norm": 0.5765561251357073, "learning_rate": 0.0001519521707210393, "loss": 12.2899, "step": 12720 }, { "epoch": 0.692709290532492, "grad_norm": 0.566818435101275, "learning_rate": 0.00015194463569441496, "loss": 12.3202, "step": 12721 }, { "epoch": 0.6927637445290751, "grad_norm": 0.6627642008500717, "learning_rate": 0.00015193710026386047, "loss": 12.3357, "step": 12722 }, { "epoch": 0.6928181985256581, "grad_norm": 0.6395608584600606, "learning_rate": 0.00015192956442943435, "loss": 12.2827, "step": 12723 }, { "epoch": 0.6928726525222411, "grad_norm": 0.5554920430630873, "learning_rate": 0.00015192202819119523, "loss": 12.2404, "step": 12724 }, { "epoch": 0.692927106518824, "grad_norm": 0.5982854590907775, "learning_rate": 0.00015191449154920172, "loss": 12.2451, "step": 12725 }, { "epoch": 0.692981560515407, "grad_norm": 0.6252918888190135, "learning_rate": 0.00015190695450351244, "loss": 12.2617, "step": 12726 }, { "epoch": 0.69303601451199, "grad_norm": 0.5792879664772468, "learning_rate": 0.0001518994170541859, "loss": 12.1567, "step": 12727 }, { "epoch": 0.6930904685085731, "grad_norm": 0.5816976069993248, "learning_rate": 0.00015189187920128084, "loss": 12.2965, "step": 12728 }, { "epoch": 0.6931449225051561, "grad_norm": 0.5743328304663563, "learning_rate": 0.00015188434094485584, "loss": 12.2071, "step": 12729 }, { "epoch": 0.6931993765017391, "grad_norm": 0.5497556272045456, "learning_rate": 0.00015187680228496948, "loss": 12.3136, "step": 12730 }, { "epoch": 0.6932538304983221, "grad_norm": 0.5326517766493589, "learning_rate": 0.00015186926322168043, "loss": 12.262, "step": 12731 }, { "epoch": 0.6933082844949051, "grad_norm": 0.6330746311860425, "learning_rate": 0.00015186172375504726, "loss": 12.3068, "step": 12732 }, { "epoch": 0.6933627384914881, "grad_norm": 0.6460808031320815, "learning_rate": 0.00015185418388512865, "loss": 12.3098, "step": 12733 }, { "epoch": 0.6934171924880712, "grad_norm": 0.5993161584869566, "learning_rate": 0.0001518466436119832, "loss": 12.2731, "step": 12734 }, { "epoch": 0.6934716464846542, "grad_norm": 0.665898943721501, "learning_rate": 0.00015183910293566956, "loss": 12.4917, "step": 12735 }, { "epoch": 0.6935261004812372, "grad_norm": 0.5514350662347167, "learning_rate": 0.00015183156185624635, "loss": 12.281, "step": 12736 }, { "epoch": 0.6935805544778202, "grad_norm": 0.5824643224684546, "learning_rate": 0.00015182402037377222, "loss": 12.2971, "step": 12737 }, { "epoch": 0.6936350084744032, "grad_norm": 0.5316514992963814, "learning_rate": 0.00015181647848830585, "loss": 12.2978, "step": 12738 }, { "epoch": 0.6936894624709863, "grad_norm": 0.627321558576572, "learning_rate": 0.00015180893619990581, "loss": 12.3271, "step": 12739 }, { "epoch": 0.6937439164675693, "grad_norm": 0.5816828595854907, "learning_rate": 0.00015180139350863086, "loss": 12.3728, "step": 12740 }, { "epoch": 0.6937983704641523, "grad_norm": 0.6047340103704789, "learning_rate": 0.00015179385041453952, "loss": 12.2363, "step": 12741 }, { "epoch": 0.6938528244607353, "grad_norm": 0.5710045945984428, "learning_rate": 0.00015178630691769054, "loss": 12.1712, "step": 12742 }, { "epoch": 0.6939072784573183, "grad_norm": 0.5864954337963909, "learning_rate": 0.00015177876301814255, "loss": 12.3223, "step": 12743 }, { "epoch": 0.6939617324539012, "grad_norm": 0.6000967347826478, "learning_rate": 0.0001517712187159542, "loss": 12.3434, "step": 12744 }, { "epoch": 0.6940161864504844, "grad_norm": 0.5906872874461015, "learning_rate": 0.0001517636740111842, "loss": 12.2595, "step": 12745 }, { "epoch": 0.6940706404470673, "grad_norm": 0.637024524133645, "learning_rate": 0.00015175612890389118, "loss": 12.4404, "step": 12746 }, { "epoch": 0.6941250944436503, "grad_norm": 0.6078531507332336, "learning_rate": 0.0001517485833941338, "loss": 12.2771, "step": 12747 }, { "epoch": 0.6941795484402333, "grad_norm": 0.5362085108950104, "learning_rate": 0.00015174103748197078, "loss": 12.3344, "step": 12748 }, { "epoch": 0.6942340024368163, "grad_norm": 0.591849383692123, "learning_rate": 0.0001517334911674608, "loss": 12.2298, "step": 12749 }, { "epoch": 0.6942884564333993, "grad_norm": 0.5655366449593927, "learning_rate": 0.0001517259444506625, "loss": 12.3227, "step": 12750 }, { "epoch": 0.6943429104299824, "grad_norm": 0.5807513533675587, "learning_rate": 0.00015171839733163457, "loss": 12.2478, "step": 12751 }, { "epoch": 0.6943973644265654, "grad_norm": 0.5495206828814007, "learning_rate": 0.0001517108498104357, "loss": 12.2208, "step": 12752 }, { "epoch": 0.6944518184231484, "grad_norm": 0.5486352146566552, "learning_rate": 0.00015170330188712456, "loss": 12.2291, "step": 12753 }, { "epoch": 0.6945062724197314, "grad_norm": 0.6028736531502734, "learning_rate": 0.00015169575356175992, "loss": 12.4123, "step": 12754 }, { "epoch": 0.6945607264163144, "grad_norm": 0.5180394854006246, "learning_rate": 0.00015168820483440042, "loss": 12.2678, "step": 12755 }, { "epoch": 0.6946151804128974, "grad_norm": 0.6338718124617676, "learning_rate": 0.00015168065570510478, "loss": 12.2655, "step": 12756 }, { "epoch": 0.6946696344094805, "grad_norm": 0.5826209963066219, "learning_rate": 0.0001516731061739317, "loss": 12.2701, "step": 12757 }, { "epoch": 0.6947240884060635, "grad_norm": 0.6083649233191714, "learning_rate": 0.00015166555624093986, "loss": 12.2946, "step": 12758 }, { "epoch": 0.6947785424026465, "grad_norm": 0.5929086951518965, "learning_rate": 0.000151658005906188, "loss": 12.2975, "step": 12759 }, { "epoch": 0.6948329963992295, "grad_norm": 0.6159253227796201, "learning_rate": 0.00015165045516973483, "loss": 12.4129, "step": 12760 }, { "epoch": 0.6948874503958125, "grad_norm": 0.5961923098818454, "learning_rate": 0.00015164290403163905, "loss": 12.2542, "step": 12761 }, { "epoch": 0.6949419043923954, "grad_norm": 0.6082280448345292, "learning_rate": 0.0001516353524919594, "loss": 12.3274, "step": 12762 }, { "epoch": 0.6949963583889786, "grad_norm": 0.5843565354596677, "learning_rate": 0.00015162780055075456, "loss": 12.3214, "step": 12763 }, { "epoch": 0.6950508123855615, "grad_norm": 0.5922646499860922, "learning_rate": 0.0001516202482080833, "loss": 12.2993, "step": 12764 }, { "epoch": 0.6951052663821445, "grad_norm": 0.5319499220696793, "learning_rate": 0.00015161269546400434, "loss": 12.2324, "step": 12765 }, { "epoch": 0.6951597203787275, "grad_norm": 0.7045455639918438, "learning_rate": 0.00015160514231857643, "loss": 12.2201, "step": 12766 }, { "epoch": 0.6952141743753105, "grad_norm": 0.5619467296914059, "learning_rate": 0.00015159758877185823, "loss": 12.2243, "step": 12767 }, { "epoch": 0.6952686283718935, "grad_norm": 0.5465287615022251, "learning_rate": 0.00015159003482390857, "loss": 12.3618, "step": 12768 }, { "epoch": 0.6953230823684766, "grad_norm": 0.5405474447716598, "learning_rate": 0.0001515824804747861, "loss": 12.1688, "step": 12769 }, { "epoch": 0.6953775363650596, "grad_norm": 0.617095133941277, "learning_rate": 0.00015157492572454964, "loss": 12.2569, "step": 12770 }, { "epoch": 0.6954319903616426, "grad_norm": 0.5594754357401277, "learning_rate": 0.00015156737057325792, "loss": 12.1694, "step": 12771 }, { "epoch": 0.6954864443582256, "grad_norm": 0.564470075597679, "learning_rate": 0.00015155981502096966, "loss": 12.1877, "step": 12772 }, { "epoch": 0.6955408983548086, "grad_norm": 0.5394155263359696, "learning_rate": 0.00015155225906774363, "loss": 12.3133, "step": 12773 }, { "epoch": 0.6955953523513917, "grad_norm": 0.5964767067731667, "learning_rate": 0.00015154470271363862, "loss": 12.2444, "step": 12774 }, { "epoch": 0.6956498063479747, "grad_norm": 0.5258330599265519, "learning_rate": 0.00015153714595871328, "loss": 12.3298, "step": 12775 }, { "epoch": 0.6957042603445577, "grad_norm": 0.515769601343884, "learning_rate": 0.00015152958880302654, "loss": 12.1901, "step": 12776 }, { "epoch": 0.6957587143411407, "grad_norm": 0.5625539317088408, "learning_rate": 0.00015152203124663705, "loss": 12.1423, "step": 12777 }, { "epoch": 0.6958131683377237, "grad_norm": 0.5738176088998937, "learning_rate": 0.00015151447328960358, "loss": 12.3304, "step": 12778 }, { "epoch": 0.6958676223343067, "grad_norm": 0.5337821156892026, "learning_rate": 0.00015150691493198495, "loss": 12.158, "step": 12779 }, { "epoch": 0.6959220763308898, "grad_norm": 0.5572367138584677, "learning_rate": 0.00015149935617383986, "loss": 12.3404, "step": 12780 }, { "epoch": 0.6959765303274728, "grad_norm": 0.5785620836035866, "learning_rate": 0.0001514917970152272, "loss": 12.2895, "step": 12781 }, { "epoch": 0.6960309843240557, "grad_norm": 0.5567436442972655, "learning_rate": 0.00015148423745620567, "loss": 12.2598, "step": 12782 }, { "epoch": 0.6960854383206387, "grad_norm": 0.5436685548209514, "learning_rate": 0.0001514766774968341, "loss": 12.2851, "step": 12783 }, { "epoch": 0.6961398923172217, "grad_norm": 0.5620871243842307, "learning_rate": 0.00015146911713717122, "loss": 12.2629, "step": 12784 }, { "epoch": 0.6961943463138047, "grad_norm": 0.5825963197643459, "learning_rate": 0.00015146155637727588, "loss": 12.2778, "step": 12785 }, { "epoch": 0.6962488003103878, "grad_norm": 0.580599403779096, "learning_rate": 0.0001514539952172068, "loss": 12.293, "step": 12786 }, { "epoch": 0.6963032543069708, "grad_norm": 0.5547121852362786, "learning_rate": 0.0001514464336570229, "loss": 12.1824, "step": 12787 }, { "epoch": 0.6963577083035538, "grad_norm": 0.5892606362080496, "learning_rate": 0.00015143887169678284, "loss": 12.1196, "step": 12788 }, { "epoch": 0.6964121623001368, "grad_norm": 0.6486531519357543, "learning_rate": 0.0001514313093365455, "loss": 12.3475, "step": 12789 }, { "epoch": 0.6964666162967198, "grad_norm": 0.5804390280894669, "learning_rate": 0.00015142374657636969, "loss": 12.3115, "step": 12790 }, { "epoch": 0.6965210702933028, "grad_norm": 0.6050143210622061, "learning_rate": 0.00015141618341631418, "loss": 12.3309, "step": 12791 }, { "epoch": 0.6965755242898859, "grad_norm": 0.6440660563413831, "learning_rate": 0.0001514086198564378, "loss": 12.2225, "step": 12792 }, { "epoch": 0.6966299782864689, "grad_norm": 0.6035204658067048, "learning_rate": 0.00015140105589679943, "loss": 12.3462, "step": 12793 }, { "epoch": 0.6966844322830519, "grad_norm": 0.586423227975999, "learning_rate": 0.00015139349153745778, "loss": 12.1881, "step": 12794 }, { "epoch": 0.6967388862796349, "grad_norm": 0.6195614052981723, "learning_rate": 0.00015138592677847173, "loss": 12.3054, "step": 12795 }, { "epoch": 0.6967933402762179, "grad_norm": 0.5695957080876042, "learning_rate": 0.0001513783616199001, "loss": 12.2431, "step": 12796 }, { "epoch": 0.6968477942728009, "grad_norm": 0.5401987111839878, "learning_rate": 0.00015137079606180172, "loss": 12.2292, "step": 12797 }, { "epoch": 0.696902248269384, "grad_norm": 0.644588419023635, "learning_rate": 0.0001513632301042354, "loss": 12.2898, "step": 12798 }, { "epoch": 0.696956702265967, "grad_norm": 0.558414091002706, "learning_rate": 0.00015135566374726, "loss": 12.2123, "step": 12799 }, { "epoch": 0.69701115626255, "grad_norm": 0.5782843434074975, "learning_rate": 0.00015134809699093434, "loss": 12.3783, "step": 12800 }, { "epoch": 0.6970656102591329, "grad_norm": 0.6892559074368685, "learning_rate": 0.00015134052983531725, "loss": 12.3138, "step": 12801 }, { "epoch": 0.6971200642557159, "grad_norm": 0.596134638203521, "learning_rate": 0.00015133296228046764, "loss": 12.2218, "step": 12802 }, { "epoch": 0.6971745182522989, "grad_norm": 0.6657524565185177, "learning_rate": 0.00015132539432644425, "loss": 12.2724, "step": 12803 }, { "epoch": 0.697228972248882, "grad_norm": 0.6904308970357311, "learning_rate": 0.00015131782597330602, "loss": 12.3158, "step": 12804 }, { "epoch": 0.697283426245465, "grad_norm": 0.5646015845227467, "learning_rate": 0.00015131025722111177, "loss": 12.2422, "step": 12805 }, { "epoch": 0.697337880242048, "grad_norm": 0.6003823456063323, "learning_rate": 0.00015130268806992037, "loss": 12.4381, "step": 12806 }, { "epoch": 0.697392334238631, "grad_norm": 0.6052761761039406, "learning_rate": 0.0001512951185197906, "loss": 12.4855, "step": 12807 }, { "epoch": 0.697446788235214, "grad_norm": 0.5215938576523123, "learning_rate": 0.00015128754857078141, "loss": 12.1122, "step": 12808 }, { "epoch": 0.6975012422317971, "grad_norm": 0.5757052319756734, "learning_rate": 0.00015127997822295168, "loss": 12.3324, "step": 12809 }, { "epoch": 0.6975556962283801, "grad_norm": 0.5407436135189331, "learning_rate": 0.00015127240747636023, "loss": 12.2614, "step": 12810 }, { "epoch": 0.6976101502249631, "grad_norm": 0.563649288491365, "learning_rate": 0.00015126483633106595, "loss": 12.2358, "step": 12811 }, { "epoch": 0.6976646042215461, "grad_norm": 0.576318626856709, "learning_rate": 0.00015125726478712765, "loss": 12.3351, "step": 12812 }, { "epoch": 0.6977190582181291, "grad_norm": 0.6966281930802263, "learning_rate": 0.00015124969284460428, "loss": 12.4885, "step": 12813 }, { "epoch": 0.6977735122147121, "grad_norm": 0.7809185908219524, "learning_rate": 0.00015124212050355475, "loss": 12.3636, "step": 12814 }, { "epoch": 0.6978279662112952, "grad_norm": 0.5746577283165907, "learning_rate": 0.00015123454776403786, "loss": 12.2467, "step": 12815 }, { "epoch": 0.6978824202078782, "grad_norm": 0.6024413136284706, "learning_rate": 0.00015122697462611258, "loss": 12.2904, "step": 12816 }, { "epoch": 0.6979368742044612, "grad_norm": 0.5206933345152238, "learning_rate": 0.0001512194010898377, "loss": 12.248, "step": 12817 }, { "epoch": 0.6979913282010441, "grad_norm": 0.5610679148396018, "learning_rate": 0.00015121182715527217, "loss": 12.3231, "step": 12818 }, { "epoch": 0.6980457821976271, "grad_norm": 0.5704520611048067, "learning_rate": 0.0001512042528224749, "loss": 12.2603, "step": 12819 }, { "epoch": 0.6981002361942101, "grad_norm": 0.5341114754262499, "learning_rate": 0.00015119667809150475, "loss": 12.2551, "step": 12820 }, { "epoch": 0.6981546901907932, "grad_norm": 0.5651258746143369, "learning_rate": 0.0001511891029624207, "loss": 12.3242, "step": 12821 }, { "epoch": 0.6982091441873762, "grad_norm": 0.6127180680960584, "learning_rate": 0.00015118152743528158, "loss": 12.2176, "step": 12822 }, { "epoch": 0.6982635981839592, "grad_norm": 0.6178526829964687, "learning_rate": 0.0001511739515101463, "loss": 12.3267, "step": 12823 }, { "epoch": 0.6983180521805422, "grad_norm": 0.5485086221160265, "learning_rate": 0.00015116637518707376, "loss": 12.2266, "step": 12824 }, { "epoch": 0.6983725061771252, "grad_norm": 0.5591931811739885, "learning_rate": 0.00015115879846612295, "loss": 12.1893, "step": 12825 }, { "epoch": 0.6984269601737082, "grad_norm": 0.5504356367908919, "learning_rate": 0.00015115122134735278, "loss": 12.1897, "step": 12826 }, { "epoch": 0.6984814141702913, "grad_norm": 0.5791906704907291, "learning_rate": 0.00015114364383082208, "loss": 12.3134, "step": 12827 }, { "epoch": 0.6985358681668743, "grad_norm": 0.5927965701697596, "learning_rate": 0.00015113606591658985, "loss": 12.2676, "step": 12828 }, { "epoch": 0.6985903221634573, "grad_norm": 0.6514547782037758, "learning_rate": 0.00015112848760471498, "loss": 12.2984, "step": 12829 }, { "epoch": 0.6986447761600403, "grad_norm": 0.5625665172333867, "learning_rate": 0.0001511209088952564, "loss": 12.2379, "step": 12830 }, { "epoch": 0.6986992301566233, "grad_norm": 0.6080219521627901, "learning_rate": 0.0001511133297882731, "loss": 12.3662, "step": 12831 }, { "epoch": 0.6987536841532063, "grad_norm": 0.6208368036638605, "learning_rate": 0.00015110575028382396, "loss": 12.2077, "step": 12832 }, { "epoch": 0.6988081381497894, "grad_norm": 0.5981735476751217, "learning_rate": 0.00015109817038196796, "loss": 12.3023, "step": 12833 }, { "epoch": 0.6988625921463724, "grad_norm": 0.6878859063713462, "learning_rate": 0.00015109059008276397, "loss": 12.3612, "step": 12834 }, { "epoch": 0.6989170461429554, "grad_norm": 0.6536308103657945, "learning_rate": 0.000151083009386271, "loss": 12.3678, "step": 12835 }, { "epoch": 0.6989715001395383, "grad_norm": 0.5334163204272254, "learning_rate": 0.00015107542829254802, "loss": 12.2648, "step": 12836 }, { "epoch": 0.6990259541361213, "grad_norm": 0.5811757267663836, "learning_rate": 0.0001510678468016539, "loss": 12.2406, "step": 12837 }, { "epoch": 0.6990804081327044, "grad_norm": 0.5791601339932474, "learning_rate": 0.00015106026491364767, "loss": 12.173, "step": 12838 }, { "epoch": 0.6991348621292874, "grad_norm": 0.5577669427986419, "learning_rate": 0.00015105268262858825, "loss": 12.3816, "step": 12839 }, { "epoch": 0.6991893161258704, "grad_norm": 0.5523126670538845, "learning_rate": 0.00015104509994653457, "loss": 12.2392, "step": 12840 }, { "epoch": 0.6992437701224534, "grad_norm": 0.5755533090316269, "learning_rate": 0.00015103751686754567, "loss": 12.2055, "step": 12841 }, { "epoch": 0.6992982241190364, "grad_norm": 0.5715341861488942, "learning_rate": 0.00015102993339168047, "loss": 12.2667, "step": 12842 }, { "epoch": 0.6993526781156194, "grad_norm": 0.6337431022952587, "learning_rate": 0.00015102234951899795, "loss": 12.2897, "step": 12843 }, { "epoch": 0.6994071321122025, "grad_norm": 0.5545934302937681, "learning_rate": 0.0001510147652495571, "loss": 12.208, "step": 12844 }, { "epoch": 0.6994615861087855, "grad_norm": 0.5818395953361071, "learning_rate": 0.00015100718058341686, "loss": 12.3168, "step": 12845 }, { "epoch": 0.6995160401053685, "grad_norm": 0.54488203576456, "learning_rate": 0.00015099959552063622, "loss": 12.2805, "step": 12846 }, { "epoch": 0.6995704941019515, "grad_norm": 0.5819073442016384, "learning_rate": 0.00015099201006127418, "loss": 12.2357, "step": 12847 }, { "epoch": 0.6996249480985345, "grad_norm": 0.5980085895394841, "learning_rate": 0.00015098442420538973, "loss": 12.1713, "step": 12848 }, { "epoch": 0.6996794020951175, "grad_norm": 0.5649860888099074, "learning_rate": 0.00015097683795304185, "loss": 12.2638, "step": 12849 }, { "epoch": 0.6997338560917006, "grad_norm": 0.5692873645192698, "learning_rate": 0.0001509692513042895, "loss": 12.2009, "step": 12850 }, { "epoch": 0.6997883100882836, "grad_norm": 0.5254540513369161, "learning_rate": 0.00015096166425919175, "loss": 12.3878, "step": 12851 }, { "epoch": 0.6998427640848666, "grad_norm": 0.559223478872943, "learning_rate": 0.00015095407681780753, "loss": 12.1803, "step": 12852 }, { "epoch": 0.6998972180814496, "grad_norm": 0.5561422820246377, "learning_rate": 0.00015094648898019588, "loss": 12.2901, "step": 12853 }, { "epoch": 0.6999516720780325, "grad_norm": 0.6398204891444215, "learning_rate": 0.00015093890074641575, "loss": 12.2311, "step": 12854 }, { "epoch": 0.7000061260746155, "grad_norm": 0.5730771643651186, "learning_rate": 0.0001509313121165262, "loss": 12.2579, "step": 12855 }, { "epoch": 0.7000605800711986, "grad_norm": 0.5339535864535195, "learning_rate": 0.00015092372309058623, "loss": 12.1686, "step": 12856 }, { "epoch": 0.7001150340677816, "grad_norm": 0.5994247273085027, "learning_rate": 0.00015091613366865486, "loss": 12.2944, "step": 12857 }, { "epoch": 0.7001694880643646, "grad_norm": 0.5083786193557129, "learning_rate": 0.0001509085438507911, "loss": 12.1098, "step": 12858 }, { "epoch": 0.7002239420609476, "grad_norm": 0.5813561783614933, "learning_rate": 0.00015090095363705394, "loss": 12.3114, "step": 12859 }, { "epoch": 0.7002783960575306, "grad_norm": 0.5256842061518319, "learning_rate": 0.00015089336302750246, "loss": 12.2911, "step": 12860 }, { "epoch": 0.7003328500541136, "grad_norm": 0.5854745903233185, "learning_rate": 0.00015088577202219562, "loss": 12.3942, "step": 12861 }, { "epoch": 0.7003873040506967, "grad_norm": 0.6232949445893607, "learning_rate": 0.0001508781806211925, "loss": 12.3221, "step": 12862 }, { "epoch": 0.7004417580472797, "grad_norm": 0.556903770839926, "learning_rate": 0.00015087058882455213, "loss": 12.3262, "step": 12863 }, { "epoch": 0.7004962120438627, "grad_norm": 0.5511222088378669, "learning_rate": 0.0001508629966323335, "loss": 12.2111, "step": 12864 }, { "epoch": 0.7005506660404457, "grad_norm": 0.6933096411771393, "learning_rate": 0.0001508554040445957, "loss": 12.3443, "step": 12865 }, { "epoch": 0.7006051200370287, "grad_norm": 0.6450763766755955, "learning_rate": 0.00015084781106139777, "loss": 12.2932, "step": 12866 }, { "epoch": 0.7006595740336117, "grad_norm": 0.6180854258717581, "learning_rate": 0.00015084021768279867, "loss": 12.326, "step": 12867 }, { "epoch": 0.7007140280301948, "grad_norm": 0.5652729378781475, "learning_rate": 0.00015083262390885754, "loss": 12.1985, "step": 12868 }, { "epoch": 0.7007684820267778, "grad_norm": 0.5832845128774566, "learning_rate": 0.0001508250297396334, "loss": 12.3935, "step": 12869 }, { "epoch": 0.7008229360233608, "grad_norm": 0.6256855422987257, "learning_rate": 0.00015081743517518533, "loss": 12.3105, "step": 12870 }, { "epoch": 0.7008773900199438, "grad_norm": 0.6514861781739107, "learning_rate": 0.0001508098402155724, "loss": 12.2294, "step": 12871 }, { "epoch": 0.7009318440165268, "grad_norm": 0.5976054048957782, "learning_rate": 0.00015080224486085355, "loss": 12.2549, "step": 12872 }, { "epoch": 0.7009862980131099, "grad_norm": 0.6126191865281446, "learning_rate": 0.00015079464911108793, "loss": 12.2187, "step": 12873 }, { "epoch": 0.7010407520096928, "grad_norm": 0.5903689854623854, "learning_rate": 0.00015078705296633462, "loss": 12.2371, "step": 12874 }, { "epoch": 0.7010952060062758, "grad_norm": 0.5812216652704836, "learning_rate": 0.00015077945642665269, "loss": 12.369, "step": 12875 }, { "epoch": 0.7011496600028588, "grad_norm": 0.5534852216522496, "learning_rate": 0.00015077185949210117, "loss": 12.2853, "step": 12876 }, { "epoch": 0.7012041139994418, "grad_norm": 0.6274739036507253, "learning_rate": 0.00015076426216273914, "loss": 12.1898, "step": 12877 }, { "epoch": 0.7012585679960248, "grad_norm": 0.6063997065462186, "learning_rate": 0.00015075666443862572, "loss": 12.3014, "step": 12878 }, { "epoch": 0.7013130219926079, "grad_norm": 0.5217788404855932, "learning_rate": 0.00015074906631981999, "loss": 12.2941, "step": 12879 }, { "epoch": 0.7013674759891909, "grad_norm": 0.5552649160050687, "learning_rate": 0.00015074146780638095, "loss": 12.1025, "step": 12880 }, { "epoch": 0.7014219299857739, "grad_norm": 0.6392569035584885, "learning_rate": 0.00015073386889836782, "loss": 12.1831, "step": 12881 }, { "epoch": 0.7014763839823569, "grad_norm": 0.5521041501633279, "learning_rate": 0.00015072626959583956, "loss": 12.2143, "step": 12882 }, { "epoch": 0.7015308379789399, "grad_norm": 0.6227440240483435, "learning_rate": 0.00015071866989885532, "loss": 12.3998, "step": 12883 }, { "epoch": 0.7015852919755229, "grad_norm": 0.6334247030673338, "learning_rate": 0.00015071106980747421, "loss": 12.3131, "step": 12884 }, { "epoch": 0.701639745972106, "grad_norm": 0.5563039830909307, "learning_rate": 0.0001507034693217553, "loss": 12.2404, "step": 12885 }, { "epoch": 0.701694199968689, "grad_norm": 0.5740373990473525, "learning_rate": 0.00015069586844175775, "loss": 12.1678, "step": 12886 }, { "epoch": 0.701748653965272, "grad_norm": 0.5615508506413522, "learning_rate": 0.00015068826716754062, "loss": 12.2148, "step": 12887 }, { "epoch": 0.701803107961855, "grad_norm": 0.5881964519645851, "learning_rate": 0.000150680665499163, "loss": 12.3873, "step": 12888 }, { "epoch": 0.701857561958438, "grad_norm": 0.6559325462620302, "learning_rate": 0.000150673063436684, "loss": 12.2265, "step": 12889 }, { "epoch": 0.701912015955021, "grad_norm": 0.5351451244591657, "learning_rate": 0.0001506654609801628, "loss": 12.2559, "step": 12890 }, { "epoch": 0.701966469951604, "grad_norm": 0.5569533211512768, "learning_rate": 0.00015065785812965848, "loss": 12.0566, "step": 12891 }, { "epoch": 0.702020923948187, "grad_norm": 0.5937804356031744, "learning_rate": 0.00015065025488523017, "loss": 12.2113, "step": 12892 }, { "epoch": 0.70207537794477, "grad_norm": 0.5659010125123903, "learning_rate": 0.00015064265124693698, "loss": 12.278, "step": 12893 }, { "epoch": 0.702129831941353, "grad_norm": 0.535901319384464, "learning_rate": 0.00015063504721483803, "loss": 12.3676, "step": 12894 }, { "epoch": 0.702184285937936, "grad_norm": 0.5669487525365963, "learning_rate": 0.00015062744278899242, "loss": 12.3111, "step": 12895 }, { "epoch": 0.702238739934519, "grad_norm": 0.6140266146940161, "learning_rate": 0.0001506198379694594, "loss": 12.276, "step": 12896 }, { "epoch": 0.7022931939311021, "grad_norm": 0.5226019933935814, "learning_rate": 0.000150612232756298, "loss": 12.2468, "step": 12897 }, { "epoch": 0.7023476479276851, "grad_norm": 0.5518765132762485, "learning_rate": 0.00015060462714956737, "loss": 12.3558, "step": 12898 }, { "epoch": 0.7024021019242681, "grad_norm": 0.5930158592363907, "learning_rate": 0.0001505970211493267, "loss": 12.2942, "step": 12899 }, { "epoch": 0.7024565559208511, "grad_norm": 0.5990262226635165, "learning_rate": 0.00015058941475563507, "loss": 12.4069, "step": 12900 }, { "epoch": 0.7025110099174341, "grad_norm": 0.6295009447765174, "learning_rate": 0.0001505818079685517, "loss": 12.2482, "step": 12901 }, { "epoch": 0.7025654639140171, "grad_norm": 0.5713781944258155, "learning_rate": 0.0001505742007881357, "loss": 12.272, "step": 12902 }, { "epoch": 0.7026199179106002, "grad_norm": 0.5775952506978804, "learning_rate": 0.00015056659321444627, "loss": 12.198, "step": 12903 }, { "epoch": 0.7026743719071832, "grad_norm": 0.5989314705696425, "learning_rate": 0.00015055898524754245, "loss": 12.3093, "step": 12904 }, { "epoch": 0.7027288259037662, "grad_norm": 0.5691834182800973, "learning_rate": 0.00015055137688748356, "loss": 12.2635, "step": 12905 }, { "epoch": 0.7027832799003492, "grad_norm": 0.5813875692402253, "learning_rate": 0.00015054376813432866, "loss": 12.2248, "step": 12906 }, { "epoch": 0.7028377338969322, "grad_norm": 0.5870360426395933, "learning_rate": 0.00015053615898813694, "loss": 12.2217, "step": 12907 }, { "epoch": 0.7028921878935153, "grad_norm": 0.5343732930374057, "learning_rate": 0.00015052854944896756, "loss": 12.3343, "step": 12908 }, { "epoch": 0.7029466418900983, "grad_norm": 0.5699443719681166, "learning_rate": 0.0001505209395168797, "loss": 12.223, "step": 12909 }, { "epoch": 0.7030010958866812, "grad_norm": 0.5973307979328459, "learning_rate": 0.00015051332919193257, "loss": 12.2756, "step": 12910 }, { "epoch": 0.7030555498832642, "grad_norm": 0.6120749129557116, "learning_rate": 0.0001505057184741853, "loss": 12.3103, "step": 12911 }, { "epoch": 0.7031100038798472, "grad_norm": 0.6953633712044719, "learning_rate": 0.0001504981073636971, "loss": 12.3662, "step": 12912 }, { "epoch": 0.7031644578764302, "grad_norm": 0.5138596022468547, "learning_rate": 0.00015049049586052718, "loss": 12.253, "step": 12913 }, { "epoch": 0.7032189118730133, "grad_norm": 0.6047293103686023, "learning_rate": 0.00015048288396473468, "loss": 12.4362, "step": 12914 }, { "epoch": 0.7032733658695963, "grad_norm": 0.5717537391124353, "learning_rate": 0.00015047527167637878, "loss": 12.1667, "step": 12915 }, { "epoch": 0.7033278198661793, "grad_norm": 0.5407187604839495, "learning_rate": 0.0001504676589955187, "loss": 12.1886, "step": 12916 }, { "epoch": 0.7033822738627623, "grad_norm": 0.6358353512639061, "learning_rate": 0.00015046004592221367, "loss": 12.2309, "step": 12917 }, { "epoch": 0.7034367278593453, "grad_norm": 0.5882551571751676, "learning_rate": 0.00015045243245652285, "loss": 12.3412, "step": 12918 }, { "epoch": 0.7034911818559283, "grad_norm": 0.5664833841054566, "learning_rate": 0.00015044481859850545, "loss": 12.3031, "step": 12919 }, { "epoch": 0.7035456358525114, "grad_norm": 0.6640648902820085, "learning_rate": 0.00015043720434822067, "loss": 12.1166, "step": 12920 }, { "epoch": 0.7036000898490944, "grad_norm": 0.5332762140500776, "learning_rate": 0.0001504295897057278, "loss": 12.213, "step": 12921 }, { "epoch": 0.7036545438456774, "grad_norm": 0.6033862431121884, "learning_rate": 0.0001504219746710859, "loss": 12.1348, "step": 12922 }, { "epoch": 0.7037089978422604, "grad_norm": 0.6008975227578273, "learning_rate": 0.0001504143592443543, "loss": 12.2651, "step": 12923 }, { "epoch": 0.7037634518388434, "grad_norm": 0.5713175980307648, "learning_rate": 0.00015040674342559217, "loss": 12.1195, "step": 12924 }, { "epoch": 0.7038179058354264, "grad_norm": 0.6870336688388138, "learning_rate": 0.00015039912721485877, "loss": 12.2627, "step": 12925 }, { "epoch": 0.7038723598320095, "grad_norm": 0.550509285896728, "learning_rate": 0.0001503915106122133, "loss": 12.2699, "step": 12926 }, { "epoch": 0.7039268138285925, "grad_norm": 0.5183542937646016, "learning_rate": 0.000150383893617715, "loss": 12.1956, "step": 12927 }, { "epoch": 0.7039812678251754, "grad_norm": 0.505524050927356, "learning_rate": 0.0001503762762314231, "loss": 12.2436, "step": 12928 }, { "epoch": 0.7040357218217584, "grad_norm": 0.5528750624664851, "learning_rate": 0.0001503686584533968, "loss": 12.2574, "step": 12929 }, { "epoch": 0.7040901758183414, "grad_norm": 0.6847928417111844, "learning_rate": 0.00015036104028369538, "loss": 12.2566, "step": 12930 }, { "epoch": 0.7041446298149244, "grad_norm": 0.5811133448734164, "learning_rate": 0.00015035342172237806, "loss": 12.193, "step": 12931 }, { "epoch": 0.7041990838115075, "grad_norm": 0.5146003924536093, "learning_rate": 0.0001503458027695041, "loss": 12.2794, "step": 12932 }, { "epoch": 0.7042535378080905, "grad_norm": 0.6257737359358769, "learning_rate": 0.00015033818342513272, "loss": 12.2411, "step": 12933 }, { "epoch": 0.7043079918046735, "grad_norm": 0.5742448206754013, "learning_rate": 0.00015033056368932322, "loss": 12.3275, "step": 12934 }, { "epoch": 0.7043624458012565, "grad_norm": 0.5797591318656884, "learning_rate": 0.00015032294356213477, "loss": 12.1942, "step": 12935 }, { "epoch": 0.7044168997978395, "grad_norm": 0.7451421687573521, "learning_rate": 0.0001503153230436267, "loss": 12.226, "step": 12936 }, { "epoch": 0.7044713537944225, "grad_norm": 0.539841668324749, "learning_rate": 0.00015030770213385824, "loss": 12.272, "step": 12937 }, { "epoch": 0.7045258077910056, "grad_norm": 0.5935585281767789, "learning_rate": 0.00015030008083288864, "loss": 12.3097, "step": 12938 }, { "epoch": 0.7045802617875886, "grad_norm": 0.5511736584624088, "learning_rate": 0.0001502924591407772, "loss": 12.2108, "step": 12939 }, { "epoch": 0.7046347157841716, "grad_norm": 0.5534412404344856, "learning_rate": 0.00015028483705758312, "loss": 12.2039, "step": 12940 }, { "epoch": 0.7046891697807546, "grad_norm": 0.6563150821077004, "learning_rate": 0.00015027721458336574, "loss": 12.2303, "step": 12941 }, { "epoch": 0.7047436237773376, "grad_norm": 0.6366833265609215, "learning_rate": 0.00015026959171818432, "loss": 12.4365, "step": 12942 }, { "epoch": 0.7047980777739207, "grad_norm": 0.6358540136371424, "learning_rate": 0.0001502619684620981, "loss": 12.3262, "step": 12943 }, { "epoch": 0.7048525317705037, "grad_norm": 0.6537474371778512, "learning_rate": 0.00015025434481516638, "loss": 12.3111, "step": 12944 }, { "epoch": 0.7049069857670867, "grad_norm": 0.5743493906745762, "learning_rate": 0.00015024672077744845, "loss": 12.2707, "step": 12945 }, { "epoch": 0.7049614397636697, "grad_norm": 0.5473693388988641, "learning_rate": 0.00015023909634900363, "loss": 12.2449, "step": 12946 }, { "epoch": 0.7050158937602526, "grad_norm": 0.6103975608154453, "learning_rate": 0.00015023147152989113, "loss": 12.3103, "step": 12947 }, { "epoch": 0.7050703477568356, "grad_norm": 0.5814312456830363, "learning_rate": 0.00015022384632017033, "loss": 12.2969, "step": 12948 }, { "epoch": 0.7051248017534187, "grad_norm": 0.5968487505951352, "learning_rate": 0.00015021622071990045, "loss": 12.2258, "step": 12949 }, { "epoch": 0.7051792557500017, "grad_norm": 0.5681675401207187, "learning_rate": 0.00015020859472914078, "loss": 12.083, "step": 12950 }, { "epoch": 0.7052337097465847, "grad_norm": 0.5611788321327241, "learning_rate": 0.0001502009683479507, "loss": 12.2544, "step": 12951 }, { "epoch": 0.7052881637431677, "grad_norm": 0.6028495282539408, "learning_rate": 0.00015019334157638948, "loss": 12.0151, "step": 12952 }, { "epoch": 0.7053426177397507, "grad_norm": 0.6158821652859549, "learning_rate": 0.00015018571441451642, "loss": 12.3645, "step": 12953 }, { "epoch": 0.7053970717363337, "grad_norm": 0.5943618216782155, "learning_rate": 0.00015017808686239079, "loss": 12.2499, "step": 12954 }, { "epoch": 0.7054515257329168, "grad_norm": 0.5629309510246335, "learning_rate": 0.00015017045892007195, "loss": 12.3606, "step": 12955 }, { "epoch": 0.7055059797294998, "grad_norm": 0.6314402368385341, "learning_rate": 0.0001501628305876192, "loss": 12.0944, "step": 12956 }, { "epoch": 0.7055604337260828, "grad_norm": 0.6365115644400484, "learning_rate": 0.00015015520186509193, "loss": 12.3064, "step": 12957 }, { "epoch": 0.7056148877226658, "grad_norm": 0.5965986296655351, "learning_rate": 0.00015014757275254932, "loss": 12.1364, "step": 12958 }, { "epoch": 0.7056693417192488, "grad_norm": 0.5948973063017181, "learning_rate": 0.00015013994325005085, "loss": 12.3672, "step": 12959 }, { "epoch": 0.7057237957158318, "grad_norm": 0.5884444949376633, "learning_rate": 0.00015013231335765572, "loss": 12.2143, "step": 12960 }, { "epoch": 0.7057782497124149, "grad_norm": 0.6133597645062747, "learning_rate": 0.0001501246830754233, "loss": 12.3034, "step": 12961 }, { "epoch": 0.7058327037089979, "grad_norm": 0.5898220414273352, "learning_rate": 0.000150117052403413, "loss": 12.2611, "step": 12962 }, { "epoch": 0.7058871577055809, "grad_norm": 0.5961760638203288, "learning_rate": 0.00015010942134168403, "loss": 12.1966, "step": 12963 }, { "epoch": 0.7059416117021639, "grad_norm": 0.5601188165322548, "learning_rate": 0.00015010178989029584, "loss": 12.3401, "step": 12964 }, { "epoch": 0.7059960656987468, "grad_norm": 0.6761852903853308, "learning_rate": 0.00015009415804930772, "loss": 12.328, "step": 12965 }, { "epoch": 0.7060505196953298, "grad_norm": 0.8251909509115992, "learning_rate": 0.000150086525818779, "loss": 12.3578, "step": 12966 }, { "epoch": 0.7061049736919129, "grad_norm": 0.5408091436796602, "learning_rate": 0.00015007889319876912, "loss": 12.2561, "step": 12967 }, { "epoch": 0.7061594276884959, "grad_norm": 0.5507950813247795, "learning_rate": 0.00015007126018933733, "loss": 12.1793, "step": 12968 }, { "epoch": 0.7062138816850789, "grad_norm": 0.5232231954991038, "learning_rate": 0.000150063626790543, "loss": 12.2478, "step": 12969 }, { "epoch": 0.7062683356816619, "grad_norm": 0.5860145048316517, "learning_rate": 0.00015005599300244556, "loss": 12.3502, "step": 12970 }, { "epoch": 0.7063227896782449, "grad_norm": 0.5740070028883291, "learning_rate": 0.0001500483588251043, "loss": 12.201, "step": 12971 }, { "epoch": 0.706377243674828, "grad_norm": 0.5277369607087119, "learning_rate": 0.0001500407242585786, "loss": 12.2771, "step": 12972 }, { "epoch": 0.706431697671411, "grad_norm": 0.6978047578361452, "learning_rate": 0.00015003308930292784, "loss": 12.2717, "step": 12973 }, { "epoch": 0.706486151667994, "grad_norm": 0.5385232740976329, "learning_rate": 0.00015002545395821138, "loss": 12.1565, "step": 12974 }, { "epoch": 0.706540605664577, "grad_norm": 0.5768496412226605, "learning_rate": 0.0001500178182244886, "loss": 12.2259, "step": 12975 }, { "epoch": 0.70659505966116, "grad_norm": 0.6189081619794492, "learning_rate": 0.0001500101821018189, "loss": 12.1882, "step": 12976 }, { "epoch": 0.706649513657743, "grad_norm": 0.630691109976242, "learning_rate": 0.00015000254559026163, "loss": 12.1659, "step": 12977 }, { "epoch": 0.7067039676543261, "grad_norm": 0.5674145907848038, "learning_rate": 0.00014999490868987617, "loss": 12.2894, "step": 12978 }, { "epoch": 0.7067584216509091, "grad_norm": 0.5967081606199218, "learning_rate": 0.00014998727140072194, "loss": 12.3154, "step": 12979 }, { "epoch": 0.7068128756474921, "grad_norm": 0.5814432648578499, "learning_rate": 0.00014997963372285827, "loss": 12.2798, "step": 12980 }, { "epoch": 0.7068673296440751, "grad_norm": 0.7079190267375342, "learning_rate": 0.0001499719956563446, "loss": 12.2989, "step": 12981 }, { "epoch": 0.706921783640658, "grad_norm": 0.5976573686600499, "learning_rate": 0.0001499643572012403, "loss": 12.3619, "step": 12982 }, { "epoch": 0.706976237637241, "grad_norm": 0.5945798656008229, "learning_rate": 0.00014995671835760482, "loss": 12.2903, "step": 12983 }, { "epoch": 0.7070306916338241, "grad_norm": 0.7134044262037385, "learning_rate": 0.0001499490791254975, "loss": 12.315, "step": 12984 }, { "epoch": 0.7070851456304071, "grad_norm": 0.7298247499418934, "learning_rate": 0.00014994143950497775, "loss": 12.2258, "step": 12985 }, { "epoch": 0.7071395996269901, "grad_norm": 0.5753519644815357, "learning_rate": 0.000149933799496105, "loss": 12.2707, "step": 12986 }, { "epoch": 0.7071940536235731, "grad_norm": 0.5618107193737906, "learning_rate": 0.00014992615909893865, "loss": 12.2128, "step": 12987 }, { "epoch": 0.7072485076201561, "grad_norm": 0.6589114753084558, "learning_rate": 0.0001499185183135381, "loss": 12.3593, "step": 12988 }, { "epoch": 0.7073029616167391, "grad_norm": 0.6024408905521136, "learning_rate": 0.0001499108771399628, "loss": 12.1083, "step": 12989 }, { "epoch": 0.7073574156133222, "grad_norm": 0.5998225459431465, "learning_rate": 0.00014990323557827214, "loss": 12.2451, "step": 12990 }, { "epoch": 0.7074118696099052, "grad_norm": 0.6596737008732433, "learning_rate": 0.00014989559362852555, "loss": 12.352, "step": 12991 }, { "epoch": 0.7074663236064882, "grad_norm": 0.5640430440547669, "learning_rate": 0.0001498879512907825, "loss": 12.2266, "step": 12992 }, { "epoch": 0.7075207776030712, "grad_norm": 0.58905805846539, "learning_rate": 0.0001498803085651023, "loss": 12.2615, "step": 12993 }, { "epoch": 0.7075752315996542, "grad_norm": 0.5459263943702819, "learning_rate": 0.0001498726654515445, "loss": 12.2659, "step": 12994 }, { "epoch": 0.7076296855962372, "grad_norm": 0.64321390044219, "learning_rate": 0.0001498650219501685, "loss": 12.3126, "step": 12995 }, { "epoch": 0.7076841395928203, "grad_norm": 0.6119570222522704, "learning_rate": 0.00014985737806103372, "loss": 12.3096, "step": 12996 }, { "epoch": 0.7077385935894033, "grad_norm": 0.594510800282877, "learning_rate": 0.0001498497337841996, "loss": 12.3027, "step": 12997 }, { "epoch": 0.7077930475859863, "grad_norm": 0.6117166303249918, "learning_rate": 0.0001498420891197256, "loss": 12.0491, "step": 12998 }, { "epoch": 0.7078475015825693, "grad_norm": 0.5886463053915209, "learning_rate": 0.00014983444406767112, "loss": 12.3004, "step": 12999 }, { "epoch": 0.7079019555791523, "grad_norm": 0.5637629847214728, "learning_rate": 0.00014982679862809566, "loss": 12.2073, "step": 13000 }, { "epoch": 0.7079564095757352, "grad_norm": 0.8458027872746456, "learning_rate": 0.00014981915280105868, "loss": 12.3988, "step": 13001 }, { "epoch": 0.7080108635723183, "grad_norm": 0.6073435712822226, "learning_rate": 0.0001498115065866196, "loss": 12.2968, "step": 13002 }, { "epoch": 0.7080653175689013, "grad_norm": 0.574713441835352, "learning_rate": 0.0001498038599848379, "loss": 12.3245, "step": 13003 }, { "epoch": 0.7081197715654843, "grad_norm": 0.5738326787390379, "learning_rate": 0.000149796212995773, "loss": 12.0705, "step": 13004 }, { "epoch": 0.7081742255620673, "grad_norm": 0.6681791939973973, "learning_rate": 0.00014978856561948442, "loss": 12.3879, "step": 13005 }, { "epoch": 0.7082286795586503, "grad_norm": 0.5903997054499869, "learning_rate": 0.00014978091785603162, "loss": 12.3157, "step": 13006 }, { "epoch": 0.7082831335552334, "grad_norm": 0.6755493009094661, "learning_rate": 0.00014977326970547406, "loss": 12.3751, "step": 13007 }, { "epoch": 0.7083375875518164, "grad_norm": 0.6520915349052323, "learning_rate": 0.00014976562116787115, "loss": 12.2351, "step": 13008 }, { "epoch": 0.7083920415483994, "grad_norm": 0.6333760318312311, "learning_rate": 0.00014975797224328247, "loss": 12.1828, "step": 13009 }, { "epoch": 0.7084464955449824, "grad_norm": 0.6331195754530802, "learning_rate": 0.00014975032293176744, "loss": 12.3569, "step": 13010 }, { "epoch": 0.7085009495415654, "grad_norm": 0.6345875524219766, "learning_rate": 0.00014974267323338556, "loss": 12.3998, "step": 13011 }, { "epoch": 0.7085554035381484, "grad_norm": 0.580569784650918, "learning_rate": 0.00014973502314819635, "loss": 12.2017, "step": 13012 }, { "epoch": 0.7086098575347315, "grad_norm": 0.5794457087183509, "learning_rate": 0.0001497273726762592, "loss": 12.2481, "step": 13013 }, { "epoch": 0.7086643115313145, "grad_norm": 0.566031642380644, "learning_rate": 0.0001497197218176337, "loss": 12.17, "step": 13014 }, { "epoch": 0.7087187655278975, "grad_norm": 0.5701972777924958, "learning_rate": 0.00014971207057237927, "loss": 12.2852, "step": 13015 }, { "epoch": 0.7087732195244805, "grad_norm": 0.6256922142544816, "learning_rate": 0.00014970441894055544, "loss": 12.3115, "step": 13016 }, { "epoch": 0.7088276735210635, "grad_norm": 0.6354118579123414, "learning_rate": 0.00014969676692222174, "loss": 12.3488, "step": 13017 }, { "epoch": 0.7088821275176465, "grad_norm": 0.6177833774317564, "learning_rate": 0.00014968911451743766, "loss": 12.3751, "step": 13018 }, { "epoch": 0.7089365815142296, "grad_norm": 0.6051671506499325, "learning_rate": 0.00014968146172626264, "loss": 12.1438, "step": 13019 }, { "epoch": 0.7089910355108126, "grad_norm": 0.5834906437141846, "learning_rate": 0.0001496738085487563, "loss": 12.2348, "step": 13020 }, { "epoch": 0.7090454895073955, "grad_norm": 0.579844554082125, "learning_rate": 0.00014966615498497804, "loss": 12.297, "step": 13021 }, { "epoch": 0.7090999435039785, "grad_norm": 0.5442590529551725, "learning_rate": 0.00014965850103498747, "loss": 12.2521, "step": 13022 }, { "epoch": 0.7091543975005615, "grad_norm": 0.5210616076593362, "learning_rate": 0.00014965084669884407, "loss": 11.9946, "step": 13023 }, { "epoch": 0.7092088514971445, "grad_norm": 0.6417333566925977, "learning_rate": 0.00014964319197660736, "loss": 12.4075, "step": 13024 }, { "epoch": 0.7092633054937276, "grad_norm": 0.5371747489792881, "learning_rate": 0.00014963553686833683, "loss": 12.1904, "step": 13025 }, { "epoch": 0.7093177594903106, "grad_norm": 0.5548915066555968, "learning_rate": 0.00014962788137409206, "loss": 12.1782, "step": 13026 }, { "epoch": 0.7093722134868936, "grad_norm": 0.5334034909808053, "learning_rate": 0.00014962022549393255, "loss": 12.1223, "step": 13027 }, { "epoch": 0.7094266674834766, "grad_norm": 0.5738456004784469, "learning_rate": 0.00014961256922791787, "loss": 12.1724, "step": 13028 }, { "epoch": 0.7094811214800596, "grad_norm": 0.6147623973023848, "learning_rate": 0.00014960491257610752, "loss": 12.3882, "step": 13029 }, { "epoch": 0.7095355754766426, "grad_norm": 0.5406251727724115, "learning_rate": 0.00014959725553856105, "loss": 12.1801, "step": 13030 }, { "epoch": 0.7095900294732257, "grad_norm": 0.5821260739638942, "learning_rate": 0.000149589598115338, "loss": 12.2504, "step": 13031 }, { "epoch": 0.7096444834698087, "grad_norm": 0.555126981255046, "learning_rate": 0.00014958194030649792, "loss": 12.1343, "step": 13032 }, { "epoch": 0.7096989374663917, "grad_norm": 0.5697013295420689, "learning_rate": 0.00014957428211210036, "loss": 12.3033, "step": 13033 }, { "epoch": 0.7097533914629747, "grad_norm": 0.5492315961312766, "learning_rate": 0.00014956662353220488, "loss": 12.3455, "step": 13034 }, { "epoch": 0.7098078454595577, "grad_norm": 0.5918307712403531, "learning_rate": 0.00014955896456687103, "loss": 12.2083, "step": 13035 }, { "epoch": 0.7098622994561407, "grad_norm": 0.6530977518570804, "learning_rate": 0.00014955130521615833, "loss": 12.2589, "step": 13036 }, { "epoch": 0.7099167534527238, "grad_norm": 0.6120186751995188, "learning_rate": 0.00014954364548012638, "loss": 12.2314, "step": 13037 }, { "epoch": 0.7099712074493068, "grad_norm": 0.5867757951906781, "learning_rate": 0.00014953598535883474, "loss": 12.3824, "step": 13038 }, { "epoch": 0.7100256614458897, "grad_norm": 0.6451329228108325, "learning_rate": 0.00014952832485234295, "loss": 12.3503, "step": 13039 }, { "epoch": 0.7100801154424727, "grad_norm": 0.6861941361354333, "learning_rate": 0.00014952066396071062, "loss": 12.3653, "step": 13040 }, { "epoch": 0.7101345694390557, "grad_norm": 0.6513061150663569, "learning_rate": 0.0001495130026839973, "loss": 12.2643, "step": 13041 }, { "epoch": 0.7101890234356388, "grad_norm": 0.6295887315128024, "learning_rate": 0.0001495053410222626, "loss": 12.1674, "step": 13042 }, { "epoch": 0.7102434774322218, "grad_norm": 0.6510454621330697, "learning_rate": 0.00014949767897556602, "loss": 12.2018, "step": 13043 }, { "epoch": 0.7102979314288048, "grad_norm": 0.6875565394105327, "learning_rate": 0.00014949001654396719, "loss": 12.3256, "step": 13044 }, { "epoch": 0.7103523854253878, "grad_norm": 0.5398634943151209, "learning_rate": 0.0001494823537275257, "loss": 12.2262, "step": 13045 }, { "epoch": 0.7104068394219708, "grad_norm": 0.543330019868829, "learning_rate": 0.00014947469052630115, "loss": 12.2737, "step": 13046 }, { "epoch": 0.7104612934185538, "grad_norm": 0.6116969575632312, "learning_rate": 0.0001494670269403531, "loss": 12.2623, "step": 13047 }, { "epoch": 0.7105157474151369, "grad_norm": 0.577409531377027, "learning_rate": 0.00014945936296974113, "loss": 12.3066, "step": 13048 }, { "epoch": 0.7105702014117199, "grad_norm": 0.6337211061727935, "learning_rate": 0.0001494516986145249, "loss": 12.2312, "step": 13049 }, { "epoch": 0.7106246554083029, "grad_norm": 0.6188852759508668, "learning_rate": 0.00014944403387476393, "loss": 12.1674, "step": 13050 }, { "epoch": 0.7106791094048859, "grad_norm": 0.6080463199956461, "learning_rate": 0.00014943636875051788, "loss": 12.2713, "step": 13051 }, { "epoch": 0.7107335634014689, "grad_norm": 0.5719060777051059, "learning_rate": 0.00014942870324184633, "loss": 12.248, "step": 13052 }, { "epoch": 0.7107880173980519, "grad_norm": 0.5589157114552834, "learning_rate": 0.0001494210373488089, "loss": 12.2863, "step": 13053 }, { "epoch": 0.710842471394635, "grad_norm": 0.6891559546184134, "learning_rate": 0.00014941337107146518, "loss": 12.2425, "step": 13054 }, { "epoch": 0.710896925391218, "grad_norm": 0.5982489360119315, "learning_rate": 0.0001494057044098748, "loss": 12.2747, "step": 13055 }, { "epoch": 0.710951379387801, "grad_norm": 0.6009505560195769, "learning_rate": 0.00014939803736409738, "loss": 12.2842, "step": 13056 }, { "epoch": 0.711005833384384, "grad_norm": 0.6199360893918832, "learning_rate": 0.00014939036993419255, "loss": 12.2094, "step": 13057 }, { "epoch": 0.7110602873809669, "grad_norm": 0.6178935479060033, "learning_rate": 0.00014938270212021994, "loss": 12.323, "step": 13058 }, { "epoch": 0.7111147413775499, "grad_norm": 0.6180419467020031, "learning_rate": 0.0001493750339222391, "loss": 12.4192, "step": 13059 }, { "epoch": 0.711169195374133, "grad_norm": 0.61773533913744, "learning_rate": 0.00014936736534030978, "loss": 12.3702, "step": 13060 }, { "epoch": 0.711223649370716, "grad_norm": 0.6224002643765801, "learning_rate": 0.0001493596963744915, "loss": 12.3371, "step": 13061 }, { "epoch": 0.711278103367299, "grad_norm": 0.6259537112445535, "learning_rate": 0.00014935202702484395, "loss": 12.3679, "step": 13062 }, { "epoch": 0.711332557363882, "grad_norm": 0.5328991389879344, "learning_rate": 0.00014934435729142682, "loss": 12.269, "step": 13063 }, { "epoch": 0.711387011360465, "grad_norm": 0.5898400658269457, "learning_rate": 0.0001493366871742996, "loss": 12.4181, "step": 13064 }, { "epoch": 0.711441465357048, "grad_norm": 0.5628544206358188, "learning_rate": 0.0001493290166735221, "loss": 12.3083, "step": 13065 }, { "epoch": 0.7114959193536311, "grad_norm": 0.6712292466408655, "learning_rate": 0.00014932134578915385, "loss": 12.2852, "step": 13066 }, { "epoch": 0.7115503733502141, "grad_norm": 0.5481225982918251, "learning_rate": 0.00014931367452125456, "loss": 12.0959, "step": 13067 }, { "epoch": 0.7116048273467971, "grad_norm": 0.6030042765006167, "learning_rate": 0.00014930600286988387, "loss": 12.2808, "step": 13068 }, { "epoch": 0.7116592813433801, "grad_norm": 0.6391940261618729, "learning_rate": 0.00014929833083510144, "loss": 12.4102, "step": 13069 }, { "epoch": 0.7117137353399631, "grad_norm": 0.5354168607988496, "learning_rate": 0.00014929065841696686, "loss": 12.2758, "step": 13070 }, { "epoch": 0.7117681893365461, "grad_norm": 0.596410067524563, "learning_rate": 0.00014928298561553996, "loss": 12.3488, "step": 13071 }, { "epoch": 0.7118226433331292, "grad_norm": 0.5435545899850704, "learning_rate": 0.00014927531243088022, "loss": 12.0993, "step": 13072 }, { "epoch": 0.7118770973297122, "grad_norm": 0.5464907779246453, "learning_rate": 0.00014926763886304744, "loss": 12.2565, "step": 13073 }, { "epoch": 0.7119315513262952, "grad_norm": 0.5507232020598494, "learning_rate": 0.0001492599649121012, "loss": 12.2084, "step": 13074 }, { "epoch": 0.7119860053228781, "grad_norm": 0.576496083537236, "learning_rate": 0.0001492522905781012, "loss": 12.4546, "step": 13075 }, { "epoch": 0.7120404593194611, "grad_norm": 0.5174273315209944, "learning_rate": 0.00014924461586110716, "loss": 12.2471, "step": 13076 }, { "epoch": 0.7120949133160442, "grad_norm": 0.5563814598625784, "learning_rate": 0.00014923694076117872, "loss": 12.1744, "step": 13077 }, { "epoch": 0.7121493673126272, "grad_norm": 0.5938209262601601, "learning_rate": 0.00014922926527837556, "loss": 12.4136, "step": 13078 }, { "epoch": 0.7122038213092102, "grad_norm": 0.6440486117840959, "learning_rate": 0.00014922158941275742, "loss": 12.2879, "step": 13079 }, { "epoch": 0.7122582753057932, "grad_norm": 0.5424085110763885, "learning_rate": 0.0001492139131643839, "loss": 12.2847, "step": 13080 }, { "epoch": 0.7123127293023762, "grad_norm": 0.6288430281756898, "learning_rate": 0.00014920623653331473, "loss": 12.2262, "step": 13081 }, { "epoch": 0.7123671832989592, "grad_norm": 0.55112138539526, "learning_rate": 0.00014919855951960964, "loss": 12.193, "step": 13082 }, { "epoch": 0.7124216372955423, "grad_norm": 0.6103232989314326, "learning_rate": 0.00014919088212332832, "loss": 12.3006, "step": 13083 }, { "epoch": 0.7124760912921253, "grad_norm": 0.585852252955573, "learning_rate": 0.00014918320434453044, "loss": 12.2011, "step": 13084 }, { "epoch": 0.7125305452887083, "grad_norm": 0.6023320204235458, "learning_rate": 0.00014917552618327567, "loss": 12.183, "step": 13085 }, { "epoch": 0.7125849992852913, "grad_norm": 0.637684131987905, "learning_rate": 0.00014916784763962382, "loss": 12.1952, "step": 13086 }, { "epoch": 0.7126394532818743, "grad_norm": 0.5635832480007723, "learning_rate": 0.0001491601687136345, "loss": 12.2105, "step": 13087 }, { "epoch": 0.7126939072784573, "grad_norm": 0.5277875554433332, "learning_rate": 0.00014915248940536747, "loss": 12.2859, "step": 13088 }, { "epoch": 0.7127483612750404, "grad_norm": 0.7686941264622156, "learning_rate": 0.00014914480971488247, "loss": 12.3042, "step": 13089 }, { "epoch": 0.7128028152716234, "grad_norm": 0.5777273873310947, "learning_rate": 0.00014913712964223917, "loss": 12.3386, "step": 13090 }, { "epoch": 0.7128572692682064, "grad_norm": 0.5662398880449121, "learning_rate": 0.0001491294491874973, "loss": 12.0751, "step": 13091 }, { "epoch": 0.7129117232647894, "grad_norm": 0.5694475424596657, "learning_rate": 0.00014912176835071657, "loss": 12.2462, "step": 13092 }, { "epoch": 0.7129661772613723, "grad_norm": 0.6194296392446067, "learning_rate": 0.00014911408713195678, "loss": 12.3051, "step": 13093 }, { "epoch": 0.7130206312579553, "grad_norm": 0.5929804883936958, "learning_rate": 0.0001491064055312776, "loss": 11.9636, "step": 13094 }, { "epoch": 0.7130750852545384, "grad_norm": 0.577121793325591, "learning_rate": 0.00014909872354873876, "loss": 12.1692, "step": 13095 }, { "epoch": 0.7131295392511214, "grad_norm": 0.5399081423465084, "learning_rate": 0.00014909104118440003, "loss": 12.1337, "step": 13096 }, { "epoch": 0.7131839932477044, "grad_norm": 0.6129261066771551, "learning_rate": 0.00014908335843832112, "loss": 12.2635, "step": 13097 }, { "epoch": 0.7132384472442874, "grad_norm": 0.5557186330224296, "learning_rate": 0.0001490756753105618, "loss": 12.152, "step": 13098 }, { "epoch": 0.7132929012408704, "grad_norm": 0.6522389101871415, "learning_rate": 0.00014906799180118178, "loss": 12.222, "step": 13099 }, { "epoch": 0.7133473552374534, "grad_norm": 0.6201291721778343, "learning_rate": 0.00014906030791024083, "loss": 12.4092, "step": 13100 }, { "epoch": 0.7134018092340365, "grad_norm": 0.5769656708175186, "learning_rate": 0.0001490526236377987, "loss": 12.1597, "step": 13101 }, { "epoch": 0.7134562632306195, "grad_norm": 0.6034623352492604, "learning_rate": 0.00014904493898391515, "loss": 12.2657, "step": 13102 }, { "epoch": 0.7135107172272025, "grad_norm": 0.7048369446631927, "learning_rate": 0.00014903725394864993, "loss": 12.3768, "step": 13103 }, { "epoch": 0.7135651712237855, "grad_norm": 0.5917872807045255, "learning_rate": 0.00014902956853206275, "loss": 12.1509, "step": 13104 }, { "epoch": 0.7136196252203685, "grad_norm": 0.6228402490002504, "learning_rate": 0.0001490218827342135, "loss": 12.1816, "step": 13105 }, { "epoch": 0.7136740792169516, "grad_norm": 0.6237825639120929, "learning_rate": 0.00014901419655516182, "loss": 12.2323, "step": 13106 }, { "epoch": 0.7137285332135346, "grad_norm": 0.6437731143746762, "learning_rate": 0.00014900650999496754, "loss": 12.1785, "step": 13107 }, { "epoch": 0.7137829872101176, "grad_norm": 0.5712009786525403, "learning_rate": 0.00014899882305369043, "loss": 12.2221, "step": 13108 }, { "epoch": 0.7138374412067006, "grad_norm": 0.5935683808044501, "learning_rate": 0.00014899113573139022, "loss": 12.2265, "step": 13109 }, { "epoch": 0.7138918952032836, "grad_norm": 0.6016139400333811, "learning_rate": 0.00014898344802812677, "loss": 12.2309, "step": 13110 }, { "epoch": 0.7139463491998665, "grad_norm": 0.5515894574044553, "learning_rate": 0.0001489757599439598, "loss": 12.2961, "step": 13111 }, { "epoch": 0.7140008031964497, "grad_norm": 0.6157520966673266, "learning_rate": 0.0001489680714789491, "loss": 12.3123, "step": 13112 }, { "epoch": 0.7140552571930326, "grad_norm": 0.6139323540701279, "learning_rate": 0.00014896038263315445, "loss": 12.1918, "step": 13113 }, { "epoch": 0.7141097111896156, "grad_norm": 0.6524316381419333, "learning_rate": 0.00014895269340663568, "loss": 12.3856, "step": 13114 }, { "epoch": 0.7141641651861986, "grad_norm": 0.564740400972851, "learning_rate": 0.00014894500379945252, "loss": 12.401, "step": 13115 }, { "epoch": 0.7142186191827816, "grad_norm": 0.621305851648461, "learning_rate": 0.00014893731381166486, "loss": 12.2224, "step": 13116 }, { "epoch": 0.7142730731793646, "grad_norm": 0.6056844793845901, "learning_rate": 0.0001489296234433324, "loss": 12.2252, "step": 13117 }, { "epoch": 0.7143275271759477, "grad_norm": 0.565175529177596, "learning_rate": 0.000148921932694515, "loss": 12.2674, "step": 13118 }, { "epoch": 0.7143819811725307, "grad_norm": 0.6050541903974955, "learning_rate": 0.00014891424156527241, "loss": 12.3528, "step": 13119 }, { "epoch": 0.7144364351691137, "grad_norm": 0.5742183932203648, "learning_rate": 0.0001489065500556645, "loss": 12.2321, "step": 13120 }, { "epoch": 0.7144908891656967, "grad_norm": 0.6274447021592812, "learning_rate": 0.00014889885816575106, "loss": 12.2845, "step": 13121 }, { "epoch": 0.7145453431622797, "grad_norm": 0.6539372288535807, "learning_rate": 0.00014889116589559192, "loss": 12.2235, "step": 13122 }, { "epoch": 0.7145997971588627, "grad_norm": 0.5398132389690338, "learning_rate": 0.00014888347324524685, "loss": 12.1618, "step": 13123 }, { "epoch": 0.7146542511554458, "grad_norm": 0.6012845841839606, "learning_rate": 0.0001488757802147757, "loss": 12.4002, "step": 13124 }, { "epoch": 0.7147087051520288, "grad_norm": 0.6481016820248655, "learning_rate": 0.00014886808680423825, "loss": 12.3224, "step": 13125 }, { "epoch": 0.7147631591486118, "grad_norm": 0.5473743337050713, "learning_rate": 0.0001488603930136944, "loss": 12.2754, "step": 13126 }, { "epoch": 0.7148176131451948, "grad_norm": 0.5585932175414414, "learning_rate": 0.00014885269884320394, "loss": 12.3176, "step": 13127 }, { "epoch": 0.7148720671417778, "grad_norm": 0.6098644156874881, "learning_rate": 0.00014884500429282672, "loss": 12.3161, "step": 13128 }, { "epoch": 0.7149265211383607, "grad_norm": 0.5900891649967697, "learning_rate": 0.0001488373093626225, "loss": 12.3236, "step": 13129 }, { "epoch": 0.7149809751349439, "grad_norm": 0.52526226559206, "learning_rate": 0.0001488296140526512, "loss": 12.2481, "step": 13130 }, { "epoch": 0.7150354291315268, "grad_norm": 0.6211944112719395, "learning_rate": 0.00014882191836297263, "loss": 12.355, "step": 13131 }, { "epoch": 0.7150898831281098, "grad_norm": 0.6088171060766341, "learning_rate": 0.00014881422229364667, "loss": 12.2572, "step": 13132 }, { "epoch": 0.7151443371246928, "grad_norm": 0.5867369564056631, "learning_rate": 0.0001488065258447331, "loss": 12.366, "step": 13133 }, { "epoch": 0.7151987911212758, "grad_norm": 0.6565966460145465, "learning_rate": 0.00014879882901629182, "loss": 12.0463, "step": 13134 }, { "epoch": 0.7152532451178588, "grad_norm": 0.8062366477423585, "learning_rate": 0.00014879113180838263, "loss": 12.2164, "step": 13135 }, { "epoch": 0.7153076991144419, "grad_norm": 0.8015097363835164, "learning_rate": 0.0001487834342210654, "loss": 12.4014, "step": 13136 }, { "epoch": 0.7153621531110249, "grad_norm": 0.5663030303378246, "learning_rate": 0.00014877573625440006, "loss": 12.3439, "step": 13137 }, { "epoch": 0.7154166071076079, "grad_norm": 0.6414869394241983, "learning_rate": 0.0001487680379084464, "loss": 12.3671, "step": 13138 }, { "epoch": 0.7154710611041909, "grad_norm": 0.599459862838817, "learning_rate": 0.0001487603391832643, "loss": 12.2809, "step": 13139 }, { "epoch": 0.7155255151007739, "grad_norm": 0.6084045336182192, "learning_rate": 0.0001487526400789136, "loss": 12.366, "step": 13140 }, { "epoch": 0.715579969097357, "grad_norm": 0.5717786606274167, "learning_rate": 0.0001487449405954542, "loss": 12.1181, "step": 13141 }, { "epoch": 0.71563442309394, "grad_norm": 0.5944221913914374, "learning_rate": 0.00014873724073294597, "loss": 12.3322, "step": 13142 }, { "epoch": 0.715688877090523, "grad_norm": 0.6191905531252168, "learning_rate": 0.0001487295404914488, "loss": 12.1768, "step": 13143 }, { "epoch": 0.715743331087106, "grad_norm": 0.5948946545639103, "learning_rate": 0.00014872183987102254, "loss": 12.138, "step": 13144 }, { "epoch": 0.715797785083689, "grad_norm": 0.5729785875130822, "learning_rate": 0.0001487141388717271, "loss": 12.2828, "step": 13145 }, { "epoch": 0.715852239080272, "grad_norm": 0.5673965410686297, "learning_rate": 0.00014870643749362233, "loss": 12.33, "step": 13146 }, { "epoch": 0.7159066930768551, "grad_norm": 0.5663436986398205, "learning_rate": 0.00014869873573676812, "loss": 12.3317, "step": 13147 }, { "epoch": 0.715961147073438, "grad_norm": 0.5832471979785514, "learning_rate": 0.00014869103360122437, "loss": 12.2599, "step": 13148 }, { "epoch": 0.716015601070021, "grad_norm": 0.6042795355125038, "learning_rate": 0.00014868333108705102, "loss": 12.3361, "step": 13149 }, { "epoch": 0.716070055066604, "grad_norm": 0.5654725103208902, "learning_rate": 0.0001486756281943079, "loss": 12.1197, "step": 13150 }, { "epoch": 0.716124509063187, "grad_norm": 0.5867311364311525, "learning_rate": 0.00014866792492305493, "loss": 12.2859, "step": 13151 }, { "epoch": 0.71617896305977, "grad_norm": 0.6016682905288424, "learning_rate": 0.00014866022127335202, "loss": 12.2406, "step": 13152 }, { "epoch": 0.7162334170563531, "grad_norm": 0.6115550769668805, "learning_rate": 0.00014865251724525906, "loss": 12.2205, "step": 13153 }, { "epoch": 0.7162878710529361, "grad_norm": 0.5630660440481695, "learning_rate": 0.00014864481283883598, "loss": 12.1391, "step": 13154 }, { "epoch": 0.7163423250495191, "grad_norm": 0.5638294256497566, "learning_rate": 0.00014863710805414267, "loss": 12.3224, "step": 13155 }, { "epoch": 0.7163967790461021, "grad_norm": 0.560701108490784, "learning_rate": 0.00014862940289123904, "loss": 12.1735, "step": 13156 }, { "epoch": 0.7164512330426851, "grad_norm": 0.541192260738824, "learning_rate": 0.00014862169735018504, "loss": 12.1591, "step": 13157 }, { "epoch": 0.7165056870392681, "grad_norm": 0.6230640920391687, "learning_rate": 0.00014861399143104053, "loss": 12.2217, "step": 13158 }, { "epoch": 0.7165601410358512, "grad_norm": 0.5660894693307399, "learning_rate": 0.0001486062851338655, "loss": 12.2232, "step": 13159 }, { "epoch": 0.7166145950324342, "grad_norm": 0.5447717408784877, "learning_rate": 0.00014859857845871984, "loss": 12.3504, "step": 13160 }, { "epoch": 0.7166690490290172, "grad_norm": 0.5989972551874758, "learning_rate": 0.00014859087140566351, "loss": 12.3824, "step": 13161 }, { "epoch": 0.7167235030256002, "grad_norm": 0.6462447002724582, "learning_rate": 0.0001485831639747564, "loss": 12.3014, "step": 13162 }, { "epoch": 0.7167779570221832, "grad_norm": 0.5806368639453152, "learning_rate": 0.00014857545616605842, "loss": 12.2635, "step": 13163 }, { "epoch": 0.7168324110187662, "grad_norm": 0.5597040487622332, "learning_rate": 0.00014856774797962957, "loss": 12.1284, "step": 13164 }, { "epoch": 0.7168868650153493, "grad_norm": 0.6358123005292217, "learning_rate": 0.0001485600394155298, "loss": 12.3169, "step": 13165 }, { "epoch": 0.7169413190119323, "grad_norm": 0.6256827556331831, "learning_rate": 0.00014855233047381897, "loss": 12.2201, "step": 13166 }, { "epoch": 0.7169957730085152, "grad_norm": 0.6116325432530314, "learning_rate": 0.00014854462115455712, "loss": 12.3764, "step": 13167 }, { "epoch": 0.7170502270050982, "grad_norm": 0.5973785607260411, "learning_rate": 0.00014853691145780412, "loss": 12.3064, "step": 13168 }, { "epoch": 0.7171046810016812, "grad_norm": 0.6229178221685887, "learning_rate": 0.00014852920138361996, "loss": 12.2634, "step": 13169 }, { "epoch": 0.7171591349982642, "grad_norm": 0.5018641979222833, "learning_rate": 0.00014852149093206462, "loss": 12.227, "step": 13170 }, { "epoch": 0.7172135889948473, "grad_norm": 0.6245439842759595, "learning_rate": 0.00014851378010319801, "loss": 12.1027, "step": 13171 }, { "epoch": 0.7172680429914303, "grad_norm": 0.5944257970056329, "learning_rate": 0.00014850606889708012, "loss": 12.3463, "step": 13172 }, { "epoch": 0.7173224969880133, "grad_norm": 0.5401040438062852, "learning_rate": 0.00014849835731377088, "loss": 12.2422, "step": 13173 }, { "epoch": 0.7173769509845963, "grad_norm": 0.5836619995670721, "learning_rate": 0.0001484906453533303, "loss": 12.2505, "step": 13174 }, { "epoch": 0.7174314049811793, "grad_norm": 0.6015006881115633, "learning_rate": 0.00014848293301581835, "loss": 12.195, "step": 13175 }, { "epoch": 0.7174858589777624, "grad_norm": 0.6136890602982735, "learning_rate": 0.00014847522030129494, "loss": 12.3676, "step": 13176 }, { "epoch": 0.7175403129743454, "grad_norm": 0.622763211404351, "learning_rate": 0.0001484675072098201, "loss": 12.3623, "step": 13177 }, { "epoch": 0.7175947669709284, "grad_norm": 0.5902021660932876, "learning_rate": 0.00014845979374145384, "loss": 12.3369, "step": 13178 }, { "epoch": 0.7176492209675114, "grad_norm": 0.6462075351516074, "learning_rate": 0.00014845207989625604, "loss": 12.3864, "step": 13179 }, { "epoch": 0.7177036749640944, "grad_norm": 0.5608681041428895, "learning_rate": 0.00014844436567428674, "loss": 12.0696, "step": 13180 }, { "epoch": 0.7177581289606774, "grad_norm": 0.6024320792955538, "learning_rate": 0.00014843665107560597, "loss": 12.419, "step": 13181 }, { "epoch": 0.7178125829572605, "grad_norm": 0.5527658114202388, "learning_rate": 0.00014842893610027367, "loss": 12.3359, "step": 13182 }, { "epoch": 0.7178670369538435, "grad_norm": 0.585011551195655, "learning_rate": 0.00014842122074834984, "loss": 12.2481, "step": 13183 }, { "epoch": 0.7179214909504265, "grad_norm": 0.6151587175440556, "learning_rate": 0.00014841350501989446, "loss": 12.2827, "step": 13184 }, { "epoch": 0.7179759449470094, "grad_norm": 0.5769188019599946, "learning_rate": 0.00014840578891496753, "loss": 12.2817, "step": 13185 }, { "epoch": 0.7180303989435924, "grad_norm": 0.621828495887503, "learning_rate": 0.00014839807243362908, "loss": 12.2329, "step": 13186 }, { "epoch": 0.7180848529401754, "grad_norm": 0.5963198844843429, "learning_rate": 0.00014839035557593912, "loss": 12.3039, "step": 13187 }, { "epoch": 0.7181393069367585, "grad_norm": 0.5708228204132709, "learning_rate": 0.00014838263834195765, "loss": 12.3559, "step": 13188 }, { "epoch": 0.7181937609333415, "grad_norm": 0.5727996839170717, "learning_rate": 0.00014837492073174467, "loss": 12.2922, "step": 13189 }, { "epoch": 0.7182482149299245, "grad_norm": 0.6060897473951321, "learning_rate": 0.00014836720274536016, "loss": 12.4252, "step": 13190 }, { "epoch": 0.7183026689265075, "grad_norm": 0.5326123536533812, "learning_rate": 0.00014835948438286418, "loss": 12.1688, "step": 13191 }, { "epoch": 0.7183571229230905, "grad_norm": 0.5509354815979086, "learning_rate": 0.00014835176564431674, "loss": 12.256, "step": 13192 }, { "epoch": 0.7184115769196735, "grad_norm": 0.6110671982352849, "learning_rate": 0.0001483440465297779, "loss": 12.2245, "step": 13193 }, { "epoch": 0.7184660309162566, "grad_norm": 0.5516487381645298, "learning_rate": 0.0001483363270393076, "loss": 12.2766, "step": 13194 }, { "epoch": 0.7185204849128396, "grad_norm": 0.5437664696658203, "learning_rate": 0.00014832860717296594, "loss": 12.2439, "step": 13195 }, { "epoch": 0.7185749389094226, "grad_norm": 0.5607613825596112, "learning_rate": 0.0001483208869308129, "loss": 12.4613, "step": 13196 }, { "epoch": 0.7186293929060056, "grad_norm": 0.5516449388920995, "learning_rate": 0.00014831316631290856, "loss": 12.2504, "step": 13197 }, { "epoch": 0.7186838469025886, "grad_norm": 0.589519706303882, "learning_rate": 0.00014830544531931292, "loss": 12.3524, "step": 13198 }, { "epoch": 0.7187383008991716, "grad_norm": 0.5644158485740307, "learning_rate": 0.00014829772395008606, "loss": 12.2768, "step": 13199 }, { "epoch": 0.7187927548957547, "grad_norm": 0.565257166737133, "learning_rate": 0.00014829000220528799, "loss": 12.3203, "step": 13200 }, { "epoch": 0.7188472088923377, "grad_norm": 0.5562113769820356, "learning_rate": 0.00014828228008497877, "loss": 12.2482, "step": 13201 }, { "epoch": 0.7189016628889207, "grad_norm": 0.5630262812387138, "learning_rate": 0.00014827455758921842, "loss": 12.2811, "step": 13202 }, { "epoch": 0.7189561168855036, "grad_norm": 0.5694366223079674, "learning_rate": 0.00014826683471806703, "loss": 12.2332, "step": 13203 }, { "epoch": 0.7190105708820866, "grad_norm": 0.6049096069216633, "learning_rate": 0.00014825911147158465, "loss": 12.2179, "step": 13204 }, { "epoch": 0.7190650248786696, "grad_norm": 0.5414694234678251, "learning_rate": 0.0001482513878498313, "loss": 12.2502, "step": 13205 }, { "epoch": 0.7191194788752527, "grad_norm": 0.5530293618529364, "learning_rate": 0.0001482436638528671, "loss": 12.1771, "step": 13206 }, { "epoch": 0.7191739328718357, "grad_norm": 0.605115901589326, "learning_rate": 0.00014823593948075202, "loss": 12.3434, "step": 13207 }, { "epoch": 0.7192283868684187, "grad_norm": 0.5936913368093815, "learning_rate": 0.00014822821473354624, "loss": 12.2302, "step": 13208 }, { "epoch": 0.7192828408650017, "grad_norm": 0.5730714795630938, "learning_rate": 0.0001482204896113098, "loss": 12.2835, "step": 13209 }, { "epoch": 0.7193372948615847, "grad_norm": 0.5703306686028046, "learning_rate": 0.0001482127641141027, "loss": 12.2479, "step": 13210 }, { "epoch": 0.7193917488581678, "grad_norm": 0.574174984669217, "learning_rate": 0.00014820503824198507, "loss": 12.2809, "step": 13211 }, { "epoch": 0.7194462028547508, "grad_norm": 0.5336652865060002, "learning_rate": 0.00014819731199501696, "loss": 12.2294, "step": 13212 }, { "epoch": 0.7195006568513338, "grad_norm": 0.556654346812789, "learning_rate": 0.00014818958537325848, "loss": 12.1503, "step": 13213 }, { "epoch": 0.7195551108479168, "grad_norm": 0.5763679539817065, "learning_rate": 0.00014818185837676972, "loss": 12.2594, "step": 13214 }, { "epoch": 0.7196095648444998, "grad_norm": 0.5844597639610618, "learning_rate": 0.00014817413100561076, "loss": 12.2629, "step": 13215 }, { "epoch": 0.7196640188410828, "grad_norm": 0.5921512871316501, "learning_rate": 0.00014816640325984165, "loss": 12.2631, "step": 13216 }, { "epoch": 0.7197184728376659, "grad_norm": 0.5898121302300365, "learning_rate": 0.0001481586751395225, "loss": 12.35, "step": 13217 }, { "epoch": 0.7197729268342489, "grad_norm": 0.5358922462860523, "learning_rate": 0.00014815094664471343, "loss": 12.2799, "step": 13218 }, { "epoch": 0.7198273808308319, "grad_norm": 0.6353500122755664, "learning_rate": 0.00014814321777547454, "loss": 12.3604, "step": 13219 }, { "epoch": 0.7198818348274149, "grad_norm": 0.6809386098176096, "learning_rate": 0.0001481354885318659, "loss": 12.1429, "step": 13220 }, { "epoch": 0.7199362888239978, "grad_norm": 0.5314956806716192, "learning_rate": 0.00014812775891394762, "loss": 12.1481, "step": 13221 }, { "epoch": 0.7199907428205808, "grad_norm": 0.5688952356004566, "learning_rate": 0.00014812002892177983, "loss": 12.2585, "step": 13222 }, { "epoch": 0.720045196817164, "grad_norm": 0.5839960551532886, "learning_rate": 0.00014811229855542265, "loss": 12.2537, "step": 13223 }, { "epoch": 0.7200996508137469, "grad_norm": 0.626351737694393, "learning_rate": 0.0001481045678149361, "loss": 12.2205, "step": 13224 }, { "epoch": 0.7201541048103299, "grad_norm": 0.6407856048354745, "learning_rate": 0.0001480968367003804, "loss": 12.356, "step": 13225 }, { "epoch": 0.7202085588069129, "grad_norm": 0.6536082447989517, "learning_rate": 0.00014808910521181564, "loss": 12.3344, "step": 13226 }, { "epoch": 0.7202630128034959, "grad_norm": 0.6131272889251808, "learning_rate": 0.00014808137334930193, "loss": 12.2439, "step": 13227 }, { "epoch": 0.7203174668000789, "grad_norm": 0.6580668437691843, "learning_rate": 0.0001480736411128994, "loss": 12.2149, "step": 13228 }, { "epoch": 0.720371920796662, "grad_norm": 0.5741574367765172, "learning_rate": 0.00014806590850266818, "loss": 12.2251, "step": 13229 }, { "epoch": 0.720426374793245, "grad_norm": 0.5474613052768861, "learning_rate": 0.00014805817551866838, "loss": 12.3027, "step": 13230 }, { "epoch": 0.720480828789828, "grad_norm": 0.6422235053459793, "learning_rate": 0.00014805044216096016, "loss": 12.3378, "step": 13231 }, { "epoch": 0.720535282786411, "grad_norm": 0.597895121175297, "learning_rate": 0.00014804270842960364, "loss": 12.3361, "step": 13232 }, { "epoch": 0.720589736782994, "grad_norm": 0.6785533887594158, "learning_rate": 0.00014803497432465897, "loss": 12.379, "step": 13233 }, { "epoch": 0.720644190779577, "grad_norm": 0.6109394549575866, "learning_rate": 0.00014802723984618624, "loss": 12.3102, "step": 13234 }, { "epoch": 0.7206986447761601, "grad_norm": 0.6062002142767499, "learning_rate": 0.00014801950499424571, "loss": 12.3901, "step": 13235 }, { "epoch": 0.7207530987727431, "grad_norm": 0.6146312263798974, "learning_rate": 0.00014801176976889742, "loss": 12.27, "step": 13236 }, { "epoch": 0.7208075527693261, "grad_norm": 0.5439968721697996, "learning_rate": 0.00014800403417020155, "loss": 12.2037, "step": 13237 }, { "epoch": 0.7208620067659091, "grad_norm": 0.5652952590775139, "learning_rate": 0.00014799629819821824, "loss": 12.2625, "step": 13238 }, { "epoch": 0.720916460762492, "grad_norm": 0.5736293661004036, "learning_rate": 0.00014798856185300772, "loss": 12.3362, "step": 13239 }, { "epoch": 0.7209709147590752, "grad_norm": 0.6988017105721162, "learning_rate": 0.00014798082513463006, "loss": 12.2804, "step": 13240 }, { "epoch": 0.7210253687556581, "grad_norm": 0.5507195086396749, "learning_rate": 0.00014797308804314545, "loss": 12.2721, "step": 13241 }, { "epoch": 0.7210798227522411, "grad_norm": 0.5989828390022323, "learning_rate": 0.00014796535057861408, "loss": 12.2255, "step": 13242 }, { "epoch": 0.7211342767488241, "grad_norm": 0.5783072500642104, "learning_rate": 0.0001479576127410961, "loss": 12.3047, "step": 13243 }, { "epoch": 0.7211887307454071, "grad_norm": 0.5611434128093166, "learning_rate": 0.0001479498745306517, "loss": 12.1689, "step": 13244 }, { "epoch": 0.7212431847419901, "grad_norm": 0.5748784280245217, "learning_rate": 0.00014794213594734098, "loss": 12.2031, "step": 13245 }, { "epoch": 0.7212976387385732, "grad_norm": 0.5399655039572351, "learning_rate": 0.00014793439699122422, "loss": 12.2459, "step": 13246 }, { "epoch": 0.7213520927351562, "grad_norm": 0.6441317220669062, "learning_rate": 0.00014792665766236155, "loss": 12.3354, "step": 13247 }, { "epoch": 0.7214065467317392, "grad_norm": 0.5339434294189568, "learning_rate": 0.00014791891796081313, "loss": 12.2029, "step": 13248 }, { "epoch": 0.7214610007283222, "grad_norm": 0.5497203591337948, "learning_rate": 0.00014791117788663918, "loss": 12.28, "step": 13249 }, { "epoch": 0.7215154547249052, "grad_norm": 0.5884085579228641, "learning_rate": 0.00014790343743989988, "loss": 12.4046, "step": 13250 }, { "epoch": 0.7215699087214882, "grad_norm": 0.5542525975959941, "learning_rate": 0.00014789569662065539, "loss": 12.239, "step": 13251 }, { "epoch": 0.7216243627180713, "grad_norm": 0.5225190570253677, "learning_rate": 0.00014788795542896595, "loss": 12.2479, "step": 13252 }, { "epoch": 0.7216788167146543, "grad_norm": 0.5464471759603579, "learning_rate": 0.00014788021386489173, "loss": 12.2732, "step": 13253 }, { "epoch": 0.7217332707112373, "grad_norm": 0.6026126866474447, "learning_rate": 0.00014787247192849296, "loss": 12.338, "step": 13254 }, { "epoch": 0.7217877247078203, "grad_norm": 0.5680906729215941, "learning_rate": 0.0001478647296198298, "loss": 12.2654, "step": 13255 }, { "epoch": 0.7218421787044033, "grad_norm": 0.5279643367138486, "learning_rate": 0.00014785698693896247, "loss": 12.281, "step": 13256 }, { "epoch": 0.7218966327009863, "grad_norm": 0.5780639737499039, "learning_rate": 0.00014784924388595118, "loss": 12.1744, "step": 13257 }, { "epoch": 0.7219510866975694, "grad_norm": 0.6157191538415512, "learning_rate": 0.00014784150046085617, "loss": 12.2092, "step": 13258 }, { "epoch": 0.7220055406941523, "grad_norm": 0.594849991840615, "learning_rate": 0.00014783375666373765, "loss": 12.3156, "step": 13259 }, { "epoch": 0.7220599946907353, "grad_norm": 0.5610084800496234, "learning_rate": 0.00014782601249465577, "loss": 12.3049, "step": 13260 }, { "epoch": 0.7221144486873183, "grad_norm": 0.5138308536565745, "learning_rate": 0.0001478182679536708, "loss": 12.1668, "step": 13261 }, { "epoch": 0.7221689026839013, "grad_norm": 0.5857148696720267, "learning_rate": 0.00014781052304084296, "loss": 12.2645, "step": 13262 }, { "epoch": 0.7222233566804843, "grad_norm": 0.5869236689159668, "learning_rate": 0.00014780277775623248, "loss": 12.2645, "step": 13263 }, { "epoch": 0.7222778106770674, "grad_norm": 0.5861017227555576, "learning_rate": 0.00014779503209989963, "loss": 12.1274, "step": 13264 }, { "epoch": 0.7223322646736504, "grad_norm": 0.5319448586885308, "learning_rate": 0.00014778728607190453, "loss": 12.2314, "step": 13265 }, { "epoch": 0.7223867186702334, "grad_norm": 0.5311790313561285, "learning_rate": 0.0001477795396723075, "loss": 12.1801, "step": 13266 }, { "epoch": 0.7224411726668164, "grad_norm": 0.6732219693817844, "learning_rate": 0.00014777179290116873, "loss": 12.3139, "step": 13267 }, { "epoch": 0.7224956266633994, "grad_norm": 0.6493652353578988, "learning_rate": 0.00014776404575854855, "loss": 12.3059, "step": 13268 }, { "epoch": 0.7225500806599824, "grad_norm": 0.5937044833129376, "learning_rate": 0.0001477562982445071, "loss": 12.364, "step": 13269 }, { "epoch": 0.7226045346565655, "grad_norm": 0.6198658920216181, "learning_rate": 0.00014774855035910468, "loss": 12.2827, "step": 13270 }, { "epoch": 0.7226589886531485, "grad_norm": 0.5732476439334573, "learning_rate": 0.00014774080210240153, "loss": 12.2738, "step": 13271 }, { "epoch": 0.7227134426497315, "grad_norm": 0.5360260553007256, "learning_rate": 0.00014773305347445787, "loss": 12.111, "step": 13272 }, { "epoch": 0.7227678966463145, "grad_norm": 0.5275770907062345, "learning_rate": 0.000147725304475334, "loss": 12.2252, "step": 13273 }, { "epoch": 0.7228223506428975, "grad_norm": 0.5172824207596717, "learning_rate": 0.00014771755510509016, "loss": 12.2106, "step": 13274 }, { "epoch": 0.7228768046394806, "grad_norm": 0.5721249521602304, "learning_rate": 0.0001477098053637866, "loss": 12.199, "step": 13275 }, { "epoch": 0.7229312586360636, "grad_norm": 0.7191742987674118, "learning_rate": 0.0001477020552514836, "loss": 12.2984, "step": 13276 }, { "epoch": 0.7229857126326465, "grad_norm": 0.6365763140364471, "learning_rate": 0.00014769430476824143, "loss": 12.2329, "step": 13277 }, { "epoch": 0.7230401666292295, "grad_norm": 0.551456274285001, "learning_rate": 0.0001476865539141203, "loss": 12.2782, "step": 13278 }, { "epoch": 0.7230946206258125, "grad_norm": 0.5991760905410665, "learning_rate": 0.0001476788026891806, "loss": 12.2426, "step": 13279 }, { "epoch": 0.7231490746223955, "grad_norm": 0.5544333248898721, "learning_rate": 0.0001476710510934825, "loss": 12.299, "step": 13280 }, { "epoch": 0.7232035286189786, "grad_norm": 0.5394336831800627, "learning_rate": 0.0001476632991270863, "loss": 12.2804, "step": 13281 }, { "epoch": 0.7232579826155616, "grad_norm": 0.6062299792911675, "learning_rate": 0.00014765554679005232, "loss": 12.2861, "step": 13282 }, { "epoch": 0.7233124366121446, "grad_norm": 0.5730035830507569, "learning_rate": 0.00014764779408244077, "loss": 12.2278, "step": 13283 }, { "epoch": 0.7233668906087276, "grad_norm": 0.6156500355967656, "learning_rate": 0.00014764004100431202, "loss": 12.2724, "step": 13284 }, { "epoch": 0.7234213446053106, "grad_norm": 0.6188118186143371, "learning_rate": 0.0001476322875557263, "loss": 12.3845, "step": 13285 }, { "epoch": 0.7234757986018936, "grad_norm": 0.5600796231001508, "learning_rate": 0.00014762453373674394, "loss": 12.3946, "step": 13286 }, { "epoch": 0.7235302525984767, "grad_norm": 0.5966923620651808, "learning_rate": 0.00014761677954742525, "loss": 12.2449, "step": 13287 }, { "epoch": 0.7235847065950597, "grad_norm": 0.6581313433360012, "learning_rate": 0.00014760902498783045, "loss": 12.2262, "step": 13288 }, { "epoch": 0.7236391605916427, "grad_norm": 0.6462196705353955, "learning_rate": 0.0001476012700580199, "loss": 12.1701, "step": 13289 }, { "epoch": 0.7236936145882257, "grad_norm": 0.5686442508026062, "learning_rate": 0.0001475935147580539, "loss": 12.3473, "step": 13290 }, { "epoch": 0.7237480685848087, "grad_norm": 0.6936995965357283, "learning_rate": 0.00014758575908799273, "loss": 12.161, "step": 13291 }, { "epoch": 0.7238025225813917, "grad_norm": 0.5734359477612324, "learning_rate": 0.00014757800304789672, "loss": 12.2128, "step": 13292 }, { "epoch": 0.7238569765779748, "grad_norm": 0.5543159894192236, "learning_rate": 0.00014757024663782618, "loss": 12.2305, "step": 13293 }, { "epoch": 0.7239114305745578, "grad_norm": 0.7523444100322242, "learning_rate": 0.00014756248985784145, "loss": 12.2388, "step": 13294 }, { "epoch": 0.7239658845711407, "grad_norm": 0.6761115842844122, "learning_rate": 0.0001475547327080028, "loss": 12.5183, "step": 13295 }, { "epoch": 0.7240203385677237, "grad_norm": 0.6478389352027981, "learning_rate": 0.00014754697518837057, "loss": 12.2586, "step": 13296 }, { "epoch": 0.7240747925643067, "grad_norm": 0.6431879080137026, "learning_rate": 0.0001475392172990051, "loss": 12.3238, "step": 13297 }, { "epoch": 0.7241292465608897, "grad_norm": 0.5920708792499044, "learning_rate": 0.0001475314590399667, "loss": 12.2876, "step": 13298 }, { "epoch": 0.7241837005574728, "grad_norm": 0.5144758746792878, "learning_rate": 0.00014752370041131571, "loss": 12.118, "step": 13299 }, { "epoch": 0.7242381545540558, "grad_norm": 0.5594016258185207, "learning_rate": 0.00014751594141311245, "loss": 12.171, "step": 13300 }, { "epoch": 0.7242926085506388, "grad_norm": 0.5727034735694334, "learning_rate": 0.00014750818204541726, "loss": 12.2966, "step": 13301 }, { "epoch": 0.7243470625472218, "grad_norm": 0.5423280565884221, "learning_rate": 0.00014750042230829048, "loss": 12.2054, "step": 13302 }, { "epoch": 0.7244015165438048, "grad_norm": 0.6132969743187309, "learning_rate": 0.00014749266220179244, "loss": 12.2604, "step": 13303 }, { "epoch": 0.7244559705403878, "grad_norm": 0.5824401764590079, "learning_rate": 0.00014748490172598351, "loss": 12.3298, "step": 13304 }, { "epoch": 0.7245104245369709, "grad_norm": 0.6718562011950225, "learning_rate": 0.000147477140880924, "loss": 12.2995, "step": 13305 }, { "epoch": 0.7245648785335539, "grad_norm": 0.6296253155808531, "learning_rate": 0.0001474693796666743, "loss": 12.2741, "step": 13306 }, { "epoch": 0.7246193325301369, "grad_norm": 0.5643711473386575, "learning_rate": 0.00014746161808329474, "loss": 12.3123, "step": 13307 }, { "epoch": 0.7246737865267199, "grad_norm": 0.5789895661930375, "learning_rate": 0.00014745385613084569, "loss": 12.3231, "step": 13308 }, { "epoch": 0.7247282405233029, "grad_norm": 0.6649865105288295, "learning_rate": 0.0001474460938093875, "loss": 12.3596, "step": 13309 }, { "epoch": 0.724782694519886, "grad_norm": 0.6178714964656502, "learning_rate": 0.00014743833111898052, "loss": 12.252, "step": 13310 }, { "epoch": 0.724837148516469, "grad_norm": 0.5727155683070335, "learning_rate": 0.00014743056805968508, "loss": 12.2908, "step": 13311 }, { "epoch": 0.724891602513052, "grad_norm": 0.6811695965625274, "learning_rate": 0.00014742280463156163, "loss": 12.4003, "step": 13312 }, { "epoch": 0.724946056509635, "grad_norm": 0.606043872375847, "learning_rate": 0.0001474150408346705, "loss": 12.3047, "step": 13313 }, { "epoch": 0.7250005105062179, "grad_norm": 0.5663335087891896, "learning_rate": 0.00014740727666907207, "loss": 12.2953, "step": 13314 }, { "epoch": 0.7250549645028009, "grad_norm": 0.6290111840184379, "learning_rate": 0.00014739951213482667, "loss": 12.2735, "step": 13315 }, { "epoch": 0.725109418499384, "grad_norm": 0.6244440368051256, "learning_rate": 0.0001473917472319947, "loss": 12.2807, "step": 13316 }, { "epoch": 0.725163872495967, "grad_norm": 0.5204893675027747, "learning_rate": 0.00014738398196063662, "loss": 12.1904, "step": 13317 }, { "epoch": 0.72521832649255, "grad_norm": 0.6421462501017908, "learning_rate": 0.00014737621632081272, "loss": 12.2922, "step": 13318 }, { "epoch": 0.725272780489133, "grad_norm": 0.6330231138389201, "learning_rate": 0.0001473684503125834, "loss": 12.3048, "step": 13319 }, { "epoch": 0.725327234485716, "grad_norm": 0.5362822429633152, "learning_rate": 0.0001473606839360091, "loss": 12.2355, "step": 13320 }, { "epoch": 0.725381688482299, "grad_norm": 0.5748616604842707, "learning_rate": 0.00014735291719115016, "loss": 12.2656, "step": 13321 }, { "epoch": 0.7254361424788821, "grad_norm": 0.6475297889755877, "learning_rate": 0.00014734515007806698, "loss": 12.3399, "step": 13322 }, { "epoch": 0.7254905964754651, "grad_norm": 0.6503247222470766, "learning_rate": 0.00014733738259682, "loss": 12.3816, "step": 13323 }, { "epoch": 0.7255450504720481, "grad_norm": 0.5656483255703779, "learning_rate": 0.0001473296147474696, "loss": 12.2718, "step": 13324 }, { "epoch": 0.7255995044686311, "grad_norm": 0.6695739183253075, "learning_rate": 0.00014732184653007616, "loss": 12.1371, "step": 13325 }, { "epoch": 0.7256539584652141, "grad_norm": 0.6144707598948669, "learning_rate": 0.00014731407794470013, "loss": 12.2903, "step": 13326 }, { "epoch": 0.7257084124617971, "grad_norm": 0.6436012618615664, "learning_rate": 0.00014730630899140184, "loss": 12.2941, "step": 13327 }, { "epoch": 0.7257628664583802, "grad_norm": 0.5990201294905514, "learning_rate": 0.00014729853967024176, "loss": 12.3202, "step": 13328 }, { "epoch": 0.7258173204549632, "grad_norm": 0.558011434881847, "learning_rate": 0.00014729076998128038, "loss": 12.3215, "step": 13329 }, { "epoch": 0.7258717744515462, "grad_norm": 0.5789380298918541, "learning_rate": 0.000147282999924578, "loss": 12.2409, "step": 13330 }, { "epoch": 0.7259262284481292, "grad_norm": 0.6535672422004971, "learning_rate": 0.00014727522950019507, "loss": 12.2585, "step": 13331 }, { "epoch": 0.7259806824447121, "grad_norm": 0.5473404565670977, "learning_rate": 0.00014726745870819203, "loss": 12.063, "step": 13332 }, { "epoch": 0.7260351364412951, "grad_norm": 0.5768166524166972, "learning_rate": 0.0001472596875486293, "loss": 12.3775, "step": 13333 }, { "epoch": 0.7260895904378782, "grad_norm": 0.605122601726206, "learning_rate": 0.00014725191602156735, "loss": 12.3055, "step": 13334 }, { "epoch": 0.7261440444344612, "grad_norm": 0.5977671889604294, "learning_rate": 0.00014724414412706655, "loss": 12.2939, "step": 13335 }, { "epoch": 0.7261984984310442, "grad_norm": 0.5656575048123962, "learning_rate": 0.00014723637186518738, "loss": 12.2937, "step": 13336 }, { "epoch": 0.7262529524276272, "grad_norm": 0.7920585669939834, "learning_rate": 0.0001472285992359902, "loss": 12.4134, "step": 13337 }, { "epoch": 0.7263074064242102, "grad_norm": 0.6288762888571692, "learning_rate": 0.0001472208262395356, "loss": 12.4197, "step": 13338 }, { "epoch": 0.7263618604207932, "grad_norm": 0.5799542013846901, "learning_rate": 0.00014721305287588386, "loss": 12.2353, "step": 13339 }, { "epoch": 0.7264163144173763, "grad_norm": 0.5553553616139497, "learning_rate": 0.00014720527914509557, "loss": 12.2557, "step": 13340 }, { "epoch": 0.7264707684139593, "grad_norm": 0.5912935785724808, "learning_rate": 0.00014719750504723107, "loss": 12.2559, "step": 13341 }, { "epoch": 0.7265252224105423, "grad_norm": 0.5412870648624158, "learning_rate": 0.00014718973058235087, "loss": 12.0724, "step": 13342 }, { "epoch": 0.7265796764071253, "grad_norm": 0.5471026390853213, "learning_rate": 0.0001471819557505154, "loss": 12.1997, "step": 13343 }, { "epoch": 0.7266341304037083, "grad_norm": 0.6702127640155227, "learning_rate": 0.0001471741805517851, "loss": 12.4182, "step": 13344 }, { "epoch": 0.7266885844002914, "grad_norm": 0.583732138964941, "learning_rate": 0.00014716640498622054, "loss": 12.2193, "step": 13345 }, { "epoch": 0.7267430383968744, "grad_norm": 0.571671078235295, "learning_rate": 0.00014715862905388205, "loss": 12.2459, "step": 13346 }, { "epoch": 0.7267974923934574, "grad_norm": 0.7012614857505217, "learning_rate": 0.00014715085275483015, "loss": 12.2849, "step": 13347 }, { "epoch": 0.7268519463900404, "grad_norm": 0.6271694136899435, "learning_rate": 0.00014714307608912534, "loss": 12.2322, "step": 13348 }, { "epoch": 0.7269064003866234, "grad_norm": 0.6199557536061102, "learning_rate": 0.00014713529905682805, "loss": 12.1896, "step": 13349 }, { "epoch": 0.7269608543832063, "grad_norm": 0.7010623743553475, "learning_rate": 0.00014712752165799877, "loss": 12.4026, "step": 13350 }, { "epoch": 0.7270153083797894, "grad_norm": 0.6694734843494442, "learning_rate": 0.00014711974389269798, "loss": 12.1728, "step": 13351 }, { "epoch": 0.7270697623763724, "grad_norm": 0.6172901433277747, "learning_rate": 0.00014711196576098616, "loss": 12.2305, "step": 13352 }, { "epoch": 0.7271242163729554, "grad_norm": 0.6393667883302725, "learning_rate": 0.00014710418726292382, "loss": 12.2514, "step": 13353 }, { "epoch": 0.7271786703695384, "grad_norm": 0.8042984169721533, "learning_rate": 0.00014709640839857138, "loss": 12.2913, "step": 13354 }, { "epoch": 0.7272331243661214, "grad_norm": 0.5605753639003496, "learning_rate": 0.00014708862916798938, "loss": 12.3283, "step": 13355 }, { "epoch": 0.7272875783627044, "grad_norm": 0.8140781294933005, "learning_rate": 0.00014708084957123832, "loss": 12.388, "step": 13356 }, { "epoch": 0.7273420323592875, "grad_norm": 0.6234743849624682, "learning_rate": 0.00014707306960837866, "loss": 12.2007, "step": 13357 }, { "epoch": 0.7273964863558705, "grad_norm": 0.5595606397637554, "learning_rate": 0.00014706528927947092, "loss": 12.2607, "step": 13358 }, { "epoch": 0.7274509403524535, "grad_norm": 0.5624929559642442, "learning_rate": 0.00014705750858457557, "loss": 12.155, "step": 13359 }, { "epoch": 0.7275053943490365, "grad_norm": 0.7250644876285313, "learning_rate": 0.0001470497275237532, "loss": 12.232, "step": 13360 }, { "epoch": 0.7275598483456195, "grad_norm": 0.5341427046039439, "learning_rate": 0.0001470419460970642, "loss": 12.1898, "step": 13361 }, { "epoch": 0.7276143023422025, "grad_norm": 0.6067037064595763, "learning_rate": 0.00014703416430456918, "loss": 12.2517, "step": 13362 }, { "epoch": 0.7276687563387856, "grad_norm": 0.6619249652306931, "learning_rate": 0.0001470263821463286, "loss": 12.1289, "step": 13363 }, { "epoch": 0.7277232103353686, "grad_norm": 0.539140862548324, "learning_rate": 0.00014701859962240298, "loss": 12.2192, "step": 13364 }, { "epoch": 0.7277776643319516, "grad_norm": 0.6013055091812155, "learning_rate": 0.00014701081673285286, "loss": 12.2707, "step": 13365 }, { "epoch": 0.7278321183285346, "grad_norm": 0.5978285453649279, "learning_rate": 0.00014700303347773872, "loss": 12.2326, "step": 13366 }, { "epoch": 0.7278865723251176, "grad_norm": 0.5711855928250466, "learning_rate": 0.00014699524985712113, "loss": 12.3082, "step": 13367 }, { "epoch": 0.7279410263217005, "grad_norm": 0.5499721625163349, "learning_rate": 0.0001469874658710606, "loss": 12.134, "step": 13368 }, { "epoch": 0.7279954803182836, "grad_norm": 0.6450866127583862, "learning_rate": 0.00014697968151961764, "loss": 12.3181, "step": 13369 }, { "epoch": 0.7280499343148666, "grad_norm": 0.6587570915991241, "learning_rate": 0.00014697189680285282, "loss": 12.1078, "step": 13370 }, { "epoch": 0.7281043883114496, "grad_norm": 0.6449739855760339, "learning_rate": 0.0001469641117208266, "loss": 12.3408, "step": 13371 }, { "epoch": 0.7281588423080326, "grad_norm": 0.5962989797445586, "learning_rate": 0.00014695632627359962, "loss": 12.0901, "step": 13372 }, { "epoch": 0.7282132963046156, "grad_norm": 0.6419537874928303, "learning_rate": 0.00014694854046123237, "loss": 12.3548, "step": 13373 }, { "epoch": 0.7282677503011986, "grad_norm": 0.6217723276634614, "learning_rate": 0.0001469407542837854, "loss": 12.3115, "step": 13374 }, { "epoch": 0.7283222042977817, "grad_norm": 0.5143051049374879, "learning_rate": 0.00014693296774131924, "loss": 12.1744, "step": 13375 }, { "epoch": 0.7283766582943647, "grad_norm": 0.7747848484144629, "learning_rate": 0.00014692518083389442, "loss": 12.3623, "step": 13376 }, { "epoch": 0.7284311122909477, "grad_norm": 0.6839726523055348, "learning_rate": 0.00014691739356157156, "loss": 12.1832, "step": 13377 }, { "epoch": 0.7284855662875307, "grad_norm": 0.5831370886270943, "learning_rate": 0.00014690960592441118, "loss": 12.2775, "step": 13378 }, { "epoch": 0.7285400202841137, "grad_norm": 0.6356590097338246, "learning_rate": 0.00014690181792247382, "loss": 12.2817, "step": 13379 }, { "epoch": 0.7285944742806968, "grad_norm": 0.6111939040509912, "learning_rate": 0.0001468940295558201, "loss": 12.32, "step": 13380 }, { "epoch": 0.7286489282772798, "grad_norm": 0.6661048890883065, "learning_rate": 0.0001468862408245105, "loss": 12.4681, "step": 13381 }, { "epoch": 0.7287033822738628, "grad_norm": 0.701286526648347, "learning_rate": 0.0001468784517286056, "loss": 12.3801, "step": 13382 }, { "epoch": 0.7287578362704458, "grad_norm": 0.6065563264179271, "learning_rate": 0.00014687066226816607, "loss": 12.1662, "step": 13383 }, { "epoch": 0.7288122902670288, "grad_norm": 0.6226504736642833, "learning_rate": 0.00014686287244325237, "loss": 12.2861, "step": 13384 }, { "epoch": 0.7288667442636118, "grad_norm": 0.6341344589623714, "learning_rate": 0.00014685508225392515, "loss": 12.3233, "step": 13385 }, { "epoch": 0.7289211982601949, "grad_norm": 0.6283968099363282, "learning_rate": 0.00014684729170024493, "loss": 12.2036, "step": 13386 }, { "epoch": 0.7289756522567779, "grad_norm": 0.5482355338897202, "learning_rate": 0.00014683950078227232, "loss": 12.1719, "step": 13387 }, { "epoch": 0.7290301062533608, "grad_norm": 0.5844528949461678, "learning_rate": 0.00014683170950006785, "loss": 12.1159, "step": 13388 }, { "epoch": 0.7290845602499438, "grad_norm": 0.6758906069619615, "learning_rate": 0.0001468239178536922, "loss": 12.4005, "step": 13389 }, { "epoch": 0.7291390142465268, "grad_norm": 0.5703040967089928, "learning_rate": 0.00014681612584320592, "loss": 12.2006, "step": 13390 }, { "epoch": 0.7291934682431098, "grad_norm": 0.5527985593987613, "learning_rate": 0.00014680833346866957, "loss": 12.1996, "step": 13391 }, { "epoch": 0.7292479222396929, "grad_norm": 0.5777786022154163, "learning_rate": 0.00014680054073014378, "loss": 12.3138, "step": 13392 }, { "epoch": 0.7293023762362759, "grad_norm": 0.546652622965535, "learning_rate": 0.00014679274762768911, "loss": 12.2746, "step": 13393 }, { "epoch": 0.7293568302328589, "grad_norm": 0.5675033988841082, "learning_rate": 0.00014678495416136622, "loss": 12.2041, "step": 13394 }, { "epoch": 0.7294112842294419, "grad_norm": 0.5321126755557474, "learning_rate": 0.00014677716033123568, "loss": 12.1684, "step": 13395 }, { "epoch": 0.7294657382260249, "grad_norm": 0.5728224465252162, "learning_rate": 0.0001467693661373581, "loss": 12.18, "step": 13396 }, { "epoch": 0.7295201922226079, "grad_norm": 0.5844124602179768, "learning_rate": 0.00014676157157979407, "loss": 12.3234, "step": 13397 }, { "epoch": 0.729574646219191, "grad_norm": 0.5655242088204449, "learning_rate": 0.00014675377665860422, "loss": 12.2359, "step": 13398 }, { "epoch": 0.729629100215774, "grad_norm": 0.5145490553683293, "learning_rate": 0.00014674598137384915, "loss": 12.1504, "step": 13399 }, { "epoch": 0.729683554212357, "grad_norm": 0.5206155089772565, "learning_rate": 0.0001467381857255895, "loss": 12.3104, "step": 13400 }, { "epoch": 0.72973800820894, "grad_norm": 0.5732089652040417, "learning_rate": 0.0001467303897138859, "loss": 12.3735, "step": 13401 }, { "epoch": 0.729792462205523, "grad_norm": 0.5893013514173729, "learning_rate": 0.00014672259333879892, "loss": 12.3674, "step": 13402 }, { "epoch": 0.729846916202106, "grad_norm": 0.5453283379493583, "learning_rate": 0.00014671479660038921, "loss": 12.3758, "step": 13403 }, { "epoch": 0.7299013701986891, "grad_norm": 0.5553399861077735, "learning_rate": 0.00014670699949871746, "loss": 12.2429, "step": 13404 }, { "epoch": 0.729955824195272, "grad_norm": 0.5781061733118386, "learning_rate": 0.00014669920203384422, "loss": 12.325, "step": 13405 }, { "epoch": 0.730010278191855, "grad_norm": 0.6196937505905702, "learning_rate": 0.00014669140420583017, "loss": 12.3664, "step": 13406 }, { "epoch": 0.730064732188438, "grad_norm": 0.5884521889356856, "learning_rate": 0.0001466836060147359, "loss": 12.2777, "step": 13407 }, { "epoch": 0.730119186185021, "grad_norm": 0.672981152642552, "learning_rate": 0.00014667580746062208, "loss": 12.359, "step": 13408 }, { "epoch": 0.7301736401816041, "grad_norm": 0.5582101153546333, "learning_rate": 0.00014666800854354938, "loss": 12.3577, "step": 13409 }, { "epoch": 0.7302280941781871, "grad_norm": 0.573828160756465, "learning_rate": 0.00014666020926357843, "loss": 12.2839, "step": 13410 }, { "epoch": 0.7302825481747701, "grad_norm": 0.5411774195844762, "learning_rate": 0.00014665240962076983, "loss": 12.3829, "step": 13411 }, { "epoch": 0.7303370021713531, "grad_norm": 0.5269134935140668, "learning_rate": 0.0001466446096151843, "loss": 12.1819, "step": 13412 }, { "epoch": 0.7303914561679361, "grad_norm": 0.5500051890399976, "learning_rate": 0.00014663680924688243, "loss": 12.2836, "step": 13413 }, { "epoch": 0.7304459101645191, "grad_norm": 0.5691310142421843, "learning_rate": 0.00014662900851592493, "loss": 12.0934, "step": 13414 }, { "epoch": 0.7305003641611022, "grad_norm": 0.5971972214596137, "learning_rate": 0.00014662120742237245, "loss": 12.2853, "step": 13415 }, { "epoch": 0.7305548181576852, "grad_norm": 0.5193671565095754, "learning_rate": 0.00014661340596628563, "loss": 12.2221, "step": 13416 }, { "epoch": 0.7306092721542682, "grad_norm": 0.6140800043494785, "learning_rate": 0.00014660560414772516, "loss": 12.2681, "step": 13417 }, { "epoch": 0.7306637261508512, "grad_norm": 0.5116223750237714, "learning_rate": 0.00014659780196675168, "loss": 12.0489, "step": 13418 }, { "epoch": 0.7307181801474342, "grad_norm": 0.5778048595206212, "learning_rate": 0.00014658999942342588, "loss": 12.2313, "step": 13419 }, { "epoch": 0.7307726341440172, "grad_norm": 0.5833576137200138, "learning_rate": 0.00014658219651780843, "loss": 12.3763, "step": 13420 }, { "epoch": 0.7308270881406003, "grad_norm": 0.5915683602865042, "learning_rate": 0.00014657439324996, "loss": 12.233, "step": 13421 }, { "epoch": 0.7308815421371833, "grad_norm": 0.5454616527970554, "learning_rate": 0.0001465665896199413, "loss": 12.0943, "step": 13422 }, { "epoch": 0.7309359961337663, "grad_norm": 0.5649149517234928, "learning_rate": 0.000146558785627813, "loss": 12.0765, "step": 13423 }, { "epoch": 0.7309904501303492, "grad_norm": 0.5516421296053254, "learning_rate": 0.00014655098127363574, "loss": 12.167, "step": 13424 }, { "epoch": 0.7310449041269322, "grad_norm": 0.5613095485317154, "learning_rate": 0.00014654317655747026, "loss": 12.3, "step": 13425 }, { "epoch": 0.7310993581235152, "grad_norm": 0.6169308152750115, "learning_rate": 0.00014653537147937723, "loss": 12.2698, "step": 13426 }, { "epoch": 0.7311538121200983, "grad_norm": 0.5861048880352672, "learning_rate": 0.00014652756603941735, "loss": 12.1751, "step": 13427 }, { "epoch": 0.7312082661166813, "grad_norm": 0.6425101239536442, "learning_rate": 0.00014651976023765133, "loss": 12.2381, "step": 13428 }, { "epoch": 0.7312627201132643, "grad_norm": 0.5962613060442913, "learning_rate": 0.00014651195407413984, "loss": 12.1845, "step": 13429 }, { "epoch": 0.7313171741098473, "grad_norm": 0.5802762355006588, "learning_rate": 0.0001465041475489436, "loss": 12.3437, "step": 13430 }, { "epoch": 0.7313716281064303, "grad_norm": 0.6089072591955769, "learning_rate": 0.00014649634066212327, "loss": 12.2565, "step": 13431 }, { "epoch": 0.7314260821030133, "grad_norm": 0.5502931701197896, "learning_rate": 0.00014648853341373965, "loss": 12.2332, "step": 13432 }, { "epoch": 0.7314805360995964, "grad_norm": 0.624853396439485, "learning_rate": 0.00014648072580385337, "loss": 12.1673, "step": 13433 }, { "epoch": 0.7315349900961794, "grad_norm": 0.7874743943545011, "learning_rate": 0.00014647291783252518, "loss": 12.4053, "step": 13434 }, { "epoch": 0.7315894440927624, "grad_norm": 0.6302379277319068, "learning_rate": 0.0001464651094998158, "loss": 12.3989, "step": 13435 }, { "epoch": 0.7316438980893454, "grad_norm": 0.5415596819633678, "learning_rate": 0.00014645730080578592, "loss": 12.1859, "step": 13436 }, { "epoch": 0.7316983520859284, "grad_norm": 0.6349985580522457, "learning_rate": 0.00014644949175049627, "loss": 12.2451, "step": 13437 }, { "epoch": 0.7317528060825114, "grad_norm": 0.5329773431001347, "learning_rate": 0.0001464416823340076, "loss": 12.2649, "step": 13438 }, { "epoch": 0.7318072600790945, "grad_norm": 0.6348334862342842, "learning_rate": 0.00014643387255638062, "loss": 12.2546, "step": 13439 }, { "epoch": 0.7318617140756775, "grad_norm": 0.5907952876566426, "learning_rate": 0.00014642606241767605, "loss": 12.263, "step": 13440 }, { "epoch": 0.7319161680722605, "grad_norm": 0.674265035975136, "learning_rate": 0.00014641825191795464, "loss": 12.3306, "step": 13441 }, { "epoch": 0.7319706220688434, "grad_norm": 0.5746427347799835, "learning_rate": 0.0001464104410572771, "loss": 12.2673, "step": 13442 }, { "epoch": 0.7320250760654264, "grad_norm": 0.9009121743624731, "learning_rate": 0.0001464026298357042, "loss": 12.3275, "step": 13443 }, { "epoch": 0.7320795300620095, "grad_norm": 0.8717593714396172, "learning_rate": 0.00014639481825329668, "loss": 12.4716, "step": 13444 }, { "epoch": 0.7321339840585925, "grad_norm": 0.5369226230477494, "learning_rate": 0.0001463870063101153, "loss": 12.1858, "step": 13445 }, { "epoch": 0.7321884380551755, "grad_norm": 0.7186729605792112, "learning_rate": 0.0001463791940062207, "loss": 12.3621, "step": 13446 }, { "epoch": 0.7322428920517585, "grad_norm": 0.6834082893354678, "learning_rate": 0.00014637138134167377, "loss": 12.2566, "step": 13447 }, { "epoch": 0.7322973460483415, "grad_norm": 0.5394081582792316, "learning_rate": 0.00014636356831653518, "loss": 12.2781, "step": 13448 }, { "epoch": 0.7323518000449245, "grad_norm": 0.7195940712977363, "learning_rate": 0.0001463557549308657, "loss": 12.3361, "step": 13449 }, { "epoch": 0.7324062540415076, "grad_norm": 0.6233426938470448, "learning_rate": 0.00014634794118472612, "loss": 12.3518, "step": 13450 }, { "epoch": 0.7324607080380906, "grad_norm": 0.5872558256245961, "learning_rate": 0.00014634012707817718, "loss": 12.3114, "step": 13451 }, { "epoch": 0.7325151620346736, "grad_norm": 0.5707993151414409, "learning_rate": 0.00014633231261127963, "loss": 12.0597, "step": 13452 }, { "epoch": 0.7325696160312566, "grad_norm": 0.750199557350371, "learning_rate": 0.0001463244977840942, "loss": 12.3008, "step": 13453 }, { "epoch": 0.7326240700278396, "grad_norm": 0.5962135364609051, "learning_rate": 0.00014631668259668177, "loss": 12.2241, "step": 13454 }, { "epoch": 0.7326785240244226, "grad_norm": 0.5914875532602344, "learning_rate": 0.00014630886704910306, "loss": 12.2123, "step": 13455 }, { "epoch": 0.7327329780210057, "grad_norm": 0.6519128543769156, "learning_rate": 0.00014630105114141878, "loss": 12.1622, "step": 13456 }, { "epoch": 0.7327874320175887, "grad_norm": 0.5820134384859893, "learning_rate": 0.0001462932348736898, "loss": 12.169, "step": 13457 }, { "epoch": 0.7328418860141717, "grad_norm": 0.6285044893524522, "learning_rate": 0.00014628541824597685, "loss": 12.3364, "step": 13458 }, { "epoch": 0.7328963400107547, "grad_norm": 0.5651507324870145, "learning_rate": 0.0001462776012583407, "loss": 12.2208, "step": 13459 }, { "epoch": 0.7329507940073376, "grad_norm": 0.6813457185451129, "learning_rate": 0.0001462697839108422, "loss": 12.3105, "step": 13460 }, { "epoch": 0.7330052480039206, "grad_norm": 0.6321546183033216, "learning_rate": 0.0001462619662035421, "loss": 12.298, "step": 13461 }, { "epoch": 0.7330597020005037, "grad_norm": 0.5713403422122469, "learning_rate": 0.00014625414813650115, "loss": 12.1503, "step": 13462 }, { "epoch": 0.7331141559970867, "grad_norm": 0.5789797059059206, "learning_rate": 0.0001462463297097802, "loss": 12.3081, "step": 13463 }, { "epoch": 0.7331686099936697, "grad_norm": 0.6189844941170249, "learning_rate": 0.00014623851092344006, "loss": 12.2846, "step": 13464 }, { "epoch": 0.7332230639902527, "grad_norm": 0.6421197750744856, "learning_rate": 0.0001462306917775415, "loss": 12.302, "step": 13465 }, { "epoch": 0.7332775179868357, "grad_norm": 0.6500344081161316, "learning_rate": 0.00014622287227214533, "loss": 12.0938, "step": 13466 }, { "epoch": 0.7333319719834187, "grad_norm": 0.5543130907603288, "learning_rate": 0.00014621505240731234, "loss": 12.1314, "step": 13467 }, { "epoch": 0.7333864259800018, "grad_norm": 0.6276524792591983, "learning_rate": 0.00014620723218310334, "loss": 12.2113, "step": 13468 }, { "epoch": 0.7334408799765848, "grad_norm": 0.5433848208031679, "learning_rate": 0.0001461994115995792, "loss": 12.2855, "step": 13469 }, { "epoch": 0.7334953339731678, "grad_norm": 0.5441465096653444, "learning_rate": 0.00014619159065680065, "loss": 12.209, "step": 13470 }, { "epoch": 0.7335497879697508, "grad_norm": 0.5901166753899868, "learning_rate": 0.00014618376935482855, "loss": 12.341, "step": 13471 }, { "epoch": 0.7336042419663338, "grad_norm": 0.5837915344088275, "learning_rate": 0.00014617594769372372, "loss": 12.2193, "step": 13472 }, { "epoch": 0.7336586959629168, "grad_norm": 0.5186080736607912, "learning_rate": 0.000146168125673547, "loss": 12.2442, "step": 13473 }, { "epoch": 0.7337131499594999, "grad_norm": 0.5407521525804058, "learning_rate": 0.00014616030329435913, "loss": 12.2598, "step": 13474 }, { "epoch": 0.7337676039560829, "grad_norm": 0.4887043957052942, "learning_rate": 0.00014615248055622107, "loss": 12.1829, "step": 13475 }, { "epoch": 0.7338220579526659, "grad_norm": 0.5614679674040877, "learning_rate": 0.00014614465745919355, "loss": 12.3229, "step": 13476 }, { "epoch": 0.7338765119492489, "grad_norm": 0.631059913970582, "learning_rate": 0.00014613683400333742, "loss": 12.4136, "step": 13477 }, { "epoch": 0.7339309659458318, "grad_norm": 0.5837309056326555, "learning_rate": 0.00014612901018871356, "loss": 12.0118, "step": 13478 }, { "epoch": 0.733985419942415, "grad_norm": 0.6021308541180563, "learning_rate": 0.00014612118601538276, "loss": 12.3842, "step": 13479 }, { "epoch": 0.7340398739389979, "grad_norm": 0.6012447827579785, "learning_rate": 0.0001461133614834059, "loss": 12.3327, "step": 13480 }, { "epoch": 0.7340943279355809, "grad_norm": 0.5456836130548256, "learning_rate": 0.00014610553659284378, "loss": 12.2408, "step": 13481 }, { "epoch": 0.7341487819321639, "grad_norm": 0.5359770041758793, "learning_rate": 0.0001460977113437573, "loss": 12.0626, "step": 13482 }, { "epoch": 0.7342032359287469, "grad_norm": 0.5577707378911068, "learning_rate": 0.00014608988573620727, "loss": 12.3124, "step": 13483 }, { "epoch": 0.7342576899253299, "grad_norm": 0.5949322494568023, "learning_rate": 0.00014608205977025457, "loss": 12.1902, "step": 13484 }, { "epoch": 0.734312143921913, "grad_norm": 0.5502749814129778, "learning_rate": 0.00014607423344596004, "loss": 12.2777, "step": 13485 }, { "epoch": 0.734366597918496, "grad_norm": 0.632209872094979, "learning_rate": 0.00014606640676338457, "loss": 12.4037, "step": 13486 }, { "epoch": 0.734421051915079, "grad_norm": 0.557195963153172, "learning_rate": 0.00014605857972258896, "loss": 12.391, "step": 13487 }, { "epoch": 0.734475505911662, "grad_norm": 0.5943557669758027, "learning_rate": 0.00014605075232363413, "loss": 12.217, "step": 13488 }, { "epoch": 0.734529959908245, "grad_norm": 0.5943023457895839, "learning_rate": 0.0001460429245665809, "loss": 12.3385, "step": 13489 }, { "epoch": 0.734584413904828, "grad_norm": 0.7428476430130446, "learning_rate": 0.0001460350964514902, "loss": 12.2624, "step": 13490 }, { "epoch": 0.7346388679014111, "grad_norm": 0.5815614808212614, "learning_rate": 0.0001460272679784229, "loss": 12.2735, "step": 13491 }, { "epoch": 0.7346933218979941, "grad_norm": 0.5817982523327593, "learning_rate": 0.00014601943914743977, "loss": 12.332, "step": 13492 }, { "epoch": 0.7347477758945771, "grad_norm": 0.6430540982028815, "learning_rate": 0.00014601160995860178, "loss": 12.2342, "step": 13493 }, { "epoch": 0.7348022298911601, "grad_norm": 0.5849969352651594, "learning_rate": 0.00014600378041196982, "loss": 12.2903, "step": 13494 }, { "epoch": 0.7348566838877431, "grad_norm": 0.5780107159072119, "learning_rate": 0.00014599595050760475, "loss": 12.2955, "step": 13495 }, { "epoch": 0.734911137884326, "grad_norm": 0.5634969389091683, "learning_rate": 0.00014598812024556746, "loss": 12.2938, "step": 13496 }, { "epoch": 0.7349655918809092, "grad_norm": 0.5658156766141947, "learning_rate": 0.0001459802896259188, "loss": 12.2406, "step": 13497 }, { "epoch": 0.7350200458774921, "grad_norm": 0.5327649821036103, "learning_rate": 0.00014597245864871974, "loss": 12.3137, "step": 13498 }, { "epoch": 0.7350744998740751, "grad_norm": 0.5806255240789654, "learning_rate": 0.0001459646273140311, "loss": 12.2654, "step": 13499 }, { "epoch": 0.7351289538706581, "grad_norm": 0.6076178268505733, "learning_rate": 0.00014595679562191382, "loss": 12.1846, "step": 13500 }, { "epoch": 0.7351834078672411, "grad_norm": 0.5687460574489392, "learning_rate": 0.0001459489635724288, "loss": 12.1725, "step": 13501 }, { "epoch": 0.7352378618638241, "grad_norm": 0.8070955169800478, "learning_rate": 0.0001459411311656369, "loss": 12.2549, "step": 13502 }, { "epoch": 0.7352923158604072, "grad_norm": 0.6600330966428078, "learning_rate": 0.00014593329840159908, "loss": 12.2134, "step": 13503 }, { "epoch": 0.7353467698569902, "grad_norm": 0.5385361695723251, "learning_rate": 0.00014592546528037625, "loss": 11.9943, "step": 13504 }, { "epoch": 0.7354012238535732, "grad_norm": 0.5557788743641016, "learning_rate": 0.00014591763180202928, "loss": 12.2433, "step": 13505 }, { "epoch": 0.7354556778501562, "grad_norm": 0.5609249760965059, "learning_rate": 0.00014590979796661913, "loss": 12.0992, "step": 13506 }, { "epoch": 0.7355101318467392, "grad_norm": 0.5638842934128018, "learning_rate": 0.00014590196377420667, "loss": 12.2798, "step": 13507 }, { "epoch": 0.7355645858433222, "grad_norm": 0.5813055251948805, "learning_rate": 0.0001458941292248528, "loss": 12.4461, "step": 13508 }, { "epoch": 0.7356190398399053, "grad_norm": 0.6686401426161527, "learning_rate": 0.00014588629431861857, "loss": 12.4359, "step": 13509 }, { "epoch": 0.7356734938364883, "grad_norm": 0.5796752659915911, "learning_rate": 0.00014587845905556478, "loss": 12.2123, "step": 13510 }, { "epoch": 0.7357279478330713, "grad_norm": 0.5726235025614373, "learning_rate": 0.0001458706234357524, "loss": 12.1894, "step": 13511 }, { "epoch": 0.7357824018296543, "grad_norm": 0.5743503966196527, "learning_rate": 0.00014586278745924234, "loss": 12.209, "step": 13512 }, { "epoch": 0.7358368558262373, "grad_norm": 0.6588986599637868, "learning_rate": 0.00014585495112609558, "loss": 12.3586, "step": 13513 }, { "epoch": 0.7358913098228204, "grad_norm": 0.5738888272484045, "learning_rate": 0.00014584711443637298, "loss": 12.2194, "step": 13514 }, { "epoch": 0.7359457638194034, "grad_norm": 0.5493036599841871, "learning_rate": 0.00014583927739013558, "loss": 12.1909, "step": 13515 }, { "epoch": 0.7360002178159863, "grad_norm": 0.5487234894005516, "learning_rate": 0.00014583143998744426, "loss": 12.2235, "step": 13516 }, { "epoch": 0.7360546718125693, "grad_norm": 0.5138716167966478, "learning_rate": 0.00014582360222835998, "loss": 12.2678, "step": 13517 }, { "epoch": 0.7361091258091523, "grad_norm": 0.5461539491689609, "learning_rate": 0.0001458157641129437, "loss": 12.2597, "step": 13518 }, { "epoch": 0.7361635798057353, "grad_norm": 0.5466894629268688, "learning_rate": 0.0001458079256412563, "loss": 12.2962, "step": 13519 }, { "epoch": 0.7362180338023184, "grad_norm": 0.5822005527745485, "learning_rate": 0.00014580008681335885, "loss": 12.2964, "step": 13520 }, { "epoch": 0.7362724877989014, "grad_norm": 0.5800259077540941, "learning_rate": 0.00014579224762931224, "loss": 12.2507, "step": 13521 }, { "epoch": 0.7363269417954844, "grad_norm": 0.6048416669196667, "learning_rate": 0.0001457844080891774, "loss": 12.1804, "step": 13522 }, { "epoch": 0.7363813957920674, "grad_norm": 0.5683179294002261, "learning_rate": 0.00014577656819301534, "loss": 12.276, "step": 13523 }, { "epoch": 0.7364358497886504, "grad_norm": 0.6042767266846831, "learning_rate": 0.000145768727940887, "loss": 12.3961, "step": 13524 }, { "epoch": 0.7364903037852334, "grad_norm": 0.5971000332059202, "learning_rate": 0.00014576088733285334, "loss": 12.1241, "step": 13525 }, { "epoch": 0.7365447577818165, "grad_norm": 0.5960415410498545, "learning_rate": 0.00014575304636897538, "loss": 12.2755, "step": 13526 }, { "epoch": 0.7365992117783995, "grad_norm": 0.5501720767126754, "learning_rate": 0.00014574520504931403, "loss": 12.2194, "step": 13527 }, { "epoch": 0.7366536657749825, "grad_norm": 0.5525318178591357, "learning_rate": 0.00014573736337393031, "loss": 12.2742, "step": 13528 }, { "epoch": 0.7367081197715655, "grad_norm": 0.5931735852925304, "learning_rate": 0.0001457295213428852, "loss": 12.2892, "step": 13529 }, { "epoch": 0.7367625737681485, "grad_norm": 0.574905675319439, "learning_rate": 0.00014572167895623962, "loss": 12.2478, "step": 13530 }, { "epoch": 0.7368170277647315, "grad_norm": 0.5614724178224977, "learning_rate": 0.00014571383621405463, "loss": 12.2513, "step": 13531 }, { "epoch": 0.7368714817613146, "grad_norm": 0.5164268210770245, "learning_rate": 0.0001457059931163912, "loss": 12.3159, "step": 13532 }, { "epoch": 0.7369259357578976, "grad_norm": 0.5551055122116084, "learning_rate": 0.00014569814966331027, "loss": 12.2811, "step": 13533 }, { "epoch": 0.7369803897544805, "grad_norm": 0.7238589607973043, "learning_rate": 0.00014569030585487286, "loss": 12.2553, "step": 13534 }, { "epoch": 0.7370348437510635, "grad_norm": 0.6121042465176388, "learning_rate": 0.00014568246169114, "loss": 12.2991, "step": 13535 }, { "epoch": 0.7370892977476465, "grad_norm": 0.5443821640990063, "learning_rate": 0.00014567461717217262, "loss": 12.2379, "step": 13536 }, { "epoch": 0.7371437517442295, "grad_norm": 0.5899600381512372, "learning_rate": 0.00014566677229803178, "loss": 12.2119, "step": 13537 }, { "epoch": 0.7371982057408126, "grad_norm": 0.5833225779741186, "learning_rate": 0.00014565892706877847, "loss": 12.3063, "step": 13538 }, { "epoch": 0.7372526597373956, "grad_norm": 0.5632297981423725, "learning_rate": 0.00014565108148447366, "loss": 12.2568, "step": 13539 }, { "epoch": 0.7373071137339786, "grad_norm": 0.5456236956874208, "learning_rate": 0.0001456432355451784, "loss": 12.2761, "step": 13540 }, { "epoch": 0.7373615677305616, "grad_norm": 0.5495704263049151, "learning_rate": 0.00014563538925095368, "loss": 12.2297, "step": 13541 }, { "epoch": 0.7374160217271446, "grad_norm": 0.5689853068119046, "learning_rate": 0.0001456275426018605, "loss": 12.2283, "step": 13542 }, { "epoch": 0.7374704757237277, "grad_norm": 0.5180836306877948, "learning_rate": 0.00014561969559795995, "loss": 12.1401, "step": 13543 }, { "epoch": 0.7375249297203107, "grad_norm": 0.5718271747766805, "learning_rate": 0.00014561184823931296, "loss": 12.3157, "step": 13544 }, { "epoch": 0.7375793837168937, "grad_norm": 0.5746504330019291, "learning_rate": 0.0001456040005259806, "loss": 12.1731, "step": 13545 }, { "epoch": 0.7376338377134767, "grad_norm": 0.6065461245363518, "learning_rate": 0.00014559615245802386, "loss": 12.352, "step": 13546 }, { "epoch": 0.7376882917100597, "grad_norm": 0.5784360166514776, "learning_rate": 0.00014558830403550382, "loss": 12.3759, "step": 13547 }, { "epoch": 0.7377427457066427, "grad_norm": 0.5602189288940183, "learning_rate": 0.00014558045525848147, "loss": 12.1386, "step": 13548 }, { "epoch": 0.7377971997032258, "grad_norm": 0.5670399784838877, "learning_rate": 0.00014557260612701786, "loss": 12.2799, "step": 13549 }, { "epoch": 0.7378516536998088, "grad_norm": 0.5896749388285503, "learning_rate": 0.00014556475664117405, "loss": 12.3304, "step": 13550 }, { "epoch": 0.7379061076963918, "grad_norm": 0.5927587892809875, "learning_rate": 0.00014555690680101102, "loss": 12.3231, "step": 13551 }, { "epoch": 0.7379605616929747, "grad_norm": 0.5778156056704327, "learning_rate": 0.00014554905660658983, "loss": 12.2669, "step": 13552 }, { "epoch": 0.7380150156895577, "grad_norm": 0.6212925554269098, "learning_rate": 0.00014554120605797156, "loss": 12.268, "step": 13553 }, { "epoch": 0.7380694696861407, "grad_norm": 0.5703826293906922, "learning_rate": 0.0001455333551552172, "loss": 12.1634, "step": 13554 }, { "epoch": 0.7381239236827238, "grad_norm": 0.5892926596679476, "learning_rate": 0.00014552550389838791, "loss": 12.362, "step": 13555 }, { "epoch": 0.7381783776793068, "grad_norm": 0.6057136352760333, "learning_rate": 0.00014551765228754463, "loss": 12.2202, "step": 13556 }, { "epoch": 0.7382328316758898, "grad_norm": 0.5703198221862579, "learning_rate": 0.00014550980032274841, "loss": 12.2498, "step": 13557 }, { "epoch": 0.7382872856724728, "grad_norm": 0.627692916894544, "learning_rate": 0.00014550194800406037, "loss": 12.3628, "step": 13558 }, { "epoch": 0.7383417396690558, "grad_norm": 0.6016477926686616, "learning_rate": 0.0001454940953315416, "loss": 12.1268, "step": 13559 }, { "epoch": 0.7383961936656388, "grad_norm": 0.6573337693225969, "learning_rate": 0.00014548624230525307, "loss": 12.3016, "step": 13560 }, { "epoch": 0.7384506476622219, "grad_norm": 0.575658458983648, "learning_rate": 0.00014547838892525592, "loss": 12.2689, "step": 13561 }, { "epoch": 0.7385051016588049, "grad_norm": 0.583711733930323, "learning_rate": 0.00014547053519161116, "loss": 12.2254, "step": 13562 }, { "epoch": 0.7385595556553879, "grad_norm": 0.6919837322214447, "learning_rate": 0.0001454626811043799, "loss": 12.4156, "step": 13563 }, { "epoch": 0.7386140096519709, "grad_norm": 0.5667989851626005, "learning_rate": 0.0001454548266636232, "loss": 12.2173, "step": 13564 }, { "epoch": 0.7386684636485539, "grad_norm": 0.5967847312016634, "learning_rate": 0.00014544697186940218, "loss": 12.3047, "step": 13565 }, { "epoch": 0.7387229176451369, "grad_norm": 0.5783482130215254, "learning_rate": 0.00014543911672177786, "loss": 12.393, "step": 13566 }, { "epoch": 0.73877737164172, "grad_norm": 0.6326278799926038, "learning_rate": 0.00014543126122081138, "loss": 12.3051, "step": 13567 }, { "epoch": 0.738831825638303, "grad_norm": 0.5975802047422007, "learning_rate": 0.00014542340536656374, "loss": 12.1247, "step": 13568 }, { "epoch": 0.738886279634886, "grad_norm": 0.6201015768827072, "learning_rate": 0.0001454155491590961, "loss": 12.2745, "step": 13569 }, { "epoch": 0.738940733631469, "grad_norm": 0.5977105967525396, "learning_rate": 0.00014540769259846953, "loss": 12.3551, "step": 13570 }, { "epoch": 0.7389951876280519, "grad_norm": 0.5992047393148686, "learning_rate": 0.00014539983568474517, "loss": 12.2941, "step": 13571 }, { "epoch": 0.7390496416246349, "grad_norm": 0.5986098983608372, "learning_rate": 0.00014539197841798403, "loss": 12.2243, "step": 13572 }, { "epoch": 0.739104095621218, "grad_norm": 0.562071760476364, "learning_rate": 0.00014538412079824728, "loss": 12.3107, "step": 13573 }, { "epoch": 0.739158549617801, "grad_norm": 0.5992289735675804, "learning_rate": 0.00014537626282559596, "loss": 12.1891, "step": 13574 }, { "epoch": 0.739213003614384, "grad_norm": 0.5848912184261011, "learning_rate": 0.00014536840450009124, "loss": 12.1678, "step": 13575 }, { "epoch": 0.739267457610967, "grad_norm": 0.5714703977133936, "learning_rate": 0.0001453605458217942, "loss": 12.2443, "step": 13576 }, { "epoch": 0.73932191160755, "grad_norm": 0.5760218558487907, "learning_rate": 0.00014535268679076595, "loss": 12.3124, "step": 13577 }, { "epoch": 0.7393763656041331, "grad_norm": 0.6493899149913374, "learning_rate": 0.00014534482740706758, "loss": 12.4153, "step": 13578 }, { "epoch": 0.7394308196007161, "grad_norm": 0.5886954646525789, "learning_rate": 0.00014533696767076023, "loss": 12.331, "step": 13579 }, { "epoch": 0.7394852735972991, "grad_norm": 0.5521788953301661, "learning_rate": 0.00014532910758190503, "loss": 12.2524, "step": 13580 }, { "epoch": 0.7395397275938821, "grad_norm": 0.5263278183981711, "learning_rate": 0.0001453212471405631, "loss": 12.3494, "step": 13581 }, { "epoch": 0.7395941815904651, "grad_norm": 0.5995637429608626, "learning_rate": 0.00014531338634679553, "loss": 12.2439, "step": 13582 }, { "epoch": 0.7396486355870481, "grad_norm": 0.6126319407071071, "learning_rate": 0.00014530552520066348, "loss": 12.3076, "step": 13583 }, { "epoch": 0.7397030895836312, "grad_norm": 0.5829210288313339, "learning_rate": 0.00014529766370222807, "loss": 12.1716, "step": 13584 }, { "epoch": 0.7397575435802142, "grad_norm": 0.5936945728405048, "learning_rate": 0.0001452898018515504, "loss": 12.2531, "step": 13585 }, { "epoch": 0.7398119975767972, "grad_norm": 0.5985701987761367, "learning_rate": 0.00014528193964869168, "loss": 12.1612, "step": 13586 }, { "epoch": 0.7398664515733802, "grad_norm": 0.5641246242590785, "learning_rate": 0.00014527407709371298, "loss": 12.1089, "step": 13587 }, { "epoch": 0.7399209055699631, "grad_norm": 0.6151635949583258, "learning_rate": 0.00014526621418667546, "loss": 12.2131, "step": 13588 }, { "epoch": 0.7399753595665461, "grad_norm": 0.6159449104099786, "learning_rate": 0.0001452583509276403, "loss": 12.302, "step": 13589 }, { "epoch": 0.7400298135631292, "grad_norm": 0.6151269426786296, "learning_rate": 0.00014525048731666858, "loss": 12.2723, "step": 13590 }, { "epoch": 0.7400842675597122, "grad_norm": 0.657999967620929, "learning_rate": 0.00014524262335382149, "loss": 12.1819, "step": 13591 }, { "epoch": 0.7401387215562952, "grad_norm": 0.5417844327833512, "learning_rate": 0.00014523475903916016, "loss": 12.2666, "step": 13592 }, { "epoch": 0.7401931755528782, "grad_norm": 0.659413396793381, "learning_rate": 0.00014522689437274577, "loss": 12.3194, "step": 13593 }, { "epoch": 0.7402476295494612, "grad_norm": 0.6522191671164157, "learning_rate": 0.00014521902935463947, "loss": 12.2682, "step": 13594 }, { "epoch": 0.7403020835460442, "grad_norm": 0.5712575409989621, "learning_rate": 0.00014521116398490241, "loss": 12.2528, "step": 13595 }, { "epoch": 0.7403565375426273, "grad_norm": 0.5925659529071102, "learning_rate": 0.00014520329826359576, "loss": 12.1879, "step": 13596 }, { "epoch": 0.7404109915392103, "grad_norm": 0.6085032636756643, "learning_rate": 0.00014519543219078068, "loss": 12.2377, "step": 13597 }, { "epoch": 0.7404654455357933, "grad_norm": 0.548249484685204, "learning_rate": 0.00014518756576651834, "loss": 12.3519, "step": 13598 }, { "epoch": 0.7405198995323763, "grad_norm": 0.5862348515271124, "learning_rate": 0.0001451796989908699, "loss": 12.3024, "step": 13599 }, { "epoch": 0.7405743535289593, "grad_norm": 0.5617435120340949, "learning_rate": 0.00014517183186389657, "loss": 12.3191, "step": 13600 }, { "epoch": 0.7406288075255423, "grad_norm": 0.5561225287567393, "learning_rate": 0.00014516396438565948, "loss": 12.2676, "step": 13601 }, { "epoch": 0.7406832615221254, "grad_norm": 0.6384067084986527, "learning_rate": 0.00014515609655621985, "loss": 12.3782, "step": 13602 }, { "epoch": 0.7407377155187084, "grad_norm": 0.530157034517588, "learning_rate": 0.00014514822837563882, "loss": 12.2542, "step": 13603 }, { "epoch": 0.7407921695152914, "grad_norm": 0.5767821558742593, "learning_rate": 0.00014514035984397757, "loss": 12.2971, "step": 13604 }, { "epoch": 0.7408466235118744, "grad_norm": 0.6036355364934566, "learning_rate": 0.00014513249096129735, "loss": 12.2806, "step": 13605 }, { "epoch": 0.7409010775084574, "grad_norm": 0.6545513956221317, "learning_rate": 0.0001451246217276593, "loss": 12.0547, "step": 13606 }, { "epoch": 0.7409555315050403, "grad_norm": 0.670134175781721, "learning_rate": 0.00014511675214312462, "loss": 12.1504, "step": 13607 }, { "epoch": 0.7410099855016234, "grad_norm": 0.6100622110757251, "learning_rate": 0.00014510888220775454, "loss": 12.2609, "step": 13608 }, { "epoch": 0.7410644394982064, "grad_norm": 0.8321259426406292, "learning_rate": 0.00014510101192161018, "loss": 12.1772, "step": 13609 }, { "epoch": 0.7411188934947894, "grad_norm": 0.5704049760476888, "learning_rate": 0.00014509314128475283, "loss": 12.2398, "step": 13610 }, { "epoch": 0.7411733474913724, "grad_norm": 0.5847939629309116, "learning_rate": 0.00014508527029724366, "loss": 12.2684, "step": 13611 }, { "epoch": 0.7412278014879554, "grad_norm": 0.5778186625862041, "learning_rate": 0.00014507739895914382, "loss": 12.2927, "step": 13612 }, { "epoch": 0.7412822554845385, "grad_norm": 0.6271163582322844, "learning_rate": 0.00014506952727051458, "loss": 12.2311, "step": 13613 }, { "epoch": 0.7413367094811215, "grad_norm": 0.569653045038958, "learning_rate": 0.00014506165523141712, "loss": 12.28, "step": 13614 }, { "epoch": 0.7413911634777045, "grad_norm": 0.6158185408598926, "learning_rate": 0.0001450537828419127, "loss": 12.2618, "step": 13615 }, { "epoch": 0.7414456174742875, "grad_norm": 0.6184929667207226, "learning_rate": 0.0001450459101020625, "loss": 12.2997, "step": 13616 }, { "epoch": 0.7415000714708705, "grad_norm": 0.5651499843666569, "learning_rate": 0.00014503803701192776, "loss": 12.228, "step": 13617 }, { "epoch": 0.7415545254674535, "grad_norm": 0.5706628227808465, "learning_rate": 0.00014503016357156969, "loss": 12.264, "step": 13618 }, { "epoch": 0.7416089794640366, "grad_norm": 0.5585073128263942, "learning_rate": 0.0001450222897810495, "loss": 12.2749, "step": 13619 }, { "epoch": 0.7416634334606196, "grad_norm": 0.6289826168740338, "learning_rate": 0.00014501441564042847, "loss": 12.3302, "step": 13620 }, { "epoch": 0.7417178874572026, "grad_norm": 0.5512294887972594, "learning_rate": 0.00014500654114976778, "loss": 12.2143, "step": 13621 }, { "epoch": 0.7417723414537856, "grad_norm": 0.6371616506621154, "learning_rate": 0.00014499866630912866, "loss": 12.4681, "step": 13622 }, { "epoch": 0.7418267954503686, "grad_norm": 0.5869525221861992, "learning_rate": 0.00014499079111857235, "loss": 12.212, "step": 13623 }, { "epoch": 0.7418812494469516, "grad_norm": 0.5600136938066297, "learning_rate": 0.00014498291557816012, "loss": 12.2693, "step": 13624 }, { "epoch": 0.7419357034435347, "grad_norm": 0.6598005867431953, "learning_rate": 0.00014497503968795324, "loss": 12.3359, "step": 13625 }, { "epoch": 0.7419901574401176, "grad_norm": 0.5939656040106014, "learning_rate": 0.00014496716344801288, "loss": 12.2251, "step": 13626 }, { "epoch": 0.7420446114367006, "grad_norm": 0.6090690177906303, "learning_rate": 0.0001449592868584003, "loss": 12.2218, "step": 13627 }, { "epoch": 0.7420990654332836, "grad_norm": 0.5774584014151134, "learning_rate": 0.00014495140991917674, "loss": 12.3191, "step": 13628 }, { "epoch": 0.7421535194298666, "grad_norm": 0.6263921067003643, "learning_rate": 0.0001449435326304035, "loss": 12.3032, "step": 13629 }, { "epoch": 0.7422079734264496, "grad_norm": 0.7222604042773875, "learning_rate": 0.00014493565499214183, "loss": 12.237, "step": 13630 }, { "epoch": 0.7422624274230327, "grad_norm": 0.6076500489786401, "learning_rate": 0.00014492777700445296, "loss": 12.3552, "step": 13631 }, { "epoch": 0.7423168814196157, "grad_norm": 0.6058780080649863, "learning_rate": 0.0001449198986673982, "loss": 12.2474, "step": 13632 }, { "epoch": 0.7423713354161987, "grad_norm": 0.5490641811425729, "learning_rate": 0.00014491201998103874, "loss": 12.2425, "step": 13633 }, { "epoch": 0.7424257894127817, "grad_norm": 0.5456156945385467, "learning_rate": 0.00014490414094543589, "loss": 12.1875, "step": 13634 }, { "epoch": 0.7424802434093647, "grad_norm": 0.6296439828779367, "learning_rate": 0.00014489626156065087, "loss": 12.244, "step": 13635 }, { "epoch": 0.7425346974059477, "grad_norm": 0.6227359546068482, "learning_rate": 0.00014488838182674503, "loss": 12.2818, "step": 13636 }, { "epoch": 0.7425891514025308, "grad_norm": 0.5959937363619823, "learning_rate": 0.00014488050174377962, "loss": 12.3168, "step": 13637 }, { "epoch": 0.7426436053991138, "grad_norm": 0.6016861050673398, "learning_rate": 0.00014487262131181587, "loss": 12.333, "step": 13638 }, { "epoch": 0.7426980593956968, "grad_norm": 0.5950905794185968, "learning_rate": 0.0001448647405309151, "loss": 12.3405, "step": 13639 }, { "epoch": 0.7427525133922798, "grad_norm": 0.5464111091884458, "learning_rate": 0.0001448568594011386, "loss": 12.1223, "step": 13640 }, { "epoch": 0.7428069673888628, "grad_norm": 0.7861765047156204, "learning_rate": 0.0001448489779225476, "loss": 12.2474, "step": 13641 }, { "epoch": 0.7428614213854458, "grad_norm": 0.6003190220576142, "learning_rate": 0.00014484109609520345, "loss": 12.2989, "step": 13642 }, { "epoch": 0.7429158753820289, "grad_norm": 0.5756507794999683, "learning_rate": 0.00014483321391916746, "loss": 12.2264, "step": 13643 }, { "epoch": 0.7429703293786118, "grad_norm": 0.6686777632634148, "learning_rate": 0.0001448253313945008, "loss": 12.2312, "step": 13644 }, { "epoch": 0.7430247833751948, "grad_norm": 0.6014856625238543, "learning_rate": 0.00014481744852126485, "loss": 12.1709, "step": 13645 }, { "epoch": 0.7430792373717778, "grad_norm": 0.6274728754383695, "learning_rate": 0.00014480956529952095, "loss": 12.1732, "step": 13646 }, { "epoch": 0.7431336913683608, "grad_norm": 0.6578097771737071, "learning_rate": 0.00014480168172933036, "loss": 12.1725, "step": 13647 }, { "epoch": 0.7431881453649439, "grad_norm": 0.6243897320773598, "learning_rate": 0.00014479379781075438, "loss": 12.2669, "step": 13648 }, { "epoch": 0.7432425993615269, "grad_norm": 0.5644404409732856, "learning_rate": 0.00014478591354385428, "loss": 12.2525, "step": 13649 }, { "epoch": 0.7432970533581099, "grad_norm": 0.5899929628289414, "learning_rate": 0.00014477802892869142, "loss": 12.3192, "step": 13650 }, { "epoch": 0.7433515073546929, "grad_norm": 0.6204189531105295, "learning_rate": 0.0001447701439653271, "loss": 12.2539, "step": 13651 }, { "epoch": 0.7434059613512759, "grad_norm": 0.5978774508167788, "learning_rate": 0.00014476225865382264, "loss": 12.2512, "step": 13652 }, { "epoch": 0.7434604153478589, "grad_norm": 0.5607476504150831, "learning_rate": 0.00014475437299423937, "loss": 12.4198, "step": 13653 }, { "epoch": 0.743514869344442, "grad_norm": 0.7064522212229157, "learning_rate": 0.00014474648698663856, "loss": 12.3364, "step": 13654 }, { "epoch": 0.743569323341025, "grad_norm": 0.5292842905349883, "learning_rate": 0.00014473860063108157, "loss": 12.2842, "step": 13655 }, { "epoch": 0.743623777337608, "grad_norm": 0.6604362435987455, "learning_rate": 0.0001447307139276297, "loss": 12.2738, "step": 13656 }, { "epoch": 0.743678231334191, "grad_norm": 0.6411131805921383, "learning_rate": 0.00014472282687634432, "loss": 12.3607, "step": 13657 }, { "epoch": 0.743732685330774, "grad_norm": 0.5740431821060552, "learning_rate": 0.00014471493947728673, "loss": 12.2158, "step": 13658 }, { "epoch": 0.743787139327357, "grad_norm": 0.650154708336648, "learning_rate": 0.00014470705173051827, "loss": 12.3459, "step": 13659 }, { "epoch": 0.7438415933239401, "grad_norm": 0.6166673090641452, "learning_rate": 0.0001446991636361003, "loss": 12.2296, "step": 13660 }, { "epoch": 0.7438960473205231, "grad_norm": 0.6630240435802148, "learning_rate": 0.00014469127519409414, "loss": 12.2581, "step": 13661 }, { "epoch": 0.743950501317106, "grad_norm": 0.6285502485037263, "learning_rate": 0.0001446833864045611, "loss": 12.3641, "step": 13662 }, { "epoch": 0.744004955313689, "grad_norm": 0.6243840836632044, "learning_rate": 0.00014467549726756256, "loss": 12.2185, "step": 13663 }, { "epoch": 0.744059409310272, "grad_norm": 0.594732247309961, "learning_rate": 0.00014466760778315986, "loss": 12.1288, "step": 13664 }, { "epoch": 0.744113863306855, "grad_norm": 0.5717664112534702, "learning_rate": 0.00014465971795141436, "loss": 12.2183, "step": 13665 }, { "epoch": 0.7441683173034381, "grad_norm": 0.6705905603648593, "learning_rate": 0.0001446518277723874, "loss": 12.3219, "step": 13666 }, { "epoch": 0.7442227713000211, "grad_norm": 0.6370616353146389, "learning_rate": 0.00014464393724614033, "loss": 12.3651, "step": 13667 }, { "epoch": 0.7442772252966041, "grad_norm": 0.558520232425937, "learning_rate": 0.00014463604637273453, "loss": 12.2007, "step": 13668 }, { "epoch": 0.7443316792931871, "grad_norm": 0.6498084660186766, "learning_rate": 0.00014462815515223137, "loss": 12.3098, "step": 13669 }, { "epoch": 0.7443861332897701, "grad_norm": 0.7689024411652365, "learning_rate": 0.00014462026358469214, "loss": 12.232, "step": 13670 }, { "epoch": 0.7444405872863531, "grad_norm": 0.5720853515258976, "learning_rate": 0.0001446123716701783, "loss": 12.2595, "step": 13671 }, { "epoch": 0.7444950412829362, "grad_norm": 0.5988148788680487, "learning_rate": 0.00014460447940875114, "loss": 12.3409, "step": 13672 }, { "epoch": 0.7445494952795192, "grad_norm": 0.7856904621089689, "learning_rate": 0.0001445965868004721, "loss": 12.3772, "step": 13673 }, { "epoch": 0.7446039492761022, "grad_norm": 0.7043842824252822, "learning_rate": 0.0001445886938454025, "loss": 12.2397, "step": 13674 }, { "epoch": 0.7446584032726852, "grad_norm": 0.5913036266727861, "learning_rate": 0.00014458080054360374, "loss": 12.2747, "step": 13675 }, { "epoch": 0.7447128572692682, "grad_norm": 0.6960974142798146, "learning_rate": 0.00014457290689513723, "loss": 12.3003, "step": 13676 }, { "epoch": 0.7447673112658513, "grad_norm": 0.6087124615164062, "learning_rate": 0.00014456501290006427, "loss": 12.2339, "step": 13677 }, { "epoch": 0.7448217652624343, "grad_norm": 0.5571231214937553, "learning_rate": 0.00014455711855844636, "loss": 12.1596, "step": 13678 }, { "epoch": 0.7448762192590173, "grad_norm": 0.5879599096930934, "learning_rate": 0.00014454922387034476, "loss": 12.2023, "step": 13679 }, { "epoch": 0.7449306732556003, "grad_norm": 0.6587926585973795, "learning_rate": 0.00014454132883582097, "loss": 12.3255, "step": 13680 }, { "epoch": 0.7449851272521832, "grad_norm": 0.5565501984200791, "learning_rate": 0.00014453343345493633, "loss": 12.2422, "step": 13681 }, { "epoch": 0.7450395812487662, "grad_norm": 0.6054602481476327, "learning_rate": 0.00014452553772775225, "loss": 12.1699, "step": 13682 }, { "epoch": 0.7450940352453493, "grad_norm": 0.6502136729803876, "learning_rate": 0.00014451764165433008, "loss": 12.0922, "step": 13683 }, { "epoch": 0.7451484892419323, "grad_norm": 0.5510847853537779, "learning_rate": 0.0001445097452347313, "loss": 12.1959, "step": 13684 }, { "epoch": 0.7452029432385153, "grad_norm": 0.5850713060797119, "learning_rate": 0.00014450184846901724, "loss": 12.1878, "step": 13685 }, { "epoch": 0.7452573972350983, "grad_norm": 0.6319530785149593, "learning_rate": 0.00014449395135724937, "loss": 12.264, "step": 13686 }, { "epoch": 0.7453118512316813, "grad_norm": 0.554872046596875, "learning_rate": 0.0001444860538994891, "loss": 12.1727, "step": 13687 }, { "epoch": 0.7453663052282643, "grad_norm": 0.5718499206730574, "learning_rate": 0.00014447815609579777, "loss": 12.1314, "step": 13688 }, { "epoch": 0.7454207592248474, "grad_norm": 0.5848613944881548, "learning_rate": 0.00014447025794623686, "loss": 12.189, "step": 13689 }, { "epoch": 0.7454752132214304, "grad_norm": 0.5843558986303549, "learning_rate": 0.00014446235945086774, "loss": 12.3481, "step": 13690 }, { "epoch": 0.7455296672180134, "grad_norm": 0.5790485101159226, "learning_rate": 0.0001444544606097519, "loss": 12.2546, "step": 13691 }, { "epoch": 0.7455841212145964, "grad_norm": 0.5908234248192212, "learning_rate": 0.0001444465614229507, "loss": 12.1865, "step": 13692 }, { "epoch": 0.7456385752111794, "grad_norm": 0.5979218535232742, "learning_rate": 0.00014443866189052559, "loss": 12.2428, "step": 13693 }, { "epoch": 0.7456930292077624, "grad_norm": 0.5620026112583111, "learning_rate": 0.00014443076201253795, "loss": 12.3382, "step": 13694 }, { "epoch": 0.7457474832043455, "grad_norm": 0.6823118955874878, "learning_rate": 0.00014442286178904928, "loss": 12.325, "step": 13695 }, { "epoch": 0.7458019372009285, "grad_norm": 0.6067039441439942, "learning_rate": 0.000144414961220121, "loss": 12.3408, "step": 13696 }, { "epoch": 0.7458563911975115, "grad_norm": 0.7007921904958164, "learning_rate": 0.00014440706030581456, "loss": 12.3337, "step": 13697 }, { "epoch": 0.7459108451940945, "grad_norm": 0.6113298266092536, "learning_rate": 0.00014439915904619134, "loss": 12.2801, "step": 13698 }, { "epoch": 0.7459652991906774, "grad_norm": 0.5916639524324443, "learning_rate": 0.00014439125744131282, "loss": 12.1633, "step": 13699 }, { "epoch": 0.7460197531872604, "grad_norm": 0.604444000681318, "learning_rate": 0.0001443833554912404, "loss": 12.315, "step": 13700 }, { "epoch": 0.7460742071838435, "grad_norm": 0.5307951087338542, "learning_rate": 0.0001443754531960356, "loss": 12.1628, "step": 13701 }, { "epoch": 0.7461286611804265, "grad_norm": 0.6424748872034338, "learning_rate": 0.00014436755055575984, "loss": 12.2518, "step": 13702 }, { "epoch": 0.7461831151770095, "grad_norm": 0.5858372985468218, "learning_rate": 0.00014435964757047458, "loss": 12.1831, "step": 13703 }, { "epoch": 0.7462375691735925, "grad_norm": 0.7016042162035304, "learning_rate": 0.00014435174424024124, "loss": 12.3785, "step": 13704 }, { "epoch": 0.7462920231701755, "grad_norm": 0.5351112745115311, "learning_rate": 0.00014434384056512126, "loss": 12.0123, "step": 13705 }, { "epoch": 0.7463464771667585, "grad_norm": 0.6004125768755129, "learning_rate": 0.00014433593654517618, "loss": 12.2861, "step": 13706 }, { "epoch": 0.7464009311633416, "grad_norm": 0.5993177925145355, "learning_rate": 0.00014432803218046746, "loss": 12.265, "step": 13707 }, { "epoch": 0.7464553851599246, "grad_norm": 0.50355922016749, "learning_rate": 0.00014432012747105647, "loss": 12.0824, "step": 13708 }, { "epoch": 0.7465098391565076, "grad_norm": 0.5574649579642977, "learning_rate": 0.00014431222241700475, "loss": 12.1988, "step": 13709 }, { "epoch": 0.7465642931530906, "grad_norm": 0.6904411913829945, "learning_rate": 0.00014430431701837376, "loss": 12.2252, "step": 13710 }, { "epoch": 0.7466187471496736, "grad_norm": 0.5940225992786953, "learning_rate": 0.00014429641127522495, "loss": 11.9661, "step": 13711 }, { "epoch": 0.7466732011462567, "grad_norm": 0.5574148268284755, "learning_rate": 0.00014428850518761986, "loss": 12.318, "step": 13712 }, { "epoch": 0.7467276551428397, "grad_norm": 0.5236783246576344, "learning_rate": 0.0001442805987556199, "loss": 12.0991, "step": 13713 }, { "epoch": 0.7467821091394227, "grad_norm": 0.5820653271899563, "learning_rate": 0.0001442726919792866, "loss": 12.3288, "step": 13714 }, { "epoch": 0.7468365631360057, "grad_norm": 0.5932316400669257, "learning_rate": 0.0001442647848586814, "loss": 12.3073, "step": 13715 }, { "epoch": 0.7468910171325887, "grad_norm": 0.6467755435657985, "learning_rate": 0.0001442568773938658, "loss": 12.1978, "step": 13716 }, { "epoch": 0.7469454711291716, "grad_norm": 0.5516042158353747, "learning_rate": 0.00014424896958490133, "loss": 12.0761, "step": 13717 }, { "epoch": 0.7469999251257547, "grad_norm": 0.5613663038502804, "learning_rate": 0.00014424106143184944, "loss": 12.187, "step": 13718 }, { "epoch": 0.7470543791223377, "grad_norm": 0.6013135202519371, "learning_rate": 0.00014423315293477163, "loss": 12.3413, "step": 13719 }, { "epoch": 0.7471088331189207, "grad_norm": 0.5628113838646053, "learning_rate": 0.0001442252440937294, "loss": 12.2496, "step": 13720 }, { "epoch": 0.7471632871155037, "grad_norm": 0.5904699220530979, "learning_rate": 0.00014421733490878425, "loss": 12.2199, "step": 13721 }, { "epoch": 0.7472177411120867, "grad_norm": 0.6257160288719434, "learning_rate": 0.00014420942537999773, "loss": 12.2555, "step": 13722 }, { "epoch": 0.7472721951086697, "grad_norm": 0.5439262444923114, "learning_rate": 0.00014420151550743125, "loss": 12.2134, "step": 13723 }, { "epoch": 0.7473266491052528, "grad_norm": 0.6459417849868265, "learning_rate": 0.00014419360529114642, "loss": 12.2833, "step": 13724 }, { "epoch": 0.7473811031018358, "grad_norm": 0.62630629000918, "learning_rate": 0.00014418569473120468, "loss": 12.2979, "step": 13725 }, { "epoch": 0.7474355570984188, "grad_norm": 0.5777191040932425, "learning_rate": 0.00014417778382766757, "loss": 12.211, "step": 13726 }, { "epoch": 0.7474900110950018, "grad_norm": 0.6563049547344048, "learning_rate": 0.00014416987258059663, "loss": 12.3766, "step": 13727 }, { "epoch": 0.7475444650915848, "grad_norm": 0.530946466034582, "learning_rate": 0.0001441619609900533, "loss": 12.187, "step": 13728 }, { "epoch": 0.7475989190881678, "grad_norm": 0.5851608738144949, "learning_rate": 0.0001441540490560992, "loss": 12.1812, "step": 13729 }, { "epoch": 0.7476533730847509, "grad_norm": 0.623454123044338, "learning_rate": 0.0001441461367787958, "loss": 12.2312, "step": 13730 }, { "epoch": 0.7477078270813339, "grad_norm": 0.6151173822258862, "learning_rate": 0.00014413822415820465, "loss": 12.272, "step": 13731 }, { "epoch": 0.7477622810779169, "grad_norm": 0.6270601805006041, "learning_rate": 0.00014413031119438723, "loss": 12.2757, "step": 13732 }, { "epoch": 0.7478167350744999, "grad_norm": 0.5982856037425696, "learning_rate": 0.00014412239788740513, "loss": 12.2053, "step": 13733 }, { "epoch": 0.7478711890710829, "grad_norm": 0.6499148875444776, "learning_rate": 0.00014411448423731985, "loss": 12.3002, "step": 13734 }, { "epoch": 0.7479256430676658, "grad_norm": 0.566224351321186, "learning_rate": 0.00014410657024419295, "loss": 12.1763, "step": 13735 }, { "epoch": 0.747980097064249, "grad_norm": 0.6627051566300732, "learning_rate": 0.00014409865590808598, "loss": 12.2677, "step": 13736 }, { "epoch": 0.7480345510608319, "grad_norm": 0.646344114628114, "learning_rate": 0.00014409074122906048, "loss": 12.2699, "step": 13737 }, { "epoch": 0.7480890050574149, "grad_norm": 0.5937235560428703, "learning_rate": 0.00014408282620717794, "loss": 12.2404, "step": 13738 }, { "epoch": 0.7481434590539979, "grad_norm": 0.5806014220082066, "learning_rate": 0.00014407491084249995, "loss": 12.2855, "step": 13739 }, { "epoch": 0.7481979130505809, "grad_norm": 0.5659780798715816, "learning_rate": 0.0001440669951350881, "loss": 12.3017, "step": 13740 }, { "epoch": 0.7482523670471639, "grad_norm": 0.6227277218313976, "learning_rate": 0.00014405907908500388, "loss": 12.2454, "step": 13741 }, { "epoch": 0.748306821043747, "grad_norm": 0.6664710496279762, "learning_rate": 0.0001440511626923089, "loss": 12.3328, "step": 13742 }, { "epoch": 0.74836127504033, "grad_norm": 0.5970770618016265, "learning_rate": 0.00014404324595706464, "loss": 12.2713, "step": 13743 }, { "epoch": 0.748415729036913, "grad_norm": 0.578291694869162, "learning_rate": 0.00014403532887933274, "loss": 12.3062, "step": 13744 }, { "epoch": 0.748470183033496, "grad_norm": 0.6416824753124646, "learning_rate": 0.00014402741145917475, "loss": 12.0758, "step": 13745 }, { "epoch": 0.748524637030079, "grad_norm": 0.5655552786892055, "learning_rate": 0.00014401949369665222, "loss": 12.1638, "step": 13746 }, { "epoch": 0.7485790910266621, "grad_norm": 0.6710049036354784, "learning_rate": 0.00014401157559182674, "loss": 12.285, "step": 13747 }, { "epoch": 0.7486335450232451, "grad_norm": 0.6801326451669022, "learning_rate": 0.00014400365714475986, "loss": 12.3959, "step": 13748 }, { "epoch": 0.7486879990198281, "grad_norm": 0.5368301698649847, "learning_rate": 0.00014399573835551313, "loss": 12.2605, "step": 13749 }, { "epoch": 0.7487424530164111, "grad_norm": 0.6540587730560741, "learning_rate": 0.00014398781922414817, "loss": 12.3747, "step": 13750 }, { "epoch": 0.7487969070129941, "grad_norm": 0.6016638900317841, "learning_rate": 0.00014397989975072656, "loss": 12.3126, "step": 13751 }, { "epoch": 0.748851361009577, "grad_norm": 0.555668289888147, "learning_rate": 0.0001439719799353099, "loss": 12.3146, "step": 13752 }, { "epoch": 0.7489058150061602, "grad_norm": 0.6973329845200757, "learning_rate": 0.00014396405977795972, "loss": 12.2642, "step": 13753 }, { "epoch": 0.7489602690027432, "grad_norm": 0.5280970056108852, "learning_rate": 0.00014395613927873765, "loss": 12.2146, "step": 13754 }, { "epoch": 0.7490147229993261, "grad_norm": 0.5899667507355091, "learning_rate": 0.00014394821843770526, "loss": 12.2097, "step": 13755 }, { "epoch": 0.7490691769959091, "grad_norm": 0.5202820450953016, "learning_rate": 0.00014394029725492416, "loss": 12.2154, "step": 13756 }, { "epoch": 0.7491236309924921, "grad_norm": 0.5798168456983885, "learning_rate": 0.00014393237573045596, "loss": 12.2218, "step": 13757 }, { "epoch": 0.7491780849890751, "grad_norm": 0.5757177192193262, "learning_rate": 0.0001439244538643622, "loss": 12.1298, "step": 13758 }, { "epoch": 0.7492325389856582, "grad_norm": 0.5439117109561773, "learning_rate": 0.00014391653165670454, "loss": 12.2106, "step": 13759 }, { "epoch": 0.7492869929822412, "grad_norm": 0.5665992283629767, "learning_rate": 0.00014390860910754453, "loss": 12.2342, "step": 13760 }, { "epoch": 0.7493414469788242, "grad_norm": 0.610271080039589, "learning_rate": 0.00014390068621694387, "loss": 12.2926, "step": 13761 }, { "epoch": 0.7493959009754072, "grad_norm": 0.5701767578829343, "learning_rate": 0.0001438927629849641, "loss": 12.23, "step": 13762 }, { "epoch": 0.7494503549719902, "grad_norm": 0.7850785013536542, "learning_rate": 0.00014388483941166682, "loss": 12.2746, "step": 13763 }, { "epoch": 0.7495048089685732, "grad_norm": 0.6326742294350892, "learning_rate": 0.0001438769154971137, "loss": 12.2667, "step": 13764 }, { "epoch": 0.7495592629651563, "grad_norm": 0.5210535123007397, "learning_rate": 0.0001438689912413663, "loss": 12.2196, "step": 13765 }, { "epoch": 0.7496137169617393, "grad_norm": 0.5766175886374203, "learning_rate": 0.00014386106664448625, "loss": 12.2693, "step": 13766 }, { "epoch": 0.7496681709583223, "grad_norm": 0.6178574912384032, "learning_rate": 0.00014385314170653523, "loss": 12.252, "step": 13767 }, { "epoch": 0.7497226249549053, "grad_norm": 0.5915591393770658, "learning_rate": 0.0001438452164275748, "loss": 12.2551, "step": 13768 }, { "epoch": 0.7497770789514883, "grad_norm": 0.6269592287242665, "learning_rate": 0.00014383729080766664, "loss": 12.2719, "step": 13769 }, { "epoch": 0.7498315329480713, "grad_norm": 0.5609303240083141, "learning_rate": 0.00014382936484687233, "loss": 12.1725, "step": 13770 }, { "epoch": 0.7498859869446544, "grad_norm": 0.6438885235253843, "learning_rate": 0.00014382143854525353, "loss": 12.333, "step": 13771 }, { "epoch": 0.7499404409412374, "grad_norm": 0.5705368582587276, "learning_rate": 0.00014381351190287188, "loss": 12.2322, "step": 13772 }, { "epoch": 0.7499948949378203, "grad_norm": 0.6498876610872063, "learning_rate": 0.000143805584919789, "loss": 12.3526, "step": 13773 }, { "epoch": 0.7500493489344033, "grad_norm": 0.5829378708822657, "learning_rate": 0.00014379765759606658, "loss": 12.146, "step": 13774 }, { "epoch": 0.7501038029309863, "grad_norm": 0.5781624917520148, "learning_rate": 0.00014378972993176622, "loss": 12.2652, "step": 13775 }, { "epoch": 0.7501582569275693, "grad_norm": 0.5605017438627762, "learning_rate": 0.00014378180192694957, "loss": 12.1909, "step": 13776 }, { "epoch": 0.7502127109241524, "grad_norm": 0.5589222728784693, "learning_rate": 0.00014377387358167828, "loss": 12.159, "step": 13777 }, { "epoch": 0.7502671649207354, "grad_norm": 0.5655748541001888, "learning_rate": 0.00014376594489601402, "loss": 12.1719, "step": 13778 }, { "epoch": 0.7503216189173184, "grad_norm": 0.9229516569274044, "learning_rate": 0.00014375801587001842, "loss": 12.1961, "step": 13779 }, { "epoch": 0.7503760729139014, "grad_norm": 0.5190587246726092, "learning_rate": 0.00014375008650375313, "loss": 12.1197, "step": 13780 }, { "epoch": 0.7504305269104844, "grad_norm": 0.5842286466771999, "learning_rate": 0.0001437421567972799, "loss": 12.0913, "step": 13781 }, { "epoch": 0.7504849809070675, "grad_norm": 0.5366466263553302, "learning_rate": 0.00014373422675066023, "loss": 12.1292, "step": 13782 }, { "epoch": 0.7505394349036505, "grad_norm": 0.6292072846094948, "learning_rate": 0.00014372629636395597, "loss": 12.3445, "step": 13783 }, { "epoch": 0.7505938889002335, "grad_norm": 0.6518660654831823, "learning_rate": 0.00014371836563722865, "loss": 12.3205, "step": 13784 }, { "epoch": 0.7506483428968165, "grad_norm": 0.5621702149563956, "learning_rate": 0.00014371043457054, "loss": 12.3394, "step": 13785 }, { "epoch": 0.7507027968933995, "grad_norm": 0.5716000303103266, "learning_rate": 0.00014370250316395167, "loss": 12.2124, "step": 13786 }, { "epoch": 0.7507572508899825, "grad_norm": 0.527542076552203, "learning_rate": 0.00014369457141752534, "loss": 12.2448, "step": 13787 }, { "epoch": 0.7508117048865656, "grad_norm": 0.6303658246608846, "learning_rate": 0.0001436866393313227, "loss": 12.3021, "step": 13788 }, { "epoch": 0.7508661588831486, "grad_norm": 0.5276474677797767, "learning_rate": 0.00014367870690540544, "loss": 12.2299, "step": 13789 }, { "epoch": 0.7509206128797316, "grad_norm": 0.6712221790438491, "learning_rate": 0.00014367077413983523, "loss": 12.2085, "step": 13790 }, { "epoch": 0.7509750668763145, "grad_norm": 0.534081814151848, "learning_rate": 0.00014366284103467373, "loss": 12.1906, "step": 13791 }, { "epoch": 0.7510295208728975, "grad_norm": 0.5951491738825837, "learning_rate": 0.0001436549075899827, "loss": 12.2514, "step": 13792 }, { "epoch": 0.7510839748694805, "grad_norm": 0.6803997052913654, "learning_rate": 0.00014364697380582375, "loss": 12.2647, "step": 13793 }, { "epoch": 0.7511384288660636, "grad_norm": 0.5874316064236298, "learning_rate": 0.00014363903968225863, "loss": 12.3495, "step": 13794 }, { "epoch": 0.7511928828626466, "grad_norm": 0.5356708641919317, "learning_rate": 0.000143631105219349, "loss": 12.2118, "step": 13795 }, { "epoch": 0.7512473368592296, "grad_norm": 0.6295249693626224, "learning_rate": 0.00014362317041715657, "loss": 12.1805, "step": 13796 }, { "epoch": 0.7513017908558126, "grad_norm": 1.03868303664006, "learning_rate": 0.00014361523527574307, "loss": 12.259, "step": 13797 }, { "epoch": 0.7513562448523956, "grad_norm": 0.6257302393342892, "learning_rate": 0.0001436072997951702, "loss": 12.257, "step": 13798 }, { "epoch": 0.7514106988489786, "grad_norm": 0.623335943617568, "learning_rate": 0.00014359936397549965, "loss": 12.3552, "step": 13799 }, { "epoch": 0.7514651528455617, "grad_norm": 0.5684967086416157, "learning_rate": 0.00014359142781679313, "loss": 12.169, "step": 13800 }, { "epoch": 0.7515196068421447, "grad_norm": 0.5688960037551076, "learning_rate": 0.00014358349131911234, "loss": 12.3163, "step": 13801 }, { "epoch": 0.7515740608387277, "grad_norm": 0.5842685732263988, "learning_rate": 0.00014357555448251902, "loss": 12.2271, "step": 13802 }, { "epoch": 0.7516285148353107, "grad_norm": 0.6143806368491119, "learning_rate": 0.00014356761730707489, "loss": 12.3893, "step": 13803 }, { "epoch": 0.7516829688318937, "grad_norm": 0.6554362600326484, "learning_rate": 0.00014355967979284167, "loss": 12.2427, "step": 13804 }, { "epoch": 0.7517374228284767, "grad_norm": 0.6311453226982938, "learning_rate": 0.00014355174193988107, "loss": 12.3582, "step": 13805 }, { "epoch": 0.7517918768250598, "grad_norm": 0.6346421035651231, "learning_rate": 0.0001435438037482548, "loss": 12.2786, "step": 13806 }, { "epoch": 0.7518463308216428, "grad_norm": 0.6117921300048075, "learning_rate": 0.00014353586521802461, "loss": 12.2096, "step": 13807 }, { "epoch": 0.7519007848182258, "grad_norm": 0.6019476080731878, "learning_rate": 0.0001435279263492523, "loss": 12.3236, "step": 13808 }, { "epoch": 0.7519552388148087, "grad_norm": 0.7514865124107512, "learning_rate": 0.00014351998714199943, "loss": 12.3547, "step": 13809 }, { "epoch": 0.7520096928113917, "grad_norm": 0.5646877462273913, "learning_rate": 0.0001435120475963279, "loss": 12.2639, "step": 13810 }, { "epoch": 0.7520641468079748, "grad_norm": 0.5891752717887218, "learning_rate": 0.0001435041077122994, "loss": 12.3077, "step": 13811 }, { "epoch": 0.7521186008045578, "grad_norm": 0.5862291042415606, "learning_rate": 0.00014349616748997562, "loss": 12.2445, "step": 13812 }, { "epoch": 0.7521730548011408, "grad_norm": 0.614589247223749, "learning_rate": 0.00014348822692941842, "loss": 12.3795, "step": 13813 }, { "epoch": 0.7522275087977238, "grad_norm": 0.5813380916070603, "learning_rate": 0.00014348028603068942, "loss": 12.2293, "step": 13814 }, { "epoch": 0.7522819627943068, "grad_norm": 0.63080789984502, "learning_rate": 0.0001434723447938504, "loss": 12.2895, "step": 13815 }, { "epoch": 0.7523364167908898, "grad_norm": 0.5631956520829646, "learning_rate": 0.00014346440321896318, "loss": 12.2301, "step": 13816 }, { "epoch": 0.7523908707874729, "grad_norm": 0.5276970829388432, "learning_rate": 0.00014345646130608944, "loss": 12.0721, "step": 13817 }, { "epoch": 0.7524453247840559, "grad_norm": 0.5801900867009577, "learning_rate": 0.00014344851905529103, "loss": 12.2662, "step": 13818 }, { "epoch": 0.7524997787806389, "grad_norm": 0.620601218168845, "learning_rate": 0.0001434405764666296, "loss": 12.3432, "step": 13819 }, { "epoch": 0.7525542327772219, "grad_norm": 0.5993138059551066, "learning_rate": 0.00014343263354016695, "loss": 12.246, "step": 13820 }, { "epoch": 0.7526086867738049, "grad_norm": 0.5793428284321268, "learning_rate": 0.00014342469027596487, "loss": 12.2635, "step": 13821 }, { "epoch": 0.7526631407703879, "grad_norm": 0.626149109871221, "learning_rate": 0.00014341674667408513, "loss": 12.0145, "step": 13822 }, { "epoch": 0.752717594766971, "grad_norm": 0.6405112662502047, "learning_rate": 0.0001434088027345895, "loss": 12.463, "step": 13823 }, { "epoch": 0.752772048763554, "grad_norm": 0.6023957152504208, "learning_rate": 0.00014340085845753972, "loss": 12.1497, "step": 13824 }, { "epoch": 0.752826502760137, "grad_norm": 0.6355489896387884, "learning_rate": 0.0001433929138429976, "loss": 12.459, "step": 13825 }, { "epoch": 0.75288095675672, "grad_norm": 0.6425241440639089, "learning_rate": 0.00014338496889102487, "loss": 12.311, "step": 13826 }, { "epoch": 0.752935410753303, "grad_norm": 0.6167990707777722, "learning_rate": 0.00014337702360168336, "loss": 12.1696, "step": 13827 }, { "epoch": 0.7529898647498859, "grad_norm": 0.6539399435350298, "learning_rate": 0.00014336907797503487, "loss": 12.2048, "step": 13828 }, { "epoch": 0.753044318746469, "grad_norm": 0.6056473383463385, "learning_rate": 0.00014336113201114114, "loss": 12.1473, "step": 13829 }, { "epoch": 0.753098772743052, "grad_norm": 0.5535186851656467, "learning_rate": 0.00014335318571006398, "loss": 12.2927, "step": 13830 }, { "epoch": 0.753153226739635, "grad_norm": 0.6022086596989811, "learning_rate": 0.00014334523907186513, "loss": 12.2001, "step": 13831 }, { "epoch": 0.753207680736218, "grad_norm": 0.591273556250499, "learning_rate": 0.00014333729209660648, "loss": 12.199, "step": 13832 }, { "epoch": 0.753262134732801, "grad_norm": 0.5760100490032448, "learning_rate": 0.00014332934478434982, "loss": 12.1534, "step": 13833 }, { "epoch": 0.753316588729384, "grad_norm": 0.6027884470696825, "learning_rate": 0.00014332139713515684, "loss": 12.3531, "step": 13834 }, { "epoch": 0.7533710427259671, "grad_norm": 0.6205928858416357, "learning_rate": 0.00014331344914908942, "loss": 12.2737, "step": 13835 }, { "epoch": 0.7534254967225501, "grad_norm": 0.5886492463854501, "learning_rate": 0.00014330550082620937, "loss": 12.263, "step": 13836 }, { "epoch": 0.7534799507191331, "grad_norm": 0.5914234570101967, "learning_rate": 0.00014329755216657849, "loss": 12.0383, "step": 13837 }, { "epoch": 0.7535344047157161, "grad_norm": 0.6410193708975239, "learning_rate": 0.00014328960317025856, "loss": 12.281, "step": 13838 }, { "epoch": 0.7535888587122991, "grad_norm": 0.5314330429599432, "learning_rate": 0.00014328165383731145, "loss": 12.2602, "step": 13839 }, { "epoch": 0.7536433127088821, "grad_norm": 0.540957434758258, "learning_rate": 0.0001432737041677989, "loss": 12.2461, "step": 13840 }, { "epoch": 0.7536977667054652, "grad_norm": 0.5432546815104532, "learning_rate": 0.00014326575416178278, "loss": 12.1897, "step": 13841 }, { "epoch": 0.7537522207020482, "grad_norm": 0.5541501713891164, "learning_rate": 0.00014325780381932492, "loss": 12.164, "step": 13842 }, { "epoch": 0.7538066746986312, "grad_norm": 0.7697095586952005, "learning_rate": 0.0001432498531404871, "loss": 12.1623, "step": 13843 }, { "epoch": 0.7538611286952142, "grad_norm": 0.49945314514455, "learning_rate": 0.0001432419021253312, "loss": 12.2043, "step": 13844 }, { "epoch": 0.7539155826917971, "grad_norm": 0.5374440022342415, "learning_rate": 0.000143233950773919, "loss": 12.1716, "step": 13845 }, { "epoch": 0.7539700366883803, "grad_norm": 0.7339500382749483, "learning_rate": 0.00014322599908631232, "loss": 12.3037, "step": 13846 }, { "epoch": 0.7540244906849632, "grad_norm": 0.5815419891307385, "learning_rate": 0.00014321804706257307, "loss": 12.2847, "step": 13847 }, { "epoch": 0.7540789446815462, "grad_norm": 0.5297258689843342, "learning_rate": 0.00014321009470276303, "loss": 12.1873, "step": 13848 }, { "epoch": 0.7541333986781292, "grad_norm": 0.6692806498141518, "learning_rate": 0.000143202142006944, "loss": 12.3004, "step": 13849 }, { "epoch": 0.7541878526747122, "grad_norm": 0.517943893944968, "learning_rate": 0.00014319418897517792, "loss": 12.1011, "step": 13850 }, { "epoch": 0.7542423066712952, "grad_norm": 0.5772763106403177, "learning_rate": 0.00014318623560752658, "loss": 12.3086, "step": 13851 }, { "epoch": 0.7542967606678783, "grad_norm": 0.6149695725607878, "learning_rate": 0.0001431782819040518, "loss": 12.2392, "step": 13852 }, { "epoch": 0.7543512146644613, "grad_norm": 0.6400262055660713, "learning_rate": 0.00014317032786481548, "loss": 12.4411, "step": 13853 }, { "epoch": 0.7544056686610443, "grad_norm": 0.5576986266208344, "learning_rate": 0.00014316237348987944, "loss": 12.2759, "step": 13854 }, { "epoch": 0.7544601226576273, "grad_norm": 0.6692229239456416, "learning_rate": 0.00014315441877930555, "loss": 12.2574, "step": 13855 }, { "epoch": 0.7545145766542103, "grad_norm": 0.6281993479852913, "learning_rate": 0.0001431464637331557, "loss": 12.3431, "step": 13856 }, { "epoch": 0.7545690306507933, "grad_norm": 0.5816207313651164, "learning_rate": 0.00014313850835149166, "loss": 12.1529, "step": 13857 }, { "epoch": 0.7546234846473764, "grad_norm": 0.6051148573253917, "learning_rate": 0.00014313055263437535, "loss": 12.3998, "step": 13858 }, { "epoch": 0.7546779386439594, "grad_norm": 0.557618965382382, "learning_rate": 0.00014312259658186865, "loss": 12.2016, "step": 13859 }, { "epoch": 0.7547323926405424, "grad_norm": 0.5795972098301391, "learning_rate": 0.0001431146401940334, "loss": 12.2748, "step": 13860 }, { "epoch": 0.7547868466371254, "grad_norm": 0.5661625741258683, "learning_rate": 0.00014310668347093146, "loss": 12.1477, "step": 13861 }, { "epoch": 0.7548413006337084, "grad_norm": 0.5847540858969881, "learning_rate": 0.00014309872641262475, "loss": 12.2328, "step": 13862 }, { "epoch": 0.7548957546302913, "grad_norm": 0.5954478677911897, "learning_rate": 0.00014309076901917512, "loss": 12.2756, "step": 13863 }, { "epoch": 0.7549502086268745, "grad_norm": 0.5042730004768833, "learning_rate": 0.00014308281129064442, "loss": 12.159, "step": 13864 }, { "epoch": 0.7550046626234574, "grad_norm": 0.6458120398598242, "learning_rate": 0.00014307485322709456, "loss": 12.1989, "step": 13865 }, { "epoch": 0.7550591166200404, "grad_norm": 0.5819826806870918, "learning_rate": 0.00014306689482858738, "loss": 12.1708, "step": 13866 }, { "epoch": 0.7551135706166234, "grad_norm": 0.552542592845485, "learning_rate": 0.00014305893609518487, "loss": 12.1174, "step": 13867 }, { "epoch": 0.7551680246132064, "grad_norm": 0.6422678536801202, "learning_rate": 0.00014305097702694883, "loss": 12.2365, "step": 13868 }, { "epoch": 0.7552224786097894, "grad_norm": 0.5456608259492738, "learning_rate": 0.00014304301762394116, "loss": 12.2812, "step": 13869 }, { "epoch": 0.7552769326063725, "grad_norm": 0.5815679736576249, "learning_rate": 0.00014303505788622374, "loss": 12.2574, "step": 13870 }, { "epoch": 0.7553313866029555, "grad_norm": 0.6311052988627255, "learning_rate": 0.00014302709781385855, "loss": 12.2308, "step": 13871 }, { "epoch": 0.7553858405995385, "grad_norm": 0.6006336352942205, "learning_rate": 0.0001430191374069074, "loss": 12.3778, "step": 13872 }, { "epoch": 0.7554402945961215, "grad_norm": 0.6255333097598189, "learning_rate": 0.00014301117666543225, "loss": 11.9867, "step": 13873 }, { "epoch": 0.7554947485927045, "grad_norm": 0.5808851218717448, "learning_rate": 0.00014300321558949496, "loss": 12.172, "step": 13874 }, { "epoch": 0.7555492025892875, "grad_norm": 0.661725093009846, "learning_rate": 0.00014299525417915744, "loss": 12.3623, "step": 13875 }, { "epoch": 0.7556036565858706, "grad_norm": 0.6244849203626905, "learning_rate": 0.00014298729243448162, "loss": 12.2022, "step": 13876 }, { "epoch": 0.7556581105824536, "grad_norm": 0.5769886983567915, "learning_rate": 0.00014297933035552942, "loss": 12.2396, "step": 13877 }, { "epoch": 0.7557125645790366, "grad_norm": 0.5965097791921065, "learning_rate": 0.00014297136794236273, "loss": 12.2386, "step": 13878 }, { "epoch": 0.7557670185756196, "grad_norm": 0.5921318795590744, "learning_rate": 0.00014296340519504347, "loss": 12.2327, "step": 13879 }, { "epoch": 0.7558214725722026, "grad_norm": 0.6021718374457375, "learning_rate": 0.00014295544211363357, "loss": 12.2171, "step": 13880 }, { "epoch": 0.7558759265687857, "grad_norm": 0.5942547115161655, "learning_rate": 0.00014294747869819495, "loss": 12.2583, "step": 13881 }, { "epoch": 0.7559303805653687, "grad_norm": 0.5705233666422782, "learning_rate": 0.00014293951494878955, "loss": 12.1982, "step": 13882 }, { "epoch": 0.7559848345619516, "grad_norm": 0.6233055567094691, "learning_rate": 0.00014293155086547927, "loss": 12.3178, "step": 13883 }, { "epoch": 0.7560392885585346, "grad_norm": 0.5574722512875832, "learning_rate": 0.00014292358644832603, "loss": 12.4212, "step": 13884 }, { "epoch": 0.7560937425551176, "grad_norm": 0.6029716387807774, "learning_rate": 0.0001429156216973918, "loss": 12.4054, "step": 13885 }, { "epoch": 0.7561481965517006, "grad_norm": 0.588774065224866, "learning_rate": 0.00014290765661273847, "loss": 12.2661, "step": 13886 }, { "epoch": 0.7562026505482837, "grad_norm": 0.5495595440606598, "learning_rate": 0.00014289969119442804, "loss": 12.364, "step": 13887 }, { "epoch": 0.7562571045448667, "grad_norm": 0.6343815607337305, "learning_rate": 0.00014289172544252246, "loss": 12.2642, "step": 13888 }, { "epoch": 0.7563115585414497, "grad_norm": 0.5732501696621901, "learning_rate": 0.00014288375935708357, "loss": 12.2799, "step": 13889 }, { "epoch": 0.7563660125380327, "grad_norm": 0.5839071557316391, "learning_rate": 0.0001428757929381734, "loss": 12.1891, "step": 13890 }, { "epoch": 0.7564204665346157, "grad_norm": 0.6679324286207482, "learning_rate": 0.00014286782618585383, "loss": 12.253, "step": 13891 }, { "epoch": 0.7564749205311987, "grad_norm": 0.5527326954974784, "learning_rate": 0.00014285985910018689, "loss": 12.2295, "step": 13892 }, { "epoch": 0.7565293745277818, "grad_norm": 0.5980791290120517, "learning_rate": 0.0001428518916812345, "loss": 12.3769, "step": 13893 }, { "epoch": 0.7565838285243648, "grad_norm": 0.6069665356579306, "learning_rate": 0.00014284392392905861, "loss": 12.2317, "step": 13894 }, { "epoch": 0.7566382825209478, "grad_norm": 0.5593302708857323, "learning_rate": 0.0001428359558437212, "loss": 12.1659, "step": 13895 }, { "epoch": 0.7566927365175308, "grad_norm": 0.6431685721579361, "learning_rate": 0.0001428279874252842, "loss": 12.3359, "step": 13896 }, { "epoch": 0.7567471905141138, "grad_norm": 0.6286285307146453, "learning_rate": 0.00014282001867380953, "loss": 12.261, "step": 13897 }, { "epoch": 0.7568016445106968, "grad_norm": 0.5815019618216938, "learning_rate": 0.00014281204958935929, "loss": 12.2054, "step": 13898 }, { "epoch": 0.7568560985072799, "grad_norm": 0.6448256396057778, "learning_rate": 0.00014280408017199535, "loss": 12.2712, "step": 13899 }, { "epoch": 0.7569105525038629, "grad_norm": 0.5632502677636745, "learning_rate": 0.0001427961104217797, "loss": 12.3028, "step": 13900 }, { "epoch": 0.7569650065004458, "grad_norm": 0.5939995631430055, "learning_rate": 0.00014278814033877432, "loss": 12.1262, "step": 13901 }, { "epoch": 0.7570194604970288, "grad_norm": 0.610425379765044, "learning_rate": 0.0001427801699230412, "loss": 12.2151, "step": 13902 }, { "epoch": 0.7570739144936118, "grad_norm": 0.5515460328017994, "learning_rate": 0.0001427721991746423, "loss": 12.2251, "step": 13903 }, { "epoch": 0.7571283684901948, "grad_norm": 0.6204862537814627, "learning_rate": 0.00014276422809363957, "loss": 12.2634, "step": 13904 }, { "epoch": 0.7571828224867779, "grad_norm": 0.582736281918877, "learning_rate": 0.00014275625668009508, "loss": 12.2089, "step": 13905 }, { "epoch": 0.7572372764833609, "grad_norm": 0.5936936567696945, "learning_rate": 0.00014274828493407072, "loss": 12.2002, "step": 13906 }, { "epoch": 0.7572917304799439, "grad_norm": 0.5530303776248853, "learning_rate": 0.00014274031285562856, "loss": 12.2321, "step": 13907 }, { "epoch": 0.7573461844765269, "grad_norm": 0.5826100861165144, "learning_rate": 0.00014273234044483054, "loss": 12.3413, "step": 13908 }, { "epoch": 0.7574006384731099, "grad_norm": 0.5578995915437032, "learning_rate": 0.00014272436770173868, "loss": 12.2443, "step": 13909 }, { "epoch": 0.7574550924696929, "grad_norm": 0.5545115358555107, "learning_rate": 0.000142716394626415, "loss": 12.1602, "step": 13910 }, { "epoch": 0.757509546466276, "grad_norm": 0.6002801986882448, "learning_rate": 0.00014270842121892144, "loss": 12.2704, "step": 13911 }, { "epoch": 0.757564000462859, "grad_norm": 0.5875392696566134, "learning_rate": 0.00014270044747932006, "loss": 12.3167, "step": 13912 }, { "epoch": 0.757618454459442, "grad_norm": 0.6594591775214861, "learning_rate": 0.00014269247340767283, "loss": 12.3784, "step": 13913 }, { "epoch": 0.757672908456025, "grad_norm": 0.5947682770062622, "learning_rate": 0.00014268449900404175, "loss": 12.3146, "step": 13914 }, { "epoch": 0.757727362452608, "grad_norm": 0.6568834496108618, "learning_rate": 0.00014267652426848887, "loss": 12.1571, "step": 13915 }, { "epoch": 0.7577818164491911, "grad_norm": 0.6560720446581242, "learning_rate": 0.00014266854920107617, "loss": 12.1478, "step": 13916 }, { "epoch": 0.7578362704457741, "grad_norm": 0.5738304909572517, "learning_rate": 0.0001426605738018657, "loss": 12.2459, "step": 13917 }, { "epoch": 0.757890724442357, "grad_norm": 0.560306964319945, "learning_rate": 0.0001426525980709194, "loss": 12.3494, "step": 13918 }, { "epoch": 0.75794517843894, "grad_norm": 0.6763115699225631, "learning_rate": 0.00014264462200829937, "loss": 12.3879, "step": 13919 }, { "epoch": 0.757999632435523, "grad_norm": 0.5703290243771721, "learning_rate": 0.00014263664561406763, "loss": 12.3699, "step": 13920 }, { "epoch": 0.758054086432106, "grad_norm": 0.6625014384246614, "learning_rate": 0.00014262866888828615, "loss": 12.2679, "step": 13921 }, { "epoch": 0.7581085404286891, "grad_norm": 0.5769931413845019, "learning_rate": 0.000142620691831017, "loss": 12.2372, "step": 13922 }, { "epoch": 0.7581629944252721, "grad_norm": 0.5949061529616914, "learning_rate": 0.00014261271444232222, "loss": 12.2714, "step": 13923 }, { "epoch": 0.7582174484218551, "grad_norm": 0.5496679640085099, "learning_rate": 0.00014260473672226382, "loss": 12.2477, "step": 13924 }, { "epoch": 0.7582719024184381, "grad_norm": 0.6493771055430315, "learning_rate": 0.00014259675867090384, "loss": 12.3036, "step": 13925 }, { "epoch": 0.7583263564150211, "grad_norm": 0.5665578504631344, "learning_rate": 0.00014258878028830432, "loss": 12.3569, "step": 13926 }, { "epoch": 0.7583808104116041, "grad_norm": 0.553654878253341, "learning_rate": 0.0001425808015745273, "loss": 12.2306, "step": 13927 }, { "epoch": 0.7584352644081872, "grad_norm": 0.6336767633083233, "learning_rate": 0.00014257282252963483, "loss": 12.2879, "step": 13928 }, { "epoch": 0.7584897184047702, "grad_norm": 0.6957955270092684, "learning_rate": 0.00014256484315368896, "loss": 12.3103, "step": 13929 }, { "epoch": 0.7585441724013532, "grad_norm": 0.6296785833482869, "learning_rate": 0.00014255686344675171, "loss": 12.2726, "step": 13930 }, { "epoch": 0.7585986263979362, "grad_norm": 0.6146755812721879, "learning_rate": 0.00014254888340888518, "loss": 12.3361, "step": 13931 }, { "epoch": 0.7586530803945192, "grad_norm": 0.5695074475030054, "learning_rate": 0.00014254090304015136, "loss": 12.3646, "step": 13932 }, { "epoch": 0.7587075343911022, "grad_norm": 0.5958776096596177, "learning_rate": 0.00014253292234061237, "loss": 12.2512, "step": 13933 }, { "epoch": 0.7587619883876853, "grad_norm": 0.579919117409958, "learning_rate": 0.00014252494131033027, "loss": 12.2253, "step": 13934 }, { "epoch": 0.7588164423842683, "grad_norm": 0.5915421701951903, "learning_rate": 0.00014251695994936704, "loss": 12.2334, "step": 13935 }, { "epoch": 0.7588708963808513, "grad_norm": 0.6499479162687332, "learning_rate": 0.00014250897825778482, "loss": 12.4318, "step": 13936 }, { "epoch": 0.7589253503774342, "grad_norm": 0.5295834541077248, "learning_rate": 0.00014250099623564565, "loss": 12.1041, "step": 13937 }, { "epoch": 0.7589798043740172, "grad_norm": 0.5251911988552174, "learning_rate": 0.00014249301388301165, "loss": 12.2811, "step": 13938 }, { "epoch": 0.7590342583706002, "grad_norm": 0.6043959090041436, "learning_rate": 0.00014248503119994484, "loss": 12.246, "step": 13939 }, { "epoch": 0.7590887123671833, "grad_norm": 0.577799259201176, "learning_rate": 0.00014247704818650723, "loss": 12.1886, "step": 13940 }, { "epoch": 0.7591431663637663, "grad_norm": 0.6230601576377133, "learning_rate": 0.00014246906484276104, "loss": 12.2068, "step": 13941 }, { "epoch": 0.7591976203603493, "grad_norm": 0.6102940610921074, "learning_rate": 0.00014246108116876824, "loss": 12.1497, "step": 13942 }, { "epoch": 0.7592520743569323, "grad_norm": 0.638796409373731, "learning_rate": 0.00014245309716459098, "loss": 12.1821, "step": 13943 }, { "epoch": 0.7593065283535153, "grad_norm": 0.60346867893492, "learning_rate": 0.00014244511283029133, "loss": 12.2799, "step": 13944 }, { "epoch": 0.7593609823500984, "grad_norm": 0.5569590445930767, "learning_rate": 0.00014243712816593132, "loss": 12.1409, "step": 13945 }, { "epoch": 0.7594154363466814, "grad_norm": 0.6095387293968589, "learning_rate": 0.0001424291431715731, "loss": 12.3211, "step": 13946 }, { "epoch": 0.7594698903432644, "grad_norm": 0.6054626111032342, "learning_rate": 0.00014242115784727873, "loss": 12.281, "step": 13947 }, { "epoch": 0.7595243443398474, "grad_norm": 0.5724240846205761, "learning_rate": 0.00014241317219311036, "loss": 12.375, "step": 13948 }, { "epoch": 0.7595787983364304, "grad_norm": 0.561696636977615, "learning_rate": 0.00014240518620913005, "loss": 12.2647, "step": 13949 }, { "epoch": 0.7596332523330134, "grad_norm": 0.5821499988373755, "learning_rate": 0.0001423971998953999, "loss": 12.1901, "step": 13950 }, { "epoch": 0.7596877063295965, "grad_norm": 0.5776864442082547, "learning_rate": 0.000142389213251982, "loss": 12.2944, "step": 13951 }, { "epoch": 0.7597421603261795, "grad_norm": 0.5354845840539186, "learning_rate": 0.00014238122627893845, "loss": 12.1452, "step": 13952 }, { "epoch": 0.7597966143227625, "grad_norm": 0.5960205210876889, "learning_rate": 0.0001423732389763314, "loss": 12.0842, "step": 13953 }, { "epoch": 0.7598510683193455, "grad_norm": 0.6156684709857669, "learning_rate": 0.00014236525134422295, "loss": 12.2972, "step": 13954 }, { "epoch": 0.7599055223159285, "grad_norm": 0.5610810124186224, "learning_rate": 0.00014235726338267517, "loss": 12.2933, "step": 13955 }, { "epoch": 0.7599599763125114, "grad_norm": 0.6195395184747021, "learning_rate": 0.00014234927509175022, "loss": 12.2857, "step": 13956 }, { "epoch": 0.7600144303090945, "grad_norm": 0.5602682088640545, "learning_rate": 0.00014234128647151018, "loss": 12.093, "step": 13957 }, { "epoch": 0.7600688843056775, "grad_norm": 0.722577336954991, "learning_rate": 0.00014233329752201722, "loss": 12.2452, "step": 13958 }, { "epoch": 0.7601233383022605, "grad_norm": 0.5471100875030792, "learning_rate": 0.00014232530824333348, "loss": 12.2115, "step": 13959 }, { "epoch": 0.7601777922988435, "grad_norm": 0.6606957761725386, "learning_rate": 0.000142317318635521, "loss": 12.2199, "step": 13960 }, { "epoch": 0.7602322462954265, "grad_norm": 0.5268687412814926, "learning_rate": 0.00014230932869864195, "loss": 12.092, "step": 13961 }, { "epoch": 0.7602867002920095, "grad_norm": 0.607743751189324, "learning_rate": 0.00014230133843275847, "loss": 12.2734, "step": 13962 }, { "epoch": 0.7603411542885926, "grad_norm": 0.5600065514983386, "learning_rate": 0.00014229334783793268, "loss": 12.0704, "step": 13963 }, { "epoch": 0.7603956082851756, "grad_norm": 0.5695028254072064, "learning_rate": 0.00014228535691422674, "loss": 12.2109, "step": 13964 }, { "epoch": 0.7604500622817586, "grad_norm": 0.5815235519116067, "learning_rate": 0.0001422773656617028, "loss": 12.1929, "step": 13965 }, { "epoch": 0.7605045162783416, "grad_norm": 0.5251375307774158, "learning_rate": 0.00014226937408042293, "loss": 12.2333, "step": 13966 }, { "epoch": 0.7605589702749246, "grad_norm": 0.5857315453039587, "learning_rate": 0.00014226138217044937, "loss": 12.2082, "step": 13967 }, { "epoch": 0.7606134242715076, "grad_norm": 0.577963728586446, "learning_rate": 0.00014225338993184417, "loss": 12.2695, "step": 13968 }, { "epoch": 0.7606678782680907, "grad_norm": 0.5513473816586468, "learning_rate": 0.0001422453973646695, "loss": 12.3888, "step": 13969 }, { "epoch": 0.7607223322646737, "grad_norm": 0.6016650383635523, "learning_rate": 0.0001422374044689876, "loss": 12.1849, "step": 13970 }, { "epoch": 0.7607767862612567, "grad_norm": 0.6370471484189492, "learning_rate": 0.00014222941124486052, "loss": 12.3189, "step": 13971 }, { "epoch": 0.7608312402578397, "grad_norm": 0.5499723366235015, "learning_rate": 0.00014222141769235048, "loss": 12.2014, "step": 13972 }, { "epoch": 0.7608856942544227, "grad_norm": 0.6698610911475733, "learning_rate": 0.0001422134238115196, "loss": 12.2091, "step": 13973 }, { "epoch": 0.7609401482510056, "grad_norm": 0.599557631459671, "learning_rate": 0.00014220542960243003, "loss": 12.2612, "step": 13974 }, { "epoch": 0.7609946022475887, "grad_norm": 0.6551527802649024, "learning_rate": 0.000142197435065144, "loss": 12.2865, "step": 13975 }, { "epoch": 0.7610490562441717, "grad_norm": 0.6157311467055842, "learning_rate": 0.00014218944019972363, "loss": 12.3061, "step": 13976 }, { "epoch": 0.7611035102407547, "grad_norm": 0.5781439219031981, "learning_rate": 0.0001421814450062311, "loss": 12.1868, "step": 13977 }, { "epoch": 0.7611579642373377, "grad_norm": 0.5501211790005208, "learning_rate": 0.00014217344948472857, "loss": 12.2197, "step": 13978 }, { "epoch": 0.7612124182339207, "grad_norm": 0.5240842381295177, "learning_rate": 0.00014216545363527822, "loss": 12.1797, "step": 13979 }, { "epoch": 0.7612668722305038, "grad_norm": 0.6189210529605181, "learning_rate": 0.00014215745745794226, "loss": 12.2871, "step": 13980 }, { "epoch": 0.7613213262270868, "grad_norm": 0.5951223275828108, "learning_rate": 0.0001421494609527828, "loss": 12.3688, "step": 13981 }, { "epoch": 0.7613757802236698, "grad_norm": 0.5921341999404179, "learning_rate": 0.0001421414641198621, "loss": 12.3034, "step": 13982 }, { "epoch": 0.7614302342202528, "grad_norm": 0.655618968539555, "learning_rate": 0.00014213346695924229, "loss": 12.3406, "step": 13983 }, { "epoch": 0.7614846882168358, "grad_norm": 0.5432306320692075, "learning_rate": 0.00014212546947098558, "loss": 12.24, "step": 13984 }, { "epoch": 0.7615391422134188, "grad_norm": 0.6036883548019074, "learning_rate": 0.00014211747165515415, "loss": 12.0426, "step": 13985 }, { "epoch": 0.7615935962100019, "grad_norm": 0.5124128687561235, "learning_rate": 0.00014210947351181018, "loss": 12.1231, "step": 13986 }, { "epoch": 0.7616480502065849, "grad_norm": 0.5640171129791517, "learning_rate": 0.0001421014750410159, "loss": 12.2459, "step": 13987 }, { "epoch": 0.7617025042031679, "grad_norm": 0.5533932551479256, "learning_rate": 0.0001420934762428335, "loss": 12.2373, "step": 13988 }, { "epoch": 0.7617569581997509, "grad_norm": 0.7231313373660987, "learning_rate": 0.00014208547711732516, "loss": 12.2069, "step": 13989 }, { "epoch": 0.7618114121963339, "grad_norm": 0.5883848264970278, "learning_rate": 0.0001420774776645531, "loss": 12.2925, "step": 13990 }, { "epoch": 0.7618658661929169, "grad_norm": 0.6112306520637002, "learning_rate": 0.00014206947788457952, "loss": 12.2716, "step": 13991 }, { "epoch": 0.7619203201895, "grad_norm": 0.5651211551615413, "learning_rate": 0.00014206147777746662, "loss": 12.315, "step": 13992 }, { "epoch": 0.761974774186083, "grad_norm": 0.5509869436075503, "learning_rate": 0.0001420534773432766, "loss": 12.346, "step": 13993 }, { "epoch": 0.7620292281826659, "grad_norm": 0.5918797097019901, "learning_rate": 0.00014204547658207173, "loss": 12.276, "step": 13994 }, { "epoch": 0.7620836821792489, "grad_norm": 0.6708119074943824, "learning_rate": 0.00014203747549391412, "loss": 12.3249, "step": 13995 }, { "epoch": 0.7621381361758319, "grad_norm": 0.6152818078894708, "learning_rate": 0.0001420294740788661, "loss": 12.153, "step": 13996 }, { "epoch": 0.7621925901724149, "grad_norm": 0.6318583507656009, "learning_rate": 0.00014202147233698982, "loss": 12.3252, "step": 13997 }, { "epoch": 0.762247044168998, "grad_norm": 0.5700684553606541, "learning_rate": 0.0001420134702683475, "loss": 12.2328, "step": 13998 }, { "epoch": 0.762301498165581, "grad_norm": 0.5645040981476561, "learning_rate": 0.00014200546787300144, "loss": 12.1694, "step": 13999 }, { "epoch": 0.762355952162164, "grad_norm": 0.688139703247829, "learning_rate": 0.00014199746515101383, "loss": 12.3021, "step": 14000 }, { "epoch": 0.762410406158747, "grad_norm": 0.9161755471060709, "learning_rate": 0.0001419894621024468, "loss": 12.309, "step": 14001 }, { "epoch": 0.76246486015533, "grad_norm": 0.6852488173033148, "learning_rate": 0.00014198145872736272, "loss": 12.3005, "step": 14002 }, { "epoch": 0.762519314151913, "grad_norm": 0.6742687447311347, "learning_rate": 0.0001419734550258238, "loss": 12.3142, "step": 14003 }, { "epoch": 0.7625737681484961, "grad_norm": 0.6925915386799684, "learning_rate": 0.00014196545099789224, "loss": 12.2864, "step": 14004 }, { "epoch": 0.7626282221450791, "grad_norm": 0.595301777501327, "learning_rate": 0.00014195744664363032, "loss": 12.0688, "step": 14005 }, { "epoch": 0.7626826761416621, "grad_norm": 0.5281359073325815, "learning_rate": 0.00014194944196310017, "loss": 12.2558, "step": 14006 }, { "epoch": 0.7627371301382451, "grad_norm": 0.5523714966665472, "learning_rate": 0.00014194143695636421, "loss": 12.2697, "step": 14007 }, { "epoch": 0.7627915841348281, "grad_norm": 0.6080070137033171, "learning_rate": 0.00014193343162348455, "loss": 12.2527, "step": 14008 }, { "epoch": 0.762846038131411, "grad_norm": 0.6627933267237143, "learning_rate": 0.00014192542596452355, "loss": 12.0337, "step": 14009 }, { "epoch": 0.7629004921279942, "grad_norm": 0.6805001156429517, "learning_rate": 0.0001419174199795434, "loss": 12.2574, "step": 14010 }, { "epoch": 0.7629549461245771, "grad_norm": 0.6161146392108356, "learning_rate": 0.00014190941366860633, "loss": 12.1328, "step": 14011 }, { "epoch": 0.7630094001211601, "grad_norm": 0.5934805056499477, "learning_rate": 0.0001419014070317746, "loss": 12.2775, "step": 14012 }, { "epoch": 0.7630638541177431, "grad_norm": 0.6779302289590717, "learning_rate": 0.00014189340006911055, "loss": 12.3298, "step": 14013 }, { "epoch": 0.7631183081143261, "grad_norm": 0.6070414779581786, "learning_rate": 0.00014188539278067643, "loss": 12.219, "step": 14014 }, { "epoch": 0.7631727621109092, "grad_norm": 0.6993064018098617, "learning_rate": 0.0001418773851665344, "loss": 12.3345, "step": 14015 }, { "epoch": 0.7632272161074922, "grad_norm": 0.6305373858898516, "learning_rate": 0.00014186937722674683, "loss": 12.1297, "step": 14016 }, { "epoch": 0.7632816701040752, "grad_norm": 0.6013170403095079, "learning_rate": 0.00014186136896137598, "loss": 12.265, "step": 14017 }, { "epoch": 0.7633361241006582, "grad_norm": 0.5879012594309635, "learning_rate": 0.00014185336037048408, "loss": 12.2657, "step": 14018 }, { "epoch": 0.7633905780972412, "grad_norm": 0.5452413532638681, "learning_rate": 0.00014184535145413344, "loss": 12.1823, "step": 14019 }, { "epoch": 0.7634450320938242, "grad_norm": 0.6873152094810032, "learning_rate": 0.00014183734221238635, "loss": 12.4098, "step": 14020 }, { "epoch": 0.7634994860904073, "grad_norm": 0.6416778737618622, "learning_rate": 0.00014182933264530502, "loss": 12.283, "step": 14021 }, { "epoch": 0.7635539400869903, "grad_norm": 0.6437179415426925, "learning_rate": 0.00014182132275295182, "loss": 12.3234, "step": 14022 }, { "epoch": 0.7636083940835733, "grad_norm": 0.556334606335069, "learning_rate": 0.00014181331253538897, "loss": 12.2012, "step": 14023 }, { "epoch": 0.7636628480801563, "grad_norm": 0.5543179396361387, "learning_rate": 0.00014180530199267883, "loss": 12.2312, "step": 14024 }, { "epoch": 0.7637173020767393, "grad_norm": 0.5500295422833468, "learning_rate": 0.00014179729112488365, "loss": 12.1844, "step": 14025 }, { "epoch": 0.7637717560733223, "grad_norm": 0.5425630994642155, "learning_rate": 0.0001417892799320657, "loss": 12.2491, "step": 14026 }, { "epoch": 0.7638262100699054, "grad_norm": 0.6669730471823939, "learning_rate": 0.00014178126841428733, "loss": 12.3245, "step": 14027 }, { "epoch": 0.7638806640664884, "grad_norm": 0.6372441479102375, "learning_rate": 0.00014177325657161079, "loss": 12.121, "step": 14028 }, { "epoch": 0.7639351180630714, "grad_norm": 0.5936749439861729, "learning_rate": 0.00014176524440409838, "loss": 12.1716, "step": 14029 }, { "epoch": 0.7639895720596543, "grad_norm": 0.5548252061186839, "learning_rate": 0.00014175723191181246, "loss": 12.2135, "step": 14030 }, { "epoch": 0.7640440260562373, "grad_norm": 0.6081908661433257, "learning_rate": 0.00014174921909481528, "loss": 12.1114, "step": 14031 }, { "epoch": 0.7640984800528203, "grad_norm": 0.5962901142755918, "learning_rate": 0.00014174120595316918, "loss": 12.3284, "step": 14032 }, { "epoch": 0.7641529340494034, "grad_norm": 0.6069543878257613, "learning_rate": 0.00014173319248693647, "loss": 12.2171, "step": 14033 }, { "epoch": 0.7642073880459864, "grad_norm": 0.5340676390046819, "learning_rate": 0.00014172517869617942, "loss": 12.1292, "step": 14034 }, { "epoch": 0.7642618420425694, "grad_norm": 0.5802220742321209, "learning_rate": 0.00014171716458096043, "loss": 12.0764, "step": 14035 }, { "epoch": 0.7643162960391524, "grad_norm": 0.5425549092592509, "learning_rate": 0.00014170915014134175, "loss": 12.1105, "step": 14036 }, { "epoch": 0.7643707500357354, "grad_norm": 0.6521458879338885, "learning_rate": 0.00014170113537738572, "loss": 12.3549, "step": 14037 }, { "epoch": 0.7644252040323184, "grad_norm": 0.5721445904275433, "learning_rate": 0.00014169312028915467, "loss": 12.2843, "step": 14038 }, { "epoch": 0.7644796580289015, "grad_norm": 0.6204254611658935, "learning_rate": 0.00014168510487671095, "loss": 12.1998, "step": 14039 }, { "epoch": 0.7645341120254845, "grad_norm": 0.5713946970086126, "learning_rate": 0.00014167708914011683, "loss": 12.3153, "step": 14040 }, { "epoch": 0.7645885660220675, "grad_norm": 0.5532519282768511, "learning_rate": 0.00014166907307943468, "loss": 12.1961, "step": 14041 }, { "epoch": 0.7646430200186505, "grad_norm": 0.5569680627789245, "learning_rate": 0.00014166105669472686, "loss": 12.2786, "step": 14042 }, { "epoch": 0.7646974740152335, "grad_norm": 0.5536494835696807, "learning_rate": 0.00014165303998605567, "loss": 12.2799, "step": 14043 }, { "epoch": 0.7647519280118165, "grad_norm": 0.681863473872942, "learning_rate": 0.00014164502295348344, "loss": 12.2401, "step": 14044 }, { "epoch": 0.7648063820083996, "grad_norm": 0.5696680388291402, "learning_rate": 0.00014163700559707251, "loss": 12.0994, "step": 14045 }, { "epoch": 0.7648608360049826, "grad_norm": 0.5586529453896714, "learning_rate": 0.00014162898791688527, "loss": 12.2236, "step": 14046 }, { "epoch": 0.7649152900015656, "grad_norm": 0.6305781565597973, "learning_rate": 0.000141620969912984, "loss": 12.3263, "step": 14047 }, { "epoch": 0.7649697439981485, "grad_norm": 0.5753330948747856, "learning_rate": 0.00014161295158543117, "loss": 12.3274, "step": 14048 }, { "epoch": 0.7650241979947315, "grad_norm": 0.5359807136362528, "learning_rate": 0.000141604932934289, "loss": 12.2058, "step": 14049 }, { "epoch": 0.7650786519913146, "grad_norm": 0.5956265092053136, "learning_rate": 0.00014159691395961986, "loss": 12.2686, "step": 14050 }, { "epoch": 0.7651331059878976, "grad_norm": 0.5829785198998221, "learning_rate": 0.0001415888946614862, "loss": 12.2445, "step": 14051 }, { "epoch": 0.7651875599844806, "grad_norm": 0.5570639705465623, "learning_rate": 0.00014158087503995029, "loss": 12.2025, "step": 14052 }, { "epoch": 0.7652420139810636, "grad_norm": 0.6362187285845966, "learning_rate": 0.00014157285509507452, "loss": 12.3384, "step": 14053 }, { "epoch": 0.7652964679776466, "grad_norm": 0.5309991837853475, "learning_rate": 0.00014156483482692127, "loss": 12.2158, "step": 14054 }, { "epoch": 0.7653509219742296, "grad_norm": 0.5545637757522944, "learning_rate": 0.00014155681423555288, "loss": 12.1136, "step": 14055 }, { "epoch": 0.7654053759708127, "grad_norm": 0.6145129134489931, "learning_rate": 0.00014154879332103174, "loss": 12.1744, "step": 14056 }, { "epoch": 0.7654598299673957, "grad_norm": 0.5123633665586574, "learning_rate": 0.00014154077208342023, "loss": 12.1632, "step": 14057 }, { "epoch": 0.7655142839639787, "grad_norm": 0.5908039187960368, "learning_rate": 0.00014153275052278068, "loss": 12.2842, "step": 14058 }, { "epoch": 0.7655687379605617, "grad_norm": 0.5412909082059538, "learning_rate": 0.00014152472863917555, "loss": 12.0648, "step": 14059 }, { "epoch": 0.7656231919571447, "grad_norm": 0.6130141862215384, "learning_rate": 0.00014151670643266712, "loss": 12.1749, "step": 14060 }, { "epoch": 0.7656776459537277, "grad_norm": 0.6770562027867211, "learning_rate": 0.0001415086839033178, "loss": 12.4342, "step": 14061 }, { "epoch": 0.7657320999503108, "grad_norm": 0.6104892574412778, "learning_rate": 0.00014150066105119002, "loss": 12.3244, "step": 14062 }, { "epoch": 0.7657865539468938, "grad_norm": 0.556694748689259, "learning_rate": 0.00014149263787634615, "loss": 12.1292, "step": 14063 }, { "epoch": 0.7658410079434768, "grad_norm": 0.532048703486904, "learning_rate": 0.00014148461437884857, "loss": 12.1891, "step": 14064 }, { "epoch": 0.7658954619400598, "grad_norm": 0.5730215255960645, "learning_rate": 0.0001414765905587597, "loss": 12.272, "step": 14065 }, { "epoch": 0.7659499159366427, "grad_norm": 0.7000139721440017, "learning_rate": 0.00014146856641614184, "loss": 12.1002, "step": 14066 }, { "epoch": 0.7660043699332257, "grad_norm": 0.6843534527260816, "learning_rate": 0.00014146054195105748, "loss": 12.3384, "step": 14067 }, { "epoch": 0.7660588239298088, "grad_norm": 0.5634617117221637, "learning_rate": 0.00014145251716356897, "loss": 12.3222, "step": 14068 }, { "epoch": 0.7661132779263918, "grad_norm": 0.5687279399361462, "learning_rate": 0.00014144449205373877, "loss": 12.2644, "step": 14069 }, { "epoch": 0.7661677319229748, "grad_norm": 0.6051485761650688, "learning_rate": 0.00014143646662162927, "loss": 12.2573, "step": 14070 }, { "epoch": 0.7662221859195578, "grad_norm": 0.5839936942791063, "learning_rate": 0.00014142844086730282, "loss": 12.2485, "step": 14071 }, { "epoch": 0.7662766399161408, "grad_norm": 0.5517736427541252, "learning_rate": 0.00014142041479082185, "loss": 12.116, "step": 14072 }, { "epoch": 0.7663310939127238, "grad_norm": 0.5854388894353925, "learning_rate": 0.00014141238839224883, "loss": 12.3393, "step": 14073 }, { "epoch": 0.7663855479093069, "grad_norm": 0.6314116918440781, "learning_rate": 0.0001414043616716461, "loss": 12.3267, "step": 14074 }, { "epoch": 0.7664400019058899, "grad_norm": 0.5714963377609871, "learning_rate": 0.00014139633462907614, "loss": 12.2609, "step": 14075 }, { "epoch": 0.7664944559024729, "grad_norm": 0.6137410946530638, "learning_rate": 0.00014138830726460132, "loss": 12.2717, "step": 14076 }, { "epoch": 0.7665489098990559, "grad_norm": 0.5546721402047556, "learning_rate": 0.0001413802795782841, "loss": 12.3057, "step": 14077 }, { "epoch": 0.7666033638956389, "grad_norm": 0.6403782377884639, "learning_rate": 0.00014137225157018684, "loss": 12.2847, "step": 14078 }, { "epoch": 0.766657817892222, "grad_norm": 0.611941356520709, "learning_rate": 0.00014136422324037207, "loss": 12.3098, "step": 14079 }, { "epoch": 0.766712271888805, "grad_norm": 0.6050520607106942, "learning_rate": 0.00014135619458890215, "loss": 12.3527, "step": 14080 }, { "epoch": 0.766766725885388, "grad_norm": 0.5664621409668334, "learning_rate": 0.0001413481656158395, "loss": 12.2701, "step": 14081 }, { "epoch": 0.766821179881971, "grad_norm": 0.5631607069562086, "learning_rate": 0.00014134013632124658, "loss": 12.1803, "step": 14082 }, { "epoch": 0.766875633878554, "grad_norm": 0.5437281052141982, "learning_rate": 0.00014133210670518585, "loss": 12.1745, "step": 14083 }, { "epoch": 0.766930087875137, "grad_norm": 0.5991515782094939, "learning_rate": 0.0001413240767677197, "loss": 12.2471, "step": 14084 }, { "epoch": 0.76698454187172, "grad_norm": 0.5363419925328808, "learning_rate": 0.00014131604650891063, "loss": 12.1673, "step": 14085 }, { "epoch": 0.767038995868303, "grad_norm": 0.6946396140367467, "learning_rate": 0.00014130801592882107, "loss": 12.2711, "step": 14086 }, { "epoch": 0.767093449864886, "grad_norm": 0.6315343447460228, "learning_rate": 0.00014129998502751342, "loss": 12.2857, "step": 14087 }, { "epoch": 0.767147903861469, "grad_norm": 0.5829576252690439, "learning_rate": 0.00014129195380505017, "loss": 12.225, "step": 14088 }, { "epoch": 0.767202357858052, "grad_norm": 0.6156299807501912, "learning_rate": 0.0001412839222614937, "loss": 12.2652, "step": 14089 }, { "epoch": 0.767256811854635, "grad_norm": 0.6254524138851091, "learning_rate": 0.00014127589039690663, "loss": 12.2521, "step": 14090 }, { "epoch": 0.7673112658512181, "grad_norm": 0.6034966930210365, "learning_rate": 0.00014126785821135126, "loss": 12.2941, "step": 14091 }, { "epoch": 0.7673657198478011, "grad_norm": 0.6437585849859505, "learning_rate": 0.00014125982570489012, "loss": 12.202, "step": 14092 }, { "epoch": 0.7674201738443841, "grad_norm": 0.5373181078765519, "learning_rate": 0.00014125179287758564, "loss": 12.1046, "step": 14093 }, { "epoch": 0.7674746278409671, "grad_norm": 0.6572518223600609, "learning_rate": 0.0001412437597295003, "loss": 12.4171, "step": 14094 }, { "epoch": 0.7675290818375501, "grad_norm": 0.6232379859928363, "learning_rate": 0.00014123572626069657, "loss": 12.2784, "step": 14095 }, { "epoch": 0.7675835358341331, "grad_norm": 0.6577703484439251, "learning_rate": 0.00014122769247123694, "loss": 12.199, "step": 14096 }, { "epoch": 0.7676379898307162, "grad_norm": 0.5299622761718101, "learning_rate": 0.00014121965836118384, "loss": 12.2502, "step": 14097 }, { "epoch": 0.7676924438272992, "grad_norm": 0.6044188745898461, "learning_rate": 0.00014121162393059976, "loss": 12.2248, "step": 14098 }, { "epoch": 0.7677468978238822, "grad_norm": 0.6542289909898396, "learning_rate": 0.0001412035891795472, "loss": 12.1898, "step": 14099 }, { "epoch": 0.7678013518204652, "grad_norm": 0.5732423193009067, "learning_rate": 0.0001411955541080886, "loss": 12.3215, "step": 14100 }, { "epoch": 0.7678558058170482, "grad_norm": 0.6485999873433785, "learning_rate": 0.00014118751871628645, "loss": 12.341, "step": 14101 }, { "epoch": 0.7679102598136311, "grad_norm": 0.6092602520664506, "learning_rate": 0.0001411794830042033, "loss": 12.2059, "step": 14102 }, { "epoch": 0.7679647138102143, "grad_norm": 0.5593846752229331, "learning_rate": 0.00014117144697190154, "loss": 12.2195, "step": 14103 }, { "epoch": 0.7680191678067972, "grad_norm": 0.5751117900315875, "learning_rate": 0.00014116341061944372, "loss": 12.1362, "step": 14104 }, { "epoch": 0.7680736218033802, "grad_norm": 0.6063510707625296, "learning_rate": 0.00014115537394689232, "loss": 12.2299, "step": 14105 }, { "epoch": 0.7681280757999632, "grad_norm": 0.597977108682404, "learning_rate": 0.0001411473369543098, "loss": 12.0905, "step": 14106 }, { "epoch": 0.7681825297965462, "grad_norm": 0.5748477244036646, "learning_rate": 0.0001411392996417587, "loss": 12.2832, "step": 14107 }, { "epoch": 0.7682369837931292, "grad_norm": 0.6037677483892898, "learning_rate": 0.00014113126200930153, "loss": 12.2684, "step": 14108 }, { "epoch": 0.7682914377897123, "grad_norm": 0.6156244411231219, "learning_rate": 0.00014112322405700076, "loss": 12.2037, "step": 14109 }, { "epoch": 0.7683458917862953, "grad_norm": 0.6140588551870635, "learning_rate": 0.0001411151857849189, "loss": 12.3049, "step": 14110 }, { "epoch": 0.7684003457828783, "grad_norm": 0.6866071509031823, "learning_rate": 0.00014110714719311847, "loss": 12.1586, "step": 14111 }, { "epoch": 0.7684547997794613, "grad_norm": 0.6114492089687162, "learning_rate": 0.00014109910828166196, "loss": 12.2286, "step": 14112 }, { "epoch": 0.7685092537760443, "grad_norm": 0.5802988526973467, "learning_rate": 0.0001410910690506119, "loss": 12.2486, "step": 14113 }, { "epoch": 0.7685637077726274, "grad_norm": 0.6874522798193882, "learning_rate": 0.00014108302950003077, "loss": 12.1485, "step": 14114 }, { "epoch": 0.7686181617692104, "grad_norm": 0.6517853430434413, "learning_rate": 0.00014107498962998114, "loss": 12.317, "step": 14115 }, { "epoch": 0.7686726157657934, "grad_norm": 0.6469399529099216, "learning_rate": 0.0001410669494405255, "loss": 12.1699, "step": 14116 }, { "epoch": 0.7687270697623764, "grad_norm": 0.6214515150946013, "learning_rate": 0.00014105890893172636, "loss": 12.3012, "step": 14117 }, { "epoch": 0.7687815237589594, "grad_norm": 0.6367902841549076, "learning_rate": 0.00014105086810364625, "loss": 12.3051, "step": 14118 }, { "epoch": 0.7688359777555424, "grad_norm": 0.5850425284936396, "learning_rate": 0.00014104282695634771, "loss": 12.2222, "step": 14119 }, { "epoch": 0.7688904317521255, "grad_norm": 0.6108114424575637, "learning_rate": 0.0001410347854898933, "loss": 12.2159, "step": 14120 }, { "epoch": 0.7689448857487085, "grad_norm": 0.6282073225540852, "learning_rate": 0.00014102674370434546, "loss": 12.2697, "step": 14121 }, { "epoch": 0.7689993397452914, "grad_norm": 0.6241786619890566, "learning_rate": 0.00014101870159976683, "loss": 12.1934, "step": 14122 }, { "epoch": 0.7690537937418744, "grad_norm": 0.5502129213330372, "learning_rate": 0.00014101065917621988, "loss": 12.1528, "step": 14123 }, { "epoch": 0.7691082477384574, "grad_norm": 0.6558359504268575, "learning_rate": 0.00014100261643376717, "loss": 12.1864, "step": 14124 }, { "epoch": 0.7691627017350404, "grad_norm": 0.610417619158311, "learning_rate": 0.00014099457337247127, "loss": 12.201, "step": 14125 }, { "epoch": 0.7692171557316235, "grad_norm": 0.5636968506474196, "learning_rate": 0.00014098652999239462, "loss": 12.1071, "step": 14126 }, { "epoch": 0.7692716097282065, "grad_norm": 0.6876986715271385, "learning_rate": 0.0001409784862935999, "loss": 12.2103, "step": 14127 }, { "epoch": 0.7693260637247895, "grad_norm": 0.5493141280887939, "learning_rate": 0.00014097044227614954, "loss": 12.2582, "step": 14128 }, { "epoch": 0.7693805177213725, "grad_norm": 0.5951341346995529, "learning_rate": 0.0001409623979401062, "loss": 12.2833, "step": 14129 }, { "epoch": 0.7694349717179555, "grad_norm": 0.6742088084590737, "learning_rate": 0.00014095435328553239, "loss": 12.3141, "step": 14130 }, { "epoch": 0.7694894257145385, "grad_norm": 0.5384175217702536, "learning_rate": 0.00014094630831249064, "loss": 12.2142, "step": 14131 }, { "epoch": 0.7695438797111216, "grad_norm": 0.6060570398527729, "learning_rate": 0.00014093826302104351, "loss": 12.2129, "step": 14132 }, { "epoch": 0.7695983337077046, "grad_norm": 0.7023543612414251, "learning_rate": 0.0001409302174112536, "loss": 12.4134, "step": 14133 }, { "epoch": 0.7696527877042876, "grad_norm": 0.5842896036172063, "learning_rate": 0.00014092217148318348, "loss": 12.3908, "step": 14134 }, { "epoch": 0.7697072417008706, "grad_norm": 0.5804160737393887, "learning_rate": 0.00014091412523689566, "loss": 12.2307, "step": 14135 }, { "epoch": 0.7697616956974536, "grad_norm": 0.5472274719740815, "learning_rate": 0.00014090607867245274, "loss": 12.1742, "step": 14136 }, { "epoch": 0.7698161496940366, "grad_norm": 0.5146133222305155, "learning_rate": 0.00014089803178991733, "loss": 12.2048, "step": 14137 }, { "epoch": 0.7698706036906197, "grad_norm": 0.5899090148481555, "learning_rate": 0.00014088998458935192, "loss": 12.2604, "step": 14138 }, { "epoch": 0.7699250576872027, "grad_norm": 0.5819490009014724, "learning_rate": 0.00014088193707081914, "loss": 12.2092, "step": 14139 }, { "epoch": 0.7699795116837856, "grad_norm": 0.585003880961486, "learning_rate": 0.0001408738892343816, "loss": 12.1618, "step": 14140 }, { "epoch": 0.7700339656803686, "grad_norm": 0.6022137293508225, "learning_rate": 0.0001408658410801018, "loss": 12.2765, "step": 14141 }, { "epoch": 0.7700884196769516, "grad_norm": 0.5975999931093471, "learning_rate": 0.0001408577926080424, "loss": 12.208, "step": 14142 }, { "epoch": 0.7701428736735346, "grad_norm": 0.5542571069872959, "learning_rate": 0.00014084974381826592, "loss": 12.2463, "step": 14143 }, { "epoch": 0.7701973276701177, "grad_norm": 0.5281431689300452, "learning_rate": 0.000140841694710835, "loss": 12.2376, "step": 14144 }, { "epoch": 0.7702517816667007, "grad_norm": 0.5943061625491666, "learning_rate": 0.0001408336452858122, "loss": 12.2756, "step": 14145 }, { "epoch": 0.7703062356632837, "grad_norm": 0.5421811990530448, "learning_rate": 0.00014082559554326015, "loss": 12.0254, "step": 14146 }, { "epoch": 0.7703606896598667, "grad_norm": 0.6361321674266639, "learning_rate": 0.0001408175454832414, "loss": 12.2234, "step": 14147 }, { "epoch": 0.7704151436564497, "grad_norm": 0.5642016530538166, "learning_rate": 0.00014080949510581858, "loss": 12.2879, "step": 14148 }, { "epoch": 0.7704695976530328, "grad_norm": 0.5787333057483416, "learning_rate": 0.00014080144441105429, "loss": 12.2408, "step": 14149 }, { "epoch": 0.7705240516496158, "grad_norm": 0.5170654352262323, "learning_rate": 0.00014079339339901113, "loss": 12.2432, "step": 14150 }, { "epoch": 0.7705785056461988, "grad_norm": 0.6319362718158542, "learning_rate": 0.00014078534206975166, "loss": 12.328, "step": 14151 }, { "epoch": 0.7706329596427818, "grad_norm": 0.628696957200546, "learning_rate": 0.0001407772904233386, "loss": 12.313, "step": 14152 }, { "epoch": 0.7706874136393648, "grad_norm": 0.5423506725771897, "learning_rate": 0.00014076923845983443, "loss": 12.2563, "step": 14153 }, { "epoch": 0.7707418676359478, "grad_norm": 0.5830858369473833, "learning_rate": 0.00014076118617930186, "loss": 12.2093, "step": 14154 }, { "epoch": 0.7707963216325309, "grad_norm": 0.6498214355393194, "learning_rate": 0.00014075313358180344, "loss": 12.2451, "step": 14155 }, { "epoch": 0.7708507756291139, "grad_norm": 0.6162319238332199, "learning_rate": 0.00014074508066740183, "loss": 12.1171, "step": 14156 }, { "epoch": 0.7709052296256969, "grad_norm": 0.6190695713664903, "learning_rate": 0.00014073702743615964, "loss": 12.2088, "step": 14157 }, { "epoch": 0.7709596836222798, "grad_norm": 0.5412261050077144, "learning_rate": 0.0001407289738881395, "loss": 12.0418, "step": 14158 }, { "epoch": 0.7710141376188628, "grad_norm": 0.7319502020643702, "learning_rate": 0.00014072092002340405, "loss": 12.3812, "step": 14159 }, { "epoch": 0.7710685916154458, "grad_norm": 0.6467337001986067, "learning_rate": 0.00014071286584201587, "loss": 12.3046, "step": 14160 }, { "epoch": 0.7711230456120289, "grad_norm": 0.63723613660386, "learning_rate": 0.00014070481134403762, "loss": 12.3337, "step": 14161 }, { "epoch": 0.7711774996086119, "grad_norm": 0.541520470381976, "learning_rate": 0.00014069675652953194, "loss": 12.1234, "step": 14162 }, { "epoch": 0.7712319536051949, "grad_norm": 0.6029623074529031, "learning_rate": 0.00014068870139856142, "loss": 12.3006, "step": 14163 }, { "epoch": 0.7712864076017779, "grad_norm": 0.5985857701434745, "learning_rate": 0.00014068064595118877, "loss": 12.2511, "step": 14164 }, { "epoch": 0.7713408615983609, "grad_norm": 0.550235811414424, "learning_rate": 0.0001406725901874766, "loss": 12.1389, "step": 14165 }, { "epoch": 0.7713953155949439, "grad_norm": 0.5984638770455566, "learning_rate": 0.00014066453410748752, "loss": 12.3201, "step": 14166 }, { "epoch": 0.771449769591527, "grad_norm": 0.5862567048207609, "learning_rate": 0.00014065647771128423, "loss": 12.3524, "step": 14167 }, { "epoch": 0.77150422358811, "grad_norm": 0.562059377621077, "learning_rate": 0.00014064842099892935, "loss": 12.2515, "step": 14168 }, { "epoch": 0.771558677584693, "grad_norm": 0.5315770319244236, "learning_rate": 0.00014064036397048551, "loss": 12.0774, "step": 14169 }, { "epoch": 0.771613131581276, "grad_norm": 0.6436814701280071, "learning_rate": 0.0001406323066260154, "loss": 12.2492, "step": 14170 }, { "epoch": 0.771667585577859, "grad_norm": 0.5141619178170451, "learning_rate": 0.00014062424896558166, "loss": 12.216, "step": 14171 }, { "epoch": 0.771722039574442, "grad_norm": 0.601453934681363, "learning_rate": 0.00014061619098924694, "loss": 12.0353, "step": 14172 }, { "epoch": 0.7717764935710251, "grad_norm": 0.7781834634976749, "learning_rate": 0.00014060813269707392, "loss": 12.272, "step": 14173 }, { "epoch": 0.7718309475676081, "grad_norm": 0.6037550044731685, "learning_rate": 0.00014060007408912525, "loss": 12.2854, "step": 14174 }, { "epoch": 0.771885401564191, "grad_norm": 0.6248294027085685, "learning_rate": 0.0001405920151654636, "loss": 12.2883, "step": 14175 }, { "epoch": 0.771939855560774, "grad_norm": 0.5940242997523166, "learning_rate": 0.00014058395592615158, "loss": 12.2134, "step": 14176 }, { "epoch": 0.771994309557357, "grad_norm": 0.6932183762839871, "learning_rate": 0.00014057589637125198, "loss": 12.1776, "step": 14177 }, { "epoch": 0.77204876355394, "grad_norm": 0.5124592144785853, "learning_rate": 0.00014056783650082737, "loss": 12.0275, "step": 14178 }, { "epoch": 0.7721032175505231, "grad_norm": 0.5714334986091909, "learning_rate": 0.00014055977631494045, "loss": 12.2715, "step": 14179 }, { "epoch": 0.7721576715471061, "grad_norm": 0.5712169349242016, "learning_rate": 0.00014055171581365397, "loss": 12.2596, "step": 14180 }, { "epoch": 0.7722121255436891, "grad_norm": 0.6319480061558428, "learning_rate": 0.00014054365499703046, "loss": 12.3137, "step": 14181 }, { "epoch": 0.7722665795402721, "grad_norm": 0.5785141653358192, "learning_rate": 0.00014053559386513275, "loss": 12.2219, "step": 14182 }, { "epoch": 0.7723210335368551, "grad_norm": 0.5591661426535585, "learning_rate": 0.00014052753241802342, "loss": 12.2081, "step": 14183 }, { "epoch": 0.7723754875334382, "grad_norm": 0.5548912954895845, "learning_rate": 0.00014051947065576526, "loss": 12.2108, "step": 14184 }, { "epoch": 0.7724299415300212, "grad_norm": 0.569668227503653, "learning_rate": 0.00014051140857842086, "loss": 12.2756, "step": 14185 }, { "epoch": 0.7724843955266042, "grad_norm": 0.525580456092135, "learning_rate": 0.000140503346186053, "loss": 12.3377, "step": 14186 }, { "epoch": 0.7725388495231872, "grad_norm": 0.5478331742854908, "learning_rate": 0.00014049528347872425, "loss": 12.2428, "step": 14187 }, { "epoch": 0.7725933035197702, "grad_norm": 0.572341978220355, "learning_rate": 0.00014048722045649742, "loss": 12.2341, "step": 14188 }, { "epoch": 0.7726477575163532, "grad_norm": 0.6014348989134071, "learning_rate": 0.0001404791571194352, "loss": 12.2087, "step": 14189 }, { "epoch": 0.7727022115129363, "grad_norm": 0.5457895419471461, "learning_rate": 0.00014047109346760023, "loss": 12.2166, "step": 14190 }, { "epoch": 0.7727566655095193, "grad_norm": 0.6426511252714703, "learning_rate": 0.00014046302950105529, "loss": 12.2607, "step": 14191 }, { "epoch": 0.7728111195061023, "grad_norm": 0.5161979350729351, "learning_rate": 0.000140454965219863, "loss": 12.1732, "step": 14192 }, { "epoch": 0.7728655735026853, "grad_norm": 0.554545149023617, "learning_rate": 0.00014044690062408612, "loss": 12.2118, "step": 14193 }, { "epoch": 0.7729200274992682, "grad_norm": 0.5719533718854034, "learning_rate": 0.00014043883571378737, "loss": 12.22, "step": 14194 }, { "epoch": 0.7729744814958512, "grad_norm": 0.6718674667684825, "learning_rate": 0.0001404307704890295, "loss": 12.0822, "step": 14195 }, { "epoch": 0.7730289354924343, "grad_norm": 0.5855615166821195, "learning_rate": 0.00014042270494987513, "loss": 12.3239, "step": 14196 }, { "epoch": 0.7730833894890173, "grad_norm": 0.6017993497722051, "learning_rate": 0.000140414639096387, "loss": 12.3342, "step": 14197 }, { "epoch": 0.7731378434856003, "grad_norm": 0.5411438881222047, "learning_rate": 0.00014040657292862792, "loss": 12.3142, "step": 14198 }, { "epoch": 0.7731922974821833, "grad_norm": 0.5559677870703806, "learning_rate": 0.0001403985064466605, "loss": 12.2001, "step": 14199 }, { "epoch": 0.7732467514787663, "grad_norm": 0.5760873129276473, "learning_rate": 0.00014039043965054758, "loss": 12.2604, "step": 14200 }, { "epoch": 0.7733012054753493, "grad_norm": 0.5805544309998965, "learning_rate": 0.00014038237254035177, "loss": 12.3626, "step": 14201 }, { "epoch": 0.7733556594719324, "grad_norm": 0.5615832662673361, "learning_rate": 0.00014037430511613588, "loss": 12.0235, "step": 14202 }, { "epoch": 0.7734101134685154, "grad_norm": 0.6065320858204621, "learning_rate": 0.00014036623737796261, "loss": 12.3079, "step": 14203 }, { "epoch": 0.7734645674650984, "grad_norm": 0.5479637189418413, "learning_rate": 0.0001403581693258947, "loss": 12.2065, "step": 14204 }, { "epoch": 0.7735190214616814, "grad_norm": 0.5847645596608256, "learning_rate": 0.00014035010095999497, "loss": 12.1909, "step": 14205 }, { "epoch": 0.7735734754582644, "grad_norm": 0.5972784753038021, "learning_rate": 0.00014034203228032604, "loss": 12.2817, "step": 14206 }, { "epoch": 0.7736279294548474, "grad_norm": 0.6331906216266722, "learning_rate": 0.0001403339632869507, "loss": 12.2867, "step": 14207 }, { "epoch": 0.7736823834514305, "grad_norm": 0.5132859404573481, "learning_rate": 0.00014032589397993168, "loss": 12.1685, "step": 14208 }, { "epoch": 0.7737368374480135, "grad_norm": 0.5589622425109604, "learning_rate": 0.00014031782435933174, "loss": 12.1366, "step": 14209 }, { "epoch": 0.7737912914445965, "grad_norm": 0.5858976298498404, "learning_rate": 0.00014030975442521365, "loss": 12.1649, "step": 14210 }, { "epoch": 0.7738457454411795, "grad_norm": 0.6674804842623974, "learning_rate": 0.0001403016841776402, "loss": 12.4075, "step": 14211 }, { "epoch": 0.7739001994377624, "grad_norm": 0.5939039505378468, "learning_rate": 0.00014029361361667407, "loss": 12.1996, "step": 14212 }, { "epoch": 0.7739546534343456, "grad_norm": 0.5616736497767092, "learning_rate": 0.000140285542742378, "loss": 12.3147, "step": 14213 }, { "epoch": 0.7740091074309285, "grad_norm": 0.6052812911670153, "learning_rate": 0.00014027747155481482, "loss": 12.3252, "step": 14214 }, { "epoch": 0.7740635614275115, "grad_norm": 0.612917274492108, "learning_rate": 0.00014026940005404726, "loss": 12.2219, "step": 14215 }, { "epoch": 0.7741180154240945, "grad_norm": 0.546998223405516, "learning_rate": 0.00014026132824013812, "loss": 12.2989, "step": 14216 }, { "epoch": 0.7741724694206775, "grad_norm": 0.5894059670789069, "learning_rate": 0.00014025325611315012, "loss": 12.2427, "step": 14217 }, { "epoch": 0.7742269234172605, "grad_norm": 0.5957066757457238, "learning_rate": 0.00014024518367314602, "loss": 12.3165, "step": 14218 }, { "epoch": 0.7742813774138436, "grad_norm": 0.5836536982041985, "learning_rate": 0.00014023711092018868, "loss": 12.2283, "step": 14219 }, { "epoch": 0.7743358314104266, "grad_norm": 0.5368407291439166, "learning_rate": 0.00014022903785434077, "loss": 12.351, "step": 14220 }, { "epoch": 0.7743902854070096, "grad_norm": 0.5384549553801493, "learning_rate": 0.00014022096447566514, "loss": 12.2068, "step": 14221 }, { "epoch": 0.7744447394035926, "grad_norm": 0.5645116287805855, "learning_rate": 0.00014021289078422456, "loss": 12.1967, "step": 14222 }, { "epoch": 0.7744991934001756, "grad_norm": 0.5907865445463186, "learning_rate": 0.00014020481678008175, "loss": 12.252, "step": 14223 }, { "epoch": 0.7745536473967586, "grad_norm": 0.6491946219544488, "learning_rate": 0.0001401967424632996, "loss": 12.2809, "step": 14224 }, { "epoch": 0.7746081013933417, "grad_norm": 0.562889073485214, "learning_rate": 0.0001401886678339408, "loss": 12.1292, "step": 14225 }, { "epoch": 0.7746625553899247, "grad_norm": 0.6158908521325102, "learning_rate": 0.00014018059289206818, "loss": 12.2408, "step": 14226 }, { "epoch": 0.7747170093865077, "grad_norm": 0.6743988176690096, "learning_rate": 0.00014017251763774456, "loss": 12.2302, "step": 14227 }, { "epoch": 0.7747714633830907, "grad_norm": 0.5750539418203042, "learning_rate": 0.0001401644420710327, "loss": 12.3108, "step": 14228 }, { "epoch": 0.7748259173796737, "grad_norm": 0.6184375139837525, "learning_rate": 0.00014015636619199535, "loss": 12.3028, "step": 14229 }, { "epoch": 0.7748803713762566, "grad_norm": 0.610642355628531, "learning_rate": 0.0001401482900006954, "loss": 12.1486, "step": 14230 }, { "epoch": 0.7749348253728398, "grad_norm": 0.5821790796481626, "learning_rate": 0.0001401402134971956, "loss": 12.2153, "step": 14231 }, { "epoch": 0.7749892793694227, "grad_norm": 0.5664819681648965, "learning_rate": 0.0001401321366815588, "loss": 12.2911, "step": 14232 }, { "epoch": 0.7750437333660057, "grad_norm": 0.5699499311143544, "learning_rate": 0.00014012405955384776, "loss": 12.1542, "step": 14233 }, { "epoch": 0.7750981873625887, "grad_norm": 0.6004532193029597, "learning_rate": 0.0001401159821141253, "loss": 12.2844, "step": 14234 }, { "epoch": 0.7751526413591717, "grad_norm": 0.5606771204873505, "learning_rate": 0.00014010790436245425, "loss": 12.2204, "step": 14235 }, { "epoch": 0.7752070953557547, "grad_norm": 0.5976923703128083, "learning_rate": 0.00014009982629889736, "loss": 12.2957, "step": 14236 }, { "epoch": 0.7752615493523378, "grad_norm": 0.6291384584779743, "learning_rate": 0.00014009174792351756, "loss": 12.1954, "step": 14237 }, { "epoch": 0.7753160033489208, "grad_norm": 0.5841736611008006, "learning_rate": 0.00014008366923637757, "loss": 12.2286, "step": 14238 }, { "epoch": 0.7753704573455038, "grad_norm": 0.6188229355529823, "learning_rate": 0.00014007559023754027, "loss": 12.2629, "step": 14239 }, { "epoch": 0.7754249113420868, "grad_norm": 0.6582155521706322, "learning_rate": 0.00014006751092706843, "loss": 12.2669, "step": 14240 }, { "epoch": 0.7754793653386698, "grad_norm": 0.643902293261855, "learning_rate": 0.00014005943130502492, "loss": 12.3276, "step": 14241 }, { "epoch": 0.7755338193352528, "grad_norm": 0.6682038792668181, "learning_rate": 0.00014005135137147256, "loss": 12.2784, "step": 14242 }, { "epoch": 0.7755882733318359, "grad_norm": 0.5877777782070308, "learning_rate": 0.00014004327112647418, "loss": 12.3538, "step": 14243 }, { "epoch": 0.7756427273284189, "grad_norm": 0.5785124014962444, "learning_rate": 0.0001400351905700926, "loss": 12.1579, "step": 14244 }, { "epoch": 0.7756971813250019, "grad_norm": 0.6505713308803961, "learning_rate": 0.00014002710970239062, "loss": 12.1179, "step": 14245 }, { "epoch": 0.7757516353215849, "grad_norm": 0.6449881450005565, "learning_rate": 0.00014001902852343122, "loss": 12.3544, "step": 14246 }, { "epoch": 0.7758060893181679, "grad_norm": 0.5479162926644555, "learning_rate": 0.00014001094703327706, "loss": 12.1618, "step": 14247 }, { "epoch": 0.775860543314751, "grad_norm": 0.6190528133158492, "learning_rate": 0.00014000286523199108, "loss": 12.2099, "step": 14248 }, { "epoch": 0.775914997311334, "grad_norm": 0.7418593316007284, "learning_rate": 0.00013999478311963614, "loss": 12.3345, "step": 14249 }, { "epoch": 0.775969451307917, "grad_norm": 0.5577836545952066, "learning_rate": 0.00013998670069627505, "loss": 12.3324, "step": 14250 }, { "epoch": 0.7760239053044999, "grad_norm": 0.5608169550998805, "learning_rate": 0.00013997861796197068, "loss": 12.124, "step": 14251 }, { "epoch": 0.7760783593010829, "grad_norm": 0.632786855936109, "learning_rate": 0.00013997053491678584, "loss": 12.4297, "step": 14252 }, { "epoch": 0.7761328132976659, "grad_norm": 0.5433203133912633, "learning_rate": 0.00013996245156078343, "loss": 12.072, "step": 14253 }, { "epoch": 0.776187267294249, "grad_norm": 0.5843187705344397, "learning_rate": 0.0001399543678940263, "loss": 12.2276, "step": 14254 }, { "epoch": 0.776241721290832, "grad_norm": 0.5815658943635917, "learning_rate": 0.0001399462839165773, "loss": 12.2323, "step": 14255 }, { "epoch": 0.776296175287415, "grad_norm": 0.5538371414048853, "learning_rate": 0.00013993819962849932, "loss": 12.1063, "step": 14256 }, { "epoch": 0.776350629283998, "grad_norm": 0.5472741001888909, "learning_rate": 0.00013993011502985519, "loss": 12.1826, "step": 14257 }, { "epoch": 0.776405083280581, "grad_norm": 0.6017028137201669, "learning_rate": 0.00013992203012070775, "loss": 12.1747, "step": 14258 }, { "epoch": 0.776459537277164, "grad_norm": 0.5861421427275709, "learning_rate": 0.00013991394490111994, "loss": 12.1843, "step": 14259 }, { "epoch": 0.7765139912737471, "grad_norm": 0.6023377412435076, "learning_rate": 0.0001399058593711546, "loss": 12.1658, "step": 14260 }, { "epoch": 0.7765684452703301, "grad_norm": 0.5754821585939178, "learning_rate": 0.00013989777353087463, "loss": 12.1961, "step": 14261 }, { "epoch": 0.7766228992669131, "grad_norm": 0.6475523071910245, "learning_rate": 0.00013988968738034286, "loss": 12.2166, "step": 14262 }, { "epoch": 0.7766773532634961, "grad_norm": 0.5220931415276219, "learning_rate": 0.00013988160091962218, "loss": 12.0853, "step": 14263 }, { "epoch": 0.7767318072600791, "grad_norm": 0.570755149486794, "learning_rate": 0.00013987351414877547, "loss": 12.1619, "step": 14264 }, { "epoch": 0.7767862612566621, "grad_norm": 0.6100661674420887, "learning_rate": 0.00013986542706786564, "loss": 12.2246, "step": 14265 }, { "epoch": 0.7768407152532452, "grad_norm": 0.5887754471812554, "learning_rate": 0.00013985733967695563, "loss": 12.2059, "step": 14266 }, { "epoch": 0.7768951692498282, "grad_norm": 0.6519893463475808, "learning_rate": 0.0001398492519761082, "loss": 12.1453, "step": 14267 }, { "epoch": 0.7769496232464111, "grad_norm": 0.5964378271102649, "learning_rate": 0.0001398411639653863, "loss": 12.2564, "step": 14268 }, { "epoch": 0.7770040772429941, "grad_norm": 0.5732917751538955, "learning_rate": 0.0001398330756448528, "loss": 12.2191, "step": 14269 }, { "epoch": 0.7770585312395771, "grad_norm": 0.5754070704973367, "learning_rate": 0.0001398249870145707, "loss": 12.1242, "step": 14270 }, { "epoch": 0.7771129852361601, "grad_norm": 0.6793722475374825, "learning_rate": 0.0001398168980746028, "loss": 12.3338, "step": 14271 }, { "epoch": 0.7771674392327432, "grad_norm": 0.5648411282352749, "learning_rate": 0.00013980880882501199, "loss": 12.218, "step": 14272 }, { "epoch": 0.7772218932293262, "grad_norm": 0.6331849677299348, "learning_rate": 0.00013980071926586124, "loss": 12.4167, "step": 14273 }, { "epoch": 0.7772763472259092, "grad_norm": 0.578231429227074, "learning_rate": 0.0001397926293972134, "loss": 12.2841, "step": 14274 }, { "epoch": 0.7773308012224922, "grad_norm": 0.6491037203032933, "learning_rate": 0.0001397845392191314, "loss": 12.3289, "step": 14275 }, { "epoch": 0.7773852552190752, "grad_norm": 0.5303799869679332, "learning_rate": 0.00013977644873167816, "loss": 12.1669, "step": 14276 }, { "epoch": 0.7774397092156582, "grad_norm": 0.5901335407418612, "learning_rate": 0.0001397683579349166, "loss": 12.1261, "step": 14277 }, { "epoch": 0.7774941632122413, "grad_norm": 0.5676359507328063, "learning_rate": 0.00013976026682890958, "loss": 12.1885, "step": 14278 }, { "epoch": 0.7775486172088243, "grad_norm": 0.534057470171481, "learning_rate": 0.0001397521754137201, "loss": 12.1992, "step": 14279 }, { "epoch": 0.7776030712054073, "grad_norm": 0.5814240032332899, "learning_rate": 0.000139744083689411, "loss": 12.1737, "step": 14280 }, { "epoch": 0.7776575252019903, "grad_norm": 0.5591124285685799, "learning_rate": 0.00013973599165604527, "loss": 12.2514, "step": 14281 }, { "epoch": 0.7777119791985733, "grad_norm": 0.5714511589156374, "learning_rate": 0.0001397278993136858, "loss": 12.2969, "step": 14282 }, { "epoch": 0.7777664331951564, "grad_norm": 0.5185389079557104, "learning_rate": 0.0001397198066623955, "loss": 12.2458, "step": 14283 }, { "epoch": 0.7778208871917394, "grad_norm": 0.5416837860102734, "learning_rate": 0.00013971171370223736, "loss": 12.292, "step": 14284 }, { "epoch": 0.7778753411883224, "grad_norm": 0.6085444604486575, "learning_rate": 0.00013970362043327423, "loss": 12.2255, "step": 14285 }, { "epoch": 0.7779297951849053, "grad_norm": 0.6118832655342363, "learning_rate": 0.00013969552685556914, "loss": 12.249, "step": 14286 }, { "epoch": 0.7779842491814883, "grad_norm": 0.583875910795135, "learning_rate": 0.0001396874329691849, "loss": 12.2431, "step": 14287 }, { "epoch": 0.7780387031780713, "grad_norm": 0.6199842096758743, "learning_rate": 0.0001396793387741846, "loss": 12.237, "step": 14288 }, { "epoch": 0.7780931571746544, "grad_norm": 0.5951711257796768, "learning_rate": 0.00013967124427063108, "loss": 12.0649, "step": 14289 }, { "epoch": 0.7781476111712374, "grad_norm": 0.6533099070438597, "learning_rate": 0.0001396631494585873, "loss": 12.3251, "step": 14290 }, { "epoch": 0.7782020651678204, "grad_norm": 0.6306942750243015, "learning_rate": 0.00013965505433811623, "loss": 12.2949, "step": 14291 }, { "epoch": 0.7782565191644034, "grad_norm": 0.5373222830184418, "learning_rate": 0.0001396469589092808, "loss": 12.1354, "step": 14292 }, { "epoch": 0.7783109731609864, "grad_norm": 0.5551774573109617, "learning_rate": 0.00013963886317214398, "loss": 12.2581, "step": 14293 }, { "epoch": 0.7783654271575694, "grad_norm": 0.5963154606421266, "learning_rate": 0.00013963076712676873, "loss": 12.3286, "step": 14294 }, { "epoch": 0.7784198811541525, "grad_norm": 0.5861963744730597, "learning_rate": 0.00013962267077321795, "loss": 12.2763, "step": 14295 }, { "epoch": 0.7784743351507355, "grad_norm": 0.6176149298969914, "learning_rate": 0.00013961457411155466, "loss": 12.3619, "step": 14296 }, { "epoch": 0.7785287891473185, "grad_norm": 0.5448354456452081, "learning_rate": 0.00013960647714184182, "loss": 12.2151, "step": 14297 }, { "epoch": 0.7785832431439015, "grad_norm": 0.520422704062557, "learning_rate": 0.0001395983798641423, "loss": 12.2499, "step": 14298 }, { "epoch": 0.7786376971404845, "grad_norm": 0.557202372867009, "learning_rate": 0.0001395902822785192, "loss": 12.165, "step": 14299 }, { "epoch": 0.7786921511370675, "grad_norm": 0.5961029491895187, "learning_rate": 0.00013958218438503542, "loss": 12.3071, "step": 14300 }, { "epoch": 0.7787466051336506, "grad_norm": 0.5826837314749691, "learning_rate": 0.00013957408618375393, "loss": 12.2213, "step": 14301 }, { "epoch": 0.7788010591302336, "grad_norm": 0.5420504530064048, "learning_rate": 0.0001395659876747377, "loss": 12.2102, "step": 14302 }, { "epoch": 0.7788555131268166, "grad_norm": 0.5346947413027613, "learning_rate": 0.00013955788885804972, "loss": 12.1678, "step": 14303 }, { "epoch": 0.7789099671233995, "grad_norm": 0.5882148826393877, "learning_rate": 0.00013954978973375294, "loss": 12.1439, "step": 14304 }, { "epoch": 0.7789644211199825, "grad_norm": 0.5857740211823618, "learning_rate": 0.0001395416903019104, "loss": 12.2248, "step": 14305 }, { "epoch": 0.7790188751165655, "grad_norm": 0.5330991902072838, "learning_rate": 0.00013953359056258503, "loss": 12.1562, "step": 14306 }, { "epoch": 0.7790733291131486, "grad_norm": 0.6525952363657743, "learning_rate": 0.00013952549051583982, "loss": 12.281, "step": 14307 }, { "epoch": 0.7791277831097316, "grad_norm": 0.6381722528438454, "learning_rate": 0.0001395173901617378, "loss": 12.2519, "step": 14308 }, { "epoch": 0.7791822371063146, "grad_norm": 0.5788636699692825, "learning_rate": 0.00013950928950034187, "loss": 12.1706, "step": 14309 }, { "epoch": 0.7792366911028976, "grad_norm": 0.5714525079864948, "learning_rate": 0.00013950118853171513, "loss": 12.1567, "step": 14310 }, { "epoch": 0.7792911450994806, "grad_norm": 0.5847814821575158, "learning_rate": 0.0001394930872559205, "loss": 12.274, "step": 14311 }, { "epoch": 0.7793455990960636, "grad_norm": 0.6262641473386918, "learning_rate": 0.000139484985673021, "loss": 12.3492, "step": 14312 }, { "epoch": 0.7794000530926467, "grad_norm": 0.6105688553660109, "learning_rate": 0.00013947688378307963, "loss": 12.1608, "step": 14313 }, { "epoch": 0.7794545070892297, "grad_norm": 0.5249630319727343, "learning_rate": 0.0001394687815861594, "loss": 12.0978, "step": 14314 }, { "epoch": 0.7795089610858127, "grad_norm": 0.6093801425256825, "learning_rate": 0.00013946067908232333, "loss": 12.1557, "step": 14315 }, { "epoch": 0.7795634150823957, "grad_norm": 0.6559736581795284, "learning_rate": 0.00013945257627163437, "loss": 12.2336, "step": 14316 }, { "epoch": 0.7796178690789787, "grad_norm": 0.5574043885221901, "learning_rate": 0.00013944447315415557, "loss": 12.1308, "step": 14317 }, { "epoch": 0.7796723230755618, "grad_norm": 0.5868701210809807, "learning_rate": 0.00013943636972994991, "loss": 12.2638, "step": 14318 }, { "epoch": 0.7797267770721448, "grad_norm": 0.6012516255490127, "learning_rate": 0.00013942826599908044, "loss": 12.2809, "step": 14319 }, { "epoch": 0.7797812310687278, "grad_norm": 0.5372097078881665, "learning_rate": 0.00013942016196161016, "loss": 12.2253, "step": 14320 }, { "epoch": 0.7798356850653108, "grad_norm": 0.545134903719249, "learning_rate": 0.00013941205761760212, "loss": 12.2653, "step": 14321 }, { "epoch": 0.7798901390618938, "grad_norm": 0.5730787720955979, "learning_rate": 0.0001394039529671193, "loss": 12.2101, "step": 14322 }, { "epoch": 0.7799445930584767, "grad_norm": 0.5340799709141614, "learning_rate": 0.0001393958480102247, "loss": 12.1503, "step": 14323 }, { "epoch": 0.7799990470550598, "grad_norm": 0.562407601107663, "learning_rate": 0.0001393877427469814, "loss": 12.2644, "step": 14324 }, { "epoch": 0.7800535010516428, "grad_norm": 0.6181723575845991, "learning_rate": 0.0001393796371774524, "loss": 12.124, "step": 14325 }, { "epoch": 0.7801079550482258, "grad_norm": 0.509798324698095, "learning_rate": 0.00013937153130170075, "loss": 12.2401, "step": 14326 }, { "epoch": 0.7801624090448088, "grad_norm": 0.587458779265688, "learning_rate": 0.00013936342511978946, "loss": 12.126, "step": 14327 }, { "epoch": 0.7802168630413918, "grad_norm": 0.6251305508340274, "learning_rate": 0.00013935531863178157, "loss": 12.1942, "step": 14328 }, { "epoch": 0.7802713170379748, "grad_norm": 0.5939469560259177, "learning_rate": 0.00013934721183774015, "loss": 12.2734, "step": 14329 }, { "epoch": 0.7803257710345579, "grad_norm": 0.584601867857084, "learning_rate": 0.00013933910473772816, "loss": 12.2859, "step": 14330 }, { "epoch": 0.7803802250311409, "grad_norm": 0.5854926109302606, "learning_rate": 0.00013933099733180876, "loss": 12.2367, "step": 14331 }, { "epoch": 0.7804346790277239, "grad_norm": 0.6588983389598322, "learning_rate": 0.00013932288962004486, "loss": 12.209, "step": 14332 }, { "epoch": 0.7804891330243069, "grad_norm": 0.6337923522917029, "learning_rate": 0.00013931478160249966, "loss": 12.237, "step": 14333 }, { "epoch": 0.7805435870208899, "grad_norm": 0.587236476231902, "learning_rate": 0.00013930667327923606, "loss": 12.2099, "step": 14334 }, { "epoch": 0.7805980410174729, "grad_norm": 0.5244300732635503, "learning_rate": 0.00013929856465031716, "loss": 12.1203, "step": 14335 }, { "epoch": 0.780652495014056, "grad_norm": 0.5159189318018038, "learning_rate": 0.00013929045571580608, "loss": 12.2196, "step": 14336 }, { "epoch": 0.780706949010639, "grad_norm": 0.6009509071692117, "learning_rate": 0.00013928234647576581, "loss": 12.2065, "step": 14337 }, { "epoch": 0.780761403007222, "grad_norm": 0.62828740155288, "learning_rate": 0.00013927423693025942, "loss": 12.2639, "step": 14338 }, { "epoch": 0.780815857003805, "grad_norm": 0.5782629456373587, "learning_rate": 0.00013926612707935, "loss": 12.3063, "step": 14339 }, { "epoch": 0.780870311000388, "grad_norm": 0.6183466777482532, "learning_rate": 0.00013925801692310058, "loss": 12.1923, "step": 14340 }, { "epoch": 0.7809247649969709, "grad_norm": 0.5903083877025544, "learning_rate": 0.00013924990646157424, "loss": 12.2672, "step": 14341 }, { "epoch": 0.780979218993554, "grad_norm": 0.6053161053985386, "learning_rate": 0.00013924179569483401, "loss": 12.2701, "step": 14342 }, { "epoch": 0.781033672990137, "grad_norm": 0.5986010917087732, "learning_rate": 0.00013923368462294303, "loss": 12.2788, "step": 14343 }, { "epoch": 0.78108812698672, "grad_norm": 0.6302334667450733, "learning_rate": 0.00013922557324596435, "loss": 12.3288, "step": 14344 }, { "epoch": 0.781142580983303, "grad_norm": 0.564817574612964, "learning_rate": 0.000139217461563961, "loss": 12.2768, "step": 14345 }, { "epoch": 0.781197034979886, "grad_norm": 0.6042381395814973, "learning_rate": 0.00013920934957699612, "loss": 12.0756, "step": 14346 }, { "epoch": 0.7812514889764691, "grad_norm": 0.7369856283803117, "learning_rate": 0.00013920123728513274, "loss": 12.2836, "step": 14347 }, { "epoch": 0.7813059429730521, "grad_norm": 0.6592566362375767, "learning_rate": 0.00013919312468843397, "loss": 12.2566, "step": 14348 }, { "epoch": 0.7813603969696351, "grad_norm": 0.6098850365522455, "learning_rate": 0.0001391850117869629, "loss": 12.2025, "step": 14349 }, { "epoch": 0.7814148509662181, "grad_norm": 0.6014968474919596, "learning_rate": 0.0001391768985807826, "loss": 12.2522, "step": 14350 }, { "epoch": 0.7814693049628011, "grad_norm": 0.5381136132046134, "learning_rate": 0.00013916878506995618, "loss": 12.2215, "step": 14351 }, { "epoch": 0.7815237589593841, "grad_norm": 0.5348296555678123, "learning_rate": 0.0001391606712545467, "loss": 12.2183, "step": 14352 }, { "epoch": 0.7815782129559672, "grad_norm": 0.6507633060534262, "learning_rate": 0.00013915255713461727, "loss": 12.2612, "step": 14353 }, { "epoch": 0.7816326669525502, "grad_norm": 0.5396809649087732, "learning_rate": 0.000139144442710231, "loss": 12.1339, "step": 14354 }, { "epoch": 0.7816871209491332, "grad_norm": 0.5438368503662775, "learning_rate": 0.000139136327981451, "loss": 12.3099, "step": 14355 }, { "epoch": 0.7817415749457162, "grad_norm": 0.659822970082171, "learning_rate": 0.00013912821294834033, "loss": 12.2638, "step": 14356 }, { "epoch": 0.7817960289422992, "grad_norm": 0.4852121890389048, "learning_rate": 0.00013912009761096213, "loss": 12.1385, "step": 14357 }, { "epoch": 0.7818504829388822, "grad_norm": 0.515123968159365, "learning_rate": 0.00013911198196937946, "loss": 12.2034, "step": 14358 }, { "epoch": 0.7819049369354653, "grad_norm": 0.5601852691409116, "learning_rate": 0.00013910386602365547, "loss": 12.2976, "step": 14359 }, { "epoch": 0.7819593909320482, "grad_norm": 0.5218995324590391, "learning_rate": 0.00013909574977385327, "loss": 12.2155, "step": 14360 }, { "epoch": 0.7820138449286312, "grad_norm": 0.7009425777978717, "learning_rate": 0.00013908763322003595, "loss": 12.1889, "step": 14361 }, { "epoch": 0.7820682989252142, "grad_norm": 0.5250741002436927, "learning_rate": 0.00013907951636226665, "loss": 12.2074, "step": 14362 }, { "epoch": 0.7821227529217972, "grad_norm": 0.5484504796190243, "learning_rate": 0.00013907139920060847, "loss": 12.2898, "step": 14363 }, { "epoch": 0.7821772069183802, "grad_norm": 0.639001255662936, "learning_rate": 0.00013906328173512455, "loss": 12.215, "step": 14364 }, { "epoch": 0.7822316609149633, "grad_norm": 0.5844299666446592, "learning_rate": 0.000139055163965878, "loss": 12.1908, "step": 14365 }, { "epoch": 0.7822861149115463, "grad_norm": 0.6061975542541682, "learning_rate": 0.00013904704589293192, "loss": 12.2301, "step": 14366 }, { "epoch": 0.7823405689081293, "grad_norm": 0.6185399821651211, "learning_rate": 0.00013903892751634947, "loss": 12.2471, "step": 14367 }, { "epoch": 0.7823950229047123, "grad_norm": 0.6760511265559507, "learning_rate": 0.0001390308088361938, "loss": 12.233, "step": 14368 }, { "epoch": 0.7824494769012953, "grad_norm": 0.5388520380861961, "learning_rate": 0.000139022689852528, "loss": 12.2188, "step": 14369 }, { "epoch": 0.7825039308978783, "grad_norm": 0.5564895987143336, "learning_rate": 0.0001390145705654152, "loss": 12.106, "step": 14370 }, { "epoch": 0.7825583848944614, "grad_norm": 0.5215538856075096, "learning_rate": 0.00013900645097491857, "loss": 12.1252, "step": 14371 }, { "epoch": 0.7826128388910444, "grad_norm": 0.6067388147613065, "learning_rate": 0.00013899833108110127, "loss": 12.1462, "step": 14372 }, { "epoch": 0.7826672928876274, "grad_norm": 0.5499048916495173, "learning_rate": 0.00013899021088402638, "loss": 12.1765, "step": 14373 }, { "epoch": 0.7827217468842104, "grad_norm": 0.6123862168122999, "learning_rate": 0.00013898209038375704, "loss": 12.1465, "step": 14374 }, { "epoch": 0.7827762008807934, "grad_norm": 0.5353567283212902, "learning_rate": 0.00013897396958035646, "loss": 12.3048, "step": 14375 }, { "epoch": 0.7828306548773764, "grad_norm": 0.7451386834832064, "learning_rate": 0.00013896584847388776, "loss": 12.3199, "step": 14376 }, { "epoch": 0.7828851088739595, "grad_norm": 0.5468178378836177, "learning_rate": 0.0001389577270644141, "loss": 12.2477, "step": 14377 }, { "epoch": 0.7829395628705424, "grad_norm": 0.561119411847658, "learning_rate": 0.00013894960535199862, "loss": 12.0196, "step": 14378 }, { "epoch": 0.7829940168671254, "grad_norm": 0.5808741207692449, "learning_rate": 0.00013894148333670444, "loss": 12.1076, "step": 14379 }, { "epoch": 0.7830484708637084, "grad_norm": 0.6466017916541937, "learning_rate": 0.00013893336101859479, "loss": 12.3068, "step": 14380 }, { "epoch": 0.7831029248602914, "grad_norm": 0.6033825408993769, "learning_rate": 0.00013892523839773274, "loss": 12.1595, "step": 14381 }, { "epoch": 0.7831573788568745, "grad_norm": 0.6134304033155082, "learning_rate": 0.00013891711547418158, "loss": 12.3442, "step": 14382 }, { "epoch": 0.7832118328534575, "grad_norm": 0.516047645087405, "learning_rate": 0.0001389089922480044, "loss": 12.241, "step": 14383 }, { "epoch": 0.7832662868500405, "grad_norm": 0.5306019795143434, "learning_rate": 0.0001389008687192643, "loss": 12.2247, "step": 14384 }, { "epoch": 0.7833207408466235, "grad_norm": 0.6731617967193122, "learning_rate": 0.00013889274488802458, "loss": 12.157, "step": 14385 }, { "epoch": 0.7833751948432065, "grad_norm": 0.5768927983386264, "learning_rate": 0.00013888462075434832, "loss": 12.1987, "step": 14386 }, { "epoch": 0.7834296488397895, "grad_norm": 0.5348814636384673, "learning_rate": 0.00013887649631829879, "loss": 12.2369, "step": 14387 }, { "epoch": 0.7834841028363726, "grad_norm": 0.5322337299843027, "learning_rate": 0.00013886837157993904, "loss": 12.1423, "step": 14388 }, { "epoch": 0.7835385568329556, "grad_norm": 0.5413549190692319, "learning_rate": 0.00013886024653933233, "loss": 12.2067, "step": 14389 }, { "epoch": 0.7835930108295386, "grad_norm": 0.5780044416754666, "learning_rate": 0.0001388521211965418, "loss": 12.1955, "step": 14390 }, { "epoch": 0.7836474648261216, "grad_norm": 0.5817049120825359, "learning_rate": 0.0001388439955516307, "loss": 12.2339, "step": 14391 }, { "epoch": 0.7837019188227046, "grad_norm": 0.5970867677148517, "learning_rate": 0.0001388358696046622, "loss": 12.3324, "step": 14392 }, { "epoch": 0.7837563728192876, "grad_norm": 0.6115114126909592, "learning_rate": 0.00013882774335569943, "loss": 12.1704, "step": 14393 }, { "epoch": 0.7838108268158707, "grad_norm": 0.6358443448694101, "learning_rate": 0.00013881961680480562, "loss": 12.2058, "step": 14394 }, { "epoch": 0.7838652808124537, "grad_norm": 0.5083773725240279, "learning_rate": 0.00013881148995204394, "loss": 12.0843, "step": 14395 }, { "epoch": 0.7839197348090367, "grad_norm": 0.5771923630494237, "learning_rate": 0.0001388033627974776, "loss": 12.2925, "step": 14396 }, { "epoch": 0.7839741888056196, "grad_norm": 0.7033857158513953, "learning_rate": 0.00013879523534116988, "loss": 12.3006, "step": 14397 }, { "epoch": 0.7840286428022026, "grad_norm": 0.5933514222197236, "learning_rate": 0.00013878710758318384, "loss": 12.2273, "step": 14398 }, { "epoch": 0.7840830967987856, "grad_norm": 0.5768914323508817, "learning_rate": 0.00013877897952358276, "loss": 12.1288, "step": 14399 }, { "epoch": 0.7841375507953687, "grad_norm": 0.5646886542575501, "learning_rate": 0.00013877085116242982, "loss": 12.2829, "step": 14400 }, { "epoch": 0.7841920047919517, "grad_norm": 0.6477289200019736, "learning_rate": 0.00013876272249978823, "loss": 12.3978, "step": 14401 }, { "epoch": 0.7842464587885347, "grad_norm": 0.5648206891195865, "learning_rate": 0.00013875459353572124, "loss": 12.1633, "step": 14402 }, { "epoch": 0.7843009127851177, "grad_norm": 0.6490668828600686, "learning_rate": 0.00013874646427029203, "loss": 12.2609, "step": 14403 }, { "epoch": 0.7843553667817007, "grad_norm": 0.5890545313527348, "learning_rate": 0.00013873833470356381, "loss": 12.3098, "step": 14404 }, { "epoch": 0.7844098207782837, "grad_norm": 0.6127329938396254, "learning_rate": 0.00013873020483559978, "loss": 12.3745, "step": 14405 }, { "epoch": 0.7844642747748668, "grad_norm": 0.5983140066662879, "learning_rate": 0.00013872207466646323, "loss": 12.2806, "step": 14406 }, { "epoch": 0.7845187287714498, "grad_norm": 0.5606783047599062, "learning_rate": 0.0001387139441962173, "loss": 12.0884, "step": 14407 }, { "epoch": 0.7845731827680328, "grad_norm": 0.6118712385980577, "learning_rate": 0.00013870581342492527, "loss": 12.2015, "step": 14408 }, { "epoch": 0.7846276367646158, "grad_norm": 0.5752553693397917, "learning_rate": 0.00013869768235265034, "loss": 12.262, "step": 14409 }, { "epoch": 0.7846820907611988, "grad_norm": 0.6494019440602582, "learning_rate": 0.00013868955097945572, "loss": 12.2359, "step": 14410 }, { "epoch": 0.7847365447577818, "grad_norm": 0.5540211353726932, "learning_rate": 0.00013868141930540467, "loss": 12.2692, "step": 14411 }, { "epoch": 0.7847909987543649, "grad_norm": 0.5986349560833598, "learning_rate": 0.00013867328733056043, "loss": 12.1355, "step": 14412 }, { "epoch": 0.7848454527509479, "grad_norm": 0.5163497163917473, "learning_rate": 0.00013866515505498619, "loss": 12.2954, "step": 14413 }, { "epoch": 0.7848999067475309, "grad_norm": 0.6433436001488466, "learning_rate": 0.00013865702247874525, "loss": 12.2901, "step": 14414 }, { "epoch": 0.7849543607441138, "grad_norm": 0.581890366088506, "learning_rate": 0.0001386488896019008, "loss": 12.0715, "step": 14415 }, { "epoch": 0.7850088147406968, "grad_norm": 0.6102902133868333, "learning_rate": 0.0001386407564245161, "loss": 12.3666, "step": 14416 }, { "epoch": 0.7850632687372799, "grad_norm": 0.5804923766002399, "learning_rate": 0.00013863262294665444, "loss": 12.3214, "step": 14417 }, { "epoch": 0.7851177227338629, "grad_norm": 0.6068979406349941, "learning_rate": 0.00013862448916837901, "loss": 12.41, "step": 14418 }, { "epoch": 0.7851721767304459, "grad_norm": 0.6058305883785644, "learning_rate": 0.00013861635508975305, "loss": 12.224, "step": 14419 }, { "epoch": 0.7852266307270289, "grad_norm": 0.5933602069707344, "learning_rate": 0.00013860822071083985, "loss": 12.2893, "step": 14420 }, { "epoch": 0.7852810847236119, "grad_norm": 0.5838506041908982, "learning_rate": 0.00013860008603170266, "loss": 12.219, "step": 14421 }, { "epoch": 0.7853355387201949, "grad_norm": 0.565446422535431, "learning_rate": 0.0001385919510524047, "loss": 12.1994, "step": 14422 }, { "epoch": 0.785389992716778, "grad_norm": 0.591349648141618, "learning_rate": 0.00013858381577300927, "loss": 12.3008, "step": 14423 }, { "epoch": 0.785444446713361, "grad_norm": 0.6212443898915186, "learning_rate": 0.00013857568019357962, "loss": 12.3911, "step": 14424 }, { "epoch": 0.785498900709944, "grad_norm": 0.5605166019234723, "learning_rate": 0.00013856754431417905, "loss": 12.297, "step": 14425 }, { "epoch": 0.785553354706527, "grad_norm": 0.5557117201016185, "learning_rate": 0.00013855940813487075, "loss": 12.2484, "step": 14426 }, { "epoch": 0.78560780870311, "grad_norm": 0.5896367418056946, "learning_rate": 0.00013855127165571804, "loss": 12.0708, "step": 14427 }, { "epoch": 0.785662262699693, "grad_norm": 0.5566213369949221, "learning_rate": 0.00013854313487678414, "loss": 12.2472, "step": 14428 }, { "epoch": 0.7857167166962761, "grad_norm": 0.5948199089283874, "learning_rate": 0.0001385349977981324, "loss": 12.3153, "step": 14429 }, { "epoch": 0.7857711706928591, "grad_norm": 0.6235122010792709, "learning_rate": 0.00013852686041982603, "loss": 12.3027, "step": 14430 }, { "epoch": 0.7858256246894421, "grad_norm": 0.5084880832530156, "learning_rate": 0.00013851872274192833, "loss": 12.1476, "step": 14431 }, { "epoch": 0.785880078686025, "grad_norm": 0.5662903553664386, "learning_rate": 0.00013851058476450264, "loss": 12.1704, "step": 14432 }, { "epoch": 0.785934532682608, "grad_norm": 0.5153724619182718, "learning_rate": 0.0001385024464876121, "loss": 12.1991, "step": 14433 }, { "epoch": 0.785988986679191, "grad_norm": 0.608915473834492, "learning_rate": 0.00013849430791132013, "loss": 12.4918, "step": 14434 }, { "epoch": 0.7860434406757741, "grad_norm": 0.5316088716633605, "learning_rate": 0.00013848616903568996, "loss": 12.1875, "step": 14435 }, { "epoch": 0.7860978946723571, "grad_norm": 0.5752852247644937, "learning_rate": 0.00013847802986078486, "loss": 12.2453, "step": 14436 }, { "epoch": 0.7861523486689401, "grad_norm": 0.5561773848748036, "learning_rate": 0.0001384698903866682, "loss": 12.1688, "step": 14437 }, { "epoch": 0.7862068026655231, "grad_norm": 0.5808394601058333, "learning_rate": 0.00013846175061340318, "loss": 12.2476, "step": 14438 }, { "epoch": 0.7862612566621061, "grad_norm": 0.6028946704665702, "learning_rate": 0.00013845361054105312, "loss": 12.2198, "step": 14439 }, { "epoch": 0.7863157106586891, "grad_norm": 0.5933014981190774, "learning_rate": 0.00013844547016968138, "loss": 12.2097, "step": 14440 }, { "epoch": 0.7863701646552722, "grad_norm": 0.5770644222918464, "learning_rate": 0.0001384373294993512, "loss": 12.1455, "step": 14441 }, { "epoch": 0.7864246186518552, "grad_norm": 0.6156425720653473, "learning_rate": 0.00013842918853012592, "loss": 12.2782, "step": 14442 }, { "epoch": 0.7864790726484382, "grad_norm": 0.6109416698177631, "learning_rate": 0.0001384210472620688, "loss": 12.2674, "step": 14443 }, { "epoch": 0.7865335266450212, "grad_norm": 0.6145744345463588, "learning_rate": 0.00013841290569524314, "loss": 12.2003, "step": 14444 }, { "epoch": 0.7865879806416042, "grad_norm": 0.5651575047053573, "learning_rate": 0.00013840476382971233, "loss": 12.2573, "step": 14445 }, { "epoch": 0.7866424346381872, "grad_norm": 0.569776001009847, "learning_rate": 0.00013839662166553965, "loss": 12.2178, "step": 14446 }, { "epoch": 0.7866968886347703, "grad_norm": 0.5834825925351157, "learning_rate": 0.00013838847920278838, "loss": 12.203, "step": 14447 }, { "epoch": 0.7867513426313533, "grad_norm": 0.5820569556884193, "learning_rate": 0.00013838033644152185, "loss": 12.1508, "step": 14448 }, { "epoch": 0.7868057966279363, "grad_norm": 0.549526625619789, "learning_rate": 0.0001383721933818034, "loss": 12.1994, "step": 14449 }, { "epoch": 0.7868602506245193, "grad_norm": 0.6419107847056853, "learning_rate": 0.0001383640500236963, "loss": 12.1019, "step": 14450 }, { "epoch": 0.7869147046211022, "grad_norm": 0.5877121778475324, "learning_rate": 0.00013835590636726396, "loss": 12.2655, "step": 14451 }, { "epoch": 0.7869691586176853, "grad_norm": 0.5575112325526567, "learning_rate": 0.00013834776241256966, "loss": 12.223, "step": 14452 }, { "epoch": 0.7870236126142683, "grad_norm": 0.5705576045447335, "learning_rate": 0.0001383396181596767, "loss": 12.2809, "step": 14453 }, { "epoch": 0.7870780666108513, "grad_norm": 0.5420703228607463, "learning_rate": 0.00013833147360864848, "loss": 12.2297, "step": 14454 }, { "epoch": 0.7871325206074343, "grad_norm": 0.5484969588430767, "learning_rate": 0.00013832332875954824, "loss": 12.2913, "step": 14455 }, { "epoch": 0.7871869746040173, "grad_norm": 0.549863675343978, "learning_rate": 0.00013831518361243937, "loss": 12.2342, "step": 14456 }, { "epoch": 0.7872414286006003, "grad_norm": 0.5468044821046292, "learning_rate": 0.00013830703816738527, "loss": 12.2085, "step": 14457 }, { "epoch": 0.7872958825971834, "grad_norm": 0.6595316857062661, "learning_rate": 0.00013829889242444917, "loss": 12.1952, "step": 14458 }, { "epoch": 0.7873503365937664, "grad_norm": 0.6309748753681473, "learning_rate": 0.0001382907463836945, "loss": 12.1863, "step": 14459 }, { "epoch": 0.7874047905903494, "grad_norm": 0.528524171845402, "learning_rate": 0.0001382826000451845, "loss": 12.1068, "step": 14460 }, { "epoch": 0.7874592445869324, "grad_norm": 0.6553348815463083, "learning_rate": 0.00013827445340898258, "loss": 12.2282, "step": 14461 }, { "epoch": 0.7875136985835154, "grad_norm": 0.5770544173966313, "learning_rate": 0.00013826630647515218, "loss": 12.2327, "step": 14462 }, { "epoch": 0.7875681525800984, "grad_norm": 0.6103420761265705, "learning_rate": 0.0001382581592437565, "loss": 12.3382, "step": 14463 }, { "epoch": 0.7876226065766815, "grad_norm": 0.6456961095391563, "learning_rate": 0.00013825001171485895, "loss": 12.2109, "step": 14464 }, { "epoch": 0.7876770605732645, "grad_norm": 0.6069538803998202, "learning_rate": 0.0001382418638885229, "loss": 12.2003, "step": 14465 }, { "epoch": 0.7877315145698475, "grad_norm": 0.6036311575066308, "learning_rate": 0.00013823371576481174, "loss": 12.2489, "step": 14466 }, { "epoch": 0.7877859685664305, "grad_norm": 0.6012697959525011, "learning_rate": 0.00013822556734378877, "loss": 12.2291, "step": 14467 }, { "epoch": 0.7878404225630135, "grad_norm": 0.5750414494310325, "learning_rate": 0.00013821741862551738, "loss": 12.1316, "step": 14468 }, { "epoch": 0.7878948765595964, "grad_norm": 0.6562236907687204, "learning_rate": 0.00013820926961006092, "loss": 12.1454, "step": 14469 }, { "epoch": 0.7879493305561796, "grad_norm": 0.61036382134781, "learning_rate": 0.0001382011202974828, "loss": 12.3017, "step": 14470 }, { "epoch": 0.7880037845527625, "grad_norm": 0.5865091461942048, "learning_rate": 0.00013819297068784633, "loss": 12.2436, "step": 14471 }, { "epoch": 0.7880582385493455, "grad_norm": 0.6258204578276099, "learning_rate": 0.00013818482078121496, "loss": 12.297, "step": 14472 }, { "epoch": 0.7881126925459285, "grad_norm": 0.5526784225643813, "learning_rate": 0.00013817667057765197, "loss": 12.263, "step": 14473 }, { "epoch": 0.7881671465425115, "grad_norm": 0.6303789008627477, "learning_rate": 0.0001381685200772208, "loss": 12.0504, "step": 14474 }, { "epoch": 0.7882216005390945, "grad_norm": 0.5270705695551884, "learning_rate": 0.00013816036927998484, "loss": 12.0476, "step": 14475 }, { "epoch": 0.7882760545356776, "grad_norm": 0.5661338193897029, "learning_rate": 0.00013815221818600743, "loss": 12.3261, "step": 14476 }, { "epoch": 0.7883305085322606, "grad_norm": 0.4920199077371078, "learning_rate": 0.000138144066795352, "loss": 12.1692, "step": 14477 }, { "epoch": 0.7883849625288436, "grad_norm": 0.5405871902740343, "learning_rate": 0.00013813591510808187, "loss": 12.1211, "step": 14478 }, { "epoch": 0.7884394165254266, "grad_norm": 0.5315923941485983, "learning_rate": 0.00013812776312426047, "loss": 12.0371, "step": 14479 }, { "epoch": 0.7884938705220096, "grad_norm": 0.5127041828760139, "learning_rate": 0.0001381196108439512, "loss": 12.135, "step": 14480 }, { "epoch": 0.7885483245185927, "grad_norm": 0.5459664774774081, "learning_rate": 0.00013811145826721747, "loss": 12.1413, "step": 14481 }, { "epoch": 0.7886027785151757, "grad_norm": 0.6777493278458847, "learning_rate": 0.00013810330539412263, "loss": 12.2048, "step": 14482 }, { "epoch": 0.7886572325117587, "grad_norm": 0.567214388297729, "learning_rate": 0.0001380951522247301, "loss": 12.194, "step": 14483 }, { "epoch": 0.7887116865083417, "grad_norm": 0.5948900536613386, "learning_rate": 0.00013808699875910327, "loss": 12.1971, "step": 14484 }, { "epoch": 0.7887661405049247, "grad_norm": 0.552835977401995, "learning_rate": 0.00013807884499730555, "loss": 12.2779, "step": 14485 }, { "epoch": 0.7888205945015077, "grad_norm": 0.6584396299441723, "learning_rate": 0.00013807069093940035, "loss": 12.3761, "step": 14486 }, { "epoch": 0.7888750484980908, "grad_norm": 0.5307595145590913, "learning_rate": 0.0001380625365854511, "loss": 12.2804, "step": 14487 }, { "epoch": 0.7889295024946738, "grad_norm": 0.5491257047139692, "learning_rate": 0.00013805438193552114, "loss": 12.2452, "step": 14488 }, { "epoch": 0.7889839564912567, "grad_norm": 0.5955458673578021, "learning_rate": 0.00013804622698967392, "loss": 12.1676, "step": 14489 }, { "epoch": 0.7890384104878397, "grad_norm": 0.5192351249345226, "learning_rate": 0.0001380380717479729, "loss": 12.3022, "step": 14490 }, { "epoch": 0.7890928644844227, "grad_norm": 0.4887155507006782, "learning_rate": 0.00013802991621048141, "loss": 12.192, "step": 14491 }, { "epoch": 0.7891473184810057, "grad_norm": 0.5547709732773544, "learning_rate": 0.00013802176037726297, "loss": 12.2312, "step": 14492 }, { "epoch": 0.7892017724775888, "grad_norm": 0.556762358309864, "learning_rate": 0.00013801360424838089, "loss": 12.2581, "step": 14493 }, { "epoch": 0.7892562264741718, "grad_norm": 0.5308349164149758, "learning_rate": 0.00013800544782389867, "loss": 12.2044, "step": 14494 }, { "epoch": 0.7893106804707548, "grad_norm": 0.5452192484856463, "learning_rate": 0.00013799729110387972, "loss": 12.2182, "step": 14495 }, { "epoch": 0.7893651344673378, "grad_norm": 0.5342171146676084, "learning_rate": 0.00013798913408838746, "loss": 12.1737, "step": 14496 }, { "epoch": 0.7894195884639208, "grad_norm": 0.5880953024934228, "learning_rate": 0.00013798097677748534, "loss": 12.1635, "step": 14497 }, { "epoch": 0.7894740424605038, "grad_norm": 0.5501073488750367, "learning_rate": 0.00013797281917123674, "loss": 12.1803, "step": 14498 }, { "epoch": 0.7895284964570869, "grad_norm": 0.5466462771813189, "learning_rate": 0.00013796466126970514, "loss": 12.282, "step": 14499 }, { "epoch": 0.7895829504536699, "grad_norm": 0.555337501442166, "learning_rate": 0.00013795650307295396, "loss": 12.2572, "step": 14500 }, { "epoch": 0.7896374044502529, "grad_norm": 0.5644067977646973, "learning_rate": 0.00013794834458104665, "loss": 12.2426, "step": 14501 }, { "epoch": 0.7896918584468359, "grad_norm": 0.6290705505458757, "learning_rate": 0.00013794018579404668, "loss": 12.3258, "step": 14502 }, { "epoch": 0.7897463124434189, "grad_norm": 0.5530317475999645, "learning_rate": 0.00013793202671201745, "loss": 12.2766, "step": 14503 }, { "epoch": 0.7898007664400019, "grad_norm": 0.5526295277722347, "learning_rate": 0.0001379238673350224, "loss": 12.3009, "step": 14504 }, { "epoch": 0.789855220436585, "grad_norm": 0.5924239444057052, "learning_rate": 0.000137915707663125, "loss": 12.1161, "step": 14505 }, { "epoch": 0.789909674433168, "grad_norm": 0.59420567157587, "learning_rate": 0.0001379075476963887, "loss": 12.1919, "step": 14506 }, { "epoch": 0.789964128429751, "grad_norm": 0.6298810966617565, "learning_rate": 0.00013789938743487696, "loss": 12.2766, "step": 14507 }, { "epoch": 0.7900185824263339, "grad_norm": 0.5476263842566301, "learning_rate": 0.00013789122687865325, "loss": 12.2857, "step": 14508 }, { "epoch": 0.7900730364229169, "grad_norm": 0.555176190503391, "learning_rate": 0.00013788306602778097, "loss": 12.132, "step": 14509 }, { "epoch": 0.7901274904194999, "grad_norm": 0.6204025876595295, "learning_rate": 0.00013787490488232363, "loss": 12.2649, "step": 14510 }, { "epoch": 0.790181944416083, "grad_norm": 0.5461093843848734, "learning_rate": 0.00013786674344234466, "loss": 12.1079, "step": 14511 }, { "epoch": 0.790236398412666, "grad_norm": 0.5836535420064173, "learning_rate": 0.00013785858170790754, "loss": 11.9077, "step": 14512 }, { "epoch": 0.790290852409249, "grad_norm": 0.5692528585134783, "learning_rate": 0.0001378504196790758, "loss": 12.3145, "step": 14513 }, { "epoch": 0.790345306405832, "grad_norm": 0.526143377711485, "learning_rate": 0.00013784225735591278, "loss": 12.1412, "step": 14514 }, { "epoch": 0.790399760402415, "grad_norm": 0.6780130437929691, "learning_rate": 0.00013783409473848207, "loss": 12.1476, "step": 14515 }, { "epoch": 0.7904542143989981, "grad_norm": 0.5763331530144483, "learning_rate": 0.00013782593182684705, "loss": 12.2956, "step": 14516 }, { "epoch": 0.7905086683955811, "grad_norm": 0.5775096015189632, "learning_rate": 0.00013781776862107126, "loss": 12.259, "step": 14517 }, { "epoch": 0.7905631223921641, "grad_norm": 0.5617552123384292, "learning_rate": 0.00013780960512121818, "loss": 12.0761, "step": 14518 }, { "epoch": 0.7906175763887471, "grad_norm": 0.715470699824914, "learning_rate": 0.00013780144132735124, "loss": 12.267, "step": 14519 }, { "epoch": 0.7906720303853301, "grad_norm": 0.6163374199941223, "learning_rate": 0.00013779327723953397, "loss": 12.1038, "step": 14520 }, { "epoch": 0.7907264843819131, "grad_norm": 0.5935665540501485, "learning_rate": 0.0001377851128578298, "loss": 12.2294, "step": 14521 }, { "epoch": 0.7907809383784962, "grad_norm": 0.5838932995567909, "learning_rate": 0.0001377769481823023, "loss": 12.312, "step": 14522 }, { "epoch": 0.7908353923750792, "grad_norm": 0.6095538760565489, "learning_rate": 0.00013776878321301492, "loss": 12.1626, "step": 14523 }, { "epoch": 0.7908898463716622, "grad_norm": 0.5922649772685893, "learning_rate": 0.00013776061795003113, "loss": 12.3035, "step": 14524 }, { "epoch": 0.7909443003682451, "grad_norm": 0.5515954714545865, "learning_rate": 0.00013775245239341444, "loss": 12.315, "step": 14525 }, { "epoch": 0.7909987543648281, "grad_norm": 0.6539394474403301, "learning_rate": 0.00013774428654322836, "loss": 12.242, "step": 14526 }, { "epoch": 0.7910532083614111, "grad_norm": 0.5634951214555385, "learning_rate": 0.00013773612039953635, "loss": 12.26, "step": 14527 }, { "epoch": 0.7911076623579942, "grad_norm": 0.6319106731830428, "learning_rate": 0.000137727953962402, "loss": 12.3335, "step": 14528 }, { "epoch": 0.7911621163545772, "grad_norm": 0.5989947043730212, "learning_rate": 0.00013771978723188869, "loss": 12.0249, "step": 14529 }, { "epoch": 0.7912165703511602, "grad_norm": 0.658527852097458, "learning_rate": 0.00013771162020806004, "loss": 12.1369, "step": 14530 }, { "epoch": 0.7912710243477432, "grad_norm": 0.6143727627790688, "learning_rate": 0.00013770345289097945, "loss": 12.2429, "step": 14531 }, { "epoch": 0.7913254783443262, "grad_norm": 0.7332441044745701, "learning_rate": 0.00013769528528071053, "loss": 12.2045, "step": 14532 }, { "epoch": 0.7913799323409092, "grad_norm": 0.7027518226233722, "learning_rate": 0.00013768711737731674, "loss": 12.3577, "step": 14533 }, { "epoch": 0.7914343863374923, "grad_norm": 0.6661778673562905, "learning_rate": 0.0001376789491808616, "loss": 12.2812, "step": 14534 }, { "epoch": 0.7914888403340753, "grad_norm": 0.7305109231819773, "learning_rate": 0.00013767078069140863, "loss": 12.3905, "step": 14535 }, { "epoch": 0.7915432943306583, "grad_norm": 0.678911449401082, "learning_rate": 0.00013766261190902138, "loss": 12.2408, "step": 14536 }, { "epoch": 0.7915977483272413, "grad_norm": 0.6530486210619075, "learning_rate": 0.0001376544428337633, "loss": 12.1734, "step": 14537 }, { "epoch": 0.7916522023238243, "grad_norm": 0.5827242495444377, "learning_rate": 0.00013764627346569798, "loss": 12.2727, "step": 14538 }, { "epoch": 0.7917066563204073, "grad_norm": 0.542965175998758, "learning_rate": 0.00013763810380488893, "loss": 12.2165, "step": 14539 }, { "epoch": 0.7917611103169904, "grad_norm": 0.6735407450418386, "learning_rate": 0.00013762993385139967, "loss": 12.2434, "step": 14540 }, { "epoch": 0.7918155643135734, "grad_norm": 0.5408198038923513, "learning_rate": 0.0001376217636052937, "loss": 12.2286, "step": 14541 }, { "epoch": 0.7918700183101564, "grad_norm": 0.6335727041039692, "learning_rate": 0.00013761359306663465, "loss": 12.3902, "step": 14542 }, { "epoch": 0.7919244723067393, "grad_norm": 0.7766853164870627, "learning_rate": 0.00013760542223548595, "loss": 12.3514, "step": 14543 }, { "epoch": 0.7919789263033223, "grad_norm": 0.5963601706181519, "learning_rate": 0.00013759725111191118, "loss": 12.1699, "step": 14544 }, { "epoch": 0.7920333802999053, "grad_norm": 0.641213799629075, "learning_rate": 0.0001375890796959739, "loss": 12.32, "step": 14545 }, { "epoch": 0.7920878342964884, "grad_norm": 0.6972634743512368, "learning_rate": 0.00013758090798773762, "loss": 12.3032, "step": 14546 }, { "epoch": 0.7921422882930714, "grad_norm": 0.6017069234143751, "learning_rate": 0.00013757273598726587, "loss": 12.3563, "step": 14547 }, { "epoch": 0.7921967422896544, "grad_norm": 0.6360018090555419, "learning_rate": 0.00013756456369462227, "loss": 12.2611, "step": 14548 }, { "epoch": 0.7922511962862374, "grad_norm": 0.6503478151736607, "learning_rate": 0.00013755639110987032, "loss": 12.3143, "step": 14549 }, { "epoch": 0.7923056502828204, "grad_norm": 0.5543365015342491, "learning_rate": 0.00013754821823307354, "loss": 12.3949, "step": 14550 }, { "epoch": 0.7923601042794035, "grad_norm": 0.5973243221748423, "learning_rate": 0.00013754004506429554, "loss": 12.3585, "step": 14551 }, { "epoch": 0.7924145582759865, "grad_norm": 0.7117129070923307, "learning_rate": 0.00013753187160359985, "loss": 12.2683, "step": 14552 }, { "epoch": 0.7924690122725695, "grad_norm": 0.5653042724525108, "learning_rate": 0.00013752369785105005, "loss": 12.2011, "step": 14553 }, { "epoch": 0.7925234662691525, "grad_norm": 0.6353979891028753, "learning_rate": 0.00013751552380670968, "loss": 12.2549, "step": 14554 }, { "epoch": 0.7925779202657355, "grad_norm": 0.5861169980674861, "learning_rate": 0.00013750734947064227, "loss": 12.2593, "step": 14555 }, { "epoch": 0.7926323742623185, "grad_norm": 0.6516937965590943, "learning_rate": 0.00013749917484291144, "loss": 12.2449, "step": 14556 }, { "epoch": 0.7926868282589016, "grad_norm": 0.6707502137605735, "learning_rate": 0.00013749099992358077, "loss": 12.2887, "step": 14557 }, { "epoch": 0.7927412822554846, "grad_norm": 0.5770933413979192, "learning_rate": 0.00013748282471271375, "loss": 12.2399, "step": 14558 }, { "epoch": 0.7927957362520676, "grad_norm": 0.6133565514888829, "learning_rate": 0.000137474649210374, "loss": 12.1271, "step": 14559 }, { "epoch": 0.7928501902486506, "grad_norm": 0.565282292880067, "learning_rate": 0.00013746647341662512, "loss": 12.0827, "step": 14560 }, { "epoch": 0.7929046442452335, "grad_norm": 0.630819282440226, "learning_rate": 0.00013745829733153065, "loss": 12.2273, "step": 14561 }, { "epoch": 0.7929590982418165, "grad_norm": 0.5287941385974477, "learning_rate": 0.00013745012095515418, "loss": 12.3124, "step": 14562 }, { "epoch": 0.7930135522383996, "grad_norm": 0.6013911595860558, "learning_rate": 0.00013744194428755933, "loss": 12.1449, "step": 14563 }, { "epoch": 0.7930680062349826, "grad_norm": 0.5908970621387647, "learning_rate": 0.0001374337673288096, "loss": 11.9763, "step": 14564 }, { "epoch": 0.7931224602315656, "grad_norm": 0.5678961642577054, "learning_rate": 0.0001374255900789686, "loss": 12.1764, "step": 14565 }, { "epoch": 0.7931769142281486, "grad_norm": 0.5223695709824954, "learning_rate": 0.00013741741253809996, "loss": 12.0156, "step": 14566 }, { "epoch": 0.7932313682247316, "grad_norm": 0.564503233628171, "learning_rate": 0.0001374092347062672, "loss": 12.188, "step": 14567 }, { "epoch": 0.7932858222213146, "grad_norm": 0.5416228729888778, "learning_rate": 0.00013740105658353405, "loss": 12.1661, "step": 14568 }, { "epoch": 0.7933402762178977, "grad_norm": 0.6134468990697433, "learning_rate": 0.00013739287816996395, "loss": 12.3041, "step": 14569 }, { "epoch": 0.7933947302144807, "grad_norm": 0.5865723569145507, "learning_rate": 0.00013738469946562054, "loss": 12.1678, "step": 14570 }, { "epoch": 0.7934491842110637, "grad_norm": 0.5917678061680537, "learning_rate": 0.00013737652047056745, "loss": 12.1629, "step": 14571 }, { "epoch": 0.7935036382076467, "grad_norm": 0.6067411146767691, "learning_rate": 0.0001373683411848683, "loss": 12.2797, "step": 14572 }, { "epoch": 0.7935580922042297, "grad_norm": 0.5688816523572685, "learning_rate": 0.00013736016160858667, "loss": 12.1704, "step": 14573 }, { "epoch": 0.7936125462008127, "grad_norm": 0.5348887968152709, "learning_rate": 0.00013735198174178614, "loss": 12.1637, "step": 14574 }, { "epoch": 0.7936670001973958, "grad_norm": 0.5976971969257272, "learning_rate": 0.0001373438015845303, "loss": 12.2787, "step": 14575 }, { "epoch": 0.7937214541939788, "grad_norm": 0.6372535098151176, "learning_rate": 0.00013733562113688283, "loss": 12.2731, "step": 14576 }, { "epoch": 0.7937759081905618, "grad_norm": 0.5856623029227053, "learning_rate": 0.0001373274403989073, "loss": 12.1469, "step": 14577 }, { "epoch": 0.7938303621871448, "grad_norm": 0.5340829343077999, "learning_rate": 0.00013731925937066736, "loss": 12.2518, "step": 14578 }, { "epoch": 0.7938848161837277, "grad_norm": 0.6135248305708401, "learning_rate": 0.0001373110780522266, "loss": 12.2766, "step": 14579 }, { "epoch": 0.7939392701803107, "grad_norm": 0.5980173322439528, "learning_rate": 0.0001373028964436486, "loss": 12.1919, "step": 14580 }, { "epoch": 0.7939937241768938, "grad_norm": 0.6206658138611297, "learning_rate": 0.00013729471454499703, "loss": 12.1507, "step": 14581 }, { "epoch": 0.7940481781734768, "grad_norm": 0.5367388619002081, "learning_rate": 0.0001372865323563355, "loss": 12.2038, "step": 14582 }, { "epoch": 0.7941026321700598, "grad_norm": 0.5240897993979492, "learning_rate": 0.00013727834987772768, "loss": 12.2194, "step": 14583 }, { "epoch": 0.7941570861666428, "grad_norm": 0.647420197970378, "learning_rate": 0.0001372701671092371, "loss": 12.2785, "step": 14584 }, { "epoch": 0.7942115401632258, "grad_norm": 0.5684613022836379, "learning_rate": 0.0001372619840509275, "loss": 12.2735, "step": 14585 }, { "epoch": 0.7942659941598089, "grad_norm": 0.5945683442190239, "learning_rate": 0.00013725380070286246, "loss": 12.2594, "step": 14586 }, { "epoch": 0.7943204481563919, "grad_norm": 0.6259752328008525, "learning_rate": 0.0001372456170651056, "loss": 12.1878, "step": 14587 }, { "epoch": 0.7943749021529749, "grad_norm": 0.546480691542995, "learning_rate": 0.00013723743313772058, "loss": 12.2561, "step": 14588 }, { "epoch": 0.7944293561495579, "grad_norm": 0.5150143310049871, "learning_rate": 0.00013722924892077106, "loss": 12.1557, "step": 14589 }, { "epoch": 0.7944838101461409, "grad_norm": 0.561310388062198, "learning_rate": 0.00013722106441432064, "loss": 12.1778, "step": 14590 }, { "epoch": 0.7945382641427239, "grad_norm": 0.606486418543721, "learning_rate": 0.00013721287961843297, "loss": 12.2936, "step": 14591 }, { "epoch": 0.794592718139307, "grad_norm": 0.5837914612089911, "learning_rate": 0.00013720469453317173, "loss": 12.3163, "step": 14592 }, { "epoch": 0.79464717213589, "grad_norm": 0.6707863662057902, "learning_rate": 0.00013719650915860053, "loss": 12.1247, "step": 14593 }, { "epoch": 0.794701626132473, "grad_norm": 0.5614619023691085, "learning_rate": 0.00013718832349478305, "loss": 12.3049, "step": 14594 }, { "epoch": 0.794756080129056, "grad_norm": 0.5097509032435517, "learning_rate": 0.0001371801375417829, "loss": 12.164, "step": 14595 }, { "epoch": 0.794810534125639, "grad_norm": 0.5976077493113269, "learning_rate": 0.00013717195129966378, "loss": 12.2807, "step": 14596 }, { "epoch": 0.794864988122222, "grad_norm": 0.5392292790465633, "learning_rate": 0.00013716376476848933, "loss": 12.2907, "step": 14597 }, { "epoch": 0.794919442118805, "grad_norm": 0.5623035202767939, "learning_rate": 0.00013715557794832323, "loss": 12.2697, "step": 14598 }, { "epoch": 0.794973896115388, "grad_norm": 0.5536073700263109, "learning_rate": 0.00013714739083922912, "loss": 12.1871, "step": 14599 }, { "epoch": 0.795028350111971, "grad_norm": 0.5485839640207039, "learning_rate": 0.00013713920344127068, "loss": 12.2897, "step": 14600 }, { "epoch": 0.795082804108554, "grad_norm": 0.5211499804336917, "learning_rate": 0.00013713101575451152, "loss": 12.1034, "step": 14601 }, { "epoch": 0.795137258105137, "grad_norm": 0.5511440782227762, "learning_rate": 0.00013712282777901538, "loss": 12.2477, "step": 14602 }, { "epoch": 0.79519171210172, "grad_norm": 0.5648455952058448, "learning_rate": 0.00013711463951484592, "loss": 12.1965, "step": 14603 }, { "epoch": 0.7952461660983031, "grad_norm": 0.6047438965263254, "learning_rate": 0.0001371064509620668, "loss": 12.2426, "step": 14604 }, { "epoch": 0.7953006200948861, "grad_norm": 0.5210930126052459, "learning_rate": 0.0001370982621207417, "loss": 12.2967, "step": 14605 }, { "epoch": 0.7953550740914691, "grad_norm": 0.6753107385324894, "learning_rate": 0.00013709007299093426, "loss": 12.233, "step": 14606 }, { "epoch": 0.7954095280880521, "grad_norm": 0.5903172203022329, "learning_rate": 0.0001370818835727082, "loss": 12.1532, "step": 14607 }, { "epoch": 0.7954639820846351, "grad_norm": 0.5400132903393716, "learning_rate": 0.00013707369386612721, "loss": 12.1621, "step": 14608 }, { "epoch": 0.7955184360812181, "grad_norm": 0.5691708873764322, "learning_rate": 0.00013706550387125493, "loss": 12.2356, "step": 14609 }, { "epoch": 0.7955728900778012, "grad_norm": 0.545545193497752, "learning_rate": 0.0001370573135881551, "loss": 12.2555, "step": 14610 }, { "epoch": 0.7956273440743842, "grad_norm": 0.5469260876252976, "learning_rate": 0.0001370491230168914, "loss": 12.1018, "step": 14611 }, { "epoch": 0.7956817980709672, "grad_norm": 0.571494057494282, "learning_rate": 0.0001370409321575275, "loss": 12.1387, "step": 14612 }, { "epoch": 0.7957362520675502, "grad_norm": 0.5837025395276881, "learning_rate": 0.00013703274101012708, "loss": 12.2117, "step": 14613 }, { "epoch": 0.7957907060641332, "grad_norm": 0.6528327502031022, "learning_rate": 0.00013702454957475386, "loss": 12.2818, "step": 14614 }, { "epoch": 0.7958451600607163, "grad_norm": 0.6390175326589221, "learning_rate": 0.00013701635785147152, "loss": 12.097, "step": 14615 }, { "epoch": 0.7958996140572993, "grad_norm": 0.6133716093343833, "learning_rate": 0.0001370081658403438, "loss": 12.2751, "step": 14616 }, { "epoch": 0.7959540680538822, "grad_norm": 0.5489687858980747, "learning_rate": 0.00013699997354143438, "loss": 12.2439, "step": 14617 }, { "epoch": 0.7960085220504652, "grad_norm": 0.6197973559483165, "learning_rate": 0.00013699178095480697, "loss": 12.2906, "step": 14618 }, { "epoch": 0.7960629760470482, "grad_norm": 0.7095353639430027, "learning_rate": 0.0001369835880805252, "loss": 12.1462, "step": 14619 }, { "epoch": 0.7961174300436312, "grad_norm": 0.5870978258985311, "learning_rate": 0.0001369753949186529, "loss": 12.1074, "step": 14620 }, { "epoch": 0.7961718840402143, "grad_norm": 0.6594864495359288, "learning_rate": 0.00013696720146925373, "loss": 12.368, "step": 14621 }, { "epoch": 0.7962263380367973, "grad_norm": 0.6184783874708606, "learning_rate": 0.0001369590077323914, "loss": 12.3256, "step": 14622 }, { "epoch": 0.7962807920333803, "grad_norm": 0.5951694789565457, "learning_rate": 0.00013695081370812963, "loss": 12.1502, "step": 14623 }, { "epoch": 0.7963352460299633, "grad_norm": 0.607457031537557, "learning_rate": 0.00013694261939653214, "loss": 12.1754, "step": 14624 }, { "epoch": 0.7963897000265463, "grad_norm": 0.5191105385985078, "learning_rate": 0.0001369344247976626, "loss": 12.1142, "step": 14625 }, { "epoch": 0.7964441540231293, "grad_norm": 0.5516297677297756, "learning_rate": 0.0001369262299115848, "loss": 12.2765, "step": 14626 }, { "epoch": 0.7964986080197124, "grad_norm": 0.5515463986744881, "learning_rate": 0.00013691803473836247, "loss": 12.2683, "step": 14627 }, { "epoch": 0.7965530620162954, "grad_norm": 0.5851600829755839, "learning_rate": 0.00013690983927805932, "loss": 12.2109, "step": 14628 }, { "epoch": 0.7966075160128784, "grad_norm": 0.6260586049833458, "learning_rate": 0.00013690164353073904, "loss": 12.29, "step": 14629 }, { "epoch": 0.7966619700094614, "grad_norm": 0.5304233546419216, "learning_rate": 0.0001368934474964654, "loss": 12.2011, "step": 14630 }, { "epoch": 0.7967164240060444, "grad_norm": 0.6180773390482837, "learning_rate": 0.0001368852511753021, "loss": 12.2119, "step": 14631 }, { "epoch": 0.7967708780026274, "grad_norm": 0.5630688489731834, "learning_rate": 0.00013687705456731295, "loss": 12.2902, "step": 14632 }, { "epoch": 0.7968253319992105, "grad_norm": 0.6090926735929871, "learning_rate": 0.00013686885767256164, "loss": 12.1717, "step": 14633 }, { "epoch": 0.7968797859957935, "grad_norm": 0.5872552028357287, "learning_rate": 0.00013686066049111185, "loss": 12.269, "step": 14634 }, { "epoch": 0.7969342399923764, "grad_norm": 0.5742787047807977, "learning_rate": 0.00013685246302302744, "loss": 12.2413, "step": 14635 }, { "epoch": 0.7969886939889594, "grad_norm": 0.6247212389621019, "learning_rate": 0.00013684426526837205, "loss": 12.2067, "step": 14636 }, { "epoch": 0.7970431479855424, "grad_norm": 0.5868775930778958, "learning_rate": 0.0001368360672272095, "loss": 12.2478, "step": 14637 }, { "epoch": 0.7970976019821254, "grad_norm": 0.5846150678878385, "learning_rate": 0.00013682786889960354, "loss": 12.2411, "step": 14638 }, { "epoch": 0.7971520559787085, "grad_norm": 0.5843841367905177, "learning_rate": 0.00013681967028561785, "loss": 12.2898, "step": 14639 }, { "epoch": 0.7972065099752915, "grad_norm": 0.5868475342754406, "learning_rate": 0.00013681147138531625, "loss": 12.3007, "step": 14640 }, { "epoch": 0.7972609639718745, "grad_norm": 0.583273108266093, "learning_rate": 0.00013680327219876248, "loss": 12.315, "step": 14641 }, { "epoch": 0.7973154179684575, "grad_norm": 0.600712103579576, "learning_rate": 0.00013679507272602027, "loss": 12.2416, "step": 14642 }, { "epoch": 0.7973698719650405, "grad_norm": 0.5903813153092471, "learning_rate": 0.0001367868729671534, "loss": 12.1484, "step": 14643 }, { "epoch": 0.7974243259616235, "grad_norm": 0.6057061535598088, "learning_rate": 0.00013677867292222567, "loss": 12.2062, "step": 14644 }, { "epoch": 0.7974787799582066, "grad_norm": 0.5382834472760614, "learning_rate": 0.00013677047259130082, "loss": 12.1268, "step": 14645 }, { "epoch": 0.7975332339547896, "grad_norm": 0.6063616100416661, "learning_rate": 0.00013676227197444258, "loss": 12.2374, "step": 14646 }, { "epoch": 0.7975876879513726, "grad_norm": 0.5621407926219465, "learning_rate": 0.00013675407107171473, "loss": 12.2684, "step": 14647 }, { "epoch": 0.7976421419479556, "grad_norm": 0.5651809829118901, "learning_rate": 0.00013674586988318108, "loss": 12.3008, "step": 14648 }, { "epoch": 0.7976965959445386, "grad_norm": 0.7892357277511804, "learning_rate": 0.0001367376684089054, "loss": 12.313, "step": 14649 }, { "epoch": 0.7977510499411217, "grad_norm": 0.6506794636249186, "learning_rate": 0.00013672946664895145, "loss": 12.4271, "step": 14650 }, { "epoch": 0.7978055039377047, "grad_norm": 0.5041891292585133, "learning_rate": 0.00013672126460338298, "loss": 12.2156, "step": 14651 }, { "epoch": 0.7978599579342877, "grad_norm": 0.7401483260152929, "learning_rate": 0.00013671306227226385, "loss": 12.2375, "step": 14652 }, { "epoch": 0.7979144119308706, "grad_norm": 0.6222369533899695, "learning_rate": 0.00013670485965565772, "loss": 12.276, "step": 14653 }, { "epoch": 0.7979688659274536, "grad_norm": 0.6692280187649381, "learning_rate": 0.00013669665675362848, "loss": 12.3599, "step": 14654 }, { "epoch": 0.7980233199240366, "grad_norm": 0.5884085203012612, "learning_rate": 0.0001366884535662399, "loss": 12.4401, "step": 14655 }, { "epoch": 0.7980777739206197, "grad_norm": 0.5589894629310427, "learning_rate": 0.00013668025009355573, "loss": 12.1091, "step": 14656 }, { "epoch": 0.7981322279172027, "grad_norm": 0.5326014670977347, "learning_rate": 0.0001366720463356398, "loss": 12.1856, "step": 14657 }, { "epoch": 0.7981866819137857, "grad_norm": 0.6594920539581905, "learning_rate": 0.00013666384229255585, "loss": 12.336, "step": 14658 }, { "epoch": 0.7982411359103687, "grad_norm": 0.5766616140343678, "learning_rate": 0.00013665563796436776, "loss": 12.111, "step": 14659 }, { "epoch": 0.7982955899069517, "grad_norm": 0.6231565003777191, "learning_rate": 0.00013664743335113926, "loss": 12.3686, "step": 14660 }, { "epoch": 0.7983500439035347, "grad_norm": 0.6206882350344767, "learning_rate": 0.00013663922845293417, "loss": 12.1138, "step": 14661 }, { "epoch": 0.7984044979001178, "grad_norm": 0.5864199324503923, "learning_rate": 0.00013663102326981632, "loss": 12.2635, "step": 14662 }, { "epoch": 0.7984589518967008, "grad_norm": 0.5555180848908318, "learning_rate": 0.00013662281780184947, "loss": 12.1993, "step": 14663 }, { "epoch": 0.7985134058932838, "grad_norm": 0.5393908024567289, "learning_rate": 0.00013661461204909746, "loss": 12.2018, "step": 14664 }, { "epoch": 0.7985678598898668, "grad_norm": 0.5869442261183992, "learning_rate": 0.00013660640601162406, "loss": 12.3016, "step": 14665 }, { "epoch": 0.7986223138864498, "grad_norm": 0.5705150655125303, "learning_rate": 0.0001365981996894931, "loss": 12.1733, "step": 14666 }, { "epoch": 0.7986767678830328, "grad_norm": 0.5564124217432502, "learning_rate": 0.00013658999308276845, "loss": 12.2509, "step": 14667 }, { "epoch": 0.7987312218796159, "grad_norm": 0.543176948218349, "learning_rate": 0.00013658178619151384, "loss": 12.1485, "step": 14668 }, { "epoch": 0.7987856758761989, "grad_norm": 0.5580168309808128, "learning_rate": 0.00013657357901579315, "loss": 12.1323, "step": 14669 }, { "epoch": 0.7988401298727819, "grad_norm": 0.5844524466135493, "learning_rate": 0.00013656537155567016, "loss": 12.1079, "step": 14670 }, { "epoch": 0.7988945838693648, "grad_norm": 0.5497642901168213, "learning_rate": 0.0001365571638112087, "loss": 12.2781, "step": 14671 }, { "epoch": 0.7989490378659478, "grad_norm": 0.5574194895445754, "learning_rate": 0.00013654895578247262, "loss": 12.2041, "step": 14672 }, { "epoch": 0.7990034918625308, "grad_norm": 0.5767024085857305, "learning_rate": 0.00013654074746952572, "loss": 12.2425, "step": 14673 }, { "epoch": 0.7990579458591139, "grad_norm": 0.6576026837736271, "learning_rate": 0.00013653253887243184, "loss": 12.3398, "step": 14674 }, { "epoch": 0.7991123998556969, "grad_norm": 0.6085438305694729, "learning_rate": 0.00013652432999125484, "loss": 12.229, "step": 14675 }, { "epoch": 0.7991668538522799, "grad_norm": 0.5624586386412984, "learning_rate": 0.0001365161208260585, "loss": 12.0903, "step": 14676 }, { "epoch": 0.7992213078488629, "grad_norm": 0.5549898217069293, "learning_rate": 0.00013650791137690668, "loss": 12.1677, "step": 14677 }, { "epoch": 0.7992757618454459, "grad_norm": 0.5676483063775526, "learning_rate": 0.00013649970164386323, "loss": 12.2304, "step": 14678 }, { "epoch": 0.7993302158420289, "grad_norm": 0.5308701782521362, "learning_rate": 0.00013649149162699197, "loss": 12.261, "step": 14679 }, { "epoch": 0.799384669838612, "grad_norm": 0.6357462521824179, "learning_rate": 0.00013648328132635676, "loss": 12.3132, "step": 14680 }, { "epoch": 0.799439123835195, "grad_norm": 0.5580459839955783, "learning_rate": 0.00013647507074202142, "loss": 12.2095, "step": 14681 }, { "epoch": 0.799493577831778, "grad_norm": 0.5323776944845773, "learning_rate": 0.0001364668598740498, "loss": 12.259, "step": 14682 }, { "epoch": 0.799548031828361, "grad_norm": 0.5452627919182734, "learning_rate": 0.0001364586487225058, "loss": 12.1575, "step": 14683 }, { "epoch": 0.799602485824944, "grad_norm": 0.6235269652424776, "learning_rate": 0.00013645043728745325, "loss": 12.2087, "step": 14684 }, { "epoch": 0.7996569398215271, "grad_norm": 0.6513695487707094, "learning_rate": 0.00013644222556895592, "loss": 12.3251, "step": 14685 }, { "epoch": 0.7997113938181101, "grad_norm": 0.541478799476608, "learning_rate": 0.00013643401356707777, "loss": 12.2063, "step": 14686 }, { "epoch": 0.7997658478146931, "grad_norm": 0.6221379479706041, "learning_rate": 0.00013642580128188264, "loss": 12.2197, "step": 14687 }, { "epoch": 0.7998203018112761, "grad_norm": 0.5977131685243543, "learning_rate": 0.00013641758871343432, "loss": 12.2846, "step": 14688 }, { "epoch": 0.799874755807859, "grad_norm": 0.5986729314260992, "learning_rate": 0.00013640937586179678, "loss": 12.2008, "step": 14689 }, { "epoch": 0.799929209804442, "grad_norm": 0.6037694902876016, "learning_rate": 0.0001364011627270338, "loss": 12.3244, "step": 14690 }, { "epoch": 0.7999836638010251, "grad_norm": 0.5716463867704794, "learning_rate": 0.00013639294930920925, "loss": 12.2065, "step": 14691 }, { "epoch": 0.8000381177976081, "grad_norm": 0.6101374572745445, "learning_rate": 0.00013638473560838706, "loss": 12.2074, "step": 14692 }, { "epoch": 0.8000925717941911, "grad_norm": 0.5804774150756093, "learning_rate": 0.00013637652162463103, "loss": 12.1971, "step": 14693 }, { "epoch": 0.8001470257907741, "grad_norm": 0.5712723197657905, "learning_rate": 0.0001363683073580051, "loss": 12.1784, "step": 14694 }, { "epoch": 0.8002014797873571, "grad_norm": 0.5736267925533555, "learning_rate": 0.0001363600928085731, "loss": 12.1645, "step": 14695 }, { "epoch": 0.8002559337839401, "grad_norm": 0.5704183696042504, "learning_rate": 0.0001363518779763989, "loss": 12.2962, "step": 14696 }, { "epoch": 0.8003103877805232, "grad_norm": 0.5422945074682746, "learning_rate": 0.00013634366286154642, "loss": 12.2148, "step": 14697 }, { "epoch": 0.8003648417771062, "grad_norm": 0.538070683148117, "learning_rate": 0.00013633544746407953, "loss": 12.2103, "step": 14698 }, { "epoch": 0.8004192957736892, "grad_norm": 0.5463898898573379, "learning_rate": 0.00013632723178406213, "loss": 12.1015, "step": 14699 }, { "epoch": 0.8004737497702722, "grad_norm": 0.6780580468037041, "learning_rate": 0.00013631901582155807, "loss": 12.2762, "step": 14700 }, { "epoch": 0.8005282037668552, "grad_norm": 0.5277520574048612, "learning_rate": 0.00013631079957663122, "loss": 12.2817, "step": 14701 }, { "epoch": 0.8005826577634382, "grad_norm": 0.5393645235090414, "learning_rate": 0.00013630258304934552, "loss": 12.146, "step": 14702 }, { "epoch": 0.8006371117600213, "grad_norm": 0.5997930595758416, "learning_rate": 0.00013629436623976483, "loss": 12.2559, "step": 14703 }, { "epoch": 0.8006915657566043, "grad_norm": 0.5373163929300773, "learning_rate": 0.0001362861491479531, "loss": 12.1077, "step": 14704 }, { "epoch": 0.8007460197531873, "grad_norm": 0.5806005994554888, "learning_rate": 0.00013627793177397416, "loss": 12.3336, "step": 14705 }, { "epoch": 0.8008004737497703, "grad_norm": 0.6518520429407361, "learning_rate": 0.00013626971411789197, "loss": 12.2488, "step": 14706 }, { "epoch": 0.8008549277463533, "grad_norm": 0.5552547751323605, "learning_rate": 0.00013626149617977035, "loss": 12.2116, "step": 14707 }, { "epoch": 0.8009093817429362, "grad_norm": 0.551120271148661, "learning_rate": 0.00013625327795967326, "loss": 12.2319, "step": 14708 }, { "epoch": 0.8009638357395193, "grad_norm": 0.5855379834522427, "learning_rate": 0.00013624505945766466, "loss": 12.1803, "step": 14709 }, { "epoch": 0.8010182897361023, "grad_norm": 0.5753752303480557, "learning_rate": 0.00013623684067380835, "loss": 12.2338, "step": 14710 }, { "epoch": 0.8010727437326853, "grad_norm": 0.5330703433849421, "learning_rate": 0.0001362286216081683, "loss": 12.3258, "step": 14711 }, { "epoch": 0.8011271977292683, "grad_norm": 0.5619976654850436, "learning_rate": 0.00013622040226080842, "loss": 12.2137, "step": 14712 }, { "epoch": 0.8011816517258513, "grad_norm": 0.600438451497241, "learning_rate": 0.00013621218263179259, "loss": 12.1938, "step": 14713 }, { "epoch": 0.8012361057224343, "grad_norm": 0.6901686333060042, "learning_rate": 0.0001362039627211848, "loss": 12.058, "step": 14714 }, { "epoch": 0.8012905597190174, "grad_norm": 0.5017285782650442, "learning_rate": 0.0001361957425290489, "loss": 12.2392, "step": 14715 }, { "epoch": 0.8013450137156004, "grad_norm": 0.5516965967674011, "learning_rate": 0.00013618752205544885, "loss": 12.138, "step": 14716 }, { "epoch": 0.8013994677121834, "grad_norm": 0.6359579853789539, "learning_rate": 0.00013617930130044854, "loss": 12.2751, "step": 14717 }, { "epoch": 0.8014539217087664, "grad_norm": 0.6236073792954583, "learning_rate": 0.0001361710802641119, "loss": 12.131, "step": 14718 }, { "epoch": 0.8015083757053494, "grad_norm": 0.5946363293936324, "learning_rate": 0.0001361628589465029, "loss": 12.2467, "step": 14719 }, { "epoch": 0.8015628297019325, "grad_norm": 0.5702754809065214, "learning_rate": 0.00013615463734768546, "loss": 12.2673, "step": 14720 }, { "epoch": 0.8016172836985155, "grad_norm": 0.5655799635243692, "learning_rate": 0.00013614641546772348, "loss": 12.1912, "step": 14721 }, { "epoch": 0.8016717376950985, "grad_norm": 0.5317017544142133, "learning_rate": 0.0001361381933066809, "loss": 12.2049, "step": 14722 }, { "epoch": 0.8017261916916815, "grad_norm": 0.6350850966842021, "learning_rate": 0.00013612997086462169, "loss": 12.302, "step": 14723 }, { "epoch": 0.8017806456882645, "grad_norm": 0.6144874801838794, "learning_rate": 0.00013612174814160976, "loss": 12.2868, "step": 14724 }, { "epoch": 0.8018350996848475, "grad_norm": 0.5321833507643821, "learning_rate": 0.00013611352513770905, "loss": 12.0525, "step": 14725 }, { "epoch": 0.8018895536814306, "grad_norm": 0.5718843327099883, "learning_rate": 0.00013610530185298353, "loss": 12.1852, "step": 14726 }, { "epoch": 0.8019440076780135, "grad_norm": 0.6070868198744772, "learning_rate": 0.0001360970782874971, "loss": 12.1785, "step": 14727 }, { "epoch": 0.8019984616745965, "grad_norm": 0.5306665602802828, "learning_rate": 0.00013608885444131374, "loss": 12.3216, "step": 14728 }, { "epoch": 0.8020529156711795, "grad_norm": 0.6288825474118335, "learning_rate": 0.0001360806303144974, "loss": 12.3004, "step": 14729 }, { "epoch": 0.8021073696677625, "grad_norm": 0.5613030264008502, "learning_rate": 0.00013607240590711206, "loss": 12.2871, "step": 14730 }, { "epoch": 0.8021618236643455, "grad_norm": 0.5322723276921058, "learning_rate": 0.0001360641812192216, "loss": 12.1806, "step": 14731 }, { "epoch": 0.8022162776609286, "grad_norm": 0.584965786831096, "learning_rate": 0.00013605595625089005, "loss": 12.1354, "step": 14732 }, { "epoch": 0.8022707316575116, "grad_norm": 0.561919891996372, "learning_rate": 0.00013604773100218132, "loss": 12.2726, "step": 14733 }, { "epoch": 0.8023251856540946, "grad_norm": 0.697282815185284, "learning_rate": 0.0001360395054731594, "loss": 12.3293, "step": 14734 }, { "epoch": 0.8023796396506776, "grad_norm": 0.5649339931331062, "learning_rate": 0.0001360312796638882, "loss": 12.0921, "step": 14735 }, { "epoch": 0.8024340936472606, "grad_norm": 0.5525623322911426, "learning_rate": 0.0001360230535744318, "loss": 12.2729, "step": 14736 }, { "epoch": 0.8024885476438436, "grad_norm": 0.5735865225673554, "learning_rate": 0.00013601482720485404, "loss": 12.06, "step": 14737 }, { "epoch": 0.8025430016404267, "grad_norm": 0.6089480738355258, "learning_rate": 0.00013600660055521896, "loss": 12.2962, "step": 14738 }, { "epoch": 0.8025974556370097, "grad_norm": 0.6060095300813723, "learning_rate": 0.00013599837362559053, "loss": 12.1462, "step": 14739 }, { "epoch": 0.8026519096335927, "grad_norm": 0.5992632558396295, "learning_rate": 0.0001359901464160327, "loss": 12.3345, "step": 14740 }, { "epoch": 0.8027063636301757, "grad_norm": 0.646098819998647, "learning_rate": 0.00013598191892660942, "loss": 12.2307, "step": 14741 }, { "epoch": 0.8027608176267587, "grad_norm": 0.590717453674496, "learning_rate": 0.00013597369115738475, "loss": 12.2248, "step": 14742 }, { "epoch": 0.8028152716233417, "grad_norm": 0.5790171924040252, "learning_rate": 0.00013596546310842259, "loss": 12.3083, "step": 14743 }, { "epoch": 0.8028697256199248, "grad_norm": 0.5690108975248733, "learning_rate": 0.000135957234779787, "loss": 12.1001, "step": 14744 }, { "epoch": 0.8029241796165077, "grad_norm": 0.6598419449912617, "learning_rate": 0.00013594900617154188, "loss": 12.2677, "step": 14745 }, { "epoch": 0.8029786336130907, "grad_norm": 0.6457502835703006, "learning_rate": 0.00013594077728375128, "loss": 12.2013, "step": 14746 }, { "epoch": 0.8030330876096737, "grad_norm": 0.5676104527087016, "learning_rate": 0.00013593254811647916, "loss": 12.2458, "step": 14747 }, { "epoch": 0.8030875416062567, "grad_norm": 0.5815299697852133, "learning_rate": 0.00013592431866978955, "loss": 12.0223, "step": 14748 }, { "epoch": 0.8031419956028398, "grad_norm": 0.635907774790155, "learning_rate": 0.00013591608894374642, "loss": 12.1705, "step": 14749 }, { "epoch": 0.8031964495994228, "grad_norm": 0.632519027187312, "learning_rate": 0.00013590785893841372, "loss": 12.2368, "step": 14750 }, { "epoch": 0.8032509035960058, "grad_norm": 0.5971202957757051, "learning_rate": 0.00013589962865385546, "loss": 12.2808, "step": 14751 }, { "epoch": 0.8033053575925888, "grad_norm": 0.6267980859454244, "learning_rate": 0.00013589139809013572, "loss": 12.3181, "step": 14752 }, { "epoch": 0.8033598115891718, "grad_norm": 0.5696313738145158, "learning_rate": 0.00013588316724731842, "loss": 12.1311, "step": 14753 }, { "epoch": 0.8034142655857548, "grad_norm": 0.5387721506325479, "learning_rate": 0.00013587493612546764, "loss": 12.2173, "step": 14754 }, { "epoch": 0.8034687195823379, "grad_norm": 0.5487563950306569, "learning_rate": 0.00013586670472464732, "loss": 12.2189, "step": 14755 }, { "epoch": 0.8035231735789209, "grad_norm": 0.6023344704593654, "learning_rate": 0.00013585847304492144, "loss": 12.2929, "step": 14756 }, { "epoch": 0.8035776275755039, "grad_norm": 0.68285705582827, "learning_rate": 0.00013585024108635408, "loss": 12.2312, "step": 14757 }, { "epoch": 0.8036320815720869, "grad_norm": 0.5828683432730125, "learning_rate": 0.00013584200884900926, "loss": 12.166, "step": 14758 }, { "epoch": 0.8036865355686699, "grad_norm": 0.598886249716344, "learning_rate": 0.00013583377633295097, "loss": 12.272, "step": 14759 }, { "epoch": 0.8037409895652529, "grad_norm": 0.574350861635556, "learning_rate": 0.00013582554353824323, "loss": 12.2154, "step": 14760 }, { "epoch": 0.803795443561836, "grad_norm": 0.531639449220211, "learning_rate": 0.00013581731046495004, "loss": 12.0654, "step": 14761 }, { "epoch": 0.803849897558419, "grad_norm": 0.5760371469392463, "learning_rate": 0.00013580907711313543, "loss": 12.1291, "step": 14762 }, { "epoch": 0.803904351555002, "grad_norm": 0.6355944627676456, "learning_rate": 0.00013580084348286344, "loss": 12.2196, "step": 14763 }, { "epoch": 0.8039588055515849, "grad_norm": 0.6236060806198317, "learning_rate": 0.00013579260957419812, "loss": 12.1445, "step": 14764 }, { "epoch": 0.8040132595481679, "grad_norm": 0.5654738248795809, "learning_rate": 0.0001357843753872034, "loss": 12.3111, "step": 14765 }, { "epoch": 0.8040677135447509, "grad_norm": 0.6123689053793384, "learning_rate": 0.0001357761409219434, "loss": 12.2449, "step": 14766 }, { "epoch": 0.804122167541334, "grad_norm": 0.6004670492334654, "learning_rate": 0.00013576790617848215, "loss": 12.2578, "step": 14767 }, { "epoch": 0.804176621537917, "grad_norm": 0.5645618267114271, "learning_rate": 0.00013575967115688365, "loss": 12.2648, "step": 14768 }, { "epoch": 0.8042310755345, "grad_norm": 0.5500008223839482, "learning_rate": 0.00013575143585721196, "loss": 12.2283, "step": 14769 }, { "epoch": 0.804285529531083, "grad_norm": 0.6051696895269583, "learning_rate": 0.0001357432002795311, "loss": 12.3082, "step": 14770 }, { "epoch": 0.804339983527666, "grad_norm": 0.5952390426204499, "learning_rate": 0.00013573496442390511, "loss": 12.1966, "step": 14771 }, { "epoch": 0.804394437524249, "grad_norm": 0.6088605323849096, "learning_rate": 0.00013572672829039806, "loss": 12.3442, "step": 14772 }, { "epoch": 0.8044488915208321, "grad_norm": 0.5993152028924302, "learning_rate": 0.00013571849187907396, "loss": 12.1041, "step": 14773 }, { "epoch": 0.8045033455174151, "grad_norm": 0.5218338456393441, "learning_rate": 0.0001357102551899969, "loss": 12.1031, "step": 14774 }, { "epoch": 0.8045577995139981, "grad_norm": 0.609926418028468, "learning_rate": 0.0001357020182232309, "loss": 12.3269, "step": 14775 }, { "epoch": 0.8046122535105811, "grad_norm": 0.6470069008285002, "learning_rate": 0.00013569378097884, "loss": 12.2054, "step": 14776 }, { "epoch": 0.8046667075071641, "grad_norm": 0.5562287041283757, "learning_rate": 0.0001356855434568883, "loss": 12.171, "step": 14777 }, { "epoch": 0.8047211615037471, "grad_norm": 0.5383874909013538, "learning_rate": 0.00013567730565743982, "loss": 12.2667, "step": 14778 }, { "epoch": 0.8047756155003302, "grad_norm": 0.5781141534823001, "learning_rate": 0.00013566906758055863, "loss": 12.0728, "step": 14779 }, { "epoch": 0.8048300694969132, "grad_norm": 0.5767436399068844, "learning_rate": 0.00013566082922630878, "loss": 12.181, "step": 14780 }, { "epoch": 0.8048845234934962, "grad_norm": 0.6601181379244292, "learning_rate": 0.00013565259059475436, "loss": 12.1892, "step": 14781 }, { "epoch": 0.8049389774900791, "grad_norm": 0.5180753674094193, "learning_rate": 0.00013564435168595938, "loss": 12.2281, "step": 14782 }, { "epoch": 0.8049934314866621, "grad_norm": 0.5461455256382294, "learning_rate": 0.000135636112499988, "loss": 12.1856, "step": 14783 }, { "epoch": 0.8050478854832452, "grad_norm": 0.5651300856132475, "learning_rate": 0.0001356278730369042, "loss": 12.1808, "step": 14784 }, { "epoch": 0.8051023394798282, "grad_norm": 0.5762346524051141, "learning_rate": 0.00013561963329677208, "loss": 12.0586, "step": 14785 }, { "epoch": 0.8051567934764112, "grad_norm": 0.5566516236897829, "learning_rate": 0.0001356113932796557, "loss": 12.2405, "step": 14786 }, { "epoch": 0.8052112474729942, "grad_norm": 0.5272763168734077, "learning_rate": 0.0001356031529856192, "loss": 12.2018, "step": 14787 }, { "epoch": 0.8052657014695772, "grad_norm": 0.5277452420280747, "learning_rate": 0.00013559491241472657, "loss": 12.1506, "step": 14788 }, { "epoch": 0.8053201554661602, "grad_norm": 0.5549414438819789, "learning_rate": 0.00013558667156704195, "loss": 12.2929, "step": 14789 }, { "epoch": 0.8053746094627433, "grad_norm": 0.573943349349434, "learning_rate": 0.00013557843044262942, "loss": 12.2114, "step": 14790 }, { "epoch": 0.8054290634593263, "grad_norm": 0.631727434172093, "learning_rate": 0.000135570189041553, "loss": 12.0948, "step": 14791 }, { "epoch": 0.8054835174559093, "grad_norm": 0.5886082743780373, "learning_rate": 0.00013556194736387688, "loss": 12.2512, "step": 14792 }, { "epoch": 0.8055379714524923, "grad_norm": 0.5156967125767348, "learning_rate": 0.00013555370540966507, "loss": 12.1234, "step": 14793 }, { "epoch": 0.8055924254490753, "grad_norm": 0.5906966524320194, "learning_rate": 0.00013554546317898168, "loss": 12.1704, "step": 14794 }, { "epoch": 0.8056468794456583, "grad_norm": 0.6407555076229271, "learning_rate": 0.00013553722067189084, "loss": 12.1559, "step": 14795 }, { "epoch": 0.8057013334422414, "grad_norm": 0.7499130923304811, "learning_rate": 0.00013552897788845656, "loss": 12.1958, "step": 14796 }, { "epoch": 0.8057557874388244, "grad_norm": 0.5890173368065736, "learning_rate": 0.00013552073482874302, "loss": 12.1924, "step": 14797 }, { "epoch": 0.8058102414354074, "grad_norm": 0.5424754999109495, "learning_rate": 0.0001355124914928143, "loss": 12.1568, "step": 14798 }, { "epoch": 0.8058646954319904, "grad_norm": 0.5570372008104348, "learning_rate": 0.00013550424788073446, "loss": 12.1763, "step": 14799 }, { "epoch": 0.8059191494285733, "grad_norm": 0.6144758166704617, "learning_rate": 0.00013549600399256762, "loss": 12.2377, "step": 14800 }, { "epoch": 0.8059736034251563, "grad_norm": 0.5475933792469335, "learning_rate": 0.00013548775982837795, "loss": 12.1686, "step": 14801 }, { "epoch": 0.8060280574217394, "grad_norm": 0.532083181554464, "learning_rate": 0.0001354795153882295, "loss": 12.1992, "step": 14802 }, { "epoch": 0.8060825114183224, "grad_norm": 0.569394156356611, "learning_rate": 0.00013547127067218637, "loss": 12.2908, "step": 14803 }, { "epoch": 0.8061369654149054, "grad_norm": 0.5876466479875222, "learning_rate": 0.0001354630256803127, "loss": 12.376, "step": 14804 }, { "epoch": 0.8061914194114884, "grad_norm": 0.6446041649581469, "learning_rate": 0.00013545478041267258, "loss": 12.3708, "step": 14805 }, { "epoch": 0.8062458734080714, "grad_norm": 0.5745533624203241, "learning_rate": 0.00013544653486933017, "loss": 12.2191, "step": 14806 }, { "epoch": 0.8063003274046544, "grad_norm": 0.5876921213626856, "learning_rate": 0.00013543828905034953, "loss": 12.0911, "step": 14807 }, { "epoch": 0.8063547814012375, "grad_norm": 0.61122104079932, "learning_rate": 0.00013543004295579481, "loss": 12.3174, "step": 14808 }, { "epoch": 0.8064092353978205, "grad_norm": 0.5693585541111951, "learning_rate": 0.00013542179658573018, "loss": 12.3254, "step": 14809 }, { "epoch": 0.8064636893944035, "grad_norm": 0.6509018058521152, "learning_rate": 0.00013541354994021972, "loss": 12.1927, "step": 14810 }, { "epoch": 0.8065181433909865, "grad_norm": 0.524792413116897, "learning_rate": 0.0001354053030193275, "loss": 12.1193, "step": 14811 }, { "epoch": 0.8065725973875695, "grad_norm": 0.6622307559286874, "learning_rate": 0.0001353970558231177, "loss": 12.1634, "step": 14812 }, { "epoch": 0.8066270513841525, "grad_norm": 0.6240020101914101, "learning_rate": 0.00013538880835165453, "loss": 12.1128, "step": 14813 }, { "epoch": 0.8066815053807356, "grad_norm": 0.6213612845211237, "learning_rate": 0.000135380560605002, "loss": 12.2167, "step": 14814 }, { "epoch": 0.8067359593773186, "grad_norm": 0.5891375759818152, "learning_rate": 0.00013537231258322434, "loss": 12.2565, "step": 14815 }, { "epoch": 0.8067904133739016, "grad_norm": 0.6213675246525658, "learning_rate": 0.00013536406428638558, "loss": 12.1733, "step": 14816 }, { "epoch": 0.8068448673704846, "grad_norm": 0.5792737305087725, "learning_rate": 0.00013535581571454995, "loss": 12.2326, "step": 14817 }, { "epoch": 0.8068993213670675, "grad_norm": 0.5503879796337302, "learning_rate": 0.00013534756686778157, "loss": 12.1473, "step": 14818 }, { "epoch": 0.8069537753636506, "grad_norm": 0.5875674071397632, "learning_rate": 0.0001353393177461446, "loss": 12.3264, "step": 14819 }, { "epoch": 0.8070082293602336, "grad_norm": 0.6658764117400607, "learning_rate": 0.00013533106834970319, "loss": 12.3356, "step": 14820 }, { "epoch": 0.8070626833568166, "grad_norm": 0.5598078870331447, "learning_rate": 0.00013532281867852144, "loss": 12.056, "step": 14821 }, { "epoch": 0.8071171373533996, "grad_norm": 0.5427067549451772, "learning_rate": 0.00013531456873266352, "loss": 12.2513, "step": 14822 }, { "epoch": 0.8071715913499826, "grad_norm": 0.6039181316269996, "learning_rate": 0.00013530631851219358, "loss": 12.104, "step": 14823 }, { "epoch": 0.8072260453465656, "grad_norm": 0.5514512877987486, "learning_rate": 0.00013529806801717583, "loss": 12.0721, "step": 14824 }, { "epoch": 0.8072804993431487, "grad_norm": 0.5026969335243999, "learning_rate": 0.00013528981724767434, "loss": 12.1453, "step": 14825 }, { "epoch": 0.8073349533397317, "grad_norm": 0.6399354209506974, "learning_rate": 0.00013528156620375335, "loss": 12.3137, "step": 14826 }, { "epoch": 0.8073894073363147, "grad_norm": 0.5228089435675359, "learning_rate": 0.00013527331488547698, "loss": 12.1478, "step": 14827 }, { "epoch": 0.8074438613328977, "grad_norm": 0.5234410976027418, "learning_rate": 0.00013526506329290933, "loss": 12.2074, "step": 14828 }, { "epoch": 0.8074983153294807, "grad_norm": 0.529908021814124, "learning_rate": 0.00013525681142611472, "loss": 12.1225, "step": 14829 }, { "epoch": 0.8075527693260637, "grad_norm": 0.544371633644651, "learning_rate": 0.00013524855928515717, "loss": 12.0765, "step": 14830 }, { "epoch": 0.8076072233226468, "grad_norm": 0.5615786923535416, "learning_rate": 0.00013524030687010096, "loss": 12.2037, "step": 14831 }, { "epoch": 0.8076616773192298, "grad_norm": 0.5504412742251322, "learning_rate": 0.0001352320541810102, "loss": 12.1708, "step": 14832 }, { "epoch": 0.8077161313158128, "grad_norm": 0.7066085094534744, "learning_rate": 0.00013522380121794907, "loss": 12.1918, "step": 14833 }, { "epoch": 0.8077705853123958, "grad_norm": 0.6199773081342997, "learning_rate": 0.00013521554798098172, "loss": 12.276, "step": 14834 }, { "epoch": 0.8078250393089788, "grad_norm": 0.5580291655636943, "learning_rate": 0.00013520729447017243, "loss": 12.3218, "step": 14835 }, { "epoch": 0.8078794933055617, "grad_norm": 0.5219495895617315, "learning_rate": 0.0001351990406855853, "loss": 12.1418, "step": 14836 }, { "epoch": 0.8079339473021449, "grad_norm": 0.6113277869483955, "learning_rate": 0.00013519078662728448, "loss": 12.1674, "step": 14837 }, { "epoch": 0.8079884012987278, "grad_norm": 0.5109110687457398, "learning_rate": 0.00013518253229533424, "loss": 12.3098, "step": 14838 }, { "epoch": 0.8080428552953108, "grad_norm": 0.557004942818786, "learning_rate": 0.0001351742776897987, "loss": 12.1824, "step": 14839 }, { "epoch": 0.8080973092918938, "grad_norm": 0.6010657488392056, "learning_rate": 0.00013516602281074213, "loss": 12.2227, "step": 14840 }, { "epoch": 0.8081517632884768, "grad_norm": 0.6358189902556056, "learning_rate": 0.00013515776765822863, "loss": 12.277, "step": 14841 }, { "epoch": 0.8082062172850598, "grad_norm": 0.5751197052084914, "learning_rate": 0.00013514951223232244, "loss": 12.2324, "step": 14842 }, { "epoch": 0.8082606712816429, "grad_norm": 0.5700495119696227, "learning_rate": 0.00013514125653308777, "loss": 12.2499, "step": 14843 }, { "epoch": 0.8083151252782259, "grad_norm": 0.5918496963055511, "learning_rate": 0.00013513300056058877, "loss": 12.2725, "step": 14844 }, { "epoch": 0.8083695792748089, "grad_norm": 0.5626414254597144, "learning_rate": 0.00013512474431488967, "loss": 12.2727, "step": 14845 }, { "epoch": 0.8084240332713919, "grad_norm": 0.5642979068634242, "learning_rate": 0.00013511648779605465, "loss": 12.156, "step": 14846 }, { "epoch": 0.8084784872679749, "grad_norm": 0.5493409572701059, "learning_rate": 0.00013510823100414796, "loss": 12.2125, "step": 14847 }, { "epoch": 0.8085329412645579, "grad_norm": 0.5689975122058666, "learning_rate": 0.00013509997393923377, "loss": 12.2015, "step": 14848 }, { "epoch": 0.808587395261141, "grad_norm": 0.49182099649915784, "learning_rate": 0.0001350917166013763, "loss": 12.1185, "step": 14849 }, { "epoch": 0.808641849257724, "grad_norm": 0.5247699829593893, "learning_rate": 0.00013508345899063975, "loss": 12.1663, "step": 14850 }, { "epoch": 0.808696303254307, "grad_norm": 0.5642604245222994, "learning_rate": 0.00013507520110708833, "loss": 12.1613, "step": 14851 }, { "epoch": 0.80875075725089, "grad_norm": 0.5587763435778694, "learning_rate": 0.00013506694295078628, "loss": 12.2153, "step": 14852 }, { "epoch": 0.808805211247473, "grad_norm": 0.5574095281181681, "learning_rate": 0.0001350586845217978, "loss": 12.267, "step": 14853 }, { "epoch": 0.8088596652440561, "grad_norm": 0.5716751867881421, "learning_rate": 0.0001350504258201871, "loss": 12.1996, "step": 14854 }, { "epoch": 0.808914119240639, "grad_norm": 0.5550756421929837, "learning_rate": 0.00013504216684601843, "loss": 12.1243, "step": 14855 }, { "epoch": 0.808968573237222, "grad_norm": 0.5443095253722837, "learning_rate": 0.00013503390759935597, "loss": 11.9836, "step": 14856 }, { "epoch": 0.809023027233805, "grad_norm": 0.6122039032601948, "learning_rate": 0.00013502564808026398, "loss": 12.2784, "step": 14857 }, { "epoch": 0.809077481230388, "grad_norm": 0.5787822118596989, "learning_rate": 0.00013501738828880668, "loss": 12.086, "step": 14858 }, { "epoch": 0.809131935226971, "grad_norm": 0.6197521341959868, "learning_rate": 0.0001350091282250483, "loss": 12.3282, "step": 14859 }, { "epoch": 0.8091863892235541, "grad_norm": 0.5613037311489635, "learning_rate": 0.00013500086788905305, "loss": 12.1889, "step": 14860 }, { "epoch": 0.8092408432201371, "grad_norm": 0.5834880990877368, "learning_rate": 0.00013499260728088518, "loss": 12.2518, "step": 14861 }, { "epoch": 0.8092952972167201, "grad_norm": 0.6046503559268155, "learning_rate": 0.00013498434640060896, "loss": 12.1995, "step": 14862 }, { "epoch": 0.8093497512133031, "grad_norm": 0.5478187529728412, "learning_rate": 0.00013497608524828857, "loss": 12.2138, "step": 14863 }, { "epoch": 0.8094042052098861, "grad_norm": 0.5791711463562509, "learning_rate": 0.0001349678238239883, "loss": 12.0796, "step": 14864 }, { "epoch": 0.8094586592064691, "grad_norm": 0.5683328909188236, "learning_rate": 0.00013495956212777237, "loss": 12.1985, "step": 14865 }, { "epoch": 0.8095131132030522, "grad_norm": 0.5371246737714352, "learning_rate": 0.00013495130015970497, "loss": 12.0929, "step": 14866 }, { "epoch": 0.8095675671996352, "grad_norm": 0.5835081530600631, "learning_rate": 0.00013494303791985045, "loss": 12.1738, "step": 14867 }, { "epoch": 0.8096220211962182, "grad_norm": 0.5857274196314827, "learning_rate": 0.00013493477540827298, "loss": 12.2489, "step": 14868 }, { "epoch": 0.8096764751928012, "grad_norm": 0.5554600483101491, "learning_rate": 0.00013492651262503685, "loss": 12.1106, "step": 14869 }, { "epoch": 0.8097309291893842, "grad_norm": 0.6650260925920747, "learning_rate": 0.00013491824957020628, "loss": 12.4283, "step": 14870 }, { "epoch": 0.8097853831859672, "grad_norm": 0.5837912278619439, "learning_rate": 0.00013490998624384558, "loss": 12.1607, "step": 14871 }, { "epoch": 0.8098398371825503, "grad_norm": 0.5421872387535078, "learning_rate": 0.0001349017226460189, "loss": 12.2336, "step": 14872 }, { "epoch": 0.8098942911791333, "grad_norm": 0.7267994171336976, "learning_rate": 0.00013489345877679067, "loss": 12.2144, "step": 14873 }, { "epoch": 0.8099487451757162, "grad_norm": 0.544225159839763, "learning_rate": 0.000134885194636225, "loss": 12.3555, "step": 14874 }, { "epoch": 0.8100031991722992, "grad_norm": 0.6004138775591223, "learning_rate": 0.00013487693022438624, "loss": 12.2701, "step": 14875 }, { "epoch": 0.8100576531688822, "grad_norm": 0.6246154775919707, "learning_rate": 0.0001348686655413386, "loss": 12.2617, "step": 14876 }, { "epoch": 0.8101121071654652, "grad_norm": 0.5404264875377471, "learning_rate": 0.00013486040058714632, "loss": 12.2787, "step": 14877 }, { "epoch": 0.8101665611620483, "grad_norm": 0.550133321004403, "learning_rate": 0.00013485213536187378, "loss": 12.2487, "step": 14878 }, { "epoch": 0.8102210151586313, "grad_norm": 0.5770492698911255, "learning_rate": 0.00013484386986558516, "loss": 12.2898, "step": 14879 }, { "epoch": 0.8102754691552143, "grad_norm": 0.7417205778001171, "learning_rate": 0.0001348356040983448, "loss": 12.324, "step": 14880 }, { "epoch": 0.8103299231517973, "grad_norm": 0.5670109763720117, "learning_rate": 0.00013482733806021693, "loss": 12.1479, "step": 14881 }, { "epoch": 0.8103843771483803, "grad_norm": 0.6161954924055218, "learning_rate": 0.00013481907175126582, "loss": 12.3676, "step": 14882 }, { "epoch": 0.8104388311449634, "grad_norm": 0.5659342862521876, "learning_rate": 0.00013481080517155578, "loss": 12.1375, "step": 14883 }, { "epoch": 0.8104932851415464, "grad_norm": 0.5521924434604571, "learning_rate": 0.00013480253832115108, "loss": 12.1946, "step": 14884 }, { "epoch": 0.8105477391381294, "grad_norm": 0.5282392078953213, "learning_rate": 0.000134794271200116, "loss": 12.1081, "step": 14885 }, { "epoch": 0.8106021931347124, "grad_norm": 0.5869486267483298, "learning_rate": 0.00013478600380851486, "loss": 12.1544, "step": 14886 }, { "epoch": 0.8106566471312954, "grad_norm": 0.6393240761158134, "learning_rate": 0.00013477773614641188, "loss": 12.2286, "step": 14887 }, { "epoch": 0.8107111011278784, "grad_norm": 0.5865865463438562, "learning_rate": 0.0001347694682138714, "loss": 12.1453, "step": 14888 }, { "epoch": 0.8107655551244615, "grad_norm": 0.6034268114450104, "learning_rate": 0.0001347612000109577, "loss": 12.1117, "step": 14889 }, { "epoch": 0.8108200091210445, "grad_norm": 0.5755438394885927, "learning_rate": 0.0001347529315377351, "loss": 12.2393, "step": 14890 }, { "epoch": 0.8108744631176275, "grad_norm": 0.5920600215118633, "learning_rate": 0.00013474466279426788, "loss": 12.2165, "step": 14891 }, { "epoch": 0.8109289171142104, "grad_norm": 0.5869621005908414, "learning_rate": 0.00013473639378062035, "loss": 12.1462, "step": 14892 }, { "epoch": 0.8109833711107934, "grad_norm": 0.6218140884335485, "learning_rate": 0.00013472812449685675, "loss": 12.2385, "step": 14893 }, { "epoch": 0.8110378251073764, "grad_norm": 0.6585378622196876, "learning_rate": 0.00013471985494304143, "loss": 12.2975, "step": 14894 }, { "epoch": 0.8110922791039595, "grad_norm": 0.603287054489097, "learning_rate": 0.00013471158511923876, "loss": 12.1148, "step": 14895 }, { "epoch": 0.8111467331005425, "grad_norm": 0.5996342050759436, "learning_rate": 0.00013470331502551293, "loss": 12.2267, "step": 14896 }, { "epoch": 0.8112011870971255, "grad_norm": 0.5561600021162297, "learning_rate": 0.00013469504466192831, "loss": 12.227, "step": 14897 }, { "epoch": 0.8112556410937085, "grad_norm": 0.5989965552789527, "learning_rate": 0.0001346867740285492, "loss": 12.1776, "step": 14898 }, { "epoch": 0.8113100950902915, "grad_norm": 0.5785238524484136, "learning_rate": 0.00013467850312543994, "loss": 12.2839, "step": 14899 }, { "epoch": 0.8113645490868745, "grad_norm": 0.6157536243138646, "learning_rate": 0.0001346702319526648, "loss": 12.216, "step": 14900 }, { "epoch": 0.8114190030834576, "grad_norm": 0.6139216252470333, "learning_rate": 0.00013466196051028814, "loss": 12.3419, "step": 14901 }, { "epoch": 0.8114734570800406, "grad_norm": 0.565349217936788, "learning_rate": 0.00013465368879837425, "loss": 12.2088, "step": 14902 }, { "epoch": 0.8115279110766236, "grad_norm": 0.610329727565721, "learning_rate": 0.00013464541681698747, "loss": 12.246, "step": 14903 }, { "epoch": 0.8115823650732066, "grad_norm": 0.6121022967448893, "learning_rate": 0.0001346371445661921, "loss": 12.1796, "step": 14904 }, { "epoch": 0.8116368190697896, "grad_norm": 0.5470315206379218, "learning_rate": 0.00013462887204605253, "loss": 12.0974, "step": 14905 }, { "epoch": 0.8116912730663726, "grad_norm": 0.5627392323847954, "learning_rate": 0.00013462059925663299, "loss": 12.1349, "step": 14906 }, { "epoch": 0.8117457270629557, "grad_norm": 0.6150381921746256, "learning_rate": 0.0001346123261979979, "loss": 12.1735, "step": 14907 }, { "epoch": 0.8118001810595387, "grad_norm": 0.539941217460702, "learning_rate": 0.00013460405287021155, "loss": 12.1675, "step": 14908 }, { "epoch": 0.8118546350561217, "grad_norm": 0.6506378340672261, "learning_rate": 0.0001345957792733383, "loss": 12.1344, "step": 14909 }, { "epoch": 0.8119090890527046, "grad_norm": 0.5772058084352601, "learning_rate": 0.00013458750540744244, "loss": 12.2413, "step": 14910 }, { "epoch": 0.8119635430492876, "grad_norm": 0.5675206244256016, "learning_rate": 0.00013457923127258833, "loss": 12.0752, "step": 14911 }, { "epoch": 0.8120179970458706, "grad_norm": 0.5806601939090454, "learning_rate": 0.00013457095686884033, "loss": 12.2102, "step": 14912 }, { "epoch": 0.8120724510424537, "grad_norm": 0.6070178312564786, "learning_rate": 0.00013456268219626277, "loss": 12.2825, "step": 14913 }, { "epoch": 0.8121269050390367, "grad_norm": 0.6414391844447898, "learning_rate": 0.00013455440725492, "loss": 12.1771, "step": 14914 }, { "epoch": 0.8121813590356197, "grad_norm": 0.593889169013522, "learning_rate": 0.00013454613204487637, "loss": 12.1321, "step": 14915 }, { "epoch": 0.8122358130322027, "grad_norm": 0.6188895270570187, "learning_rate": 0.00013453785656619623, "loss": 12.3201, "step": 14916 }, { "epoch": 0.8122902670287857, "grad_norm": 0.6319531156353801, "learning_rate": 0.00013452958081894392, "loss": 12.2275, "step": 14917 }, { "epoch": 0.8123447210253688, "grad_norm": 0.6033406417544306, "learning_rate": 0.0001345213048031838, "loss": 12.2757, "step": 14918 }, { "epoch": 0.8123991750219518, "grad_norm": 0.5136940686796113, "learning_rate": 0.00013451302851898023, "loss": 12.141, "step": 14919 }, { "epoch": 0.8124536290185348, "grad_norm": 0.5767843735213666, "learning_rate": 0.00013450475196639754, "loss": 12.2928, "step": 14920 }, { "epoch": 0.8125080830151178, "grad_norm": 0.5360562112845915, "learning_rate": 0.00013449647514550013, "loss": 12.2221, "step": 14921 }, { "epoch": 0.8125625370117008, "grad_norm": 0.5779506182159675, "learning_rate": 0.00013448819805635234, "loss": 12.184, "step": 14922 }, { "epoch": 0.8126169910082838, "grad_norm": 0.5536109511509439, "learning_rate": 0.0001344799206990185, "loss": 12.2786, "step": 14923 }, { "epoch": 0.8126714450048669, "grad_norm": 0.530682294119379, "learning_rate": 0.0001344716430735631, "loss": 12.1823, "step": 14924 }, { "epoch": 0.8127258990014499, "grad_norm": 0.5865372297258536, "learning_rate": 0.00013446336518005037, "loss": 12.1718, "step": 14925 }, { "epoch": 0.8127803529980329, "grad_norm": 0.5676681573906274, "learning_rate": 0.00013445508701854473, "loss": 12.1888, "step": 14926 }, { "epoch": 0.8128348069946159, "grad_norm": 0.5783128967423121, "learning_rate": 0.00013444680858911055, "loss": 12.2015, "step": 14927 }, { "epoch": 0.8128892609911988, "grad_norm": 0.6538533256948675, "learning_rate": 0.00013443852989181222, "loss": 12.3719, "step": 14928 }, { "epoch": 0.8129437149877818, "grad_norm": 0.5702116895106648, "learning_rate": 0.0001344302509267141, "loss": 12.2315, "step": 14929 }, { "epoch": 0.8129981689843649, "grad_norm": 0.5575423623731566, "learning_rate": 0.0001344219716938806, "loss": 12.1814, "step": 14930 }, { "epoch": 0.8130526229809479, "grad_norm": 0.6088179847352079, "learning_rate": 0.00013441369219337605, "loss": 12.1685, "step": 14931 }, { "epoch": 0.8131070769775309, "grad_norm": 0.607212284364403, "learning_rate": 0.00013440541242526485, "loss": 12.3098, "step": 14932 }, { "epoch": 0.8131615309741139, "grad_norm": 0.567310137687238, "learning_rate": 0.00013439713238961142, "loss": 12.0795, "step": 14933 }, { "epoch": 0.8132159849706969, "grad_norm": 0.5280748988570373, "learning_rate": 0.0001343888520864801, "loss": 12.2618, "step": 14934 }, { "epoch": 0.8132704389672799, "grad_norm": 0.6022799459452628, "learning_rate": 0.00013438057151593532, "loss": 12.2013, "step": 14935 }, { "epoch": 0.813324892963863, "grad_norm": 0.5395486494308462, "learning_rate": 0.00013437229067804146, "loss": 12.1533, "step": 14936 }, { "epoch": 0.813379346960446, "grad_norm": 0.5345849608595724, "learning_rate": 0.00013436400957286286, "loss": 12.2119, "step": 14937 }, { "epoch": 0.813433800957029, "grad_norm": 0.5223620841321481, "learning_rate": 0.00013435572820046397, "loss": 12.1625, "step": 14938 }, { "epoch": 0.813488254953612, "grad_norm": 0.6776491326508257, "learning_rate": 0.0001343474465609092, "loss": 12.2583, "step": 14939 }, { "epoch": 0.813542708950195, "grad_norm": 0.5517732802269188, "learning_rate": 0.00013433916465426294, "loss": 12.069, "step": 14940 }, { "epoch": 0.813597162946778, "grad_norm": 0.5566442191844937, "learning_rate": 0.00013433088248058955, "loss": 12.2793, "step": 14941 }, { "epoch": 0.8136516169433611, "grad_norm": 0.5713057966290501, "learning_rate": 0.00013432260003995347, "loss": 12.1679, "step": 14942 }, { "epoch": 0.8137060709399441, "grad_norm": 0.5897516315170662, "learning_rate": 0.00013431431733241907, "loss": 12.1071, "step": 14943 }, { "epoch": 0.8137605249365271, "grad_norm": 0.5827581462897125, "learning_rate": 0.00013430603435805077, "loss": 12.3552, "step": 14944 }, { "epoch": 0.8138149789331101, "grad_norm": 0.5386720004141335, "learning_rate": 0.00013429775111691304, "loss": 12.1711, "step": 14945 }, { "epoch": 0.813869432929693, "grad_norm": 0.6672047986879622, "learning_rate": 0.00013428946760907025, "loss": 12.0904, "step": 14946 }, { "epoch": 0.813923886926276, "grad_norm": 0.570405414930972, "learning_rate": 0.00013428118383458678, "loss": 12.1777, "step": 14947 }, { "epoch": 0.8139783409228591, "grad_norm": 0.5348817899525419, "learning_rate": 0.00013427289979352707, "loss": 12.181, "step": 14948 }, { "epoch": 0.8140327949194421, "grad_norm": 0.5564605962690318, "learning_rate": 0.00013426461548595556, "loss": 12.2239, "step": 14949 }, { "epoch": 0.8140872489160251, "grad_norm": 0.5826757439277491, "learning_rate": 0.00013425633091193666, "loss": 12.0763, "step": 14950 }, { "epoch": 0.8141417029126081, "grad_norm": 0.6392072154759486, "learning_rate": 0.00013424804607153478, "loss": 12.2083, "step": 14951 }, { "epoch": 0.8141961569091911, "grad_norm": 0.5278847783302267, "learning_rate": 0.00013423976096481435, "loss": 12.0883, "step": 14952 }, { "epoch": 0.8142506109057742, "grad_norm": 0.5667153367275108, "learning_rate": 0.00013423147559183982, "loss": 12.1424, "step": 14953 }, { "epoch": 0.8143050649023572, "grad_norm": 0.6059042657487198, "learning_rate": 0.00013422318995267554, "loss": 12.1883, "step": 14954 }, { "epoch": 0.8143595188989402, "grad_norm": 0.584830107343991, "learning_rate": 0.00013421490404738604, "loss": 12.2183, "step": 14955 }, { "epoch": 0.8144139728955232, "grad_norm": 0.6678900363954223, "learning_rate": 0.0001342066178760357, "loss": 12.2575, "step": 14956 }, { "epoch": 0.8144684268921062, "grad_norm": 0.563722864109056, "learning_rate": 0.00013419833143868897, "loss": 12.1862, "step": 14957 }, { "epoch": 0.8145228808886892, "grad_norm": 0.6231865053320498, "learning_rate": 0.00013419004473541027, "loss": 12.1494, "step": 14958 }, { "epoch": 0.8145773348852723, "grad_norm": 0.6414733001046949, "learning_rate": 0.000134181757766264, "loss": 12.2958, "step": 14959 }, { "epoch": 0.8146317888818553, "grad_norm": 0.5652240401860344, "learning_rate": 0.0001341734705313147, "loss": 12.1767, "step": 14960 }, { "epoch": 0.8146862428784383, "grad_norm": 0.5747060543173036, "learning_rate": 0.0001341651830306268, "loss": 12.1846, "step": 14961 }, { "epoch": 0.8147406968750213, "grad_norm": 0.6027332207345627, "learning_rate": 0.00013415689526426465, "loss": 12.1885, "step": 14962 }, { "epoch": 0.8147951508716043, "grad_norm": 0.5982557446880276, "learning_rate": 0.00013414860723229277, "loss": 12.173, "step": 14963 }, { "epoch": 0.8148496048681872, "grad_norm": 0.557802973894175, "learning_rate": 0.00013414031893477558, "loss": 12.2135, "step": 14964 }, { "epoch": 0.8149040588647704, "grad_norm": 0.5448109877067271, "learning_rate": 0.00013413203037177754, "loss": 12.146, "step": 14965 }, { "epoch": 0.8149585128613533, "grad_norm": 0.5656869069438613, "learning_rate": 0.00013412374154336316, "loss": 12.1739, "step": 14966 }, { "epoch": 0.8150129668579363, "grad_norm": 0.5997702753121104, "learning_rate": 0.0001341154524495968, "loss": 12.2298, "step": 14967 }, { "epoch": 0.8150674208545193, "grad_norm": 0.5748158705419266, "learning_rate": 0.00013410716309054295, "loss": 12.3095, "step": 14968 }, { "epoch": 0.8151218748511023, "grad_norm": 0.6540648899325642, "learning_rate": 0.00013409887346626612, "loss": 12.1783, "step": 14969 }, { "epoch": 0.8151763288476853, "grad_norm": 0.5447230236480691, "learning_rate": 0.0001340905835768307, "loss": 12.2082, "step": 14970 }, { "epoch": 0.8152307828442684, "grad_norm": 0.5399966726816193, "learning_rate": 0.0001340822934223012, "loss": 12.118, "step": 14971 }, { "epoch": 0.8152852368408514, "grad_norm": 0.5574187249050045, "learning_rate": 0.00013407400300274207, "loss": 12.1641, "step": 14972 }, { "epoch": 0.8153396908374344, "grad_norm": 0.5774856992493318, "learning_rate": 0.00013406571231821775, "loss": 12.2274, "step": 14973 }, { "epoch": 0.8153941448340174, "grad_norm": 0.5995070765895913, "learning_rate": 0.00013405742136879278, "loss": 12.2242, "step": 14974 }, { "epoch": 0.8154485988306004, "grad_norm": 0.5695205010593473, "learning_rate": 0.00013404913015453157, "loss": 12.1707, "step": 14975 }, { "epoch": 0.8155030528271834, "grad_norm": 0.5626070337798693, "learning_rate": 0.00013404083867549863, "loss": 12.2188, "step": 14976 }, { "epoch": 0.8155575068237665, "grad_norm": 0.6446567968174487, "learning_rate": 0.0001340325469317584, "loss": 12.254, "step": 14977 }, { "epoch": 0.8156119608203495, "grad_norm": 0.578028180559197, "learning_rate": 0.00013402425492337538, "loss": 12.0871, "step": 14978 }, { "epoch": 0.8156664148169325, "grad_norm": 0.5707444550148375, "learning_rate": 0.00013401596265041405, "loss": 12.2326, "step": 14979 }, { "epoch": 0.8157208688135155, "grad_norm": 0.5790218064683561, "learning_rate": 0.0001340076701129389, "loss": 12.1651, "step": 14980 }, { "epoch": 0.8157753228100985, "grad_norm": 0.5530111516710103, "learning_rate": 0.0001339993773110144, "loss": 12.2381, "step": 14981 }, { "epoch": 0.8158297768066815, "grad_norm": 0.5525339506735362, "learning_rate": 0.00013399108424470504, "loss": 12.1943, "step": 14982 }, { "epoch": 0.8158842308032646, "grad_norm": 0.6791189230415343, "learning_rate": 0.0001339827909140753, "loss": 12.2272, "step": 14983 }, { "epoch": 0.8159386847998475, "grad_norm": 0.8677941890684707, "learning_rate": 0.00013397449731918968, "loss": 12.2352, "step": 14984 }, { "epoch": 0.8159931387964305, "grad_norm": 0.5423445874305333, "learning_rate": 0.00013396620346011267, "loss": 12.2357, "step": 14985 }, { "epoch": 0.8160475927930135, "grad_norm": 0.5993168559027767, "learning_rate": 0.00013395790933690878, "loss": 12.2122, "step": 14986 }, { "epoch": 0.8161020467895965, "grad_norm": 0.5884806571904675, "learning_rate": 0.0001339496149496425, "loss": 12.2743, "step": 14987 }, { "epoch": 0.8161565007861796, "grad_norm": 0.5554630343945989, "learning_rate": 0.00013394132029837828, "loss": 11.9055, "step": 14988 }, { "epoch": 0.8162109547827626, "grad_norm": 0.5215621664224854, "learning_rate": 0.0001339330253831807, "loss": 12.1985, "step": 14989 }, { "epoch": 0.8162654087793456, "grad_norm": 0.6540481921438157, "learning_rate": 0.0001339247302041142, "loss": 12.2831, "step": 14990 }, { "epoch": 0.8163198627759286, "grad_norm": 0.6157703375357999, "learning_rate": 0.00013391643476124335, "loss": 12.262, "step": 14991 }, { "epoch": 0.8163743167725116, "grad_norm": 0.6485824430647104, "learning_rate": 0.00013390813905463255, "loss": 12.2356, "step": 14992 }, { "epoch": 0.8164287707690946, "grad_norm": 0.5571332481181887, "learning_rate": 0.0001338998430843464, "loss": 12.2123, "step": 14993 }, { "epoch": 0.8164832247656777, "grad_norm": 0.6334643771864544, "learning_rate": 0.0001338915468504494, "loss": 12.2523, "step": 14994 }, { "epoch": 0.8165376787622607, "grad_norm": 0.5118559097275878, "learning_rate": 0.00013388325035300605, "loss": 12.1212, "step": 14995 }, { "epoch": 0.8165921327588437, "grad_norm": 0.5741352405113275, "learning_rate": 0.00013387495359208087, "loss": 12.1782, "step": 14996 }, { "epoch": 0.8166465867554267, "grad_norm": 0.7133859869683401, "learning_rate": 0.00013386665656773834, "loss": 12.1758, "step": 14997 }, { "epoch": 0.8167010407520097, "grad_norm": 0.6902711214672571, "learning_rate": 0.00013385835928004302, "loss": 12.2727, "step": 14998 }, { "epoch": 0.8167554947485927, "grad_norm": 0.5711108832715581, "learning_rate": 0.00013385006172905942, "loss": 12.0462, "step": 14999 }, { "epoch": 0.8168099487451758, "grad_norm": 0.7150747556761515, "learning_rate": 0.00013384176391485205, "loss": 12.3261, "step": 15000 }, { "epoch": 0.8168644027417588, "grad_norm": 0.5812774549161445, "learning_rate": 0.0001338334658374855, "loss": 12.1661, "step": 15001 }, { "epoch": 0.8169188567383417, "grad_norm": 0.5824205643528076, "learning_rate": 0.0001338251674970242, "loss": 12.1429, "step": 15002 }, { "epoch": 0.8169733107349247, "grad_norm": 0.5720606638834883, "learning_rate": 0.00013381686889353273, "loss": 12.2057, "step": 15003 }, { "epoch": 0.8170277647315077, "grad_norm": 0.5880657890604791, "learning_rate": 0.00013380857002707563, "loss": 12.3358, "step": 15004 }, { "epoch": 0.8170822187280907, "grad_norm": 0.6258769532904722, "learning_rate": 0.0001338002708977174, "loss": 12.2655, "step": 15005 }, { "epoch": 0.8171366727246738, "grad_norm": 0.6101374276518876, "learning_rate": 0.00013379197150552262, "loss": 12.2291, "step": 15006 }, { "epoch": 0.8171911267212568, "grad_norm": 0.512928837530879, "learning_rate": 0.0001337836718505558, "loss": 12.1752, "step": 15007 }, { "epoch": 0.8172455807178398, "grad_norm": 0.5762590538623538, "learning_rate": 0.00013377537193288145, "loss": 12.1972, "step": 15008 }, { "epoch": 0.8173000347144228, "grad_norm": 0.5424482566110911, "learning_rate": 0.00013376707175256417, "loss": 12.0931, "step": 15009 }, { "epoch": 0.8173544887110058, "grad_norm": 0.5571591334733318, "learning_rate": 0.00013375877130966847, "loss": 12.2593, "step": 15010 }, { "epoch": 0.8174089427075888, "grad_norm": 0.5990429172530969, "learning_rate": 0.00013375047060425893, "loss": 12.3188, "step": 15011 }, { "epoch": 0.8174633967041719, "grad_norm": 0.5771953018397764, "learning_rate": 0.00013374216963640004, "loss": 12.1905, "step": 15012 }, { "epoch": 0.8175178507007549, "grad_norm": 0.5856012728256729, "learning_rate": 0.0001337338684061564, "loss": 12.2053, "step": 15013 }, { "epoch": 0.8175723046973379, "grad_norm": 0.5262269838088067, "learning_rate": 0.0001337255669135925, "loss": 12.2862, "step": 15014 }, { "epoch": 0.8176267586939209, "grad_norm": 0.553391820209899, "learning_rate": 0.000133717265158773, "loss": 12.2014, "step": 15015 }, { "epoch": 0.8176812126905039, "grad_norm": 0.568890866266896, "learning_rate": 0.00013370896314176235, "loss": 12.2971, "step": 15016 }, { "epoch": 0.817735666687087, "grad_norm": 0.616745452688003, "learning_rate": 0.00013370066086262517, "loss": 12.232, "step": 15017 }, { "epoch": 0.81779012068367, "grad_norm": 0.5888482042030274, "learning_rate": 0.00013369235832142598, "loss": 12.2087, "step": 15018 }, { "epoch": 0.817844574680253, "grad_norm": 0.606699835744316, "learning_rate": 0.00013368405551822935, "loss": 12.2074, "step": 15019 }, { "epoch": 0.817899028676836, "grad_norm": 0.6539207177383145, "learning_rate": 0.00013367575245309987, "loss": 12.1792, "step": 15020 }, { "epoch": 0.8179534826734189, "grad_norm": 0.5783875286795106, "learning_rate": 0.0001336674491261021, "loss": 12.2332, "step": 15021 }, { "epoch": 0.8180079366700019, "grad_norm": 0.6137768782047479, "learning_rate": 0.0001336591455373006, "loss": 12.1272, "step": 15022 }, { "epoch": 0.818062390666585, "grad_norm": 0.6339487854126397, "learning_rate": 0.00013365084168675994, "loss": 12.1809, "step": 15023 }, { "epoch": 0.818116844663168, "grad_norm": 0.57824407345384, "learning_rate": 0.00013364253757454467, "loss": 12.3144, "step": 15024 }, { "epoch": 0.818171298659751, "grad_norm": 0.5894692927948438, "learning_rate": 0.00013363423320071938, "loss": 12.2668, "step": 15025 }, { "epoch": 0.818225752656334, "grad_norm": 0.5856085183522631, "learning_rate": 0.00013362592856534873, "loss": 12.1982, "step": 15026 }, { "epoch": 0.818280206652917, "grad_norm": 0.5610742843623008, "learning_rate": 0.00013361762366849715, "loss": 12.2298, "step": 15027 }, { "epoch": 0.8183346606495, "grad_norm": 0.540835830705468, "learning_rate": 0.00013360931851022931, "loss": 12.19, "step": 15028 }, { "epoch": 0.8183891146460831, "grad_norm": 0.5510492670300929, "learning_rate": 0.00013360101309060974, "loss": 12.3049, "step": 15029 }, { "epoch": 0.8184435686426661, "grad_norm": 0.5300153454770301, "learning_rate": 0.0001335927074097031, "loss": 12.014, "step": 15030 }, { "epoch": 0.8184980226392491, "grad_norm": 0.5576465141288676, "learning_rate": 0.0001335844014675739, "loss": 12.1904, "step": 15031 }, { "epoch": 0.8185524766358321, "grad_norm": 0.5183661440156276, "learning_rate": 0.0001335760952642868, "loss": 12.2148, "step": 15032 }, { "epoch": 0.8186069306324151, "grad_norm": 0.5266003458757883, "learning_rate": 0.00013356778879990632, "loss": 12.1942, "step": 15033 }, { "epoch": 0.8186613846289981, "grad_norm": 0.5466301509703855, "learning_rate": 0.0001335594820744971, "loss": 12.1378, "step": 15034 }, { "epoch": 0.8187158386255812, "grad_norm": 0.5023351188898104, "learning_rate": 0.00013355117508812372, "loss": 12.106, "step": 15035 }, { "epoch": 0.8187702926221642, "grad_norm": 0.660955828481278, "learning_rate": 0.00013354286784085078, "loss": 12.1578, "step": 15036 }, { "epoch": 0.8188247466187472, "grad_norm": 0.5585541576290538, "learning_rate": 0.00013353456033274286, "loss": 12.1009, "step": 15037 }, { "epoch": 0.8188792006153301, "grad_norm": 0.5276413850534565, "learning_rate": 0.0001335262525638646, "loss": 12.2114, "step": 15038 }, { "epoch": 0.8189336546119131, "grad_norm": 0.6796528046414316, "learning_rate": 0.00013351794453428056, "loss": 12.5493, "step": 15039 }, { "epoch": 0.8189881086084961, "grad_norm": 0.5857017757416788, "learning_rate": 0.00013350963624405538, "loss": 12.196, "step": 15040 }, { "epoch": 0.8190425626050792, "grad_norm": 0.5514624615441971, "learning_rate": 0.00013350132769325362, "loss": 12.152, "step": 15041 }, { "epoch": 0.8190970166016622, "grad_norm": 0.5636971843317338, "learning_rate": 0.00013349301888193992, "loss": 12.2295, "step": 15042 }, { "epoch": 0.8191514705982452, "grad_norm": 0.5006540132671965, "learning_rate": 0.0001334847098101789, "loss": 12.1136, "step": 15043 }, { "epoch": 0.8192059245948282, "grad_norm": 0.5326584324047584, "learning_rate": 0.00013347640047803517, "loss": 12.065, "step": 15044 }, { "epoch": 0.8192603785914112, "grad_norm": 0.5372718477066166, "learning_rate": 0.00013346809088557332, "loss": 12.2216, "step": 15045 }, { "epoch": 0.8193148325879942, "grad_norm": 0.5359071036987115, "learning_rate": 0.000133459781032858, "loss": 12.1195, "step": 15046 }, { "epoch": 0.8193692865845773, "grad_norm": 0.5939841514566832, "learning_rate": 0.00013345147091995378, "loss": 12.1763, "step": 15047 }, { "epoch": 0.8194237405811603, "grad_norm": 0.5482117414915757, "learning_rate": 0.00013344316054692533, "loss": 12.176, "step": 15048 }, { "epoch": 0.8194781945777433, "grad_norm": 0.5910514392352234, "learning_rate": 0.0001334348499138373, "loss": 12.2198, "step": 15049 }, { "epoch": 0.8195326485743263, "grad_norm": 0.5148094608300227, "learning_rate": 0.00013342653902075418, "loss": 12.2085, "step": 15050 }, { "epoch": 0.8195871025709093, "grad_norm": 0.6351328799642851, "learning_rate": 0.00013341822786774076, "loss": 12.2853, "step": 15051 }, { "epoch": 0.8196415565674924, "grad_norm": 0.5943046099493197, "learning_rate": 0.00013340991645486157, "loss": 12.1503, "step": 15052 }, { "epoch": 0.8196960105640754, "grad_norm": 0.7340126164156044, "learning_rate": 0.00013340160478218126, "loss": 12.3678, "step": 15053 }, { "epoch": 0.8197504645606584, "grad_norm": 0.5561954548461967, "learning_rate": 0.00013339329284976447, "loss": 12.2121, "step": 15054 }, { "epoch": 0.8198049185572414, "grad_norm": 0.5936471684288838, "learning_rate": 0.00013338498065767587, "loss": 12.2002, "step": 15055 }, { "epoch": 0.8198593725538244, "grad_norm": 0.5767221429828392, "learning_rate": 0.00013337666820598001, "loss": 12.2062, "step": 15056 }, { "epoch": 0.8199138265504073, "grad_norm": 0.5520368662964245, "learning_rate": 0.0001333683554947416, "loss": 12.2063, "step": 15057 }, { "epoch": 0.8199682805469904, "grad_norm": 0.5753584754006462, "learning_rate": 0.00013336004252402527, "loss": 12.1897, "step": 15058 }, { "epoch": 0.8200227345435734, "grad_norm": 0.533969251635747, "learning_rate": 0.00013335172929389565, "loss": 12.1252, "step": 15059 }, { "epoch": 0.8200771885401564, "grad_norm": 0.5427429013322442, "learning_rate": 0.0001333434158044174, "loss": 12.1885, "step": 15060 }, { "epoch": 0.8201316425367394, "grad_norm": 0.4952467730062113, "learning_rate": 0.00013333510205565516, "loss": 12.0779, "step": 15061 }, { "epoch": 0.8201860965333224, "grad_norm": 0.5345066578384771, "learning_rate": 0.00013332678804767358, "loss": 12.2012, "step": 15062 }, { "epoch": 0.8202405505299054, "grad_norm": 0.552949495917493, "learning_rate": 0.00013331847378053726, "loss": 12.1347, "step": 15063 }, { "epoch": 0.8202950045264885, "grad_norm": 0.5550958159476888, "learning_rate": 0.00013331015925431095, "loss": 12.2529, "step": 15064 }, { "epoch": 0.8203494585230715, "grad_norm": 0.5544262032236434, "learning_rate": 0.00013330184446905922, "loss": 12.0939, "step": 15065 }, { "epoch": 0.8204039125196545, "grad_norm": 0.5958917087716364, "learning_rate": 0.00013329352942484678, "loss": 12.1504, "step": 15066 }, { "epoch": 0.8204583665162375, "grad_norm": 0.550229500277586, "learning_rate": 0.0001332852141217383, "loss": 12.101, "step": 15067 }, { "epoch": 0.8205128205128205, "grad_norm": 0.5225662194495485, "learning_rate": 0.00013327689855979836, "loss": 12.1023, "step": 15068 }, { "epoch": 0.8205672745094035, "grad_norm": 0.530911218491028, "learning_rate": 0.00013326858273909168, "loss": 12.2245, "step": 15069 }, { "epoch": 0.8206217285059866, "grad_norm": 0.5554527417021022, "learning_rate": 0.00013326026665968295, "loss": 12.1633, "step": 15070 }, { "epoch": 0.8206761825025696, "grad_norm": 0.6875066978735751, "learning_rate": 0.00013325195032163682, "loss": 12.2125, "step": 15071 }, { "epoch": 0.8207306364991526, "grad_norm": 0.6042554096651395, "learning_rate": 0.00013324363372501795, "loss": 12.2184, "step": 15072 }, { "epoch": 0.8207850904957356, "grad_norm": 0.5588330743067605, "learning_rate": 0.00013323531686989097, "loss": 12.1346, "step": 15073 }, { "epoch": 0.8208395444923186, "grad_norm": 0.6216993891491206, "learning_rate": 0.0001332269997563206, "loss": 12.2326, "step": 15074 }, { "epoch": 0.8208939984889015, "grad_norm": 0.5679478272925854, "learning_rate": 0.00013321868238437153, "loss": 12.2848, "step": 15075 }, { "epoch": 0.8209484524854846, "grad_norm": 0.6401907090556928, "learning_rate": 0.0001332103647541084, "loss": 12.2149, "step": 15076 }, { "epoch": 0.8210029064820676, "grad_norm": 0.5509524927768689, "learning_rate": 0.00013320204686559592, "loss": 12.1914, "step": 15077 }, { "epoch": 0.8210573604786506, "grad_norm": 0.5333250539046567, "learning_rate": 0.00013319372871889874, "loss": 12.1979, "step": 15078 }, { "epoch": 0.8211118144752336, "grad_norm": 0.6510537470053677, "learning_rate": 0.00013318541031408156, "loss": 12.1866, "step": 15079 }, { "epoch": 0.8211662684718166, "grad_norm": 0.5456246146264664, "learning_rate": 0.00013317709165120903, "loss": 12.1768, "step": 15080 }, { "epoch": 0.8212207224683996, "grad_norm": 0.60234366708656, "learning_rate": 0.0001331687727303459, "loss": 12.173, "step": 15081 }, { "epoch": 0.8212751764649827, "grad_norm": 0.5435572532492777, "learning_rate": 0.00013316045355155689, "loss": 12.2481, "step": 15082 }, { "epoch": 0.8213296304615657, "grad_norm": 0.5782641007791894, "learning_rate": 0.00013315213411490656, "loss": 12.126, "step": 15083 }, { "epoch": 0.8213840844581487, "grad_norm": 0.5774753450730651, "learning_rate": 0.0001331438144204597, "loss": 12.0706, "step": 15084 }, { "epoch": 0.8214385384547317, "grad_norm": 0.6488453649299252, "learning_rate": 0.00013313549446828096, "loss": 12.1743, "step": 15085 }, { "epoch": 0.8214929924513147, "grad_norm": 0.5730644068548515, "learning_rate": 0.00013312717425843508, "loss": 12.218, "step": 15086 }, { "epoch": 0.8215474464478978, "grad_norm": 0.5404712651524503, "learning_rate": 0.00013311885379098674, "loss": 12.1142, "step": 15087 }, { "epoch": 0.8216019004444808, "grad_norm": 0.5496206771070863, "learning_rate": 0.00013311053306600066, "loss": 12.2041, "step": 15088 }, { "epoch": 0.8216563544410638, "grad_norm": 0.5862083753367804, "learning_rate": 0.00013310221208354147, "loss": 12.2, "step": 15089 }, { "epoch": 0.8217108084376468, "grad_norm": 0.6033512458007323, "learning_rate": 0.00013309389084367396, "loss": 12.2182, "step": 15090 }, { "epoch": 0.8217652624342298, "grad_norm": 0.576839526949148, "learning_rate": 0.00013308556934646276, "loss": 12.3081, "step": 15091 }, { "epoch": 0.8218197164308128, "grad_norm": 0.5115106817823334, "learning_rate": 0.0001330772475919727, "loss": 12.1187, "step": 15092 }, { "epoch": 0.8218741704273959, "grad_norm": 0.6423138685815555, "learning_rate": 0.00013306892558026837, "loss": 12.3313, "step": 15093 }, { "epoch": 0.8219286244239788, "grad_norm": 0.5606849061577902, "learning_rate": 0.00013306060331141456, "loss": 12.1125, "step": 15094 }, { "epoch": 0.8219830784205618, "grad_norm": 0.5475736926996045, "learning_rate": 0.0001330522807854759, "loss": 12.2019, "step": 15095 }, { "epoch": 0.8220375324171448, "grad_norm": 0.5095605580907949, "learning_rate": 0.00013304395800251722, "loss": 12.1341, "step": 15096 }, { "epoch": 0.8220919864137278, "grad_norm": 0.5374397129569775, "learning_rate": 0.00013303563496260313, "loss": 12.304, "step": 15097 }, { "epoch": 0.8221464404103108, "grad_norm": 0.5965306221315761, "learning_rate": 0.00013302731166579842, "loss": 12.108, "step": 15098 }, { "epoch": 0.8222008944068939, "grad_norm": 0.6075627779084403, "learning_rate": 0.00013301898811216782, "loss": 12.2044, "step": 15099 }, { "epoch": 0.8222553484034769, "grad_norm": 0.5798476561085326, "learning_rate": 0.000133010664301776, "loss": 12.1951, "step": 15100 }, { "epoch": 0.8223098024000599, "grad_norm": 0.5656120162879976, "learning_rate": 0.00013300234023468774, "loss": 12.2176, "step": 15101 }, { "epoch": 0.8223642563966429, "grad_norm": 0.5392765374070314, "learning_rate": 0.00013299401591096774, "loss": 12.3709, "step": 15102 }, { "epoch": 0.8224187103932259, "grad_norm": 0.5474554976638221, "learning_rate": 0.00013298569133068073, "loss": 12.1893, "step": 15103 }, { "epoch": 0.8224731643898089, "grad_norm": 0.5922941430634817, "learning_rate": 0.00013297736649389147, "loss": 12.2515, "step": 15104 }, { "epoch": 0.822527618386392, "grad_norm": 0.6390417808924062, "learning_rate": 0.00013296904140066467, "loss": 12.2673, "step": 15105 }, { "epoch": 0.822582072382975, "grad_norm": 0.5320100230544484, "learning_rate": 0.00013296071605106507, "loss": 12.1982, "step": 15106 }, { "epoch": 0.822636526379558, "grad_norm": 0.574697692543034, "learning_rate": 0.00013295239044515742, "loss": 12.2736, "step": 15107 }, { "epoch": 0.822690980376141, "grad_norm": 0.5725886367874117, "learning_rate": 0.00013294406458300644, "loss": 12.1297, "step": 15108 }, { "epoch": 0.822745434372724, "grad_norm": 0.6607264694600522, "learning_rate": 0.0001329357384646769, "loss": 12.2569, "step": 15109 }, { "epoch": 0.822799888369307, "grad_norm": 0.5687599519704036, "learning_rate": 0.00013292741209023354, "loss": 12.2327, "step": 15110 }, { "epoch": 0.8228543423658901, "grad_norm": 0.5317134091370369, "learning_rate": 0.0001329190854597411, "loss": 12.1822, "step": 15111 }, { "epoch": 0.822908796362473, "grad_norm": 0.5366673126182483, "learning_rate": 0.00013291075857326434, "loss": 12.1526, "step": 15112 }, { "epoch": 0.822963250359056, "grad_norm": 0.6491831756234533, "learning_rate": 0.00013290243143086798, "loss": 12.2972, "step": 15113 }, { "epoch": 0.823017704355639, "grad_norm": 0.5172806571766189, "learning_rate": 0.00013289410403261682, "loss": 12.0933, "step": 15114 }, { "epoch": 0.823072158352222, "grad_norm": 0.5683596383075902, "learning_rate": 0.00013288577637857556, "loss": 12.2948, "step": 15115 }, { "epoch": 0.823126612348805, "grad_norm": 0.5875142686688732, "learning_rate": 0.00013287744846880904, "loss": 12.2356, "step": 15116 }, { "epoch": 0.8231810663453881, "grad_norm": 0.570076875886587, "learning_rate": 0.00013286912030338193, "loss": 12.3245, "step": 15117 }, { "epoch": 0.8232355203419711, "grad_norm": 0.6519852031719279, "learning_rate": 0.000132860791882359, "loss": 12.1499, "step": 15118 }, { "epoch": 0.8232899743385541, "grad_norm": 0.5983918322104361, "learning_rate": 0.0001328524632058051, "loss": 12.1896, "step": 15119 }, { "epoch": 0.8233444283351371, "grad_norm": 0.5932760518870501, "learning_rate": 0.0001328441342737849, "loss": 12.198, "step": 15120 }, { "epoch": 0.8233988823317201, "grad_norm": 0.59185708375135, "learning_rate": 0.00013283580508636323, "loss": 12.1776, "step": 15121 }, { "epoch": 0.8234533363283032, "grad_norm": 0.5858903392430636, "learning_rate": 0.0001328274756436048, "loss": 12.1823, "step": 15122 }, { "epoch": 0.8235077903248862, "grad_norm": 0.5887880925684659, "learning_rate": 0.00013281914594557442, "loss": 12.0918, "step": 15123 }, { "epoch": 0.8235622443214692, "grad_norm": 0.540027316882084, "learning_rate": 0.00013281081599233688, "loss": 12.1714, "step": 15124 }, { "epoch": 0.8236166983180522, "grad_norm": 0.5667513931707916, "learning_rate": 0.0001328024857839569, "loss": 12.1175, "step": 15125 }, { "epoch": 0.8236711523146352, "grad_norm": 0.6075293531496816, "learning_rate": 0.00013279415532049932, "loss": 12.1341, "step": 15126 }, { "epoch": 0.8237256063112182, "grad_norm": 0.6328301413775694, "learning_rate": 0.0001327858246020289, "loss": 12.2516, "step": 15127 }, { "epoch": 0.8237800603078013, "grad_norm": 0.5389076487138665, "learning_rate": 0.0001327774936286104, "loss": 12.3203, "step": 15128 }, { "epoch": 0.8238345143043843, "grad_norm": 0.5732364690525088, "learning_rate": 0.00013276916240030856, "loss": 12.2238, "step": 15129 }, { "epoch": 0.8238889683009673, "grad_norm": 0.6314953539941783, "learning_rate": 0.00013276083091718827, "loss": 12.236, "step": 15130 }, { "epoch": 0.8239434222975502, "grad_norm": 0.5614318534426168, "learning_rate": 0.00013275249917931424, "loss": 12.0909, "step": 15131 }, { "epoch": 0.8239978762941332, "grad_norm": 0.5909689994430131, "learning_rate": 0.00013274416718675133, "loss": 12.1413, "step": 15132 }, { "epoch": 0.8240523302907162, "grad_norm": 0.634909287782436, "learning_rate": 0.00013273583493956425, "loss": 12.3205, "step": 15133 }, { "epoch": 0.8241067842872993, "grad_norm": 0.5915683255470073, "learning_rate": 0.00013272750243781782, "loss": 12.0764, "step": 15134 }, { "epoch": 0.8241612382838823, "grad_norm": 0.6220220305808242, "learning_rate": 0.00013271916968157684, "loss": 12.2925, "step": 15135 }, { "epoch": 0.8242156922804653, "grad_norm": 0.5997585424873934, "learning_rate": 0.00013271083667090614, "loss": 12.1521, "step": 15136 }, { "epoch": 0.8242701462770483, "grad_norm": 0.5278769519062395, "learning_rate": 0.00013270250340587046, "loss": 12.1131, "step": 15137 }, { "epoch": 0.8243246002736313, "grad_norm": 0.524995503437437, "learning_rate": 0.00013269416988653468, "loss": 12.2127, "step": 15138 }, { "epoch": 0.8243790542702143, "grad_norm": 0.6492659430294754, "learning_rate": 0.0001326858361129635, "loss": 12.2993, "step": 15139 }, { "epoch": 0.8244335082667974, "grad_norm": 0.5796193885415427, "learning_rate": 0.00013267750208522175, "loss": 12.2585, "step": 15140 }, { "epoch": 0.8244879622633804, "grad_norm": 0.5702422691628836, "learning_rate": 0.00013266916780337433, "loss": 12.1578, "step": 15141 }, { "epoch": 0.8245424162599634, "grad_norm": 0.6843938991700117, "learning_rate": 0.00013266083326748596, "loss": 12.4301, "step": 15142 }, { "epoch": 0.8245968702565464, "grad_norm": 0.6428570758362845, "learning_rate": 0.00013265249847762146, "loss": 12.1424, "step": 15143 }, { "epoch": 0.8246513242531294, "grad_norm": 0.5287730976454701, "learning_rate": 0.00013264416343384568, "loss": 12.2458, "step": 15144 }, { "epoch": 0.8247057782497124, "grad_norm": 0.5955981955761246, "learning_rate": 0.0001326358281362234, "loss": 12.1539, "step": 15145 }, { "epoch": 0.8247602322462955, "grad_norm": 0.5641366752885426, "learning_rate": 0.0001326274925848194, "loss": 12.1635, "step": 15146 }, { "epoch": 0.8248146862428785, "grad_norm": 0.5511840238603661, "learning_rate": 0.00013261915677969862, "loss": 12.2499, "step": 15147 }, { "epoch": 0.8248691402394615, "grad_norm": 0.5706898292626512, "learning_rate": 0.00013261082072092578, "loss": 12.1368, "step": 15148 }, { "epoch": 0.8249235942360444, "grad_norm": 0.6157811933476967, "learning_rate": 0.00013260248440856572, "loss": 12.2726, "step": 15149 }, { "epoch": 0.8249780482326274, "grad_norm": 0.5371187183576617, "learning_rate": 0.00013259414784268328, "loss": 12.159, "step": 15150 }, { "epoch": 0.8250325022292105, "grad_norm": 0.5935211585790439, "learning_rate": 0.00013258581102334327, "loss": 12.0911, "step": 15151 }, { "epoch": 0.8250869562257935, "grad_norm": 0.5473734635512906, "learning_rate": 0.00013257747395061054, "loss": 12.2132, "step": 15152 }, { "epoch": 0.8251414102223765, "grad_norm": 0.5040749155652381, "learning_rate": 0.00013256913662454994, "loss": 12.1254, "step": 15153 }, { "epoch": 0.8251958642189595, "grad_norm": 0.5500514667341897, "learning_rate": 0.00013256079904522622, "loss": 12.2533, "step": 15154 }, { "epoch": 0.8252503182155425, "grad_norm": 0.5382832797192473, "learning_rate": 0.00013255246121270429, "loss": 12.1713, "step": 15155 }, { "epoch": 0.8253047722121255, "grad_norm": 0.5520042361371003, "learning_rate": 0.00013254412312704896, "loss": 12.1337, "step": 15156 }, { "epoch": 0.8253592262087086, "grad_norm": 0.569443223317474, "learning_rate": 0.00013253578478832507, "loss": 12.2426, "step": 15157 }, { "epoch": 0.8254136802052916, "grad_norm": 0.6446110993756711, "learning_rate": 0.00013252744619659745, "loss": 12.1518, "step": 15158 }, { "epoch": 0.8254681342018746, "grad_norm": 0.6038693948816372, "learning_rate": 0.00013251910735193097, "loss": 12.2149, "step": 15159 }, { "epoch": 0.8255225881984576, "grad_norm": 0.6525901872902011, "learning_rate": 0.00013251076825439043, "loss": 12.3306, "step": 15160 }, { "epoch": 0.8255770421950406, "grad_norm": 0.6481041452764463, "learning_rate": 0.00013250242890404076, "loss": 12.283, "step": 15161 }, { "epoch": 0.8256314961916236, "grad_norm": 0.5856391893384809, "learning_rate": 0.0001324940893009467, "loss": 12.0143, "step": 15162 }, { "epoch": 0.8256859501882067, "grad_norm": 0.5459209242515835, "learning_rate": 0.00013248574944517316, "loss": 12.1153, "step": 15163 }, { "epoch": 0.8257404041847897, "grad_norm": 0.6465502579114393, "learning_rate": 0.00013247740933678502, "loss": 12.1841, "step": 15164 }, { "epoch": 0.8257948581813727, "grad_norm": 0.7028110202838673, "learning_rate": 0.00013246906897584706, "loss": 12.1794, "step": 15165 }, { "epoch": 0.8258493121779557, "grad_norm": 0.5582118795351031, "learning_rate": 0.00013246072836242418, "loss": 12.1924, "step": 15166 }, { "epoch": 0.8259037661745386, "grad_norm": 0.6080292099406148, "learning_rate": 0.00013245238749658123, "loss": 12.2244, "step": 15167 }, { "epoch": 0.8259582201711216, "grad_norm": 0.6377228813808977, "learning_rate": 0.0001324440463783831, "loss": 12.0584, "step": 15168 }, { "epoch": 0.8260126741677047, "grad_norm": 0.5637368797575435, "learning_rate": 0.0001324357050078946, "loss": 12.2439, "step": 15169 }, { "epoch": 0.8260671281642877, "grad_norm": 0.5521350397117724, "learning_rate": 0.00013242736338518063, "loss": 12.1303, "step": 15170 }, { "epoch": 0.8261215821608707, "grad_norm": 0.5868245001879185, "learning_rate": 0.00013241902151030604, "loss": 12.1829, "step": 15171 }, { "epoch": 0.8261760361574537, "grad_norm": 0.5677736365759718, "learning_rate": 0.00013241067938333568, "loss": 12.1276, "step": 15172 }, { "epoch": 0.8262304901540367, "grad_norm": 0.5734079441240211, "learning_rate": 0.00013240233700433448, "loss": 12.2542, "step": 15173 }, { "epoch": 0.8262849441506197, "grad_norm": 0.5641702940489937, "learning_rate": 0.00013239399437336725, "loss": 12.3364, "step": 15174 }, { "epoch": 0.8263393981472028, "grad_norm": 0.5934181207445682, "learning_rate": 0.00013238565149049892, "loss": 12.2717, "step": 15175 }, { "epoch": 0.8263938521437858, "grad_norm": 0.5179909032091367, "learning_rate": 0.0001323773083557943, "loss": 12.2665, "step": 15176 }, { "epoch": 0.8264483061403688, "grad_norm": 0.5937253578214855, "learning_rate": 0.00013236896496931832, "loss": 12.3528, "step": 15177 }, { "epoch": 0.8265027601369518, "grad_norm": 0.502476889232368, "learning_rate": 0.0001323606213311358, "loss": 12.1983, "step": 15178 }, { "epoch": 0.8265572141335348, "grad_norm": 0.531901728632106, "learning_rate": 0.0001323522774413117, "loss": 11.9762, "step": 15179 }, { "epoch": 0.8266116681301178, "grad_norm": 0.5294521390225176, "learning_rate": 0.00013234393329991084, "loss": 12.1733, "step": 15180 }, { "epoch": 0.8266661221267009, "grad_norm": 0.5465460122389068, "learning_rate": 0.00013233558890699814, "loss": 12.0676, "step": 15181 }, { "epoch": 0.8267205761232839, "grad_norm": 0.549624763802986, "learning_rate": 0.00013232724426263853, "loss": 12.2706, "step": 15182 }, { "epoch": 0.8267750301198669, "grad_norm": 0.5855428195237342, "learning_rate": 0.00013231889936689677, "loss": 12.2152, "step": 15183 }, { "epoch": 0.8268294841164499, "grad_norm": 0.48878624781299707, "learning_rate": 0.00013231055421983787, "loss": 12.1706, "step": 15184 }, { "epoch": 0.8268839381130328, "grad_norm": 0.6439542590151076, "learning_rate": 0.00013230220882152666, "loss": 12.2569, "step": 15185 }, { "epoch": 0.826938392109616, "grad_norm": 0.5333101585897986, "learning_rate": 0.00013229386317202806, "loss": 12.0302, "step": 15186 }, { "epoch": 0.8269928461061989, "grad_norm": 0.6082116074025128, "learning_rate": 0.000132285517271407, "loss": 12.2354, "step": 15187 }, { "epoch": 0.8270473001027819, "grad_norm": 0.5978135139139774, "learning_rate": 0.00013227717111972834, "loss": 12.2528, "step": 15188 }, { "epoch": 0.8271017540993649, "grad_norm": 0.5878083936155488, "learning_rate": 0.0001322688247170569, "loss": 12.3464, "step": 15189 }, { "epoch": 0.8271562080959479, "grad_norm": 0.5858863888880815, "learning_rate": 0.00013226047806345773, "loss": 12.3016, "step": 15190 }, { "epoch": 0.8272106620925309, "grad_norm": 0.589348190036878, "learning_rate": 0.00013225213115899569, "loss": 12.1782, "step": 15191 }, { "epoch": 0.827265116089114, "grad_norm": 0.5216357234796167, "learning_rate": 0.00013224378400373564, "loss": 12.1173, "step": 15192 }, { "epoch": 0.827319570085697, "grad_norm": 0.5453169370096311, "learning_rate": 0.00013223543659774255, "loss": 12.1557, "step": 15193 }, { "epoch": 0.82737402408228, "grad_norm": 1.8532061690992983, "learning_rate": 0.00013222708894108126, "loss": 12.2979, "step": 15194 }, { "epoch": 0.827428478078863, "grad_norm": 0.5981247067276574, "learning_rate": 0.00013221874103381672, "loss": 12.1334, "step": 15195 }, { "epoch": 0.827482932075446, "grad_norm": 0.5542933873060595, "learning_rate": 0.00013221039287601388, "loss": 12.1865, "step": 15196 }, { "epoch": 0.827537386072029, "grad_norm": 0.5801329723495282, "learning_rate": 0.00013220204446773762, "loss": 12.2615, "step": 15197 }, { "epoch": 0.8275918400686121, "grad_norm": 0.7613352726613339, "learning_rate": 0.00013219369580905289, "loss": 12.1892, "step": 15198 }, { "epoch": 0.8276462940651951, "grad_norm": 0.6650532458101823, "learning_rate": 0.00013218534690002452, "loss": 12.2418, "step": 15199 }, { "epoch": 0.8277007480617781, "grad_norm": 0.6317919173574159, "learning_rate": 0.0001321769977407175, "loss": 12.1991, "step": 15200 }, { "epoch": 0.8277552020583611, "grad_norm": 0.5618309537187812, "learning_rate": 0.00013216864833119675, "loss": 12.1656, "step": 15201 }, { "epoch": 0.827809656054944, "grad_norm": 0.6232714602395312, "learning_rate": 0.00013216029867152724, "loss": 12.1143, "step": 15202 }, { "epoch": 0.827864110051527, "grad_norm": 0.5737033519336524, "learning_rate": 0.00013215194876177385, "loss": 12.1755, "step": 15203 }, { "epoch": 0.8279185640481102, "grad_norm": 0.70282100669915, "learning_rate": 0.00013214359860200148, "loss": 12.1138, "step": 15204 }, { "epoch": 0.8279730180446931, "grad_norm": 0.757833350297887, "learning_rate": 0.00013213524819227513, "loss": 12.187, "step": 15205 }, { "epoch": 0.8280274720412761, "grad_norm": 0.5789596533234671, "learning_rate": 0.00013212689753265965, "loss": 12.2284, "step": 15206 }, { "epoch": 0.8280819260378591, "grad_norm": 0.641352209545092, "learning_rate": 0.00013211854662322007, "loss": 12.3184, "step": 15207 }, { "epoch": 0.8281363800344421, "grad_norm": 0.5660627888936081, "learning_rate": 0.00013211019546402128, "loss": 12.1388, "step": 15208 }, { "epoch": 0.8281908340310251, "grad_norm": 0.5537899501912319, "learning_rate": 0.0001321018440551282, "loss": 12.2371, "step": 15209 }, { "epoch": 0.8282452880276082, "grad_norm": 0.7017780050980083, "learning_rate": 0.0001320934923966058, "loss": 12.2134, "step": 15210 }, { "epoch": 0.8282997420241912, "grad_norm": 0.629825583850304, "learning_rate": 0.00013208514048851903, "loss": 12.1223, "step": 15211 }, { "epoch": 0.8283541960207742, "grad_norm": 0.5852813246540232, "learning_rate": 0.00013207678833093285, "loss": 12.193, "step": 15212 }, { "epoch": 0.8284086500173572, "grad_norm": 0.654363836888728, "learning_rate": 0.00013206843592391217, "loss": 12.2933, "step": 15213 }, { "epoch": 0.8284631040139402, "grad_norm": 0.6444913765173723, "learning_rate": 0.00013206008326752198, "loss": 12.1784, "step": 15214 }, { "epoch": 0.8285175580105232, "grad_norm": 0.7632623438059727, "learning_rate": 0.00013205173036182717, "loss": 12.2421, "step": 15215 }, { "epoch": 0.8285720120071063, "grad_norm": 0.6093109181145168, "learning_rate": 0.00013204337720689274, "loss": 12.2275, "step": 15216 }, { "epoch": 0.8286264660036893, "grad_norm": 0.5887353236091951, "learning_rate": 0.0001320350238027836, "loss": 12.2388, "step": 15217 }, { "epoch": 0.8286809200002723, "grad_norm": 0.5890757678442023, "learning_rate": 0.00013202667014956481, "loss": 12.2197, "step": 15218 }, { "epoch": 0.8287353739968553, "grad_norm": 0.8375274103573752, "learning_rate": 0.0001320183162473012, "loss": 12.2183, "step": 15219 }, { "epoch": 0.8287898279934383, "grad_norm": 0.6257245685057095, "learning_rate": 0.0001320099620960578, "loss": 12.1336, "step": 15220 }, { "epoch": 0.8288442819900214, "grad_norm": 0.5952583992488294, "learning_rate": 0.00013200160769589962, "loss": 12.1276, "step": 15221 }, { "epoch": 0.8288987359866044, "grad_norm": 0.6480649554934045, "learning_rate": 0.00013199325304689153, "loss": 12.1835, "step": 15222 }, { "epoch": 0.8289531899831873, "grad_norm": 0.6080343846993349, "learning_rate": 0.00013198489814909855, "loss": 12.1816, "step": 15223 }, { "epoch": 0.8290076439797703, "grad_norm": 0.5783908524821597, "learning_rate": 0.00013197654300258564, "loss": 12.3538, "step": 15224 }, { "epoch": 0.8290620979763533, "grad_norm": 0.6053366205027224, "learning_rate": 0.0001319681876074178, "loss": 12.1792, "step": 15225 }, { "epoch": 0.8291165519729363, "grad_norm": 0.6561232963435246, "learning_rate": 0.0001319598319636599, "loss": 12.3573, "step": 15226 }, { "epoch": 0.8291710059695194, "grad_norm": 0.5575796826423648, "learning_rate": 0.00013195147607137703, "loss": 12.3334, "step": 15227 }, { "epoch": 0.8292254599661024, "grad_norm": 0.613532357704036, "learning_rate": 0.00013194311993063412, "loss": 12.1937, "step": 15228 }, { "epoch": 0.8292799139626854, "grad_norm": 0.6084414020114763, "learning_rate": 0.00013193476354149617, "loss": 12.213, "step": 15229 }, { "epoch": 0.8293343679592684, "grad_norm": 0.5463433763684223, "learning_rate": 0.00013192640690402811, "loss": 12.2368, "step": 15230 }, { "epoch": 0.8293888219558514, "grad_norm": 0.5157030945118751, "learning_rate": 0.00013191805001829495, "loss": 12.1926, "step": 15231 }, { "epoch": 0.8294432759524344, "grad_norm": 0.6152722825800643, "learning_rate": 0.00013190969288436172, "loss": 12.4055, "step": 15232 }, { "epoch": 0.8294977299490175, "grad_norm": 0.6345939630072156, "learning_rate": 0.00013190133550229334, "loss": 12.1898, "step": 15233 }, { "epoch": 0.8295521839456005, "grad_norm": 0.5770984625351699, "learning_rate": 0.00013189297787215485, "loss": 12.2821, "step": 15234 }, { "epoch": 0.8296066379421835, "grad_norm": 0.5530313175997955, "learning_rate": 0.00013188461999401118, "loss": 12.2363, "step": 15235 }, { "epoch": 0.8296610919387665, "grad_norm": 0.5676731684196313, "learning_rate": 0.0001318762618679274, "loss": 12.1333, "step": 15236 }, { "epoch": 0.8297155459353495, "grad_norm": 0.5897430392692317, "learning_rate": 0.00013186790349396842, "loss": 12.2853, "step": 15237 }, { "epoch": 0.8297699999319325, "grad_norm": 0.6143112745421574, "learning_rate": 0.0001318595448721993, "loss": 12.1977, "step": 15238 }, { "epoch": 0.8298244539285156, "grad_norm": 0.5961912593192616, "learning_rate": 0.000131851186002685, "loss": 12.2267, "step": 15239 }, { "epoch": 0.8298789079250986, "grad_norm": 0.5864432196548776, "learning_rate": 0.00013184282688549057, "loss": 12.0463, "step": 15240 }, { "epoch": 0.8299333619216815, "grad_norm": 0.550662140713574, "learning_rate": 0.00013183446752068094, "loss": 12.1639, "step": 15241 }, { "epoch": 0.8299878159182645, "grad_norm": 0.5855212190237381, "learning_rate": 0.00013182610790832118, "loss": 12.1433, "step": 15242 }, { "epoch": 0.8300422699148475, "grad_norm": 0.5995853240988076, "learning_rate": 0.00013181774804847627, "loss": 12.1656, "step": 15243 }, { "epoch": 0.8300967239114305, "grad_norm": 0.5616551083670744, "learning_rate": 0.00013180938794121118, "loss": 12.2573, "step": 15244 }, { "epoch": 0.8301511779080136, "grad_norm": 0.7404378825252865, "learning_rate": 0.000131801027586591, "loss": 12.3811, "step": 15245 }, { "epoch": 0.8302056319045966, "grad_norm": 0.6166144410712183, "learning_rate": 0.00013179266698468064, "loss": 12.3776, "step": 15246 }, { "epoch": 0.8302600859011796, "grad_norm": 0.5533530192204831, "learning_rate": 0.00013178430613554522, "loss": 12.1586, "step": 15247 }, { "epoch": 0.8303145398977626, "grad_norm": 0.5844910546885216, "learning_rate": 0.0001317759450392497, "loss": 12.3598, "step": 15248 }, { "epoch": 0.8303689938943456, "grad_norm": 0.6002406804984849, "learning_rate": 0.0001317675836958591, "loss": 12.2845, "step": 15249 }, { "epoch": 0.8304234478909286, "grad_norm": 0.4980976925076202, "learning_rate": 0.0001317592221054384, "loss": 12.1897, "step": 15250 }, { "epoch": 0.8304779018875117, "grad_norm": 0.6031308926270025, "learning_rate": 0.0001317508602680527, "loss": 12.2017, "step": 15251 }, { "epoch": 0.8305323558840947, "grad_norm": 0.6147733627442982, "learning_rate": 0.00013174249818376699, "loss": 12.2611, "step": 15252 }, { "epoch": 0.8305868098806777, "grad_norm": 0.5800706246651661, "learning_rate": 0.00013173413585264632, "loss": 12.3297, "step": 15253 }, { "epoch": 0.8306412638772607, "grad_norm": 0.5807351910438452, "learning_rate": 0.00013172577327475563, "loss": 12.3804, "step": 15254 }, { "epoch": 0.8306957178738437, "grad_norm": 0.617417880066591, "learning_rate": 0.00013171741045016002, "loss": 12.1884, "step": 15255 }, { "epoch": 0.8307501718704268, "grad_norm": 0.6256639537422343, "learning_rate": 0.00013170904737892452, "loss": 12.2294, "step": 15256 }, { "epoch": 0.8308046258670098, "grad_norm": 0.5417802792206997, "learning_rate": 0.00013170068406111413, "loss": 12.2778, "step": 15257 }, { "epoch": 0.8308590798635928, "grad_norm": 0.6743405301489448, "learning_rate": 0.00013169232049679394, "loss": 12.1945, "step": 15258 }, { "epoch": 0.8309135338601757, "grad_norm": 0.637377462288876, "learning_rate": 0.00013168395668602893, "loss": 12.2543, "step": 15259 }, { "epoch": 0.8309679878567587, "grad_norm": 0.5322625723022238, "learning_rate": 0.00013167559262888413, "loss": 12.1928, "step": 15260 }, { "epoch": 0.8310224418533417, "grad_norm": 0.6171535140221914, "learning_rate": 0.00013166722832542465, "loss": 12.2262, "step": 15261 }, { "epoch": 0.8310768958499248, "grad_norm": 0.8597285154023284, "learning_rate": 0.00013165886377571547, "loss": 12.2184, "step": 15262 }, { "epoch": 0.8311313498465078, "grad_norm": 0.5779101303293248, "learning_rate": 0.00013165049897982168, "loss": 12.2524, "step": 15263 }, { "epoch": 0.8311858038430908, "grad_norm": 0.5739459923844983, "learning_rate": 0.0001316421339378083, "loss": 12.2599, "step": 15264 }, { "epoch": 0.8312402578396738, "grad_norm": 0.6661241609743747, "learning_rate": 0.00013163376864974038, "loss": 12.2557, "step": 15265 }, { "epoch": 0.8312947118362568, "grad_norm": 0.5379554045635468, "learning_rate": 0.00013162540311568294, "loss": 12.1778, "step": 15266 }, { "epoch": 0.8313491658328398, "grad_norm": 0.5524540929241767, "learning_rate": 0.00013161703733570107, "loss": 12.1688, "step": 15267 }, { "epoch": 0.8314036198294229, "grad_norm": 0.6297163416154375, "learning_rate": 0.00013160867130985985, "loss": 12.2242, "step": 15268 }, { "epoch": 0.8314580738260059, "grad_norm": 0.9573883676541012, "learning_rate": 0.00013160030503822428, "loss": 12.1995, "step": 15269 }, { "epoch": 0.8315125278225889, "grad_norm": 0.6181746204171851, "learning_rate": 0.00013159193852085944, "loss": 12.1728, "step": 15270 }, { "epoch": 0.8315669818191719, "grad_norm": 0.5946896226480637, "learning_rate": 0.00013158357175783038, "loss": 12.051, "step": 15271 }, { "epoch": 0.8316214358157549, "grad_norm": 0.5841329282239158, "learning_rate": 0.00013157520474920214, "loss": 12.1945, "step": 15272 }, { "epoch": 0.8316758898123379, "grad_norm": 0.6162149903340335, "learning_rate": 0.00013156683749503988, "loss": 12.186, "step": 15273 }, { "epoch": 0.831730343808921, "grad_norm": 0.5633195206483552, "learning_rate": 0.00013155846999540858, "loss": 12.1581, "step": 15274 }, { "epoch": 0.831784797805504, "grad_norm": 0.5604639867880522, "learning_rate": 0.0001315501022503733, "loss": 12.0926, "step": 15275 }, { "epoch": 0.831839251802087, "grad_norm": 0.5308652778688698, "learning_rate": 0.00013154173425999915, "loss": 12.0024, "step": 15276 }, { "epoch": 0.83189370579867, "grad_norm": 0.5447564444093732, "learning_rate": 0.00013153336602435113, "loss": 12.1712, "step": 15277 }, { "epoch": 0.8319481597952529, "grad_norm": 0.5386724699747754, "learning_rate": 0.00013152499754349445, "loss": 11.9927, "step": 15278 }, { "epoch": 0.8320026137918359, "grad_norm": 0.5797490134641445, "learning_rate": 0.00013151662881749407, "loss": 12.2172, "step": 15279 }, { "epoch": 0.832057067788419, "grad_norm": 0.626816413248062, "learning_rate": 0.0001315082598464151, "loss": 12.1729, "step": 15280 }, { "epoch": 0.832111521785002, "grad_norm": 0.5420185299027364, "learning_rate": 0.0001314998906303226, "loss": 12.1501, "step": 15281 }, { "epoch": 0.832165975781585, "grad_norm": 0.6286576341012895, "learning_rate": 0.00013149152116928169, "loss": 12.3009, "step": 15282 }, { "epoch": 0.832220429778168, "grad_norm": 0.5347714267872193, "learning_rate": 0.00013148315146335743, "loss": 12.094, "step": 15283 }, { "epoch": 0.832274883774751, "grad_norm": 0.5982064880757932, "learning_rate": 0.0001314747815126149, "loss": 12.0955, "step": 15284 }, { "epoch": 0.8323293377713341, "grad_norm": 0.5360239055325735, "learning_rate": 0.00013146641131711918, "loss": 12.207, "step": 15285 }, { "epoch": 0.8323837917679171, "grad_norm": 0.6032727350194691, "learning_rate": 0.0001314580408769354, "loss": 12.2156, "step": 15286 }, { "epoch": 0.8324382457645001, "grad_norm": 0.5264867464022237, "learning_rate": 0.00013144967019212858, "loss": 12.2824, "step": 15287 }, { "epoch": 0.8324926997610831, "grad_norm": 0.614179478401167, "learning_rate": 0.00013144129926276387, "loss": 12.2034, "step": 15288 }, { "epoch": 0.8325471537576661, "grad_norm": 0.585713319770856, "learning_rate": 0.00013143292808890633, "loss": 12.1851, "step": 15289 }, { "epoch": 0.8326016077542491, "grad_norm": 0.5740739803646323, "learning_rate": 0.00013142455667062108, "loss": 12.01, "step": 15290 }, { "epoch": 0.8326560617508322, "grad_norm": 0.5796548730360052, "learning_rate": 0.00013141618500797322, "loss": 12.244, "step": 15291 }, { "epoch": 0.8327105157474152, "grad_norm": 0.5597923489671587, "learning_rate": 0.0001314078131010278, "loss": 12.2179, "step": 15292 }, { "epoch": 0.8327649697439982, "grad_norm": 0.6321308750754813, "learning_rate": 0.00013139944094985, "loss": 12.2756, "step": 15293 }, { "epoch": 0.8328194237405812, "grad_norm": 0.5446385936148014, "learning_rate": 0.00013139106855450486, "loss": 12.2949, "step": 15294 }, { "epoch": 0.8328738777371641, "grad_norm": 0.6349345381171592, "learning_rate": 0.00013138269591505752, "loss": 12.0296, "step": 15295 }, { "epoch": 0.8329283317337471, "grad_norm": 0.6889425832186824, "learning_rate": 0.00013137432303157305, "loss": 12.3, "step": 15296 }, { "epoch": 0.8329827857303302, "grad_norm": 0.5147927373090182, "learning_rate": 0.0001313659499041166, "loss": 12.211, "step": 15297 }, { "epoch": 0.8330372397269132, "grad_norm": 0.6220664367543297, "learning_rate": 0.00013135757653275326, "loss": 12.3032, "step": 15298 }, { "epoch": 0.8330916937234962, "grad_norm": 0.5922621718306847, "learning_rate": 0.00013134920291754814, "loss": 12.1767, "step": 15299 }, { "epoch": 0.8331461477200792, "grad_norm": 0.598265551750958, "learning_rate": 0.00013134082905856637, "loss": 12.1843, "step": 15300 }, { "epoch": 0.8332006017166622, "grad_norm": 0.6566281451981195, "learning_rate": 0.00013133245495587306, "loss": 12.1329, "step": 15301 }, { "epoch": 0.8332550557132452, "grad_norm": 0.6198392400520069, "learning_rate": 0.0001313240806095333, "loss": 12.2163, "step": 15302 }, { "epoch": 0.8333095097098283, "grad_norm": 0.5089681050707612, "learning_rate": 0.0001313157060196123, "loss": 12.1203, "step": 15303 }, { "epoch": 0.8333639637064113, "grad_norm": 0.5946276409729907, "learning_rate": 0.00013130733118617505, "loss": 12.3139, "step": 15304 }, { "epoch": 0.8334184177029943, "grad_norm": 0.6768073905113495, "learning_rate": 0.00013129895610928678, "loss": 12.0899, "step": 15305 }, { "epoch": 0.8334728716995773, "grad_norm": 0.5706898431899106, "learning_rate": 0.00013129058078901256, "loss": 12.2588, "step": 15306 }, { "epoch": 0.8335273256961603, "grad_norm": 0.5295503593770585, "learning_rate": 0.00013128220522541753, "loss": 12.2527, "step": 15307 }, { "epoch": 0.8335817796927433, "grad_norm": 0.5914096601533734, "learning_rate": 0.00013127382941856687, "loss": 12.304, "step": 15308 }, { "epoch": 0.8336362336893264, "grad_norm": 0.555249289629514, "learning_rate": 0.0001312654533685256, "loss": 12.0958, "step": 15309 }, { "epoch": 0.8336906876859094, "grad_norm": 0.553953495231632, "learning_rate": 0.00013125707707535897, "loss": 12.0996, "step": 15310 }, { "epoch": 0.8337451416824924, "grad_norm": 0.5426767378037972, "learning_rate": 0.00013124870053913206, "loss": 12.1959, "step": 15311 }, { "epoch": 0.8337995956790754, "grad_norm": 0.5638715609519398, "learning_rate": 0.00013124032375991, "loss": 12.1801, "step": 15312 }, { "epoch": 0.8338540496756583, "grad_norm": 0.5968174649338724, "learning_rate": 0.00013123194673775798, "loss": 12.2152, "step": 15313 }, { "epoch": 0.8339085036722413, "grad_norm": 0.5734240715647675, "learning_rate": 0.00013122356947274107, "loss": 12.1478, "step": 15314 }, { "epoch": 0.8339629576688244, "grad_norm": 0.611642059701702, "learning_rate": 0.00013121519196492444, "loss": 12.1398, "step": 15315 }, { "epoch": 0.8340174116654074, "grad_norm": 0.6082665454481369, "learning_rate": 0.00013120681421437325, "loss": 12.0678, "step": 15316 }, { "epoch": 0.8340718656619904, "grad_norm": 0.6145617171266933, "learning_rate": 0.00013119843622115264, "loss": 12.2043, "step": 15317 }, { "epoch": 0.8341263196585734, "grad_norm": 0.5730138743482941, "learning_rate": 0.0001311900579853278, "loss": 12.3287, "step": 15318 }, { "epoch": 0.8341807736551564, "grad_norm": 0.5975019952444647, "learning_rate": 0.00013118167950696382, "loss": 12.1496, "step": 15319 }, { "epoch": 0.8342352276517395, "grad_norm": 0.5874154839513164, "learning_rate": 0.00013117330078612582, "loss": 12.1702, "step": 15320 }, { "epoch": 0.8342896816483225, "grad_norm": 0.5368851943471138, "learning_rate": 0.00013116492182287904, "loss": 12.1665, "step": 15321 }, { "epoch": 0.8343441356449055, "grad_norm": 0.6005496640293485, "learning_rate": 0.0001311565426172886, "loss": 12.1575, "step": 15322 }, { "epoch": 0.8343985896414885, "grad_norm": 0.5216150312343807, "learning_rate": 0.00013114816316941967, "loss": 12.1666, "step": 15323 }, { "epoch": 0.8344530436380715, "grad_norm": 0.5951216910198904, "learning_rate": 0.0001311397834793374, "loss": 12.1675, "step": 15324 }, { "epoch": 0.8345074976346545, "grad_norm": 0.6309894694156547, "learning_rate": 0.00013113140354710693, "loss": 12.2063, "step": 15325 }, { "epoch": 0.8345619516312376, "grad_norm": 0.5681904907277748, "learning_rate": 0.00013112302337279342, "loss": 12.1512, "step": 15326 }, { "epoch": 0.8346164056278206, "grad_norm": 0.5735928927624965, "learning_rate": 0.00013111464295646212, "loss": 12.3276, "step": 15327 }, { "epoch": 0.8346708596244036, "grad_norm": 0.6591312024956174, "learning_rate": 0.00013110626229817813, "loss": 12.224, "step": 15328 }, { "epoch": 0.8347253136209866, "grad_norm": 0.6444292844076731, "learning_rate": 0.0001310978813980066, "loss": 12.2489, "step": 15329 }, { "epoch": 0.8347797676175696, "grad_norm": 0.6733190185260566, "learning_rate": 0.00013108950025601275, "loss": 12.1378, "step": 15330 }, { "epoch": 0.8348342216141525, "grad_norm": 0.5369684470576751, "learning_rate": 0.00013108111887226174, "loss": 12.0915, "step": 15331 }, { "epoch": 0.8348886756107357, "grad_norm": 0.6256900431355148, "learning_rate": 0.0001310727372468187, "loss": 12.2504, "step": 15332 }, { "epoch": 0.8349431296073186, "grad_norm": 0.6406448205533162, "learning_rate": 0.0001310643553797489, "loss": 12.1589, "step": 15333 }, { "epoch": 0.8349975836039016, "grad_norm": 0.5376786196408638, "learning_rate": 0.0001310559732711174, "loss": 12.2238, "step": 15334 }, { "epoch": 0.8350520376004846, "grad_norm": 0.5783970591286314, "learning_rate": 0.0001310475909209895, "loss": 12.3498, "step": 15335 }, { "epoch": 0.8351064915970676, "grad_norm": 0.5942024240935793, "learning_rate": 0.0001310392083294303, "loss": 12.188, "step": 15336 }, { "epoch": 0.8351609455936506, "grad_norm": 0.60545358347949, "learning_rate": 0.00013103082549650497, "loss": 12.2307, "step": 15337 }, { "epoch": 0.8352153995902337, "grad_norm": 0.5636018467561122, "learning_rate": 0.00013102244242227878, "loss": 12.2584, "step": 15338 }, { "epoch": 0.8352698535868167, "grad_norm": 0.580316800840767, "learning_rate": 0.0001310140591068169, "loss": 12.1868, "step": 15339 }, { "epoch": 0.8353243075833997, "grad_norm": 0.5418777539446611, "learning_rate": 0.00013100567555018446, "loss": 12.1379, "step": 15340 }, { "epoch": 0.8353787615799827, "grad_norm": 0.6088070564598652, "learning_rate": 0.0001309972917524467, "loss": 12.1415, "step": 15341 }, { "epoch": 0.8354332155765657, "grad_norm": 0.6252243895763365, "learning_rate": 0.00013098890771366878, "loss": 12.2393, "step": 15342 }, { "epoch": 0.8354876695731487, "grad_norm": 0.5364906707347072, "learning_rate": 0.00013098052343391597, "loss": 12.147, "step": 15343 }, { "epoch": 0.8355421235697318, "grad_norm": 0.5666736471551951, "learning_rate": 0.00013097213891325336, "loss": 12.1947, "step": 15344 }, { "epoch": 0.8355965775663148, "grad_norm": 0.5833266248887412, "learning_rate": 0.00013096375415174623, "loss": 12.1789, "step": 15345 }, { "epoch": 0.8356510315628978, "grad_norm": 0.6036746488446625, "learning_rate": 0.00013095536914945973, "loss": 12.2361, "step": 15346 }, { "epoch": 0.8357054855594808, "grad_norm": 0.5709164918817299, "learning_rate": 0.00013094698390645913, "loss": 12.1975, "step": 15347 }, { "epoch": 0.8357599395560638, "grad_norm": 0.5415393121643536, "learning_rate": 0.00013093859842280955, "loss": 12.1258, "step": 15348 }, { "epoch": 0.8358143935526468, "grad_norm": 0.6444672849336692, "learning_rate": 0.00013093021269857625, "loss": 12.1467, "step": 15349 }, { "epoch": 0.8358688475492299, "grad_norm": 0.6121385670183787, "learning_rate": 0.00013092182673382445, "loss": 12.2194, "step": 15350 }, { "epoch": 0.8359233015458128, "grad_norm": 0.5742302760383521, "learning_rate": 0.0001309134405286193, "loss": 12.1077, "step": 15351 }, { "epoch": 0.8359777555423958, "grad_norm": 0.5597706966472342, "learning_rate": 0.00013090505408302612, "loss": 12.2605, "step": 15352 }, { "epoch": 0.8360322095389788, "grad_norm": 0.6120210383203647, "learning_rate": 0.00013089666739711, "loss": 12.3513, "step": 15353 }, { "epoch": 0.8360866635355618, "grad_norm": 0.6357714430157161, "learning_rate": 0.00013088828047093623, "loss": 12.22, "step": 15354 }, { "epoch": 0.8361411175321449, "grad_norm": 0.560241191145675, "learning_rate": 0.00013087989330457, "loss": 12.1131, "step": 15355 }, { "epoch": 0.8361955715287279, "grad_norm": 0.558665134696943, "learning_rate": 0.00013087150589807656, "loss": 12.2699, "step": 15356 }, { "epoch": 0.8362500255253109, "grad_norm": 0.6030718684554315, "learning_rate": 0.00013086311825152112, "loss": 12.164, "step": 15357 }, { "epoch": 0.8363044795218939, "grad_norm": 0.5626851183441052, "learning_rate": 0.00013085473036496888, "loss": 12.2383, "step": 15358 }, { "epoch": 0.8363589335184769, "grad_norm": 0.60581591719412, "learning_rate": 0.00013084634223848506, "loss": 12.1086, "step": 15359 }, { "epoch": 0.8364133875150599, "grad_norm": 0.6441289035553069, "learning_rate": 0.00013083795387213495, "loss": 12.1012, "step": 15360 }, { "epoch": 0.836467841511643, "grad_norm": 0.5432182200872989, "learning_rate": 0.0001308295652659837, "loss": 12.2727, "step": 15361 }, { "epoch": 0.836522295508226, "grad_norm": 0.5112965914274825, "learning_rate": 0.00013082117642009662, "loss": 12.1125, "step": 15362 }, { "epoch": 0.836576749504809, "grad_norm": 0.5197450256056448, "learning_rate": 0.0001308127873345389, "loss": 12.117, "step": 15363 }, { "epoch": 0.836631203501392, "grad_norm": 0.5860435056081319, "learning_rate": 0.00013080439800937575, "loss": 12.2292, "step": 15364 }, { "epoch": 0.836685657497975, "grad_norm": 0.5629178413356406, "learning_rate": 0.00013079600844467242, "loss": 12.196, "step": 15365 }, { "epoch": 0.836740111494558, "grad_norm": 0.5758380845386492, "learning_rate": 0.0001307876186404942, "loss": 12.2736, "step": 15366 }, { "epoch": 0.8367945654911411, "grad_norm": 0.5345267079541275, "learning_rate": 0.0001307792285969063, "loss": 12.2351, "step": 15367 }, { "epoch": 0.836849019487724, "grad_norm": 0.6010343621198327, "learning_rate": 0.00013077083831397395, "loss": 12.1085, "step": 15368 }, { "epoch": 0.836903473484307, "grad_norm": 0.5567987453339249, "learning_rate": 0.00013076244779176244, "loss": 12.2886, "step": 15369 }, { "epoch": 0.83695792748089, "grad_norm": 0.575988620818282, "learning_rate": 0.0001307540570303369, "loss": 12.0892, "step": 15370 }, { "epoch": 0.837012381477473, "grad_norm": 0.5461307537724167, "learning_rate": 0.00013074566602976268, "loss": 12.1839, "step": 15371 }, { "epoch": 0.837066835474056, "grad_norm": 0.5741964053298968, "learning_rate": 0.000130737274790105, "loss": 12.288, "step": 15372 }, { "epoch": 0.8371212894706391, "grad_norm": 0.5969391944247079, "learning_rate": 0.00013072888331142914, "loss": 12.0641, "step": 15373 }, { "epoch": 0.8371757434672221, "grad_norm": 0.5987646358058737, "learning_rate": 0.00013072049159380033, "loss": 12.2112, "step": 15374 }, { "epoch": 0.8372301974638051, "grad_norm": 0.5455711004770989, "learning_rate": 0.0001307120996372838, "loss": 12.2458, "step": 15375 }, { "epoch": 0.8372846514603881, "grad_norm": 0.5414312185891778, "learning_rate": 0.00013070370744194487, "loss": 12.127, "step": 15376 }, { "epoch": 0.8373391054569711, "grad_norm": 0.6305935747972193, "learning_rate": 0.00013069531500784873, "loss": 12.2119, "step": 15377 }, { "epoch": 0.8373935594535541, "grad_norm": 0.5685400442083921, "learning_rate": 0.00013068692233506068, "loss": 12.2422, "step": 15378 }, { "epoch": 0.8374480134501372, "grad_norm": 0.5949894077370246, "learning_rate": 0.000130678529423646, "loss": 12.1692, "step": 15379 }, { "epoch": 0.8375024674467202, "grad_norm": 0.6653462622638929, "learning_rate": 0.00013067013627366991, "loss": 12.2144, "step": 15380 }, { "epoch": 0.8375569214433032, "grad_norm": 0.5731677512947161, "learning_rate": 0.00013066174288519768, "loss": 12.1916, "step": 15381 }, { "epoch": 0.8376113754398862, "grad_norm": 0.6319538726760888, "learning_rate": 0.0001306533492582946, "loss": 12.2809, "step": 15382 }, { "epoch": 0.8376658294364692, "grad_norm": 0.6846801198465237, "learning_rate": 0.00013064495539302594, "loss": 12.0877, "step": 15383 }, { "epoch": 0.8377202834330522, "grad_norm": 0.534280489313378, "learning_rate": 0.000130636561289457, "loss": 12.076, "step": 15384 }, { "epoch": 0.8377747374296353, "grad_norm": 0.5713743213754297, "learning_rate": 0.000130628166947653, "loss": 12.1155, "step": 15385 }, { "epoch": 0.8378291914262183, "grad_norm": 0.6238325355933657, "learning_rate": 0.0001306197723676792, "loss": 12.1667, "step": 15386 }, { "epoch": 0.8378836454228012, "grad_norm": 0.6029480253780464, "learning_rate": 0.00013061137754960094, "loss": 12.1733, "step": 15387 }, { "epoch": 0.8379380994193842, "grad_norm": 0.5375203365801448, "learning_rate": 0.0001306029824934835, "loss": 12.2736, "step": 15388 }, { "epoch": 0.8379925534159672, "grad_norm": 0.6180323788492011, "learning_rate": 0.00013059458719939215, "loss": 12.2002, "step": 15389 }, { "epoch": 0.8380470074125503, "grad_norm": 0.5576984568599684, "learning_rate": 0.0001305861916673921, "loss": 12.0192, "step": 15390 }, { "epoch": 0.8381014614091333, "grad_norm": 0.5571576789776306, "learning_rate": 0.00013057779589754876, "loss": 12.1686, "step": 15391 }, { "epoch": 0.8381559154057163, "grad_norm": 0.5723703608319274, "learning_rate": 0.0001305693998899273, "loss": 12.1266, "step": 15392 }, { "epoch": 0.8382103694022993, "grad_norm": 0.5639093049927778, "learning_rate": 0.00013056100364459305, "loss": 12.1069, "step": 15393 }, { "epoch": 0.8382648233988823, "grad_norm": 0.5571453029303685, "learning_rate": 0.00013055260716161136, "loss": 12.1868, "step": 15394 }, { "epoch": 0.8383192773954653, "grad_norm": 0.5848548378507161, "learning_rate": 0.00013054421044104744, "loss": 12.2119, "step": 15395 }, { "epoch": 0.8383737313920484, "grad_norm": 0.5833350880830424, "learning_rate": 0.00013053581348296663, "loss": 12.2092, "step": 15396 }, { "epoch": 0.8384281853886314, "grad_norm": 0.564726235079284, "learning_rate": 0.0001305274162874342, "loss": 12.2672, "step": 15397 }, { "epoch": 0.8384826393852144, "grad_norm": 0.5640123189749882, "learning_rate": 0.00013051901885451544, "loss": 12.1969, "step": 15398 }, { "epoch": 0.8385370933817974, "grad_norm": 0.5567158354042706, "learning_rate": 0.00013051062118427575, "loss": 12.1581, "step": 15399 }, { "epoch": 0.8385915473783804, "grad_norm": 0.6333351307124527, "learning_rate": 0.0001305022232767803, "loss": 12.2515, "step": 15400 }, { "epoch": 0.8386460013749634, "grad_norm": 0.6196519026665442, "learning_rate": 0.00013049382513209446, "loss": 12.157, "step": 15401 }, { "epoch": 0.8387004553715465, "grad_norm": 0.6613005026286023, "learning_rate": 0.0001304854267502835, "loss": 12.2158, "step": 15402 }, { "epoch": 0.8387549093681295, "grad_norm": 0.5624156990839256, "learning_rate": 0.00013047702813141274, "loss": 12.1756, "step": 15403 }, { "epoch": 0.8388093633647125, "grad_norm": 0.6391498061034914, "learning_rate": 0.00013046862927554756, "loss": 12.1817, "step": 15404 }, { "epoch": 0.8388638173612954, "grad_norm": 0.7324785085813693, "learning_rate": 0.00013046023018275314, "loss": 12.4085, "step": 15405 }, { "epoch": 0.8389182713578784, "grad_norm": 0.5810583470765037, "learning_rate": 0.00013045183085309492, "loss": 12.3102, "step": 15406 }, { "epoch": 0.8389727253544614, "grad_norm": 0.5512366258667796, "learning_rate": 0.00013044343128663813, "loss": 12.1526, "step": 15407 }, { "epoch": 0.8390271793510445, "grad_norm": 0.5588903997378186, "learning_rate": 0.0001304350314834481, "loss": 12.1694, "step": 15408 }, { "epoch": 0.8390816333476275, "grad_norm": 0.5795679654907878, "learning_rate": 0.00013042663144359015, "loss": 12.3495, "step": 15409 }, { "epoch": 0.8391360873442105, "grad_norm": 0.5369781674763817, "learning_rate": 0.00013041823116712964, "loss": 12.2644, "step": 15410 }, { "epoch": 0.8391905413407935, "grad_norm": 0.5698494013959302, "learning_rate": 0.00013040983065413185, "loss": 12.1736, "step": 15411 }, { "epoch": 0.8392449953373765, "grad_norm": 0.5609634761067283, "learning_rate": 0.00013040142990466212, "loss": 12.3333, "step": 15412 }, { "epoch": 0.8392994493339595, "grad_norm": 0.5922172919484425, "learning_rate": 0.0001303930289187858, "loss": 12.1139, "step": 15413 }, { "epoch": 0.8393539033305426, "grad_norm": 0.5710970376219491, "learning_rate": 0.00013038462769656816, "loss": 12.2493, "step": 15414 }, { "epoch": 0.8394083573271256, "grad_norm": 0.6130269140220768, "learning_rate": 0.00013037622623807458, "loss": 12.3838, "step": 15415 }, { "epoch": 0.8394628113237086, "grad_norm": 0.5685305558318331, "learning_rate": 0.00013036782454337034, "loss": 12.264, "step": 15416 }, { "epoch": 0.8395172653202916, "grad_norm": 0.6027775225125119, "learning_rate": 0.00013035942261252083, "loss": 12.0201, "step": 15417 }, { "epoch": 0.8395717193168746, "grad_norm": 0.5522397647824678, "learning_rate": 0.00013035102044559133, "loss": 12.0813, "step": 15418 }, { "epoch": 0.8396261733134577, "grad_norm": 0.6259116033321801, "learning_rate": 0.00013034261804264726, "loss": 12.2236, "step": 15419 }, { "epoch": 0.8396806273100407, "grad_norm": 0.5905157049713758, "learning_rate": 0.00013033421540375385, "loss": 12.3173, "step": 15420 }, { "epoch": 0.8397350813066237, "grad_norm": 0.5151391159511745, "learning_rate": 0.0001303258125289765, "loss": 12.1515, "step": 15421 }, { "epoch": 0.8397895353032067, "grad_norm": 0.531701959746869, "learning_rate": 0.00013031740941838057, "loss": 12.1666, "step": 15422 }, { "epoch": 0.8398439892997897, "grad_norm": 0.522400648155579, "learning_rate": 0.00013030900607203136, "loss": 12.1277, "step": 15423 }, { "epoch": 0.8398984432963726, "grad_norm": 0.6362661145573922, "learning_rate": 0.00013030060248999425, "loss": 12.2302, "step": 15424 }, { "epoch": 0.8399528972929557, "grad_norm": 0.5138634705111699, "learning_rate": 0.00013029219867233458, "loss": 12.1477, "step": 15425 }, { "epoch": 0.8400073512895387, "grad_norm": 0.5504408190011557, "learning_rate": 0.00013028379461911766, "loss": 12.2609, "step": 15426 }, { "epoch": 0.8400618052861217, "grad_norm": 0.5817229386487759, "learning_rate": 0.0001302753903304089, "loss": 12.1662, "step": 15427 }, { "epoch": 0.8401162592827047, "grad_norm": 0.5779589417910154, "learning_rate": 0.00013026698580627364, "loss": 12.1039, "step": 15428 }, { "epoch": 0.8401707132792877, "grad_norm": 0.6501390033485781, "learning_rate": 0.00013025858104677722, "loss": 12.3038, "step": 15429 }, { "epoch": 0.8402251672758707, "grad_norm": 0.547251099236945, "learning_rate": 0.00013025017605198494, "loss": 12.1442, "step": 15430 }, { "epoch": 0.8402796212724538, "grad_norm": 0.5299319663603849, "learning_rate": 0.00013024177082196226, "loss": 12.1717, "step": 15431 }, { "epoch": 0.8403340752690368, "grad_norm": 0.5703350863581309, "learning_rate": 0.00013023336535677454, "loss": 12.2853, "step": 15432 }, { "epoch": 0.8403885292656198, "grad_norm": 0.6139029056887727, "learning_rate": 0.00013022495965648705, "loss": 12.2486, "step": 15433 }, { "epoch": 0.8404429832622028, "grad_norm": 0.5488466788920311, "learning_rate": 0.00013021655372116525, "loss": 12.1232, "step": 15434 }, { "epoch": 0.8404974372587858, "grad_norm": 0.5072533971790376, "learning_rate": 0.0001302081475508744, "loss": 12.1123, "step": 15435 }, { "epoch": 0.8405518912553688, "grad_norm": 0.6553603035867362, "learning_rate": 0.00013019974114567993, "loss": 12.4077, "step": 15436 }, { "epoch": 0.8406063452519519, "grad_norm": 0.5963926708645846, "learning_rate": 0.00013019133450564724, "loss": 12.0707, "step": 15437 }, { "epoch": 0.8406607992485349, "grad_norm": 0.5766582613875916, "learning_rate": 0.00013018292763084167, "loss": 12.1169, "step": 15438 }, { "epoch": 0.8407152532451179, "grad_norm": 0.598765168143679, "learning_rate": 0.0001301745205213286, "loss": 12.2108, "step": 15439 }, { "epoch": 0.8407697072417009, "grad_norm": 0.6183119765379161, "learning_rate": 0.0001301661131771734, "loss": 12.3027, "step": 15440 }, { "epoch": 0.8408241612382839, "grad_norm": 0.5438659222201025, "learning_rate": 0.0001301577055984414, "loss": 12.2832, "step": 15441 }, { "epoch": 0.8408786152348668, "grad_norm": 0.5569020818739407, "learning_rate": 0.00013014929778519806, "loss": 12.1564, "step": 15442 }, { "epoch": 0.84093306923145, "grad_norm": 0.5396355165755193, "learning_rate": 0.00013014088973750874, "loss": 12.2076, "step": 15443 }, { "epoch": 0.8409875232280329, "grad_norm": 0.5594209164770022, "learning_rate": 0.00013013248145543878, "loss": 12.1998, "step": 15444 }, { "epoch": 0.8410419772246159, "grad_norm": 0.5582406828411369, "learning_rate": 0.00013012407293905363, "loss": 12.0757, "step": 15445 }, { "epoch": 0.8410964312211989, "grad_norm": 0.557935986177484, "learning_rate": 0.00013011566418841858, "loss": 12.1023, "step": 15446 }, { "epoch": 0.8411508852177819, "grad_norm": 0.5590660230542867, "learning_rate": 0.00013010725520359908, "loss": 12.2449, "step": 15447 }, { "epoch": 0.8412053392143649, "grad_norm": 0.5686932609582025, "learning_rate": 0.00013009884598466054, "loss": 12.1602, "step": 15448 }, { "epoch": 0.841259793210948, "grad_norm": 0.5438917008388785, "learning_rate": 0.00013009043653166834, "loss": 12.1989, "step": 15449 }, { "epoch": 0.841314247207531, "grad_norm": 0.5770990080511804, "learning_rate": 0.00013008202684468786, "loss": 12.0269, "step": 15450 }, { "epoch": 0.841368701204114, "grad_norm": 0.6950577279601987, "learning_rate": 0.00013007361692378446, "loss": 12.2033, "step": 15451 }, { "epoch": 0.841423155200697, "grad_norm": 0.5215834135230242, "learning_rate": 0.00013006520676902357, "loss": 12.153, "step": 15452 }, { "epoch": 0.84147760919728, "grad_norm": 0.5919430674869965, "learning_rate": 0.00013005679638047058, "loss": 12.2235, "step": 15453 }, { "epoch": 0.8415320631938631, "grad_norm": 0.5878741611778164, "learning_rate": 0.00013004838575819097, "loss": 12.1629, "step": 15454 }, { "epoch": 0.8415865171904461, "grad_norm": 0.5506223431101179, "learning_rate": 0.00013003997490225003, "loss": 12.1377, "step": 15455 }, { "epoch": 0.8416409711870291, "grad_norm": 0.5477777108898231, "learning_rate": 0.0001300315638127132, "loss": 12.1777, "step": 15456 }, { "epoch": 0.8416954251836121, "grad_norm": 0.6021798307343533, "learning_rate": 0.00013002315248964588, "loss": 12.058, "step": 15457 }, { "epoch": 0.8417498791801951, "grad_norm": 0.6091936693650849, "learning_rate": 0.00013001474093311352, "loss": 12.1401, "step": 15458 }, { "epoch": 0.841804333176778, "grad_norm": 0.590264553535219, "learning_rate": 0.0001300063291431815, "loss": 12.1855, "step": 15459 }, { "epoch": 0.8418587871733612, "grad_norm": 0.5543263624857145, "learning_rate": 0.00012999791711991522, "loss": 12.1857, "step": 15460 }, { "epoch": 0.8419132411699441, "grad_norm": 0.6258587587841732, "learning_rate": 0.00012998950486338014, "loss": 12.1809, "step": 15461 }, { "epoch": 0.8419676951665271, "grad_norm": 0.576115813718609, "learning_rate": 0.0001299810923736416, "loss": 12.1628, "step": 15462 }, { "epoch": 0.8420221491631101, "grad_norm": 0.5779529876001324, "learning_rate": 0.00012997267965076504, "loss": 12.1651, "step": 15463 }, { "epoch": 0.8420766031596931, "grad_norm": 0.6170858443746692, "learning_rate": 0.00012996426669481593, "loss": 12.3123, "step": 15464 }, { "epoch": 0.8421310571562761, "grad_norm": 0.5993499195332175, "learning_rate": 0.00012995585350585967, "loss": 12.2695, "step": 15465 }, { "epoch": 0.8421855111528592, "grad_norm": 0.5826163014390762, "learning_rate": 0.00012994744008396167, "loss": 12.2115, "step": 15466 }, { "epoch": 0.8422399651494422, "grad_norm": 0.5385510345677272, "learning_rate": 0.00012993902642918732, "loss": 12.2976, "step": 15467 }, { "epoch": 0.8422944191460252, "grad_norm": 0.5659184008479512, "learning_rate": 0.0001299306125416021, "loss": 12.1892, "step": 15468 }, { "epoch": 0.8423488731426082, "grad_norm": 0.5205833855431005, "learning_rate": 0.00012992219842127142, "loss": 12.1612, "step": 15469 }, { "epoch": 0.8424033271391912, "grad_norm": 0.5737317949815772, "learning_rate": 0.0001299137840682607, "loss": 12.2158, "step": 15470 }, { "epoch": 0.8424577811357742, "grad_norm": 0.5923701455972223, "learning_rate": 0.00012990536948263536, "loss": 12.1834, "step": 15471 }, { "epoch": 0.8425122351323573, "grad_norm": 0.5610356626591934, "learning_rate": 0.00012989695466446088, "loss": 12.1868, "step": 15472 }, { "epoch": 0.8425666891289403, "grad_norm": 0.5593317641200841, "learning_rate": 0.00012988853961380268, "loss": 12.1865, "step": 15473 }, { "epoch": 0.8426211431255233, "grad_norm": 0.5609214500978, "learning_rate": 0.00012988012433072616, "loss": 12.2068, "step": 15474 }, { "epoch": 0.8426755971221063, "grad_norm": 0.6189225809724805, "learning_rate": 0.00012987170881529678, "loss": 12.3564, "step": 15475 }, { "epoch": 0.8427300511186893, "grad_norm": 0.5810532995223517, "learning_rate": 0.00012986329306757997, "loss": 12.225, "step": 15476 }, { "epoch": 0.8427845051152723, "grad_norm": 0.6477006315994673, "learning_rate": 0.00012985487708764122, "loss": 12.3078, "step": 15477 }, { "epoch": 0.8428389591118554, "grad_norm": 0.5325454748651055, "learning_rate": 0.0001298464608755459, "loss": 12.2116, "step": 15478 }, { "epoch": 0.8428934131084383, "grad_norm": 0.5630427183844071, "learning_rate": 0.0001298380444313595, "loss": 12.2595, "step": 15479 }, { "epoch": 0.8429478671050213, "grad_norm": 0.5153174542840714, "learning_rate": 0.0001298296277551475, "loss": 12.144, "step": 15480 }, { "epoch": 0.8430023211016043, "grad_norm": 0.5812089814295625, "learning_rate": 0.00012982121084697529, "loss": 12.289, "step": 15481 }, { "epoch": 0.8430567750981873, "grad_norm": 0.5651481908263806, "learning_rate": 0.00012981279370690834, "loss": 12.1724, "step": 15482 }, { "epoch": 0.8431112290947703, "grad_norm": 0.5993511716762084, "learning_rate": 0.00012980437633501214, "loss": 12.1811, "step": 15483 }, { "epoch": 0.8431656830913534, "grad_norm": 0.5576741439576591, "learning_rate": 0.00012979595873135205, "loss": 12.1552, "step": 15484 }, { "epoch": 0.8432201370879364, "grad_norm": 0.5825885230304538, "learning_rate": 0.00012978754089599363, "loss": 12.2838, "step": 15485 }, { "epoch": 0.8432745910845194, "grad_norm": 0.5981931555447255, "learning_rate": 0.00012977912282900232, "loss": 12.1833, "step": 15486 }, { "epoch": 0.8433290450811024, "grad_norm": 0.5922220565819358, "learning_rate": 0.00012977070453044348, "loss": 12.1535, "step": 15487 }, { "epoch": 0.8433834990776854, "grad_norm": 0.5485480344051351, "learning_rate": 0.00012976228600038273, "loss": 12.2509, "step": 15488 }, { "epoch": 0.8434379530742685, "grad_norm": 0.5948772804151026, "learning_rate": 0.00012975386723888542, "loss": 12.1709, "step": 15489 }, { "epoch": 0.8434924070708515, "grad_norm": 0.6364488969136304, "learning_rate": 0.00012974544824601703, "loss": 12.0577, "step": 15490 }, { "epoch": 0.8435468610674345, "grad_norm": 0.5738671439296887, "learning_rate": 0.00012973702902184306, "loss": 12.2015, "step": 15491 }, { "epoch": 0.8436013150640175, "grad_norm": 0.5646197147628447, "learning_rate": 0.00012972860956642895, "loss": 12.1925, "step": 15492 }, { "epoch": 0.8436557690606005, "grad_norm": 0.527313632438477, "learning_rate": 0.00012972018987984023, "loss": 12.1908, "step": 15493 }, { "epoch": 0.8437102230571835, "grad_norm": 0.5621058442006267, "learning_rate": 0.00012971176996214232, "loss": 12.189, "step": 15494 }, { "epoch": 0.8437646770537666, "grad_norm": 0.5441499521332711, "learning_rate": 0.00012970334981340063, "loss": 12.1707, "step": 15495 }, { "epoch": 0.8438191310503496, "grad_norm": 0.6113276956688619, "learning_rate": 0.0001296949294336808, "loss": 12.1549, "step": 15496 }, { "epoch": 0.8438735850469326, "grad_norm": 0.6162383652250739, "learning_rate": 0.00012968650882304818, "loss": 12.2017, "step": 15497 }, { "epoch": 0.8439280390435155, "grad_norm": 0.5558501593387551, "learning_rate": 0.00012967808798156828, "loss": 12.1558, "step": 15498 }, { "epoch": 0.8439824930400985, "grad_norm": 0.527880376040958, "learning_rate": 0.00012966966690930665, "loss": 12.2283, "step": 15499 }, { "epoch": 0.8440369470366815, "grad_norm": 0.5775924687961809, "learning_rate": 0.00012966124560632867, "loss": 12.3681, "step": 15500 }, { "epoch": 0.8440914010332646, "grad_norm": 0.5918371650924008, "learning_rate": 0.00012965282407269982, "loss": 12.0314, "step": 15501 }, { "epoch": 0.8441458550298476, "grad_norm": 0.5889430522237789, "learning_rate": 0.0001296444023084857, "loss": 12.1733, "step": 15502 }, { "epoch": 0.8442003090264306, "grad_norm": 0.630949062021415, "learning_rate": 0.0001296359803137517, "loss": 12.1696, "step": 15503 }, { "epoch": 0.8442547630230136, "grad_norm": 0.6027404325392326, "learning_rate": 0.00012962755808856342, "loss": 12.2744, "step": 15504 }, { "epoch": 0.8443092170195966, "grad_norm": 0.6058898368980901, "learning_rate": 0.00012961913563298624, "loss": 12.3292, "step": 15505 }, { "epoch": 0.8443636710161796, "grad_norm": 0.5531306126172333, "learning_rate": 0.0001296107129470857, "loss": 12.2541, "step": 15506 }, { "epoch": 0.8444181250127627, "grad_norm": 0.6083152632010047, "learning_rate": 0.00012960229003092724, "loss": 12.163, "step": 15507 }, { "epoch": 0.8444725790093457, "grad_norm": 0.5771152076523544, "learning_rate": 0.00012959386688457642, "loss": 12.3182, "step": 15508 }, { "epoch": 0.8445270330059287, "grad_norm": 0.5904188640337819, "learning_rate": 0.00012958544350809878, "loss": 12.0135, "step": 15509 }, { "epoch": 0.8445814870025117, "grad_norm": 0.511318850188779, "learning_rate": 0.00012957701990155975, "loss": 12.1324, "step": 15510 }, { "epoch": 0.8446359409990947, "grad_norm": 0.549514494740283, "learning_rate": 0.00012956859606502486, "loss": 12.1693, "step": 15511 }, { "epoch": 0.8446903949956777, "grad_norm": 0.6198624919438058, "learning_rate": 0.00012956017199855957, "loss": 12.3404, "step": 15512 }, { "epoch": 0.8447448489922608, "grad_norm": 0.5654961144625223, "learning_rate": 0.00012955174770222944, "loss": 12.3016, "step": 15513 }, { "epoch": 0.8447993029888438, "grad_norm": 0.5328895178588599, "learning_rate": 0.00012954332317609995, "loss": 12.1564, "step": 15514 }, { "epoch": 0.8448537569854268, "grad_norm": 0.593522704545548, "learning_rate": 0.0001295348984202367, "loss": 12.2934, "step": 15515 }, { "epoch": 0.8449082109820097, "grad_norm": 0.5430808014371415, "learning_rate": 0.00012952647343470505, "loss": 12.191, "step": 15516 }, { "epoch": 0.8449626649785927, "grad_norm": 0.5265172841458723, "learning_rate": 0.00012951804821957063, "loss": 12.2325, "step": 15517 }, { "epoch": 0.8450171189751757, "grad_norm": 0.6839327055872534, "learning_rate": 0.00012950962277489885, "loss": 12.3049, "step": 15518 }, { "epoch": 0.8450715729717588, "grad_norm": 0.5473030469567395, "learning_rate": 0.00012950119710075536, "loss": 12.2636, "step": 15519 }, { "epoch": 0.8451260269683418, "grad_norm": 0.5500510581940654, "learning_rate": 0.00012949277119720564, "loss": 12.2041, "step": 15520 }, { "epoch": 0.8451804809649248, "grad_norm": 0.586482402015144, "learning_rate": 0.00012948434506431514, "loss": 12.2955, "step": 15521 }, { "epoch": 0.8452349349615078, "grad_norm": 0.5724440277061343, "learning_rate": 0.00012947591870214945, "loss": 12.2484, "step": 15522 }, { "epoch": 0.8452893889580908, "grad_norm": 0.5696594400728073, "learning_rate": 0.00012946749211077406, "loss": 12.193, "step": 15523 }, { "epoch": 0.8453438429546739, "grad_norm": 0.5209405674486619, "learning_rate": 0.00012945906529025447, "loss": 12.0911, "step": 15524 }, { "epoch": 0.8453982969512569, "grad_norm": 0.570044338500723, "learning_rate": 0.00012945063824065632, "loss": 12.2399, "step": 15525 }, { "epoch": 0.8454527509478399, "grad_norm": 0.5518226214837841, "learning_rate": 0.00012944221096204502, "loss": 12.2789, "step": 15526 }, { "epoch": 0.8455072049444229, "grad_norm": 0.5511632782042666, "learning_rate": 0.00012943378345448616, "loss": 12.0836, "step": 15527 }, { "epoch": 0.8455616589410059, "grad_norm": 0.6136098803437449, "learning_rate": 0.00012942535571804526, "loss": 12.1695, "step": 15528 }, { "epoch": 0.8456161129375889, "grad_norm": 0.6071253304180788, "learning_rate": 0.00012941692775278785, "loss": 12.2983, "step": 15529 }, { "epoch": 0.845670566934172, "grad_norm": 0.5204147345078632, "learning_rate": 0.0001294084995587795, "loss": 12.1814, "step": 15530 }, { "epoch": 0.845725020930755, "grad_norm": 0.5472504695583279, "learning_rate": 0.0001294000711360857, "loss": 12.2124, "step": 15531 }, { "epoch": 0.845779474927338, "grad_norm": 0.6406576106211804, "learning_rate": 0.00012939164248477206, "loss": 12.3914, "step": 15532 }, { "epoch": 0.845833928923921, "grad_norm": 0.5236012866958867, "learning_rate": 0.00012938321360490406, "loss": 12.0712, "step": 15533 }, { "epoch": 0.845888382920504, "grad_norm": 0.5797560648484227, "learning_rate": 0.00012937478449654726, "loss": 12.1934, "step": 15534 }, { "epoch": 0.8459428369170869, "grad_norm": 0.5685664703573508, "learning_rate": 0.00012936635515976722, "loss": 12.1307, "step": 15535 }, { "epoch": 0.84599729091367, "grad_norm": 0.6221459849482847, "learning_rate": 0.00012935792559462945, "loss": 12.1631, "step": 15536 }, { "epoch": 0.846051744910253, "grad_norm": 0.5876772033125665, "learning_rate": 0.00012934949580119953, "loss": 12.1943, "step": 15537 }, { "epoch": 0.846106198906836, "grad_norm": 0.759882435895185, "learning_rate": 0.00012934106577954305, "loss": 12.3563, "step": 15538 }, { "epoch": 0.846160652903419, "grad_norm": 0.5415582133886058, "learning_rate": 0.00012933263552972549, "loss": 12.1682, "step": 15539 }, { "epoch": 0.846215106900002, "grad_norm": 0.5903793304696313, "learning_rate": 0.00012932420505181241, "loss": 12.2252, "step": 15540 }, { "epoch": 0.846269560896585, "grad_norm": 0.5849930079909551, "learning_rate": 0.00012931577434586943, "loss": 12.3335, "step": 15541 }, { "epoch": 0.8463240148931681, "grad_norm": 0.6003166564253619, "learning_rate": 0.00012930734341196206, "loss": 12.1644, "step": 15542 }, { "epoch": 0.8463784688897511, "grad_norm": 0.6097396533940193, "learning_rate": 0.00012929891225015586, "loss": 12.1985, "step": 15543 }, { "epoch": 0.8464329228863341, "grad_norm": 0.5938451907882728, "learning_rate": 0.00012929048086051645, "loss": 12.2239, "step": 15544 }, { "epoch": 0.8464873768829171, "grad_norm": 0.7848091599738762, "learning_rate": 0.0001292820492431093, "loss": 12.3416, "step": 15545 }, { "epoch": 0.8465418308795001, "grad_norm": 0.5677497381016352, "learning_rate": 0.00012927361739800005, "loss": 12.1728, "step": 15546 }, { "epoch": 0.8465962848760831, "grad_norm": 0.6104022337169018, "learning_rate": 0.00012926518532525424, "loss": 12.1725, "step": 15547 }, { "epoch": 0.8466507388726662, "grad_norm": 0.5022216219713065, "learning_rate": 0.00012925675302493745, "loss": 12.1429, "step": 15548 }, { "epoch": 0.8467051928692492, "grad_norm": 0.5848567111984192, "learning_rate": 0.00012924832049711525, "loss": 12.2029, "step": 15549 }, { "epoch": 0.8467596468658322, "grad_norm": 0.5081974836950981, "learning_rate": 0.00012923988774185316, "loss": 11.9883, "step": 15550 }, { "epoch": 0.8468141008624152, "grad_norm": 0.6221118858893209, "learning_rate": 0.00012923145475921683, "loss": 12.2106, "step": 15551 }, { "epoch": 0.8468685548589981, "grad_norm": 0.6175250867818555, "learning_rate": 0.00012922302154927179, "loss": 12.3017, "step": 15552 }, { "epoch": 0.8469230088555812, "grad_norm": 0.5352118342810327, "learning_rate": 0.00012921458811208366, "loss": 12.1494, "step": 15553 }, { "epoch": 0.8469774628521642, "grad_norm": 0.5872967934663972, "learning_rate": 0.00012920615444771797, "loss": 12.0825, "step": 15554 }, { "epoch": 0.8470319168487472, "grad_norm": 0.5933345426004941, "learning_rate": 0.00012919772055624038, "loss": 12.2394, "step": 15555 }, { "epoch": 0.8470863708453302, "grad_norm": 0.6272768455911071, "learning_rate": 0.00012918928643771633, "loss": 12.2328, "step": 15556 }, { "epoch": 0.8471408248419132, "grad_norm": 0.5198233088362648, "learning_rate": 0.00012918085209221155, "loss": 12.0769, "step": 15557 }, { "epoch": 0.8471952788384962, "grad_norm": 0.6244403370285655, "learning_rate": 0.00012917241751979154, "loss": 12.1873, "step": 15558 }, { "epoch": 0.8472497328350793, "grad_norm": 0.522512235489971, "learning_rate": 0.00012916398272052194, "loss": 12.1636, "step": 15559 }, { "epoch": 0.8473041868316623, "grad_norm": 0.5587611000537016, "learning_rate": 0.00012915554769446833, "loss": 12.3209, "step": 15560 }, { "epoch": 0.8473586408282453, "grad_norm": 0.5404422738375619, "learning_rate": 0.00012914711244169626, "loss": 12.2373, "step": 15561 }, { "epoch": 0.8474130948248283, "grad_norm": 0.5233974892533286, "learning_rate": 0.00012913867696227136, "loss": 12.1196, "step": 15562 }, { "epoch": 0.8474675488214113, "grad_norm": 0.5540664811886048, "learning_rate": 0.00012913024125625925, "loss": 12.201, "step": 15563 }, { "epoch": 0.8475220028179943, "grad_norm": 0.585623802648054, "learning_rate": 0.00012912180532372548, "loss": 12.0897, "step": 15564 }, { "epoch": 0.8475764568145774, "grad_norm": 0.5162575407180071, "learning_rate": 0.0001291133691647357, "loss": 12.097, "step": 15565 }, { "epoch": 0.8476309108111604, "grad_norm": 0.6065161656591413, "learning_rate": 0.00012910493277935544, "loss": 12.1424, "step": 15566 }, { "epoch": 0.8476853648077434, "grad_norm": 0.6120489582519593, "learning_rate": 0.00012909649616765033, "loss": 12.1668, "step": 15567 }, { "epoch": 0.8477398188043264, "grad_norm": 0.610427532528256, "learning_rate": 0.00012908805932968602, "loss": 12.2514, "step": 15568 }, { "epoch": 0.8477942728009094, "grad_norm": 0.5880797631439723, "learning_rate": 0.00012907962226552807, "loss": 12.1468, "step": 15569 }, { "epoch": 0.8478487267974923, "grad_norm": 0.7011077942022087, "learning_rate": 0.00012907118497524213, "loss": 12.3227, "step": 15570 }, { "epoch": 0.8479031807940755, "grad_norm": 0.5620293371921344, "learning_rate": 0.00012906274745889374, "loss": 12.1715, "step": 15571 }, { "epoch": 0.8479576347906584, "grad_norm": 0.6079063552721871, "learning_rate": 0.00012905430971654858, "loss": 12.1763, "step": 15572 }, { "epoch": 0.8480120887872414, "grad_norm": 0.6148687010089448, "learning_rate": 0.0001290458717482722, "loss": 12.2441, "step": 15573 }, { "epoch": 0.8480665427838244, "grad_norm": 0.5995826169807735, "learning_rate": 0.00012903743355413024, "loss": 12.19, "step": 15574 }, { "epoch": 0.8481209967804074, "grad_norm": 0.5907546417641577, "learning_rate": 0.0001290289951341884, "loss": 12.1121, "step": 15575 }, { "epoch": 0.8481754507769904, "grad_norm": 0.5300240353852979, "learning_rate": 0.00012902055648851218, "loss": 12.134, "step": 15576 }, { "epoch": 0.8482299047735735, "grad_norm": 0.6731591510323991, "learning_rate": 0.00012901211761716724, "loss": 12.3143, "step": 15577 }, { "epoch": 0.8482843587701565, "grad_norm": 0.6375915340003899, "learning_rate": 0.0001290036785202192, "loss": 12.2515, "step": 15578 }, { "epoch": 0.8483388127667395, "grad_norm": 0.5813097313194986, "learning_rate": 0.00012899523919773372, "loss": 12.2116, "step": 15579 }, { "epoch": 0.8483932667633225, "grad_norm": 0.5742185102082507, "learning_rate": 0.0001289867996497764, "loss": 12.1474, "step": 15580 }, { "epoch": 0.8484477207599055, "grad_norm": 0.590523813660125, "learning_rate": 0.00012897835987641285, "loss": 12.106, "step": 15581 }, { "epoch": 0.8485021747564885, "grad_norm": 0.6000171184383458, "learning_rate": 0.0001289699198777087, "loss": 12.154, "step": 15582 }, { "epoch": 0.8485566287530716, "grad_norm": 0.5719366141175086, "learning_rate": 0.00012896147965372963, "loss": 12.2698, "step": 15583 }, { "epoch": 0.8486110827496546, "grad_norm": 0.5386123198262065, "learning_rate": 0.00012895303920454118, "loss": 12.2407, "step": 15584 }, { "epoch": 0.8486655367462376, "grad_norm": 0.5296364039346468, "learning_rate": 0.0001289445985302091, "loss": 12.155, "step": 15585 }, { "epoch": 0.8487199907428206, "grad_norm": 0.6482716971909245, "learning_rate": 0.00012893615763079894, "loss": 12.2155, "step": 15586 }, { "epoch": 0.8487744447394036, "grad_norm": 0.7109488935622398, "learning_rate": 0.00012892771650637637, "loss": 12.1994, "step": 15587 }, { "epoch": 0.8488288987359867, "grad_norm": 0.593128389693315, "learning_rate": 0.00012891927515700703, "loss": 12.1713, "step": 15588 }, { "epoch": 0.8488833527325697, "grad_norm": 0.6519114749932312, "learning_rate": 0.00012891083358275654, "loss": 12.1332, "step": 15589 }, { "epoch": 0.8489378067291526, "grad_norm": 0.6107853226476072, "learning_rate": 0.00012890239178369058, "loss": 12.2532, "step": 15590 }, { "epoch": 0.8489922607257356, "grad_norm": 0.6797629066198152, "learning_rate": 0.00012889394975987476, "loss": 12.1826, "step": 15591 }, { "epoch": 0.8490467147223186, "grad_norm": 0.6361362221930984, "learning_rate": 0.00012888550751137475, "loss": 12.1707, "step": 15592 }, { "epoch": 0.8491011687189016, "grad_norm": 0.5971055886039516, "learning_rate": 0.0001288770650382562, "loss": 12.2782, "step": 15593 }, { "epoch": 0.8491556227154847, "grad_norm": 0.5947171593968466, "learning_rate": 0.00012886862234058475, "loss": 12.0976, "step": 15594 }, { "epoch": 0.8492100767120677, "grad_norm": 0.6335157038701741, "learning_rate": 0.00012886017941842604, "loss": 12.1543, "step": 15595 }, { "epoch": 0.8492645307086507, "grad_norm": 0.5592617264152265, "learning_rate": 0.00012885173627184571, "loss": 12.1406, "step": 15596 }, { "epoch": 0.8493189847052337, "grad_norm": 0.6160812598362347, "learning_rate": 0.00012884329290090949, "loss": 12.1859, "step": 15597 }, { "epoch": 0.8493734387018167, "grad_norm": 0.5659969854376821, "learning_rate": 0.00012883484930568294, "loss": 12.1066, "step": 15598 }, { "epoch": 0.8494278926983997, "grad_norm": 0.6664842479810961, "learning_rate": 0.0001288264054862318, "loss": 12.096, "step": 15599 }, { "epoch": 0.8494823466949828, "grad_norm": 0.5276228134648259, "learning_rate": 0.00012881796144262168, "loss": 12.1223, "step": 15600 }, { "epoch": 0.8495368006915658, "grad_norm": 0.5805855702523423, "learning_rate": 0.00012880951717491828, "loss": 12.2562, "step": 15601 }, { "epoch": 0.8495912546881488, "grad_norm": 0.5383428403603512, "learning_rate": 0.00012880107268318722, "loss": 12.0346, "step": 15602 }, { "epoch": 0.8496457086847318, "grad_norm": 0.5522025055589089, "learning_rate": 0.00012879262796749422, "loss": 12.1612, "step": 15603 }, { "epoch": 0.8497001626813148, "grad_norm": 0.583298513098549, "learning_rate": 0.00012878418302790488, "loss": 12.1072, "step": 15604 }, { "epoch": 0.8497546166778978, "grad_norm": 0.6016494841639478, "learning_rate": 0.0001287757378644849, "loss": 12.3494, "step": 15605 }, { "epoch": 0.8498090706744809, "grad_norm": 0.5713091203784781, "learning_rate": 0.00012876729247729998, "loss": 12.0923, "step": 15606 }, { "epoch": 0.8498635246710639, "grad_norm": 0.5960425318662682, "learning_rate": 0.00012875884686641578, "loss": 12.0263, "step": 15607 }, { "epoch": 0.8499179786676468, "grad_norm": 0.5389275337863298, "learning_rate": 0.00012875040103189791, "loss": 12.1331, "step": 15608 }, { "epoch": 0.8499724326642298, "grad_norm": 0.5523982837647737, "learning_rate": 0.00012874195497381217, "loss": 12.1424, "step": 15609 }, { "epoch": 0.8500268866608128, "grad_norm": 0.5161903641796735, "learning_rate": 0.0001287335086922241, "loss": 12.1487, "step": 15610 }, { "epoch": 0.8500813406573958, "grad_norm": 0.544290717987044, "learning_rate": 0.0001287250621871995, "loss": 12.1779, "step": 15611 }, { "epoch": 0.8501357946539789, "grad_norm": 0.5652310341744862, "learning_rate": 0.00012871661545880398, "loss": 12.2929, "step": 15612 }, { "epoch": 0.8501902486505619, "grad_norm": 0.5724418405069429, "learning_rate": 0.00012870816850710326, "loss": 12.1614, "step": 15613 }, { "epoch": 0.8502447026471449, "grad_norm": 0.5898243654550441, "learning_rate": 0.000128699721332163, "loss": 12.229, "step": 15614 }, { "epoch": 0.8502991566437279, "grad_norm": 0.5731288486052545, "learning_rate": 0.0001286912739340489, "loss": 12.183, "step": 15615 }, { "epoch": 0.8503536106403109, "grad_norm": 0.5246223185961127, "learning_rate": 0.00012868282631282662, "loss": 12.1769, "step": 15616 }, { "epoch": 0.8504080646368939, "grad_norm": 0.5049356988743475, "learning_rate": 0.00012867437846856186, "loss": 12.1367, "step": 15617 }, { "epoch": 0.850462518633477, "grad_norm": 0.5481648482068753, "learning_rate": 0.00012866593040132036, "loss": 12.1416, "step": 15618 }, { "epoch": 0.85051697263006, "grad_norm": 0.6076905164308919, "learning_rate": 0.00012865748211116776, "loss": 12.2317, "step": 15619 }, { "epoch": 0.850571426626643, "grad_norm": 0.507112466697084, "learning_rate": 0.00012864903359816979, "loss": 12.0967, "step": 15620 }, { "epoch": 0.850625880623226, "grad_norm": 0.5362129911701178, "learning_rate": 0.0001286405848623921, "loss": 12.0546, "step": 15621 }, { "epoch": 0.850680334619809, "grad_norm": 0.5950019792773952, "learning_rate": 0.00012863213590390044, "loss": 12.0381, "step": 15622 }, { "epoch": 0.8507347886163921, "grad_norm": 0.5447907140074338, "learning_rate": 0.0001286236867227605, "loss": 12.1993, "step": 15623 }, { "epoch": 0.8507892426129751, "grad_norm": 0.5149878848651488, "learning_rate": 0.00012861523731903796, "loss": 12.0942, "step": 15624 }, { "epoch": 0.850843696609558, "grad_norm": 0.5531567677264669, "learning_rate": 0.00012860678769279854, "loss": 12.2105, "step": 15625 }, { "epoch": 0.850898150606141, "grad_norm": 0.5594788185985377, "learning_rate": 0.00012859833784410792, "loss": 12.1554, "step": 15626 }, { "epoch": 0.850952604602724, "grad_norm": 0.5571718604967746, "learning_rate": 0.00012858988777303184, "loss": 12.2235, "step": 15627 }, { "epoch": 0.851007058599307, "grad_norm": 0.7899774324453996, "learning_rate": 0.000128581437479636, "loss": 12.5309, "step": 15628 }, { "epoch": 0.8510615125958901, "grad_norm": 0.5659755971424956, "learning_rate": 0.00012857298696398613, "loss": 12.2907, "step": 15629 }, { "epoch": 0.8511159665924731, "grad_norm": 0.61763375246358, "learning_rate": 0.00012856453622614791, "loss": 12.2098, "step": 15630 }, { "epoch": 0.8511704205890561, "grad_norm": 0.5680070609100679, "learning_rate": 0.00012855608526618706, "loss": 12.1818, "step": 15631 }, { "epoch": 0.8512248745856391, "grad_norm": 0.6291213052887918, "learning_rate": 0.0001285476340841693, "loss": 12.1265, "step": 15632 }, { "epoch": 0.8512793285822221, "grad_norm": 0.5967570939808631, "learning_rate": 0.00012853918268016033, "loss": 12.1694, "step": 15633 }, { "epoch": 0.8513337825788051, "grad_norm": 0.5645989960623116, "learning_rate": 0.0001285307310542259, "loss": 12.123, "step": 15634 }, { "epoch": 0.8513882365753882, "grad_norm": 0.5923944416190499, "learning_rate": 0.00012852227920643177, "loss": 12.1184, "step": 15635 }, { "epoch": 0.8514426905719712, "grad_norm": 0.5343867541462105, "learning_rate": 0.00012851382713684358, "loss": 12.2321, "step": 15636 }, { "epoch": 0.8514971445685542, "grad_norm": 0.5317908527102665, "learning_rate": 0.00012850537484552707, "loss": 12.0625, "step": 15637 }, { "epoch": 0.8515515985651372, "grad_norm": 0.6008950192481002, "learning_rate": 0.00012849692233254798, "loss": 12.2511, "step": 15638 }, { "epoch": 0.8516060525617202, "grad_norm": 0.5891196291828917, "learning_rate": 0.00012848846959797206, "loss": 12.2549, "step": 15639 }, { "epoch": 0.8516605065583032, "grad_norm": 0.6080059299665412, "learning_rate": 0.00012848001664186504, "loss": 12.2132, "step": 15640 }, { "epoch": 0.8517149605548863, "grad_norm": 0.6157269450140649, "learning_rate": 0.00012847156346429262, "loss": 12.3017, "step": 15641 }, { "epoch": 0.8517694145514693, "grad_norm": 0.5276589367835588, "learning_rate": 0.00012846311006532054, "loss": 12.2921, "step": 15642 }, { "epoch": 0.8518238685480523, "grad_norm": 0.5142565017966974, "learning_rate": 0.00012845465644501454, "loss": 12.1849, "step": 15643 }, { "epoch": 0.8518783225446352, "grad_norm": 0.5449135733343222, "learning_rate": 0.00012844620260344037, "loss": 12.1698, "step": 15644 }, { "epoch": 0.8519327765412182, "grad_norm": 0.6327389332555616, "learning_rate": 0.00012843774854066377, "loss": 12.1762, "step": 15645 }, { "epoch": 0.8519872305378012, "grad_norm": 0.5907704006134956, "learning_rate": 0.00012842929425675048, "loss": 12.2365, "step": 15646 }, { "epoch": 0.8520416845343843, "grad_norm": 0.6351242368170997, "learning_rate": 0.0001284208397517662, "loss": 12.1271, "step": 15647 }, { "epoch": 0.8520961385309673, "grad_norm": 0.5151588337199436, "learning_rate": 0.0001284123850257767, "loss": 12.1418, "step": 15648 }, { "epoch": 0.8521505925275503, "grad_norm": 0.5683816891671283, "learning_rate": 0.00012840393007884776, "loss": 12.1423, "step": 15649 }, { "epoch": 0.8522050465241333, "grad_norm": 0.6980382453882409, "learning_rate": 0.00012839547491104507, "loss": 12.3939, "step": 15650 }, { "epoch": 0.8522595005207163, "grad_norm": 0.5468550971970724, "learning_rate": 0.00012838701952243439, "loss": 12.1403, "step": 15651 }, { "epoch": 0.8523139545172993, "grad_norm": 0.5720309246905906, "learning_rate": 0.00012837856391308152, "loss": 12.1732, "step": 15652 }, { "epoch": 0.8523684085138824, "grad_norm": 0.5901994933747409, "learning_rate": 0.00012837010808305216, "loss": 12.1811, "step": 15653 }, { "epoch": 0.8524228625104654, "grad_norm": 0.6417524330224629, "learning_rate": 0.00012836165203241207, "loss": 12.1895, "step": 15654 }, { "epoch": 0.8524773165070484, "grad_norm": 0.5508830074943798, "learning_rate": 0.00012835319576122705, "loss": 12.2433, "step": 15655 }, { "epoch": 0.8525317705036314, "grad_norm": 0.4847934473438708, "learning_rate": 0.00012834473926956282, "loss": 12.1649, "step": 15656 }, { "epoch": 0.8525862245002144, "grad_norm": 0.5106405252112488, "learning_rate": 0.0001283362825574851, "loss": 12.1226, "step": 15657 }, { "epoch": 0.8526406784967975, "grad_norm": 0.6224454278859773, "learning_rate": 0.00012832782562505974, "loss": 12.4138, "step": 15658 }, { "epoch": 0.8526951324933805, "grad_norm": 0.5774410000543585, "learning_rate": 0.00012831936847235243, "loss": 12.185, "step": 15659 }, { "epoch": 0.8527495864899635, "grad_norm": 0.5379968708739294, "learning_rate": 0.000128310911099429, "loss": 12.121, "step": 15660 }, { "epoch": 0.8528040404865465, "grad_norm": 0.5884777063588783, "learning_rate": 0.00012830245350635514, "loss": 12.2266, "step": 15661 }, { "epoch": 0.8528584944831294, "grad_norm": 0.5666623086845319, "learning_rate": 0.0001282939956931967, "loss": 12.2207, "step": 15662 }, { "epoch": 0.8529129484797124, "grad_norm": 0.5922210812425717, "learning_rate": 0.00012828553766001935, "loss": 12.2144, "step": 15663 }, { "epoch": 0.8529674024762955, "grad_norm": 0.5464399974466867, "learning_rate": 0.00012827707940688896, "loss": 12.3001, "step": 15664 }, { "epoch": 0.8530218564728785, "grad_norm": 0.5641139397080874, "learning_rate": 0.00012826862093387123, "loss": 12.1005, "step": 15665 }, { "epoch": 0.8530763104694615, "grad_norm": 0.5914279130202791, "learning_rate": 0.00012826016224103196, "loss": 12.2504, "step": 15666 }, { "epoch": 0.8531307644660445, "grad_norm": 0.5375514541952663, "learning_rate": 0.00012825170332843696, "loss": 12.18, "step": 15667 }, { "epoch": 0.8531852184626275, "grad_norm": 0.5536914518116253, "learning_rate": 0.00012824324419615196, "loss": 12.0696, "step": 15668 }, { "epoch": 0.8532396724592105, "grad_norm": 0.608791386538355, "learning_rate": 0.00012823478484424273, "loss": 12.2929, "step": 15669 }, { "epoch": 0.8532941264557936, "grad_norm": 0.5442849581551492, "learning_rate": 0.0001282263252727751, "loss": 12.2019, "step": 15670 }, { "epoch": 0.8533485804523766, "grad_norm": 0.5579861998109426, "learning_rate": 0.00012821786548181485, "loss": 12.193, "step": 15671 }, { "epoch": 0.8534030344489596, "grad_norm": 0.5299834636731653, "learning_rate": 0.00012820940547142773, "loss": 12.2381, "step": 15672 }, { "epoch": 0.8534574884455426, "grad_norm": 0.7834986858693507, "learning_rate": 0.00012820094524167955, "loss": 12.1198, "step": 15673 }, { "epoch": 0.8535119424421256, "grad_norm": 0.602285605010258, "learning_rate": 0.00012819248479263606, "loss": 12.2375, "step": 15674 }, { "epoch": 0.8535663964387086, "grad_norm": 0.5712320288841317, "learning_rate": 0.00012818402412436312, "loss": 12.1941, "step": 15675 }, { "epoch": 0.8536208504352917, "grad_norm": 0.5429152513655475, "learning_rate": 0.00012817556323692646, "loss": 12.0621, "step": 15676 }, { "epoch": 0.8536753044318747, "grad_norm": 0.6057068946766633, "learning_rate": 0.0001281671021303919, "loss": 12.2741, "step": 15677 }, { "epoch": 0.8537297584284577, "grad_norm": 0.6539577442174344, "learning_rate": 0.00012815864080482523, "loss": 12.2394, "step": 15678 }, { "epoch": 0.8537842124250407, "grad_norm": 0.6474510632946274, "learning_rate": 0.00012815017926029224, "loss": 12.2298, "step": 15679 }, { "epoch": 0.8538386664216236, "grad_norm": 0.6012156573147364, "learning_rate": 0.00012814171749685874, "loss": 12.2179, "step": 15680 }, { "epoch": 0.8538931204182066, "grad_norm": 0.5748682195613363, "learning_rate": 0.00012813325551459055, "loss": 12.0987, "step": 15681 }, { "epoch": 0.8539475744147897, "grad_norm": 0.5932538036371817, "learning_rate": 0.0001281247933135534, "loss": 12.2372, "step": 15682 }, { "epoch": 0.8540020284113727, "grad_norm": 0.5569302402712455, "learning_rate": 0.00012811633089381317, "loss": 12.1693, "step": 15683 }, { "epoch": 0.8540564824079557, "grad_norm": 0.9870608844038362, "learning_rate": 0.00012810786825543562, "loss": 12.1499, "step": 15684 }, { "epoch": 0.8541109364045387, "grad_norm": 0.6173207121444124, "learning_rate": 0.0001280994053984866, "loss": 12.2683, "step": 15685 }, { "epoch": 0.8541653904011217, "grad_norm": 0.6280964771873433, "learning_rate": 0.0001280909423230319, "loss": 12.2718, "step": 15686 }, { "epoch": 0.8542198443977048, "grad_norm": 0.619197910380965, "learning_rate": 0.00012808247902913725, "loss": 12.2047, "step": 15687 }, { "epoch": 0.8542742983942878, "grad_norm": 0.6570766982144078, "learning_rate": 0.0001280740155168686, "loss": 12.314, "step": 15688 }, { "epoch": 0.8543287523908708, "grad_norm": 0.7308763519060135, "learning_rate": 0.00012806555178629167, "loss": 12.1709, "step": 15689 }, { "epoch": 0.8543832063874538, "grad_norm": 0.6809149470383755, "learning_rate": 0.00012805708783747232, "loss": 12.1298, "step": 15690 }, { "epoch": 0.8544376603840368, "grad_norm": 0.5754298989534565, "learning_rate": 0.00012804862367047637, "loss": 12.1261, "step": 15691 }, { "epoch": 0.8544921143806198, "grad_norm": 0.645798903559039, "learning_rate": 0.00012804015928536956, "loss": 12.1758, "step": 15692 }, { "epoch": 0.8545465683772029, "grad_norm": 0.5825265322505782, "learning_rate": 0.00012803169468221778, "loss": 12.1584, "step": 15693 }, { "epoch": 0.8546010223737859, "grad_norm": 0.5700802449068311, "learning_rate": 0.00012802322986108687, "loss": 12.1773, "step": 15694 }, { "epoch": 0.8546554763703689, "grad_norm": 0.5854575405293855, "learning_rate": 0.0001280147648220426, "loss": 12.1983, "step": 15695 }, { "epoch": 0.8547099303669519, "grad_norm": 0.559123078988021, "learning_rate": 0.00012800629956515088, "loss": 12.1285, "step": 15696 }, { "epoch": 0.8547643843635349, "grad_norm": 0.5305252493834495, "learning_rate": 0.0001279978340904774, "loss": 12.1489, "step": 15697 }, { "epoch": 0.8548188383601178, "grad_norm": 0.6947880528017051, "learning_rate": 0.00012798936839808811, "loss": 12.2499, "step": 15698 }, { "epoch": 0.854873292356701, "grad_norm": 0.5427173645269607, "learning_rate": 0.00012798090248804876, "loss": 12.0906, "step": 15699 }, { "epoch": 0.854927746353284, "grad_norm": 0.5586751572827647, "learning_rate": 0.00012797243636042527, "loss": 12.1936, "step": 15700 }, { "epoch": 0.8549822003498669, "grad_norm": 0.5557568132196792, "learning_rate": 0.0001279639700152834, "loss": 12.2273, "step": 15701 }, { "epoch": 0.8550366543464499, "grad_norm": 0.5682109416724617, "learning_rate": 0.00012795550345268903, "loss": 12.2523, "step": 15702 }, { "epoch": 0.8550911083430329, "grad_norm": 0.5987692325981113, "learning_rate": 0.00012794703667270795, "loss": 12.2351, "step": 15703 }, { "epoch": 0.8551455623396159, "grad_norm": 0.5738180654741157, "learning_rate": 0.00012793856967540602, "loss": 12.2044, "step": 15704 }, { "epoch": 0.855200016336199, "grad_norm": 0.556910543590465, "learning_rate": 0.00012793010246084908, "loss": 12.1197, "step": 15705 }, { "epoch": 0.855254470332782, "grad_norm": 0.6121439251457784, "learning_rate": 0.00012792163502910303, "loss": 12.0928, "step": 15706 }, { "epoch": 0.855308924329365, "grad_norm": 0.564178283270504, "learning_rate": 0.00012791316738023365, "loss": 12.211, "step": 15707 }, { "epoch": 0.855363378325948, "grad_norm": 0.5654129343631402, "learning_rate": 0.00012790469951430678, "loss": 12.2054, "step": 15708 }, { "epoch": 0.855417832322531, "grad_norm": 0.5129254702848438, "learning_rate": 0.00012789623143138828, "loss": 12.2229, "step": 15709 }, { "epoch": 0.855472286319114, "grad_norm": 0.5557932268999569, "learning_rate": 0.000127887763131544, "loss": 12.2176, "step": 15710 }, { "epoch": 0.8555267403156971, "grad_norm": 0.5486285626359496, "learning_rate": 0.00012787929461483983, "loss": 12.2681, "step": 15711 }, { "epoch": 0.8555811943122801, "grad_norm": 0.5652838634167313, "learning_rate": 0.0001278708258813416, "loss": 12.1061, "step": 15712 }, { "epoch": 0.8556356483088631, "grad_norm": 0.5872954549608131, "learning_rate": 0.0001278623569311151, "loss": 12.0819, "step": 15713 }, { "epoch": 0.8556901023054461, "grad_norm": 0.5543051339598092, "learning_rate": 0.00012785388776422626, "loss": 12.1606, "step": 15714 }, { "epoch": 0.8557445563020291, "grad_norm": 0.6378223338713711, "learning_rate": 0.0001278454183807409, "loss": 12.202, "step": 15715 }, { "epoch": 0.855799010298612, "grad_norm": 0.5744170509509097, "learning_rate": 0.00012783694878072495, "loss": 12.1183, "step": 15716 }, { "epoch": 0.8558534642951952, "grad_norm": 0.5258546467076047, "learning_rate": 0.00012782847896424418, "loss": 12.1676, "step": 15717 }, { "epoch": 0.8559079182917781, "grad_norm": 0.6485700887433437, "learning_rate": 0.00012782000893136452, "loss": 12.3775, "step": 15718 }, { "epoch": 0.8559623722883611, "grad_norm": 0.5813874713791451, "learning_rate": 0.00012781153868215178, "loss": 12.3445, "step": 15719 }, { "epoch": 0.8560168262849441, "grad_norm": 0.5446866086229591, "learning_rate": 0.00012780306821667185, "loss": 12.1281, "step": 15720 }, { "epoch": 0.8560712802815271, "grad_norm": 0.617440955219744, "learning_rate": 0.00012779459753499062, "loss": 12.3763, "step": 15721 }, { "epoch": 0.8561257342781102, "grad_norm": 0.5485880847095598, "learning_rate": 0.0001277861266371739, "loss": 12.1894, "step": 15722 }, { "epoch": 0.8561801882746932, "grad_norm": 0.6267972681339404, "learning_rate": 0.00012777765552328765, "loss": 12.1803, "step": 15723 }, { "epoch": 0.8562346422712762, "grad_norm": 0.6028696142522535, "learning_rate": 0.00012776918419339764, "loss": 12.0883, "step": 15724 }, { "epoch": 0.8562890962678592, "grad_norm": 0.5685786219339078, "learning_rate": 0.00012776071264756985, "loss": 12.1525, "step": 15725 }, { "epoch": 0.8563435502644422, "grad_norm": 0.7374933028055527, "learning_rate": 0.00012775224088587005, "loss": 12.2591, "step": 15726 }, { "epoch": 0.8563980042610252, "grad_norm": 0.5980493259753991, "learning_rate": 0.0001277437689083642, "loss": 12.1731, "step": 15727 }, { "epoch": 0.8564524582576083, "grad_norm": 0.5843647706304774, "learning_rate": 0.00012773529671511816, "loss": 12.1551, "step": 15728 }, { "epoch": 0.8565069122541913, "grad_norm": 0.5503302278270538, "learning_rate": 0.00012772682430619778, "loss": 12.2297, "step": 15729 }, { "epoch": 0.8565613662507743, "grad_norm": 0.6475393445604003, "learning_rate": 0.000127718351681669, "loss": 12.2689, "step": 15730 }, { "epoch": 0.8566158202473573, "grad_norm": 0.5359188086214569, "learning_rate": 0.0001277098788415976, "loss": 12.1432, "step": 15731 }, { "epoch": 0.8566702742439403, "grad_norm": 0.563354163807259, "learning_rate": 0.0001277014057860496, "loss": 12.2512, "step": 15732 }, { "epoch": 0.8567247282405233, "grad_norm": 0.5281041117124711, "learning_rate": 0.0001276929325150908, "loss": 12.1194, "step": 15733 }, { "epoch": 0.8567791822371064, "grad_norm": 0.556022921641765, "learning_rate": 0.00012768445902878713, "loss": 12.1332, "step": 15734 }, { "epoch": 0.8568336362336894, "grad_norm": 0.5349451535607578, "learning_rate": 0.00012767598532720443, "loss": 12.0863, "step": 15735 }, { "epoch": 0.8568880902302723, "grad_norm": 0.608521578203048, "learning_rate": 0.00012766751141040866, "loss": 12.2161, "step": 15736 }, { "epoch": 0.8569425442268553, "grad_norm": 0.543320017849001, "learning_rate": 0.00012765903727846565, "loss": 12.1176, "step": 15737 }, { "epoch": 0.8569969982234383, "grad_norm": 0.5457695761142336, "learning_rate": 0.00012765056293144133, "loss": 12.2448, "step": 15738 }, { "epoch": 0.8570514522200213, "grad_norm": 0.554684247229752, "learning_rate": 0.0001276420883694016, "loss": 12.0115, "step": 15739 }, { "epoch": 0.8571059062166044, "grad_norm": 0.5336282304607382, "learning_rate": 0.00012763361359241238, "loss": 12.0986, "step": 15740 }, { "epoch": 0.8571603602131874, "grad_norm": 0.5906900125833782, "learning_rate": 0.00012762513860053955, "loss": 12.1907, "step": 15741 }, { "epoch": 0.8572148142097704, "grad_norm": 0.5163197838124347, "learning_rate": 0.00012761666339384896, "loss": 12.1659, "step": 15742 }, { "epoch": 0.8572692682063534, "grad_norm": 0.5845795882841365, "learning_rate": 0.0001276081879724066, "loss": 12.3958, "step": 15743 }, { "epoch": 0.8573237222029364, "grad_norm": 0.5321494538582507, "learning_rate": 0.00012759971233627834, "loss": 12.1918, "step": 15744 }, { "epoch": 0.8573781761995194, "grad_norm": 0.5166924507597535, "learning_rate": 0.00012759123648553006, "loss": 12.1325, "step": 15745 }, { "epoch": 0.8574326301961025, "grad_norm": 0.587566718750353, "learning_rate": 0.00012758276042022776, "loss": 12.1939, "step": 15746 }, { "epoch": 0.8574870841926855, "grad_norm": 0.6003595372439998, "learning_rate": 0.0001275742841404372, "loss": 12.2588, "step": 15747 }, { "epoch": 0.8575415381892685, "grad_norm": 0.7960268013196092, "learning_rate": 0.00012756580764622445, "loss": 12.4471, "step": 15748 }, { "epoch": 0.8575959921858515, "grad_norm": 0.5824598169241241, "learning_rate": 0.00012755733093765533, "loss": 12.2527, "step": 15749 }, { "epoch": 0.8576504461824345, "grad_norm": 0.551996715410952, "learning_rate": 0.00012754885401479582, "loss": 12.107, "step": 15750 }, { "epoch": 0.8577049001790175, "grad_norm": 0.5434241010597629, "learning_rate": 0.00012754037687771178, "loss": 12.1144, "step": 15751 }, { "epoch": 0.8577593541756006, "grad_norm": 0.6147048798357491, "learning_rate": 0.00012753189952646916, "loss": 12.1863, "step": 15752 }, { "epoch": 0.8578138081721836, "grad_norm": 0.6034774893716371, "learning_rate": 0.00012752342196113383, "loss": 12.2386, "step": 15753 }, { "epoch": 0.8578682621687665, "grad_norm": 0.5959562157298051, "learning_rate": 0.0001275149441817718, "loss": 12.2741, "step": 15754 }, { "epoch": 0.8579227161653495, "grad_norm": 0.5231375209394874, "learning_rate": 0.0001275064661884489, "loss": 12.1576, "step": 15755 }, { "epoch": 0.8579771701619325, "grad_norm": 0.5972949129909878, "learning_rate": 0.00012749798798123116, "loss": 12.1434, "step": 15756 }, { "epoch": 0.8580316241585156, "grad_norm": 0.6122546872739701, "learning_rate": 0.00012748950956018444, "loss": 12.1824, "step": 15757 }, { "epoch": 0.8580860781550986, "grad_norm": 0.548827539391232, "learning_rate": 0.00012748103092537466, "loss": 12.1858, "step": 15758 }, { "epoch": 0.8581405321516816, "grad_norm": 0.5268961504095557, "learning_rate": 0.00012747255207686778, "loss": 12.1409, "step": 15759 }, { "epoch": 0.8581949861482646, "grad_norm": 0.6139714662781754, "learning_rate": 0.00012746407301472974, "loss": 12.2219, "step": 15760 }, { "epoch": 0.8582494401448476, "grad_norm": 0.5731399770059618, "learning_rate": 0.00012745559373902648, "loss": 12.2019, "step": 15761 }, { "epoch": 0.8583038941414306, "grad_norm": 0.7382380608364358, "learning_rate": 0.0001274471142498239, "loss": 12.0339, "step": 15762 }, { "epoch": 0.8583583481380137, "grad_norm": 0.6025951957591087, "learning_rate": 0.00012743863454718797, "loss": 12.1943, "step": 15763 }, { "epoch": 0.8584128021345967, "grad_norm": 0.5695672727653155, "learning_rate": 0.00012743015463118458, "loss": 12.3162, "step": 15764 }, { "epoch": 0.8584672561311797, "grad_norm": 0.5282926940371087, "learning_rate": 0.0001274216745018797, "loss": 12.1697, "step": 15765 }, { "epoch": 0.8585217101277627, "grad_norm": 0.6295977037542692, "learning_rate": 0.00012741319415933934, "loss": 12.4558, "step": 15766 }, { "epoch": 0.8585761641243457, "grad_norm": 0.5456157031705988, "learning_rate": 0.00012740471360362938, "loss": 12.213, "step": 15767 }, { "epoch": 0.8586306181209287, "grad_norm": 0.5722528479712246, "learning_rate": 0.00012739623283481572, "loss": 12.0039, "step": 15768 }, { "epoch": 0.8586850721175118, "grad_norm": 0.5259898355911858, "learning_rate": 0.0001273877518529644, "loss": 12.1635, "step": 15769 }, { "epoch": 0.8587395261140948, "grad_norm": 0.5325491294898629, "learning_rate": 0.00012737927065814127, "loss": 12.216, "step": 15770 }, { "epoch": 0.8587939801106778, "grad_norm": 0.5831718666607144, "learning_rate": 0.00012737078925041244, "loss": 12.2157, "step": 15771 }, { "epoch": 0.8588484341072607, "grad_norm": 0.5362693585664796, "learning_rate": 0.0001273623076298437, "loss": 12.0752, "step": 15772 }, { "epoch": 0.8589028881038437, "grad_norm": 0.5317601893405478, "learning_rate": 0.00012735382579650106, "loss": 12.1272, "step": 15773 }, { "epoch": 0.8589573421004267, "grad_norm": 0.5385207556616182, "learning_rate": 0.0001273453437504505, "loss": 12.2153, "step": 15774 }, { "epoch": 0.8590117960970098, "grad_norm": 0.5966096170591533, "learning_rate": 0.00012733686149175795, "loss": 12.3078, "step": 15775 }, { "epoch": 0.8590662500935928, "grad_norm": 0.5946618340368106, "learning_rate": 0.00012732837902048943, "loss": 12.1621, "step": 15776 }, { "epoch": 0.8591207040901758, "grad_norm": 0.5682263517375123, "learning_rate": 0.00012731989633671078, "loss": 12.2022, "step": 15777 }, { "epoch": 0.8591751580867588, "grad_norm": 0.6371370179585477, "learning_rate": 0.00012731141344048808, "loss": 12.2738, "step": 15778 }, { "epoch": 0.8592296120833418, "grad_norm": 0.562737486394942, "learning_rate": 0.00012730293033188722, "loss": 12.2651, "step": 15779 }, { "epoch": 0.8592840660799248, "grad_norm": 0.5862226570010126, "learning_rate": 0.0001272944470109742, "loss": 12.2035, "step": 15780 }, { "epoch": 0.8593385200765079, "grad_norm": 0.5991217322704121, "learning_rate": 0.000127285963477815, "loss": 12.213, "step": 15781 }, { "epoch": 0.8593929740730909, "grad_norm": 0.5343548554741508, "learning_rate": 0.00012727747973247558, "loss": 12.1085, "step": 15782 }, { "epoch": 0.8594474280696739, "grad_norm": 0.6527294044364248, "learning_rate": 0.0001272689957750219, "loss": 12.2033, "step": 15783 }, { "epoch": 0.8595018820662569, "grad_norm": 0.5903937465005267, "learning_rate": 0.0001272605116055199, "loss": 12.221, "step": 15784 }, { "epoch": 0.8595563360628399, "grad_norm": 0.5348246661864136, "learning_rate": 0.00012725202722403561, "loss": 12.1709, "step": 15785 }, { "epoch": 0.8596107900594229, "grad_norm": 0.5726522511258787, "learning_rate": 0.000127243542630635, "loss": 12.1589, "step": 15786 }, { "epoch": 0.859665244056006, "grad_norm": 0.5883931333080112, "learning_rate": 0.00012723505782538403, "loss": 12.1653, "step": 15787 }, { "epoch": 0.859719698052589, "grad_norm": 0.5167501422138584, "learning_rate": 0.00012722657280834866, "loss": 12.1629, "step": 15788 }, { "epoch": 0.859774152049172, "grad_norm": 0.5965163354165692, "learning_rate": 0.00012721808757959493, "loss": 12.1697, "step": 15789 }, { "epoch": 0.859828606045755, "grad_norm": 0.6206399193062977, "learning_rate": 0.00012720960213918875, "loss": 12.1275, "step": 15790 }, { "epoch": 0.8598830600423379, "grad_norm": 0.5041028697890138, "learning_rate": 0.00012720111648719618, "loss": 11.9535, "step": 15791 }, { "epoch": 0.859937514038921, "grad_norm": 0.5346675298940814, "learning_rate": 0.0001271926306236831, "loss": 12.1904, "step": 15792 }, { "epoch": 0.859991968035504, "grad_norm": 0.6794951925620087, "learning_rate": 0.00012718414454871563, "loss": 12.2198, "step": 15793 }, { "epoch": 0.860046422032087, "grad_norm": 0.5376773252891305, "learning_rate": 0.00012717565826235967, "loss": 12.196, "step": 15794 }, { "epoch": 0.86010087602867, "grad_norm": 0.572556725984343, "learning_rate": 0.00012716717176468125, "loss": 12.1767, "step": 15795 }, { "epoch": 0.860155330025253, "grad_norm": 0.5859922266043762, "learning_rate": 0.00012715868505574633, "loss": 12.2814, "step": 15796 }, { "epoch": 0.860209784021836, "grad_norm": 0.5422575776659524, "learning_rate": 0.00012715019813562092, "loss": 12.1301, "step": 15797 }, { "epoch": 0.8602642380184191, "grad_norm": 0.563991747041213, "learning_rate": 0.00012714171100437101, "loss": 12.2544, "step": 15798 }, { "epoch": 0.8603186920150021, "grad_norm": 0.5510860391557669, "learning_rate": 0.00012713322366206262, "loss": 12.1942, "step": 15799 }, { "epoch": 0.8603731460115851, "grad_norm": 0.5276618685659437, "learning_rate": 0.00012712473610876173, "loss": 12.1224, "step": 15800 }, { "epoch": 0.8604276000081681, "grad_norm": 0.5539203820098891, "learning_rate": 0.00012711624834453434, "loss": 12.2318, "step": 15801 }, { "epoch": 0.8604820540047511, "grad_norm": 0.5027654839906905, "learning_rate": 0.00012710776036944644, "loss": 12.1669, "step": 15802 }, { "epoch": 0.8605365080013341, "grad_norm": 0.5413982573232451, "learning_rate": 0.00012709927218356408, "loss": 12.1924, "step": 15803 }, { "epoch": 0.8605909619979172, "grad_norm": 0.5670345239907993, "learning_rate": 0.0001270907837869532, "loss": 12.2394, "step": 15804 }, { "epoch": 0.8606454159945002, "grad_norm": 0.6040295601424962, "learning_rate": 0.00012708229517967987, "loss": 12.2011, "step": 15805 }, { "epoch": 0.8606998699910832, "grad_norm": 0.5927785994065459, "learning_rate": 0.00012707380636181004, "loss": 12.2454, "step": 15806 }, { "epoch": 0.8607543239876662, "grad_norm": 0.6117346037164473, "learning_rate": 0.0001270653173334098, "loss": 12.3725, "step": 15807 }, { "epoch": 0.8608087779842492, "grad_norm": 0.5259833447062889, "learning_rate": 0.0001270568280945451, "loss": 12.1544, "step": 15808 }, { "epoch": 0.8608632319808321, "grad_norm": 0.5304112166595931, "learning_rate": 0.00012704833864528195, "loss": 12.1403, "step": 15809 }, { "epoch": 0.8609176859774152, "grad_norm": 0.5797000367923341, "learning_rate": 0.00012703984898568637, "loss": 12.182, "step": 15810 }, { "epoch": 0.8609721399739982, "grad_norm": 0.6167607347967838, "learning_rate": 0.0001270313591158244, "loss": 12.2032, "step": 15811 }, { "epoch": 0.8610265939705812, "grad_norm": 0.6009461728701405, "learning_rate": 0.00012702286903576207, "loss": 12.2631, "step": 15812 }, { "epoch": 0.8610810479671642, "grad_norm": 0.5570489244263896, "learning_rate": 0.00012701437874556537, "loss": 12.2562, "step": 15813 }, { "epoch": 0.8611355019637472, "grad_norm": 0.5456738299269331, "learning_rate": 0.0001270058882453003, "loss": 12.1073, "step": 15814 }, { "epoch": 0.8611899559603302, "grad_norm": 0.5308395603172099, "learning_rate": 0.00012699739753503295, "loss": 12.3165, "step": 15815 }, { "epoch": 0.8612444099569133, "grad_norm": 0.5629932726871376, "learning_rate": 0.0001269889066148293, "loss": 12.0754, "step": 15816 }, { "epoch": 0.8612988639534963, "grad_norm": 0.5583612163321409, "learning_rate": 0.00012698041548475539, "loss": 12.055, "step": 15817 }, { "epoch": 0.8613533179500793, "grad_norm": 0.5271745262645791, "learning_rate": 0.00012697192414487724, "loss": 12.2464, "step": 15818 }, { "epoch": 0.8614077719466623, "grad_norm": 0.7819952104852348, "learning_rate": 0.00012696343259526087, "loss": 12.2299, "step": 15819 }, { "epoch": 0.8614622259432453, "grad_norm": 0.5765961651972086, "learning_rate": 0.00012695494083597234, "loss": 12.0144, "step": 15820 }, { "epoch": 0.8615166799398284, "grad_norm": 0.5198545087241869, "learning_rate": 0.00012694644886707766, "loss": 12.0843, "step": 15821 }, { "epoch": 0.8615711339364114, "grad_norm": 0.5480677269766174, "learning_rate": 0.0001269379566886429, "loss": 12.1978, "step": 15822 }, { "epoch": 0.8616255879329944, "grad_norm": 0.5826026538724887, "learning_rate": 0.00012692946430073408, "loss": 12.1008, "step": 15823 }, { "epoch": 0.8616800419295774, "grad_norm": 0.6127132481854947, "learning_rate": 0.00012692097170341718, "loss": 12.0893, "step": 15824 }, { "epoch": 0.8617344959261604, "grad_norm": 0.5628802050208016, "learning_rate": 0.0001269124788967583, "loss": 12.2108, "step": 15825 }, { "epoch": 0.8617889499227434, "grad_norm": 0.5169071358368816, "learning_rate": 0.00012690398588082347, "loss": 12.163, "step": 15826 }, { "epoch": 0.8618434039193265, "grad_norm": 0.6288968216432084, "learning_rate": 0.00012689549265567878, "loss": 12.2239, "step": 15827 }, { "epoch": 0.8618978579159094, "grad_norm": 0.5522019003477887, "learning_rate": 0.0001268869992213902, "loss": 12.232, "step": 15828 }, { "epoch": 0.8619523119124924, "grad_norm": 0.560181845302788, "learning_rate": 0.0001268785055780238, "loss": 12.1887, "step": 15829 }, { "epoch": 0.8620067659090754, "grad_norm": 0.6260370544594249, "learning_rate": 0.0001268700117256456, "loss": 12.1765, "step": 15830 }, { "epoch": 0.8620612199056584, "grad_norm": 0.5681163602908822, "learning_rate": 0.00012686151766432173, "loss": 12.164, "step": 15831 }, { "epoch": 0.8621156739022414, "grad_norm": 0.5935928659798232, "learning_rate": 0.0001268530233941182, "loss": 12.1859, "step": 15832 }, { "epoch": 0.8621701278988245, "grad_norm": 0.5675365643912833, "learning_rate": 0.00012684452891510104, "loss": 12.1672, "step": 15833 }, { "epoch": 0.8622245818954075, "grad_norm": 0.6448365470006007, "learning_rate": 0.00012683603422733631, "loss": 12.2005, "step": 15834 }, { "epoch": 0.8622790358919905, "grad_norm": 0.5880991860675321, "learning_rate": 0.0001268275393308901, "loss": 12.088, "step": 15835 }, { "epoch": 0.8623334898885735, "grad_norm": 0.5695140928744413, "learning_rate": 0.0001268190442258284, "loss": 12.0658, "step": 15836 }, { "epoch": 0.8623879438851565, "grad_norm": 0.5976367558594671, "learning_rate": 0.00012681054891221737, "loss": 12.1741, "step": 15837 }, { "epoch": 0.8624423978817395, "grad_norm": 0.588921568249969, "learning_rate": 0.000126802053390123, "loss": 12.1703, "step": 15838 }, { "epoch": 0.8624968518783226, "grad_norm": 0.5867597648512988, "learning_rate": 0.00012679355765961135, "loss": 12.2174, "step": 15839 }, { "epoch": 0.8625513058749056, "grad_norm": 0.6466506954919735, "learning_rate": 0.00012678506172074852, "loss": 12.3293, "step": 15840 }, { "epoch": 0.8626057598714886, "grad_norm": 0.5361124196749647, "learning_rate": 0.00012677656557360053, "loss": 12.1079, "step": 15841 }, { "epoch": 0.8626602138680716, "grad_norm": 0.6141638826558271, "learning_rate": 0.00012676806921823352, "loss": 12.2044, "step": 15842 }, { "epoch": 0.8627146678646546, "grad_norm": 0.5573530110558617, "learning_rate": 0.0001267595726547135, "loss": 12.0857, "step": 15843 }, { "epoch": 0.8627691218612376, "grad_norm": 0.5294474626887843, "learning_rate": 0.00012675107588310653, "loss": 12.1398, "step": 15844 }, { "epoch": 0.8628235758578207, "grad_norm": 0.547586828976658, "learning_rate": 0.00012674257890347873, "loss": 12.2056, "step": 15845 }, { "epoch": 0.8628780298544036, "grad_norm": 0.5949898096535011, "learning_rate": 0.00012673408171589616, "loss": 12.357, "step": 15846 }, { "epoch": 0.8629324838509866, "grad_norm": 0.6332146130789029, "learning_rate": 0.00012672558432042487, "loss": 12.138, "step": 15847 }, { "epoch": 0.8629869378475696, "grad_norm": 0.5961911285767854, "learning_rate": 0.00012671708671713097, "loss": 12.1459, "step": 15848 }, { "epoch": 0.8630413918441526, "grad_norm": 0.5387073458048571, "learning_rate": 0.00012670858890608048, "loss": 12.1757, "step": 15849 }, { "epoch": 0.8630958458407356, "grad_norm": 0.5953776124197804, "learning_rate": 0.00012670009088733955, "loss": 12.2023, "step": 15850 }, { "epoch": 0.8631502998373187, "grad_norm": 0.5234957418215699, "learning_rate": 0.00012669159266097426, "loss": 12.1306, "step": 15851 }, { "epoch": 0.8632047538339017, "grad_norm": 0.6395342720574844, "learning_rate": 0.00012668309422705063, "loss": 12.3002, "step": 15852 }, { "epoch": 0.8632592078304847, "grad_norm": 0.5452036442524539, "learning_rate": 0.0001266745955856348, "loss": 12.1587, "step": 15853 }, { "epoch": 0.8633136618270677, "grad_norm": 0.5731158621347431, "learning_rate": 0.0001266660967367929, "loss": 12.2635, "step": 15854 }, { "epoch": 0.8633681158236507, "grad_norm": 0.5685607595502273, "learning_rate": 0.00012665759768059085, "loss": 12.2997, "step": 15855 }, { "epoch": 0.8634225698202338, "grad_norm": 0.578836813347422, "learning_rate": 0.00012664909841709495, "loss": 12.0963, "step": 15856 }, { "epoch": 0.8634770238168168, "grad_norm": 0.5947050777966181, "learning_rate": 0.00012664059894637112, "loss": 12.2036, "step": 15857 }, { "epoch": 0.8635314778133998, "grad_norm": 0.5946116539132492, "learning_rate": 0.00012663209926848555, "loss": 12.0137, "step": 15858 }, { "epoch": 0.8635859318099828, "grad_norm": 0.5032585307333325, "learning_rate": 0.00012662359938350433, "loss": 12.1626, "step": 15859 }, { "epoch": 0.8636403858065658, "grad_norm": 0.5593607639097642, "learning_rate": 0.00012661509929149352, "loss": 12.0341, "step": 15860 }, { "epoch": 0.8636948398031488, "grad_norm": 0.6007704384398124, "learning_rate": 0.00012660659899251924, "loss": 12.3132, "step": 15861 }, { "epoch": 0.8637492937997319, "grad_norm": 0.6161230813975721, "learning_rate": 0.00012659809848664756, "loss": 12.0831, "step": 15862 }, { "epoch": 0.8638037477963149, "grad_norm": 0.5722203566505186, "learning_rate": 0.00012658959777394463, "loss": 12.2513, "step": 15863 }, { "epoch": 0.8638582017928979, "grad_norm": 0.5794489421825233, "learning_rate": 0.00012658109685447652, "loss": 12.272, "step": 15864 }, { "epoch": 0.8639126557894808, "grad_norm": 0.6115055730524654, "learning_rate": 0.00012657259572830935, "loss": 12.2261, "step": 15865 }, { "epoch": 0.8639671097860638, "grad_norm": 0.5648488003671629, "learning_rate": 0.0001265640943955092, "loss": 12.2224, "step": 15866 }, { "epoch": 0.8640215637826468, "grad_norm": 0.5465945861616255, "learning_rate": 0.00012655559285614225, "loss": 12.189, "step": 15867 }, { "epoch": 0.8640760177792299, "grad_norm": 0.6274956086793052, "learning_rate": 0.00012654709111027448, "loss": 12.1032, "step": 15868 }, { "epoch": 0.8641304717758129, "grad_norm": 0.5903592400481897, "learning_rate": 0.00012653858915797212, "loss": 12.1017, "step": 15869 }, { "epoch": 0.8641849257723959, "grad_norm": 0.5421409523757088, "learning_rate": 0.00012653008699930123, "loss": 12.1491, "step": 15870 }, { "epoch": 0.8642393797689789, "grad_norm": 0.569210839807263, "learning_rate": 0.00012652158463432795, "loss": 12.2195, "step": 15871 }, { "epoch": 0.8642938337655619, "grad_norm": 0.5722602728817883, "learning_rate": 0.0001265130820631184, "loss": 12.2284, "step": 15872 }, { "epoch": 0.8643482877621449, "grad_norm": 0.6533167097267406, "learning_rate": 0.00012650457928573862, "loss": 12.2311, "step": 15873 }, { "epoch": 0.864402741758728, "grad_norm": 0.578072245143734, "learning_rate": 0.00012649607630225483, "loss": 12.245, "step": 15874 }, { "epoch": 0.864457195755311, "grad_norm": 0.5929833512597014, "learning_rate": 0.00012648757311273308, "loss": 12.24, "step": 15875 }, { "epoch": 0.864511649751894, "grad_norm": 0.6360942032083976, "learning_rate": 0.00012647906971723953, "loss": 12.1463, "step": 15876 }, { "epoch": 0.864566103748477, "grad_norm": 0.5783819182317385, "learning_rate": 0.00012647056611584032, "loss": 12.1567, "step": 15877 }, { "epoch": 0.86462055774506, "grad_norm": 0.6337107565036638, "learning_rate": 0.0001264620623086015, "loss": 12.234, "step": 15878 }, { "epoch": 0.864675011741643, "grad_norm": 0.5829480172215896, "learning_rate": 0.00012645355829558926, "loss": 12.1982, "step": 15879 }, { "epoch": 0.8647294657382261, "grad_norm": 0.5889035316620181, "learning_rate": 0.00012644505407686973, "loss": 12.2011, "step": 15880 }, { "epoch": 0.8647839197348091, "grad_norm": 0.5656346434970699, "learning_rate": 0.000126436549652509, "loss": 12.2205, "step": 15881 }, { "epoch": 0.864838373731392, "grad_norm": 0.6134097132972022, "learning_rate": 0.00012642804502257328, "loss": 12.0625, "step": 15882 }, { "epoch": 0.864892827727975, "grad_norm": 0.555717845222476, "learning_rate": 0.00012641954018712863, "loss": 12.1802, "step": 15883 }, { "epoch": 0.864947281724558, "grad_norm": 0.6775557458447009, "learning_rate": 0.00012641103514624116, "loss": 12.2947, "step": 15884 }, { "epoch": 0.865001735721141, "grad_norm": 0.6434186072760585, "learning_rate": 0.0001264025298999771, "loss": 12.3112, "step": 15885 }, { "epoch": 0.8650561897177241, "grad_norm": 0.5606701802164176, "learning_rate": 0.0001263940244484025, "loss": 12.1202, "step": 15886 }, { "epoch": 0.8651106437143071, "grad_norm": 0.5973302884811093, "learning_rate": 0.00012638551879158358, "loss": 12.264, "step": 15887 }, { "epoch": 0.8651650977108901, "grad_norm": 0.5938117907409824, "learning_rate": 0.00012637701292958644, "loss": 12.1346, "step": 15888 }, { "epoch": 0.8652195517074731, "grad_norm": 0.5702681890575382, "learning_rate": 0.0001263685068624772, "loss": 12.1939, "step": 15889 }, { "epoch": 0.8652740057040561, "grad_norm": 0.5664421468399776, "learning_rate": 0.000126360000590322, "loss": 12.1546, "step": 15890 }, { "epoch": 0.8653284597006392, "grad_norm": 0.5531560334769265, "learning_rate": 0.00012635149411318705, "loss": 12.2315, "step": 15891 }, { "epoch": 0.8653829136972222, "grad_norm": 0.5814574553270802, "learning_rate": 0.0001263429874311385, "loss": 12.0902, "step": 15892 }, { "epoch": 0.8654373676938052, "grad_norm": 0.5422698332386978, "learning_rate": 0.00012633448054424242, "loss": 12.0986, "step": 15893 }, { "epoch": 0.8654918216903882, "grad_norm": 0.5539154433284016, "learning_rate": 0.000126325973452565, "loss": 12.1193, "step": 15894 }, { "epoch": 0.8655462756869712, "grad_norm": 0.5322445342023846, "learning_rate": 0.0001263174661561724, "loss": 12.1351, "step": 15895 }, { "epoch": 0.8656007296835542, "grad_norm": 0.6011871815047078, "learning_rate": 0.00012630895865513075, "loss": 12.2558, "step": 15896 }, { "epoch": 0.8656551836801373, "grad_norm": 0.5421870278380945, "learning_rate": 0.00012630045094950626, "loss": 12.0785, "step": 15897 }, { "epoch": 0.8657096376767203, "grad_norm": 0.5153219346986837, "learning_rate": 0.00012629194303936508, "loss": 12.0322, "step": 15898 }, { "epoch": 0.8657640916733033, "grad_norm": 0.5698421826144286, "learning_rate": 0.0001262834349247733, "loss": 12.0598, "step": 15899 }, { "epoch": 0.8658185456698863, "grad_norm": 0.5698797197970296, "learning_rate": 0.00012627492660579711, "loss": 12.1255, "step": 15900 }, { "epoch": 0.8658729996664692, "grad_norm": 0.5740622128057058, "learning_rate": 0.0001262664180825027, "loss": 12.1623, "step": 15901 }, { "epoch": 0.8659274536630522, "grad_norm": 0.6295996044812211, "learning_rate": 0.0001262579093549562, "loss": 12.4251, "step": 15902 }, { "epoch": 0.8659819076596353, "grad_norm": 0.6189536561994822, "learning_rate": 0.0001262494004232238, "loss": 12.2374, "step": 15903 }, { "epoch": 0.8660363616562183, "grad_norm": 0.5553251154063896, "learning_rate": 0.0001262408912873717, "loss": 12.1977, "step": 15904 }, { "epoch": 0.8660908156528013, "grad_norm": 0.5551849578424735, "learning_rate": 0.000126232381947466, "loss": 12.112, "step": 15905 }, { "epoch": 0.8661452696493843, "grad_norm": 0.6238235130029314, "learning_rate": 0.0001262238724035729, "loss": 12.141, "step": 15906 }, { "epoch": 0.8661997236459673, "grad_norm": 0.49946598932732883, "learning_rate": 0.00012621536265575856, "loss": 12.1029, "step": 15907 }, { "epoch": 0.8662541776425503, "grad_norm": 0.645038326550859, "learning_rate": 0.00012620685270408916, "loss": 12.1514, "step": 15908 }, { "epoch": 0.8663086316391334, "grad_norm": 0.584876027405954, "learning_rate": 0.0001261983425486309, "loss": 12.2783, "step": 15909 }, { "epoch": 0.8663630856357164, "grad_norm": 0.5779854503830523, "learning_rate": 0.00012618983218944994, "loss": 12.2156, "step": 15910 }, { "epoch": 0.8664175396322994, "grad_norm": 0.5737317733374087, "learning_rate": 0.00012618132162661242, "loss": 12.2477, "step": 15911 }, { "epoch": 0.8664719936288824, "grad_norm": 0.5514906059361818, "learning_rate": 0.00012617281086018458, "loss": 12.2409, "step": 15912 }, { "epoch": 0.8665264476254654, "grad_norm": 0.6499006968948686, "learning_rate": 0.00012616429989023258, "loss": 12.3086, "step": 15913 }, { "epoch": 0.8665809016220484, "grad_norm": 0.5509850457484902, "learning_rate": 0.00012615578871682257, "loss": 12.1739, "step": 15914 }, { "epoch": 0.8666353556186315, "grad_norm": 0.6157756457540347, "learning_rate": 0.00012614727734002075, "loss": 11.9785, "step": 15915 }, { "epoch": 0.8666898096152145, "grad_norm": 0.5260273113757105, "learning_rate": 0.00012613876575989335, "loss": 12.1181, "step": 15916 }, { "epoch": 0.8667442636117975, "grad_norm": 0.5745111404782917, "learning_rate": 0.00012613025397650653, "loss": 12.21, "step": 15917 }, { "epoch": 0.8667987176083805, "grad_norm": 0.601576928480999, "learning_rate": 0.00012612174198992646, "loss": 12.3353, "step": 15918 }, { "epoch": 0.8668531716049634, "grad_norm": 0.7151657198245608, "learning_rate": 0.00012611322980021932, "loss": 12.2895, "step": 15919 }, { "epoch": 0.8669076256015464, "grad_norm": 0.5150769140549022, "learning_rate": 0.00012610471740745135, "loss": 12.1851, "step": 15920 }, { "epoch": 0.8669620795981295, "grad_norm": 0.6268954466952763, "learning_rate": 0.0001260962048116887, "loss": 12.1937, "step": 15921 }, { "epoch": 0.8670165335947125, "grad_norm": 0.5102438388337576, "learning_rate": 0.00012608769201299762, "loss": 12.1903, "step": 15922 }, { "epoch": 0.8670709875912955, "grad_norm": 0.6492952084312248, "learning_rate": 0.00012607917901144423, "loss": 12.3463, "step": 15923 }, { "epoch": 0.8671254415878785, "grad_norm": 0.5412306855807728, "learning_rate": 0.0001260706658070948, "loss": 12.2514, "step": 15924 }, { "epoch": 0.8671798955844615, "grad_norm": 0.5436226777635539, "learning_rate": 0.00012606215240001549, "loss": 12.1734, "step": 15925 }, { "epoch": 0.8672343495810446, "grad_norm": 0.5435772683271166, "learning_rate": 0.00012605363879027252, "loss": 12.1831, "step": 15926 }, { "epoch": 0.8672888035776276, "grad_norm": 0.5662856637085216, "learning_rate": 0.00012604512497793211, "loss": 12.2577, "step": 15927 }, { "epoch": 0.8673432575742106, "grad_norm": 0.557901457511073, "learning_rate": 0.0001260366109630604, "loss": 12.1894, "step": 15928 }, { "epoch": 0.8673977115707936, "grad_norm": 0.5852888269369317, "learning_rate": 0.00012602809674572364, "loss": 12.1945, "step": 15929 }, { "epoch": 0.8674521655673766, "grad_norm": 0.532930528537664, "learning_rate": 0.00012601958232598802, "loss": 12.1464, "step": 15930 }, { "epoch": 0.8675066195639596, "grad_norm": 0.5427560184485759, "learning_rate": 0.00012601106770391982, "loss": 12.1841, "step": 15931 }, { "epoch": 0.8675610735605427, "grad_norm": 0.593897701522849, "learning_rate": 0.0001260025528795852, "loss": 12.1475, "step": 15932 }, { "epoch": 0.8676155275571257, "grad_norm": 0.644566088488215, "learning_rate": 0.0001259940378530503, "loss": 12.2218, "step": 15933 }, { "epoch": 0.8676699815537087, "grad_norm": 0.6236033276293929, "learning_rate": 0.00012598552262438141, "loss": 12.2827, "step": 15934 }, { "epoch": 0.8677244355502917, "grad_norm": 0.5774315218311655, "learning_rate": 0.00012597700719364476, "loss": 12.2084, "step": 15935 }, { "epoch": 0.8677788895468747, "grad_norm": 0.5348287784866989, "learning_rate": 0.00012596849156090655, "loss": 12.1903, "step": 15936 }, { "epoch": 0.8678333435434576, "grad_norm": 0.5716130435917542, "learning_rate": 0.00012595997572623302, "loss": 12.1704, "step": 15937 }, { "epoch": 0.8678877975400408, "grad_norm": 0.8535427189945483, "learning_rate": 0.00012595145968969033, "loss": 12.1884, "step": 15938 }, { "epoch": 0.8679422515366237, "grad_norm": 0.5641547745325012, "learning_rate": 0.00012594294345134473, "loss": 12.2976, "step": 15939 }, { "epoch": 0.8679967055332067, "grad_norm": 0.5748166546920048, "learning_rate": 0.00012593442701126247, "loss": 12.0867, "step": 15940 }, { "epoch": 0.8680511595297897, "grad_norm": 0.5416784728172392, "learning_rate": 0.00012592591036950974, "loss": 12.2147, "step": 15941 }, { "epoch": 0.8681056135263727, "grad_norm": 0.5389782673423665, "learning_rate": 0.00012591739352615282, "loss": 12.2652, "step": 15942 }, { "epoch": 0.8681600675229557, "grad_norm": 0.5646252422136855, "learning_rate": 0.0001259088764812579, "loss": 12.2263, "step": 15943 }, { "epoch": 0.8682145215195388, "grad_norm": 0.5201360681378036, "learning_rate": 0.00012590035923489115, "loss": 12.0908, "step": 15944 }, { "epoch": 0.8682689755161218, "grad_norm": 0.5889451038920502, "learning_rate": 0.00012589184178711887, "loss": 12.0581, "step": 15945 }, { "epoch": 0.8683234295127048, "grad_norm": 0.5434486158643616, "learning_rate": 0.00012588332413800734, "loss": 12.3294, "step": 15946 }, { "epoch": 0.8683778835092878, "grad_norm": 0.5894799008113736, "learning_rate": 0.0001258748062876227, "loss": 12.3475, "step": 15947 }, { "epoch": 0.8684323375058708, "grad_norm": 0.5559143802835625, "learning_rate": 0.0001258662882360313, "loss": 12.1807, "step": 15948 }, { "epoch": 0.8684867915024538, "grad_norm": 0.5751187142046755, "learning_rate": 0.00012585776998329923, "loss": 12.2895, "step": 15949 }, { "epoch": 0.8685412454990369, "grad_norm": 0.583166156976266, "learning_rate": 0.0001258492515294928, "loss": 12.374, "step": 15950 }, { "epoch": 0.8685956994956199, "grad_norm": 0.5732070056996998, "learning_rate": 0.0001258407328746783, "loss": 12.1528, "step": 15951 }, { "epoch": 0.8686501534922029, "grad_norm": 0.5332503377591635, "learning_rate": 0.0001258322140189219, "loss": 12.0977, "step": 15952 }, { "epoch": 0.8687046074887859, "grad_norm": 0.5375694322894206, "learning_rate": 0.0001258236949622899, "loss": 12.1752, "step": 15953 }, { "epoch": 0.8687590614853689, "grad_norm": 0.5557590566613545, "learning_rate": 0.0001258151757048485, "loss": 12.177, "step": 15954 }, { "epoch": 0.868813515481952, "grad_norm": 0.5326035517730949, "learning_rate": 0.00012580665624666395, "loss": 12.1465, "step": 15955 }, { "epoch": 0.868867969478535, "grad_norm": 0.5769864726318715, "learning_rate": 0.0001257981365878025, "loss": 12.2239, "step": 15956 }, { "epoch": 0.8689224234751179, "grad_norm": 0.5476567244141276, "learning_rate": 0.00012578961672833044, "loss": 12.2517, "step": 15957 }, { "epoch": 0.8689768774717009, "grad_norm": 0.591401878428727, "learning_rate": 0.00012578109666831403, "loss": 12.1768, "step": 15958 }, { "epoch": 0.8690313314682839, "grad_norm": 0.5794924956390661, "learning_rate": 0.00012577257640781944, "loss": 12.2739, "step": 15959 }, { "epoch": 0.8690857854648669, "grad_norm": 0.5852044421285363, "learning_rate": 0.00012576405594691298, "loss": 12.1281, "step": 15960 }, { "epoch": 0.86914023946145, "grad_norm": 0.5417282944487868, "learning_rate": 0.00012575553528566092, "loss": 12.0221, "step": 15961 }, { "epoch": 0.869194693458033, "grad_norm": 0.5253617600990087, "learning_rate": 0.00012574701442412945, "loss": 12.2346, "step": 15962 }, { "epoch": 0.869249147454616, "grad_norm": 0.587901673765946, "learning_rate": 0.00012573849336238496, "loss": 12.1188, "step": 15963 }, { "epoch": 0.869303601451199, "grad_norm": 0.5501555510980765, "learning_rate": 0.00012572997210049354, "loss": 12.2694, "step": 15964 }, { "epoch": 0.869358055447782, "grad_norm": 0.5475981794367448, "learning_rate": 0.00012572145063852161, "loss": 12.2831, "step": 15965 }, { "epoch": 0.869412509444365, "grad_norm": 0.5385544190151177, "learning_rate": 0.00012571292897653534, "loss": 12.2199, "step": 15966 }, { "epoch": 0.8694669634409481, "grad_norm": 0.5752778166783943, "learning_rate": 0.000125704407114601, "loss": 12.2106, "step": 15967 }, { "epoch": 0.8695214174375311, "grad_norm": 0.5854288021900113, "learning_rate": 0.0001256958850527849, "loss": 12.2438, "step": 15968 }, { "epoch": 0.8695758714341141, "grad_norm": 0.5484390545530441, "learning_rate": 0.00012568736279115332, "loss": 12.211, "step": 15969 }, { "epoch": 0.8696303254306971, "grad_norm": 0.5633289860451813, "learning_rate": 0.00012567884032977245, "loss": 12.2505, "step": 15970 }, { "epoch": 0.8696847794272801, "grad_norm": 0.5497065293508597, "learning_rate": 0.00012567031766870865, "loss": 12.1282, "step": 15971 }, { "epoch": 0.8697392334238631, "grad_norm": 0.5536133855620301, "learning_rate": 0.00012566179480802812, "loss": 12.1886, "step": 15972 }, { "epoch": 0.8697936874204462, "grad_norm": 0.525991757190078, "learning_rate": 0.0001256532717477972, "loss": 12.1135, "step": 15973 }, { "epoch": 0.8698481414170292, "grad_norm": 0.5349464762038328, "learning_rate": 0.00012564474848808212, "loss": 12.248, "step": 15974 }, { "epoch": 0.8699025954136121, "grad_norm": 0.5385765964720809, "learning_rate": 0.00012563622502894918, "loss": 12.0118, "step": 15975 }, { "epoch": 0.8699570494101951, "grad_norm": 0.559178233160023, "learning_rate": 0.00012562770137046464, "loss": 12.1902, "step": 15976 }, { "epoch": 0.8700115034067781, "grad_norm": 0.5540507729228215, "learning_rate": 0.00012561917751269483, "loss": 12.1979, "step": 15977 }, { "epoch": 0.8700659574033611, "grad_norm": 0.5140207796261256, "learning_rate": 0.00012561065345570598, "loss": 12.0578, "step": 15978 }, { "epoch": 0.8701204113999442, "grad_norm": 0.5288952409117764, "learning_rate": 0.00012560212919956437, "loss": 12.2029, "step": 15979 }, { "epoch": 0.8701748653965272, "grad_norm": 0.5725414765013682, "learning_rate": 0.00012559360474433636, "loss": 12.1941, "step": 15980 }, { "epoch": 0.8702293193931102, "grad_norm": 0.5931801761743127, "learning_rate": 0.00012558508009008812, "loss": 12.2551, "step": 15981 }, { "epoch": 0.8702837733896932, "grad_norm": 0.5256172262315336, "learning_rate": 0.00012557655523688608, "loss": 12.123, "step": 15982 }, { "epoch": 0.8703382273862762, "grad_norm": 0.5238440893579909, "learning_rate": 0.00012556803018479642, "loss": 12.2338, "step": 15983 }, { "epoch": 0.8703926813828592, "grad_norm": 0.5536812282601996, "learning_rate": 0.00012555950493388547, "loss": 12.2798, "step": 15984 }, { "epoch": 0.8704471353794423, "grad_norm": 0.5504843169868991, "learning_rate": 0.00012555097948421952, "loss": 12.1371, "step": 15985 }, { "epoch": 0.8705015893760253, "grad_norm": 0.5987785474860935, "learning_rate": 0.00012554245383586488, "loss": 12.3446, "step": 15986 }, { "epoch": 0.8705560433726083, "grad_norm": 0.5325786338129379, "learning_rate": 0.00012553392798888785, "loss": 11.9497, "step": 15987 }, { "epoch": 0.8706104973691913, "grad_norm": 0.5434399515345769, "learning_rate": 0.00012552540194335466, "loss": 12.2137, "step": 15988 }, { "epoch": 0.8706649513657743, "grad_norm": 0.5620219933284507, "learning_rate": 0.00012551687569933173, "loss": 12.2886, "step": 15989 }, { "epoch": 0.8707194053623574, "grad_norm": 0.5546283490990715, "learning_rate": 0.00012550834925688525, "loss": 12.1043, "step": 15990 }, { "epoch": 0.8707738593589404, "grad_norm": 0.47488405181773846, "learning_rate": 0.0001254998226160816, "loss": 12.0254, "step": 15991 }, { "epoch": 0.8708283133555234, "grad_norm": 0.5528150439905408, "learning_rate": 0.000125491295776987, "loss": 12.0765, "step": 15992 }, { "epoch": 0.8708827673521063, "grad_norm": 0.5498831187915221, "learning_rate": 0.0001254827687396679, "loss": 12.0998, "step": 15993 }, { "epoch": 0.8709372213486893, "grad_norm": 0.543385726227821, "learning_rate": 0.00012547424150419044, "loss": 12.2286, "step": 15994 }, { "epoch": 0.8709916753452723, "grad_norm": 0.5621492505997553, "learning_rate": 0.00012546571407062103, "loss": 12.2143, "step": 15995 }, { "epoch": 0.8710461293418554, "grad_norm": 0.5433169361833831, "learning_rate": 0.00012545718643902594, "loss": 12.2198, "step": 15996 }, { "epoch": 0.8711005833384384, "grad_norm": 0.5223570553806087, "learning_rate": 0.0001254486586094715, "loss": 12.0896, "step": 15997 }, { "epoch": 0.8711550373350214, "grad_norm": 0.5249600347028466, "learning_rate": 0.00012544013058202405, "loss": 12.1902, "step": 15998 }, { "epoch": 0.8712094913316044, "grad_norm": 0.5305491138861335, "learning_rate": 0.00012543160235674986, "loss": 12.0778, "step": 15999 }, { "epoch": 0.8712639453281874, "grad_norm": 0.5129323620281291, "learning_rate": 0.00012542307393371526, "loss": 12.1419, "step": 16000 }, { "epoch": 0.8713183993247704, "grad_norm": 0.6005913292330536, "learning_rate": 0.0001254145453129866, "loss": 12.0793, "step": 16001 }, { "epoch": 0.8713728533213535, "grad_norm": 0.5728093693687083, "learning_rate": 0.00012540601649463015, "loss": 12.123, "step": 16002 }, { "epoch": 0.8714273073179365, "grad_norm": 0.5636253436070369, "learning_rate": 0.00012539748747871228, "loss": 12.1248, "step": 16003 }, { "epoch": 0.8714817613145195, "grad_norm": 0.5975695198443635, "learning_rate": 0.0001253889582652993, "loss": 12.1997, "step": 16004 }, { "epoch": 0.8715362153111025, "grad_norm": 0.5694835079399884, "learning_rate": 0.00012538042885445745, "loss": 12.2186, "step": 16005 }, { "epoch": 0.8715906693076855, "grad_norm": 0.533297799927533, "learning_rate": 0.00012537189924625316, "loss": 12.2206, "step": 16006 }, { "epoch": 0.8716451233042685, "grad_norm": 0.5422775699384693, "learning_rate": 0.00012536336944075276, "loss": 12.1786, "step": 16007 }, { "epoch": 0.8716995773008516, "grad_norm": 0.5741067704232433, "learning_rate": 0.00012535483943802253, "loss": 12.2374, "step": 16008 }, { "epoch": 0.8717540312974346, "grad_norm": 0.539661125867655, "learning_rate": 0.0001253463092381288, "loss": 12.1391, "step": 16009 }, { "epoch": 0.8718084852940176, "grad_norm": 0.5816111108862853, "learning_rate": 0.00012533777884113793, "loss": 12.1623, "step": 16010 }, { "epoch": 0.8718629392906005, "grad_norm": 0.5311156149067617, "learning_rate": 0.00012532924824711623, "loss": 12.2057, "step": 16011 }, { "epoch": 0.8719173932871835, "grad_norm": 0.5295481418916593, "learning_rate": 0.00012532071745613007, "loss": 12.1391, "step": 16012 }, { "epoch": 0.8719718472837665, "grad_norm": 0.6036077984497444, "learning_rate": 0.00012531218646824577, "loss": 12.1833, "step": 16013 }, { "epoch": 0.8720263012803496, "grad_norm": 0.5358288325689889, "learning_rate": 0.00012530365528352964, "loss": 12.1514, "step": 16014 }, { "epoch": 0.8720807552769326, "grad_norm": 0.6269518042813348, "learning_rate": 0.00012529512390204808, "loss": 12.2951, "step": 16015 }, { "epoch": 0.8721352092735156, "grad_norm": 0.6034229536886009, "learning_rate": 0.00012528659232386732, "loss": 12.2408, "step": 16016 }, { "epoch": 0.8721896632700986, "grad_norm": 0.5514941628900512, "learning_rate": 0.00012527806054905382, "loss": 12.0462, "step": 16017 }, { "epoch": 0.8722441172666816, "grad_norm": 0.6823523013440215, "learning_rate": 0.0001252695285776739, "loss": 12.3112, "step": 16018 }, { "epoch": 0.8722985712632646, "grad_norm": 0.6098942419908867, "learning_rate": 0.0001252609964097939, "loss": 12.175, "step": 16019 }, { "epoch": 0.8723530252598477, "grad_norm": 0.5719599376070134, "learning_rate": 0.00012525246404548011, "loss": 12.1688, "step": 16020 }, { "epoch": 0.8724074792564307, "grad_norm": 0.5623532728458146, "learning_rate": 0.00012524393148479897, "loss": 12.0794, "step": 16021 }, { "epoch": 0.8724619332530137, "grad_norm": 0.5240231925996959, "learning_rate": 0.0001252353987278167, "loss": 12.1055, "step": 16022 }, { "epoch": 0.8725163872495967, "grad_norm": 0.6506632285409836, "learning_rate": 0.00012522686577459984, "loss": 12.2739, "step": 16023 }, { "epoch": 0.8725708412461797, "grad_norm": 0.5723476929353505, "learning_rate": 0.00012521833262521458, "loss": 12.1607, "step": 16024 }, { "epoch": 0.8726252952427628, "grad_norm": 0.6015507472500625, "learning_rate": 0.00012520979927972738, "loss": 12.2483, "step": 16025 }, { "epoch": 0.8726797492393458, "grad_norm": 0.5956510012054886, "learning_rate": 0.00012520126573820452, "loss": 12.2262, "step": 16026 }, { "epoch": 0.8727342032359288, "grad_norm": 0.5526715767225209, "learning_rate": 0.00012519273200071237, "loss": 12.1711, "step": 16027 }, { "epoch": 0.8727886572325118, "grad_norm": 0.5974678518446006, "learning_rate": 0.00012518419806731735, "loss": 12.2869, "step": 16028 }, { "epoch": 0.8728431112290947, "grad_norm": 0.5520163993498713, "learning_rate": 0.0001251756639380858, "loss": 12.0081, "step": 16029 }, { "epoch": 0.8728975652256777, "grad_norm": 0.5639711796095425, "learning_rate": 0.00012516712961308402, "loss": 12.2842, "step": 16030 }, { "epoch": 0.8729520192222608, "grad_norm": 0.5580007563756508, "learning_rate": 0.00012515859509237845, "loss": 12.3199, "step": 16031 }, { "epoch": 0.8730064732188438, "grad_norm": 0.6335581879602564, "learning_rate": 0.0001251500603760354, "loss": 12.2911, "step": 16032 }, { "epoch": 0.8730609272154268, "grad_norm": 0.6078406349282465, "learning_rate": 0.00012514152546412127, "loss": 12.1284, "step": 16033 }, { "epoch": 0.8731153812120098, "grad_norm": 0.5154839706557784, "learning_rate": 0.00012513299035670246, "loss": 12.1873, "step": 16034 }, { "epoch": 0.8731698352085928, "grad_norm": 0.5187457942704194, "learning_rate": 0.00012512445505384525, "loss": 12.1551, "step": 16035 }, { "epoch": 0.8732242892051758, "grad_norm": 0.6452981420258556, "learning_rate": 0.00012511591955561608, "loss": 12.188, "step": 16036 }, { "epoch": 0.8732787432017589, "grad_norm": 0.6740558251184862, "learning_rate": 0.00012510738386208132, "loss": 12.3964, "step": 16037 }, { "epoch": 0.8733331971983419, "grad_norm": 0.5663356963926833, "learning_rate": 0.00012509884797330732, "loss": 12.0535, "step": 16038 }, { "epoch": 0.8733876511949249, "grad_norm": 0.5878177856957262, "learning_rate": 0.00012509031188936046, "loss": 12.0532, "step": 16039 }, { "epoch": 0.8734421051915079, "grad_norm": 0.5952682212176018, "learning_rate": 0.00012508177561030716, "loss": 12.1387, "step": 16040 }, { "epoch": 0.8734965591880909, "grad_norm": 0.5413794683777703, "learning_rate": 0.00012507323913621375, "loss": 12.1888, "step": 16041 }, { "epoch": 0.8735510131846739, "grad_norm": 0.5450852611443711, "learning_rate": 0.0001250647024671466, "loss": 12.1893, "step": 16042 }, { "epoch": 0.873605467181257, "grad_norm": 0.574623152064138, "learning_rate": 0.00012505616560317217, "loss": 12.2776, "step": 16043 }, { "epoch": 0.87365992117784, "grad_norm": 0.4820315833893964, "learning_rate": 0.00012504762854435676, "loss": 12.0415, "step": 16044 }, { "epoch": 0.873714375174423, "grad_norm": 0.5191268713251201, "learning_rate": 0.0001250390912907668, "loss": 12.2193, "step": 16045 }, { "epoch": 0.873768829171006, "grad_norm": 0.5708885380831076, "learning_rate": 0.00012503055384246867, "loss": 12.1649, "step": 16046 }, { "epoch": 0.873823283167589, "grad_norm": 0.626996981524449, "learning_rate": 0.00012502201619952875, "loss": 12.1362, "step": 16047 }, { "epoch": 0.8738777371641719, "grad_norm": 0.582192853950474, "learning_rate": 0.00012501347836201343, "loss": 12.1748, "step": 16048 }, { "epoch": 0.873932191160755, "grad_norm": 0.5500699566784814, "learning_rate": 0.0001250049403299891, "loss": 12.2157, "step": 16049 }, { "epoch": 0.873986645157338, "grad_norm": 0.5405585547116991, "learning_rate": 0.00012499640210352219, "loss": 12.212, "step": 16050 }, { "epoch": 0.874041099153921, "grad_norm": 0.5180377431941692, "learning_rate": 0.00012498786368267905, "loss": 12.1162, "step": 16051 }, { "epoch": 0.874095553150504, "grad_norm": 0.5348153153227052, "learning_rate": 0.0001249793250675261, "loss": 12.1576, "step": 16052 }, { "epoch": 0.874150007147087, "grad_norm": 0.604938726107424, "learning_rate": 0.00012497078625812975, "loss": 12.0198, "step": 16053 }, { "epoch": 0.87420446114367, "grad_norm": 0.5422674776228027, "learning_rate": 0.00012496224725455632, "loss": 12.1241, "step": 16054 }, { "epoch": 0.8742589151402531, "grad_norm": 0.5538596352364217, "learning_rate": 0.0001249537080568723, "loss": 12.0372, "step": 16055 }, { "epoch": 0.8743133691368361, "grad_norm": 0.5431686420588868, "learning_rate": 0.00012494516866514406, "loss": 12.187, "step": 16056 }, { "epoch": 0.8743678231334191, "grad_norm": 0.5386989318844304, "learning_rate": 0.000124936629079438, "loss": 12.2008, "step": 16057 }, { "epoch": 0.8744222771300021, "grad_norm": 0.544736835791952, "learning_rate": 0.00012492808929982056, "loss": 12.3372, "step": 16058 }, { "epoch": 0.8744767311265851, "grad_norm": 0.6666965240807943, "learning_rate": 0.0001249195493263581, "loss": 12.1557, "step": 16059 }, { "epoch": 0.8745311851231682, "grad_norm": 0.5583615651695085, "learning_rate": 0.00012491100915911702, "loss": 12.3522, "step": 16060 }, { "epoch": 0.8745856391197512, "grad_norm": 0.6220010193284105, "learning_rate": 0.00012490246879816376, "loss": 12.2049, "step": 16061 }, { "epoch": 0.8746400931163342, "grad_norm": 0.5007739341726704, "learning_rate": 0.00012489392824356475, "loss": 12.1438, "step": 16062 }, { "epoch": 0.8746945471129172, "grad_norm": 0.51788794478555, "learning_rate": 0.0001248853874953864, "loss": 12.1486, "step": 16063 }, { "epoch": 0.8747490011095002, "grad_norm": 0.5350124014753489, "learning_rate": 0.00012487684655369507, "loss": 12.1837, "step": 16064 }, { "epoch": 0.8748034551060831, "grad_norm": 0.5369649604490867, "learning_rate": 0.00012486830541855718, "loss": 12.2681, "step": 16065 }, { "epoch": 0.8748579091026663, "grad_norm": 0.536904150078264, "learning_rate": 0.0001248597640900392, "loss": 12.1093, "step": 16066 }, { "epoch": 0.8749123630992492, "grad_norm": 0.5389369443292675, "learning_rate": 0.00012485122256820756, "loss": 11.9681, "step": 16067 }, { "epoch": 0.8749668170958322, "grad_norm": 0.5705666993112773, "learning_rate": 0.00012484268085312863, "loss": 12.1541, "step": 16068 }, { "epoch": 0.8750212710924152, "grad_norm": 0.5060632652893851, "learning_rate": 0.00012483413894486884, "loss": 12.2575, "step": 16069 }, { "epoch": 0.8750757250889982, "grad_norm": 0.5912434449471482, "learning_rate": 0.00012482559684349461, "loss": 12.1034, "step": 16070 }, { "epoch": 0.8751301790855812, "grad_norm": 0.5795383894091256, "learning_rate": 0.00012481705454907237, "loss": 12.038, "step": 16071 }, { "epoch": 0.8751846330821643, "grad_norm": 0.5638125410718151, "learning_rate": 0.00012480851206166858, "loss": 12.1461, "step": 16072 }, { "epoch": 0.8752390870787473, "grad_norm": 0.5825779199294095, "learning_rate": 0.00012479996938134964, "loss": 12.0403, "step": 16073 }, { "epoch": 0.8752935410753303, "grad_norm": 0.5298141174132485, "learning_rate": 0.00012479142650818195, "loss": 12.0994, "step": 16074 }, { "epoch": 0.8753479950719133, "grad_norm": 0.6573123207917692, "learning_rate": 0.00012478288344223198, "loss": 12.3921, "step": 16075 }, { "epoch": 0.8754024490684963, "grad_norm": 0.6068568486815071, "learning_rate": 0.00012477434018356615, "loss": 12.1627, "step": 16076 }, { "epoch": 0.8754569030650793, "grad_norm": 0.609707243377987, "learning_rate": 0.0001247657967322509, "loss": 12.0332, "step": 16077 }, { "epoch": 0.8755113570616624, "grad_norm": 0.5680797285820538, "learning_rate": 0.00012475725308835268, "loss": 12.1601, "step": 16078 }, { "epoch": 0.8755658110582454, "grad_norm": 0.5526014221770983, "learning_rate": 0.0001247487092519379, "loss": 12.1961, "step": 16079 }, { "epoch": 0.8756202650548284, "grad_norm": 0.6115334683613696, "learning_rate": 0.00012474016522307302, "loss": 12.2713, "step": 16080 }, { "epoch": 0.8756747190514114, "grad_norm": 0.6858296789782814, "learning_rate": 0.00012473162100182442, "loss": 12.1809, "step": 16081 }, { "epoch": 0.8757291730479944, "grad_norm": 0.5441928771376101, "learning_rate": 0.0001247230765882586, "loss": 12.1297, "step": 16082 }, { "epoch": 0.8757836270445774, "grad_norm": 0.5270975788304015, "learning_rate": 0.00012471453198244204, "loss": 12.0124, "step": 16083 }, { "epoch": 0.8758380810411605, "grad_norm": 0.5791745606955657, "learning_rate": 0.0001247059871844411, "loss": 12.0933, "step": 16084 }, { "epoch": 0.8758925350377434, "grad_norm": 0.7093793546578929, "learning_rate": 0.00012469744219432226, "loss": 12.3378, "step": 16085 }, { "epoch": 0.8759469890343264, "grad_norm": 0.5754791802733077, "learning_rate": 0.00012468889701215197, "loss": 12.1667, "step": 16086 }, { "epoch": 0.8760014430309094, "grad_norm": 0.5710204967149822, "learning_rate": 0.00012468035163799667, "loss": 12.143, "step": 16087 }, { "epoch": 0.8760558970274924, "grad_norm": 0.5749549445115179, "learning_rate": 0.0001246718060719228, "loss": 12.1046, "step": 16088 }, { "epoch": 0.8761103510240754, "grad_norm": 0.5773812106423682, "learning_rate": 0.00012466326031399688, "loss": 12.2384, "step": 16089 }, { "epoch": 0.8761648050206585, "grad_norm": 0.5467160672068934, "learning_rate": 0.00012465471436428526, "loss": 12.1804, "step": 16090 }, { "epoch": 0.8762192590172415, "grad_norm": 0.5905830453636292, "learning_rate": 0.00012464616822285447, "loss": 12.0921, "step": 16091 }, { "epoch": 0.8762737130138245, "grad_norm": 0.48989163227231247, "learning_rate": 0.00012463762188977094, "loss": 11.9986, "step": 16092 }, { "epoch": 0.8763281670104075, "grad_norm": 0.6316447646107993, "learning_rate": 0.0001246290753651011, "loss": 12.1741, "step": 16093 }, { "epoch": 0.8763826210069905, "grad_norm": 0.5579716652402554, "learning_rate": 0.0001246205286489115, "loss": 12.1777, "step": 16094 }, { "epoch": 0.8764370750035736, "grad_norm": 0.5929747326262457, "learning_rate": 0.00012461198174126852, "loss": 12.3101, "step": 16095 }, { "epoch": 0.8764915290001566, "grad_norm": 0.5366363838568602, "learning_rate": 0.00012460343464223864, "loss": 12.1976, "step": 16096 }, { "epoch": 0.8765459829967396, "grad_norm": 0.5221075684624025, "learning_rate": 0.00012459488735188832, "loss": 11.9716, "step": 16097 }, { "epoch": 0.8766004369933226, "grad_norm": 0.5476321426506422, "learning_rate": 0.000124586339870284, "loss": 12.1252, "step": 16098 }, { "epoch": 0.8766548909899056, "grad_norm": 0.5792030541270509, "learning_rate": 0.0001245777921974922, "loss": 12.2472, "step": 16099 }, { "epoch": 0.8767093449864886, "grad_norm": 0.6985294307629022, "learning_rate": 0.0001245692443335794, "loss": 12.1955, "step": 16100 }, { "epoch": 0.8767637989830717, "grad_norm": 0.5577872331396814, "learning_rate": 0.000124560696278612, "loss": 12.0629, "step": 16101 }, { "epoch": 0.8768182529796547, "grad_norm": 0.5853442110348637, "learning_rate": 0.00012455214803265652, "loss": 12.1465, "step": 16102 }, { "epoch": 0.8768727069762376, "grad_norm": 0.5525518738121004, "learning_rate": 0.0001245435995957794, "loss": 12.1607, "step": 16103 }, { "epoch": 0.8769271609728206, "grad_norm": 0.5284200111715768, "learning_rate": 0.0001245350509680472, "loss": 12.204, "step": 16104 }, { "epoch": 0.8769816149694036, "grad_norm": 0.6105951133886501, "learning_rate": 0.00012452650214952624, "loss": 12.2329, "step": 16105 }, { "epoch": 0.8770360689659866, "grad_norm": 0.538846310148232, "learning_rate": 0.00012451795314028313, "loss": 11.8713, "step": 16106 }, { "epoch": 0.8770905229625697, "grad_norm": 0.5356062524521141, "learning_rate": 0.0001245094039403843, "loss": 12.1835, "step": 16107 }, { "epoch": 0.8771449769591527, "grad_norm": 0.5572700303346961, "learning_rate": 0.00012450085454989625, "loss": 12.0499, "step": 16108 }, { "epoch": 0.8771994309557357, "grad_norm": 0.6277722706686351, "learning_rate": 0.00012449230496888543, "loss": 12.2874, "step": 16109 }, { "epoch": 0.8772538849523187, "grad_norm": 0.5989562610312907, "learning_rate": 0.00012448375519741835, "loss": 12.2477, "step": 16110 }, { "epoch": 0.8773083389489017, "grad_norm": 0.591640548205753, "learning_rate": 0.00012447520523556146, "loss": 12.2729, "step": 16111 }, { "epoch": 0.8773627929454847, "grad_norm": 0.5043005990234394, "learning_rate": 0.00012446665508338128, "loss": 12.0538, "step": 16112 }, { "epoch": 0.8774172469420678, "grad_norm": 0.5474216805855566, "learning_rate": 0.0001244581047409443, "loss": 12.2178, "step": 16113 }, { "epoch": 0.8774717009386508, "grad_norm": 0.5857427999025753, "learning_rate": 0.000124449554208317, "loss": 12.162, "step": 16114 }, { "epoch": 0.8775261549352338, "grad_norm": 0.5805090016549473, "learning_rate": 0.00012444100348556585, "loss": 12.2238, "step": 16115 }, { "epoch": 0.8775806089318168, "grad_norm": 0.6105765736875828, "learning_rate": 0.00012443245257275735, "loss": 12.2629, "step": 16116 }, { "epoch": 0.8776350629283998, "grad_norm": 0.518552692809577, "learning_rate": 0.000124423901469958, "loss": 12.0606, "step": 16117 }, { "epoch": 0.8776895169249828, "grad_norm": 0.5763076452780304, "learning_rate": 0.00012441535017723433, "loss": 12.1436, "step": 16118 }, { "epoch": 0.8777439709215659, "grad_norm": 0.5341231124511039, "learning_rate": 0.0001244067986946528, "loss": 12.191, "step": 16119 }, { "epoch": 0.8777984249181489, "grad_norm": 0.5500435671005613, "learning_rate": 0.00012439824702227987, "loss": 12.2206, "step": 16120 }, { "epoch": 0.8778528789147318, "grad_norm": 0.5525902266037133, "learning_rate": 0.0001243896951601821, "loss": 12.1845, "step": 16121 }, { "epoch": 0.8779073329113148, "grad_norm": 0.6192969566452226, "learning_rate": 0.00012438114310842598, "loss": 12.2346, "step": 16122 }, { "epoch": 0.8779617869078978, "grad_norm": 0.5553910515782029, "learning_rate": 0.000124372590867078, "loss": 12.2288, "step": 16123 }, { "epoch": 0.8780162409044809, "grad_norm": 0.5477482552336438, "learning_rate": 0.00012436403843620468, "loss": 12.2031, "step": 16124 }, { "epoch": 0.8780706949010639, "grad_norm": 0.5097790965963074, "learning_rate": 0.00012435548581587246, "loss": 12.1158, "step": 16125 }, { "epoch": 0.8781251488976469, "grad_norm": 0.6096381870021689, "learning_rate": 0.00012434693300614793, "loss": 12.2922, "step": 16126 }, { "epoch": 0.8781796028942299, "grad_norm": 0.5502228426678689, "learning_rate": 0.00012433838000709755, "loss": 12.1405, "step": 16127 }, { "epoch": 0.8782340568908129, "grad_norm": 0.6006586891159624, "learning_rate": 0.0001243298268187879, "loss": 12.3248, "step": 16128 }, { "epoch": 0.8782885108873959, "grad_norm": 0.5581646276150829, "learning_rate": 0.0001243212734412854, "loss": 12.1715, "step": 16129 }, { "epoch": 0.878342964883979, "grad_norm": 0.5495172790231901, "learning_rate": 0.00012431271987465661, "loss": 12.2233, "step": 16130 }, { "epoch": 0.878397418880562, "grad_norm": 0.5375101576174629, "learning_rate": 0.00012430416611896797, "loss": 12.1081, "step": 16131 }, { "epoch": 0.878451872877145, "grad_norm": 0.5300128739993558, "learning_rate": 0.0001242956121742861, "loss": 12.2207, "step": 16132 }, { "epoch": 0.878506326873728, "grad_norm": 0.5687779995223394, "learning_rate": 0.00012428705804067751, "loss": 12.189, "step": 16133 }, { "epoch": 0.878560780870311, "grad_norm": 0.5836162326318499, "learning_rate": 0.00012427850371820868, "loss": 12.2623, "step": 16134 }, { "epoch": 0.878615234866894, "grad_norm": 0.5308718848591922, "learning_rate": 0.0001242699492069461, "loss": 12.2993, "step": 16135 }, { "epoch": 0.8786696888634771, "grad_norm": 0.49513660224252215, "learning_rate": 0.00012426139450695634, "loss": 12.0951, "step": 16136 }, { "epoch": 0.8787241428600601, "grad_norm": 0.5178325865330834, "learning_rate": 0.0001242528396183059, "loss": 11.9599, "step": 16137 }, { "epoch": 0.8787785968566431, "grad_norm": 0.5969490784962753, "learning_rate": 0.00012424428454106128, "loss": 12.2505, "step": 16138 }, { "epoch": 0.878833050853226, "grad_norm": 0.5901651262878582, "learning_rate": 0.0001242357292752891, "loss": 12.2431, "step": 16139 }, { "epoch": 0.878887504849809, "grad_norm": 0.5301687700694988, "learning_rate": 0.00012422717382105583, "loss": 12.1567, "step": 16140 }, { "epoch": 0.878941958846392, "grad_norm": 0.5653153214152838, "learning_rate": 0.00012421861817842796, "loss": 12.2456, "step": 16141 }, { "epoch": 0.8789964128429751, "grad_norm": 0.6632815726539283, "learning_rate": 0.000124210062347472, "loss": 12.4149, "step": 16142 }, { "epoch": 0.8790508668395581, "grad_norm": 0.5648761481147258, "learning_rate": 0.0001242015063282546, "loss": 12.2223, "step": 16143 }, { "epoch": 0.8791053208361411, "grad_norm": 0.5085759784843153, "learning_rate": 0.00012419295012084224, "loss": 12.2876, "step": 16144 }, { "epoch": 0.8791597748327241, "grad_norm": 0.5453487051823546, "learning_rate": 0.00012418439372530141, "loss": 12.1462, "step": 16145 }, { "epoch": 0.8792142288293071, "grad_norm": 0.5181223589736799, "learning_rate": 0.0001241758371416987, "loss": 12.0158, "step": 16146 }, { "epoch": 0.8792686828258901, "grad_norm": 0.677804920864788, "learning_rate": 0.00012416728037010062, "loss": 12.3514, "step": 16147 }, { "epoch": 0.8793231368224732, "grad_norm": 0.5147318760050673, "learning_rate": 0.00012415872341057369, "loss": 12.1635, "step": 16148 }, { "epoch": 0.8793775908190562, "grad_norm": 0.522595656999475, "learning_rate": 0.00012415016626318452, "loss": 12.0867, "step": 16149 }, { "epoch": 0.8794320448156392, "grad_norm": 0.6116936916835619, "learning_rate": 0.00012414160892799958, "loss": 12.1803, "step": 16150 }, { "epoch": 0.8794864988122222, "grad_norm": 0.5489823607717342, "learning_rate": 0.00012413305140508544, "loss": 12.1459, "step": 16151 }, { "epoch": 0.8795409528088052, "grad_norm": 0.5998587379305671, "learning_rate": 0.00012412449369450865, "loss": 12.0937, "step": 16152 }, { "epoch": 0.8795954068053882, "grad_norm": 0.6123606539421548, "learning_rate": 0.00012411593579633574, "loss": 12.1539, "step": 16153 }, { "epoch": 0.8796498608019713, "grad_norm": 0.6051188759788264, "learning_rate": 0.0001241073777106333, "loss": 12.173, "step": 16154 }, { "epoch": 0.8797043147985543, "grad_norm": 0.541487724905037, "learning_rate": 0.0001240988194374678, "loss": 12.0629, "step": 16155 }, { "epoch": 0.8797587687951373, "grad_norm": 0.5286488857519637, "learning_rate": 0.00012409026097690587, "loss": 12.237, "step": 16156 }, { "epoch": 0.8798132227917203, "grad_norm": 0.5222242290522313, "learning_rate": 0.00012408170232901404, "loss": 12.1247, "step": 16157 }, { "epoch": 0.8798676767883032, "grad_norm": 0.5605723823245377, "learning_rate": 0.00012407314349385885, "loss": 12.2575, "step": 16158 }, { "epoch": 0.8799221307848863, "grad_norm": 0.5311519997094444, "learning_rate": 0.00012406458447150685, "loss": 12.1567, "step": 16159 }, { "epoch": 0.8799765847814693, "grad_norm": 0.522106869753087, "learning_rate": 0.0001240560252620246, "loss": 12.1882, "step": 16160 }, { "epoch": 0.8800310387780523, "grad_norm": 0.5700146146601299, "learning_rate": 0.00012404746586547867, "loss": 11.8611, "step": 16161 }, { "epoch": 0.8800854927746353, "grad_norm": 0.5800248799485075, "learning_rate": 0.00012403890628193563, "loss": 12.0938, "step": 16162 }, { "epoch": 0.8801399467712183, "grad_norm": 0.5157443715077356, "learning_rate": 0.00012403034651146198, "loss": 12.1552, "step": 16163 }, { "epoch": 0.8801944007678013, "grad_norm": 0.5421930476489523, "learning_rate": 0.00012402178655412436, "loss": 12.1899, "step": 16164 }, { "epoch": 0.8802488547643844, "grad_norm": 0.6029714786139897, "learning_rate": 0.0001240132264099893, "loss": 12.1137, "step": 16165 }, { "epoch": 0.8803033087609674, "grad_norm": 0.5451626473064809, "learning_rate": 0.00012400466607912332, "loss": 12.1901, "step": 16166 }, { "epoch": 0.8803577627575504, "grad_norm": 0.5134452705606481, "learning_rate": 0.0001239961055615931, "loss": 12.1074, "step": 16167 }, { "epoch": 0.8804122167541334, "grad_norm": 0.5925524126284681, "learning_rate": 0.0001239875448574651, "loss": 12.3034, "step": 16168 }, { "epoch": 0.8804666707507164, "grad_norm": 0.5287324031552987, "learning_rate": 0.0001239789839668059, "loss": 12.2371, "step": 16169 }, { "epoch": 0.8805211247472994, "grad_norm": 0.5181531375923086, "learning_rate": 0.00012397042288968214, "loss": 12.079, "step": 16170 }, { "epoch": 0.8805755787438825, "grad_norm": 0.509508990530057, "learning_rate": 0.00012396186162616038, "loss": 11.9842, "step": 16171 }, { "epoch": 0.8806300327404655, "grad_norm": 0.5914236089719387, "learning_rate": 0.00012395330017630712, "loss": 12.2112, "step": 16172 }, { "epoch": 0.8806844867370485, "grad_norm": 0.5585576561169054, "learning_rate": 0.00012394473854018898, "loss": 12.0564, "step": 16173 }, { "epoch": 0.8807389407336315, "grad_norm": 0.5256569160661628, "learning_rate": 0.00012393617671787254, "loss": 12.1797, "step": 16174 }, { "epoch": 0.8807933947302145, "grad_norm": 0.5503036986398105, "learning_rate": 0.0001239276147094244, "loss": 12.1849, "step": 16175 }, { "epoch": 0.8808478487267974, "grad_norm": 0.5899224050988658, "learning_rate": 0.0001239190525149111, "loss": 12.2352, "step": 16176 }, { "epoch": 0.8809023027233805, "grad_norm": 0.5690699450700633, "learning_rate": 0.0001239104901343992, "loss": 12.114, "step": 16177 }, { "epoch": 0.8809567567199635, "grad_norm": 0.5769616797004105, "learning_rate": 0.00012390192756795538, "loss": 12.0492, "step": 16178 }, { "epoch": 0.8810112107165465, "grad_norm": 0.570383479686332, "learning_rate": 0.00012389336481564614, "loss": 12.1202, "step": 16179 }, { "epoch": 0.8810656647131295, "grad_norm": 0.5639287176449898, "learning_rate": 0.00012388480187753808, "loss": 12.0932, "step": 16180 }, { "epoch": 0.8811201187097125, "grad_norm": 0.5824002134140114, "learning_rate": 0.0001238762387536978, "loss": 12.2077, "step": 16181 }, { "epoch": 0.8811745727062955, "grad_norm": 0.5580492724272099, "learning_rate": 0.0001238676754441919, "loss": 12.1451, "step": 16182 }, { "epoch": 0.8812290267028786, "grad_norm": 0.5599035260414097, "learning_rate": 0.00012385911194908692, "loss": 12.1846, "step": 16183 }, { "epoch": 0.8812834806994616, "grad_norm": 0.5425513211680949, "learning_rate": 0.00012385054826844952, "loss": 11.9813, "step": 16184 }, { "epoch": 0.8813379346960446, "grad_norm": 0.5685357381558466, "learning_rate": 0.00012384198440234625, "loss": 12.0235, "step": 16185 }, { "epoch": 0.8813923886926276, "grad_norm": 0.5543533366808976, "learning_rate": 0.00012383342035084368, "loss": 12.1006, "step": 16186 }, { "epoch": 0.8814468426892106, "grad_norm": 0.5634053378083402, "learning_rate": 0.00012382485611400846, "loss": 12.1838, "step": 16187 }, { "epoch": 0.8815012966857936, "grad_norm": 0.6518959759414273, "learning_rate": 0.00012381629169190715, "loss": 12.2529, "step": 16188 }, { "epoch": 0.8815557506823767, "grad_norm": 0.6343989078058219, "learning_rate": 0.0001238077270846064, "loss": 12.1136, "step": 16189 }, { "epoch": 0.8816102046789597, "grad_norm": 0.5619803061551234, "learning_rate": 0.00012379916229217274, "loss": 12.0956, "step": 16190 }, { "epoch": 0.8816646586755427, "grad_norm": 0.5500800945417131, "learning_rate": 0.00012379059731467277, "loss": 12.1264, "step": 16191 }, { "epoch": 0.8817191126721257, "grad_norm": 0.6534795437272554, "learning_rate": 0.00012378203215217316, "loss": 12.1569, "step": 16192 }, { "epoch": 0.8817735666687087, "grad_norm": 0.5751586600273532, "learning_rate": 0.0001237734668047405, "loss": 12.1951, "step": 16193 }, { "epoch": 0.8818280206652918, "grad_norm": 0.5799723742902898, "learning_rate": 0.0001237649012724414, "loss": 12.2284, "step": 16194 }, { "epoch": 0.8818824746618747, "grad_norm": 0.5652758554051909, "learning_rate": 0.00012375633555534237, "loss": 12.078, "step": 16195 }, { "epoch": 0.8819369286584577, "grad_norm": 0.5470256921634873, "learning_rate": 0.00012374776965351012, "loss": 12.1292, "step": 16196 }, { "epoch": 0.8819913826550407, "grad_norm": 0.5500982540293085, "learning_rate": 0.00012373920356701122, "loss": 12.1825, "step": 16197 }, { "epoch": 0.8820458366516237, "grad_norm": 0.553900653584647, "learning_rate": 0.0001237306372959123, "loss": 12.1216, "step": 16198 }, { "epoch": 0.8821002906482067, "grad_norm": 0.5561992559435806, "learning_rate": 0.00012372207084027998, "loss": 12.2922, "step": 16199 }, { "epoch": 0.8821547446447898, "grad_norm": 0.5356433230478098, "learning_rate": 0.00012371350420018083, "loss": 12.1967, "step": 16200 }, { "epoch": 0.8822091986413728, "grad_norm": 0.5244114586518281, "learning_rate": 0.00012370493737568153, "loss": 12.2054, "step": 16201 }, { "epoch": 0.8822636526379558, "grad_norm": 0.6249332730815699, "learning_rate": 0.00012369637036684862, "loss": 12.1978, "step": 16202 }, { "epoch": 0.8823181066345388, "grad_norm": 0.5166215210519938, "learning_rate": 0.00012368780317374876, "loss": 12.172, "step": 16203 }, { "epoch": 0.8823725606311218, "grad_norm": 0.5655592203207452, "learning_rate": 0.00012367923579644863, "loss": 12.0116, "step": 16204 }, { "epoch": 0.8824270146277048, "grad_norm": 0.5342191190418638, "learning_rate": 0.00012367066823501475, "loss": 12.1477, "step": 16205 }, { "epoch": 0.8824814686242879, "grad_norm": 0.5122266335572258, "learning_rate": 0.0001236621004895138, "loss": 12.2533, "step": 16206 }, { "epoch": 0.8825359226208709, "grad_norm": 0.6474094156178377, "learning_rate": 0.00012365353256001238, "loss": 12.2779, "step": 16207 }, { "epoch": 0.8825903766174539, "grad_norm": 0.534846689296231, "learning_rate": 0.00012364496444657708, "loss": 12.1578, "step": 16208 }, { "epoch": 0.8826448306140369, "grad_norm": 0.5461817063501252, "learning_rate": 0.00012363639614927465, "loss": 12.2337, "step": 16209 }, { "epoch": 0.8826992846106199, "grad_norm": 0.5458809676123125, "learning_rate": 0.00012362782766817162, "loss": 12.2354, "step": 16210 }, { "epoch": 0.8827537386072029, "grad_norm": 0.49927285854458225, "learning_rate": 0.0001236192590033346, "loss": 12.107, "step": 16211 }, { "epoch": 0.882808192603786, "grad_norm": 0.5523903728560546, "learning_rate": 0.00012361069015483028, "loss": 12.1729, "step": 16212 }, { "epoch": 0.882862646600369, "grad_norm": 0.5428571481794514, "learning_rate": 0.00012360212112272527, "loss": 12.3167, "step": 16213 }, { "epoch": 0.8829171005969519, "grad_norm": 0.5616631045634104, "learning_rate": 0.00012359355190708622, "loss": 12.1382, "step": 16214 }, { "epoch": 0.8829715545935349, "grad_norm": 0.5735167151423458, "learning_rate": 0.00012358498250797975, "loss": 12.1902, "step": 16215 }, { "epoch": 0.8830260085901179, "grad_norm": 0.505655421612921, "learning_rate": 0.00012357641292547249, "loss": 12.1472, "step": 16216 }, { "epoch": 0.8830804625867009, "grad_norm": 0.5735619841639722, "learning_rate": 0.0001235678431596311, "loss": 12.1078, "step": 16217 }, { "epoch": 0.883134916583284, "grad_norm": 0.686628470289581, "learning_rate": 0.0001235592732105222, "loss": 12.2523, "step": 16218 }, { "epoch": 0.883189370579867, "grad_norm": 0.5126576113589593, "learning_rate": 0.00012355070307821245, "loss": 12.2123, "step": 16219 }, { "epoch": 0.88324382457645, "grad_norm": 0.6286287193684238, "learning_rate": 0.0001235421327627685, "loss": 12.2636, "step": 16220 }, { "epoch": 0.883298278573033, "grad_norm": 0.5610115455566963, "learning_rate": 0.00012353356226425693, "loss": 12.1751, "step": 16221 }, { "epoch": 0.883352732569616, "grad_norm": 0.5548192270939025, "learning_rate": 0.00012352499158274448, "loss": 12.2444, "step": 16222 }, { "epoch": 0.883407186566199, "grad_norm": 0.5182734831287024, "learning_rate": 0.0001235164207182977, "loss": 12.0495, "step": 16223 }, { "epoch": 0.8834616405627821, "grad_norm": 0.5282561495176271, "learning_rate": 0.00012350784967098333, "loss": 12.2166, "step": 16224 }, { "epoch": 0.8835160945593651, "grad_norm": 0.5400405941500195, "learning_rate": 0.000123499278440868, "loss": 12.1352, "step": 16225 }, { "epoch": 0.8835705485559481, "grad_norm": 0.5242033447130563, "learning_rate": 0.0001234907070280183, "loss": 12.1647, "step": 16226 }, { "epoch": 0.8836250025525311, "grad_norm": 0.5239109929409396, "learning_rate": 0.00012348213543250094, "loss": 12.1428, "step": 16227 }, { "epoch": 0.8836794565491141, "grad_norm": 0.5410506015404936, "learning_rate": 0.00012347356365438253, "loss": 12.1778, "step": 16228 }, { "epoch": 0.8837339105456972, "grad_norm": 0.6075298664685101, "learning_rate": 0.0001234649916937298, "loss": 12.1646, "step": 16229 }, { "epoch": 0.8837883645422802, "grad_norm": 0.5405876936004674, "learning_rate": 0.00012345641955060932, "loss": 12.2044, "step": 16230 }, { "epoch": 0.8838428185388632, "grad_norm": 0.5208606016764383, "learning_rate": 0.0001234478472250878, "loss": 12.0691, "step": 16231 }, { "epoch": 0.8838972725354461, "grad_norm": 0.5368623083267269, "learning_rate": 0.00012343927471723188, "loss": 12.2568, "step": 16232 }, { "epoch": 0.8839517265320291, "grad_norm": 0.5468750069143732, "learning_rate": 0.00012343070202710824, "loss": 12.0706, "step": 16233 }, { "epoch": 0.8840061805286121, "grad_norm": 0.5499608163084332, "learning_rate": 0.00012342212915478353, "loss": 12.2757, "step": 16234 }, { "epoch": 0.8840606345251952, "grad_norm": 0.5851504111453643, "learning_rate": 0.0001234135561003244, "loss": 12.2076, "step": 16235 }, { "epoch": 0.8841150885217782, "grad_norm": 0.5974130400887863, "learning_rate": 0.00012340498286379756, "loss": 12.1854, "step": 16236 }, { "epoch": 0.8841695425183612, "grad_norm": 0.5491304519499025, "learning_rate": 0.00012339640944526964, "loss": 12.2238, "step": 16237 }, { "epoch": 0.8842239965149442, "grad_norm": 0.5346073392197845, "learning_rate": 0.0001233878358448073, "loss": 12.202, "step": 16238 }, { "epoch": 0.8842784505115272, "grad_norm": 0.6736336593523841, "learning_rate": 0.00012337926206247723, "loss": 12.3677, "step": 16239 }, { "epoch": 0.8843329045081102, "grad_norm": 0.5083872491556979, "learning_rate": 0.0001233706880983461, "loss": 12.2147, "step": 16240 }, { "epoch": 0.8843873585046933, "grad_norm": 0.5800111367392793, "learning_rate": 0.00012336211395248058, "loss": 12.1883, "step": 16241 }, { "epoch": 0.8844418125012763, "grad_norm": 0.5869973957798797, "learning_rate": 0.00012335353962494736, "loss": 12.1675, "step": 16242 }, { "epoch": 0.8844962664978593, "grad_norm": 0.4909759782375525, "learning_rate": 0.0001233449651158131, "loss": 12.1226, "step": 16243 }, { "epoch": 0.8845507204944423, "grad_norm": 0.5963019301703091, "learning_rate": 0.00012333639042514446, "loss": 12.1958, "step": 16244 }, { "epoch": 0.8846051744910253, "grad_norm": 0.5483578119503779, "learning_rate": 0.00012332781555300816, "loss": 12.2078, "step": 16245 }, { "epoch": 0.8846596284876083, "grad_norm": 0.50012001467015, "learning_rate": 0.0001233192404994708, "loss": 12.1573, "step": 16246 }, { "epoch": 0.8847140824841914, "grad_norm": 0.5462820967201222, "learning_rate": 0.00012331066526459917, "loss": 12.1758, "step": 16247 }, { "epoch": 0.8847685364807744, "grad_norm": 0.5483925328240585, "learning_rate": 0.00012330208984845986, "loss": 12.1894, "step": 16248 }, { "epoch": 0.8848229904773574, "grad_norm": 0.5219144410997871, "learning_rate": 0.00012329351425111962, "loss": 12.1431, "step": 16249 }, { "epoch": 0.8848774444739403, "grad_norm": 0.5361374767689396, "learning_rate": 0.00012328493847264512, "loss": 12.1482, "step": 16250 }, { "epoch": 0.8849318984705233, "grad_norm": 0.5427429428242615, "learning_rate": 0.00012327636251310297, "loss": 12.258, "step": 16251 }, { "epoch": 0.8849863524671063, "grad_norm": 0.5912904179396163, "learning_rate": 0.00012326778637255996, "loss": 12.0507, "step": 16252 }, { "epoch": 0.8850408064636894, "grad_norm": 0.5378832184296238, "learning_rate": 0.00012325921005108275, "loss": 12.1293, "step": 16253 }, { "epoch": 0.8850952604602724, "grad_norm": 0.6089376138378523, "learning_rate": 0.000123250633548738, "loss": 12.1337, "step": 16254 }, { "epoch": 0.8851497144568554, "grad_norm": 0.5601034014331702, "learning_rate": 0.00012324205686559245, "loss": 12.2097, "step": 16255 }, { "epoch": 0.8852041684534384, "grad_norm": 0.5591657526431818, "learning_rate": 0.00012323348000171277, "loss": 12.0945, "step": 16256 }, { "epoch": 0.8852586224500214, "grad_norm": 0.5661957903672931, "learning_rate": 0.0001232249029571656, "loss": 12.1274, "step": 16257 }, { "epoch": 0.8853130764466045, "grad_norm": 0.5427624453408229, "learning_rate": 0.00012321632573201774, "loss": 12.1423, "step": 16258 }, { "epoch": 0.8853675304431875, "grad_norm": 0.6076214886273502, "learning_rate": 0.0001232077483263358, "loss": 12.1057, "step": 16259 }, { "epoch": 0.8854219844397705, "grad_norm": 0.5781501899130344, "learning_rate": 0.00012319917074018658, "loss": 12.1624, "step": 16260 }, { "epoch": 0.8854764384363535, "grad_norm": 0.5986244677879341, "learning_rate": 0.00012319059297363668, "loss": 12.2996, "step": 16261 }, { "epoch": 0.8855308924329365, "grad_norm": 0.6197660951665949, "learning_rate": 0.00012318201502675285, "loss": 12.154, "step": 16262 }, { "epoch": 0.8855853464295195, "grad_norm": 0.535054653933544, "learning_rate": 0.00012317343689960175, "loss": 12.0921, "step": 16263 }, { "epoch": 0.8856398004261026, "grad_norm": 0.565728316731146, "learning_rate": 0.00012316485859225016, "loss": 12.0625, "step": 16264 }, { "epoch": 0.8856942544226856, "grad_norm": 0.6148431799302818, "learning_rate": 0.0001231562801047647, "loss": 12.1517, "step": 16265 }, { "epoch": 0.8857487084192686, "grad_norm": 0.5625341975593025, "learning_rate": 0.00012314770143721218, "loss": 12.1513, "step": 16266 }, { "epoch": 0.8858031624158516, "grad_norm": 0.5596002114804742, "learning_rate": 0.00012313912258965924, "loss": 12.1375, "step": 16267 }, { "epoch": 0.8858576164124345, "grad_norm": 0.5337452297070369, "learning_rate": 0.00012313054356217256, "loss": 12.1935, "step": 16268 }, { "epoch": 0.8859120704090175, "grad_norm": 0.561215436744151, "learning_rate": 0.00012312196435481892, "loss": 12.2311, "step": 16269 }, { "epoch": 0.8859665244056006, "grad_norm": 0.701137105823549, "learning_rate": 0.000123113384967665, "loss": 12.1859, "step": 16270 }, { "epoch": 0.8860209784021836, "grad_norm": 0.6259806114999253, "learning_rate": 0.00012310480540077753, "loss": 12.2817, "step": 16271 }, { "epoch": 0.8860754323987666, "grad_norm": 0.575558700552531, "learning_rate": 0.00012309622565422323, "loss": 12.0026, "step": 16272 }, { "epoch": 0.8861298863953496, "grad_norm": 0.579891197993033, "learning_rate": 0.0001230876457280688, "loss": 12.1153, "step": 16273 }, { "epoch": 0.8861843403919326, "grad_norm": 0.5505349526446447, "learning_rate": 0.0001230790656223809, "loss": 11.9594, "step": 16274 }, { "epoch": 0.8862387943885156, "grad_norm": 0.6402945163461424, "learning_rate": 0.00012307048533722643, "loss": 12.2174, "step": 16275 }, { "epoch": 0.8862932483850987, "grad_norm": 0.5457796048605926, "learning_rate": 0.00012306190487267193, "loss": 12.0925, "step": 16276 }, { "epoch": 0.8863477023816817, "grad_norm": 0.5305821367555109, "learning_rate": 0.0001230533242287842, "loss": 12.1706, "step": 16277 }, { "epoch": 0.8864021563782647, "grad_norm": 0.5239564589926617, "learning_rate": 0.00012304474340562994, "loss": 12.1302, "step": 16278 }, { "epoch": 0.8864566103748477, "grad_norm": 0.5413941416062154, "learning_rate": 0.00012303616240327592, "loss": 12.0329, "step": 16279 }, { "epoch": 0.8865110643714307, "grad_norm": 0.5185014039488977, "learning_rate": 0.00012302758122178882, "loss": 12.0784, "step": 16280 }, { "epoch": 0.8865655183680137, "grad_norm": 0.4951657358478465, "learning_rate": 0.00012301899986123539, "loss": 12.1503, "step": 16281 }, { "epoch": 0.8866199723645968, "grad_norm": 0.5694149924321806, "learning_rate": 0.00012301041832168236, "loss": 12.2309, "step": 16282 }, { "epoch": 0.8866744263611798, "grad_norm": 0.5313585264494142, "learning_rate": 0.00012300183660319647, "loss": 12.1177, "step": 16283 }, { "epoch": 0.8867288803577628, "grad_norm": 0.6007296788229879, "learning_rate": 0.00012299325470584442, "loss": 12.1911, "step": 16284 }, { "epoch": 0.8867833343543458, "grad_norm": 0.5937648289150693, "learning_rate": 0.00012298467262969297, "loss": 12.3804, "step": 16285 }, { "epoch": 0.8868377883509287, "grad_norm": 0.5532089813326304, "learning_rate": 0.00012297609037480886, "loss": 12.0865, "step": 16286 }, { "epoch": 0.8868922423475117, "grad_norm": 0.6688215915836628, "learning_rate": 0.0001229675079412588, "loss": 12.1452, "step": 16287 }, { "epoch": 0.8869466963440948, "grad_norm": 0.5951680028150612, "learning_rate": 0.00012295892532910956, "loss": 12.3691, "step": 16288 }, { "epoch": 0.8870011503406778, "grad_norm": 0.5231733871097624, "learning_rate": 0.00012295034253842789, "loss": 12.0778, "step": 16289 }, { "epoch": 0.8870556043372608, "grad_norm": 0.5423677131696757, "learning_rate": 0.00012294175956928047, "loss": 12.1868, "step": 16290 }, { "epoch": 0.8871100583338438, "grad_norm": 0.4946320742927819, "learning_rate": 0.00012293317642173408, "loss": 12.0247, "step": 16291 }, { "epoch": 0.8871645123304268, "grad_norm": 0.5529772902355801, "learning_rate": 0.0001229245930958555, "loss": 12.1839, "step": 16292 }, { "epoch": 0.8872189663270099, "grad_norm": 0.6392097378935568, "learning_rate": 0.0001229160095917114, "loss": 12.286, "step": 16293 }, { "epoch": 0.8872734203235929, "grad_norm": 0.5291321917582374, "learning_rate": 0.00012290742590936857, "loss": 12.1872, "step": 16294 }, { "epoch": 0.8873278743201759, "grad_norm": 0.5831656257225017, "learning_rate": 0.00012289884204889378, "loss": 12.1548, "step": 16295 }, { "epoch": 0.8873823283167589, "grad_norm": 0.5499164854122962, "learning_rate": 0.00012289025801035373, "loss": 12.1475, "step": 16296 }, { "epoch": 0.8874367823133419, "grad_norm": 0.5528434084145767, "learning_rate": 0.0001228816737938152, "loss": 12.0544, "step": 16297 }, { "epoch": 0.8874912363099249, "grad_norm": 0.5137783709029952, "learning_rate": 0.00012287308939934496, "loss": 12.1277, "step": 16298 }, { "epoch": 0.887545690306508, "grad_norm": 0.5625471444950313, "learning_rate": 0.00012286450482700974, "loss": 12.2227, "step": 16299 }, { "epoch": 0.887600144303091, "grad_norm": 0.5936241104738309, "learning_rate": 0.00012285592007687626, "loss": 12.1372, "step": 16300 }, { "epoch": 0.887654598299674, "grad_norm": 0.5238975637094012, "learning_rate": 0.0001228473351490113, "loss": 12.0677, "step": 16301 }, { "epoch": 0.887709052296257, "grad_norm": 0.6606203666543881, "learning_rate": 0.00012283875004348167, "loss": 12.2815, "step": 16302 }, { "epoch": 0.88776350629284, "grad_norm": 0.5478035977552437, "learning_rate": 0.0001228301647603541, "loss": 12.1251, "step": 16303 }, { "epoch": 0.887817960289423, "grad_norm": 0.5400120488267081, "learning_rate": 0.00012282157929969533, "loss": 12.1598, "step": 16304 }, { "epoch": 0.887872414286006, "grad_norm": 0.6254885091083855, "learning_rate": 0.00012281299366157216, "loss": 12.2532, "step": 16305 }, { "epoch": 0.887926868282589, "grad_norm": 0.5666635884476116, "learning_rate": 0.00012280440784605124, "loss": 12.3314, "step": 16306 }, { "epoch": 0.887981322279172, "grad_norm": 0.5571324077998513, "learning_rate": 0.0001227958218531995, "loss": 12.2664, "step": 16307 }, { "epoch": 0.888035776275755, "grad_norm": 0.6383871738610409, "learning_rate": 0.00012278723568308358, "loss": 12.268, "step": 16308 }, { "epoch": 0.888090230272338, "grad_norm": 0.5489827954744986, "learning_rate": 0.00012277864933577033, "loss": 12.0518, "step": 16309 }, { "epoch": 0.888144684268921, "grad_norm": 0.5291341260448726, "learning_rate": 0.00012277006281132647, "loss": 12.1884, "step": 16310 }, { "epoch": 0.8881991382655041, "grad_norm": 0.6066827861748462, "learning_rate": 0.00012276147610981876, "loss": 12.1157, "step": 16311 }, { "epoch": 0.8882535922620871, "grad_norm": 0.5441557383002226, "learning_rate": 0.00012275288923131402, "loss": 12.0808, "step": 16312 }, { "epoch": 0.8883080462586701, "grad_norm": 0.4965751123135751, "learning_rate": 0.000122744302175879, "loss": 12.1255, "step": 16313 }, { "epoch": 0.8883625002552531, "grad_norm": 0.6936057137138693, "learning_rate": 0.00012273571494358045, "loss": 12.2344, "step": 16314 }, { "epoch": 0.8884169542518361, "grad_norm": 0.6014344061380988, "learning_rate": 0.00012272712753448522, "loss": 12.1617, "step": 16315 }, { "epoch": 0.8884714082484191, "grad_norm": 0.5859981341056422, "learning_rate": 0.00012271853994866, "loss": 12.0722, "step": 16316 }, { "epoch": 0.8885258622450022, "grad_norm": 0.573634206675812, "learning_rate": 0.00012270995218617155, "loss": 12.0755, "step": 16317 }, { "epoch": 0.8885803162415852, "grad_norm": 0.5927714899422986, "learning_rate": 0.00012270136424708675, "loss": 12.1773, "step": 16318 }, { "epoch": 0.8886347702381682, "grad_norm": 0.5807420605409117, "learning_rate": 0.00012269277613147233, "loss": 12.1732, "step": 16319 }, { "epoch": 0.8886892242347512, "grad_norm": 0.633388832174088, "learning_rate": 0.00012268418783939513, "loss": 12.1529, "step": 16320 }, { "epoch": 0.8887436782313342, "grad_norm": 0.6540598015292096, "learning_rate": 0.0001226755993709218, "loss": 11.977, "step": 16321 }, { "epoch": 0.8887981322279171, "grad_norm": 0.557404386843737, "learning_rate": 0.00012266701072611926, "loss": 12.097, "step": 16322 }, { "epoch": 0.8888525862245003, "grad_norm": 0.5960222979783917, "learning_rate": 0.0001226584219050542, "loss": 12.0737, "step": 16323 }, { "epoch": 0.8889070402210832, "grad_norm": 0.6314024877140787, "learning_rate": 0.00012264983290779347, "loss": 12.1702, "step": 16324 }, { "epoch": 0.8889614942176662, "grad_norm": 0.8419863945950672, "learning_rate": 0.00012264124373440388, "loss": 12.085, "step": 16325 }, { "epoch": 0.8890159482142492, "grad_norm": 0.5432509180119829, "learning_rate": 0.00012263265438495214, "loss": 12.069, "step": 16326 }, { "epoch": 0.8890704022108322, "grad_norm": 0.5902105609285478, "learning_rate": 0.0001226240648595051, "loss": 12.1508, "step": 16327 }, { "epoch": 0.8891248562074153, "grad_norm": 0.6650731727870468, "learning_rate": 0.00012261547515812952, "loss": 12.1691, "step": 16328 }, { "epoch": 0.8891793102039983, "grad_norm": 0.5477451446445849, "learning_rate": 0.00012260688528089222, "loss": 12.1464, "step": 16329 }, { "epoch": 0.8892337642005813, "grad_norm": 0.5608551356558081, "learning_rate": 0.00012259829522786003, "loss": 12.1764, "step": 16330 }, { "epoch": 0.8892882181971643, "grad_norm": 0.516292820888293, "learning_rate": 0.00012258970499909964, "loss": 12.1369, "step": 16331 }, { "epoch": 0.8893426721937473, "grad_norm": 0.558635622035302, "learning_rate": 0.00012258111459467796, "loss": 12.2017, "step": 16332 }, { "epoch": 0.8893971261903303, "grad_norm": 0.585917460390887, "learning_rate": 0.00012257252401466173, "loss": 12.2535, "step": 16333 }, { "epoch": 0.8894515801869134, "grad_norm": 0.5802831891033983, "learning_rate": 0.00012256393325911776, "loss": 12.1516, "step": 16334 }, { "epoch": 0.8895060341834964, "grad_norm": 0.5930622050620903, "learning_rate": 0.00012255534232811287, "loss": 12.1491, "step": 16335 }, { "epoch": 0.8895604881800794, "grad_norm": 0.5219656216979696, "learning_rate": 0.00012254675122171387, "loss": 12.1651, "step": 16336 }, { "epoch": 0.8896149421766624, "grad_norm": 0.5821970410221826, "learning_rate": 0.00012253815993998752, "loss": 12.1802, "step": 16337 }, { "epoch": 0.8896693961732454, "grad_norm": 0.5259181992579247, "learning_rate": 0.00012252956848300068, "loss": 12.2109, "step": 16338 }, { "epoch": 0.8897238501698284, "grad_norm": 0.562719113000839, "learning_rate": 0.0001225209768508201, "loss": 12.2265, "step": 16339 }, { "epoch": 0.8897783041664115, "grad_norm": 0.547578636302829, "learning_rate": 0.00012251238504351267, "loss": 12.1624, "step": 16340 }, { "epoch": 0.8898327581629945, "grad_norm": 0.5196405505890513, "learning_rate": 0.00012250379306114517, "loss": 12.0975, "step": 16341 }, { "epoch": 0.8898872121595774, "grad_norm": 0.6273714554787057, "learning_rate": 0.00012249520090378436, "loss": 12.3457, "step": 16342 }, { "epoch": 0.8899416661561604, "grad_norm": 0.5091159924147887, "learning_rate": 0.00012248660857149712, "loss": 12.2966, "step": 16343 }, { "epoch": 0.8899961201527434, "grad_norm": 0.5232684819703758, "learning_rate": 0.00012247801606435024, "loss": 12.1101, "step": 16344 }, { "epoch": 0.8900505741493264, "grad_norm": 0.5616859527095743, "learning_rate": 0.00012246942338241053, "loss": 12.1192, "step": 16345 }, { "epoch": 0.8901050281459095, "grad_norm": 0.6289801674171244, "learning_rate": 0.00012246083052574482, "loss": 12.1321, "step": 16346 }, { "epoch": 0.8901594821424925, "grad_norm": 0.5605368192195113, "learning_rate": 0.0001224522374944199, "loss": 12.1582, "step": 16347 }, { "epoch": 0.8902139361390755, "grad_norm": 0.59515441220397, "learning_rate": 0.00012244364428850267, "loss": 12.2295, "step": 16348 }, { "epoch": 0.8902683901356585, "grad_norm": 0.5613411178657675, "learning_rate": 0.00012243505090805986, "loss": 12.2211, "step": 16349 }, { "epoch": 0.8903228441322415, "grad_norm": 0.625794527188511, "learning_rate": 0.00012242645735315835, "loss": 12.0044, "step": 16350 }, { "epoch": 0.8903772981288245, "grad_norm": 0.6345129634846822, "learning_rate": 0.0001224178636238649, "loss": 12.1942, "step": 16351 }, { "epoch": 0.8904317521254076, "grad_norm": 0.5797800622091027, "learning_rate": 0.00012240926972024644, "loss": 12.0692, "step": 16352 }, { "epoch": 0.8904862061219906, "grad_norm": 0.6149532271097778, "learning_rate": 0.0001224006756423697, "loss": 12.163, "step": 16353 }, { "epoch": 0.8905406601185736, "grad_norm": 0.6390410226858315, "learning_rate": 0.0001223920813903016, "loss": 12.2444, "step": 16354 }, { "epoch": 0.8905951141151566, "grad_norm": 0.5575843300674095, "learning_rate": 0.00012238348696410887, "loss": 12.2189, "step": 16355 }, { "epoch": 0.8906495681117396, "grad_norm": 0.5934635885270143, "learning_rate": 0.00012237489236385842, "loss": 12.0775, "step": 16356 }, { "epoch": 0.8907040221083226, "grad_norm": 0.6940744268501108, "learning_rate": 0.00012236629758961704, "loss": 12.2922, "step": 16357 }, { "epoch": 0.8907584761049057, "grad_norm": 0.5486866213964949, "learning_rate": 0.00012235770264145158, "loss": 12.0936, "step": 16358 }, { "epoch": 0.8908129301014887, "grad_norm": 0.6384254601668586, "learning_rate": 0.00012234910751942888, "loss": 12.087, "step": 16359 }, { "epoch": 0.8908673840980716, "grad_norm": 0.5725247404303682, "learning_rate": 0.0001223405122236158, "loss": 12.1062, "step": 16360 }, { "epoch": 0.8909218380946546, "grad_norm": 0.6134408624917898, "learning_rate": 0.0001223319167540791, "loss": 12.2632, "step": 16361 }, { "epoch": 0.8909762920912376, "grad_norm": 0.5677903063661517, "learning_rate": 0.00012232332111088569, "loss": 12.1721, "step": 16362 }, { "epoch": 0.8910307460878207, "grad_norm": 0.5678554421060821, "learning_rate": 0.0001223147252941024, "loss": 12.2632, "step": 16363 }, { "epoch": 0.8910852000844037, "grad_norm": 0.5411360054176657, "learning_rate": 0.00012230612930379605, "loss": 12.1227, "step": 16364 }, { "epoch": 0.8911396540809867, "grad_norm": 0.5789415259530507, "learning_rate": 0.00012229753314003353, "loss": 12.0349, "step": 16365 }, { "epoch": 0.8911941080775697, "grad_norm": 0.6118033550791085, "learning_rate": 0.0001222889368028816, "loss": 12.3661, "step": 16366 }, { "epoch": 0.8912485620741527, "grad_norm": 0.5498114164510856, "learning_rate": 0.0001222803402924072, "loss": 12.0789, "step": 16367 }, { "epoch": 0.8913030160707357, "grad_norm": 0.5455619732312031, "learning_rate": 0.00012227174360867712, "loss": 12.1499, "step": 16368 }, { "epoch": 0.8913574700673188, "grad_norm": 0.5849315482521636, "learning_rate": 0.00012226314675175824, "loss": 12.123, "step": 16369 }, { "epoch": 0.8914119240639018, "grad_norm": 0.6706327880267889, "learning_rate": 0.00012225454972171742, "loss": 12.3123, "step": 16370 }, { "epoch": 0.8914663780604848, "grad_norm": 0.563472642508159, "learning_rate": 0.00012224595251862145, "loss": 12.1743, "step": 16371 }, { "epoch": 0.8915208320570678, "grad_norm": 0.5805514198217513, "learning_rate": 0.0001222373551425372, "loss": 12.2397, "step": 16372 }, { "epoch": 0.8915752860536508, "grad_norm": 0.5495186261985949, "learning_rate": 0.00012222875759353158, "loss": 12.2148, "step": 16373 }, { "epoch": 0.8916297400502338, "grad_norm": 0.5189203705272784, "learning_rate": 0.0001222201598716714, "loss": 12.1897, "step": 16374 }, { "epoch": 0.8916841940468169, "grad_norm": 0.5316500332118727, "learning_rate": 0.00012221156197702356, "loss": 12.1792, "step": 16375 }, { "epoch": 0.8917386480433999, "grad_norm": 0.5383019627872128, "learning_rate": 0.0001222029639096549, "loss": 12.1051, "step": 16376 }, { "epoch": 0.8917931020399829, "grad_norm": 0.5819361656303953, "learning_rate": 0.00012219436566963222, "loss": 12.3008, "step": 16377 }, { "epoch": 0.8918475560365658, "grad_norm": 0.6054698883628403, "learning_rate": 0.00012218576725702245, "loss": 12.2099, "step": 16378 }, { "epoch": 0.8919020100331488, "grad_norm": 0.56277415551667, "learning_rate": 0.00012217716867189243, "loss": 12.11, "step": 16379 }, { "epoch": 0.8919564640297318, "grad_norm": 0.5353622779874301, "learning_rate": 0.00012216856991430905, "loss": 12.1667, "step": 16380 }, { "epoch": 0.8920109180263149, "grad_norm": 0.5907613177814792, "learning_rate": 0.00012215997098433912, "loss": 12.2423, "step": 16381 }, { "epoch": 0.8920653720228979, "grad_norm": 0.5314572914567044, "learning_rate": 0.00012215137188204957, "loss": 12.2518, "step": 16382 }, { "epoch": 0.8921198260194809, "grad_norm": 0.5587918377791286, "learning_rate": 0.00012214277260750718, "loss": 12.1841, "step": 16383 }, { "epoch": 0.8921742800160639, "grad_norm": 0.5779357858442474, "learning_rate": 0.00012213417316077894, "loss": 12.0817, "step": 16384 }, { "epoch": 0.8922287340126469, "grad_norm": 0.5427932311031911, "learning_rate": 0.00012212557354193164, "loss": 12.1866, "step": 16385 }, { "epoch": 0.8922831880092299, "grad_norm": 0.5430726865693253, "learning_rate": 0.00012211697375103217, "loss": 12.0536, "step": 16386 }, { "epoch": 0.892337642005813, "grad_norm": 0.5788113619506856, "learning_rate": 0.0001221083737881474, "loss": 12.0162, "step": 16387 }, { "epoch": 0.892392096002396, "grad_norm": 0.5527416162532146, "learning_rate": 0.00012209977365334419, "loss": 12.1027, "step": 16388 }, { "epoch": 0.892446549998979, "grad_norm": 0.5431246939434727, "learning_rate": 0.00012209117334668944, "loss": 12.1254, "step": 16389 }, { "epoch": 0.892501003995562, "grad_norm": 0.5809144579873806, "learning_rate": 0.00012208257286825004, "loss": 12.0177, "step": 16390 }, { "epoch": 0.892555457992145, "grad_norm": 0.5657949703100597, "learning_rate": 0.00012207397221809286, "loss": 12.2054, "step": 16391 }, { "epoch": 0.8926099119887281, "grad_norm": 0.5635021259578552, "learning_rate": 0.00012206537139628476, "loss": 12.1643, "step": 16392 }, { "epoch": 0.8926643659853111, "grad_norm": 0.7856142703467077, "learning_rate": 0.00012205677040289263, "loss": 12.2693, "step": 16393 }, { "epoch": 0.8927188199818941, "grad_norm": 0.5431584101018093, "learning_rate": 0.00012204816923798332, "loss": 12.1436, "step": 16394 }, { "epoch": 0.892773273978477, "grad_norm": 0.5429192019043648, "learning_rate": 0.00012203956790162379, "loss": 12.2422, "step": 16395 }, { "epoch": 0.89282772797506, "grad_norm": 0.5613179884500105, "learning_rate": 0.00012203096639388088, "loss": 12.143, "step": 16396 }, { "epoch": 0.892882181971643, "grad_norm": 0.5946760079053818, "learning_rate": 0.00012202236471482147, "loss": 12.1774, "step": 16397 }, { "epoch": 0.8929366359682261, "grad_norm": 0.553657927886927, "learning_rate": 0.00012201376286451247, "loss": 12.0548, "step": 16398 }, { "epoch": 0.8929910899648091, "grad_norm": 0.5354058557561385, "learning_rate": 0.00012200516084302074, "loss": 12.2015, "step": 16399 }, { "epoch": 0.8930455439613921, "grad_norm": 0.5010026502091414, "learning_rate": 0.00012199655865041318, "loss": 12.102, "step": 16400 }, { "epoch": 0.8930999979579751, "grad_norm": 0.5683799680633952, "learning_rate": 0.00012198795628675673, "loss": 11.9626, "step": 16401 }, { "epoch": 0.8931544519545581, "grad_norm": 0.5685946238732208, "learning_rate": 0.00012197935375211822, "loss": 12.1334, "step": 16402 }, { "epoch": 0.8932089059511411, "grad_norm": 0.5321421061657889, "learning_rate": 0.00012197075104656457, "loss": 12.0824, "step": 16403 }, { "epoch": 0.8932633599477242, "grad_norm": 0.6369712125898802, "learning_rate": 0.00012196214817016267, "loss": 12.0904, "step": 16404 }, { "epoch": 0.8933178139443072, "grad_norm": 0.5264122019388323, "learning_rate": 0.0001219535451229794, "loss": 11.9427, "step": 16405 }, { "epoch": 0.8933722679408902, "grad_norm": 0.5250222597786064, "learning_rate": 0.00012194494190508175, "loss": 12.2276, "step": 16406 }, { "epoch": 0.8934267219374732, "grad_norm": 0.5850483257175525, "learning_rate": 0.00012193633851653652, "loss": 12.3033, "step": 16407 }, { "epoch": 0.8934811759340562, "grad_norm": 0.5621707395066748, "learning_rate": 0.00012192773495741063, "loss": 12.2006, "step": 16408 }, { "epoch": 0.8935356299306392, "grad_norm": 0.6184809514475227, "learning_rate": 0.00012191913122777098, "loss": 12.1792, "step": 16409 }, { "epoch": 0.8935900839272223, "grad_norm": 0.5997452134431591, "learning_rate": 0.00012191052732768453, "loss": 12.1659, "step": 16410 }, { "epoch": 0.8936445379238053, "grad_norm": 0.6570410268290692, "learning_rate": 0.00012190192325721812, "loss": 12.1779, "step": 16411 }, { "epoch": 0.8936989919203883, "grad_norm": 0.579003499709148, "learning_rate": 0.0001218933190164387, "loss": 12.2527, "step": 16412 }, { "epoch": 0.8937534459169713, "grad_norm": 0.5530522847933657, "learning_rate": 0.00012188471460541315, "loss": 12.0883, "step": 16413 }, { "epoch": 0.8938078999135542, "grad_norm": 0.6791854967291375, "learning_rate": 0.00012187611002420838, "loss": 12.1236, "step": 16414 }, { "epoch": 0.8938623539101372, "grad_norm": 0.5153430179909868, "learning_rate": 0.00012186750527289132, "loss": 12.1704, "step": 16415 }, { "epoch": 0.8939168079067203, "grad_norm": 0.5253876428987704, "learning_rate": 0.00012185890035152887, "loss": 12.2482, "step": 16416 }, { "epoch": 0.8939712619033033, "grad_norm": 0.5821672884226603, "learning_rate": 0.00012185029526018794, "loss": 12.2772, "step": 16417 }, { "epoch": 0.8940257158998863, "grad_norm": 0.5224363701557859, "learning_rate": 0.00012184168999893546, "loss": 12.1356, "step": 16418 }, { "epoch": 0.8940801698964693, "grad_norm": 0.588352730562737, "learning_rate": 0.00012183308456783832, "loss": 12.2662, "step": 16419 }, { "epoch": 0.8941346238930523, "grad_norm": 0.5155512892761352, "learning_rate": 0.00012182447896696347, "loss": 12.0925, "step": 16420 }, { "epoch": 0.8941890778896353, "grad_norm": 0.5575012062730131, "learning_rate": 0.00012181587319637782, "loss": 12.1494, "step": 16421 }, { "epoch": 0.8942435318862184, "grad_norm": 0.5537627331780988, "learning_rate": 0.00012180726725614826, "loss": 12.0568, "step": 16422 }, { "epoch": 0.8942979858828014, "grad_norm": 0.5203552197086324, "learning_rate": 0.00012179866114634174, "loss": 12.12, "step": 16423 }, { "epoch": 0.8943524398793844, "grad_norm": 0.5283891939212064, "learning_rate": 0.00012179005486702517, "loss": 12.0989, "step": 16424 }, { "epoch": 0.8944068938759674, "grad_norm": 0.5485087823080463, "learning_rate": 0.00012178144841826548, "loss": 12.159, "step": 16425 }, { "epoch": 0.8944613478725504, "grad_norm": 0.5690013527694032, "learning_rate": 0.0001217728418001296, "loss": 12.1885, "step": 16426 }, { "epoch": 0.8945158018691335, "grad_norm": 0.5463935145707699, "learning_rate": 0.00012176423501268445, "loss": 12.2017, "step": 16427 }, { "epoch": 0.8945702558657165, "grad_norm": 0.5334736599123772, "learning_rate": 0.00012175562805599696, "loss": 12.2179, "step": 16428 }, { "epoch": 0.8946247098622995, "grad_norm": 0.5351992571302758, "learning_rate": 0.00012174702093013403, "loss": 12.1498, "step": 16429 }, { "epoch": 0.8946791638588825, "grad_norm": 0.6364589176976457, "learning_rate": 0.00012173841363516265, "loss": 12.1802, "step": 16430 }, { "epoch": 0.8947336178554655, "grad_norm": 0.5253528278949113, "learning_rate": 0.00012172980617114975, "loss": 12.1021, "step": 16431 }, { "epoch": 0.8947880718520484, "grad_norm": 0.5145999047777803, "learning_rate": 0.00012172119853816217, "loss": 12.2855, "step": 16432 }, { "epoch": 0.8948425258486316, "grad_norm": 0.6613421138830665, "learning_rate": 0.00012171259073626693, "loss": 11.9993, "step": 16433 }, { "epoch": 0.8948969798452145, "grad_norm": 0.4970791425366797, "learning_rate": 0.00012170398276553094, "loss": 11.9895, "step": 16434 }, { "epoch": 0.8949514338417975, "grad_norm": 0.5340672710813823, "learning_rate": 0.00012169537462602117, "loss": 12.0471, "step": 16435 }, { "epoch": 0.8950058878383805, "grad_norm": 0.5608441757057984, "learning_rate": 0.00012168676631780451, "loss": 12.0705, "step": 16436 }, { "epoch": 0.8950603418349635, "grad_norm": 0.5554717285872464, "learning_rate": 0.0001216781578409479, "loss": 12.3042, "step": 16437 }, { "epoch": 0.8951147958315465, "grad_norm": 0.5135038200349453, "learning_rate": 0.00012166954919551832, "loss": 12.1329, "step": 16438 }, { "epoch": 0.8951692498281296, "grad_norm": 0.5576360282006322, "learning_rate": 0.00012166094038158267, "loss": 12.2105, "step": 16439 }, { "epoch": 0.8952237038247126, "grad_norm": 0.4971445563517799, "learning_rate": 0.00012165233139920793, "loss": 12.178, "step": 16440 }, { "epoch": 0.8952781578212956, "grad_norm": 0.5787494931942184, "learning_rate": 0.00012164372224846106, "loss": 12.145, "step": 16441 }, { "epoch": 0.8953326118178786, "grad_norm": 0.5199979770663736, "learning_rate": 0.00012163511292940894, "loss": 12.0949, "step": 16442 }, { "epoch": 0.8953870658144616, "grad_norm": 0.5910601885480786, "learning_rate": 0.00012162650344211855, "loss": 12.2081, "step": 16443 }, { "epoch": 0.8954415198110446, "grad_norm": 0.5637481454779644, "learning_rate": 0.00012161789378665684, "loss": 12.1374, "step": 16444 }, { "epoch": 0.8954959738076277, "grad_norm": 0.5433839584875032, "learning_rate": 0.00012160928396309077, "loss": 12.1934, "step": 16445 }, { "epoch": 0.8955504278042107, "grad_norm": 0.5625919676665406, "learning_rate": 0.00012160067397148732, "loss": 12.0736, "step": 16446 }, { "epoch": 0.8956048818007937, "grad_norm": 0.5686657784743229, "learning_rate": 0.00012159206381191337, "loss": 12.1472, "step": 16447 }, { "epoch": 0.8956593357973767, "grad_norm": 0.5481632261766926, "learning_rate": 0.00012158345348443592, "loss": 12.1238, "step": 16448 }, { "epoch": 0.8957137897939597, "grad_norm": 0.5579295242376465, "learning_rate": 0.00012157484298912189, "loss": 12.1607, "step": 16449 }, { "epoch": 0.8957682437905427, "grad_norm": 0.5298093247807992, "learning_rate": 0.0001215662323260383, "loss": 12.2074, "step": 16450 }, { "epoch": 0.8958226977871258, "grad_norm": 0.5798119261994258, "learning_rate": 0.00012155762149525207, "loss": 12.1358, "step": 16451 }, { "epoch": 0.8958771517837087, "grad_norm": 0.5433865315844972, "learning_rate": 0.00012154901049683014, "loss": 12.0943, "step": 16452 }, { "epoch": 0.8959316057802917, "grad_norm": 0.5994413735552185, "learning_rate": 0.00012154039933083949, "loss": 12.3357, "step": 16453 }, { "epoch": 0.8959860597768747, "grad_norm": 0.5811815042877232, "learning_rate": 0.00012153178799734707, "loss": 12.1058, "step": 16454 }, { "epoch": 0.8960405137734577, "grad_norm": 0.5344823959899129, "learning_rate": 0.00012152317649641989, "loss": 12.0799, "step": 16455 }, { "epoch": 0.8960949677700407, "grad_norm": 0.6342418638772556, "learning_rate": 0.0001215145648281249, "loss": 12.1704, "step": 16456 }, { "epoch": 0.8961494217666238, "grad_norm": 0.5936470227195616, "learning_rate": 0.00012150595299252898, "loss": 12.3105, "step": 16457 }, { "epoch": 0.8962038757632068, "grad_norm": 0.4930485342447415, "learning_rate": 0.00012149734098969921, "loss": 12.0914, "step": 16458 }, { "epoch": 0.8962583297597898, "grad_norm": 0.570412094111875, "learning_rate": 0.00012148872881970248, "loss": 12.2792, "step": 16459 }, { "epoch": 0.8963127837563728, "grad_norm": 0.5553123223184898, "learning_rate": 0.0001214801164826058, "loss": 12.1716, "step": 16460 }, { "epoch": 0.8963672377529558, "grad_norm": 0.5598916626976647, "learning_rate": 0.00012147150397847616, "loss": 12.1481, "step": 16461 }, { "epoch": 0.8964216917495389, "grad_norm": 0.573612473014463, "learning_rate": 0.00012146289130738046, "loss": 12.22, "step": 16462 }, { "epoch": 0.8964761457461219, "grad_norm": 0.5584110522872364, "learning_rate": 0.00012145427846938575, "loss": 11.9865, "step": 16463 }, { "epoch": 0.8965305997427049, "grad_norm": 0.5648022930877249, "learning_rate": 0.00012144566546455897, "loss": 12.1107, "step": 16464 }, { "epoch": 0.8965850537392879, "grad_norm": 0.4750639851872308, "learning_rate": 0.00012143705229296707, "loss": 11.9892, "step": 16465 }, { "epoch": 0.8966395077358709, "grad_norm": 0.5240865844768169, "learning_rate": 0.00012142843895467711, "loss": 12.1236, "step": 16466 }, { "epoch": 0.8966939617324539, "grad_norm": 0.5603857222852854, "learning_rate": 0.00012141982544975596, "loss": 12.2482, "step": 16467 }, { "epoch": 0.896748415729037, "grad_norm": 0.5896166433903733, "learning_rate": 0.00012141121177827068, "loss": 12.2365, "step": 16468 }, { "epoch": 0.89680286972562, "grad_norm": 0.5330245279845494, "learning_rate": 0.00012140259794028823, "loss": 12.08, "step": 16469 }, { "epoch": 0.896857323722203, "grad_norm": 0.5299275708835319, "learning_rate": 0.0001213939839358756, "loss": 12.1218, "step": 16470 }, { "epoch": 0.8969117777187859, "grad_norm": 0.5266691139680327, "learning_rate": 0.00012138536976509973, "loss": 12.244, "step": 16471 }, { "epoch": 0.8969662317153689, "grad_norm": 0.5208286285047266, "learning_rate": 0.00012137675542802767, "loss": 12.1085, "step": 16472 }, { "epoch": 0.8970206857119519, "grad_norm": 0.5754574343157628, "learning_rate": 0.00012136814092472635, "loss": 12.0592, "step": 16473 }, { "epoch": 0.897075139708535, "grad_norm": 0.584767279675821, "learning_rate": 0.00012135952625526278, "loss": 12.1781, "step": 16474 }, { "epoch": 0.897129593705118, "grad_norm": 0.5304841517778917, "learning_rate": 0.00012135091141970399, "loss": 12.1799, "step": 16475 }, { "epoch": 0.897184047701701, "grad_norm": 0.5576125404035996, "learning_rate": 0.00012134229641811689, "loss": 12.3244, "step": 16476 }, { "epoch": 0.897238501698284, "grad_norm": 0.5819146834555918, "learning_rate": 0.00012133368125056854, "loss": 12.2412, "step": 16477 }, { "epoch": 0.897292955694867, "grad_norm": 0.5235517952639437, "learning_rate": 0.00012132506591712592, "loss": 12.1088, "step": 16478 }, { "epoch": 0.89734740969145, "grad_norm": 0.5092297491141731, "learning_rate": 0.00012131645041785598, "loss": 12.0861, "step": 16479 }, { "epoch": 0.8974018636880331, "grad_norm": 0.5423376099011958, "learning_rate": 0.00012130783475282575, "loss": 12.3334, "step": 16480 }, { "epoch": 0.8974563176846161, "grad_norm": 0.6165992620351156, "learning_rate": 0.00012129921892210223, "loss": 12.1034, "step": 16481 }, { "epoch": 0.8975107716811991, "grad_norm": 0.560353220103204, "learning_rate": 0.0001212906029257524, "loss": 12.171, "step": 16482 }, { "epoch": 0.8975652256777821, "grad_norm": 0.6155562428640652, "learning_rate": 0.0001212819867638433, "loss": 12.198, "step": 16483 }, { "epoch": 0.8976196796743651, "grad_norm": 0.5437538950050923, "learning_rate": 0.00012127337043644189, "loss": 12.1642, "step": 16484 }, { "epoch": 0.8976741336709481, "grad_norm": 0.6687829324129224, "learning_rate": 0.00012126475394361518, "loss": 12.1437, "step": 16485 }, { "epoch": 0.8977285876675312, "grad_norm": 0.5454772058136936, "learning_rate": 0.00012125613728543017, "loss": 12.1478, "step": 16486 }, { "epoch": 0.8977830416641142, "grad_norm": 0.5472486153196883, "learning_rate": 0.00012124752046195386, "loss": 12.1001, "step": 16487 }, { "epoch": 0.8978374956606971, "grad_norm": 0.5995583406684463, "learning_rate": 0.0001212389034732533, "loss": 12.1316, "step": 16488 }, { "epoch": 0.8978919496572801, "grad_norm": 0.6390986154680263, "learning_rate": 0.00012123028631939546, "loss": 12.2673, "step": 16489 }, { "epoch": 0.8979464036538631, "grad_norm": 0.5580090457577993, "learning_rate": 0.00012122166900044734, "loss": 12.0893, "step": 16490 }, { "epoch": 0.8980008576504461, "grad_norm": 0.6298978408215506, "learning_rate": 0.000121213051516476, "loss": 12.0943, "step": 16491 }, { "epoch": 0.8980553116470292, "grad_norm": 0.5786218651642691, "learning_rate": 0.00012120443386754833, "loss": 12.2038, "step": 16492 }, { "epoch": 0.8981097656436122, "grad_norm": 0.6447769323897492, "learning_rate": 0.00012119581605373149, "loss": 12.324, "step": 16493 }, { "epoch": 0.8981642196401952, "grad_norm": 0.5846483339186045, "learning_rate": 0.00012118719807509242, "loss": 12.0203, "step": 16494 }, { "epoch": 0.8982186736367782, "grad_norm": 0.5444446996695285, "learning_rate": 0.00012117857993169815, "loss": 12.1582, "step": 16495 }, { "epoch": 0.8982731276333612, "grad_norm": 0.5039319594598647, "learning_rate": 0.00012116996162361569, "loss": 12.1209, "step": 16496 }, { "epoch": 0.8983275816299443, "grad_norm": 0.5367761049127899, "learning_rate": 0.00012116134315091205, "loss": 12.1595, "step": 16497 }, { "epoch": 0.8983820356265273, "grad_norm": 0.5986676385179812, "learning_rate": 0.00012115272451365425, "loss": 12.1886, "step": 16498 }, { "epoch": 0.8984364896231103, "grad_norm": 0.5056536004141298, "learning_rate": 0.00012114410571190932, "loss": 12.155, "step": 16499 }, { "epoch": 0.8984909436196933, "grad_norm": 0.5538299734606021, "learning_rate": 0.00012113548674574428, "loss": 12.1304, "step": 16500 }, { "epoch": 0.8985453976162763, "grad_norm": 0.6101289756036824, "learning_rate": 0.00012112686761522618, "loss": 12.2362, "step": 16501 }, { "epoch": 0.8985998516128593, "grad_norm": 0.5493581205779898, "learning_rate": 0.00012111824832042198, "loss": 12.1193, "step": 16502 }, { "epoch": 0.8986543056094424, "grad_norm": 0.5364252298008217, "learning_rate": 0.00012110962886139874, "loss": 12.0637, "step": 16503 }, { "epoch": 0.8987087596060254, "grad_norm": 0.6534881826446343, "learning_rate": 0.00012110100923822347, "loss": 12.0993, "step": 16504 }, { "epoch": 0.8987632136026084, "grad_norm": 0.5688817236521643, "learning_rate": 0.00012109238945096324, "loss": 12.1232, "step": 16505 }, { "epoch": 0.8988176675991914, "grad_norm": 0.571168880384876, "learning_rate": 0.00012108376949968507, "loss": 12.1747, "step": 16506 }, { "epoch": 0.8988721215957743, "grad_norm": 0.581402689275133, "learning_rate": 0.00012107514938445597, "loss": 12.0934, "step": 16507 }, { "epoch": 0.8989265755923573, "grad_norm": 0.565341996031247, "learning_rate": 0.00012106652910534295, "loss": 12.1374, "step": 16508 }, { "epoch": 0.8989810295889404, "grad_norm": 0.5806175343777127, "learning_rate": 0.00012105790866241304, "loss": 12.1423, "step": 16509 }, { "epoch": 0.8990354835855234, "grad_norm": 0.6088934935079416, "learning_rate": 0.00012104928805573335, "loss": 12.1107, "step": 16510 }, { "epoch": 0.8990899375821064, "grad_norm": 0.592673300247471, "learning_rate": 0.00012104066728537087, "loss": 12.221, "step": 16511 }, { "epoch": 0.8991443915786894, "grad_norm": 0.5911083965399428, "learning_rate": 0.0001210320463513926, "loss": 12.2857, "step": 16512 }, { "epoch": 0.8991988455752724, "grad_norm": 0.6696587962542493, "learning_rate": 0.00012102342525386563, "loss": 12.2196, "step": 16513 }, { "epoch": 0.8992532995718554, "grad_norm": 0.6126665467482124, "learning_rate": 0.00012101480399285694, "loss": 12.1568, "step": 16514 }, { "epoch": 0.8993077535684385, "grad_norm": 0.5412566276103402, "learning_rate": 0.00012100618256843365, "loss": 12.1299, "step": 16515 }, { "epoch": 0.8993622075650215, "grad_norm": 0.5894568191245585, "learning_rate": 0.00012099756098066277, "loss": 12.0663, "step": 16516 }, { "epoch": 0.8994166615616045, "grad_norm": 0.5975120390893832, "learning_rate": 0.00012098893922961132, "loss": 12.2112, "step": 16517 }, { "epoch": 0.8994711155581875, "grad_norm": 0.501871125377737, "learning_rate": 0.00012098031731534636, "loss": 12.1129, "step": 16518 }, { "epoch": 0.8995255695547705, "grad_norm": 0.5719878838629991, "learning_rate": 0.00012097169523793492, "loss": 12.2414, "step": 16519 }, { "epoch": 0.8995800235513535, "grad_norm": 0.5550650218828809, "learning_rate": 0.00012096307299744407, "loss": 12.1494, "step": 16520 }, { "epoch": 0.8996344775479366, "grad_norm": 0.5500582375834041, "learning_rate": 0.00012095445059394086, "loss": 12.2454, "step": 16521 }, { "epoch": 0.8996889315445196, "grad_norm": 0.5435338973403163, "learning_rate": 0.00012094582802749233, "loss": 12.09, "step": 16522 }, { "epoch": 0.8997433855411026, "grad_norm": 0.5817020199326123, "learning_rate": 0.0001209372052981655, "loss": 12.0923, "step": 16523 }, { "epoch": 0.8997978395376856, "grad_norm": 0.5539529391293029, "learning_rate": 0.00012092858240602746, "loss": 12.1604, "step": 16524 }, { "epoch": 0.8998522935342685, "grad_norm": 0.5991940563450556, "learning_rate": 0.00012091995935114529, "loss": 12.2028, "step": 16525 }, { "epoch": 0.8999067475308516, "grad_norm": 0.5861434759316408, "learning_rate": 0.00012091133613358594, "loss": 12.2572, "step": 16526 }, { "epoch": 0.8999612015274346, "grad_norm": 0.5538040311209945, "learning_rate": 0.0001209027127534166, "loss": 12.0785, "step": 16527 }, { "epoch": 0.9000156555240176, "grad_norm": 0.5297275052982207, "learning_rate": 0.00012089408921070424, "loss": 12.1511, "step": 16528 }, { "epoch": 0.9000701095206006, "grad_norm": 0.5395278733873242, "learning_rate": 0.00012088546550551592, "loss": 12.1181, "step": 16529 }, { "epoch": 0.9001245635171836, "grad_norm": 0.5456466831033283, "learning_rate": 0.00012087684163791873, "loss": 12.1539, "step": 16530 }, { "epoch": 0.9001790175137666, "grad_norm": 0.5656408884837834, "learning_rate": 0.0001208682176079797, "loss": 12.2807, "step": 16531 }, { "epoch": 0.9002334715103497, "grad_norm": 0.543053283049204, "learning_rate": 0.00012085959341576596, "loss": 12.1513, "step": 16532 }, { "epoch": 0.9002879255069327, "grad_norm": 0.5275488355315441, "learning_rate": 0.00012085096906134447, "loss": 12.167, "step": 16533 }, { "epoch": 0.9003423795035157, "grad_norm": 0.5525419018374172, "learning_rate": 0.00012084234454478239, "loss": 12.0559, "step": 16534 }, { "epoch": 0.9003968335000987, "grad_norm": 0.5121672343176237, "learning_rate": 0.00012083371986614671, "loss": 12.1287, "step": 16535 }, { "epoch": 0.9004512874966817, "grad_norm": 0.5679055100171616, "learning_rate": 0.00012082509502550454, "loss": 12.1174, "step": 16536 }, { "epoch": 0.9005057414932647, "grad_norm": 0.6259631770417831, "learning_rate": 0.00012081647002292296, "loss": 12.1103, "step": 16537 }, { "epoch": 0.9005601954898478, "grad_norm": 0.6373837573994012, "learning_rate": 0.00012080784485846899, "loss": 12.1739, "step": 16538 }, { "epoch": 0.9006146494864308, "grad_norm": 0.5627806904812449, "learning_rate": 0.00012079921953220975, "loss": 12.1975, "step": 16539 }, { "epoch": 0.9006691034830138, "grad_norm": 0.5666742303334366, "learning_rate": 0.00012079059404421227, "loss": 12.1863, "step": 16540 }, { "epoch": 0.9007235574795968, "grad_norm": 0.5884420986046116, "learning_rate": 0.00012078196839454365, "loss": 12.257, "step": 16541 }, { "epoch": 0.9007780114761798, "grad_norm": 0.6108872627903197, "learning_rate": 0.00012077334258327097, "loss": 12.1131, "step": 16542 }, { "epoch": 0.9008324654727627, "grad_norm": 0.6058293067479195, "learning_rate": 0.00012076471661046129, "loss": 12.146, "step": 16543 }, { "epoch": 0.9008869194693458, "grad_norm": 0.5609878745463391, "learning_rate": 0.00012075609047618169, "loss": 12.2037, "step": 16544 }, { "epoch": 0.9009413734659288, "grad_norm": 0.5445608032269247, "learning_rate": 0.00012074746418049924, "loss": 12.1039, "step": 16545 }, { "epoch": 0.9009958274625118, "grad_norm": 0.5625864225762172, "learning_rate": 0.00012073883772348105, "loss": 12.2862, "step": 16546 }, { "epoch": 0.9010502814590948, "grad_norm": 0.5848363906000628, "learning_rate": 0.00012073021110519416, "loss": 12.1611, "step": 16547 }, { "epoch": 0.9011047354556778, "grad_norm": 0.5851829608040879, "learning_rate": 0.00012072158432570569, "loss": 12.2337, "step": 16548 }, { "epoch": 0.9011591894522608, "grad_norm": 0.5248127918593556, "learning_rate": 0.00012071295738508268, "loss": 12.2178, "step": 16549 }, { "epoch": 0.9012136434488439, "grad_norm": 0.5568758893873234, "learning_rate": 0.00012070433028339226, "loss": 12.1396, "step": 16550 }, { "epoch": 0.9012680974454269, "grad_norm": 0.5732195807465644, "learning_rate": 0.00012069570302070148, "loss": 12.1719, "step": 16551 }, { "epoch": 0.9013225514420099, "grad_norm": 0.6810937493211286, "learning_rate": 0.00012068707559707746, "loss": 12.2129, "step": 16552 }, { "epoch": 0.9013770054385929, "grad_norm": 0.5264730181108157, "learning_rate": 0.00012067844801258726, "loss": 12.1977, "step": 16553 }, { "epoch": 0.9014314594351759, "grad_norm": 0.5334773683000122, "learning_rate": 0.00012066982026729798, "loss": 12.1088, "step": 16554 }, { "epoch": 0.9014859134317589, "grad_norm": 0.6875605989114848, "learning_rate": 0.0001206611923612767, "loss": 12.0489, "step": 16555 }, { "epoch": 0.901540367428342, "grad_norm": 0.6142806055177614, "learning_rate": 0.00012065256429459056, "loss": 12.2554, "step": 16556 }, { "epoch": 0.901594821424925, "grad_norm": 0.5594151040235987, "learning_rate": 0.00012064393606730662, "loss": 12.1569, "step": 16557 }, { "epoch": 0.901649275421508, "grad_norm": 0.5922801599066374, "learning_rate": 0.00012063530767949191, "loss": 12.1549, "step": 16558 }, { "epoch": 0.901703729418091, "grad_norm": 0.6089845067197045, "learning_rate": 0.00012062667913121363, "loss": 12.2396, "step": 16559 }, { "epoch": 0.901758183414674, "grad_norm": 0.5340371089060799, "learning_rate": 0.00012061805042253881, "loss": 11.9331, "step": 16560 }, { "epoch": 0.9018126374112571, "grad_norm": 0.55790354309246, "learning_rate": 0.0001206094215535346, "loss": 12.0415, "step": 16561 }, { "epoch": 0.90186709140784, "grad_norm": 0.6135537059450782, "learning_rate": 0.00012060079252426809, "loss": 12.1952, "step": 16562 }, { "epoch": 0.901921545404423, "grad_norm": 0.5821693916055684, "learning_rate": 0.00012059216333480632, "loss": 12.2118, "step": 16563 }, { "epoch": 0.901975999401006, "grad_norm": 0.5744206999331853, "learning_rate": 0.00012058353398521644, "loss": 12.2648, "step": 16564 }, { "epoch": 0.902030453397589, "grad_norm": 0.6871102465432373, "learning_rate": 0.00012057490447556556, "loss": 12.1974, "step": 16565 }, { "epoch": 0.902084907394172, "grad_norm": 0.5584545841478958, "learning_rate": 0.00012056627480592077, "loss": 12.1063, "step": 16566 }, { "epoch": 0.9021393613907551, "grad_norm": 0.5289270437003082, "learning_rate": 0.0001205576449763492, "loss": 12.0957, "step": 16567 }, { "epoch": 0.9021938153873381, "grad_norm": 0.613888501086125, "learning_rate": 0.0001205490149869179, "loss": 12.1563, "step": 16568 }, { "epoch": 0.9022482693839211, "grad_norm": 0.6974437227061876, "learning_rate": 0.00012054038483769401, "loss": 12.1972, "step": 16569 }, { "epoch": 0.9023027233805041, "grad_norm": 0.5782039366655223, "learning_rate": 0.00012053175452874466, "loss": 12.1505, "step": 16570 }, { "epoch": 0.9023571773770871, "grad_norm": 0.5986953500684845, "learning_rate": 0.00012052312406013694, "loss": 12.0572, "step": 16571 }, { "epoch": 0.9024116313736701, "grad_norm": 0.5781745381427984, "learning_rate": 0.00012051449343193799, "loss": 12.1791, "step": 16572 }, { "epoch": 0.9024660853702532, "grad_norm": 0.580489142635654, "learning_rate": 0.00012050586264421489, "loss": 12.1939, "step": 16573 }, { "epoch": 0.9025205393668362, "grad_norm": 0.5888350093249815, "learning_rate": 0.00012049723169703474, "loss": 12.1028, "step": 16574 }, { "epoch": 0.9025749933634192, "grad_norm": 0.6599516366929871, "learning_rate": 0.00012048860059046468, "loss": 12.33, "step": 16575 }, { "epoch": 0.9026294473600022, "grad_norm": 0.6568993104827798, "learning_rate": 0.00012047996932457182, "loss": 12.3188, "step": 16576 }, { "epoch": 0.9026839013565852, "grad_norm": 0.5526626692920116, "learning_rate": 0.0001204713378994233, "loss": 12.1452, "step": 16577 }, { "epoch": 0.9027383553531682, "grad_norm": 0.5411002258022113, "learning_rate": 0.00012046270631508623, "loss": 12.2017, "step": 16578 }, { "epoch": 0.9027928093497513, "grad_norm": 0.5304822759479126, "learning_rate": 0.00012045407457162772, "loss": 11.9785, "step": 16579 }, { "epoch": 0.9028472633463343, "grad_norm": 0.668688733482345, "learning_rate": 0.00012044544266911488, "loss": 12.1479, "step": 16580 }, { "epoch": 0.9029017173429172, "grad_norm": 0.5969611170805564, "learning_rate": 0.00012043681060761484, "loss": 12.1249, "step": 16581 }, { "epoch": 0.9029561713395002, "grad_norm": 0.6044686527675578, "learning_rate": 0.0001204281783871948, "loss": 12.2064, "step": 16582 }, { "epoch": 0.9030106253360832, "grad_norm": 0.577749237566279, "learning_rate": 0.00012041954600792175, "loss": 12.1072, "step": 16583 }, { "epoch": 0.9030650793326662, "grad_norm": 0.5766044759471065, "learning_rate": 0.00012041091346986292, "loss": 12.2084, "step": 16584 }, { "epoch": 0.9031195333292493, "grad_norm": 0.5806415623737022, "learning_rate": 0.0001204022807730854, "loss": 12.1572, "step": 16585 }, { "epoch": 0.9031739873258323, "grad_norm": 0.5656482861317721, "learning_rate": 0.0001203936479176563, "loss": 12.1701, "step": 16586 }, { "epoch": 0.9032284413224153, "grad_norm": 0.619887669845093, "learning_rate": 0.00012038501490364281, "loss": 12.19, "step": 16587 }, { "epoch": 0.9032828953189983, "grad_norm": 0.595453210340934, "learning_rate": 0.00012037638173111201, "loss": 12.0868, "step": 16588 }, { "epoch": 0.9033373493155813, "grad_norm": 0.5307560222648793, "learning_rate": 0.00012036774840013103, "loss": 12.0417, "step": 16589 }, { "epoch": 0.9033918033121643, "grad_norm": 0.5689723062921288, "learning_rate": 0.00012035911491076704, "loss": 12.0614, "step": 16590 }, { "epoch": 0.9034462573087474, "grad_norm": 0.5369195786912859, "learning_rate": 0.00012035048126308715, "loss": 12.1579, "step": 16591 }, { "epoch": 0.9035007113053304, "grad_norm": 0.528098688405729, "learning_rate": 0.00012034184745715853, "loss": 12.1516, "step": 16592 }, { "epoch": 0.9035551653019134, "grad_norm": 0.6048731746648017, "learning_rate": 0.00012033321349304827, "loss": 12.1478, "step": 16593 }, { "epoch": 0.9036096192984964, "grad_norm": 0.5574691218373621, "learning_rate": 0.00012032457937082353, "loss": 12.1007, "step": 16594 }, { "epoch": 0.9036640732950794, "grad_norm": 0.5729354710039366, "learning_rate": 0.00012031594509055146, "loss": 12.2509, "step": 16595 }, { "epoch": 0.9037185272916625, "grad_norm": 0.5898254632000199, "learning_rate": 0.00012030731065229918, "loss": 12.178, "step": 16596 }, { "epoch": 0.9037729812882455, "grad_norm": 0.5430476778389886, "learning_rate": 0.00012029867605613385, "loss": 12.1013, "step": 16597 }, { "epoch": 0.9038274352848285, "grad_norm": 0.5719532844173403, "learning_rate": 0.00012029004130212263, "loss": 12.2637, "step": 16598 }, { "epoch": 0.9038818892814114, "grad_norm": 0.5673084511445369, "learning_rate": 0.00012028140639033262, "loss": 12.2498, "step": 16599 }, { "epoch": 0.9039363432779944, "grad_norm": 0.5575708863107153, "learning_rate": 0.00012027277132083103, "loss": 12.0741, "step": 16600 }, { "epoch": 0.9039907972745774, "grad_norm": 0.5685739983488483, "learning_rate": 0.00012026413609368495, "loss": 12.1816, "step": 16601 }, { "epoch": 0.9040452512711605, "grad_norm": 0.581791608106945, "learning_rate": 0.00012025550070896155, "loss": 12.1209, "step": 16602 }, { "epoch": 0.9040997052677435, "grad_norm": 0.5168490029790749, "learning_rate": 0.00012024686516672796, "loss": 12.1303, "step": 16603 }, { "epoch": 0.9041541592643265, "grad_norm": 0.5735573094212707, "learning_rate": 0.0001202382294670514, "loss": 12.1718, "step": 16604 }, { "epoch": 0.9042086132609095, "grad_norm": 0.5851835687748101, "learning_rate": 0.00012022959360999893, "loss": 12.1299, "step": 16605 }, { "epoch": 0.9042630672574925, "grad_norm": 0.5561947463654558, "learning_rate": 0.00012022095759563777, "loss": 12.0567, "step": 16606 }, { "epoch": 0.9043175212540755, "grad_norm": 0.57096710325472, "learning_rate": 0.00012021232142403502, "loss": 12.1633, "step": 16607 }, { "epoch": 0.9043719752506586, "grad_norm": 0.6064023929898223, "learning_rate": 0.0001202036850952579, "loss": 12.195, "step": 16608 }, { "epoch": 0.9044264292472416, "grad_norm": 0.7204348761597378, "learning_rate": 0.00012019504860937352, "loss": 12.2921, "step": 16609 }, { "epoch": 0.9044808832438246, "grad_norm": 0.5646044232101393, "learning_rate": 0.00012018641196644906, "loss": 12.1701, "step": 16610 }, { "epoch": 0.9045353372404076, "grad_norm": 0.5274641380881213, "learning_rate": 0.00012017777516655169, "loss": 12.06, "step": 16611 }, { "epoch": 0.9045897912369906, "grad_norm": 0.5251427691853415, "learning_rate": 0.00012016913820974856, "loss": 12.2064, "step": 16612 }, { "epoch": 0.9046442452335736, "grad_norm": 0.6021069458024625, "learning_rate": 0.00012016050109610679, "loss": 12.111, "step": 16613 }, { "epoch": 0.9046986992301567, "grad_norm": 0.5338626495772549, "learning_rate": 0.00012015186382569362, "loss": 12.1724, "step": 16614 }, { "epoch": 0.9047531532267397, "grad_norm": 0.5322612991595553, "learning_rate": 0.00012014322639857616, "loss": 12.1126, "step": 16615 }, { "epoch": 0.9048076072233227, "grad_norm": 0.596317737541182, "learning_rate": 0.0001201345888148216, "loss": 12.1244, "step": 16616 }, { "epoch": 0.9048620612199056, "grad_norm": 0.5481072720850805, "learning_rate": 0.00012012595107449712, "loss": 12.2287, "step": 16617 }, { "epoch": 0.9049165152164886, "grad_norm": 0.5679762111084247, "learning_rate": 0.00012011731317766983, "loss": 12.1883, "step": 16618 }, { "epoch": 0.9049709692130716, "grad_norm": 0.5880208205737794, "learning_rate": 0.00012010867512440695, "loss": 12.1742, "step": 16619 }, { "epoch": 0.9050254232096547, "grad_norm": 0.6104970007982246, "learning_rate": 0.00012010003691477564, "loss": 12.2109, "step": 16620 }, { "epoch": 0.9050798772062377, "grad_norm": 0.5823536654745095, "learning_rate": 0.00012009139854884308, "loss": 12.1697, "step": 16621 }, { "epoch": 0.9051343312028207, "grad_norm": 0.6349717560007302, "learning_rate": 0.00012008276002667646, "loss": 12.0318, "step": 16622 }, { "epoch": 0.9051887851994037, "grad_norm": 0.5989307276431446, "learning_rate": 0.0001200741213483429, "loss": 12.0962, "step": 16623 }, { "epoch": 0.9052432391959867, "grad_norm": 0.6016610302270592, "learning_rate": 0.00012006548251390959, "loss": 12.1917, "step": 16624 }, { "epoch": 0.9052976931925697, "grad_norm": 0.5617629352693115, "learning_rate": 0.00012005684352344375, "loss": 12.0168, "step": 16625 }, { "epoch": 0.9053521471891528, "grad_norm": 0.7072618121372366, "learning_rate": 0.00012004820437701252, "loss": 12.1314, "step": 16626 }, { "epoch": 0.9054066011857358, "grad_norm": 0.8022261000813399, "learning_rate": 0.00012003956507468312, "loss": 12.1558, "step": 16627 }, { "epoch": 0.9054610551823188, "grad_norm": 0.6477531181079941, "learning_rate": 0.00012003092561652267, "loss": 12.184, "step": 16628 }, { "epoch": 0.9055155091789018, "grad_norm": 0.565053757093744, "learning_rate": 0.00012002228600259838, "loss": 12.2107, "step": 16629 }, { "epoch": 0.9055699631754848, "grad_norm": 0.6237518603632382, "learning_rate": 0.00012001364623297744, "loss": 12.045, "step": 16630 }, { "epoch": 0.9056244171720679, "grad_norm": 0.6334450869783368, "learning_rate": 0.00012000500630772705, "loss": 12.1567, "step": 16631 }, { "epoch": 0.9056788711686509, "grad_norm": 0.5331753187207506, "learning_rate": 0.00011999636622691438, "loss": 12.1466, "step": 16632 }, { "epoch": 0.9057333251652339, "grad_norm": 0.5556861753442944, "learning_rate": 0.0001199877259906066, "loss": 12.2399, "step": 16633 }, { "epoch": 0.9057877791618169, "grad_norm": 0.6053269076177362, "learning_rate": 0.0001199790855988709, "loss": 12.2552, "step": 16634 }, { "epoch": 0.9058422331583998, "grad_norm": 0.5511390999628655, "learning_rate": 0.00011997044505177446, "loss": 12.2137, "step": 16635 }, { "epoch": 0.9058966871549828, "grad_norm": 0.5596620142884977, "learning_rate": 0.00011996180434938451, "loss": 12.0874, "step": 16636 }, { "epoch": 0.9059511411515659, "grad_norm": 0.7251786263883778, "learning_rate": 0.00011995316349176825, "loss": 12.1136, "step": 16637 }, { "epoch": 0.9060055951481489, "grad_norm": 0.5735425418309598, "learning_rate": 0.00011994452247899284, "loss": 12.1922, "step": 16638 }, { "epoch": 0.9060600491447319, "grad_norm": 0.5538004555453727, "learning_rate": 0.00011993588131112545, "loss": 12.2168, "step": 16639 }, { "epoch": 0.9061145031413149, "grad_norm": 0.6079380700170678, "learning_rate": 0.0001199272399882333, "loss": 12.1329, "step": 16640 }, { "epoch": 0.9061689571378979, "grad_norm": 0.617454855656419, "learning_rate": 0.0001199185985103836, "loss": 12.144, "step": 16641 }, { "epoch": 0.9062234111344809, "grad_norm": 0.5169310498004772, "learning_rate": 0.00011990995687764357, "loss": 12.2623, "step": 16642 }, { "epoch": 0.906277865131064, "grad_norm": 0.6177406350488743, "learning_rate": 0.00011990131509008036, "loss": 12.12, "step": 16643 }, { "epoch": 0.906332319127647, "grad_norm": 0.6277145773760757, "learning_rate": 0.00011989267314776116, "loss": 12.2011, "step": 16644 }, { "epoch": 0.90638677312423, "grad_norm": 0.5425803391625272, "learning_rate": 0.00011988403105075323, "loss": 12.0853, "step": 16645 }, { "epoch": 0.906441227120813, "grad_norm": 0.5910161248310666, "learning_rate": 0.0001198753887991237, "loss": 12.2245, "step": 16646 }, { "epoch": 0.906495681117396, "grad_norm": 0.5450148777653727, "learning_rate": 0.00011986674639293984, "loss": 12.2561, "step": 16647 }, { "epoch": 0.906550135113979, "grad_norm": 0.6509899028924494, "learning_rate": 0.00011985810383226884, "loss": 12.3318, "step": 16648 }, { "epoch": 0.9066045891105621, "grad_norm": 0.5842785402708647, "learning_rate": 0.00011984946111717787, "loss": 12.1969, "step": 16649 }, { "epoch": 0.9066590431071451, "grad_norm": 0.5736884415395951, "learning_rate": 0.00011984081824773418, "loss": 12.2531, "step": 16650 }, { "epoch": 0.9067134971037281, "grad_norm": 0.6329788568815561, "learning_rate": 0.00011983217522400494, "loss": 12.2265, "step": 16651 }, { "epoch": 0.906767951100311, "grad_norm": 0.5200775966974495, "learning_rate": 0.0001198235320460574, "loss": 12.1019, "step": 16652 }, { "epoch": 0.906822405096894, "grad_norm": 0.6080449242599029, "learning_rate": 0.00011981488871395874, "loss": 12.2413, "step": 16653 }, { "epoch": 0.906876859093477, "grad_norm": 0.5179714179732703, "learning_rate": 0.0001198062452277762, "loss": 12.2128, "step": 16654 }, { "epoch": 0.9069313130900601, "grad_norm": 0.5315355453059781, "learning_rate": 0.00011979760158757693, "loss": 12.1813, "step": 16655 }, { "epoch": 0.9069857670866431, "grad_norm": 0.540844043152265, "learning_rate": 0.00011978895779342823, "loss": 12.0854, "step": 16656 }, { "epoch": 0.9070402210832261, "grad_norm": 0.5503422274480156, "learning_rate": 0.00011978031384539727, "loss": 12.2659, "step": 16657 }, { "epoch": 0.9070946750798091, "grad_norm": 0.5766267629462053, "learning_rate": 0.00011977166974355127, "loss": 12.252, "step": 16658 }, { "epoch": 0.9071491290763921, "grad_norm": 0.5981171855869646, "learning_rate": 0.00011976302548795746, "loss": 12.1501, "step": 16659 }, { "epoch": 0.9072035830729752, "grad_norm": 0.6071357970665403, "learning_rate": 0.00011975438107868302, "loss": 12.237, "step": 16660 }, { "epoch": 0.9072580370695582, "grad_norm": 0.5337519119985165, "learning_rate": 0.00011974573651579521, "loss": 12.0513, "step": 16661 }, { "epoch": 0.9073124910661412, "grad_norm": 0.6093536507145341, "learning_rate": 0.00011973709179936125, "loss": 12.2212, "step": 16662 }, { "epoch": 0.9073669450627242, "grad_norm": 0.5361183955849707, "learning_rate": 0.00011972844692944835, "loss": 11.9772, "step": 16663 }, { "epoch": 0.9074213990593072, "grad_norm": 0.6072014184975755, "learning_rate": 0.00011971980190612375, "loss": 12.0981, "step": 16664 }, { "epoch": 0.9074758530558902, "grad_norm": 0.5604623769710646, "learning_rate": 0.00011971115672945466, "loss": 12.2179, "step": 16665 }, { "epoch": 0.9075303070524733, "grad_norm": 0.6645669085703189, "learning_rate": 0.0001197025113995083, "loss": 12.3859, "step": 16666 }, { "epoch": 0.9075847610490563, "grad_norm": 0.6003976294910272, "learning_rate": 0.00011969386591635192, "loss": 12.164, "step": 16667 }, { "epoch": 0.9076392150456393, "grad_norm": 0.6585124722512923, "learning_rate": 0.00011968522028005273, "loss": 12.2127, "step": 16668 }, { "epoch": 0.9076936690422223, "grad_norm": 0.5535103951097059, "learning_rate": 0.00011967657449067795, "loss": 12.1997, "step": 16669 }, { "epoch": 0.9077481230388053, "grad_norm": 0.541805955353317, "learning_rate": 0.00011966792854829485, "loss": 12.0348, "step": 16670 }, { "epoch": 0.9078025770353882, "grad_norm": 0.5215776723442405, "learning_rate": 0.00011965928245297063, "loss": 12.1675, "step": 16671 }, { "epoch": 0.9078570310319714, "grad_norm": 0.5248796095810393, "learning_rate": 0.00011965063620477252, "loss": 12.0794, "step": 16672 }, { "epoch": 0.9079114850285543, "grad_norm": 0.48543629832874036, "learning_rate": 0.0001196419898037678, "loss": 12.0909, "step": 16673 }, { "epoch": 0.9079659390251373, "grad_norm": 0.5810116401522981, "learning_rate": 0.00011963334325002364, "loss": 12.1623, "step": 16674 }, { "epoch": 0.9080203930217203, "grad_norm": 0.5140735331995465, "learning_rate": 0.00011962469654360733, "loss": 12.0415, "step": 16675 }, { "epoch": 0.9080748470183033, "grad_norm": 0.5242609749151819, "learning_rate": 0.00011961604968458609, "loss": 12.1652, "step": 16676 }, { "epoch": 0.9081293010148863, "grad_norm": 0.5012690451280981, "learning_rate": 0.00011960740267302715, "loss": 12.0782, "step": 16677 }, { "epoch": 0.9081837550114694, "grad_norm": 0.5704474808309116, "learning_rate": 0.00011959875550899775, "loss": 12.2232, "step": 16678 }, { "epoch": 0.9082382090080524, "grad_norm": 0.5242457472681777, "learning_rate": 0.00011959010819256515, "loss": 12.1655, "step": 16679 }, { "epoch": 0.9082926630046354, "grad_norm": 0.5767583316134496, "learning_rate": 0.00011958146072379659, "loss": 12.0799, "step": 16680 }, { "epoch": 0.9083471170012184, "grad_norm": 0.612337674023645, "learning_rate": 0.00011957281310275929, "loss": 12.1277, "step": 16681 }, { "epoch": 0.9084015709978014, "grad_norm": 0.5639431431197535, "learning_rate": 0.00011956416532952052, "loss": 12.1048, "step": 16682 }, { "epoch": 0.9084560249943844, "grad_norm": 0.5781707248709169, "learning_rate": 0.00011955551740414754, "loss": 12.0985, "step": 16683 }, { "epoch": 0.9085104789909675, "grad_norm": 0.5415750537886928, "learning_rate": 0.00011954686932670755, "loss": 12.0249, "step": 16684 }, { "epoch": 0.9085649329875505, "grad_norm": 0.5771376377405151, "learning_rate": 0.00011953822109726785, "loss": 12.1427, "step": 16685 }, { "epoch": 0.9086193869841335, "grad_norm": 0.5812078814955836, "learning_rate": 0.00011952957271589565, "loss": 12.2247, "step": 16686 }, { "epoch": 0.9086738409807165, "grad_norm": 0.5473565554611068, "learning_rate": 0.00011952092418265821, "loss": 12.0588, "step": 16687 }, { "epoch": 0.9087282949772995, "grad_norm": 0.5833788408235893, "learning_rate": 0.00011951227549762283, "loss": 12.102, "step": 16688 }, { "epoch": 0.9087827489738824, "grad_norm": 0.5727748231134404, "learning_rate": 0.00011950362666085665, "loss": 12.1467, "step": 16689 }, { "epoch": 0.9088372029704656, "grad_norm": 0.5539800357546315, "learning_rate": 0.00011949497767242706, "loss": 12.1409, "step": 16690 }, { "epoch": 0.9088916569670485, "grad_norm": 0.5645437034529261, "learning_rate": 0.00011948632853240122, "loss": 12.0354, "step": 16691 }, { "epoch": 0.9089461109636315, "grad_norm": 0.605985359763697, "learning_rate": 0.00011947767924084645, "loss": 12.1695, "step": 16692 }, { "epoch": 0.9090005649602145, "grad_norm": 0.5683245753476472, "learning_rate": 0.00011946902979782999, "loss": 12.0926, "step": 16693 }, { "epoch": 0.9090550189567975, "grad_norm": 0.5298631423959123, "learning_rate": 0.00011946038020341905, "loss": 12.2171, "step": 16694 }, { "epoch": 0.9091094729533806, "grad_norm": 0.6042090785026276, "learning_rate": 0.00011945173045768095, "loss": 12.2209, "step": 16695 }, { "epoch": 0.9091639269499636, "grad_norm": 0.5521047723399338, "learning_rate": 0.00011944308056068292, "loss": 12.0592, "step": 16696 }, { "epoch": 0.9092183809465466, "grad_norm": 0.5184238358553434, "learning_rate": 0.00011943443051249224, "loss": 12.1804, "step": 16697 }, { "epoch": 0.9092728349431296, "grad_norm": 0.6132916575172702, "learning_rate": 0.00011942578031317619, "loss": 12.2734, "step": 16698 }, { "epoch": 0.9093272889397126, "grad_norm": 0.5377853963467617, "learning_rate": 0.00011941712996280201, "loss": 12.0987, "step": 16699 }, { "epoch": 0.9093817429362956, "grad_norm": 0.4954452948200757, "learning_rate": 0.00011940847946143696, "loss": 12.1779, "step": 16700 }, { "epoch": 0.9094361969328787, "grad_norm": 0.5452706709754639, "learning_rate": 0.00011939982880914828, "loss": 12.1383, "step": 16701 }, { "epoch": 0.9094906509294617, "grad_norm": 0.5960469894657686, "learning_rate": 0.00011939117800600333, "loss": 12.1946, "step": 16702 }, { "epoch": 0.9095451049260447, "grad_norm": 0.6250770426978768, "learning_rate": 0.00011938252705206934, "loss": 12.12, "step": 16703 }, { "epoch": 0.9095995589226277, "grad_norm": 0.5619249572154056, "learning_rate": 0.00011937387594741353, "loss": 12.1961, "step": 16704 }, { "epoch": 0.9096540129192107, "grad_norm": 0.5960458283295135, "learning_rate": 0.00011936522469210323, "loss": 12.1709, "step": 16705 }, { "epoch": 0.9097084669157937, "grad_norm": 0.545670326510443, "learning_rate": 0.00011935657328620566, "loss": 12.0751, "step": 16706 }, { "epoch": 0.9097629209123768, "grad_norm": 0.5305274367137507, "learning_rate": 0.00011934792172978815, "loss": 12.1436, "step": 16707 }, { "epoch": 0.9098173749089598, "grad_norm": 0.5447327452757997, "learning_rate": 0.00011933927002291801, "loss": 12.0792, "step": 16708 }, { "epoch": 0.9098718289055427, "grad_norm": 0.6511668231413018, "learning_rate": 0.0001193306181656624, "loss": 12.1077, "step": 16709 }, { "epoch": 0.9099262829021257, "grad_norm": 0.5451503446692807, "learning_rate": 0.00011932196615808868, "loss": 12.1108, "step": 16710 }, { "epoch": 0.9099807368987087, "grad_norm": 0.5807730649400389, "learning_rate": 0.0001193133140002641, "loss": 12.246, "step": 16711 }, { "epoch": 0.9100351908952917, "grad_norm": 0.5497627488136255, "learning_rate": 0.00011930466169225595, "loss": 12.1494, "step": 16712 }, { "epoch": 0.9100896448918748, "grad_norm": 0.568200531087904, "learning_rate": 0.00011929600923413156, "loss": 12.1344, "step": 16713 }, { "epoch": 0.9101440988884578, "grad_norm": 0.5289936895464843, "learning_rate": 0.00011928735662595812, "loss": 12.1177, "step": 16714 }, { "epoch": 0.9101985528850408, "grad_norm": 0.5598408108034993, "learning_rate": 0.00011927870386780298, "loss": 12.1175, "step": 16715 }, { "epoch": 0.9102530068816238, "grad_norm": 0.6177995714639638, "learning_rate": 0.00011927005095973341, "loss": 12.1444, "step": 16716 }, { "epoch": 0.9103074608782068, "grad_norm": 0.6274524773670057, "learning_rate": 0.00011926139790181663, "loss": 12.2214, "step": 16717 }, { "epoch": 0.9103619148747898, "grad_norm": 0.5085526453142428, "learning_rate": 0.00011925274469412007, "loss": 12.2215, "step": 16718 }, { "epoch": 0.9104163688713729, "grad_norm": 0.5974215900567931, "learning_rate": 0.00011924409133671091, "loss": 12.123, "step": 16719 }, { "epoch": 0.9104708228679559, "grad_norm": 0.5536194685243843, "learning_rate": 0.00011923543782965647, "loss": 12.1997, "step": 16720 }, { "epoch": 0.9105252768645389, "grad_norm": 0.548810186607552, "learning_rate": 0.00011922678417302404, "loss": 12.1594, "step": 16721 }, { "epoch": 0.9105797308611219, "grad_norm": 0.5358261416721035, "learning_rate": 0.0001192181303668809, "loss": 12.1352, "step": 16722 }, { "epoch": 0.9106341848577049, "grad_norm": 0.5869635801603279, "learning_rate": 0.00011920947641129437, "loss": 12.1825, "step": 16723 }, { "epoch": 0.9106886388542879, "grad_norm": 0.5178457255217145, "learning_rate": 0.00011920082230633172, "loss": 12.1535, "step": 16724 }, { "epoch": 0.910743092850871, "grad_norm": 0.5471474478485095, "learning_rate": 0.00011919216805206026, "loss": 12.0578, "step": 16725 }, { "epoch": 0.910797546847454, "grad_norm": 0.5618150486227647, "learning_rate": 0.00011918351364854728, "loss": 12.1135, "step": 16726 }, { "epoch": 0.910852000844037, "grad_norm": 0.5054061078826275, "learning_rate": 0.00011917485909586008, "loss": 12.1893, "step": 16727 }, { "epoch": 0.9109064548406199, "grad_norm": 0.5295583824315849, "learning_rate": 0.00011916620439406597, "loss": 12.131, "step": 16728 }, { "epoch": 0.9109609088372029, "grad_norm": 0.5810858529402816, "learning_rate": 0.00011915754954323222, "loss": 12.0111, "step": 16729 }, { "epoch": 0.911015362833786, "grad_norm": 0.581320298295613, "learning_rate": 0.00011914889454342617, "loss": 12.2068, "step": 16730 }, { "epoch": 0.911069816830369, "grad_norm": 0.5417895266138278, "learning_rate": 0.0001191402393947151, "loss": 12.1504, "step": 16731 }, { "epoch": 0.911124270826952, "grad_norm": 0.5312038085499952, "learning_rate": 0.00011913158409716631, "loss": 12.0605, "step": 16732 }, { "epoch": 0.911178724823535, "grad_norm": 0.5206094610344083, "learning_rate": 0.00011912292865084713, "loss": 12.1059, "step": 16733 }, { "epoch": 0.911233178820118, "grad_norm": 0.5489024764471662, "learning_rate": 0.00011911427305582486, "loss": 12.0937, "step": 16734 }, { "epoch": 0.911287632816701, "grad_norm": 0.5308736008490176, "learning_rate": 0.00011910561731216676, "loss": 12.2083, "step": 16735 }, { "epoch": 0.9113420868132841, "grad_norm": 0.5017524052748702, "learning_rate": 0.0001190969614199402, "loss": 12.079, "step": 16736 }, { "epoch": 0.9113965408098671, "grad_norm": 0.5330214212294354, "learning_rate": 0.00011908830537921247, "loss": 12.2847, "step": 16737 }, { "epoch": 0.9114509948064501, "grad_norm": 0.542780798730531, "learning_rate": 0.00011907964919005085, "loss": 12.253, "step": 16738 }, { "epoch": 0.9115054488030331, "grad_norm": 0.526216751731207, "learning_rate": 0.0001190709928525227, "loss": 12.1371, "step": 16739 }, { "epoch": 0.9115599027996161, "grad_norm": 0.5608380080254172, "learning_rate": 0.0001190623363666953, "loss": 12.0116, "step": 16740 }, { "epoch": 0.9116143567961991, "grad_norm": 0.5547421600370966, "learning_rate": 0.000119053679732636, "loss": 12.1079, "step": 16741 }, { "epoch": 0.9116688107927822, "grad_norm": 0.614869012298698, "learning_rate": 0.00011904502295041206, "loss": 12.2039, "step": 16742 }, { "epoch": 0.9117232647893652, "grad_norm": 0.5727502640962558, "learning_rate": 0.00011903636602009087, "loss": 12.1143, "step": 16743 }, { "epoch": 0.9117777187859482, "grad_norm": 0.5575695243558199, "learning_rate": 0.00011902770894173967, "loss": 12.1738, "step": 16744 }, { "epoch": 0.9118321727825311, "grad_norm": 0.5975922358915751, "learning_rate": 0.0001190190517154258, "loss": 12.277, "step": 16745 }, { "epoch": 0.9118866267791141, "grad_norm": 0.6040288247120295, "learning_rate": 0.00011901039434121661, "loss": 12.1355, "step": 16746 }, { "epoch": 0.9119410807756971, "grad_norm": 0.6450256711017941, "learning_rate": 0.00011900173681917944, "loss": 12.1493, "step": 16747 }, { "epoch": 0.9119955347722802, "grad_norm": 0.4947022240185635, "learning_rate": 0.00011899307914938157, "loss": 12.1176, "step": 16748 }, { "epoch": 0.9120499887688632, "grad_norm": 0.5670942648674901, "learning_rate": 0.0001189844213318903, "loss": 12.2005, "step": 16749 }, { "epoch": 0.9121044427654462, "grad_norm": 0.5765238273736617, "learning_rate": 0.00011897576336677297, "loss": 12.0534, "step": 16750 }, { "epoch": 0.9121588967620292, "grad_norm": 0.5754905418803878, "learning_rate": 0.00011896710525409696, "loss": 12.1506, "step": 16751 }, { "epoch": 0.9122133507586122, "grad_norm": 0.597427615061958, "learning_rate": 0.00011895844699392952, "loss": 12.132, "step": 16752 }, { "epoch": 0.9122678047551952, "grad_norm": 0.5888072462379587, "learning_rate": 0.00011894978858633808, "loss": 12.1797, "step": 16753 }, { "epoch": 0.9123222587517783, "grad_norm": 0.5648742161166406, "learning_rate": 0.00011894113003138987, "loss": 12.0295, "step": 16754 }, { "epoch": 0.9123767127483613, "grad_norm": 0.580604401098401, "learning_rate": 0.00011893247132915224, "loss": 12.0438, "step": 16755 }, { "epoch": 0.9124311667449443, "grad_norm": 0.6148933844315049, "learning_rate": 0.00011892381247969256, "loss": 12.1361, "step": 16756 }, { "epoch": 0.9124856207415273, "grad_norm": 0.5691730989122715, "learning_rate": 0.00011891515348307812, "loss": 12.0467, "step": 16757 }, { "epoch": 0.9125400747381103, "grad_norm": 0.5702840363937903, "learning_rate": 0.00011890649433937631, "loss": 12.1824, "step": 16758 }, { "epoch": 0.9125945287346933, "grad_norm": 0.7081968855598006, "learning_rate": 0.00011889783504865442, "loss": 12.1837, "step": 16759 }, { "epoch": 0.9126489827312764, "grad_norm": 0.5773421638162692, "learning_rate": 0.00011888917561097978, "loss": 12.2206, "step": 16760 }, { "epoch": 0.9127034367278594, "grad_norm": 0.6249849239293285, "learning_rate": 0.00011888051602641971, "loss": 12.2438, "step": 16761 }, { "epoch": 0.9127578907244424, "grad_norm": 0.5705795563291992, "learning_rate": 0.00011887185629504162, "loss": 12.1928, "step": 16762 }, { "epoch": 0.9128123447210253, "grad_norm": 0.565809674777921, "learning_rate": 0.00011886319641691284, "loss": 12.1484, "step": 16763 }, { "epoch": 0.9128667987176083, "grad_norm": 0.6011464088918341, "learning_rate": 0.00011885453639210064, "loss": 12.0994, "step": 16764 }, { "epoch": 0.9129212527141914, "grad_norm": 0.5115240947395492, "learning_rate": 0.00011884587622067243, "loss": 12.1974, "step": 16765 }, { "epoch": 0.9129757067107744, "grad_norm": 0.5979106644846061, "learning_rate": 0.00011883721590269548, "loss": 12.2424, "step": 16766 }, { "epoch": 0.9130301607073574, "grad_norm": 0.6040356674593268, "learning_rate": 0.00011882855543823721, "loss": 12.2643, "step": 16767 }, { "epoch": 0.9130846147039404, "grad_norm": 0.6333776562522606, "learning_rate": 0.00011881989482736496, "loss": 12.2181, "step": 16768 }, { "epoch": 0.9131390687005234, "grad_norm": 0.5974897631083352, "learning_rate": 0.00011881123407014602, "loss": 12.2313, "step": 16769 }, { "epoch": 0.9131935226971064, "grad_norm": 0.5542553523882653, "learning_rate": 0.00011880257316664778, "loss": 12.2712, "step": 16770 }, { "epoch": 0.9132479766936895, "grad_norm": 0.5402366430344611, "learning_rate": 0.00011879391211693757, "loss": 12.1131, "step": 16771 }, { "epoch": 0.9133024306902725, "grad_norm": 0.511598019376126, "learning_rate": 0.00011878525092108272, "loss": 12.0113, "step": 16772 }, { "epoch": 0.9133568846868555, "grad_norm": 0.5790887579476809, "learning_rate": 0.00011877658957915068, "loss": 12.3035, "step": 16773 }, { "epoch": 0.9134113386834385, "grad_norm": 0.5195244342633508, "learning_rate": 0.00011876792809120867, "loss": 12.1332, "step": 16774 }, { "epoch": 0.9134657926800215, "grad_norm": 0.569483171720159, "learning_rate": 0.00011875926645732413, "loss": 12.0297, "step": 16775 }, { "epoch": 0.9135202466766045, "grad_norm": 0.5953612424801111, "learning_rate": 0.00011875060467756438, "loss": 12.3428, "step": 16776 }, { "epoch": 0.9135747006731876, "grad_norm": 0.5306157216561193, "learning_rate": 0.0001187419427519968, "loss": 12.1489, "step": 16777 }, { "epoch": 0.9136291546697706, "grad_norm": 0.5446436430199308, "learning_rate": 0.0001187332806806887, "loss": 12.192, "step": 16778 }, { "epoch": 0.9136836086663536, "grad_norm": 0.5653427027897644, "learning_rate": 0.00011872461846370748, "loss": 12.184, "step": 16779 }, { "epoch": 0.9137380626629366, "grad_norm": 0.5296990011718393, "learning_rate": 0.0001187159561011205, "loss": 12.2265, "step": 16780 }, { "epoch": 0.9137925166595195, "grad_norm": 0.5344827860754364, "learning_rate": 0.0001187072935929951, "loss": 12.1615, "step": 16781 }, { "epoch": 0.9138469706561025, "grad_norm": 0.5383766982801177, "learning_rate": 0.00011869863093939864, "loss": 12.1102, "step": 16782 }, { "epoch": 0.9139014246526856, "grad_norm": 1.0582908680442866, "learning_rate": 0.0001186899681403985, "loss": 12.2392, "step": 16783 }, { "epoch": 0.9139558786492686, "grad_norm": 0.6430932939318632, "learning_rate": 0.00011868130519606202, "loss": 12.1362, "step": 16784 }, { "epoch": 0.9140103326458516, "grad_norm": 0.5464335146088309, "learning_rate": 0.00011867264210645659, "loss": 12.1514, "step": 16785 }, { "epoch": 0.9140647866424346, "grad_norm": 0.5557665961056553, "learning_rate": 0.00011866397887164958, "loss": 12.2397, "step": 16786 }, { "epoch": 0.9141192406390176, "grad_norm": 0.5393363904487715, "learning_rate": 0.0001186553154917083, "loss": 12.1043, "step": 16787 }, { "epoch": 0.9141736946356006, "grad_norm": 0.5548053206768346, "learning_rate": 0.00011864665196670018, "loss": 12.2035, "step": 16788 }, { "epoch": 0.9142281486321837, "grad_norm": 0.6222726367876813, "learning_rate": 0.00011863798829669257, "loss": 12.0901, "step": 16789 }, { "epoch": 0.9142826026287667, "grad_norm": 0.5679218291031215, "learning_rate": 0.00011862932448175283, "loss": 12.0993, "step": 16790 }, { "epoch": 0.9143370566253497, "grad_norm": 0.5486238511901713, "learning_rate": 0.00011862066052194833, "loss": 12.2531, "step": 16791 }, { "epoch": 0.9143915106219327, "grad_norm": 0.5321184478689914, "learning_rate": 0.00011861199641734648, "loss": 12.1077, "step": 16792 }, { "epoch": 0.9144459646185157, "grad_norm": 0.5435200884960917, "learning_rate": 0.0001186033321680146, "loss": 12.1614, "step": 16793 }, { "epoch": 0.9145004186150988, "grad_norm": 0.5414606235146378, "learning_rate": 0.00011859466777402009, "loss": 12.2113, "step": 16794 }, { "epoch": 0.9145548726116818, "grad_norm": 0.5413555138696808, "learning_rate": 0.00011858600323543035, "loss": 12.1002, "step": 16795 }, { "epoch": 0.9146093266082648, "grad_norm": 0.5328312176322992, "learning_rate": 0.0001185773385523127, "loss": 12.0829, "step": 16796 }, { "epoch": 0.9146637806048478, "grad_norm": 0.5645118336086263, "learning_rate": 0.00011856867372473456, "loss": 12.279, "step": 16797 }, { "epoch": 0.9147182346014308, "grad_norm": 0.594588448885202, "learning_rate": 0.00011856000875276332, "loss": 12.1769, "step": 16798 }, { "epoch": 0.9147726885980138, "grad_norm": 0.5671975738301612, "learning_rate": 0.00011855134363646631, "loss": 12.1391, "step": 16799 }, { "epoch": 0.9148271425945969, "grad_norm": 0.5324538805071298, "learning_rate": 0.00011854267837591095, "loss": 12.1197, "step": 16800 }, { "epoch": 0.9148815965911798, "grad_norm": 0.6474357680124087, "learning_rate": 0.00011853401297116462, "loss": 12.1605, "step": 16801 }, { "epoch": 0.9149360505877628, "grad_norm": 0.5367501737187403, "learning_rate": 0.00011852534742229469, "loss": 12.0937, "step": 16802 }, { "epoch": 0.9149905045843458, "grad_norm": 0.5720099048286376, "learning_rate": 0.00011851668172936858, "loss": 12.0828, "step": 16803 }, { "epoch": 0.9150449585809288, "grad_norm": 0.5583897433769571, "learning_rate": 0.0001185080158924536, "loss": 12.1997, "step": 16804 }, { "epoch": 0.9150994125775118, "grad_norm": 0.54360969188526, "learning_rate": 0.0001184993499116172, "loss": 12.1959, "step": 16805 }, { "epoch": 0.9151538665740949, "grad_norm": 0.522848415236117, "learning_rate": 0.00011849068378692676, "loss": 12.1708, "step": 16806 }, { "epoch": 0.9152083205706779, "grad_norm": 0.6759180594616391, "learning_rate": 0.00011848201751844967, "loss": 12.2251, "step": 16807 }, { "epoch": 0.9152627745672609, "grad_norm": 0.5675597634248547, "learning_rate": 0.00011847335110625333, "loss": 12.0897, "step": 16808 }, { "epoch": 0.9153172285638439, "grad_norm": 0.5468070558084621, "learning_rate": 0.00011846468455040509, "loss": 12.1422, "step": 16809 }, { "epoch": 0.9153716825604269, "grad_norm": 0.5786821751893668, "learning_rate": 0.00011845601785097233, "loss": 12.1366, "step": 16810 }, { "epoch": 0.9154261365570099, "grad_norm": 0.5456084060971332, "learning_rate": 0.00011844735100802253, "loss": 12.2959, "step": 16811 }, { "epoch": 0.915480590553593, "grad_norm": 0.5598519840243396, "learning_rate": 0.00011843868402162301, "loss": 12.076, "step": 16812 }, { "epoch": 0.915535044550176, "grad_norm": 0.5677753649414218, "learning_rate": 0.00011843001689184123, "loss": 12.2355, "step": 16813 }, { "epoch": 0.915589498546759, "grad_norm": 0.5350599288088638, "learning_rate": 0.00011842134961874451, "loss": 12.1107, "step": 16814 }, { "epoch": 0.915643952543342, "grad_norm": 0.5516091666079663, "learning_rate": 0.00011841268220240027, "loss": 12.145, "step": 16815 }, { "epoch": 0.915698406539925, "grad_norm": 0.5692472115070899, "learning_rate": 0.00011840401464287596, "loss": 11.952, "step": 16816 }, { "epoch": 0.915752860536508, "grad_norm": 0.598519974976252, "learning_rate": 0.00011839534694023893, "loss": 12.0693, "step": 16817 }, { "epoch": 0.915807314533091, "grad_norm": 0.5313337553609586, "learning_rate": 0.0001183866790945566, "loss": 12.1934, "step": 16818 }, { "epoch": 0.915861768529674, "grad_norm": 0.5734942775980757, "learning_rate": 0.00011837801110589639, "loss": 12.1456, "step": 16819 }, { "epoch": 0.915916222526257, "grad_norm": 0.5834459456631099, "learning_rate": 0.00011836934297432568, "loss": 12.1803, "step": 16820 }, { "epoch": 0.91597067652284, "grad_norm": 0.5465156658572613, "learning_rate": 0.00011836067469991184, "loss": 12.0439, "step": 16821 }, { "epoch": 0.916025130519423, "grad_norm": 0.5169095512192226, "learning_rate": 0.00011835200628272234, "loss": 12.0206, "step": 16822 }, { "epoch": 0.916079584516006, "grad_norm": 0.5766012605581224, "learning_rate": 0.00011834333772282455, "loss": 12.132, "step": 16823 }, { "epoch": 0.9161340385125891, "grad_norm": 0.5590255109156209, "learning_rate": 0.00011833466902028594, "loss": 12.1715, "step": 16824 }, { "epoch": 0.9161884925091721, "grad_norm": 0.5938053946101209, "learning_rate": 0.00011832600017517382, "loss": 12.171, "step": 16825 }, { "epoch": 0.9162429465057551, "grad_norm": 0.6132387898997561, "learning_rate": 0.00011831733118755566, "loss": 12.1305, "step": 16826 }, { "epoch": 0.9162974005023381, "grad_norm": 0.6982163513457541, "learning_rate": 0.00011830866205749884, "loss": 12.2049, "step": 16827 }, { "epoch": 0.9163518544989211, "grad_norm": 0.5599338143513132, "learning_rate": 0.00011829999278507083, "loss": 12.1475, "step": 16828 }, { "epoch": 0.9164063084955042, "grad_norm": 0.6149948822024341, "learning_rate": 0.00011829132337033899, "loss": 12.1869, "step": 16829 }, { "epoch": 0.9164607624920872, "grad_norm": 0.6304619061712248, "learning_rate": 0.00011828265381337076, "loss": 12.2142, "step": 16830 }, { "epoch": 0.9165152164886702, "grad_norm": 0.4988338920506397, "learning_rate": 0.00011827398411423354, "loss": 12.006, "step": 16831 }, { "epoch": 0.9165696704852532, "grad_norm": 0.6173633751853064, "learning_rate": 0.00011826531427299475, "loss": 12.1481, "step": 16832 }, { "epoch": 0.9166241244818362, "grad_norm": 0.5357110912482415, "learning_rate": 0.00011825664428972181, "loss": 12.0745, "step": 16833 }, { "epoch": 0.9166785784784192, "grad_norm": 0.6967866106962702, "learning_rate": 0.00011824797416448216, "loss": 12.2176, "step": 16834 }, { "epoch": 0.9167330324750023, "grad_norm": 0.6544247026014385, "learning_rate": 0.0001182393038973432, "loss": 12.287, "step": 16835 }, { "epoch": 0.9167874864715853, "grad_norm": 0.5644438850069057, "learning_rate": 0.00011823063348837235, "loss": 12.1593, "step": 16836 }, { "epoch": 0.9168419404681682, "grad_norm": 0.5911753838445318, "learning_rate": 0.00011822196293763704, "loss": 12.1714, "step": 16837 }, { "epoch": 0.9168963944647512, "grad_norm": 0.5883370221029104, "learning_rate": 0.00011821329224520465, "loss": 12.1827, "step": 16838 }, { "epoch": 0.9169508484613342, "grad_norm": 0.5699740769714241, "learning_rate": 0.0001182046214111427, "loss": 12.0986, "step": 16839 }, { "epoch": 0.9170053024579172, "grad_norm": 0.6434367700000917, "learning_rate": 0.00011819595043551854, "loss": 12.2088, "step": 16840 }, { "epoch": 0.9170597564545003, "grad_norm": 0.5810603582792637, "learning_rate": 0.00011818727931839964, "loss": 12.2819, "step": 16841 }, { "epoch": 0.9171142104510833, "grad_norm": 0.7095274139215517, "learning_rate": 0.00011817860805985337, "loss": 12.2811, "step": 16842 }, { "epoch": 0.9171686644476663, "grad_norm": 0.6104125487630834, "learning_rate": 0.0001181699366599472, "loss": 12.0946, "step": 16843 }, { "epoch": 0.9172231184442493, "grad_norm": 0.5246410991558379, "learning_rate": 0.00011816126511874859, "loss": 11.9659, "step": 16844 }, { "epoch": 0.9172775724408323, "grad_norm": 0.6089764921307709, "learning_rate": 0.0001181525934363249, "loss": 12.1241, "step": 16845 }, { "epoch": 0.9173320264374153, "grad_norm": 0.5785060702910032, "learning_rate": 0.00011814392161274361, "loss": 12.1418, "step": 16846 }, { "epoch": 0.9173864804339984, "grad_norm": 0.6352933490510204, "learning_rate": 0.00011813524964807215, "loss": 12.1555, "step": 16847 }, { "epoch": 0.9174409344305814, "grad_norm": 0.5131026586102728, "learning_rate": 0.00011812657754237795, "loss": 12.0899, "step": 16848 }, { "epoch": 0.9174953884271644, "grad_norm": 0.5821382588317344, "learning_rate": 0.00011811790529572842, "loss": 12.1434, "step": 16849 }, { "epoch": 0.9175498424237474, "grad_norm": 0.5447482139681347, "learning_rate": 0.00011810923290819104, "loss": 12.1217, "step": 16850 }, { "epoch": 0.9176042964203304, "grad_norm": 0.539484649326593, "learning_rate": 0.00011810056037983322, "loss": 12.1442, "step": 16851 }, { "epoch": 0.9176587504169134, "grad_norm": 0.6068708590455357, "learning_rate": 0.00011809188771072241, "loss": 12.0923, "step": 16852 }, { "epoch": 0.9177132044134965, "grad_norm": 0.5918613326342391, "learning_rate": 0.00011808321490092605, "loss": 12.2175, "step": 16853 }, { "epoch": 0.9177676584100795, "grad_norm": 0.49693951625138766, "learning_rate": 0.00011807454195051158, "loss": 12.0775, "step": 16854 }, { "epoch": 0.9178221124066624, "grad_norm": 0.585130212317906, "learning_rate": 0.00011806586885954642, "loss": 12.114, "step": 16855 }, { "epoch": 0.9178765664032454, "grad_norm": 0.5801909218668415, "learning_rate": 0.00011805719562809807, "loss": 12.2656, "step": 16856 }, { "epoch": 0.9179310203998284, "grad_norm": 0.5522355927824034, "learning_rate": 0.00011804852225623391, "loss": 12.2535, "step": 16857 }, { "epoch": 0.9179854743964114, "grad_norm": 0.5427828363518948, "learning_rate": 0.00011803984874402143, "loss": 12.0822, "step": 16858 }, { "epoch": 0.9180399283929945, "grad_norm": 0.5661936373007997, "learning_rate": 0.00011803117509152805, "loss": 12.0889, "step": 16859 }, { "epoch": 0.9180943823895775, "grad_norm": 0.637976219585058, "learning_rate": 0.00011802250129882124, "loss": 12.0885, "step": 16860 }, { "epoch": 0.9181488363861605, "grad_norm": 0.6508504526745578, "learning_rate": 0.00011801382736596842, "loss": 12.1481, "step": 16861 }, { "epoch": 0.9182032903827435, "grad_norm": 0.5508959358035357, "learning_rate": 0.00011800515329303707, "loss": 12.239, "step": 16862 }, { "epoch": 0.9182577443793265, "grad_norm": 0.6082919722422235, "learning_rate": 0.00011799647908009463, "loss": 12.2931, "step": 16863 }, { "epoch": 0.9183121983759096, "grad_norm": 0.5692475687106825, "learning_rate": 0.00011798780472720854, "loss": 12.0328, "step": 16864 }, { "epoch": 0.9183666523724926, "grad_norm": 0.544721000401607, "learning_rate": 0.00011797913023444626, "loss": 12.1171, "step": 16865 }, { "epoch": 0.9184211063690756, "grad_norm": 0.5488719223519434, "learning_rate": 0.00011797045560187527, "loss": 12.2137, "step": 16866 }, { "epoch": 0.9184755603656586, "grad_norm": 0.541763616208224, "learning_rate": 0.000117961780829563, "loss": 12.1395, "step": 16867 }, { "epoch": 0.9185300143622416, "grad_norm": 0.5545722866569928, "learning_rate": 0.00011795310591757691, "loss": 12.2065, "step": 16868 }, { "epoch": 0.9185844683588246, "grad_norm": 0.8852819561606639, "learning_rate": 0.00011794443086598446, "loss": 12.1008, "step": 16869 }, { "epoch": 0.9186389223554077, "grad_norm": 0.5838155507667105, "learning_rate": 0.00011793575567485308, "loss": 12.1819, "step": 16870 }, { "epoch": 0.9186933763519907, "grad_norm": 0.5629298051200554, "learning_rate": 0.00011792708034425027, "loss": 12.0794, "step": 16871 }, { "epoch": 0.9187478303485737, "grad_norm": 0.6035217143909042, "learning_rate": 0.00011791840487424348, "loss": 12.1327, "step": 16872 }, { "epoch": 0.9188022843451567, "grad_norm": 0.5309279145877762, "learning_rate": 0.00011790972926490018, "loss": 12.2339, "step": 16873 }, { "epoch": 0.9188567383417396, "grad_norm": 0.5765969595397223, "learning_rate": 0.00011790105351628782, "loss": 12.2462, "step": 16874 }, { "epoch": 0.9189111923383226, "grad_norm": 0.6436547861263355, "learning_rate": 0.00011789237762847385, "loss": 12.1905, "step": 16875 }, { "epoch": 0.9189656463349057, "grad_norm": 0.5637473395664372, "learning_rate": 0.00011788370160152575, "loss": 12.1323, "step": 16876 }, { "epoch": 0.9190201003314887, "grad_norm": 0.5530859550148169, "learning_rate": 0.00011787502543551099, "loss": 12.099, "step": 16877 }, { "epoch": 0.9190745543280717, "grad_norm": 0.6553952935885003, "learning_rate": 0.00011786634913049703, "loss": 12.2056, "step": 16878 }, { "epoch": 0.9191290083246547, "grad_norm": 0.6089985457195687, "learning_rate": 0.00011785767268655139, "loss": 12.1522, "step": 16879 }, { "epoch": 0.9191834623212377, "grad_norm": 0.5779445588832643, "learning_rate": 0.00011784899610374144, "loss": 12.223, "step": 16880 }, { "epoch": 0.9192379163178207, "grad_norm": 0.7227344842874233, "learning_rate": 0.00011784031938213471, "loss": 12.2078, "step": 16881 }, { "epoch": 0.9192923703144038, "grad_norm": 0.6875875596524184, "learning_rate": 0.00011783164252179866, "loss": 12.2143, "step": 16882 }, { "epoch": 0.9193468243109868, "grad_norm": 0.556742417679642, "learning_rate": 0.00011782296552280079, "loss": 12.2031, "step": 16883 }, { "epoch": 0.9194012783075698, "grad_norm": 0.6334430636052265, "learning_rate": 0.00011781428838520856, "loss": 12.1927, "step": 16884 }, { "epoch": 0.9194557323041528, "grad_norm": 0.6206500293231294, "learning_rate": 0.00011780561110908941, "loss": 12.326, "step": 16885 }, { "epoch": 0.9195101863007358, "grad_norm": 0.5766624897076231, "learning_rate": 0.00011779693369451086, "loss": 12.2196, "step": 16886 }, { "epoch": 0.9195646402973188, "grad_norm": 0.583325671430832, "learning_rate": 0.00011778825614154031, "loss": 12.1991, "step": 16887 }, { "epoch": 0.9196190942939019, "grad_norm": 0.5834547821471319, "learning_rate": 0.00011777957845024537, "loss": 12.1825, "step": 16888 }, { "epoch": 0.9196735482904849, "grad_norm": 0.5174570598618606, "learning_rate": 0.00011777090062069343, "loss": 12.1828, "step": 16889 }, { "epoch": 0.9197280022870679, "grad_norm": 0.622195757252222, "learning_rate": 0.00011776222265295199, "loss": 12.1731, "step": 16890 }, { "epoch": 0.9197824562836509, "grad_norm": 0.5755695132597045, "learning_rate": 0.00011775354454708851, "loss": 12.2292, "step": 16891 }, { "epoch": 0.9198369102802338, "grad_norm": 0.620146372251081, "learning_rate": 0.00011774486630317048, "loss": 12.159, "step": 16892 }, { "epoch": 0.9198913642768168, "grad_norm": 0.6189664842317881, "learning_rate": 0.00011773618792126542, "loss": 12.1616, "step": 16893 }, { "epoch": 0.9199458182733999, "grad_norm": 0.5461333527479741, "learning_rate": 0.0001177275094014408, "loss": 12.0693, "step": 16894 }, { "epoch": 0.9200002722699829, "grad_norm": 0.6395709052423875, "learning_rate": 0.00011771883074376406, "loss": 12.1698, "step": 16895 }, { "epoch": 0.9200547262665659, "grad_norm": 0.5707100063782455, "learning_rate": 0.00011771015194830273, "loss": 12.1584, "step": 16896 }, { "epoch": 0.9201091802631489, "grad_norm": 0.6704402001258953, "learning_rate": 0.00011770147301512429, "loss": 12.2118, "step": 16897 }, { "epoch": 0.9201636342597319, "grad_norm": 0.553766648058549, "learning_rate": 0.00011769279394429622, "loss": 12.1416, "step": 16898 }, { "epoch": 0.920218088256315, "grad_norm": 0.5445145089497211, "learning_rate": 0.00011768411473588603, "loss": 12.0892, "step": 16899 }, { "epoch": 0.920272542252898, "grad_norm": 0.5384689612754129, "learning_rate": 0.0001176754353899612, "loss": 12.1376, "step": 16900 }, { "epoch": 0.920326996249481, "grad_norm": 0.6064363198046522, "learning_rate": 0.00011766675590658922, "loss": 12.178, "step": 16901 }, { "epoch": 0.920381450246064, "grad_norm": 0.5880024864089798, "learning_rate": 0.00011765807628583758, "loss": 12.1358, "step": 16902 }, { "epoch": 0.920435904242647, "grad_norm": 0.534753270596174, "learning_rate": 0.00011764939652777376, "loss": 12.1993, "step": 16903 }, { "epoch": 0.92049035823923, "grad_norm": 0.6295164454306247, "learning_rate": 0.0001176407166324653, "loss": 12.1118, "step": 16904 }, { "epoch": 0.9205448122358131, "grad_norm": 0.6023643479493942, "learning_rate": 0.00011763203659997965, "loss": 12.2011, "step": 16905 }, { "epoch": 0.9205992662323961, "grad_norm": 0.5757732415578404, "learning_rate": 0.00011762335643038433, "loss": 12.1225, "step": 16906 }, { "epoch": 0.9206537202289791, "grad_norm": 0.5355212834199039, "learning_rate": 0.00011761467612374684, "loss": 12.1321, "step": 16907 }, { "epoch": 0.9207081742255621, "grad_norm": 0.5048507275544266, "learning_rate": 0.00011760599568013468, "loss": 12.0923, "step": 16908 }, { "epoch": 0.920762628222145, "grad_norm": 0.5551109810794038, "learning_rate": 0.00011759731509961534, "loss": 12.1753, "step": 16909 }, { "epoch": 0.920817082218728, "grad_norm": 0.5191144251279728, "learning_rate": 0.00011758863438225631, "loss": 12.0751, "step": 16910 }, { "epoch": 0.9208715362153111, "grad_norm": 0.5625247792112014, "learning_rate": 0.00011757995352812514, "loss": 12.1726, "step": 16911 }, { "epoch": 0.9209259902118941, "grad_norm": 0.5466597820845551, "learning_rate": 0.00011757127253728928, "loss": 12.1765, "step": 16912 }, { "epoch": 0.9209804442084771, "grad_norm": 0.5194500983946128, "learning_rate": 0.00011756259140981627, "loss": 12.0903, "step": 16913 }, { "epoch": 0.9210348982050601, "grad_norm": 0.541872524740386, "learning_rate": 0.0001175539101457736, "loss": 12.2169, "step": 16914 }, { "epoch": 0.9210893522016431, "grad_norm": 0.6645674759196384, "learning_rate": 0.00011754522874522877, "loss": 12.2635, "step": 16915 }, { "epoch": 0.9211438061982261, "grad_norm": 0.5777726404571027, "learning_rate": 0.00011753654720824932, "loss": 12.0605, "step": 16916 }, { "epoch": 0.9211982601948092, "grad_norm": 0.5564081110481262, "learning_rate": 0.00011752786553490272, "loss": 12.1363, "step": 16917 }, { "epoch": 0.9212527141913922, "grad_norm": 0.6655345929789, "learning_rate": 0.00011751918372525652, "loss": 12.1562, "step": 16918 }, { "epoch": 0.9213071681879752, "grad_norm": 0.5578526087544446, "learning_rate": 0.00011751050177937818, "loss": 12.1479, "step": 16919 }, { "epoch": 0.9213616221845582, "grad_norm": 0.5116884761127108, "learning_rate": 0.00011750181969733528, "loss": 12.0556, "step": 16920 }, { "epoch": 0.9214160761811412, "grad_norm": 0.5883180073812652, "learning_rate": 0.00011749313747919526, "loss": 12.152, "step": 16921 }, { "epoch": 0.9214705301777242, "grad_norm": 0.71928827180107, "learning_rate": 0.00011748445512502568, "loss": 12.1099, "step": 16922 }, { "epoch": 0.9215249841743073, "grad_norm": 0.5801052874839137, "learning_rate": 0.00011747577263489405, "loss": 12.1079, "step": 16923 }, { "epoch": 0.9215794381708903, "grad_norm": 0.5791300993972605, "learning_rate": 0.00011746709000886786, "loss": 12.0104, "step": 16924 }, { "epoch": 0.9216338921674733, "grad_norm": 0.5914497436093543, "learning_rate": 0.00011745840724701467, "loss": 12.1728, "step": 16925 }, { "epoch": 0.9216883461640563, "grad_norm": 0.6521740942280411, "learning_rate": 0.00011744972434940199, "loss": 12.2992, "step": 16926 }, { "epoch": 0.9217428001606393, "grad_norm": 0.6474119664235948, "learning_rate": 0.0001174410413160973, "loss": 12.3222, "step": 16927 }, { "epoch": 0.9217972541572224, "grad_norm": 0.6115920339132841, "learning_rate": 0.00011743235814716816, "loss": 12.1622, "step": 16928 }, { "epoch": 0.9218517081538053, "grad_norm": 0.571447806039776, "learning_rate": 0.0001174236748426821, "loss": 12.1173, "step": 16929 }, { "epoch": 0.9219061621503883, "grad_norm": 0.6527582706546112, "learning_rate": 0.00011741499140270658, "loss": 12.07, "step": 16930 }, { "epoch": 0.9219606161469713, "grad_norm": 0.5238781389090017, "learning_rate": 0.0001174063078273092, "loss": 12.1486, "step": 16931 }, { "epoch": 0.9220150701435543, "grad_norm": 0.53294174741349, "learning_rate": 0.00011739762411655741, "loss": 12.1161, "step": 16932 }, { "epoch": 0.9220695241401373, "grad_norm": 0.54549396412689, "learning_rate": 0.00011738894027051882, "loss": 12.0161, "step": 16933 }, { "epoch": 0.9221239781367204, "grad_norm": 0.5851738088661577, "learning_rate": 0.00011738025628926092, "loss": 12.0931, "step": 16934 }, { "epoch": 0.9221784321333034, "grad_norm": 0.5742586998918857, "learning_rate": 0.00011737157217285122, "loss": 11.9496, "step": 16935 }, { "epoch": 0.9222328861298864, "grad_norm": 0.5667530315524123, "learning_rate": 0.00011736288792135721, "loss": 12.1213, "step": 16936 }, { "epoch": 0.9222873401264694, "grad_norm": 0.5389019254871072, "learning_rate": 0.00011735420353484654, "loss": 12.1589, "step": 16937 }, { "epoch": 0.9223417941230524, "grad_norm": 0.5653999762083491, "learning_rate": 0.00011734551901338664, "loss": 12.1234, "step": 16938 }, { "epoch": 0.9223962481196354, "grad_norm": 0.5291458672313059, "learning_rate": 0.0001173368343570451, "loss": 11.9858, "step": 16939 }, { "epoch": 0.9224507021162185, "grad_norm": 0.5492650586281563, "learning_rate": 0.00011732814956588942, "loss": 12.217, "step": 16940 }, { "epoch": 0.9225051561128015, "grad_norm": 0.5499049350097684, "learning_rate": 0.00011731946463998711, "loss": 12.1631, "step": 16941 }, { "epoch": 0.9225596101093845, "grad_norm": 0.6615920540288064, "learning_rate": 0.00011731077957940578, "loss": 12.1106, "step": 16942 }, { "epoch": 0.9226140641059675, "grad_norm": 0.5381055966689792, "learning_rate": 0.00011730209438421288, "loss": 12.2094, "step": 16943 }, { "epoch": 0.9226685181025505, "grad_norm": 0.5609330805323823, "learning_rate": 0.00011729340905447606, "loss": 12.1898, "step": 16944 }, { "epoch": 0.9227229720991335, "grad_norm": 0.5939709915701091, "learning_rate": 0.00011728472359026275, "loss": 12.2755, "step": 16945 }, { "epoch": 0.9227774260957166, "grad_norm": 0.627128974055508, "learning_rate": 0.00011727603799164053, "loss": 12.1736, "step": 16946 }, { "epoch": 0.9228318800922996, "grad_norm": 0.5773341602635771, "learning_rate": 0.00011726735225867693, "loss": 12.0639, "step": 16947 }, { "epoch": 0.9228863340888825, "grad_norm": 0.6039647843673996, "learning_rate": 0.00011725866639143952, "loss": 12.178, "step": 16948 }, { "epoch": 0.9229407880854655, "grad_norm": 0.5758653069003834, "learning_rate": 0.00011724998038999585, "loss": 12.0593, "step": 16949 }, { "epoch": 0.9229952420820485, "grad_norm": 0.5899518105644326, "learning_rate": 0.0001172412942544134, "loss": 12.2028, "step": 16950 }, { "epoch": 0.9230496960786315, "grad_norm": 0.5652653779097088, "learning_rate": 0.00011723260798475976, "loss": 12.2616, "step": 16951 }, { "epoch": 0.9231041500752146, "grad_norm": 0.5888009417927444, "learning_rate": 0.00011722392158110249, "loss": 12.0104, "step": 16952 }, { "epoch": 0.9231586040717976, "grad_norm": 0.5669795658816074, "learning_rate": 0.00011721523504350909, "loss": 12.2135, "step": 16953 }, { "epoch": 0.9232130580683806, "grad_norm": 0.611730610878642, "learning_rate": 0.00011720654837204717, "loss": 12.2023, "step": 16954 }, { "epoch": 0.9232675120649636, "grad_norm": 0.51904773839831, "learning_rate": 0.00011719786156678423, "loss": 12.1355, "step": 16955 }, { "epoch": 0.9233219660615466, "grad_norm": 0.6915896744511061, "learning_rate": 0.00011718917462778782, "loss": 12.2847, "step": 16956 }, { "epoch": 0.9233764200581296, "grad_norm": 0.6096948831390273, "learning_rate": 0.00011718048755512552, "loss": 12.0716, "step": 16957 }, { "epoch": 0.9234308740547127, "grad_norm": 0.6273856058339771, "learning_rate": 0.00011717180034886484, "loss": 12.2011, "step": 16958 }, { "epoch": 0.9234853280512957, "grad_norm": 0.557686025784662, "learning_rate": 0.00011716311300907339, "loss": 12.2558, "step": 16959 }, { "epoch": 0.9235397820478787, "grad_norm": 0.5179974039851645, "learning_rate": 0.0001171544255358187, "loss": 12.1361, "step": 16960 }, { "epoch": 0.9235942360444617, "grad_norm": 0.6006390284439724, "learning_rate": 0.0001171457379291683, "loss": 12.1918, "step": 16961 }, { "epoch": 0.9236486900410447, "grad_norm": 0.5772358993306812, "learning_rate": 0.00011713705018918979, "loss": 12.0963, "step": 16962 }, { "epoch": 0.9237031440376278, "grad_norm": 0.5863016849465408, "learning_rate": 0.00011712836231595067, "loss": 12.1746, "step": 16963 }, { "epoch": 0.9237575980342108, "grad_norm": 0.5758399347318343, "learning_rate": 0.00011711967430951853, "loss": 12.1444, "step": 16964 }, { "epoch": 0.9238120520307938, "grad_norm": 0.5939292813079025, "learning_rate": 0.000117110986169961, "loss": 12.276, "step": 16965 }, { "epoch": 0.9238665060273767, "grad_norm": 0.541052639544623, "learning_rate": 0.00011710229789734551, "loss": 12.2945, "step": 16966 }, { "epoch": 0.9239209600239597, "grad_norm": 0.505180080963915, "learning_rate": 0.00011709360949173968, "loss": 11.9867, "step": 16967 }, { "epoch": 0.9239754140205427, "grad_norm": 0.5817509351335834, "learning_rate": 0.0001170849209532111, "loss": 12.055, "step": 16968 }, { "epoch": 0.9240298680171258, "grad_norm": 0.540099205163771, "learning_rate": 0.00011707623228182729, "loss": 12.1972, "step": 16969 }, { "epoch": 0.9240843220137088, "grad_norm": 0.5672608784576997, "learning_rate": 0.00011706754347765587, "loss": 12.2125, "step": 16970 }, { "epoch": 0.9241387760102918, "grad_norm": 0.5743230894713309, "learning_rate": 0.00011705885454076435, "loss": 12.1818, "step": 16971 }, { "epoch": 0.9241932300068748, "grad_norm": 0.5462976545022575, "learning_rate": 0.00011705016547122032, "loss": 12.209, "step": 16972 }, { "epoch": 0.9242476840034578, "grad_norm": 0.5832898955105185, "learning_rate": 0.00011704147626909134, "loss": 12.3018, "step": 16973 }, { "epoch": 0.9243021380000408, "grad_norm": 0.7573427014823444, "learning_rate": 0.00011703278693444498, "loss": 12.1651, "step": 16974 }, { "epoch": 0.9243565919966239, "grad_norm": 0.5762810005951213, "learning_rate": 0.00011702409746734882, "loss": 12.1974, "step": 16975 }, { "epoch": 0.9244110459932069, "grad_norm": 0.5916727994725474, "learning_rate": 0.0001170154078678704, "loss": 12.3085, "step": 16976 }, { "epoch": 0.9244654999897899, "grad_norm": 0.7111713426251668, "learning_rate": 0.00011700671813607734, "loss": 12.2125, "step": 16977 }, { "epoch": 0.9245199539863729, "grad_norm": 0.5577895221948833, "learning_rate": 0.00011699802827203718, "loss": 12.0943, "step": 16978 }, { "epoch": 0.9245744079829559, "grad_norm": 0.5312133239506819, "learning_rate": 0.0001169893382758175, "loss": 12.1762, "step": 16979 }, { "epoch": 0.9246288619795389, "grad_norm": 0.5249657690938024, "learning_rate": 0.00011698064814748586, "loss": 12.1692, "step": 16980 }, { "epoch": 0.924683315976122, "grad_norm": 0.5837377384106609, "learning_rate": 0.00011697195788710988, "loss": 11.9651, "step": 16981 }, { "epoch": 0.924737769972705, "grad_norm": 0.5471769020073902, "learning_rate": 0.0001169632674947571, "loss": 12.1227, "step": 16982 }, { "epoch": 0.924792223969288, "grad_norm": 0.6455960922545634, "learning_rate": 0.0001169545769704951, "loss": 12.1644, "step": 16983 }, { "epoch": 0.924846677965871, "grad_norm": 0.6383411558745579, "learning_rate": 0.00011694588631439147, "loss": 12.1891, "step": 16984 }, { "epoch": 0.9249011319624539, "grad_norm": 0.5272725937755892, "learning_rate": 0.00011693719552651378, "loss": 12.0553, "step": 16985 }, { "epoch": 0.9249555859590369, "grad_norm": 0.5199498291500081, "learning_rate": 0.00011692850460692964, "loss": 12.1509, "step": 16986 }, { "epoch": 0.92501003995562, "grad_norm": 0.5810983895908186, "learning_rate": 0.0001169198135557066, "loss": 12.1364, "step": 16987 }, { "epoch": 0.925064493952203, "grad_norm": 0.5786941192229852, "learning_rate": 0.00011691112237291224, "loss": 12.2021, "step": 16988 }, { "epoch": 0.925118947948786, "grad_norm": 0.5227920236417976, "learning_rate": 0.00011690243105861416, "loss": 12.0734, "step": 16989 }, { "epoch": 0.925173401945369, "grad_norm": 0.605307586156183, "learning_rate": 0.00011689373961287995, "loss": 12.1793, "step": 16990 }, { "epoch": 0.925227855941952, "grad_norm": 0.5932639111977884, "learning_rate": 0.00011688504803577718, "loss": 12.1466, "step": 16991 }, { "epoch": 0.925282309938535, "grad_norm": 0.6001483622846763, "learning_rate": 0.00011687635632737346, "loss": 12.2558, "step": 16992 }, { "epoch": 0.9253367639351181, "grad_norm": 0.6580245600331703, "learning_rate": 0.00011686766448773634, "loss": 12.2324, "step": 16993 }, { "epoch": 0.9253912179317011, "grad_norm": 0.6248616057460186, "learning_rate": 0.00011685897251693345, "loss": 12.0222, "step": 16994 }, { "epoch": 0.9254456719282841, "grad_norm": 0.6384201382347111, "learning_rate": 0.00011685028041503236, "loss": 12.2823, "step": 16995 }, { "epoch": 0.9255001259248671, "grad_norm": 0.5851656381722564, "learning_rate": 0.00011684158818210064, "loss": 12.1895, "step": 16996 }, { "epoch": 0.9255545799214501, "grad_norm": 0.6050196836611006, "learning_rate": 0.00011683289581820593, "loss": 12.1716, "step": 16997 }, { "epoch": 0.9256090339180332, "grad_norm": 0.5977317895680825, "learning_rate": 0.0001168242033234158, "loss": 12.1989, "step": 16998 }, { "epoch": 0.9256634879146162, "grad_norm": 0.5553430762164486, "learning_rate": 0.00011681551069779784, "loss": 12.099, "step": 16999 }, { "epoch": 0.9257179419111992, "grad_norm": 0.6662028747923707, "learning_rate": 0.00011680681794141965, "loss": 12.1246, "step": 17000 }, { "epoch": 0.9257723959077822, "grad_norm": 0.6443699801689361, "learning_rate": 0.00011679812505434882, "loss": 12.0578, "step": 17001 }, { "epoch": 0.9258268499043651, "grad_norm": 0.5643059761112635, "learning_rate": 0.00011678943203665294, "loss": 12.1783, "step": 17002 }, { "epoch": 0.9258813039009481, "grad_norm": 0.6073214020860899, "learning_rate": 0.00011678073888839965, "loss": 12.2232, "step": 17003 }, { "epoch": 0.9259357578975312, "grad_norm": 0.6071808298586896, "learning_rate": 0.00011677204560965649, "loss": 12.2522, "step": 17004 }, { "epoch": 0.9259902118941142, "grad_norm": 0.6351405987968302, "learning_rate": 0.00011676335220049112, "loss": 12.2171, "step": 17005 }, { "epoch": 0.9260446658906972, "grad_norm": 0.5831917980551805, "learning_rate": 0.0001167546586609711, "loss": 12.146, "step": 17006 }, { "epoch": 0.9260991198872802, "grad_norm": 0.6338100597883539, "learning_rate": 0.00011674596499116404, "loss": 12.169, "step": 17007 }, { "epoch": 0.9261535738838632, "grad_norm": 0.5556819623122808, "learning_rate": 0.00011673727119113756, "loss": 12.0856, "step": 17008 }, { "epoch": 0.9262080278804462, "grad_norm": 0.5452693802865192, "learning_rate": 0.00011672857726095923, "loss": 12.2743, "step": 17009 }, { "epoch": 0.9262624818770293, "grad_norm": 0.5845607427851699, "learning_rate": 0.0001167198832006967, "loss": 12.3197, "step": 17010 }, { "epoch": 0.9263169358736123, "grad_norm": 0.5174778972822843, "learning_rate": 0.00011671118901041755, "loss": 12.1127, "step": 17011 }, { "epoch": 0.9263713898701953, "grad_norm": 0.5765368900728236, "learning_rate": 0.00011670249469018939, "loss": 12.2682, "step": 17012 }, { "epoch": 0.9264258438667783, "grad_norm": 0.5312777638235685, "learning_rate": 0.00011669380024007981, "loss": 12.1433, "step": 17013 }, { "epoch": 0.9264802978633613, "grad_norm": 0.4942873016918579, "learning_rate": 0.00011668510566015644, "loss": 12.0418, "step": 17014 }, { "epoch": 0.9265347518599443, "grad_norm": 0.56684886519407, "learning_rate": 0.00011667641095048693, "loss": 12.1658, "step": 17015 }, { "epoch": 0.9265892058565274, "grad_norm": 0.5438479777725457, "learning_rate": 0.00011666771611113885, "loss": 12.3115, "step": 17016 }, { "epoch": 0.9266436598531104, "grad_norm": 0.5589051106902513, "learning_rate": 0.00011665902114217979, "loss": 11.9983, "step": 17017 }, { "epoch": 0.9266981138496934, "grad_norm": 0.55847418262368, "learning_rate": 0.00011665032604367736, "loss": 12.059, "step": 17018 }, { "epoch": 0.9267525678462764, "grad_norm": 0.5630048352703018, "learning_rate": 0.00011664163081569923, "loss": 12.1887, "step": 17019 }, { "epoch": 0.9268070218428593, "grad_norm": 0.5732274252668589, "learning_rate": 0.00011663293545831302, "loss": 12.3322, "step": 17020 }, { "epoch": 0.9268614758394423, "grad_norm": 0.5943019439752977, "learning_rate": 0.00011662423997158629, "loss": 12.1177, "step": 17021 }, { "epoch": 0.9269159298360254, "grad_norm": 0.5640282485806937, "learning_rate": 0.00011661554435558668, "loss": 12.1892, "step": 17022 }, { "epoch": 0.9269703838326084, "grad_norm": 0.5874228700734306, "learning_rate": 0.00011660684861038181, "loss": 12.1637, "step": 17023 }, { "epoch": 0.9270248378291914, "grad_norm": 0.5497658023687364, "learning_rate": 0.00011659815273603927, "loss": 12.1317, "step": 17024 }, { "epoch": 0.9270792918257744, "grad_norm": 0.7441199081125127, "learning_rate": 0.00011658945673262675, "loss": 12.3582, "step": 17025 }, { "epoch": 0.9271337458223574, "grad_norm": 0.6354938189067647, "learning_rate": 0.00011658076060021184, "loss": 12.2086, "step": 17026 }, { "epoch": 0.9271881998189404, "grad_norm": 0.5038329814281183, "learning_rate": 0.00011657206433886214, "loss": 12.1215, "step": 17027 }, { "epoch": 0.9272426538155235, "grad_norm": 0.5855466980777795, "learning_rate": 0.00011656336794864528, "loss": 12.2244, "step": 17028 }, { "epoch": 0.9272971078121065, "grad_norm": 0.6551742005373945, "learning_rate": 0.0001165546714296289, "loss": 12.1614, "step": 17029 }, { "epoch": 0.9273515618086895, "grad_norm": 0.6345041410268087, "learning_rate": 0.00011654597478188061, "loss": 12.115, "step": 17030 }, { "epoch": 0.9274060158052725, "grad_norm": 0.5470199496279786, "learning_rate": 0.00011653727800546805, "loss": 12.1861, "step": 17031 }, { "epoch": 0.9274604698018555, "grad_norm": 0.6602093942963679, "learning_rate": 0.00011652858110045886, "loss": 12.0375, "step": 17032 }, { "epoch": 0.9275149237984386, "grad_norm": 0.6575298540859775, "learning_rate": 0.00011651988406692062, "loss": 12.3605, "step": 17033 }, { "epoch": 0.9275693777950216, "grad_norm": 0.5075422298046308, "learning_rate": 0.00011651118690492102, "loss": 12.0472, "step": 17034 }, { "epoch": 0.9276238317916046, "grad_norm": 0.6874168942810434, "learning_rate": 0.00011650248961452765, "loss": 12.1397, "step": 17035 }, { "epoch": 0.9276782857881876, "grad_norm": 0.573716036356838, "learning_rate": 0.00011649379219580816, "loss": 12.1485, "step": 17036 }, { "epoch": 0.9277327397847706, "grad_norm": 0.594723876278983, "learning_rate": 0.00011648509464883018, "loss": 12.2291, "step": 17037 }, { "epoch": 0.9277871937813535, "grad_norm": 0.5567430749098153, "learning_rate": 0.0001164763969736613, "loss": 12.1369, "step": 17038 }, { "epoch": 0.9278416477779367, "grad_norm": 0.6402356541374973, "learning_rate": 0.00011646769917036923, "loss": 12.2883, "step": 17039 }, { "epoch": 0.9278961017745196, "grad_norm": 0.5948790511726406, "learning_rate": 0.00011645900123902156, "loss": 12.1557, "step": 17040 }, { "epoch": 0.9279505557711026, "grad_norm": 0.5748905277106298, "learning_rate": 0.00011645030317968594, "loss": 12.1694, "step": 17041 }, { "epoch": 0.9280050097676856, "grad_norm": 0.6725080394286576, "learning_rate": 0.00011644160499243002, "loss": 12.1196, "step": 17042 }, { "epoch": 0.9280594637642686, "grad_norm": 0.6136217550622626, "learning_rate": 0.00011643290667732141, "loss": 12.114, "step": 17043 }, { "epoch": 0.9281139177608516, "grad_norm": 0.5746529170486847, "learning_rate": 0.00011642420823442774, "loss": 12.1748, "step": 17044 }, { "epoch": 0.9281683717574347, "grad_norm": 0.7606424559623073, "learning_rate": 0.00011641550966381669, "loss": 12.1793, "step": 17045 }, { "epoch": 0.9282228257540177, "grad_norm": 0.5736704979817694, "learning_rate": 0.00011640681096555588, "loss": 12.2137, "step": 17046 }, { "epoch": 0.9282772797506007, "grad_norm": 0.5956818081165554, "learning_rate": 0.00011639811213971297, "loss": 12.2121, "step": 17047 }, { "epoch": 0.9283317337471837, "grad_norm": 0.5155022066522056, "learning_rate": 0.00011638941318635557, "loss": 12.2178, "step": 17048 }, { "epoch": 0.9283861877437667, "grad_norm": 0.6042941383500556, "learning_rate": 0.00011638071410555136, "loss": 12.1546, "step": 17049 }, { "epoch": 0.9284406417403497, "grad_norm": 0.6940539649814073, "learning_rate": 0.00011637201489736797, "loss": 12.1122, "step": 17050 }, { "epoch": 0.9284950957369328, "grad_norm": 0.5318145162753902, "learning_rate": 0.00011636331556187303, "loss": 12.1872, "step": 17051 }, { "epoch": 0.9285495497335158, "grad_norm": 0.6024309285582887, "learning_rate": 0.00011635461609913422, "loss": 12.2451, "step": 17052 }, { "epoch": 0.9286040037300988, "grad_norm": 0.5717953753086978, "learning_rate": 0.00011634591650921916, "loss": 12.1309, "step": 17053 }, { "epoch": 0.9286584577266818, "grad_norm": 0.6785739383229514, "learning_rate": 0.00011633721679219553, "loss": 12.0856, "step": 17054 }, { "epoch": 0.9287129117232648, "grad_norm": 0.5574867157515434, "learning_rate": 0.00011632851694813096, "loss": 12.2054, "step": 17055 }, { "epoch": 0.9287673657198477, "grad_norm": 0.5600461548071675, "learning_rate": 0.00011631981697709308, "loss": 12.2204, "step": 17056 }, { "epoch": 0.9288218197164309, "grad_norm": 0.5760992539546776, "learning_rate": 0.00011631111687914959, "loss": 12.1364, "step": 17057 }, { "epoch": 0.9288762737130138, "grad_norm": 0.5484661974854979, "learning_rate": 0.00011630241665436812, "loss": 12.2272, "step": 17058 }, { "epoch": 0.9289307277095968, "grad_norm": 0.6020621004749886, "learning_rate": 0.00011629371630281632, "loss": 12.1409, "step": 17059 }, { "epoch": 0.9289851817061798, "grad_norm": 0.554976863830771, "learning_rate": 0.00011628501582456186, "loss": 12.1648, "step": 17060 }, { "epoch": 0.9290396357027628, "grad_norm": 0.5700672634511531, "learning_rate": 0.00011627631521967237, "loss": 12.1624, "step": 17061 }, { "epoch": 0.9290940896993459, "grad_norm": 0.5677438278839031, "learning_rate": 0.00011626761448821551, "loss": 12.1885, "step": 17062 }, { "epoch": 0.9291485436959289, "grad_norm": 0.7112087422260562, "learning_rate": 0.00011625891363025896, "loss": 12.3993, "step": 17063 }, { "epoch": 0.9292029976925119, "grad_norm": 0.611967896965925, "learning_rate": 0.00011625021264587037, "loss": 12.1846, "step": 17064 }, { "epoch": 0.9292574516890949, "grad_norm": 0.5658316156390023, "learning_rate": 0.00011624151153511745, "loss": 12.1155, "step": 17065 }, { "epoch": 0.9293119056856779, "grad_norm": 0.5161574686800847, "learning_rate": 0.00011623281029806776, "loss": 12.102, "step": 17066 }, { "epoch": 0.9293663596822609, "grad_norm": 0.5298763617916129, "learning_rate": 0.000116224108934789, "loss": 12.1406, "step": 17067 }, { "epoch": 0.929420813678844, "grad_norm": 0.6023836352490733, "learning_rate": 0.00011621540744534886, "loss": 12.1412, "step": 17068 }, { "epoch": 0.929475267675427, "grad_norm": 0.6430555819292236, "learning_rate": 0.00011620670582981504, "loss": 12.17, "step": 17069 }, { "epoch": 0.92952972167201, "grad_norm": 0.5346111639127659, "learning_rate": 0.00011619800408825511, "loss": 12.2956, "step": 17070 }, { "epoch": 0.929584175668593, "grad_norm": 0.5934744123185334, "learning_rate": 0.00011618930222073681, "loss": 12.1488, "step": 17071 }, { "epoch": 0.929638629665176, "grad_norm": 0.5902132907553823, "learning_rate": 0.00011618060022732778, "loss": 12.137, "step": 17072 }, { "epoch": 0.929693083661759, "grad_norm": 0.5783018779464009, "learning_rate": 0.00011617189810809566, "loss": 12.1546, "step": 17073 }, { "epoch": 0.9297475376583421, "grad_norm": 0.5436054397773015, "learning_rate": 0.00011616319586310815, "loss": 12.0984, "step": 17074 }, { "epoch": 0.929801991654925, "grad_norm": 0.5234324656657, "learning_rate": 0.00011615449349243297, "loss": 12.1047, "step": 17075 }, { "epoch": 0.929856445651508, "grad_norm": 0.5774191004854853, "learning_rate": 0.0001161457909961377, "loss": 12.1284, "step": 17076 }, { "epoch": 0.929910899648091, "grad_norm": 0.5535469054706854, "learning_rate": 0.00011613708837429005, "loss": 12.1228, "step": 17077 }, { "epoch": 0.929965353644674, "grad_norm": 0.5883833818085753, "learning_rate": 0.00011612838562695772, "loss": 12.2282, "step": 17078 }, { "epoch": 0.930019807641257, "grad_norm": 0.5181596944895872, "learning_rate": 0.0001161196827542083, "loss": 12.1033, "step": 17079 }, { "epoch": 0.9300742616378401, "grad_norm": 0.5877125272973959, "learning_rate": 0.00011611097975610959, "loss": 12.1928, "step": 17080 }, { "epoch": 0.9301287156344231, "grad_norm": 0.5048991643199667, "learning_rate": 0.00011610227663272917, "loss": 12.1095, "step": 17081 }, { "epoch": 0.9301831696310061, "grad_norm": 0.6103487489415191, "learning_rate": 0.00011609357338413476, "loss": 11.9787, "step": 17082 }, { "epoch": 0.9302376236275891, "grad_norm": 0.5328646129693538, "learning_rate": 0.000116084870010394, "loss": 12.1755, "step": 17083 }, { "epoch": 0.9302920776241721, "grad_norm": 0.6728846970846543, "learning_rate": 0.00011607616651157461, "loss": 12.0957, "step": 17084 }, { "epoch": 0.9303465316207551, "grad_norm": 0.5457521547207682, "learning_rate": 0.00011606746288774426, "loss": 12.2521, "step": 17085 }, { "epoch": 0.9304009856173382, "grad_norm": 0.5770038977786668, "learning_rate": 0.00011605875913897062, "loss": 12.0996, "step": 17086 }, { "epoch": 0.9304554396139212, "grad_norm": 0.5414805298595619, "learning_rate": 0.00011605005526532136, "loss": 12.2006, "step": 17087 }, { "epoch": 0.9305098936105042, "grad_norm": 0.551139919852669, "learning_rate": 0.0001160413512668642, "loss": 12.1229, "step": 17088 }, { "epoch": 0.9305643476070872, "grad_norm": 0.5455866926700138, "learning_rate": 0.00011603264714366677, "loss": 12.082, "step": 17089 }, { "epoch": 0.9306188016036702, "grad_norm": 0.5882208100494172, "learning_rate": 0.00011602394289579681, "loss": 12.1895, "step": 17090 }, { "epoch": 0.9306732556002532, "grad_norm": 0.5537036790854779, "learning_rate": 0.00011601523852332199, "loss": 12.1104, "step": 17091 }, { "epoch": 0.9307277095968363, "grad_norm": 0.5617690974975039, "learning_rate": 0.00011600653402630999, "loss": 12.2329, "step": 17092 }, { "epoch": 0.9307821635934193, "grad_norm": 0.5464658860363083, "learning_rate": 0.00011599782940482849, "loss": 12.1214, "step": 17093 }, { "epoch": 0.9308366175900022, "grad_norm": 0.5451502554132573, "learning_rate": 0.00011598912465894518, "loss": 12.129, "step": 17094 }, { "epoch": 0.9308910715865852, "grad_norm": 0.5438190252079544, "learning_rate": 0.00011598041978872776, "loss": 12.102, "step": 17095 }, { "epoch": 0.9309455255831682, "grad_norm": 0.5555309407005641, "learning_rate": 0.00011597171479424391, "loss": 12.1562, "step": 17096 }, { "epoch": 0.9309999795797513, "grad_norm": 0.5117316313785095, "learning_rate": 0.00011596300967556132, "loss": 12.0907, "step": 17097 }, { "epoch": 0.9310544335763343, "grad_norm": 0.610060574719516, "learning_rate": 0.00011595430443274771, "loss": 12.1539, "step": 17098 }, { "epoch": 0.9311088875729173, "grad_norm": 0.674916055042679, "learning_rate": 0.00011594559906587075, "loss": 12.3322, "step": 17099 }, { "epoch": 0.9311633415695003, "grad_norm": 0.5116038869777758, "learning_rate": 0.00011593689357499813, "loss": 12.1525, "step": 17100 }, { "epoch": 0.9312177955660833, "grad_norm": 0.5640310944624323, "learning_rate": 0.00011592818796019756, "loss": 12.2007, "step": 17101 }, { "epoch": 0.9312722495626663, "grad_norm": 0.6681081203799246, "learning_rate": 0.00011591948222153672, "loss": 12.1388, "step": 17102 }, { "epoch": 0.9313267035592494, "grad_norm": 0.5026041518446537, "learning_rate": 0.00011591077635908332, "loss": 12.1392, "step": 17103 }, { "epoch": 0.9313811575558324, "grad_norm": 0.5451971935992687, "learning_rate": 0.00011590207037290506, "loss": 12.2014, "step": 17104 }, { "epoch": 0.9314356115524154, "grad_norm": 0.6192185932773268, "learning_rate": 0.00011589336426306963, "loss": 12.2781, "step": 17105 }, { "epoch": 0.9314900655489984, "grad_norm": 0.5562586554348979, "learning_rate": 0.00011588465802964472, "loss": 12.0791, "step": 17106 }, { "epoch": 0.9315445195455814, "grad_norm": 0.5452650641975968, "learning_rate": 0.00011587595167269807, "loss": 12.1655, "step": 17107 }, { "epoch": 0.9315989735421644, "grad_norm": 0.7624867306533909, "learning_rate": 0.00011586724519229734, "loss": 12.1808, "step": 17108 }, { "epoch": 0.9316534275387475, "grad_norm": 0.6006007027077219, "learning_rate": 0.00011585853858851026, "loss": 12.1602, "step": 17109 }, { "epoch": 0.9317078815353305, "grad_norm": 0.5315077069002945, "learning_rate": 0.00011584983186140453, "loss": 12.2366, "step": 17110 }, { "epoch": 0.9317623355319135, "grad_norm": 0.6186125620931717, "learning_rate": 0.00011584112501104785, "loss": 12.1332, "step": 17111 }, { "epoch": 0.9318167895284964, "grad_norm": 0.5338508517958055, "learning_rate": 0.0001158324180375079, "loss": 12.1224, "step": 17112 }, { "epoch": 0.9318712435250794, "grad_norm": 0.5366362161509145, "learning_rate": 0.00011582371094085243, "loss": 12.1756, "step": 17113 }, { "epoch": 0.9319256975216624, "grad_norm": 0.5225751437955348, "learning_rate": 0.00011581500372114912, "loss": 12.1414, "step": 17114 }, { "epoch": 0.9319801515182455, "grad_norm": 0.5488376445852008, "learning_rate": 0.00011580629637846572, "loss": 12.2341, "step": 17115 }, { "epoch": 0.9320346055148285, "grad_norm": 0.6082275491325198, "learning_rate": 0.00011579758891286988, "loss": 12.1232, "step": 17116 }, { "epoch": 0.9320890595114115, "grad_norm": 0.7465683112808241, "learning_rate": 0.00011578888132442935, "loss": 12.1643, "step": 17117 }, { "epoch": 0.9321435135079945, "grad_norm": 0.5606054589322913, "learning_rate": 0.00011578017361321183, "loss": 12.0169, "step": 17118 }, { "epoch": 0.9321979675045775, "grad_norm": 0.48457016303443323, "learning_rate": 0.00011577146577928501, "loss": 12.2196, "step": 17119 }, { "epoch": 0.9322524215011605, "grad_norm": 0.6522209736071727, "learning_rate": 0.00011576275782271666, "loss": 12.1335, "step": 17120 }, { "epoch": 0.9323068754977436, "grad_norm": 0.5705310071264384, "learning_rate": 0.00011575404974357447, "loss": 12.1468, "step": 17121 }, { "epoch": 0.9323613294943266, "grad_norm": 0.6997733089409353, "learning_rate": 0.00011574534154192611, "loss": 12.2644, "step": 17122 }, { "epoch": 0.9324157834909096, "grad_norm": 0.53727646261493, "learning_rate": 0.00011573663321783935, "loss": 12.2136, "step": 17123 }, { "epoch": 0.9324702374874926, "grad_norm": 0.5202735440742016, "learning_rate": 0.00011572792477138188, "loss": 12.0472, "step": 17124 }, { "epoch": 0.9325246914840756, "grad_norm": 0.5814386451768224, "learning_rate": 0.00011571921620262145, "loss": 12.1675, "step": 17125 }, { "epoch": 0.9325791454806586, "grad_norm": 0.597725688567498, "learning_rate": 0.00011571050751162577, "loss": 12.2002, "step": 17126 }, { "epoch": 0.9326335994772417, "grad_norm": 0.5559482868976506, "learning_rate": 0.00011570179869846252, "loss": 12.1005, "step": 17127 }, { "epoch": 0.9326880534738247, "grad_norm": 0.5902442834706637, "learning_rate": 0.00011569308976319946, "loss": 12.2999, "step": 17128 }, { "epoch": 0.9327425074704077, "grad_norm": 0.6024852485207571, "learning_rate": 0.00011568438070590429, "loss": 12.2589, "step": 17129 }, { "epoch": 0.9327969614669906, "grad_norm": 0.5433397289395396, "learning_rate": 0.00011567567152664476, "loss": 12.2047, "step": 17130 }, { "epoch": 0.9328514154635736, "grad_norm": 0.5315996546762495, "learning_rate": 0.00011566696222548858, "loss": 12.1595, "step": 17131 }, { "epoch": 0.9329058694601567, "grad_norm": 0.6238510583867447, "learning_rate": 0.00011565825280250348, "loss": 12.037, "step": 17132 }, { "epoch": 0.9329603234567397, "grad_norm": 0.5691266977985826, "learning_rate": 0.00011564954325775714, "loss": 12.2516, "step": 17133 }, { "epoch": 0.9330147774533227, "grad_norm": 0.5831989456009242, "learning_rate": 0.00011564083359131736, "loss": 12.2016, "step": 17134 }, { "epoch": 0.9330692314499057, "grad_norm": 0.5385068948884913, "learning_rate": 0.00011563212380325184, "loss": 12.0845, "step": 17135 }, { "epoch": 0.9331236854464887, "grad_norm": 0.5410471288548246, "learning_rate": 0.00011562341389362831, "loss": 12.16, "step": 17136 }, { "epoch": 0.9331781394430717, "grad_norm": 0.5636545198374513, "learning_rate": 0.00011561470386251449, "loss": 12.2888, "step": 17137 }, { "epoch": 0.9332325934396548, "grad_norm": 0.5218132981958773, "learning_rate": 0.00011560599370997812, "loss": 12.113, "step": 17138 }, { "epoch": 0.9332870474362378, "grad_norm": 0.5833455834743435, "learning_rate": 0.00011559728343608687, "loss": 12.0584, "step": 17139 }, { "epoch": 0.9333415014328208, "grad_norm": 0.5390141802259124, "learning_rate": 0.00011558857304090858, "loss": 12.0942, "step": 17140 }, { "epoch": 0.9333959554294038, "grad_norm": 0.5172155285648882, "learning_rate": 0.00011557986252451095, "loss": 12.1737, "step": 17141 }, { "epoch": 0.9334504094259868, "grad_norm": 0.5277277962681094, "learning_rate": 0.00011557115188696163, "loss": 11.991, "step": 17142 }, { "epoch": 0.9335048634225698, "grad_norm": 0.5706722403948381, "learning_rate": 0.00011556244112832848, "loss": 12.0166, "step": 17143 }, { "epoch": 0.9335593174191529, "grad_norm": 0.5892789767614426, "learning_rate": 0.00011555373024867912, "loss": 12.2115, "step": 17144 }, { "epoch": 0.9336137714157359, "grad_norm": 0.575780453957793, "learning_rate": 0.00011554501924808139, "loss": 12.0539, "step": 17145 }, { "epoch": 0.9336682254123189, "grad_norm": 0.5678263145783361, "learning_rate": 0.00011553630812660298, "loss": 12.1123, "step": 17146 }, { "epoch": 0.9337226794089019, "grad_norm": 0.5739674121461581, "learning_rate": 0.00011552759688431165, "loss": 12.143, "step": 17147 }, { "epoch": 0.9337771334054848, "grad_norm": 0.523763579277213, "learning_rate": 0.0001155188855212751, "loss": 12.0914, "step": 17148 }, { "epoch": 0.9338315874020678, "grad_norm": 0.5190423460737542, "learning_rate": 0.00011551017403756108, "loss": 11.9937, "step": 17149 }, { "epoch": 0.933886041398651, "grad_norm": 0.5127265184209078, "learning_rate": 0.00011550146243323734, "loss": 12.0303, "step": 17150 }, { "epoch": 0.9339404953952339, "grad_norm": 0.6086177874858811, "learning_rate": 0.00011549275070837167, "loss": 12.2432, "step": 17151 }, { "epoch": 0.9339949493918169, "grad_norm": 0.5150423244136452, "learning_rate": 0.00011548403886303176, "loss": 12.1052, "step": 17152 }, { "epoch": 0.9340494033883999, "grad_norm": 0.546345240118378, "learning_rate": 0.00011547532689728535, "loss": 12.1896, "step": 17153 }, { "epoch": 0.9341038573849829, "grad_norm": 0.6130336189200207, "learning_rate": 0.0001154666148112002, "loss": 12.0921, "step": 17154 }, { "epoch": 0.9341583113815659, "grad_norm": 0.5476637616569271, "learning_rate": 0.00011545790260484409, "loss": 12.0947, "step": 17155 }, { "epoch": 0.934212765378149, "grad_norm": 0.5547513808519101, "learning_rate": 0.00011544919027828472, "loss": 12.0596, "step": 17156 }, { "epoch": 0.934267219374732, "grad_norm": 0.5931607387938834, "learning_rate": 0.00011544047783158984, "loss": 12.1686, "step": 17157 }, { "epoch": 0.934321673371315, "grad_norm": 0.599223671621129, "learning_rate": 0.00011543176526482722, "loss": 12.0938, "step": 17158 }, { "epoch": 0.934376127367898, "grad_norm": 0.565651878471124, "learning_rate": 0.00011542305257806463, "loss": 12.1767, "step": 17159 }, { "epoch": 0.934430581364481, "grad_norm": 0.5577459724178409, "learning_rate": 0.00011541433977136977, "loss": 12.078, "step": 17160 }, { "epoch": 0.934485035361064, "grad_norm": 0.5095102241397735, "learning_rate": 0.00011540562684481042, "loss": 12.1707, "step": 17161 }, { "epoch": 0.9345394893576471, "grad_norm": 0.568478015192735, "learning_rate": 0.00011539691379845435, "loss": 12.0611, "step": 17162 }, { "epoch": 0.9345939433542301, "grad_norm": 0.5619916172790094, "learning_rate": 0.00011538820063236928, "loss": 12.2408, "step": 17163 }, { "epoch": 0.9346483973508131, "grad_norm": 0.5480443047733544, "learning_rate": 0.00011537948734662299, "loss": 12.025, "step": 17164 }, { "epoch": 0.9347028513473961, "grad_norm": 0.49224784515548436, "learning_rate": 0.00011537077394128321, "loss": 12.029, "step": 17165 }, { "epoch": 0.934757305343979, "grad_norm": 0.49048885080856797, "learning_rate": 0.00011536206041641776, "loss": 12.1366, "step": 17166 }, { "epoch": 0.9348117593405622, "grad_norm": 0.519980158503926, "learning_rate": 0.00011535334677209431, "loss": 12.0942, "step": 17167 }, { "epoch": 0.9348662133371451, "grad_norm": 0.5725900985707221, "learning_rate": 0.00011534463300838067, "loss": 12.2885, "step": 17168 }, { "epoch": 0.9349206673337281, "grad_norm": 0.5930581777586704, "learning_rate": 0.0001153359191253446, "loss": 12.1438, "step": 17169 }, { "epoch": 0.9349751213303111, "grad_norm": 0.592854074271066, "learning_rate": 0.00011532720512305384, "loss": 12.1531, "step": 17170 }, { "epoch": 0.9350295753268941, "grad_norm": 0.5424309855938475, "learning_rate": 0.00011531849100157617, "loss": 12.1337, "step": 17171 }, { "epoch": 0.9350840293234771, "grad_norm": 0.5206493011432844, "learning_rate": 0.00011530977676097934, "loss": 12.1362, "step": 17172 }, { "epoch": 0.9351384833200602, "grad_norm": 0.5947327816436266, "learning_rate": 0.00011530106240133112, "loss": 12.1783, "step": 17173 }, { "epoch": 0.9351929373166432, "grad_norm": 0.5273619577220753, "learning_rate": 0.00011529234792269927, "loss": 12.0998, "step": 17174 }, { "epoch": 0.9352473913132262, "grad_norm": 0.5809187372992236, "learning_rate": 0.00011528363332515155, "loss": 11.9999, "step": 17175 }, { "epoch": 0.9353018453098092, "grad_norm": 0.5708666708407913, "learning_rate": 0.00011527491860875575, "loss": 12.1564, "step": 17176 }, { "epoch": 0.9353562993063922, "grad_norm": 0.5071641418210248, "learning_rate": 0.00011526620377357961, "loss": 12.0695, "step": 17177 }, { "epoch": 0.9354107533029752, "grad_norm": 0.5717476277001297, "learning_rate": 0.00011525748881969091, "loss": 12.1292, "step": 17178 }, { "epoch": 0.9354652072995583, "grad_norm": 0.48412158610907385, "learning_rate": 0.00011524877374715743, "loss": 12.0474, "step": 17179 }, { "epoch": 0.9355196612961413, "grad_norm": 0.6004876501095229, "learning_rate": 0.00011524005855604692, "loss": 12.107, "step": 17180 }, { "epoch": 0.9355741152927243, "grad_norm": 0.5485097559377725, "learning_rate": 0.0001152313432464272, "loss": 11.9742, "step": 17181 }, { "epoch": 0.9356285692893073, "grad_norm": 0.5582121852771219, "learning_rate": 0.00011522262781836593, "loss": 12.0482, "step": 17182 }, { "epoch": 0.9356830232858903, "grad_norm": 0.6497945649112843, "learning_rate": 0.000115213912271931, "loss": 12.0386, "step": 17183 }, { "epoch": 0.9357374772824733, "grad_norm": 0.5430460869746485, "learning_rate": 0.00011520519660719009, "loss": 12.0862, "step": 17184 }, { "epoch": 0.9357919312790564, "grad_norm": 0.5625820920059277, "learning_rate": 0.00011519648082421107, "loss": 12.1591, "step": 17185 }, { "epoch": 0.9358463852756393, "grad_norm": 0.6288034785425524, "learning_rate": 0.00011518776492306167, "loss": 12.2041, "step": 17186 }, { "epoch": 0.9359008392722223, "grad_norm": 0.5300944084181654, "learning_rate": 0.00011517904890380963, "loss": 12.162, "step": 17187 }, { "epoch": 0.9359552932688053, "grad_norm": 0.5571033320051584, "learning_rate": 0.00011517033276652276, "loss": 11.9701, "step": 17188 }, { "epoch": 0.9360097472653883, "grad_norm": 0.6130325143516594, "learning_rate": 0.00011516161651126884, "loss": 12.077, "step": 17189 }, { "epoch": 0.9360642012619713, "grad_norm": 0.5379809855893798, "learning_rate": 0.00011515290013811565, "loss": 12.1274, "step": 17190 }, { "epoch": 0.9361186552585544, "grad_norm": 0.5765900320169567, "learning_rate": 0.00011514418364713102, "loss": 12.1006, "step": 17191 }, { "epoch": 0.9361731092551374, "grad_norm": 0.5541974507656546, "learning_rate": 0.00011513546703838263, "loss": 12.2088, "step": 17192 }, { "epoch": 0.9362275632517204, "grad_norm": 0.6294734378681487, "learning_rate": 0.0001151267503119383, "loss": 12.2842, "step": 17193 }, { "epoch": 0.9362820172483034, "grad_norm": 0.4899631038419695, "learning_rate": 0.00011511803346786583, "loss": 12.1487, "step": 17194 }, { "epoch": 0.9363364712448864, "grad_norm": 0.6317754888221799, "learning_rate": 0.000115109316506233, "loss": 12.2118, "step": 17195 }, { "epoch": 0.9363909252414695, "grad_norm": 0.577960628241482, "learning_rate": 0.00011510059942710762, "loss": 12.0954, "step": 17196 }, { "epoch": 0.9364453792380525, "grad_norm": 0.5164315592205132, "learning_rate": 0.00011509188223055742, "loss": 12.0404, "step": 17197 }, { "epoch": 0.9364998332346355, "grad_norm": 0.5667785249426822, "learning_rate": 0.00011508316491665021, "loss": 12.1055, "step": 17198 }, { "epoch": 0.9365542872312185, "grad_norm": 0.5659445918451027, "learning_rate": 0.00011507444748545375, "loss": 12.0473, "step": 17199 }, { "epoch": 0.9366087412278015, "grad_norm": 0.5705775571743129, "learning_rate": 0.0001150657299370359, "loss": 12.1798, "step": 17200 }, { "epoch": 0.9366631952243845, "grad_norm": 0.5987243004380423, "learning_rate": 0.00011505701227146441, "loss": 12.0715, "step": 17201 }, { "epoch": 0.9367176492209676, "grad_norm": 0.5876089105774565, "learning_rate": 0.00011504829448880708, "loss": 12.0864, "step": 17202 }, { "epoch": 0.9367721032175506, "grad_norm": 0.5385016581645973, "learning_rate": 0.00011503957658913165, "loss": 12.1301, "step": 17203 }, { "epoch": 0.9368265572141335, "grad_norm": 0.6261101427354301, "learning_rate": 0.00011503085857250595, "loss": 12.2408, "step": 17204 }, { "epoch": 0.9368810112107165, "grad_norm": 0.7358535512856772, "learning_rate": 0.00011502214043899779, "loss": 12.2206, "step": 17205 }, { "epoch": 0.9369354652072995, "grad_norm": 0.5778289403753424, "learning_rate": 0.00011501342218867496, "loss": 12.141, "step": 17206 }, { "epoch": 0.9369899192038825, "grad_norm": 0.5286401393506325, "learning_rate": 0.00011500470382160524, "loss": 12.1795, "step": 17207 }, { "epoch": 0.9370443732004656, "grad_norm": 0.5701706314532361, "learning_rate": 0.0001149959853378564, "loss": 12.1545, "step": 17208 }, { "epoch": 0.9370988271970486, "grad_norm": 0.5960299180323986, "learning_rate": 0.0001149872667374963, "loss": 12.1785, "step": 17209 }, { "epoch": 0.9371532811936316, "grad_norm": 0.5829854024390061, "learning_rate": 0.00011497854802059265, "loss": 12.1051, "step": 17210 }, { "epoch": 0.9372077351902146, "grad_norm": 0.5881724224735968, "learning_rate": 0.00011496982918721333, "loss": 12.2929, "step": 17211 }, { "epoch": 0.9372621891867976, "grad_norm": 0.5541811460465499, "learning_rate": 0.00011496111023742611, "loss": 12.2226, "step": 17212 }, { "epoch": 0.9373166431833806, "grad_norm": 0.5486564396126202, "learning_rate": 0.0001149523911712988, "loss": 12.1091, "step": 17213 }, { "epoch": 0.9373710971799637, "grad_norm": 0.6097862643791315, "learning_rate": 0.00011494367198889915, "loss": 12.155, "step": 17214 }, { "epoch": 0.9374255511765467, "grad_norm": 0.545545428506642, "learning_rate": 0.00011493495269029501, "loss": 12.0725, "step": 17215 }, { "epoch": 0.9374800051731297, "grad_norm": 0.609667211079781, "learning_rate": 0.0001149262332755542, "loss": 12.0824, "step": 17216 }, { "epoch": 0.9375344591697127, "grad_norm": 0.5271418269924881, "learning_rate": 0.00011491751374474447, "loss": 12.1602, "step": 17217 }, { "epoch": 0.9375889131662957, "grad_norm": 0.5761514603166165, "learning_rate": 0.00011490879409793367, "loss": 12.1334, "step": 17218 }, { "epoch": 0.9376433671628787, "grad_norm": 0.5680582296778267, "learning_rate": 0.00011490007433518956, "loss": 12.1941, "step": 17219 }, { "epoch": 0.9376978211594618, "grad_norm": 0.6078517328718738, "learning_rate": 0.00011489135445658001, "loss": 12.0645, "step": 17220 }, { "epoch": 0.9377522751560448, "grad_norm": 0.509081853414556, "learning_rate": 0.00011488263446217278, "loss": 12.0702, "step": 17221 }, { "epoch": 0.9378067291526277, "grad_norm": 0.5501102501809502, "learning_rate": 0.00011487391435203568, "loss": 12.0714, "step": 17222 }, { "epoch": 0.9378611831492107, "grad_norm": 0.5744126645045073, "learning_rate": 0.00011486519412623654, "loss": 12.1208, "step": 17223 }, { "epoch": 0.9379156371457937, "grad_norm": 0.5587439549384924, "learning_rate": 0.00011485647378484312, "loss": 12.1202, "step": 17224 }, { "epoch": 0.9379700911423767, "grad_norm": 0.561473849628231, "learning_rate": 0.0001148477533279233, "loss": 12.1321, "step": 17225 }, { "epoch": 0.9380245451389598, "grad_norm": 0.5659402032513542, "learning_rate": 0.00011483903275554486, "loss": 12.0077, "step": 17226 }, { "epoch": 0.9380789991355428, "grad_norm": 0.5377675953612329, "learning_rate": 0.00011483031206777562, "loss": 12.1217, "step": 17227 }, { "epoch": 0.9381334531321258, "grad_norm": 0.5242989901422854, "learning_rate": 0.00011482159126468338, "loss": 12.1191, "step": 17228 }, { "epoch": 0.9381879071287088, "grad_norm": 0.5297059636906761, "learning_rate": 0.00011481287034633595, "loss": 12.09, "step": 17229 }, { "epoch": 0.9382423611252918, "grad_norm": 0.5568369256020095, "learning_rate": 0.00011480414931280114, "loss": 12.2279, "step": 17230 }, { "epoch": 0.9382968151218749, "grad_norm": 0.5244586564878074, "learning_rate": 0.00011479542816414681, "loss": 12.1117, "step": 17231 }, { "epoch": 0.9383512691184579, "grad_norm": 0.5413030521735377, "learning_rate": 0.00011478670690044075, "loss": 12.3001, "step": 17232 }, { "epoch": 0.9384057231150409, "grad_norm": 0.5701559086699557, "learning_rate": 0.00011477798552175076, "loss": 12.208, "step": 17233 }, { "epoch": 0.9384601771116239, "grad_norm": 0.512633275163757, "learning_rate": 0.0001147692640281447, "loss": 12.1601, "step": 17234 }, { "epoch": 0.9385146311082069, "grad_norm": 0.5953874673114348, "learning_rate": 0.00011476054241969035, "loss": 12.2078, "step": 17235 }, { "epoch": 0.9385690851047899, "grad_norm": 0.5853808960841238, "learning_rate": 0.00011475182069645556, "loss": 12.1906, "step": 17236 }, { "epoch": 0.938623539101373, "grad_norm": 0.5601311855180856, "learning_rate": 0.00011474309885850811, "loss": 12.1287, "step": 17237 }, { "epoch": 0.938677993097956, "grad_norm": 0.5993874750692453, "learning_rate": 0.00011473437690591589, "loss": 12.3005, "step": 17238 }, { "epoch": 0.938732447094539, "grad_norm": 0.5524879737952889, "learning_rate": 0.00011472565483874665, "loss": 12.2526, "step": 17239 }, { "epoch": 0.938786901091122, "grad_norm": 0.6498326786926967, "learning_rate": 0.00011471693265706827, "loss": 12.1835, "step": 17240 }, { "epoch": 0.9388413550877049, "grad_norm": 0.547793442948194, "learning_rate": 0.00011470821036094856, "loss": 12.0828, "step": 17241 }, { "epoch": 0.9388958090842879, "grad_norm": 0.5926849825805017, "learning_rate": 0.0001146994879504553, "loss": 12.305, "step": 17242 }, { "epoch": 0.938950263080871, "grad_norm": 0.5139655451009593, "learning_rate": 0.00011469076542565637, "loss": 12.0733, "step": 17243 }, { "epoch": 0.939004717077454, "grad_norm": 0.5806839240526883, "learning_rate": 0.00011468204278661961, "loss": 12.0816, "step": 17244 }, { "epoch": 0.939059171074037, "grad_norm": 0.5360932660865105, "learning_rate": 0.00011467332003341281, "loss": 12.1387, "step": 17245 }, { "epoch": 0.93911362507062, "grad_norm": 0.5959651623575238, "learning_rate": 0.00011466459716610382, "loss": 12.1715, "step": 17246 }, { "epoch": 0.939168079067203, "grad_norm": 0.49148132757862095, "learning_rate": 0.00011465587418476047, "loss": 12.1486, "step": 17247 }, { "epoch": 0.939222533063786, "grad_norm": 0.5828766323037756, "learning_rate": 0.00011464715108945055, "loss": 12.1267, "step": 17248 }, { "epoch": 0.9392769870603691, "grad_norm": 0.556492706857238, "learning_rate": 0.00011463842788024192, "loss": 12.1077, "step": 17249 }, { "epoch": 0.9393314410569521, "grad_norm": 0.529851964618464, "learning_rate": 0.00011462970455720246, "loss": 12.0681, "step": 17250 }, { "epoch": 0.9393858950535351, "grad_norm": 0.5119082388277009, "learning_rate": 0.00011462098112039997, "loss": 12.1816, "step": 17251 }, { "epoch": 0.9394403490501181, "grad_norm": 0.5067629636196034, "learning_rate": 0.00011461225756990226, "loss": 12.1203, "step": 17252 }, { "epoch": 0.9394948030467011, "grad_norm": 0.5617860450760956, "learning_rate": 0.00011460353390577716, "loss": 12.2252, "step": 17253 }, { "epoch": 0.9395492570432841, "grad_norm": 0.4995096624139038, "learning_rate": 0.00011459481012809256, "loss": 12.1493, "step": 17254 }, { "epoch": 0.9396037110398672, "grad_norm": 0.5723783085188626, "learning_rate": 0.00011458608623691627, "loss": 12.1434, "step": 17255 }, { "epoch": 0.9396581650364502, "grad_norm": 0.6267075493003642, "learning_rate": 0.00011457736223231612, "loss": 12.1304, "step": 17256 }, { "epoch": 0.9397126190330332, "grad_norm": 0.5694948368007134, "learning_rate": 0.00011456863811435998, "loss": 12.0896, "step": 17257 }, { "epoch": 0.9397670730296162, "grad_norm": 0.5344020892925807, "learning_rate": 0.00011455991388311564, "loss": 12.2124, "step": 17258 }, { "epoch": 0.9398215270261991, "grad_norm": 0.542197920612981, "learning_rate": 0.00011455118953865096, "loss": 12.1603, "step": 17259 }, { "epoch": 0.9398759810227821, "grad_norm": 0.5471556562301783, "learning_rate": 0.00011454246508103379, "loss": 12.0228, "step": 17260 }, { "epoch": 0.9399304350193652, "grad_norm": 0.5374465446352593, "learning_rate": 0.00011453374051033199, "loss": 12.2189, "step": 17261 }, { "epoch": 0.9399848890159482, "grad_norm": 0.5364038580856191, "learning_rate": 0.00011452501582661341, "loss": 12.2213, "step": 17262 }, { "epoch": 0.9400393430125312, "grad_norm": 0.5654931347362897, "learning_rate": 0.00011451629102994583, "loss": 12.1549, "step": 17263 }, { "epoch": 0.9400937970091142, "grad_norm": 0.6023270565342111, "learning_rate": 0.00011450756612039715, "loss": 12.0658, "step": 17264 }, { "epoch": 0.9401482510056972, "grad_norm": 0.5695175187289669, "learning_rate": 0.00011449884109803519, "loss": 12.2456, "step": 17265 }, { "epoch": 0.9402027050022803, "grad_norm": 0.6502306398488362, "learning_rate": 0.00011449011596292783, "loss": 12.2209, "step": 17266 }, { "epoch": 0.9402571589988633, "grad_norm": 0.5582934648368669, "learning_rate": 0.0001144813907151429, "loss": 12.1111, "step": 17267 }, { "epoch": 0.9403116129954463, "grad_norm": 0.6445743452834216, "learning_rate": 0.00011447266535474824, "loss": 12.1412, "step": 17268 }, { "epoch": 0.9403660669920293, "grad_norm": 0.5357891514697972, "learning_rate": 0.0001144639398818117, "loss": 12.1874, "step": 17269 }, { "epoch": 0.9404205209886123, "grad_norm": 0.5512280444023175, "learning_rate": 0.00011445521429640114, "loss": 12.1655, "step": 17270 }, { "epoch": 0.9404749749851953, "grad_norm": 0.625777175265489, "learning_rate": 0.0001144464885985844, "loss": 12.0704, "step": 17271 }, { "epoch": 0.9405294289817784, "grad_norm": 0.5525918408539039, "learning_rate": 0.00011443776278842937, "loss": 11.9734, "step": 17272 }, { "epoch": 0.9405838829783614, "grad_norm": 0.5754948191262231, "learning_rate": 0.00011442903686600386, "loss": 12.1846, "step": 17273 }, { "epoch": 0.9406383369749444, "grad_norm": 0.5790135252150729, "learning_rate": 0.00011442031083137574, "loss": 12.223, "step": 17274 }, { "epoch": 0.9406927909715274, "grad_norm": 0.5600864123416874, "learning_rate": 0.00011441158468461286, "loss": 12.0566, "step": 17275 }, { "epoch": 0.9407472449681104, "grad_norm": 0.5736911960390423, "learning_rate": 0.00011440285842578306, "loss": 12.1032, "step": 17276 }, { "epoch": 0.9408016989646933, "grad_norm": 0.5543856612987784, "learning_rate": 0.00011439413205495428, "loss": 12.0983, "step": 17277 }, { "epoch": 0.9408561529612764, "grad_norm": 0.5984601213406878, "learning_rate": 0.00011438540557219429, "loss": 12.2118, "step": 17278 }, { "epoch": 0.9409106069578594, "grad_norm": 0.5530215820344018, "learning_rate": 0.00011437667897757093, "loss": 12.2162, "step": 17279 }, { "epoch": 0.9409650609544424, "grad_norm": 0.5248977660305737, "learning_rate": 0.00011436795227115216, "loss": 12.1515, "step": 17280 }, { "epoch": 0.9410195149510254, "grad_norm": 0.5153105000297468, "learning_rate": 0.00011435922545300572, "loss": 12.0293, "step": 17281 }, { "epoch": 0.9410739689476084, "grad_norm": 0.5206047845654533, "learning_rate": 0.00011435049852319961, "loss": 12.1381, "step": 17282 }, { "epoch": 0.9411284229441914, "grad_norm": 0.5185415528814564, "learning_rate": 0.00011434177148180159, "loss": 12.0153, "step": 17283 }, { "epoch": 0.9411828769407745, "grad_norm": 0.5520501755180541, "learning_rate": 0.00011433304432887952, "loss": 12.1841, "step": 17284 }, { "epoch": 0.9412373309373575, "grad_norm": 0.6256029817077796, "learning_rate": 0.00011432431706450133, "loss": 12.119, "step": 17285 }, { "epoch": 0.9412917849339405, "grad_norm": 0.5374855220323205, "learning_rate": 0.00011431558968873482, "loss": 12.198, "step": 17286 }, { "epoch": 0.9413462389305235, "grad_norm": 0.5433292626029383, "learning_rate": 0.0001143068622016479, "loss": 12.0907, "step": 17287 }, { "epoch": 0.9414006929271065, "grad_norm": 0.5521975390496797, "learning_rate": 0.00011429813460330841, "loss": 12.1391, "step": 17288 }, { "epoch": 0.9414551469236895, "grad_norm": 0.5429237677242953, "learning_rate": 0.00011428940689378423, "loss": 12.1754, "step": 17289 }, { "epoch": 0.9415096009202726, "grad_norm": 0.6030906254884438, "learning_rate": 0.00011428067907314324, "loss": 12.2256, "step": 17290 }, { "epoch": 0.9415640549168556, "grad_norm": 0.5743037190203135, "learning_rate": 0.00011427195114145328, "loss": 11.9909, "step": 17291 }, { "epoch": 0.9416185089134386, "grad_norm": 0.5468077782858237, "learning_rate": 0.00011426322309878223, "loss": 12.0723, "step": 17292 }, { "epoch": 0.9416729629100216, "grad_norm": 0.5599611855486325, "learning_rate": 0.00011425449494519798, "loss": 12.0496, "step": 17293 }, { "epoch": 0.9417274169066046, "grad_norm": 0.5863866721261957, "learning_rate": 0.00011424576668076838, "loss": 12.0658, "step": 17294 }, { "epoch": 0.9417818709031875, "grad_norm": 0.562459945614839, "learning_rate": 0.00011423703830556132, "loss": 12.1998, "step": 17295 }, { "epoch": 0.9418363248997706, "grad_norm": 0.5531156188848322, "learning_rate": 0.00011422830981964465, "loss": 12.1009, "step": 17296 }, { "epoch": 0.9418907788963536, "grad_norm": 0.5583643188921626, "learning_rate": 0.00011421958122308625, "loss": 12.1192, "step": 17297 }, { "epoch": 0.9419452328929366, "grad_norm": 0.568445649499285, "learning_rate": 0.00011421085251595402, "loss": 12.1539, "step": 17298 }, { "epoch": 0.9419996868895196, "grad_norm": 0.5543496850795123, "learning_rate": 0.00011420212369831579, "loss": 12.1915, "step": 17299 }, { "epoch": 0.9420541408861026, "grad_norm": 0.6451922077957235, "learning_rate": 0.0001141933947702395, "loss": 12.1941, "step": 17300 }, { "epoch": 0.9421085948826857, "grad_norm": 0.5803207346318053, "learning_rate": 0.00011418466573179297, "loss": 12.1449, "step": 17301 }, { "epoch": 0.9421630488792687, "grad_norm": 0.5794351050123866, "learning_rate": 0.00011417593658304411, "loss": 12.1409, "step": 17302 }, { "epoch": 0.9422175028758517, "grad_norm": 0.6212903231713002, "learning_rate": 0.00011416720732406078, "loss": 12.2715, "step": 17303 }, { "epoch": 0.9422719568724347, "grad_norm": 0.550008672363222, "learning_rate": 0.00011415847795491088, "loss": 11.8332, "step": 17304 }, { "epoch": 0.9423264108690177, "grad_norm": 0.5297503330256259, "learning_rate": 0.00011414974847566226, "loss": 11.9181, "step": 17305 }, { "epoch": 0.9423808648656007, "grad_norm": 0.5472197784649815, "learning_rate": 0.00011414101888638284, "loss": 12.0817, "step": 17306 }, { "epoch": 0.9424353188621838, "grad_norm": 0.561480153899593, "learning_rate": 0.00011413228918714051, "loss": 12.2081, "step": 17307 }, { "epoch": 0.9424897728587668, "grad_norm": 0.5529541879321098, "learning_rate": 0.0001141235593780031, "loss": 12.109, "step": 17308 }, { "epoch": 0.9425442268553498, "grad_norm": 0.6121092589831865, "learning_rate": 0.00011411482945903853, "loss": 12.1352, "step": 17309 }, { "epoch": 0.9425986808519328, "grad_norm": 0.5248546324420222, "learning_rate": 0.00011410609943031467, "loss": 12.1414, "step": 17310 }, { "epoch": 0.9426531348485158, "grad_norm": 0.5544470836079637, "learning_rate": 0.00011409736929189943, "loss": 12.1372, "step": 17311 }, { "epoch": 0.9427075888450988, "grad_norm": 0.6278726221777126, "learning_rate": 0.00011408863904386068, "loss": 12.2407, "step": 17312 }, { "epoch": 0.9427620428416819, "grad_norm": 0.5198308450201492, "learning_rate": 0.0001140799086862663, "loss": 12.0772, "step": 17313 }, { "epoch": 0.9428164968382649, "grad_norm": 0.6000937497366587, "learning_rate": 0.00011407117821918419, "loss": 12.1906, "step": 17314 }, { "epoch": 0.9428709508348478, "grad_norm": 0.504756532291079, "learning_rate": 0.00011406244764268223, "loss": 12.0159, "step": 17315 }, { "epoch": 0.9429254048314308, "grad_norm": 0.547540736074649, "learning_rate": 0.00011405371695682834, "loss": 12.2065, "step": 17316 }, { "epoch": 0.9429798588280138, "grad_norm": 0.5870598282344961, "learning_rate": 0.00011404498616169039, "loss": 12.247, "step": 17317 }, { "epoch": 0.9430343128245968, "grad_norm": 0.6027365008204192, "learning_rate": 0.00011403625525733628, "loss": 12.1875, "step": 17318 }, { "epoch": 0.9430887668211799, "grad_norm": 0.6294453674112667, "learning_rate": 0.00011402752424383385, "loss": 12.1007, "step": 17319 }, { "epoch": 0.9431432208177629, "grad_norm": 0.60501481424706, "learning_rate": 0.00011401879312125108, "loss": 12.2884, "step": 17320 }, { "epoch": 0.9431976748143459, "grad_norm": 0.5559023358331184, "learning_rate": 0.0001140100618896558, "loss": 12.1927, "step": 17321 }, { "epoch": 0.9432521288109289, "grad_norm": 0.49646878847966375, "learning_rate": 0.00011400133054911597, "loss": 12.0381, "step": 17322 }, { "epoch": 0.9433065828075119, "grad_norm": 0.534139219927877, "learning_rate": 0.00011399259909969942, "loss": 12.1031, "step": 17323 }, { "epoch": 0.9433610368040949, "grad_norm": 0.5331067913106019, "learning_rate": 0.00011398386754147405, "loss": 12.1819, "step": 17324 }, { "epoch": 0.943415490800678, "grad_norm": 0.5361743784298774, "learning_rate": 0.00011397513587450779, "loss": 12.0945, "step": 17325 }, { "epoch": 0.943469944797261, "grad_norm": 0.5337030188842238, "learning_rate": 0.00011396640409886854, "loss": 12.1484, "step": 17326 }, { "epoch": 0.943524398793844, "grad_norm": 0.6535158266766455, "learning_rate": 0.00011395767221462421, "loss": 12.2561, "step": 17327 }, { "epoch": 0.943578852790427, "grad_norm": 0.5496197226487632, "learning_rate": 0.00011394894022184266, "loss": 12.0781, "step": 17328 }, { "epoch": 0.94363330678701, "grad_norm": 0.5964472425795224, "learning_rate": 0.0001139402081205918, "loss": 12.1233, "step": 17329 }, { "epoch": 0.9436877607835931, "grad_norm": 0.5233459287429948, "learning_rate": 0.00011393147591093954, "loss": 12.2202, "step": 17330 }, { "epoch": 0.9437422147801761, "grad_norm": 0.5396707815865741, "learning_rate": 0.00011392274359295381, "loss": 12.0967, "step": 17331 }, { "epoch": 0.943796668776759, "grad_norm": 0.5324413065684591, "learning_rate": 0.00011391401116670248, "loss": 12.097, "step": 17332 }, { "epoch": 0.943851122773342, "grad_norm": 0.518466699604795, "learning_rate": 0.00011390527863225349, "loss": 12.0776, "step": 17333 }, { "epoch": 0.943905576769925, "grad_norm": 0.6130210757512741, "learning_rate": 0.00011389654598967469, "loss": 12.174, "step": 17334 }, { "epoch": 0.943960030766508, "grad_norm": 0.5183145927597441, "learning_rate": 0.00011388781323903403, "loss": 12.1552, "step": 17335 }, { "epoch": 0.9440144847630911, "grad_norm": 0.5357415610664379, "learning_rate": 0.00011387908038039938, "loss": 12.1089, "step": 17336 }, { "epoch": 0.9440689387596741, "grad_norm": 0.5192848791681334, "learning_rate": 0.00011387034741383872, "loss": 12.0486, "step": 17337 }, { "epoch": 0.9441233927562571, "grad_norm": 0.5211735670403752, "learning_rate": 0.00011386161433941988, "loss": 12.2151, "step": 17338 }, { "epoch": 0.9441778467528401, "grad_norm": 0.5316633016104322, "learning_rate": 0.00011385288115721082, "loss": 12.0402, "step": 17339 }, { "epoch": 0.9442323007494231, "grad_norm": 0.5882106176391751, "learning_rate": 0.00011384414786727942, "loss": 12.1058, "step": 17340 }, { "epoch": 0.9442867547460061, "grad_norm": 0.6081795735550459, "learning_rate": 0.00011383541446969362, "loss": 12.1262, "step": 17341 }, { "epoch": 0.9443412087425892, "grad_norm": 0.50259690448361, "learning_rate": 0.0001138266809645213, "loss": 12.0509, "step": 17342 }, { "epoch": 0.9443956627391722, "grad_norm": 0.5571861587041949, "learning_rate": 0.0001138179473518304, "loss": 12.1127, "step": 17343 }, { "epoch": 0.9444501167357552, "grad_norm": 0.5622518601050132, "learning_rate": 0.00011380921363168882, "loss": 12.1726, "step": 17344 }, { "epoch": 0.9445045707323382, "grad_norm": 0.5339367631474244, "learning_rate": 0.00011380047980416449, "loss": 12.1181, "step": 17345 }, { "epoch": 0.9445590247289212, "grad_norm": 0.5763643587724595, "learning_rate": 0.00011379174586932528, "loss": 12.0472, "step": 17346 }, { "epoch": 0.9446134787255042, "grad_norm": 0.53330973830854, "learning_rate": 0.00011378301182723918, "loss": 12.0635, "step": 17347 }, { "epoch": 0.9446679327220873, "grad_norm": 0.5257490232366746, "learning_rate": 0.00011377427767797404, "loss": 12.1081, "step": 17348 }, { "epoch": 0.9447223867186703, "grad_norm": 0.6136431341982396, "learning_rate": 0.00011376554342159785, "loss": 12.2335, "step": 17349 }, { "epoch": 0.9447768407152533, "grad_norm": 0.546794308497035, "learning_rate": 0.00011375680905817844, "loss": 12.1004, "step": 17350 }, { "epoch": 0.9448312947118362, "grad_norm": 0.6030840671502765, "learning_rate": 0.00011374807458778378, "loss": 12.1877, "step": 17351 }, { "epoch": 0.9448857487084192, "grad_norm": 0.5508296104098631, "learning_rate": 0.00011373934001048181, "loss": 12.237, "step": 17352 }, { "epoch": 0.9449402027050022, "grad_norm": 0.5684494836431603, "learning_rate": 0.00011373060532634041, "loss": 12.0696, "step": 17353 }, { "epoch": 0.9449946567015853, "grad_norm": 0.5099595884068093, "learning_rate": 0.00011372187053542753, "loss": 12.0227, "step": 17354 }, { "epoch": 0.9450491106981683, "grad_norm": 0.6102015865848511, "learning_rate": 0.00011371313563781107, "loss": 12.2132, "step": 17355 }, { "epoch": 0.9451035646947513, "grad_norm": 0.5640477794515227, "learning_rate": 0.00011370440063355898, "loss": 12.1198, "step": 17356 }, { "epoch": 0.9451580186913343, "grad_norm": 0.6693167233760415, "learning_rate": 0.00011369566552273919, "loss": 12.268, "step": 17357 }, { "epoch": 0.9452124726879173, "grad_norm": 0.5674765937082062, "learning_rate": 0.0001136869303054196, "loss": 12.066, "step": 17358 }, { "epoch": 0.9452669266845003, "grad_norm": 0.5607735624035676, "learning_rate": 0.00011367819498166812, "loss": 12.1589, "step": 17359 }, { "epoch": 0.9453213806810834, "grad_norm": 0.5681497183526665, "learning_rate": 0.00011366945955155271, "loss": 12.0458, "step": 17360 }, { "epoch": 0.9453758346776664, "grad_norm": 0.5625421382395502, "learning_rate": 0.00011366072401514129, "loss": 12.0679, "step": 17361 }, { "epoch": 0.9454302886742494, "grad_norm": 0.5649134208134503, "learning_rate": 0.00011365198837250182, "loss": 12.1959, "step": 17362 }, { "epoch": 0.9454847426708324, "grad_norm": 0.5822452562809511, "learning_rate": 0.00011364325262370215, "loss": 12.2539, "step": 17363 }, { "epoch": 0.9455391966674154, "grad_norm": 0.6101939104399868, "learning_rate": 0.0001136345167688103, "loss": 11.9465, "step": 17364 }, { "epoch": 0.9455936506639985, "grad_norm": 0.5451417122742256, "learning_rate": 0.00011362578080789413, "loss": 12.1123, "step": 17365 }, { "epoch": 0.9456481046605815, "grad_norm": 0.5445237279344438, "learning_rate": 0.00011361704474102162, "loss": 12.102, "step": 17366 }, { "epoch": 0.9457025586571645, "grad_norm": 0.5558014321065162, "learning_rate": 0.0001136083085682607, "loss": 12.1779, "step": 17367 }, { "epoch": 0.9457570126537475, "grad_norm": 0.5443084486631766, "learning_rate": 0.00011359957228967926, "loss": 12.0391, "step": 17368 }, { "epoch": 0.9458114666503304, "grad_norm": 0.5223724320434633, "learning_rate": 0.00011359083590534527, "loss": 12.0774, "step": 17369 }, { "epoch": 0.9458659206469134, "grad_norm": 0.6259468637705189, "learning_rate": 0.00011358209941532668, "loss": 12.2839, "step": 17370 }, { "epoch": 0.9459203746434965, "grad_norm": 0.501076660026277, "learning_rate": 0.0001135733628196914, "loss": 12.1208, "step": 17371 }, { "epoch": 0.9459748286400795, "grad_norm": 0.5180692728575292, "learning_rate": 0.0001135646261185074, "loss": 12.0093, "step": 17372 }, { "epoch": 0.9460292826366625, "grad_norm": 0.48719332354599426, "learning_rate": 0.00011355588931184256, "loss": 12.1396, "step": 17373 }, { "epoch": 0.9460837366332455, "grad_norm": 0.6126639995736392, "learning_rate": 0.00011354715239976483, "loss": 12.271, "step": 17374 }, { "epoch": 0.9461381906298285, "grad_norm": 0.5393699437816664, "learning_rate": 0.00011353841538234221, "loss": 12.1046, "step": 17375 }, { "epoch": 0.9461926446264115, "grad_norm": 0.5420248069570972, "learning_rate": 0.00011352967825964259, "loss": 12.1104, "step": 17376 }, { "epoch": 0.9462470986229946, "grad_norm": 0.5331460613546519, "learning_rate": 0.00011352094103173394, "loss": 12.0273, "step": 17377 }, { "epoch": 0.9463015526195776, "grad_norm": 0.4752978198092573, "learning_rate": 0.00011351220369868416, "loss": 12.2514, "step": 17378 }, { "epoch": 0.9463560066161606, "grad_norm": 0.6074988970316852, "learning_rate": 0.00011350346626056121, "loss": 12.2263, "step": 17379 }, { "epoch": 0.9464104606127436, "grad_norm": 0.5837326630588333, "learning_rate": 0.00011349472871743306, "loss": 12.1478, "step": 17380 }, { "epoch": 0.9464649146093266, "grad_norm": 0.7559202053192198, "learning_rate": 0.00011348599106936762, "loss": 12.2578, "step": 17381 }, { "epoch": 0.9465193686059096, "grad_norm": 0.6222093173873497, "learning_rate": 0.00011347725331643289, "loss": 12.2416, "step": 17382 }, { "epoch": 0.9465738226024927, "grad_norm": 0.5169883056107297, "learning_rate": 0.00011346851545869674, "loss": 12.0261, "step": 17383 }, { "epoch": 0.9466282765990757, "grad_norm": 0.5574210783211934, "learning_rate": 0.00011345977749622718, "loss": 12.1017, "step": 17384 }, { "epoch": 0.9466827305956587, "grad_norm": 0.6661342012506033, "learning_rate": 0.0001134510394290921, "loss": 12.1888, "step": 17385 }, { "epoch": 0.9467371845922417, "grad_norm": 0.5531060894825526, "learning_rate": 0.0001134423012573595, "loss": 12.1628, "step": 17386 }, { "epoch": 0.9467916385888246, "grad_norm": 1.0749222571419381, "learning_rate": 0.00011343356298109732, "loss": 12.1868, "step": 17387 }, { "epoch": 0.9468460925854076, "grad_norm": 0.5015559730252587, "learning_rate": 0.00011342482460037352, "loss": 12.1034, "step": 17388 }, { "epoch": 0.9469005465819907, "grad_norm": 0.5686342159403498, "learning_rate": 0.000113416086115256, "loss": 12.0834, "step": 17389 }, { "epoch": 0.9469550005785737, "grad_norm": 0.5218876135221702, "learning_rate": 0.00011340734752581274, "loss": 12.0357, "step": 17390 }, { "epoch": 0.9470094545751567, "grad_norm": 0.5554576526720056, "learning_rate": 0.00011339860883211171, "loss": 12.1212, "step": 17391 }, { "epoch": 0.9470639085717397, "grad_norm": 0.6075109268687903, "learning_rate": 0.00011338987003422086, "loss": 12.1669, "step": 17392 }, { "epoch": 0.9471183625683227, "grad_norm": 0.5674266591827817, "learning_rate": 0.00011338113113220814, "loss": 12.0762, "step": 17393 }, { "epoch": 0.9471728165649057, "grad_norm": 0.5507636381804221, "learning_rate": 0.00011337239212614148, "loss": 12.0855, "step": 17394 }, { "epoch": 0.9472272705614888, "grad_norm": 0.5326074009086318, "learning_rate": 0.00011336365301608887, "loss": 12.0757, "step": 17395 }, { "epoch": 0.9472817245580718, "grad_norm": 0.5373601830555752, "learning_rate": 0.00011335491380211823, "loss": 12.04, "step": 17396 }, { "epoch": 0.9473361785546548, "grad_norm": 0.5781711851409429, "learning_rate": 0.00011334617448429754, "loss": 12.1499, "step": 17397 }, { "epoch": 0.9473906325512378, "grad_norm": 0.62400690303693, "learning_rate": 0.00011333743506269479, "loss": 12.3313, "step": 17398 }, { "epoch": 0.9474450865478208, "grad_norm": 0.5670385056747921, "learning_rate": 0.0001133286955373779, "loss": 11.9934, "step": 17399 }, { "epoch": 0.9474995405444039, "grad_norm": 0.5790623640026985, "learning_rate": 0.00011331995590841482, "loss": 12.3538, "step": 17400 }, { "epoch": 0.9475539945409869, "grad_norm": 0.4869641044386059, "learning_rate": 0.00011331121617587355, "loss": 12.131, "step": 17401 }, { "epoch": 0.9476084485375699, "grad_norm": 0.605769205367832, "learning_rate": 0.000113302476339822, "loss": 12.2189, "step": 17402 }, { "epoch": 0.9476629025341529, "grad_norm": 0.5477340165700033, "learning_rate": 0.00011329373640032821, "loss": 12.2405, "step": 17403 }, { "epoch": 0.9477173565307359, "grad_norm": 0.5295438034670991, "learning_rate": 0.00011328499635746009, "loss": 12.1444, "step": 17404 }, { "epoch": 0.9477718105273188, "grad_norm": 0.5227246506383778, "learning_rate": 0.00011327625621128556, "loss": 12.128, "step": 17405 }, { "epoch": 0.947826264523902, "grad_norm": 0.5701971780820198, "learning_rate": 0.00011326751596187269, "loss": 12.0952, "step": 17406 }, { "epoch": 0.9478807185204849, "grad_norm": 0.5291297001215025, "learning_rate": 0.00011325877560928932, "loss": 12.0341, "step": 17407 }, { "epoch": 0.9479351725170679, "grad_norm": 0.561216245895998, "learning_rate": 0.00011325003515360357, "loss": 12.0441, "step": 17408 }, { "epoch": 0.9479896265136509, "grad_norm": 0.7248037630493238, "learning_rate": 0.00011324129459488329, "loss": 12.0241, "step": 17409 }, { "epoch": 0.9480440805102339, "grad_norm": 0.5478144736346154, "learning_rate": 0.00011323255393319647, "loss": 12.1368, "step": 17410 }, { "epoch": 0.9480985345068169, "grad_norm": 0.5832870659400218, "learning_rate": 0.00011322381316861112, "loss": 12.2738, "step": 17411 }, { "epoch": 0.9481529885034, "grad_norm": 0.5652275283387745, "learning_rate": 0.00011321507230119517, "loss": 12.1513, "step": 17412 }, { "epoch": 0.948207442499983, "grad_norm": 0.538845252796443, "learning_rate": 0.00011320633133101659, "loss": 12.0958, "step": 17413 }, { "epoch": 0.948261896496566, "grad_norm": 0.5822453906528845, "learning_rate": 0.00011319759025814335, "loss": 12.1057, "step": 17414 }, { "epoch": 0.948316350493149, "grad_norm": 0.6519578863689457, "learning_rate": 0.00011318884908264347, "loss": 12.3193, "step": 17415 }, { "epoch": 0.948370804489732, "grad_norm": 0.602744532893201, "learning_rate": 0.00011318010780458488, "loss": 12.1319, "step": 17416 }, { "epoch": 0.948425258486315, "grad_norm": 0.6262288472678893, "learning_rate": 0.00011317136642403554, "loss": 12.0864, "step": 17417 }, { "epoch": 0.9484797124828981, "grad_norm": 0.5392669614315889, "learning_rate": 0.00011316262494106347, "loss": 12.2444, "step": 17418 }, { "epoch": 0.9485341664794811, "grad_norm": 0.6239141160838986, "learning_rate": 0.0001131538833557366, "loss": 12.1257, "step": 17419 }, { "epoch": 0.9485886204760641, "grad_norm": 0.6145631515186998, "learning_rate": 0.00011314514166812295, "loss": 12.1822, "step": 17420 }, { "epoch": 0.9486430744726471, "grad_norm": 0.596281164024472, "learning_rate": 0.00011313639987829046, "loss": 12.1477, "step": 17421 }, { "epoch": 0.9486975284692301, "grad_norm": 0.5929985373760998, "learning_rate": 0.00011312765798630711, "loss": 12.0916, "step": 17422 }, { "epoch": 0.948751982465813, "grad_norm": 0.8604325299052797, "learning_rate": 0.00011311891599224092, "loss": 11.9498, "step": 17423 }, { "epoch": 0.9488064364623962, "grad_norm": 0.5983641398134796, "learning_rate": 0.00011311017389615981, "loss": 12.1452, "step": 17424 }, { "epoch": 0.9488608904589791, "grad_norm": 0.5306545391820514, "learning_rate": 0.0001131014316981318, "loss": 12.0887, "step": 17425 }, { "epoch": 0.9489153444555621, "grad_norm": 0.6019247104345119, "learning_rate": 0.00011309268939822486, "loss": 12.0831, "step": 17426 }, { "epoch": 0.9489697984521451, "grad_norm": 0.6030923956429187, "learning_rate": 0.00011308394699650697, "loss": 12.1672, "step": 17427 }, { "epoch": 0.9490242524487281, "grad_norm": 0.5375711174347375, "learning_rate": 0.00011307520449304614, "loss": 12.0472, "step": 17428 }, { "epoch": 0.9490787064453111, "grad_norm": 0.5398776364743896, "learning_rate": 0.0001130664618879103, "loss": 12.2488, "step": 17429 }, { "epoch": 0.9491331604418942, "grad_norm": 0.543823949854254, "learning_rate": 0.00011305771918116746, "loss": 12.1812, "step": 17430 }, { "epoch": 0.9491876144384772, "grad_norm": 0.5784847529335118, "learning_rate": 0.00011304897637288561, "loss": 12.0837, "step": 17431 }, { "epoch": 0.9492420684350602, "grad_norm": 0.5714020178424278, "learning_rate": 0.00011304023346313273, "loss": 12.1253, "step": 17432 }, { "epoch": 0.9492965224316432, "grad_norm": 0.4896225675110771, "learning_rate": 0.00011303149045197682, "loss": 12.0577, "step": 17433 }, { "epoch": 0.9493509764282262, "grad_norm": 0.5872673682035371, "learning_rate": 0.00011302274733948583, "loss": 12.1131, "step": 17434 }, { "epoch": 0.9494054304248093, "grad_norm": 0.5501561471508206, "learning_rate": 0.00011301400412572781, "loss": 12.1594, "step": 17435 }, { "epoch": 0.9494598844213923, "grad_norm": 0.6206510333411461, "learning_rate": 0.00011300526081077068, "loss": 12.3073, "step": 17436 }, { "epoch": 0.9495143384179753, "grad_norm": 0.5840100543707513, "learning_rate": 0.00011299651739468246, "loss": 12.17, "step": 17437 }, { "epoch": 0.9495687924145583, "grad_norm": 0.6196232425988139, "learning_rate": 0.00011298777387753118, "loss": 12.1427, "step": 17438 }, { "epoch": 0.9496232464111413, "grad_norm": 0.70709403486026, "learning_rate": 0.00011297903025938476, "loss": 12.1109, "step": 17439 }, { "epoch": 0.9496777004077243, "grad_norm": 0.6074900970206965, "learning_rate": 0.00011297028654031121, "loss": 12.0835, "step": 17440 }, { "epoch": 0.9497321544043074, "grad_norm": 0.6208250634797887, "learning_rate": 0.00011296154272037856, "loss": 12.1172, "step": 17441 }, { "epoch": 0.9497866084008904, "grad_norm": 0.5726205600853366, "learning_rate": 0.00011295279879965477, "loss": 12.1401, "step": 17442 }, { "epoch": 0.9498410623974733, "grad_norm": 0.5696277608532468, "learning_rate": 0.00011294405477820787, "loss": 12.2064, "step": 17443 }, { "epoch": 0.9498955163940563, "grad_norm": 0.5569385381906664, "learning_rate": 0.00011293531065610581, "loss": 12.1694, "step": 17444 }, { "epoch": 0.9499499703906393, "grad_norm": 0.6089017667986101, "learning_rate": 0.00011292656643341659, "loss": 12.1573, "step": 17445 }, { "epoch": 0.9500044243872223, "grad_norm": 0.5086641759852797, "learning_rate": 0.00011291782211020823, "loss": 12.098, "step": 17446 }, { "epoch": 0.9500588783838054, "grad_norm": 0.5903911370652851, "learning_rate": 0.00011290907768654872, "loss": 12.1332, "step": 17447 }, { "epoch": 0.9501133323803884, "grad_norm": 0.5118851465113656, "learning_rate": 0.00011290033316250608, "loss": 11.9755, "step": 17448 }, { "epoch": 0.9501677863769714, "grad_norm": 0.5796184135739372, "learning_rate": 0.00011289158853814827, "loss": 12.2098, "step": 17449 }, { "epoch": 0.9502222403735544, "grad_norm": 0.5154883143599293, "learning_rate": 0.0001128828438135433, "loss": 11.988, "step": 17450 }, { "epoch": 0.9502766943701374, "grad_norm": 0.5610163462632073, "learning_rate": 0.00011287409898875916, "loss": 12.2074, "step": 17451 }, { "epoch": 0.9503311483667204, "grad_norm": 0.5747716835570177, "learning_rate": 0.00011286535406386389, "loss": 12.079, "step": 17452 }, { "epoch": 0.9503856023633035, "grad_norm": 0.5182737197924299, "learning_rate": 0.0001128566090389255, "loss": 12.1829, "step": 17453 }, { "epoch": 0.9504400563598865, "grad_norm": 0.5359861646375332, "learning_rate": 0.00011284786391401191, "loss": 12.1085, "step": 17454 }, { "epoch": 0.9504945103564695, "grad_norm": 0.6373633744777849, "learning_rate": 0.00011283911868919119, "loss": 12.1897, "step": 17455 }, { "epoch": 0.9505489643530525, "grad_norm": 0.5455363959029207, "learning_rate": 0.00011283037336453132, "loss": 12.1452, "step": 17456 }, { "epoch": 0.9506034183496355, "grad_norm": 0.5436581687885378, "learning_rate": 0.00011282162794010034, "loss": 12.1088, "step": 17457 }, { "epoch": 0.9506578723462185, "grad_norm": 0.5684905876488265, "learning_rate": 0.00011281288241596624, "loss": 12.2331, "step": 17458 }, { "epoch": 0.9507123263428016, "grad_norm": 0.5930821930183624, "learning_rate": 0.000112804136792197, "loss": 11.9246, "step": 17459 }, { "epoch": 0.9507667803393846, "grad_norm": 0.5410520640706985, "learning_rate": 0.00011279539106886064, "loss": 12.033, "step": 17460 }, { "epoch": 0.9508212343359675, "grad_norm": 0.5541788671385539, "learning_rate": 0.00011278664524602516, "loss": 12.1253, "step": 17461 }, { "epoch": 0.9508756883325505, "grad_norm": 0.5638897814342899, "learning_rate": 0.00011277789932375858, "loss": 12.1153, "step": 17462 }, { "epoch": 0.9509301423291335, "grad_norm": 0.5427936828451158, "learning_rate": 0.00011276915330212894, "loss": 11.9585, "step": 17463 }, { "epoch": 0.9509845963257166, "grad_norm": 0.6050927126754538, "learning_rate": 0.00011276040718120422, "loss": 12.1493, "step": 17464 }, { "epoch": 0.9510390503222996, "grad_norm": 0.5121729638018014, "learning_rate": 0.00011275166096105243, "loss": 11.9161, "step": 17465 }, { "epoch": 0.9510935043188826, "grad_norm": 0.5349722848715646, "learning_rate": 0.00011274291464174158, "loss": 12.0515, "step": 17466 }, { "epoch": 0.9511479583154656, "grad_norm": 0.6259830681803067, "learning_rate": 0.00011273416822333969, "loss": 12.1932, "step": 17467 }, { "epoch": 0.9512024123120486, "grad_norm": 0.5447584965802076, "learning_rate": 0.00011272542170591478, "loss": 12.1151, "step": 17468 }, { "epoch": 0.9512568663086316, "grad_norm": 0.5172873978728029, "learning_rate": 0.00011271667508953485, "loss": 12.1788, "step": 17469 }, { "epoch": 0.9513113203052147, "grad_norm": 0.5173157791537659, "learning_rate": 0.00011270792837426791, "loss": 12.042, "step": 17470 }, { "epoch": 0.9513657743017977, "grad_norm": 0.6380131731191625, "learning_rate": 0.000112699181560182, "loss": 12.0384, "step": 17471 }, { "epoch": 0.9514202282983807, "grad_norm": 0.5753540757727378, "learning_rate": 0.00011269043464734513, "loss": 12.0551, "step": 17472 }, { "epoch": 0.9514746822949637, "grad_norm": 0.7165165108917677, "learning_rate": 0.00011268168763582529, "loss": 12.3405, "step": 17473 }, { "epoch": 0.9515291362915467, "grad_norm": 0.5157512469301627, "learning_rate": 0.00011267294052569055, "loss": 12.0039, "step": 17474 }, { "epoch": 0.9515835902881297, "grad_norm": 0.5786763748974254, "learning_rate": 0.00011266419331700888, "loss": 12.241, "step": 17475 }, { "epoch": 0.9516380442847128, "grad_norm": 0.5817474153048059, "learning_rate": 0.00011265544600984831, "loss": 12.1188, "step": 17476 }, { "epoch": 0.9516924982812958, "grad_norm": 0.5102441058380783, "learning_rate": 0.0001126466986042769, "loss": 12.0575, "step": 17477 }, { "epoch": 0.9517469522778788, "grad_norm": 0.6148685340058035, "learning_rate": 0.00011263795110036261, "loss": 12.2371, "step": 17478 }, { "epoch": 0.9518014062744617, "grad_norm": 0.6335387459693504, "learning_rate": 0.00011262920349817352, "loss": 12.0826, "step": 17479 }, { "epoch": 0.9518558602710447, "grad_norm": 0.5463841829279674, "learning_rate": 0.00011262045579777763, "loss": 12.0817, "step": 17480 }, { "epoch": 0.9519103142676277, "grad_norm": 0.5303469750614441, "learning_rate": 0.00011261170799924291, "loss": 12.1507, "step": 17481 }, { "epoch": 0.9519647682642108, "grad_norm": 0.5621517509390335, "learning_rate": 0.00011260296010263749, "loss": 12.1779, "step": 17482 }, { "epoch": 0.9520192222607938, "grad_norm": 0.5725064239488651, "learning_rate": 0.00011259421210802931, "loss": 12.147, "step": 17483 }, { "epoch": 0.9520736762573768, "grad_norm": 0.5288320670227035, "learning_rate": 0.00011258546401548641, "loss": 12.0917, "step": 17484 }, { "epoch": 0.9521281302539598, "grad_norm": 0.5175473878683832, "learning_rate": 0.00011257671582507687, "loss": 12.1715, "step": 17485 }, { "epoch": 0.9521825842505428, "grad_norm": 0.5596797458969293, "learning_rate": 0.00011256796753686867, "loss": 12.1307, "step": 17486 }, { "epoch": 0.9522370382471258, "grad_norm": 0.56719958493079, "learning_rate": 0.00011255921915092982, "loss": 12.1031, "step": 17487 }, { "epoch": 0.9522914922437089, "grad_norm": 0.5592444241773291, "learning_rate": 0.00011255047066732842, "loss": 12.0593, "step": 17488 }, { "epoch": 0.9523459462402919, "grad_norm": 0.5679003470701051, "learning_rate": 0.0001125417220861324, "loss": 12.2328, "step": 17489 }, { "epoch": 0.9524004002368749, "grad_norm": 0.5376761644488985, "learning_rate": 0.00011253297340740987, "loss": 12.1243, "step": 17490 }, { "epoch": 0.9524548542334579, "grad_norm": 0.5672837763664881, "learning_rate": 0.00011252422463122884, "loss": 12.0671, "step": 17491 }, { "epoch": 0.9525093082300409, "grad_norm": 0.6140860516736624, "learning_rate": 0.00011251547575765735, "loss": 12.2153, "step": 17492 }, { "epoch": 0.9525637622266239, "grad_norm": 0.5151714774041493, "learning_rate": 0.00011250672678676342, "loss": 12.0233, "step": 17493 }, { "epoch": 0.952618216223207, "grad_norm": 0.6192749628451423, "learning_rate": 0.00011249797771861506, "loss": 12.1945, "step": 17494 }, { "epoch": 0.95267267021979, "grad_norm": 0.5071903732072338, "learning_rate": 0.00011248922855328035, "loss": 12.1009, "step": 17495 }, { "epoch": 0.952727124216373, "grad_norm": 0.5047572016443033, "learning_rate": 0.0001124804792908273, "loss": 12.137, "step": 17496 }, { "epoch": 0.952781578212956, "grad_norm": 0.5803959738561252, "learning_rate": 0.00011247172993132394, "loss": 12.3401, "step": 17497 }, { "epoch": 0.9528360322095389, "grad_norm": 0.6369007115438952, "learning_rate": 0.00011246298047483834, "loss": 12.1584, "step": 17498 }, { "epoch": 0.952890486206122, "grad_norm": 0.6022015694890215, "learning_rate": 0.00011245423092143852, "loss": 12.2493, "step": 17499 }, { "epoch": 0.952944940202705, "grad_norm": 0.5495827166066689, "learning_rate": 0.00011244548127119245, "loss": 12.0801, "step": 17500 }, { "epoch": 0.952999394199288, "grad_norm": 0.5195826351260587, "learning_rate": 0.00011243673152416827, "loss": 12.0984, "step": 17501 }, { "epoch": 0.953053848195871, "grad_norm": 0.541215822448782, "learning_rate": 0.000112427981680434, "loss": 12.0597, "step": 17502 }, { "epoch": 0.953108302192454, "grad_norm": 0.546730479220331, "learning_rate": 0.00011241923174005767, "loss": 12.1773, "step": 17503 }, { "epoch": 0.953162756189037, "grad_norm": 0.5547994486952583, "learning_rate": 0.00011241048170310726, "loss": 12.1626, "step": 17504 }, { "epoch": 0.9532172101856201, "grad_norm": 0.546797868136595, "learning_rate": 0.00011240173156965088, "loss": 12.2097, "step": 17505 }, { "epoch": 0.9532716641822031, "grad_norm": 0.6532019259814937, "learning_rate": 0.00011239298133975656, "loss": 12.282, "step": 17506 }, { "epoch": 0.9533261181787861, "grad_norm": 0.5831149839815883, "learning_rate": 0.00011238423101349234, "loss": 12.2849, "step": 17507 }, { "epoch": 0.9533805721753691, "grad_norm": 0.5331895682878139, "learning_rate": 0.00011237548059092629, "loss": 12.2181, "step": 17508 }, { "epoch": 0.9534350261719521, "grad_norm": 0.5594185352289169, "learning_rate": 0.00011236673007212639, "loss": 12.0504, "step": 17509 }, { "epoch": 0.9534894801685351, "grad_norm": 0.6052484244390849, "learning_rate": 0.00011235797945716073, "loss": 12.2553, "step": 17510 }, { "epoch": 0.9535439341651182, "grad_norm": 0.5459851995298468, "learning_rate": 0.00011234922874609735, "loss": 12.2289, "step": 17511 }, { "epoch": 0.9535983881617012, "grad_norm": 0.5216090581938867, "learning_rate": 0.00011234047793900429, "loss": 12.0912, "step": 17512 }, { "epoch": 0.9536528421582842, "grad_norm": 0.6099670336744216, "learning_rate": 0.00011233172703594962, "loss": 12.2609, "step": 17513 }, { "epoch": 0.9537072961548672, "grad_norm": 0.5814475487547149, "learning_rate": 0.0001123229760370014, "loss": 12.1752, "step": 17514 }, { "epoch": 0.9537617501514501, "grad_norm": 0.597815180264054, "learning_rate": 0.00011231422494222761, "loss": 12.007, "step": 17515 }, { "epoch": 0.9538162041480331, "grad_norm": 0.49954782488132937, "learning_rate": 0.00011230547375169634, "loss": 12.0644, "step": 17516 }, { "epoch": 0.9538706581446162, "grad_norm": 0.5026829090145971, "learning_rate": 0.00011229672246547562, "loss": 12.1274, "step": 17517 }, { "epoch": 0.9539251121411992, "grad_norm": 0.5387715703406172, "learning_rate": 0.00011228797108363358, "loss": 12.0936, "step": 17518 }, { "epoch": 0.9539795661377822, "grad_norm": 0.5469127944330988, "learning_rate": 0.0001122792196062382, "loss": 12.1236, "step": 17519 }, { "epoch": 0.9540340201343652, "grad_norm": 0.506500734264548, "learning_rate": 0.00011227046803335755, "loss": 11.9995, "step": 17520 }, { "epoch": 0.9540884741309482, "grad_norm": 0.5343499369968282, "learning_rate": 0.00011226171636505967, "loss": 12.1079, "step": 17521 }, { "epoch": 0.9541429281275312, "grad_norm": 0.530386561431065, "learning_rate": 0.00011225296460141262, "loss": 12.0347, "step": 17522 }, { "epoch": 0.9541973821241143, "grad_norm": 0.5424374998579926, "learning_rate": 0.0001122442127424845, "loss": 12.1531, "step": 17523 }, { "epoch": 0.9542518361206973, "grad_norm": 0.558572783863881, "learning_rate": 0.00011223546078834328, "loss": 12.1718, "step": 17524 }, { "epoch": 0.9543062901172803, "grad_norm": 0.5112976002495363, "learning_rate": 0.0001122267087390571, "loss": 12.1203, "step": 17525 }, { "epoch": 0.9543607441138633, "grad_norm": 0.6139608398164536, "learning_rate": 0.00011221795659469396, "loss": 12.2115, "step": 17526 }, { "epoch": 0.9544151981104463, "grad_norm": 0.5590312583921028, "learning_rate": 0.00011220920435532197, "loss": 12.1637, "step": 17527 }, { "epoch": 0.9544696521070293, "grad_norm": 0.5220602531012433, "learning_rate": 0.00011220045202100913, "loss": 12.0965, "step": 17528 }, { "epoch": 0.9545241061036124, "grad_norm": 0.5146669588181602, "learning_rate": 0.00011219169959182354, "loss": 12.1581, "step": 17529 }, { "epoch": 0.9545785601001954, "grad_norm": 0.5103508856909138, "learning_rate": 0.00011218294706783323, "loss": 12.0123, "step": 17530 }, { "epoch": 0.9546330140967784, "grad_norm": 0.5394214752467305, "learning_rate": 0.00011217419444910631, "loss": 12.1238, "step": 17531 }, { "epoch": 0.9546874680933614, "grad_norm": 0.6403842059952954, "learning_rate": 0.0001121654417357108, "loss": 12.2112, "step": 17532 }, { "epoch": 0.9547419220899444, "grad_norm": 0.5181254678820698, "learning_rate": 0.00011215668892771478, "loss": 12.0037, "step": 17533 }, { "epoch": 0.9547963760865275, "grad_norm": 0.5086013058427543, "learning_rate": 0.0001121479360251863, "loss": 12.1228, "step": 17534 }, { "epoch": 0.9548508300831104, "grad_norm": 0.5257888747973913, "learning_rate": 0.00011213918302819344, "loss": 12.2236, "step": 17535 }, { "epoch": 0.9549052840796934, "grad_norm": 0.5328935148950803, "learning_rate": 0.00011213042993680424, "loss": 12.0387, "step": 17536 }, { "epoch": 0.9549597380762764, "grad_norm": 0.5784220847540573, "learning_rate": 0.00011212167675108683, "loss": 12.1117, "step": 17537 }, { "epoch": 0.9550141920728594, "grad_norm": 0.5529509688232136, "learning_rate": 0.00011211292347110918, "loss": 12.1224, "step": 17538 }, { "epoch": 0.9550686460694424, "grad_norm": 0.6476686014260221, "learning_rate": 0.0001121041700969394, "loss": 12.1743, "step": 17539 }, { "epoch": 0.9551231000660255, "grad_norm": 0.6194846217104752, "learning_rate": 0.0001120954166286456, "loss": 12.1517, "step": 17540 }, { "epoch": 0.9551775540626085, "grad_norm": 0.5867983675726799, "learning_rate": 0.00011208666306629581, "loss": 12.1768, "step": 17541 }, { "epoch": 0.9552320080591915, "grad_norm": 0.5464992669933516, "learning_rate": 0.00011207790940995808, "loss": 12.0183, "step": 17542 }, { "epoch": 0.9552864620557745, "grad_norm": 0.5798443116280011, "learning_rate": 0.0001120691556597005, "loss": 12.0348, "step": 17543 }, { "epoch": 0.9553409160523575, "grad_norm": 0.5732012006437913, "learning_rate": 0.00011206040181559117, "loss": 12.1493, "step": 17544 }, { "epoch": 0.9553953700489405, "grad_norm": 0.5767419723503235, "learning_rate": 0.0001120516478776981, "loss": 11.9773, "step": 17545 }, { "epoch": 0.9554498240455236, "grad_norm": 0.5359491833504862, "learning_rate": 0.00011204289384608941, "loss": 11.8632, "step": 17546 }, { "epoch": 0.9555042780421066, "grad_norm": 0.5848547547696398, "learning_rate": 0.00011203413972083315, "loss": 12.1047, "step": 17547 }, { "epoch": 0.9555587320386896, "grad_norm": 0.5465695726591704, "learning_rate": 0.00011202538550199742, "loss": 12.0137, "step": 17548 }, { "epoch": 0.9556131860352726, "grad_norm": 0.49996478425654056, "learning_rate": 0.00011201663118965025, "loss": 12.0775, "step": 17549 }, { "epoch": 0.9556676400318556, "grad_norm": 0.5747110527477752, "learning_rate": 0.00011200787678385975, "loss": 12.1007, "step": 17550 }, { "epoch": 0.9557220940284386, "grad_norm": 0.6505968349618484, "learning_rate": 0.000111999122284694, "loss": 12.2088, "step": 17551 }, { "epoch": 0.9557765480250217, "grad_norm": 0.5489434484718236, "learning_rate": 0.00011199036769222105, "loss": 12.26, "step": 17552 }, { "epoch": 0.9558310020216046, "grad_norm": 0.5103149364972873, "learning_rate": 0.000111981613006509, "loss": 12.174, "step": 17553 }, { "epoch": 0.9558854560181876, "grad_norm": 0.5294935254432681, "learning_rate": 0.0001119728582276259, "loss": 12.1472, "step": 17554 }, { "epoch": 0.9559399100147706, "grad_norm": 0.5714016666252779, "learning_rate": 0.00011196410335563984, "loss": 12.0593, "step": 17555 }, { "epoch": 0.9559943640113536, "grad_norm": 0.5641777737296872, "learning_rate": 0.00011195534839061895, "loss": 12.1639, "step": 17556 }, { "epoch": 0.9560488180079366, "grad_norm": 0.5434267121340497, "learning_rate": 0.00011194659333263122, "loss": 12.0999, "step": 17557 }, { "epoch": 0.9561032720045197, "grad_norm": 0.5531724406692369, "learning_rate": 0.00011193783818174482, "loss": 12.1736, "step": 17558 }, { "epoch": 0.9561577260011027, "grad_norm": 0.5639080263258313, "learning_rate": 0.00011192908293802778, "loss": 12.1458, "step": 17559 }, { "epoch": 0.9562121799976857, "grad_norm": 0.5885309555968149, "learning_rate": 0.00011192032760154814, "loss": 11.9894, "step": 17560 }, { "epoch": 0.9562666339942687, "grad_norm": 0.5576789959998956, "learning_rate": 0.00011191157217237406, "loss": 12.1119, "step": 17561 }, { "epoch": 0.9563210879908517, "grad_norm": 0.5315643033369111, "learning_rate": 0.00011190281665057362, "loss": 12.199, "step": 17562 }, { "epoch": 0.9563755419874347, "grad_norm": 0.5283050591210666, "learning_rate": 0.00011189406103621487, "loss": 12.2325, "step": 17563 }, { "epoch": 0.9564299959840178, "grad_norm": 0.5224092214354858, "learning_rate": 0.00011188530532936592, "loss": 12.1109, "step": 17564 }, { "epoch": 0.9564844499806008, "grad_norm": 0.6066525480426279, "learning_rate": 0.00011187654953009483, "loss": 12.1813, "step": 17565 }, { "epoch": 0.9565389039771838, "grad_norm": 0.649472746570281, "learning_rate": 0.00011186779363846966, "loss": 12.1454, "step": 17566 }, { "epoch": 0.9565933579737668, "grad_norm": 0.5786411478000614, "learning_rate": 0.00011185903765455859, "loss": 12.1886, "step": 17567 }, { "epoch": 0.9566478119703498, "grad_norm": 0.5527424783159517, "learning_rate": 0.00011185028157842962, "loss": 12.0374, "step": 17568 }, { "epoch": 0.9567022659669329, "grad_norm": 0.5821605386294587, "learning_rate": 0.00011184152541015092, "loss": 12.1065, "step": 17569 }, { "epoch": 0.9567567199635159, "grad_norm": 0.5925912060183305, "learning_rate": 0.00011183276914979051, "loss": 12.3158, "step": 17570 }, { "epoch": 0.9568111739600988, "grad_norm": 0.5634063289150791, "learning_rate": 0.00011182401279741648, "loss": 12.057, "step": 17571 }, { "epoch": 0.9568656279566818, "grad_norm": 0.5824811736735698, "learning_rate": 0.00011181525635309695, "loss": 12.2049, "step": 17572 }, { "epoch": 0.9569200819532648, "grad_norm": 0.5775520866209082, "learning_rate": 0.0001118064998169, "loss": 12.1704, "step": 17573 }, { "epoch": 0.9569745359498478, "grad_norm": 0.48993449407470774, "learning_rate": 0.00011179774318889378, "loss": 12.1007, "step": 17574 }, { "epoch": 0.9570289899464309, "grad_norm": 0.5439971950253318, "learning_rate": 0.00011178898646914629, "loss": 12.1628, "step": 17575 }, { "epoch": 0.9570834439430139, "grad_norm": 0.5620142288513571, "learning_rate": 0.00011178022965772566, "loss": 12.1764, "step": 17576 }, { "epoch": 0.9571378979395969, "grad_norm": 0.5586042403613316, "learning_rate": 0.00011177147275469997, "loss": 12.0713, "step": 17577 }, { "epoch": 0.9571923519361799, "grad_norm": 0.6908235593149696, "learning_rate": 0.00011176271576013738, "loss": 12.3033, "step": 17578 }, { "epoch": 0.9572468059327629, "grad_norm": 0.5475781376792936, "learning_rate": 0.00011175395867410592, "loss": 11.989, "step": 17579 }, { "epoch": 0.9573012599293459, "grad_norm": 0.6810380687100889, "learning_rate": 0.00011174520149667371, "loss": 12.0593, "step": 17580 }, { "epoch": 0.957355713925929, "grad_norm": 0.5340136630295348, "learning_rate": 0.00011173644422790883, "loss": 12.0288, "step": 17581 }, { "epoch": 0.957410167922512, "grad_norm": 0.5542940066010866, "learning_rate": 0.00011172768686787938, "loss": 12.1359, "step": 17582 }, { "epoch": 0.957464621919095, "grad_norm": 0.5892735554281785, "learning_rate": 0.00011171892941665349, "loss": 12.1703, "step": 17583 }, { "epoch": 0.957519075915678, "grad_norm": 0.544821778155144, "learning_rate": 0.00011171017187429926, "loss": 12.1065, "step": 17584 }, { "epoch": 0.957573529912261, "grad_norm": 0.5092524735271818, "learning_rate": 0.00011170141424088476, "loss": 12.0485, "step": 17585 }, { "epoch": 0.957627983908844, "grad_norm": 0.5919783181085536, "learning_rate": 0.00011169265651647809, "loss": 12.1461, "step": 17586 }, { "epoch": 0.9576824379054271, "grad_norm": 0.5603813725359015, "learning_rate": 0.00011168389870114735, "loss": 12.1747, "step": 17587 }, { "epoch": 0.9577368919020101, "grad_norm": 0.6012158867500161, "learning_rate": 0.00011167514079496064, "loss": 12.2645, "step": 17588 }, { "epoch": 0.957791345898593, "grad_norm": 0.5660345202872905, "learning_rate": 0.00011166638279798614, "loss": 12.1592, "step": 17589 }, { "epoch": 0.957845799895176, "grad_norm": 0.5603806205536941, "learning_rate": 0.00011165762471029184, "loss": 12.1503, "step": 17590 }, { "epoch": 0.957900253891759, "grad_norm": 0.5918656178193153, "learning_rate": 0.0001116488665319459, "loss": 12.2107, "step": 17591 }, { "epoch": 0.957954707888342, "grad_norm": 0.5341569004437059, "learning_rate": 0.00011164010826301645, "loss": 12.191, "step": 17592 }, { "epoch": 0.9580091618849251, "grad_norm": 0.6158463315800793, "learning_rate": 0.00011163134990357153, "loss": 12.2008, "step": 17593 }, { "epoch": 0.9580636158815081, "grad_norm": 0.5816977629065496, "learning_rate": 0.00011162259145367931, "loss": 12.1926, "step": 17594 }, { "epoch": 0.9581180698780911, "grad_norm": 0.5728226483155598, "learning_rate": 0.00011161383291340786, "loss": 12.0512, "step": 17595 }, { "epoch": 0.9581725238746741, "grad_norm": 0.5456983562201944, "learning_rate": 0.00011160507428282529, "loss": 12.1537, "step": 17596 }, { "epoch": 0.9582269778712571, "grad_norm": 0.5588182377531732, "learning_rate": 0.00011159631556199971, "loss": 12.2735, "step": 17597 }, { "epoch": 0.9582814318678402, "grad_norm": 0.5403151397688128, "learning_rate": 0.00011158755675099925, "loss": 12.0898, "step": 17598 }, { "epoch": 0.9583358858644232, "grad_norm": 0.6025445149650608, "learning_rate": 0.00011157879784989202, "loss": 12.077, "step": 17599 }, { "epoch": 0.9583903398610062, "grad_norm": 0.5381067905909196, "learning_rate": 0.00011157003885874609, "loss": 12.1695, "step": 17600 }, { "epoch": 0.9584447938575892, "grad_norm": 0.509999086899731, "learning_rate": 0.0001115612797776296, "loss": 12.0278, "step": 17601 }, { "epoch": 0.9584992478541722, "grad_norm": 0.5077297192498021, "learning_rate": 0.00011155252060661068, "loss": 12.1266, "step": 17602 }, { "epoch": 0.9585537018507552, "grad_norm": 0.5601415730476714, "learning_rate": 0.00011154376134575742, "loss": 12.07, "step": 17603 }, { "epoch": 0.9586081558473383, "grad_norm": 0.519053095973141, "learning_rate": 0.00011153500199513791, "loss": 12.1834, "step": 17604 }, { "epoch": 0.9586626098439213, "grad_norm": 0.5574930188491317, "learning_rate": 0.0001115262425548203, "loss": 12.1422, "step": 17605 }, { "epoch": 0.9587170638405043, "grad_norm": 0.5596730089308446, "learning_rate": 0.0001115174830248727, "loss": 12.1437, "step": 17606 }, { "epoch": 0.9587715178370873, "grad_norm": 0.5587333800127735, "learning_rate": 0.00011150872340536323, "loss": 12.1504, "step": 17607 }, { "epoch": 0.9588259718336702, "grad_norm": 0.5281109827424201, "learning_rate": 0.00011149996369635997, "loss": 12.083, "step": 17608 }, { "epoch": 0.9588804258302532, "grad_norm": 0.556475140686353, "learning_rate": 0.00011149120389793108, "loss": 12.0109, "step": 17609 }, { "epoch": 0.9589348798268363, "grad_norm": 0.6320131511446037, "learning_rate": 0.00011148244401014467, "loss": 11.9375, "step": 17610 }, { "epoch": 0.9589893338234193, "grad_norm": 0.5369064905067129, "learning_rate": 0.00011147368403306884, "loss": 12.1104, "step": 17611 }, { "epoch": 0.9590437878200023, "grad_norm": 0.5518176679425901, "learning_rate": 0.00011146492396677173, "loss": 12.0945, "step": 17612 }, { "epoch": 0.9590982418165853, "grad_norm": 0.5533819450011978, "learning_rate": 0.00011145616381132143, "loss": 12.0637, "step": 17613 }, { "epoch": 0.9591526958131683, "grad_norm": 0.5464334412896337, "learning_rate": 0.00011144740356678611, "loss": 12.1286, "step": 17614 }, { "epoch": 0.9592071498097513, "grad_norm": 0.5515032427606107, "learning_rate": 0.0001114386432332338, "loss": 12.0493, "step": 17615 }, { "epoch": 0.9592616038063344, "grad_norm": 0.48501813224335094, "learning_rate": 0.00011142988281073274, "loss": 12.1414, "step": 17616 }, { "epoch": 0.9593160578029174, "grad_norm": 0.508094754675668, "learning_rate": 0.00011142112229935097, "loss": 12.113, "step": 17617 }, { "epoch": 0.9593705117995004, "grad_norm": 0.5712777383401623, "learning_rate": 0.00011141236169915665, "loss": 12.2388, "step": 17618 }, { "epoch": 0.9594249657960834, "grad_norm": 0.49801658666359233, "learning_rate": 0.00011140360101021789, "loss": 12.1054, "step": 17619 }, { "epoch": 0.9594794197926664, "grad_norm": 0.5334850006632109, "learning_rate": 0.0001113948402326028, "loss": 12.1435, "step": 17620 }, { "epoch": 0.9595338737892494, "grad_norm": 0.5502584110738724, "learning_rate": 0.00011138607936637952, "loss": 12.0844, "step": 17621 }, { "epoch": 0.9595883277858325, "grad_norm": 0.6631027245293025, "learning_rate": 0.00011137731841161621, "loss": 12.1147, "step": 17622 }, { "epoch": 0.9596427817824155, "grad_norm": 0.6061199396871476, "learning_rate": 0.00011136855736838092, "loss": 12.1675, "step": 17623 }, { "epoch": 0.9596972357789985, "grad_norm": 0.5381270689582018, "learning_rate": 0.0001113597962367419, "loss": 12.2246, "step": 17624 }, { "epoch": 0.9597516897755815, "grad_norm": 0.5291846897679273, "learning_rate": 0.00011135103501676711, "loss": 12.0552, "step": 17625 }, { "epoch": 0.9598061437721644, "grad_norm": 0.5619720136638755, "learning_rate": 0.00011134227370852479, "loss": 12.2252, "step": 17626 }, { "epoch": 0.9598605977687474, "grad_norm": 0.5260468703160102, "learning_rate": 0.00011133351231208307, "loss": 12.093, "step": 17627 }, { "epoch": 0.9599150517653305, "grad_norm": 0.5527473910586487, "learning_rate": 0.00011132475082751004, "loss": 12.126, "step": 17628 }, { "epoch": 0.9599695057619135, "grad_norm": 0.5415193205337437, "learning_rate": 0.00011131598925487387, "loss": 11.9692, "step": 17629 }, { "epoch": 0.9600239597584965, "grad_norm": 0.5793518969808102, "learning_rate": 0.00011130722759424266, "loss": 12.0674, "step": 17630 }, { "epoch": 0.9600784137550795, "grad_norm": 0.6041437604961865, "learning_rate": 0.00011129846584568453, "loss": 12.1541, "step": 17631 }, { "epoch": 0.9601328677516625, "grad_norm": 0.5358846274478933, "learning_rate": 0.00011128970400926766, "loss": 12.1374, "step": 17632 }, { "epoch": 0.9601873217482456, "grad_norm": 0.5344201600769312, "learning_rate": 0.00011128094208506014, "loss": 12.138, "step": 17633 }, { "epoch": 0.9602417757448286, "grad_norm": 0.658089256867609, "learning_rate": 0.00011127218007313016, "loss": 12.1471, "step": 17634 }, { "epoch": 0.9602962297414116, "grad_norm": 0.5888500353636016, "learning_rate": 0.00011126341797354578, "loss": 12.1384, "step": 17635 }, { "epoch": 0.9603506837379946, "grad_norm": 0.5721624240589097, "learning_rate": 0.0001112546557863752, "loss": 12.1714, "step": 17636 }, { "epoch": 0.9604051377345776, "grad_norm": 0.5605762852581129, "learning_rate": 0.00011124589351168648, "loss": 12.0539, "step": 17637 }, { "epoch": 0.9604595917311606, "grad_norm": 0.6076806266827197, "learning_rate": 0.00011123713114954784, "loss": 12.1637, "step": 17638 }, { "epoch": 0.9605140457277437, "grad_norm": 0.5530730637761129, "learning_rate": 0.00011122836870002739, "loss": 12.0228, "step": 17639 }, { "epoch": 0.9605684997243267, "grad_norm": 0.5300881258613511, "learning_rate": 0.00011121960616319327, "loss": 12.1155, "step": 17640 }, { "epoch": 0.9606229537209097, "grad_norm": 0.5156942259719506, "learning_rate": 0.0001112108435391136, "loss": 12.1455, "step": 17641 }, { "epoch": 0.9606774077174927, "grad_norm": 0.5948524091580496, "learning_rate": 0.00011120208082785653, "loss": 12.1968, "step": 17642 }, { "epoch": 0.9607318617140757, "grad_norm": 0.5832431241578581, "learning_rate": 0.00011119331802949016, "loss": 12.1199, "step": 17643 }, { "epoch": 0.9607863157106586, "grad_norm": 0.5666121368406475, "learning_rate": 0.00011118455514408272, "loss": 12.0781, "step": 17644 }, { "epoch": 0.9608407697072417, "grad_norm": 0.534523597152492, "learning_rate": 0.0001111757921717023, "loss": 12.0978, "step": 17645 }, { "epoch": 0.9608952237038247, "grad_norm": 0.5605840777661149, "learning_rate": 0.00011116702911241703, "loss": 12.1463, "step": 17646 }, { "epoch": 0.9609496777004077, "grad_norm": 0.5344915870698154, "learning_rate": 0.00011115826596629508, "loss": 12.169, "step": 17647 }, { "epoch": 0.9610041316969907, "grad_norm": 0.593752192813583, "learning_rate": 0.00011114950273340456, "loss": 12.1766, "step": 17648 }, { "epoch": 0.9610585856935737, "grad_norm": 0.5331963208485385, "learning_rate": 0.00011114073941381369, "loss": 12.1211, "step": 17649 }, { "epoch": 0.9611130396901567, "grad_norm": 0.6414026744605359, "learning_rate": 0.00011113197600759053, "loss": 12.1183, "step": 17650 }, { "epoch": 0.9611674936867398, "grad_norm": 0.6785769352508368, "learning_rate": 0.00011112321251480324, "loss": 12.2273, "step": 17651 }, { "epoch": 0.9612219476833228, "grad_norm": 0.5554691315348811, "learning_rate": 0.00011111444893552, "loss": 11.9135, "step": 17652 }, { "epoch": 0.9612764016799058, "grad_norm": 0.5031722199806385, "learning_rate": 0.00011110568526980896, "loss": 12.1629, "step": 17653 }, { "epoch": 0.9613308556764888, "grad_norm": 0.5353251694854835, "learning_rate": 0.00011109692151773822, "loss": 12.1008, "step": 17654 }, { "epoch": 0.9613853096730718, "grad_norm": 0.5602726696502285, "learning_rate": 0.00011108815767937598, "loss": 12.0703, "step": 17655 }, { "epoch": 0.9614397636696548, "grad_norm": 0.5902007713398969, "learning_rate": 0.00011107939375479035, "loss": 12.1335, "step": 17656 }, { "epoch": 0.9614942176662379, "grad_norm": 0.5433913493252555, "learning_rate": 0.00011107062974404949, "loss": 12.0951, "step": 17657 }, { "epoch": 0.9615486716628209, "grad_norm": 0.5543159795777511, "learning_rate": 0.00011106186564722156, "loss": 12.1574, "step": 17658 }, { "epoch": 0.9616031256594039, "grad_norm": 0.5747791802624055, "learning_rate": 0.00011105310146437473, "loss": 12.0994, "step": 17659 }, { "epoch": 0.9616575796559869, "grad_norm": 0.5090408070972073, "learning_rate": 0.00011104433719557711, "loss": 12.1205, "step": 17660 }, { "epoch": 0.9617120336525699, "grad_norm": 0.508055808689014, "learning_rate": 0.00011103557284089688, "loss": 12.1905, "step": 17661 }, { "epoch": 0.9617664876491528, "grad_norm": 0.5597201386370542, "learning_rate": 0.00011102680840040218, "loss": 12.0787, "step": 17662 }, { "epoch": 0.961820941645736, "grad_norm": 0.49863322307253, "learning_rate": 0.00011101804387416117, "loss": 12.151, "step": 17663 }, { "epoch": 0.9618753956423189, "grad_norm": 0.6389963740600546, "learning_rate": 0.000111009279262242, "loss": 12.2213, "step": 17664 }, { "epoch": 0.9619298496389019, "grad_norm": 0.6343941955728799, "learning_rate": 0.00011100051456471283, "loss": 12.0429, "step": 17665 }, { "epoch": 0.9619843036354849, "grad_norm": 0.5353282170232694, "learning_rate": 0.00011099174978164182, "loss": 12.1827, "step": 17666 }, { "epoch": 0.9620387576320679, "grad_norm": 0.5617569530792962, "learning_rate": 0.00011098298491309711, "loss": 12.2126, "step": 17667 }, { "epoch": 0.962093211628651, "grad_norm": 0.5868797748808696, "learning_rate": 0.00011097421995914687, "loss": 12.2281, "step": 17668 }, { "epoch": 0.962147665625234, "grad_norm": 0.534053789297622, "learning_rate": 0.00011096545491985926, "loss": 12.1078, "step": 17669 }, { "epoch": 0.962202119621817, "grad_norm": 0.722243932219114, "learning_rate": 0.00011095668979530242, "loss": 12.3244, "step": 17670 }, { "epoch": 0.9622565736184, "grad_norm": 0.5627001055793047, "learning_rate": 0.00011094792458554455, "loss": 12.09, "step": 17671 }, { "epoch": 0.962311027614983, "grad_norm": 0.5470272643302989, "learning_rate": 0.00011093915929065378, "loss": 12.0246, "step": 17672 }, { "epoch": 0.962365481611566, "grad_norm": 0.5584494205081291, "learning_rate": 0.00011093039391069823, "loss": 12.1595, "step": 17673 }, { "epoch": 0.9624199356081491, "grad_norm": 0.5521649001758041, "learning_rate": 0.00011092162844574616, "loss": 12.0636, "step": 17674 }, { "epoch": 0.9624743896047321, "grad_norm": 0.5869416157238587, "learning_rate": 0.00011091286289586564, "loss": 12.2035, "step": 17675 }, { "epoch": 0.9625288436013151, "grad_norm": 0.6040009460125166, "learning_rate": 0.00011090409726112487, "loss": 12.246, "step": 17676 }, { "epoch": 0.9625832975978981, "grad_norm": 0.525820810259676, "learning_rate": 0.00011089533154159202, "loss": 12.1115, "step": 17677 }, { "epoch": 0.9626377515944811, "grad_norm": 0.550318637173561, "learning_rate": 0.00011088656573733524, "loss": 12.0287, "step": 17678 }, { "epoch": 0.962692205591064, "grad_norm": 0.5773091445174735, "learning_rate": 0.00011087779984842273, "loss": 12.0927, "step": 17679 }, { "epoch": 0.9627466595876472, "grad_norm": 0.5159794268401052, "learning_rate": 0.00011086903387492257, "loss": 12.1421, "step": 17680 }, { "epoch": 0.9628011135842302, "grad_norm": 0.5845435845603085, "learning_rate": 0.00011086026781690299, "loss": 12.1661, "step": 17681 }, { "epoch": 0.9628555675808131, "grad_norm": 0.570819557846207, "learning_rate": 0.00011085150167443217, "loss": 12.1378, "step": 17682 }, { "epoch": 0.9629100215773961, "grad_norm": 0.5376903021291405, "learning_rate": 0.00011084273544757826, "loss": 12.0382, "step": 17683 }, { "epoch": 0.9629644755739791, "grad_norm": 0.6044262125617864, "learning_rate": 0.00011083396913640942, "loss": 12.2581, "step": 17684 }, { "epoch": 0.9630189295705621, "grad_norm": 0.5417333473630804, "learning_rate": 0.00011082520274099382, "loss": 12.1245, "step": 17685 }, { "epoch": 0.9630733835671452, "grad_norm": 0.5730865039814494, "learning_rate": 0.00011081643626139957, "loss": 12.2003, "step": 17686 }, { "epoch": 0.9631278375637282, "grad_norm": 0.5888580206213043, "learning_rate": 0.00011080766969769493, "loss": 12.1222, "step": 17687 }, { "epoch": 0.9631822915603112, "grad_norm": 0.5706176479690456, "learning_rate": 0.00011079890304994807, "loss": 12.0993, "step": 17688 }, { "epoch": 0.9632367455568942, "grad_norm": 0.6091264510310652, "learning_rate": 0.0001107901363182271, "loss": 12.1513, "step": 17689 }, { "epoch": 0.9632911995534772, "grad_norm": 0.5900429395195937, "learning_rate": 0.00011078136950260025, "loss": 12.1448, "step": 17690 }, { "epoch": 0.9633456535500602, "grad_norm": 0.5261933964563386, "learning_rate": 0.00011077260260313565, "loss": 12.1517, "step": 17691 }, { "epoch": 0.9634001075466433, "grad_norm": 0.5556032226876118, "learning_rate": 0.00011076383561990145, "loss": 12.1654, "step": 17692 }, { "epoch": 0.9634545615432263, "grad_norm": 0.6023480256814611, "learning_rate": 0.0001107550685529659, "loss": 12.1451, "step": 17693 }, { "epoch": 0.9635090155398093, "grad_norm": 0.5415615996758417, "learning_rate": 0.0001107463014023971, "loss": 12.1361, "step": 17694 }, { "epoch": 0.9635634695363923, "grad_norm": 0.5984563325807083, "learning_rate": 0.00011073753416826331, "loss": 12.2119, "step": 17695 }, { "epoch": 0.9636179235329753, "grad_norm": 0.522196686225686, "learning_rate": 0.00011072876685063262, "loss": 12.1152, "step": 17696 }, { "epoch": 0.9636723775295583, "grad_norm": 0.5544319855034716, "learning_rate": 0.00011071999944957321, "loss": 12.0433, "step": 17697 }, { "epoch": 0.9637268315261414, "grad_norm": 0.6373008070139872, "learning_rate": 0.00011071123196515332, "loss": 12.2079, "step": 17698 }, { "epoch": 0.9637812855227244, "grad_norm": 0.5717749510728661, "learning_rate": 0.0001107024643974411, "loss": 12.0661, "step": 17699 }, { "epoch": 0.9638357395193073, "grad_norm": 0.5143182302457973, "learning_rate": 0.00011069369674650474, "loss": 12.0557, "step": 17700 }, { "epoch": 0.9638901935158903, "grad_norm": 0.6181546577870921, "learning_rate": 0.00011068492901241237, "loss": 12.3, "step": 17701 }, { "epoch": 0.9639446475124733, "grad_norm": 0.5729675225960451, "learning_rate": 0.0001106761611952322, "loss": 12.2037, "step": 17702 }, { "epoch": 0.9639991015090564, "grad_norm": 0.6016097035970154, "learning_rate": 0.0001106673932950324, "loss": 12.2443, "step": 17703 }, { "epoch": 0.9640535555056394, "grad_norm": 0.5869604428655072, "learning_rate": 0.00011065862531188116, "loss": 12.1124, "step": 17704 }, { "epoch": 0.9641080095022224, "grad_norm": 0.4965795860117334, "learning_rate": 0.00011064985724584671, "loss": 12.0727, "step": 17705 }, { "epoch": 0.9641624634988054, "grad_norm": 0.6585504075663329, "learning_rate": 0.00011064108909699715, "loss": 12.2583, "step": 17706 }, { "epoch": 0.9642169174953884, "grad_norm": 0.5649950048506434, "learning_rate": 0.00011063232086540069, "loss": 11.9266, "step": 17707 }, { "epoch": 0.9642713714919714, "grad_norm": 0.6744596465749182, "learning_rate": 0.00011062355255112552, "loss": 12.1738, "step": 17708 }, { "epoch": 0.9643258254885545, "grad_norm": 0.5533997239712947, "learning_rate": 0.00011061478415423983, "loss": 12.1556, "step": 17709 }, { "epoch": 0.9643802794851375, "grad_norm": 0.5526966187371531, "learning_rate": 0.00011060601567481181, "loss": 12.1112, "step": 17710 }, { "epoch": 0.9644347334817205, "grad_norm": 0.5610632617639335, "learning_rate": 0.00011059724711290961, "loss": 12.0933, "step": 17711 }, { "epoch": 0.9644891874783035, "grad_norm": 0.6103466403065875, "learning_rate": 0.00011058847846860147, "loss": 12.2399, "step": 17712 }, { "epoch": 0.9645436414748865, "grad_norm": 0.6296615986685131, "learning_rate": 0.00011057970974195553, "loss": 12.0778, "step": 17713 }, { "epoch": 0.9645980954714695, "grad_norm": 0.5589011964809298, "learning_rate": 0.00011057094093303997, "loss": 12.2017, "step": 17714 }, { "epoch": 0.9646525494680526, "grad_norm": 0.5706032467020418, "learning_rate": 0.00011056217204192306, "loss": 12.1385, "step": 17715 }, { "epoch": 0.9647070034646356, "grad_norm": 0.5380067185680935, "learning_rate": 0.00011055340306867288, "loss": 12.1828, "step": 17716 }, { "epoch": 0.9647614574612186, "grad_norm": 0.5678778439882624, "learning_rate": 0.00011054463401335769, "loss": 12.009, "step": 17717 }, { "epoch": 0.9648159114578015, "grad_norm": 0.5380345678007299, "learning_rate": 0.00011053586487604563, "loss": 12.0443, "step": 17718 }, { "epoch": 0.9648703654543845, "grad_norm": 0.5474811307620698, "learning_rate": 0.00011052709565680493, "loss": 12.1248, "step": 17719 }, { "epoch": 0.9649248194509675, "grad_norm": 0.5442985197543375, "learning_rate": 0.00011051832635570379, "loss": 12.0413, "step": 17720 }, { "epoch": 0.9649792734475506, "grad_norm": 0.5285841645711664, "learning_rate": 0.00011050955697281036, "loss": 12.0744, "step": 17721 }, { "epoch": 0.9650337274441336, "grad_norm": 0.5890287986663996, "learning_rate": 0.00011050078750819284, "loss": 12.0981, "step": 17722 }, { "epoch": 0.9650881814407166, "grad_norm": 0.5926662553098975, "learning_rate": 0.00011049201796191945, "loss": 12.135, "step": 17723 }, { "epoch": 0.9651426354372996, "grad_norm": 0.556356239472455, "learning_rate": 0.00011048324833405839, "loss": 12.0762, "step": 17724 }, { "epoch": 0.9651970894338826, "grad_norm": 0.5723507110626933, "learning_rate": 0.00011047447862467781, "loss": 12.095, "step": 17725 }, { "epoch": 0.9652515434304656, "grad_norm": 0.6284135079318599, "learning_rate": 0.00011046570883384593, "loss": 12.2335, "step": 17726 }, { "epoch": 0.9653059974270487, "grad_norm": 0.5506230573164945, "learning_rate": 0.00011045693896163094, "loss": 12.1929, "step": 17727 }, { "epoch": 0.9653604514236317, "grad_norm": 0.5633949960312391, "learning_rate": 0.00011044816900810105, "loss": 12.2633, "step": 17728 }, { "epoch": 0.9654149054202147, "grad_norm": 0.5426691026278101, "learning_rate": 0.00011043939897332442, "loss": 12.0683, "step": 17729 }, { "epoch": 0.9654693594167977, "grad_norm": 0.556582190461888, "learning_rate": 0.0001104306288573693, "loss": 12.0986, "step": 17730 }, { "epoch": 0.9655238134133807, "grad_norm": 0.5444972117144539, "learning_rate": 0.00011042185866030386, "loss": 12.2007, "step": 17731 }, { "epoch": 0.9655782674099638, "grad_norm": 0.6540297175499002, "learning_rate": 0.00011041308838219628, "loss": 12.009, "step": 17732 }, { "epoch": 0.9656327214065468, "grad_norm": 0.5411306170642678, "learning_rate": 0.0001104043180231148, "loss": 12.095, "step": 17733 }, { "epoch": 0.9656871754031298, "grad_norm": 0.5715791532516885, "learning_rate": 0.00011039554758312758, "loss": 12.2714, "step": 17734 }, { "epoch": 0.9657416293997128, "grad_norm": 0.48470411648455586, "learning_rate": 0.00011038677706230285, "loss": 11.9713, "step": 17735 }, { "epoch": 0.9657960833962957, "grad_norm": 0.5726443792608469, "learning_rate": 0.00011037800646070879, "loss": 12.2113, "step": 17736 }, { "epoch": 0.9658505373928787, "grad_norm": 0.5876872602975912, "learning_rate": 0.00011036923577841363, "loss": 12.1221, "step": 17737 }, { "epoch": 0.9659049913894618, "grad_norm": 0.6080695032280878, "learning_rate": 0.00011036046501548554, "loss": 12.1666, "step": 17738 }, { "epoch": 0.9659594453860448, "grad_norm": 0.5340274869133574, "learning_rate": 0.00011035169417199274, "loss": 12.2262, "step": 17739 }, { "epoch": 0.9660138993826278, "grad_norm": 0.5118138027530789, "learning_rate": 0.00011034292324800342, "loss": 12.0464, "step": 17740 }, { "epoch": 0.9660683533792108, "grad_norm": 0.557631660530052, "learning_rate": 0.00011033415224358581, "loss": 12.0623, "step": 17741 }, { "epoch": 0.9661228073757938, "grad_norm": 0.5622665514054274, "learning_rate": 0.00011032538115880809, "loss": 12.1165, "step": 17742 }, { "epoch": 0.9661772613723768, "grad_norm": 0.6304248670208875, "learning_rate": 0.00011031660999373847, "loss": 12.2801, "step": 17743 }, { "epoch": 0.9662317153689599, "grad_norm": 0.5487926451839776, "learning_rate": 0.00011030783874844517, "loss": 12.2362, "step": 17744 }, { "epoch": 0.9662861693655429, "grad_norm": 0.5422608903886796, "learning_rate": 0.00011029906742299641, "loss": 12.1801, "step": 17745 }, { "epoch": 0.9663406233621259, "grad_norm": 0.5263104983244903, "learning_rate": 0.00011029029601746033, "loss": 12.0961, "step": 17746 }, { "epoch": 0.9663950773587089, "grad_norm": 0.5613791572161544, "learning_rate": 0.00011028152453190518, "loss": 12.1236, "step": 17747 }, { "epoch": 0.9664495313552919, "grad_norm": 0.5716835210741285, "learning_rate": 0.00011027275296639921, "loss": 12.1599, "step": 17748 }, { "epoch": 0.9665039853518749, "grad_norm": 0.4813681714826752, "learning_rate": 0.00011026398132101057, "loss": 12.0847, "step": 17749 }, { "epoch": 0.966558439348458, "grad_norm": 0.5561675500158483, "learning_rate": 0.0001102552095958075, "loss": 12.069, "step": 17750 }, { "epoch": 0.966612893345041, "grad_norm": 0.5888608590341523, "learning_rate": 0.00011024643779085819, "loss": 12.1539, "step": 17751 }, { "epoch": 0.966667347341624, "grad_norm": 0.5710489765909422, "learning_rate": 0.00011023766590623085, "loss": 12.1051, "step": 17752 }, { "epoch": 0.966721801338207, "grad_norm": 0.5384051393906014, "learning_rate": 0.00011022889394199371, "loss": 12.1687, "step": 17753 }, { "epoch": 0.96677625533479, "grad_norm": 0.6134717562884549, "learning_rate": 0.000110220121898215, "loss": 12.2179, "step": 17754 }, { "epoch": 0.9668307093313729, "grad_norm": 0.6099856991129531, "learning_rate": 0.0001102113497749629, "loss": 12.1484, "step": 17755 }, { "epoch": 0.966885163327956, "grad_norm": 0.5933025472236174, "learning_rate": 0.00011020257757230563, "loss": 12.0995, "step": 17756 }, { "epoch": 0.966939617324539, "grad_norm": 0.6946683299584384, "learning_rate": 0.00011019380529031138, "loss": 12.1113, "step": 17757 }, { "epoch": 0.966994071321122, "grad_norm": 0.5606703202864911, "learning_rate": 0.00011018503292904841, "loss": 12.0408, "step": 17758 }, { "epoch": 0.967048525317705, "grad_norm": 0.5298869291861762, "learning_rate": 0.00011017626048858491, "loss": 12.2027, "step": 17759 }, { "epoch": 0.967102979314288, "grad_norm": 0.5606826260571348, "learning_rate": 0.00011016748796898913, "loss": 12.0425, "step": 17760 }, { "epoch": 0.967157433310871, "grad_norm": 0.5834963687793829, "learning_rate": 0.00011015871537032923, "loss": 12.1075, "step": 17761 }, { "epoch": 0.9672118873074541, "grad_norm": 0.5592877249862209, "learning_rate": 0.00011014994269267347, "loss": 12.0068, "step": 17762 }, { "epoch": 0.9672663413040371, "grad_norm": 0.5764920098079752, "learning_rate": 0.00011014116993609001, "loss": 12.1481, "step": 17763 }, { "epoch": 0.9673207953006201, "grad_norm": 0.5605968035673906, "learning_rate": 0.00011013239710064716, "loss": 12.1694, "step": 17764 }, { "epoch": 0.9673752492972031, "grad_norm": 0.5566859412922541, "learning_rate": 0.00011012362418641309, "loss": 12.2004, "step": 17765 }, { "epoch": 0.9674297032937861, "grad_norm": 0.5714501816840976, "learning_rate": 0.00011011485119345602, "loss": 12.1422, "step": 17766 }, { "epoch": 0.9674841572903692, "grad_norm": 0.5339414451837563, "learning_rate": 0.00011010607812184415, "loss": 12.0454, "step": 17767 }, { "epoch": 0.9675386112869522, "grad_norm": 0.5231871867158163, "learning_rate": 0.00011009730497164572, "loss": 12.0991, "step": 17768 }, { "epoch": 0.9675930652835352, "grad_norm": 0.5128397437189953, "learning_rate": 0.00011008853174292895, "loss": 12.0688, "step": 17769 }, { "epoch": 0.9676475192801182, "grad_norm": 0.5893411496827528, "learning_rate": 0.0001100797584357621, "loss": 12.1022, "step": 17770 }, { "epoch": 0.9677019732767012, "grad_norm": 0.541464055289501, "learning_rate": 0.00011007098505021334, "loss": 12.0411, "step": 17771 }, { "epoch": 0.9677564272732841, "grad_norm": 0.5888980133927575, "learning_rate": 0.0001100622115863509, "loss": 12.1288, "step": 17772 }, { "epoch": 0.9678108812698673, "grad_norm": 0.6788846626525558, "learning_rate": 0.00011005343804424302, "loss": 12.1538, "step": 17773 }, { "epoch": 0.9678653352664502, "grad_norm": 0.5944965815213048, "learning_rate": 0.00011004466442395792, "loss": 12.0351, "step": 17774 }, { "epoch": 0.9679197892630332, "grad_norm": 0.5203669199642835, "learning_rate": 0.00011003589072556384, "loss": 12.1308, "step": 17775 }, { "epoch": 0.9679742432596162, "grad_norm": 0.5225541564802234, "learning_rate": 0.00011002711694912898, "loss": 12.1828, "step": 17776 }, { "epoch": 0.9680286972561992, "grad_norm": 0.5809015328095049, "learning_rate": 0.00011001834309472157, "loss": 12.094, "step": 17777 }, { "epoch": 0.9680831512527822, "grad_norm": 0.5858089071552731, "learning_rate": 0.00011000956916240985, "loss": 12.1914, "step": 17778 }, { "epoch": 0.9681376052493653, "grad_norm": 0.5900559454762778, "learning_rate": 0.00011000079515226204, "loss": 12.1834, "step": 17779 }, { "epoch": 0.9681920592459483, "grad_norm": 0.5748059397943471, "learning_rate": 0.00010999202106434637, "loss": 12.1808, "step": 17780 }, { "epoch": 0.9682465132425313, "grad_norm": 0.5127534384417431, "learning_rate": 0.00010998324689873107, "loss": 11.8834, "step": 17781 }, { "epoch": 0.9683009672391143, "grad_norm": 0.5320361643919194, "learning_rate": 0.00010997447265548437, "loss": 12.0902, "step": 17782 }, { "epoch": 0.9683554212356973, "grad_norm": 0.5377705507502049, "learning_rate": 0.00010996569833467449, "loss": 12.1417, "step": 17783 }, { "epoch": 0.9684098752322803, "grad_norm": 0.5265742666871742, "learning_rate": 0.00010995692393636968, "loss": 12.1022, "step": 17784 }, { "epoch": 0.9684643292288634, "grad_norm": 0.5190329800999794, "learning_rate": 0.00010994814946063816, "loss": 11.987, "step": 17785 }, { "epoch": 0.9685187832254464, "grad_norm": 0.5031755079473749, "learning_rate": 0.00010993937490754815, "loss": 12.1866, "step": 17786 }, { "epoch": 0.9685732372220294, "grad_norm": 0.5982588842106145, "learning_rate": 0.00010993060027716791, "loss": 12.0101, "step": 17787 }, { "epoch": 0.9686276912186124, "grad_norm": 0.550467381176369, "learning_rate": 0.00010992182556956562, "loss": 12.1632, "step": 17788 }, { "epoch": 0.9686821452151954, "grad_norm": 0.6039951623824136, "learning_rate": 0.00010991305078480957, "loss": 12.1521, "step": 17789 }, { "epoch": 0.9687365992117783, "grad_norm": 0.5398755963835501, "learning_rate": 0.000109904275922968, "loss": 12.1698, "step": 17790 }, { "epoch": 0.9687910532083615, "grad_norm": 0.5309708003642307, "learning_rate": 0.0001098955009841091, "loss": 12.0762, "step": 17791 }, { "epoch": 0.9688455072049444, "grad_norm": 0.5860085116735837, "learning_rate": 0.00010988672596830112, "loss": 12.3387, "step": 17792 }, { "epoch": 0.9688999612015274, "grad_norm": 0.5844423560245651, "learning_rate": 0.00010987795087561232, "loss": 12.1434, "step": 17793 }, { "epoch": 0.9689544151981104, "grad_norm": 0.6662312473550055, "learning_rate": 0.0001098691757061109, "loss": 12.0429, "step": 17794 }, { "epoch": 0.9690088691946934, "grad_norm": 0.5643006113425973, "learning_rate": 0.00010986040045986512, "loss": 12.1875, "step": 17795 }, { "epoch": 0.9690633231912764, "grad_norm": 0.5834323785455291, "learning_rate": 0.0001098516251369432, "loss": 12.1732, "step": 17796 }, { "epoch": 0.9691177771878595, "grad_norm": 0.5963028530585607, "learning_rate": 0.0001098428497374134, "loss": 12.0946, "step": 17797 }, { "epoch": 0.9691722311844425, "grad_norm": 0.6125634195562416, "learning_rate": 0.00010983407426134396, "loss": 12.1698, "step": 17798 }, { "epoch": 0.9692266851810255, "grad_norm": 0.615414593265336, "learning_rate": 0.0001098252987088031, "loss": 12.107, "step": 17799 }, { "epoch": 0.9692811391776085, "grad_norm": 0.533551237794284, "learning_rate": 0.0001098165230798591, "loss": 12.0986, "step": 17800 }, { "epoch": 0.9693355931741915, "grad_norm": 0.6055228077578817, "learning_rate": 0.00010980774737458011, "loss": 12.2149, "step": 17801 }, { "epoch": 0.9693900471707746, "grad_norm": 0.571576525970899, "learning_rate": 0.00010979897159303447, "loss": 12.1945, "step": 17802 }, { "epoch": 0.9694445011673576, "grad_norm": 0.6362828666064082, "learning_rate": 0.00010979019573529037, "loss": 12.0863, "step": 17803 }, { "epoch": 0.9694989551639406, "grad_norm": 0.5302447488075314, "learning_rate": 0.00010978141980141608, "loss": 12.0461, "step": 17804 }, { "epoch": 0.9695534091605236, "grad_norm": 0.4924079645027928, "learning_rate": 0.00010977264379147985, "loss": 12.0715, "step": 17805 }, { "epoch": 0.9696078631571066, "grad_norm": 0.5327589830801305, "learning_rate": 0.00010976386770554983, "loss": 12.1169, "step": 17806 }, { "epoch": 0.9696623171536896, "grad_norm": 0.5716778845807983, "learning_rate": 0.00010975509154369439, "loss": 12.0048, "step": 17807 }, { "epoch": 0.9697167711502727, "grad_norm": 0.5491710740186433, "learning_rate": 0.00010974631530598171, "loss": 12.0912, "step": 17808 }, { "epoch": 0.9697712251468557, "grad_norm": 0.5898653267786542, "learning_rate": 0.00010973753899248005, "loss": 12.2711, "step": 17809 }, { "epoch": 0.9698256791434386, "grad_norm": 0.6055769095504616, "learning_rate": 0.00010972876260325769, "loss": 12.2608, "step": 17810 }, { "epoch": 0.9698801331400216, "grad_norm": 0.6172966019298604, "learning_rate": 0.0001097199861383828, "loss": 12.1219, "step": 17811 }, { "epoch": 0.9699345871366046, "grad_norm": 0.5145952516661499, "learning_rate": 0.00010971120959792365, "loss": 12.1383, "step": 17812 }, { "epoch": 0.9699890411331876, "grad_norm": 0.5068824144878783, "learning_rate": 0.00010970243298194853, "loss": 12.098, "step": 17813 }, { "epoch": 0.9700434951297707, "grad_norm": 0.5364138632936271, "learning_rate": 0.00010969365629052566, "loss": 12.0551, "step": 17814 }, { "epoch": 0.9700979491263537, "grad_norm": 0.5586029030784768, "learning_rate": 0.00010968487952372333, "loss": 12.1015, "step": 17815 }, { "epoch": 0.9701524031229367, "grad_norm": 0.5385237767372034, "learning_rate": 0.0001096761026816097, "loss": 12.119, "step": 17816 }, { "epoch": 0.9702068571195197, "grad_norm": 0.5140552714449329, "learning_rate": 0.00010966732576425309, "loss": 12.1271, "step": 17817 }, { "epoch": 0.9702613111161027, "grad_norm": 0.6726861153897186, "learning_rate": 0.00010965854877172172, "loss": 12.008, "step": 17818 }, { "epoch": 0.9703157651126857, "grad_norm": 0.519146769147053, "learning_rate": 0.00010964977170408387, "loss": 12.1725, "step": 17819 }, { "epoch": 0.9703702191092688, "grad_norm": 0.53818594048876, "learning_rate": 0.00010964099456140781, "loss": 12.1311, "step": 17820 }, { "epoch": 0.9704246731058518, "grad_norm": 0.5203950796474744, "learning_rate": 0.00010963221734376172, "loss": 12.0239, "step": 17821 }, { "epoch": 0.9704791271024348, "grad_norm": 0.5622449476707068, "learning_rate": 0.0001096234400512139, "loss": 12.2025, "step": 17822 }, { "epoch": 0.9705335810990178, "grad_norm": 0.5555495555370853, "learning_rate": 0.00010961466268383258, "loss": 12.0297, "step": 17823 }, { "epoch": 0.9705880350956008, "grad_norm": 0.5917465536259531, "learning_rate": 0.00010960588524168604, "loss": 12.1552, "step": 17824 }, { "epoch": 0.9706424890921838, "grad_norm": 0.6715298222792764, "learning_rate": 0.00010959710772484256, "loss": 12.1018, "step": 17825 }, { "epoch": 0.9706969430887669, "grad_norm": 0.5380566954643132, "learning_rate": 0.00010958833013337033, "loss": 11.9396, "step": 17826 }, { "epoch": 0.9707513970853499, "grad_norm": 0.5373949699354345, "learning_rate": 0.00010957955246733764, "loss": 12.1633, "step": 17827 }, { "epoch": 0.9708058510819328, "grad_norm": 0.5680912823299027, "learning_rate": 0.00010957077472681274, "loss": 12.1199, "step": 17828 }, { "epoch": 0.9708603050785158, "grad_norm": 0.5263261907686924, "learning_rate": 0.00010956199691186388, "loss": 12.1293, "step": 17829 }, { "epoch": 0.9709147590750988, "grad_norm": 0.581323079354999, "learning_rate": 0.00010955321902255935, "loss": 12.2449, "step": 17830 }, { "epoch": 0.9709692130716818, "grad_norm": 0.5268762044732026, "learning_rate": 0.00010954444105896739, "loss": 12.1004, "step": 17831 }, { "epoch": 0.9710236670682649, "grad_norm": 0.5384281904412787, "learning_rate": 0.00010953566302115625, "loss": 12.1106, "step": 17832 }, { "epoch": 0.9710781210648479, "grad_norm": 0.5466667623989441, "learning_rate": 0.00010952688490919419, "loss": 12.1187, "step": 17833 }, { "epoch": 0.9711325750614309, "grad_norm": 0.7741320210031013, "learning_rate": 0.00010951810672314946, "loss": 12.0845, "step": 17834 }, { "epoch": 0.9711870290580139, "grad_norm": 0.5379145355908516, "learning_rate": 0.00010950932846309034, "loss": 12.1771, "step": 17835 }, { "epoch": 0.9712414830545969, "grad_norm": 0.5433365394397129, "learning_rate": 0.00010950055012908513, "loss": 12.1766, "step": 17836 }, { "epoch": 0.97129593705118, "grad_norm": 0.6430230755614863, "learning_rate": 0.00010949177172120202, "loss": 12.0224, "step": 17837 }, { "epoch": 0.971350391047763, "grad_norm": 0.5650864017654597, "learning_rate": 0.00010948299323950928, "loss": 11.9911, "step": 17838 }, { "epoch": 0.971404845044346, "grad_norm": 0.5924520159138199, "learning_rate": 0.00010947421468407522, "loss": 12.0785, "step": 17839 }, { "epoch": 0.971459299040929, "grad_norm": 0.5629261714071007, "learning_rate": 0.00010946543605496806, "loss": 12.0858, "step": 17840 }, { "epoch": 0.971513753037512, "grad_norm": 0.6190367299550407, "learning_rate": 0.0001094566573522561, "loss": 12.1683, "step": 17841 }, { "epoch": 0.971568207034095, "grad_norm": 0.5187158542715369, "learning_rate": 0.00010944787857600758, "loss": 12.137, "step": 17842 }, { "epoch": 0.9716226610306781, "grad_norm": 0.5514563108516193, "learning_rate": 0.00010943909972629078, "loss": 11.932, "step": 17843 }, { "epoch": 0.9716771150272611, "grad_norm": 0.5467067275674893, "learning_rate": 0.00010943032080317394, "loss": 12.0348, "step": 17844 }, { "epoch": 0.971731569023844, "grad_norm": 0.9845368637661265, "learning_rate": 0.00010942154180672535, "loss": 12.1329, "step": 17845 }, { "epoch": 0.971786023020427, "grad_norm": 0.5554885818409444, "learning_rate": 0.00010941276273701328, "loss": 12.0405, "step": 17846 }, { "epoch": 0.97184047701701, "grad_norm": 0.57104996085094, "learning_rate": 0.00010940398359410598, "loss": 12.111, "step": 17847 }, { "epoch": 0.971894931013593, "grad_norm": 0.5571519314162232, "learning_rate": 0.00010939520437807174, "loss": 12.133, "step": 17848 }, { "epoch": 0.9719493850101761, "grad_norm": 0.5739596263599989, "learning_rate": 0.0001093864250889788, "loss": 12.0966, "step": 17849 }, { "epoch": 0.9720038390067591, "grad_norm": 0.5896902198048481, "learning_rate": 0.00010937764572689544, "loss": 12.0106, "step": 17850 }, { "epoch": 0.9720582930033421, "grad_norm": 0.5185716274990738, "learning_rate": 0.00010936886629188993, "loss": 12.0609, "step": 17851 }, { "epoch": 0.9721127469999251, "grad_norm": 0.5247556641643809, "learning_rate": 0.00010936008678403057, "loss": 12.214, "step": 17852 }, { "epoch": 0.9721672009965081, "grad_norm": 0.5813388227502093, "learning_rate": 0.0001093513072033856, "loss": 12.1157, "step": 17853 }, { "epoch": 0.9722216549930911, "grad_norm": 0.6682536334487343, "learning_rate": 0.00010934252755002328, "loss": 12.2233, "step": 17854 }, { "epoch": 0.9722761089896742, "grad_norm": 0.8240527314908686, "learning_rate": 0.00010933374782401191, "loss": 12.259, "step": 17855 }, { "epoch": 0.9723305629862572, "grad_norm": 0.5808292742415025, "learning_rate": 0.00010932496802541976, "loss": 12.0409, "step": 17856 }, { "epoch": 0.9723850169828402, "grad_norm": 0.6116237421436368, "learning_rate": 0.00010931618815431508, "loss": 12.0902, "step": 17857 }, { "epoch": 0.9724394709794232, "grad_norm": 0.5712556415067888, "learning_rate": 0.00010930740821076618, "loss": 12.1183, "step": 17858 }, { "epoch": 0.9724939249760062, "grad_norm": 0.5495703744822813, "learning_rate": 0.00010929862819484129, "loss": 12.2186, "step": 17859 }, { "epoch": 0.9725483789725892, "grad_norm": 0.53985474865941, "learning_rate": 0.0001092898481066087, "loss": 12.0988, "step": 17860 }, { "epoch": 0.9726028329691723, "grad_norm": 0.567346809786572, "learning_rate": 0.0001092810679461367, "loss": 11.9864, "step": 17861 }, { "epoch": 0.9726572869657553, "grad_norm": 0.5352723508895281, "learning_rate": 0.00010927228771349358, "loss": 12.0967, "step": 17862 }, { "epoch": 0.9727117409623383, "grad_norm": 0.5864110588587573, "learning_rate": 0.00010926350740874757, "loss": 12.35, "step": 17863 }, { "epoch": 0.9727661949589212, "grad_norm": 0.5903631071486342, "learning_rate": 0.000109254727031967, "loss": 12.145, "step": 17864 }, { "epoch": 0.9728206489555042, "grad_norm": 0.6475830505747637, "learning_rate": 0.0001092459465832201, "loss": 12.252, "step": 17865 }, { "epoch": 0.9728751029520873, "grad_norm": 0.5593561081555694, "learning_rate": 0.00010923716606257517, "loss": 12.1333, "step": 17866 }, { "epoch": 0.9729295569486703, "grad_norm": 0.6374503415738403, "learning_rate": 0.0001092283854701005, "loss": 12.0877, "step": 17867 }, { "epoch": 0.9729840109452533, "grad_norm": 0.5556889201637897, "learning_rate": 0.00010921960480586435, "loss": 12.1325, "step": 17868 }, { "epoch": 0.9730384649418363, "grad_norm": 0.6601778675459046, "learning_rate": 0.00010921082406993502, "loss": 12.1835, "step": 17869 }, { "epoch": 0.9730929189384193, "grad_norm": 0.6017919349112363, "learning_rate": 0.00010920204326238075, "loss": 12.1647, "step": 17870 }, { "epoch": 0.9731473729350023, "grad_norm": 0.5306760234908892, "learning_rate": 0.00010919326238326988, "loss": 12.0787, "step": 17871 }, { "epoch": 0.9732018269315854, "grad_norm": 0.5448361215848412, "learning_rate": 0.00010918448143267062, "loss": 12.1605, "step": 17872 }, { "epoch": 0.9732562809281684, "grad_norm": 0.5346807218112846, "learning_rate": 0.00010917570041065132, "loss": 12.1521, "step": 17873 }, { "epoch": 0.9733107349247514, "grad_norm": 0.5584559000184194, "learning_rate": 0.00010916691931728022, "loss": 12.1005, "step": 17874 }, { "epoch": 0.9733651889213344, "grad_norm": 0.5442613405654735, "learning_rate": 0.00010915813815262564, "loss": 12.1057, "step": 17875 }, { "epoch": 0.9734196429179174, "grad_norm": 0.5360045078582204, "learning_rate": 0.00010914935691675586, "loss": 12.136, "step": 17876 }, { "epoch": 0.9734740969145004, "grad_norm": 0.5755332612033531, "learning_rate": 0.00010914057560973909, "loss": 12.297, "step": 17877 }, { "epoch": 0.9735285509110835, "grad_norm": 0.5595959670494073, "learning_rate": 0.00010913179423164368, "loss": 12.2594, "step": 17878 }, { "epoch": 0.9735830049076665, "grad_norm": 0.47179394787793116, "learning_rate": 0.00010912301278253793, "loss": 12.0613, "step": 17879 }, { "epoch": 0.9736374589042495, "grad_norm": 0.6288161960743753, "learning_rate": 0.0001091142312624901, "loss": 12.1996, "step": 17880 }, { "epoch": 0.9736919129008325, "grad_norm": 0.5473793202384529, "learning_rate": 0.00010910544967156849, "loss": 12.1201, "step": 17881 }, { "epoch": 0.9737463668974154, "grad_norm": 0.5471473747021309, "learning_rate": 0.00010909666800984136, "loss": 12.1405, "step": 17882 }, { "epoch": 0.9738008208939984, "grad_norm": 0.5209045787765305, "learning_rate": 0.000109087886277377, "loss": 12.0672, "step": 17883 }, { "epoch": 0.9738552748905815, "grad_norm": 0.5390163506174233, "learning_rate": 0.00010907910447424373, "loss": 12.0384, "step": 17884 }, { "epoch": 0.9739097288871645, "grad_norm": 0.5157419432595162, "learning_rate": 0.00010907032260050982, "loss": 12.0893, "step": 17885 }, { "epoch": 0.9739641828837475, "grad_norm": 0.56821905247156, "learning_rate": 0.00010906154065624356, "loss": 12.1278, "step": 17886 }, { "epoch": 0.9740186368803305, "grad_norm": 0.5938279214721289, "learning_rate": 0.00010905275864151326, "loss": 12.1691, "step": 17887 }, { "epoch": 0.9740730908769135, "grad_norm": 0.599030570834302, "learning_rate": 0.00010904397655638717, "loss": 12.1988, "step": 17888 }, { "epoch": 0.9741275448734965, "grad_norm": 0.49742525863148496, "learning_rate": 0.00010903519440093357, "loss": 12.0655, "step": 17889 }, { "epoch": 0.9741819988700796, "grad_norm": 0.5680825248237323, "learning_rate": 0.00010902641217522083, "loss": 12.1229, "step": 17890 }, { "epoch": 0.9742364528666626, "grad_norm": 0.5945795928096985, "learning_rate": 0.00010901762987931718, "loss": 12.1609, "step": 17891 }, { "epoch": 0.9742909068632456, "grad_norm": 0.5741389455881379, "learning_rate": 0.00010900884751329095, "loss": 12.1743, "step": 17892 }, { "epoch": 0.9743453608598286, "grad_norm": 0.6398993803084989, "learning_rate": 0.00010900006507721036, "loss": 12.2676, "step": 17893 }, { "epoch": 0.9743998148564116, "grad_norm": 0.6475032989074506, "learning_rate": 0.00010899128257114377, "loss": 12.2419, "step": 17894 }, { "epoch": 0.9744542688529946, "grad_norm": 0.5941851830064357, "learning_rate": 0.00010898249999515949, "loss": 12.2465, "step": 17895 }, { "epoch": 0.9745087228495777, "grad_norm": 0.5181394519052931, "learning_rate": 0.00010897371734932578, "loss": 12.0279, "step": 17896 }, { "epoch": 0.9745631768461607, "grad_norm": 0.5359386713850717, "learning_rate": 0.00010896493463371092, "loss": 12.0701, "step": 17897 }, { "epoch": 0.9746176308427437, "grad_norm": 0.5777851464649929, "learning_rate": 0.00010895615184838324, "loss": 12.2015, "step": 17898 }, { "epoch": 0.9746720848393267, "grad_norm": 0.5159998961382526, "learning_rate": 0.00010894736899341101, "loss": 11.9164, "step": 17899 }, { "epoch": 0.9747265388359097, "grad_norm": 0.5260505807175065, "learning_rate": 0.00010893858606886251, "loss": 12.1511, "step": 17900 }, { "epoch": 0.9747809928324928, "grad_norm": 0.5636914625494629, "learning_rate": 0.00010892980307480612, "loss": 12.0698, "step": 17901 }, { "epoch": 0.9748354468290757, "grad_norm": 0.6476801604471305, "learning_rate": 0.00010892102001131006, "loss": 12.2996, "step": 17902 }, { "epoch": 0.9748899008256587, "grad_norm": 0.5250071769126085, "learning_rate": 0.00010891223687844266, "loss": 12.0254, "step": 17903 }, { "epoch": 0.9749443548222417, "grad_norm": 0.631637222132833, "learning_rate": 0.00010890345367627219, "loss": 12.2131, "step": 17904 }, { "epoch": 0.9749988088188247, "grad_norm": 0.6668428762840949, "learning_rate": 0.00010889467040486699, "loss": 12.0375, "step": 17905 }, { "epoch": 0.9750532628154077, "grad_norm": 0.550951569104823, "learning_rate": 0.00010888588706429532, "loss": 12.1446, "step": 17906 }, { "epoch": 0.9751077168119908, "grad_norm": 0.5678601252962668, "learning_rate": 0.00010887710365462554, "loss": 12.1614, "step": 17907 }, { "epoch": 0.9751621708085738, "grad_norm": 0.5540205006570619, "learning_rate": 0.00010886832017592588, "loss": 12.1959, "step": 17908 }, { "epoch": 0.9752166248051568, "grad_norm": 0.5464565421988227, "learning_rate": 0.00010885953662826467, "loss": 12.0261, "step": 17909 }, { "epoch": 0.9752710788017398, "grad_norm": 0.575540090418497, "learning_rate": 0.00010885075301171024, "loss": 12.0915, "step": 17910 }, { "epoch": 0.9753255327983228, "grad_norm": 0.6135515535481806, "learning_rate": 0.00010884196932633086, "loss": 12.1961, "step": 17911 }, { "epoch": 0.9753799867949058, "grad_norm": 0.6282773902043612, "learning_rate": 0.00010883318557219486, "loss": 12.0977, "step": 17912 }, { "epoch": 0.9754344407914889, "grad_norm": 0.5265484255082639, "learning_rate": 0.00010882440174937052, "loss": 12.0893, "step": 17913 }, { "epoch": 0.9754888947880719, "grad_norm": 0.5622450247447242, "learning_rate": 0.00010881561785792614, "loss": 12.1413, "step": 17914 }, { "epoch": 0.9755433487846549, "grad_norm": 0.6854546710007255, "learning_rate": 0.00010880683389793005, "loss": 12.108, "step": 17915 }, { "epoch": 0.9755978027812379, "grad_norm": 0.6047047159213987, "learning_rate": 0.00010879804986945053, "loss": 12.0257, "step": 17916 }, { "epoch": 0.9756522567778209, "grad_norm": 0.6069312642192195, "learning_rate": 0.0001087892657725559, "loss": 12.1501, "step": 17917 }, { "epoch": 0.9757067107744039, "grad_norm": 0.6288459884045596, "learning_rate": 0.00010878048160731447, "loss": 12.0399, "step": 17918 }, { "epoch": 0.975761164770987, "grad_norm": 0.548933130048583, "learning_rate": 0.00010877169737379454, "loss": 12.2037, "step": 17919 }, { "epoch": 0.97581561876757, "grad_norm": 0.5325861854351972, "learning_rate": 0.00010876291307206444, "loss": 12.1012, "step": 17920 }, { "epoch": 0.9758700727641529, "grad_norm": 0.5778575483593685, "learning_rate": 0.00010875412870219244, "loss": 12.1451, "step": 17921 }, { "epoch": 0.9759245267607359, "grad_norm": 0.7127994538232065, "learning_rate": 0.00010874534426424685, "loss": 12.2466, "step": 17922 }, { "epoch": 0.9759789807573189, "grad_norm": 0.6218779145716515, "learning_rate": 0.00010873655975829601, "loss": 12.0478, "step": 17923 }, { "epoch": 0.9760334347539019, "grad_norm": 0.6401297530367082, "learning_rate": 0.00010872777518440825, "loss": 12.1656, "step": 17924 }, { "epoch": 0.976087888750485, "grad_norm": 0.5689038896878923, "learning_rate": 0.00010871899054265179, "loss": 12.1548, "step": 17925 }, { "epoch": 0.976142342747068, "grad_norm": 0.6836915922090367, "learning_rate": 0.00010871020583309506, "loss": 12.1765, "step": 17926 }, { "epoch": 0.976196796743651, "grad_norm": 0.5045814336424027, "learning_rate": 0.00010870142105580626, "loss": 12.05, "step": 17927 }, { "epoch": 0.976251250740234, "grad_norm": 0.5209395539749949, "learning_rate": 0.00010869263621085374, "loss": 12.1797, "step": 17928 }, { "epoch": 0.976305704736817, "grad_norm": 0.6326383548968321, "learning_rate": 0.00010868385129830587, "loss": 12.1397, "step": 17929 }, { "epoch": 0.9763601587334, "grad_norm": 0.5497160922094432, "learning_rate": 0.0001086750663182309, "loss": 12.0658, "step": 17930 }, { "epoch": 0.9764146127299831, "grad_norm": 0.6505528718449232, "learning_rate": 0.00010866628127069716, "loss": 12.0582, "step": 17931 }, { "epoch": 0.9764690667265661, "grad_norm": 0.5561586077183442, "learning_rate": 0.00010865749615577295, "loss": 12.0352, "step": 17932 }, { "epoch": 0.9765235207231491, "grad_norm": 0.5374969245163094, "learning_rate": 0.0001086487109735266, "loss": 12.1648, "step": 17933 }, { "epoch": 0.9765779747197321, "grad_norm": 0.5859100683304503, "learning_rate": 0.00010863992572402642, "loss": 12.1309, "step": 17934 }, { "epoch": 0.9766324287163151, "grad_norm": 0.5627313103501189, "learning_rate": 0.00010863114040734075, "loss": 12.1343, "step": 17935 }, { "epoch": 0.9766868827128982, "grad_norm": 0.527947713424401, "learning_rate": 0.00010862235502353788, "loss": 11.9982, "step": 17936 }, { "epoch": 0.9767413367094812, "grad_norm": 0.5412487045573909, "learning_rate": 0.00010861356957268613, "loss": 12.1632, "step": 17937 }, { "epoch": 0.9767957907060641, "grad_norm": 0.5426660693000421, "learning_rate": 0.00010860478405485379, "loss": 12.1363, "step": 17938 }, { "epoch": 0.9768502447026471, "grad_norm": 0.5833797058817659, "learning_rate": 0.00010859599847010922, "loss": 12.0881, "step": 17939 }, { "epoch": 0.9769046986992301, "grad_norm": 0.6033501398873252, "learning_rate": 0.00010858721281852074, "loss": 12.2206, "step": 17940 }, { "epoch": 0.9769591526958131, "grad_norm": 0.5812379565297366, "learning_rate": 0.00010857842710015666, "loss": 12.1085, "step": 17941 }, { "epoch": 0.9770136066923962, "grad_norm": 0.575416761901028, "learning_rate": 0.00010856964131508526, "loss": 12.0669, "step": 17942 }, { "epoch": 0.9770680606889792, "grad_norm": 0.5377194087122438, "learning_rate": 0.0001085608554633749, "loss": 12.1132, "step": 17943 }, { "epoch": 0.9771225146855622, "grad_norm": 0.6368059974770356, "learning_rate": 0.00010855206954509391, "loss": 12.1264, "step": 17944 }, { "epoch": 0.9771769686821452, "grad_norm": 0.5726633456620404, "learning_rate": 0.00010854328356031059, "loss": 12.1398, "step": 17945 }, { "epoch": 0.9772314226787282, "grad_norm": 0.5546276963249545, "learning_rate": 0.00010853449750909328, "loss": 11.9444, "step": 17946 }, { "epoch": 0.9772858766753112, "grad_norm": 0.5278076544779617, "learning_rate": 0.00010852571139151027, "loss": 12.1496, "step": 17947 }, { "epoch": 0.9773403306718943, "grad_norm": 0.5866875037092192, "learning_rate": 0.00010851692520762989, "loss": 11.8878, "step": 17948 }, { "epoch": 0.9773947846684773, "grad_norm": 0.6536364797697218, "learning_rate": 0.00010850813895752043, "loss": 12.1626, "step": 17949 }, { "epoch": 0.9774492386650603, "grad_norm": 0.500705799452207, "learning_rate": 0.00010849935264125031, "loss": 12.1102, "step": 17950 }, { "epoch": 0.9775036926616433, "grad_norm": 0.6214715212457284, "learning_rate": 0.0001084905662588878, "loss": 12.2064, "step": 17951 }, { "epoch": 0.9775581466582263, "grad_norm": 0.5569672393862045, "learning_rate": 0.00010848177981050123, "loss": 12.099, "step": 17952 }, { "epoch": 0.9776126006548093, "grad_norm": 0.5271052084321294, "learning_rate": 0.0001084729932961589, "loss": 12.134, "step": 17953 }, { "epoch": 0.9776670546513924, "grad_norm": 0.5813167820322008, "learning_rate": 0.00010846420671592913, "loss": 11.932, "step": 17954 }, { "epoch": 0.9777215086479754, "grad_norm": 0.5652022496642765, "learning_rate": 0.00010845542006988028, "loss": 12.084, "step": 17955 }, { "epoch": 0.9777759626445583, "grad_norm": 0.6489469752678734, "learning_rate": 0.0001084466333580807, "loss": 12.1296, "step": 17956 }, { "epoch": 0.9778304166411413, "grad_norm": 0.5975893786025105, "learning_rate": 0.00010843784658059865, "loss": 12.0141, "step": 17957 }, { "epoch": 0.9778848706377243, "grad_norm": 0.5519125805054584, "learning_rate": 0.0001084290597375025, "loss": 12.2335, "step": 17958 }, { "epoch": 0.9779393246343073, "grad_norm": 0.5395481619309116, "learning_rate": 0.00010842027282886059, "loss": 11.9542, "step": 17959 }, { "epoch": 0.9779937786308904, "grad_norm": 0.6353045346707804, "learning_rate": 0.00010841148585474117, "loss": 12.1194, "step": 17960 }, { "epoch": 0.9780482326274734, "grad_norm": 0.5823209191961672, "learning_rate": 0.00010840269881521268, "loss": 12.1265, "step": 17961 }, { "epoch": 0.9781026866240564, "grad_norm": 0.5776965927558751, "learning_rate": 0.00010839391171034336, "loss": 12.0926, "step": 17962 }, { "epoch": 0.9781571406206394, "grad_norm": 0.6333194385576171, "learning_rate": 0.0001083851245402016, "loss": 12.1832, "step": 17963 }, { "epoch": 0.9782115946172224, "grad_norm": 0.6352767740368886, "learning_rate": 0.0001083763373048557, "loss": 12.0946, "step": 17964 }, { "epoch": 0.9782660486138054, "grad_norm": 0.5645207598118537, "learning_rate": 0.00010836755000437397, "loss": 12.0763, "step": 17965 }, { "epoch": 0.9783205026103885, "grad_norm": 0.5937879276484013, "learning_rate": 0.00010835876263882481, "loss": 12.0289, "step": 17966 }, { "epoch": 0.9783749566069715, "grad_norm": 0.6074090833865735, "learning_rate": 0.00010834997520827648, "loss": 12.1025, "step": 17967 }, { "epoch": 0.9784294106035545, "grad_norm": 0.5633156870660759, "learning_rate": 0.00010834118771279736, "loss": 12.1154, "step": 17968 }, { "epoch": 0.9784838646001375, "grad_norm": 0.5320985531607659, "learning_rate": 0.00010833240015245573, "loss": 12.1585, "step": 17969 }, { "epoch": 0.9785383185967205, "grad_norm": 0.6191167257525989, "learning_rate": 0.00010832361252731999, "loss": 11.9955, "step": 17970 }, { "epoch": 0.9785927725933036, "grad_norm": 0.5422379859682265, "learning_rate": 0.00010831482483745845, "loss": 12.1218, "step": 17971 }, { "epoch": 0.9786472265898866, "grad_norm": 0.5304028876755441, "learning_rate": 0.00010830603708293942, "loss": 12.2471, "step": 17972 }, { "epoch": 0.9787016805864696, "grad_norm": 0.6449485704565627, "learning_rate": 0.00010829724926383127, "loss": 12.225, "step": 17973 }, { "epoch": 0.9787561345830526, "grad_norm": 0.5306371704220962, "learning_rate": 0.0001082884613802023, "loss": 12.1296, "step": 17974 }, { "epoch": 0.9788105885796355, "grad_norm": 0.5167911108564022, "learning_rate": 0.00010827967343212087, "loss": 12.0481, "step": 17975 }, { "epoch": 0.9788650425762185, "grad_norm": 0.5274856214728664, "learning_rate": 0.00010827088541965531, "loss": 12.1757, "step": 17976 }, { "epoch": 0.9789194965728016, "grad_norm": 0.5521135327126813, "learning_rate": 0.00010826209734287396, "loss": 12.0129, "step": 17977 }, { "epoch": 0.9789739505693846, "grad_norm": 0.5761098151717915, "learning_rate": 0.00010825330920184515, "loss": 12.2367, "step": 17978 }, { "epoch": 0.9790284045659676, "grad_norm": 0.5076363892495853, "learning_rate": 0.00010824452099663725, "loss": 12.1218, "step": 17979 }, { "epoch": 0.9790828585625506, "grad_norm": 0.5273708890063069, "learning_rate": 0.00010823573272731855, "loss": 12.0445, "step": 17980 }, { "epoch": 0.9791373125591336, "grad_norm": 0.546803219978149, "learning_rate": 0.0001082269443939574, "loss": 12.1183, "step": 17981 }, { "epoch": 0.9791917665557166, "grad_norm": 0.5666503969234457, "learning_rate": 0.00010821815599662217, "loss": 12.1089, "step": 17982 }, { "epoch": 0.9792462205522997, "grad_norm": 0.5976456319310408, "learning_rate": 0.00010820936753538118, "loss": 12.2222, "step": 17983 }, { "epoch": 0.9793006745488827, "grad_norm": 0.5807534805493791, "learning_rate": 0.00010820057901030276, "loss": 12.1417, "step": 17984 }, { "epoch": 0.9793551285454657, "grad_norm": 0.5618407017610094, "learning_rate": 0.00010819179042145527, "loss": 12.1541, "step": 17985 }, { "epoch": 0.9794095825420487, "grad_norm": 0.5399095579670065, "learning_rate": 0.00010818300176890703, "loss": 12.165, "step": 17986 }, { "epoch": 0.9794640365386317, "grad_norm": 0.5595505093425492, "learning_rate": 0.00010817421305272642, "loss": 11.9565, "step": 17987 }, { "epoch": 0.9795184905352147, "grad_norm": 0.6173433296027873, "learning_rate": 0.00010816542427298173, "loss": 12.1378, "step": 17988 }, { "epoch": 0.9795729445317978, "grad_norm": 0.5635028286575519, "learning_rate": 0.00010815663542974135, "loss": 12.0596, "step": 17989 }, { "epoch": 0.9796273985283808, "grad_norm": 0.5128177697315897, "learning_rate": 0.00010814784652307361, "loss": 12.1665, "step": 17990 }, { "epoch": 0.9796818525249638, "grad_norm": 0.5451806256854534, "learning_rate": 0.00010813905755304686, "loss": 12.0466, "step": 17991 }, { "epoch": 0.9797363065215468, "grad_norm": 0.546160671865186, "learning_rate": 0.00010813026851972941, "loss": 12.2315, "step": 17992 }, { "epoch": 0.9797907605181297, "grad_norm": 0.6423773110763001, "learning_rate": 0.00010812147942318961, "loss": 12.0426, "step": 17993 }, { "epoch": 0.9798452145147127, "grad_norm": 0.49260296577336354, "learning_rate": 0.00010811269026349587, "loss": 12.0204, "step": 17994 }, { "epoch": 0.9798996685112958, "grad_norm": 0.5733819918469207, "learning_rate": 0.00010810390104071646, "loss": 12.1513, "step": 17995 }, { "epoch": 0.9799541225078788, "grad_norm": 0.5574227799978274, "learning_rate": 0.00010809511175491976, "loss": 12.0072, "step": 17996 }, { "epoch": 0.9800085765044618, "grad_norm": 0.5219117221008631, "learning_rate": 0.00010808632240617414, "loss": 12.1725, "step": 17997 }, { "epoch": 0.9800630305010448, "grad_norm": 0.5741168714520937, "learning_rate": 0.00010807753299454789, "loss": 11.9589, "step": 17998 }, { "epoch": 0.9801174844976278, "grad_norm": 0.5677294057678615, "learning_rate": 0.00010806874352010938, "loss": 12.0291, "step": 17999 }, { "epoch": 0.9801719384942109, "grad_norm": 0.5318485484630929, "learning_rate": 0.00010805995398292699, "loss": 12.1372, "step": 18000 }, { "epoch": 0.9802263924907939, "grad_norm": 0.5543616701944382, "learning_rate": 0.00010805116438306903, "loss": 12.1638, "step": 18001 }, { "epoch": 0.9802808464873769, "grad_norm": 0.5477869601113502, "learning_rate": 0.0001080423747206039, "loss": 12.0025, "step": 18002 }, { "epoch": 0.9803353004839599, "grad_norm": 0.5230337758187195, "learning_rate": 0.0001080335849955999, "loss": 12.0703, "step": 18003 }, { "epoch": 0.9803897544805429, "grad_norm": 0.5306094902096865, "learning_rate": 0.00010802479520812536, "loss": 11.9167, "step": 18004 }, { "epoch": 0.9804442084771259, "grad_norm": 0.6366613017358728, "learning_rate": 0.00010801600535824869, "loss": 12.1444, "step": 18005 }, { "epoch": 0.980498662473709, "grad_norm": 0.5727482572658238, "learning_rate": 0.00010800721544603822, "loss": 12.0977, "step": 18006 }, { "epoch": 0.980553116470292, "grad_norm": 0.5089284317859405, "learning_rate": 0.00010799842547156234, "loss": 12.0473, "step": 18007 }, { "epoch": 0.980607570466875, "grad_norm": 0.5667934645021488, "learning_rate": 0.00010798963543488932, "loss": 12.137, "step": 18008 }, { "epoch": 0.980662024463458, "grad_norm": 0.6196388965212127, "learning_rate": 0.00010798084533608754, "loss": 12.2894, "step": 18009 }, { "epoch": 0.980716478460041, "grad_norm": 0.562464564711028, "learning_rate": 0.0001079720551752254, "loss": 12.1624, "step": 18010 }, { "epoch": 0.980770932456624, "grad_norm": 0.5348173101555526, "learning_rate": 0.0001079632649523712, "loss": 12.2295, "step": 18011 }, { "epoch": 0.980825386453207, "grad_norm": 0.5570937732739201, "learning_rate": 0.00010795447466759335, "loss": 12.1079, "step": 18012 }, { "epoch": 0.98087984044979, "grad_norm": 0.5647120952256477, "learning_rate": 0.00010794568432096015, "loss": 12.1854, "step": 18013 }, { "epoch": 0.980934294446373, "grad_norm": 0.5815748623492203, "learning_rate": 0.00010793689391253996, "loss": 12.1952, "step": 18014 }, { "epoch": 0.980988748442956, "grad_norm": 0.5537602408110703, "learning_rate": 0.00010792810344240114, "loss": 12.1408, "step": 18015 }, { "epoch": 0.981043202439539, "grad_norm": 0.540086712463153, "learning_rate": 0.0001079193129106121, "loss": 12.0253, "step": 18016 }, { "epoch": 0.981097656436122, "grad_norm": 0.7435614062524526, "learning_rate": 0.00010791052231724115, "loss": 12.1708, "step": 18017 }, { "epoch": 0.9811521104327051, "grad_norm": 0.5266529019604518, "learning_rate": 0.00010790173166235665, "loss": 12.1274, "step": 18018 }, { "epoch": 0.9812065644292881, "grad_norm": 0.5536468972179033, "learning_rate": 0.00010789294094602694, "loss": 12.1007, "step": 18019 }, { "epoch": 0.9812610184258711, "grad_norm": 0.644183661388461, "learning_rate": 0.00010788415016832039, "loss": 12.0889, "step": 18020 }, { "epoch": 0.9813154724224541, "grad_norm": 0.5300747203805085, "learning_rate": 0.00010787535932930537, "loss": 12.1142, "step": 18021 }, { "epoch": 0.9813699264190371, "grad_norm": 0.5795795154313257, "learning_rate": 0.00010786656842905028, "loss": 12.0046, "step": 18022 }, { "epoch": 0.9814243804156201, "grad_norm": 0.5725988801129808, "learning_rate": 0.0001078577774676234, "loss": 12.2399, "step": 18023 }, { "epoch": 0.9814788344122032, "grad_norm": 0.5818950412111472, "learning_rate": 0.00010784898644509313, "loss": 12.0231, "step": 18024 }, { "epoch": 0.9815332884087862, "grad_norm": 0.5411298785123068, "learning_rate": 0.00010784019536152783, "loss": 11.8646, "step": 18025 }, { "epoch": 0.9815877424053692, "grad_norm": 0.5472834620427034, "learning_rate": 0.00010783140421699582, "loss": 11.995, "step": 18026 }, { "epoch": 0.9816421964019522, "grad_norm": 0.5295784041111941, "learning_rate": 0.00010782261301156555, "loss": 12.1112, "step": 18027 }, { "epoch": 0.9816966503985352, "grad_norm": 0.5634509356491804, "learning_rate": 0.0001078138217453053, "loss": 12.0938, "step": 18028 }, { "epoch": 0.9817511043951181, "grad_norm": 0.5739913712880079, "learning_rate": 0.00010780503041828347, "loss": 12.1133, "step": 18029 }, { "epoch": 0.9818055583917012, "grad_norm": 0.5719679045022055, "learning_rate": 0.00010779623903056842, "loss": 12.183, "step": 18030 }, { "epoch": 0.9818600123882842, "grad_norm": 0.5433390891457947, "learning_rate": 0.0001077874475822285, "loss": 12.072, "step": 18031 }, { "epoch": 0.9819144663848672, "grad_norm": 0.596004488459137, "learning_rate": 0.00010777865607333208, "loss": 12.0495, "step": 18032 }, { "epoch": 0.9819689203814502, "grad_norm": 0.5773870752684507, "learning_rate": 0.00010776986450394753, "loss": 12.2028, "step": 18033 }, { "epoch": 0.9820233743780332, "grad_norm": 0.5502718478168531, "learning_rate": 0.0001077610728741432, "loss": 12.0119, "step": 18034 }, { "epoch": 0.9820778283746163, "grad_norm": 0.6144172766414152, "learning_rate": 0.00010775228118398748, "loss": 12.1112, "step": 18035 }, { "epoch": 0.9821322823711993, "grad_norm": 0.5632127743324946, "learning_rate": 0.0001077434894335487, "loss": 12.0601, "step": 18036 }, { "epoch": 0.9821867363677823, "grad_norm": 0.5986532670411304, "learning_rate": 0.00010773469762289528, "loss": 12.1375, "step": 18037 }, { "epoch": 0.9822411903643653, "grad_norm": 0.5348121444254492, "learning_rate": 0.00010772590575209553, "loss": 12.0448, "step": 18038 }, { "epoch": 0.9822956443609483, "grad_norm": 0.6977366636723346, "learning_rate": 0.00010771711382121786, "loss": 12.0596, "step": 18039 }, { "epoch": 0.9823500983575313, "grad_norm": 0.5281573876140327, "learning_rate": 0.0001077083218303306, "loss": 12.1564, "step": 18040 }, { "epoch": 0.9824045523541144, "grad_norm": 0.6582946415270958, "learning_rate": 0.00010769952977950215, "loss": 12.2643, "step": 18041 }, { "epoch": 0.9824590063506974, "grad_norm": 0.5929653534678702, "learning_rate": 0.00010769073766880085, "loss": 12.0594, "step": 18042 }, { "epoch": 0.9825134603472804, "grad_norm": 0.5457434530855422, "learning_rate": 0.00010768194549829508, "loss": 12.164, "step": 18043 }, { "epoch": 0.9825679143438634, "grad_norm": 0.5500436007280676, "learning_rate": 0.00010767315326805323, "loss": 12.2298, "step": 18044 }, { "epoch": 0.9826223683404464, "grad_norm": 0.5224746472815626, "learning_rate": 0.00010766436097814365, "loss": 12.134, "step": 18045 }, { "epoch": 0.9826768223370294, "grad_norm": 0.5619769872720628, "learning_rate": 0.00010765556862863472, "loss": 11.9188, "step": 18046 }, { "epoch": 0.9827312763336125, "grad_norm": 0.5357558992994437, "learning_rate": 0.0001076467762195948, "loss": 12.1788, "step": 18047 }, { "epoch": 0.9827857303301955, "grad_norm": 0.5352705390795878, "learning_rate": 0.00010763798375109226, "loss": 11.9371, "step": 18048 }, { "epoch": 0.9828401843267784, "grad_norm": 0.5181102203185849, "learning_rate": 0.00010762919122319548, "loss": 12.072, "step": 18049 }, { "epoch": 0.9828946383233614, "grad_norm": 0.5860233741332485, "learning_rate": 0.00010762039863597284, "loss": 12.1642, "step": 18050 }, { "epoch": 0.9829490923199444, "grad_norm": 0.5729014679436384, "learning_rate": 0.00010761160598949269, "loss": 12.2005, "step": 18051 }, { "epoch": 0.9830035463165274, "grad_norm": 0.5920741834598858, "learning_rate": 0.00010760281328382344, "loss": 12.2143, "step": 18052 }, { "epoch": 0.9830580003131105, "grad_norm": 0.5143756884138256, "learning_rate": 0.0001075940205190334, "loss": 12.152, "step": 18053 }, { "epoch": 0.9831124543096935, "grad_norm": 0.565118066991634, "learning_rate": 0.000107585227695191, "loss": 12.0064, "step": 18054 }, { "epoch": 0.9831669083062765, "grad_norm": 0.5744740854247854, "learning_rate": 0.0001075764348123646, "loss": 11.9919, "step": 18055 }, { "epoch": 0.9832213623028595, "grad_norm": 0.6331700601523153, "learning_rate": 0.00010756764187062257, "loss": 12.0929, "step": 18056 }, { "epoch": 0.9832758162994425, "grad_norm": 0.6835017108603204, "learning_rate": 0.00010755884887003331, "loss": 12.2208, "step": 18057 }, { "epoch": 0.9833302702960255, "grad_norm": 0.5656699014455033, "learning_rate": 0.00010755005581066513, "loss": 12.1017, "step": 18058 }, { "epoch": 0.9833847242926086, "grad_norm": 0.5311723015634895, "learning_rate": 0.00010754126269258647, "loss": 11.9106, "step": 18059 }, { "epoch": 0.9834391782891916, "grad_norm": 0.5720387434249207, "learning_rate": 0.00010753246951586568, "loss": 12.0141, "step": 18060 }, { "epoch": 0.9834936322857746, "grad_norm": 0.6936595517459607, "learning_rate": 0.00010752367628057116, "loss": 12.189, "step": 18061 }, { "epoch": 0.9835480862823576, "grad_norm": 0.4982052031486083, "learning_rate": 0.00010751488298677128, "loss": 12.0908, "step": 18062 }, { "epoch": 0.9836025402789406, "grad_norm": 0.5599579395059051, "learning_rate": 0.00010750608963453438, "loss": 12.1435, "step": 18063 }, { "epoch": 0.9836569942755236, "grad_norm": 0.5998340544163681, "learning_rate": 0.00010749729622392888, "loss": 12.075, "step": 18064 }, { "epoch": 0.9837114482721067, "grad_norm": 0.5876829349332333, "learning_rate": 0.00010748850275502316, "loss": 12.0845, "step": 18065 }, { "epoch": 0.9837659022686897, "grad_norm": 0.5161903199408583, "learning_rate": 0.00010747970922788557, "loss": 11.9695, "step": 18066 }, { "epoch": 0.9838203562652726, "grad_norm": 0.6270574736815702, "learning_rate": 0.00010747091564258454, "loss": 12.1766, "step": 18067 }, { "epoch": 0.9838748102618556, "grad_norm": 0.5369537586885043, "learning_rate": 0.00010746212199918838, "loss": 12.0638, "step": 18068 }, { "epoch": 0.9839292642584386, "grad_norm": 0.5853144281322175, "learning_rate": 0.00010745332829776548, "loss": 11.9787, "step": 18069 }, { "epoch": 0.9839837182550217, "grad_norm": 0.60185097776034, "learning_rate": 0.0001074445345383843, "loss": 12.009, "step": 18070 }, { "epoch": 0.9840381722516047, "grad_norm": 0.5639261433739271, "learning_rate": 0.00010743574072111315, "loss": 12.104, "step": 18071 }, { "epoch": 0.9840926262481877, "grad_norm": 0.5537166577957787, "learning_rate": 0.00010742694684602046, "loss": 11.9331, "step": 18072 }, { "epoch": 0.9841470802447707, "grad_norm": 0.6591769633023632, "learning_rate": 0.00010741815291317459, "loss": 12.3092, "step": 18073 }, { "epoch": 0.9842015342413537, "grad_norm": 0.5947384769187573, "learning_rate": 0.00010740935892264387, "loss": 12.2152, "step": 18074 }, { "epoch": 0.9842559882379367, "grad_norm": 0.5599849847542595, "learning_rate": 0.00010740056487449674, "loss": 12.1216, "step": 18075 }, { "epoch": 0.9843104422345198, "grad_norm": 0.5517022014958264, "learning_rate": 0.0001073917707688016, "loss": 12.0721, "step": 18076 }, { "epoch": 0.9843648962311028, "grad_norm": 0.618082533686037, "learning_rate": 0.00010738297660562682, "loss": 12.0813, "step": 18077 }, { "epoch": 0.9844193502276858, "grad_norm": 0.5772571839574674, "learning_rate": 0.00010737418238504078, "loss": 12.195, "step": 18078 }, { "epoch": 0.9844738042242688, "grad_norm": 0.5673878961561942, "learning_rate": 0.00010736538810711184, "loss": 12.0617, "step": 18079 }, { "epoch": 0.9845282582208518, "grad_norm": 0.5546780893853501, "learning_rate": 0.00010735659377190841, "loss": 12.0691, "step": 18080 }, { "epoch": 0.9845827122174348, "grad_norm": 0.5853216934400048, "learning_rate": 0.00010734779937949883, "loss": 12.1178, "step": 18081 }, { "epoch": 0.9846371662140179, "grad_norm": 0.5358952268618732, "learning_rate": 0.0001073390049299516, "loss": 11.9943, "step": 18082 }, { "epoch": 0.9846916202106009, "grad_norm": 0.5755377660157842, "learning_rate": 0.00010733021042333502, "loss": 12.1944, "step": 18083 }, { "epoch": 0.9847460742071839, "grad_norm": 0.566788817104964, "learning_rate": 0.00010732141585971747, "loss": 12.1504, "step": 18084 }, { "epoch": 0.9848005282037668, "grad_norm": 0.6196705651253785, "learning_rate": 0.00010731262123916739, "loss": 12.1013, "step": 18085 }, { "epoch": 0.9848549822003498, "grad_norm": 0.5980150147717634, "learning_rate": 0.00010730382656175311, "loss": 11.9677, "step": 18086 }, { "epoch": 0.9849094361969328, "grad_norm": 0.5577501627316889, "learning_rate": 0.00010729503182754308, "loss": 12.1253, "step": 18087 }, { "epoch": 0.9849638901935159, "grad_norm": 0.4919465787672597, "learning_rate": 0.00010728623703660562, "loss": 12.0263, "step": 18088 }, { "epoch": 0.9850183441900989, "grad_norm": 0.5853269404716969, "learning_rate": 0.00010727744218900921, "loss": 12.1506, "step": 18089 }, { "epoch": 0.9850727981866819, "grad_norm": 0.6055052020375156, "learning_rate": 0.00010726864728482215, "loss": 12.0832, "step": 18090 }, { "epoch": 0.9851272521832649, "grad_norm": 0.625309898276744, "learning_rate": 0.00010725985232411288, "loss": 12.15, "step": 18091 }, { "epoch": 0.9851817061798479, "grad_norm": 0.5676098635808333, "learning_rate": 0.0001072510573069498, "loss": 12.0437, "step": 18092 }, { "epoch": 0.9852361601764309, "grad_norm": 0.5124787306749896, "learning_rate": 0.00010724226223340125, "loss": 12.1212, "step": 18093 }, { "epoch": 0.985290614173014, "grad_norm": 0.5933495437373781, "learning_rate": 0.00010723346710353568, "loss": 11.9825, "step": 18094 }, { "epoch": 0.985345068169597, "grad_norm": 0.6130369637735575, "learning_rate": 0.00010722467191742144, "loss": 12.0794, "step": 18095 }, { "epoch": 0.98539952216618, "grad_norm": 0.5928905745980033, "learning_rate": 0.00010721587667512695, "loss": 12.1596, "step": 18096 }, { "epoch": 0.985453976162763, "grad_norm": 0.5369197231079916, "learning_rate": 0.0001072070813767206, "loss": 11.9061, "step": 18097 }, { "epoch": 0.985508430159346, "grad_norm": 0.520335964142194, "learning_rate": 0.00010719828602227075, "loss": 12.0001, "step": 18098 }, { "epoch": 0.985562884155929, "grad_norm": 0.5918418697364416, "learning_rate": 0.00010718949061184585, "loss": 12.091, "step": 18099 }, { "epoch": 0.9856173381525121, "grad_norm": 1.0030124548635482, "learning_rate": 0.00010718069514551426, "loss": 12.1456, "step": 18100 }, { "epoch": 0.9856717921490951, "grad_norm": 0.6393789302062024, "learning_rate": 0.00010717189962334437, "loss": 12.1362, "step": 18101 }, { "epoch": 0.985726246145678, "grad_norm": 0.5601102862060541, "learning_rate": 0.00010716310404540459, "loss": 12.0762, "step": 18102 }, { "epoch": 0.985780700142261, "grad_norm": 0.5786649803546119, "learning_rate": 0.0001071543084117633, "loss": 12.0578, "step": 18103 }, { "epoch": 0.985835154138844, "grad_norm": 0.5872084322909208, "learning_rate": 0.00010714551272248891, "loss": 12.2205, "step": 18104 }, { "epoch": 0.9858896081354271, "grad_norm": 0.5060151325768041, "learning_rate": 0.00010713671697764984, "loss": 12.0591, "step": 18105 }, { "epoch": 0.9859440621320101, "grad_norm": 0.5528163577261255, "learning_rate": 0.00010712792117731445, "loss": 12.0899, "step": 18106 }, { "epoch": 0.9859985161285931, "grad_norm": 0.565991675505829, "learning_rate": 0.00010711912532155115, "loss": 12.1375, "step": 18107 }, { "epoch": 0.9860529701251761, "grad_norm": 0.5447176746423495, "learning_rate": 0.0001071103294104283, "loss": 12.0478, "step": 18108 }, { "epoch": 0.9861074241217591, "grad_norm": 0.696030140774074, "learning_rate": 0.00010710153344401439, "loss": 12.3183, "step": 18109 }, { "epoch": 0.9861618781183421, "grad_norm": 0.5880121858085362, "learning_rate": 0.00010709273742237776, "loss": 12.0449, "step": 18110 }, { "epoch": 0.9862163321149252, "grad_norm": 0.49293080395869104, "learning_rate": 0.00010708394134558678, "loss": 11.9812, "step": 18111 }, { "epoch": 0.9862707861115082, "grad_norm": 0.6154783409642931, "learning_rate": 0.00010707514521370994, "loss": 12.0409, "step": 18112 }, { "epoch": 0.9863252401080912, "grad_norm": 0.5396766468657854, "learning_rate": 0.00010706634902681551, "loss": 12.022, "step": 18113 }, { "epoch": 0.9863796941046742, "grad_norm": 0.5503026621440306, "learning_rate": 0.000107057552784972, "loss": 12.1035, "step": 18114 }, { "epoch": 0.9864341481012572, "grad_norm": 0.5962588816072459, "learning_rate": 0.00010704875648824777, "loss": 11.9612, "step": 18115 }, { "epoch": 0.9864886020978402, "grad_norm": 0.631680261499398, "learning_rate": 0.00010703996013671124, "loss": 12.0747, "step": 18116 }, { "epoch": 0.9865430560944233, "grad_norm": 0.542234954428847, "learning_rate": 0.00010703116373043082, "loss": 12.1221, "step": 18117 }, { "epoch": 0.9865975100910063, "grad_norm": 0.6474710621477797, "learning_rate": 0.00010702236726947485, "loss": 12.2001, "step": 18118 }, { "epoch": 0.9866519640875893, "grad_norm": 0.5821040606293187, "learning_rate": 0.00010701357075391178, "loss": 12.099, "step": 18119 }, { "epoch": 0.9867064180841723, "grad_norm": 0.6329945116586946, "learning_rate": 0.00010700477418381003, "loss": 12.2078, "step": 18120 }, { "epoch": 0.9867608720807552, "grad_norm": 0.5319668920455514, "learning_rate": 0.00010699597755923796, "loss": 12.0948, "step": 18121 }, { "epoch": 0.9868153260773382, "grad_norm": 0.5900748869649325, "learning_rate": 0.000106987180880264, "loss": 12.1813, "step": 18122 }, { "epoch": 0.9868697800739213, "grad_norm": 0.6149433502827112, "learning_rate": 0.00010697838414695657, "loss": 12.2179, "step": 18123 }, { "epoch": 0.9869242340705043, "grad_norm": 0.562985381858628, "learning_rate": 0.00010696958735938403, "loss": 12.1519, "step": 18124 }, { "epoch": 0.9869786880670873, "grad_norm": 0.5658279954443907, "learning_rate": 0.00010696079051761483, "loss": 12.091, "step": 18125 }, { "epoch": 0.9870331420636703, "grad_norm": 0.6312905273552373, "learning_rate": 0.00010695199362171733, "loss": 12.1555, "step": 18126 }, { "epoch": 0.9870875960602533, "grad_norm": 0.5886784688030845, "learning_rate": 0.00010694319667175998, "loss": 12.205, "step": 18127 }, { "epoch": 0.9871420500568363, "grad_norm": 0.4950797462419129, "learning_rate": 0.00010693439966781118, "loss": 11.8538, "step": 18128 }, { "epoch": 0.9871965040534194, "grad_norm": 0.5522936025725195, "learning_rate": 0.00010692560260993931, "loss": 12.1399, "step": 18129 }, { "epoch": 0.9872509580500024, "grad_norm": 0.6427225968953367, "learning_rate": 0.00010691680549821277, "loss": 12.07, "step": 18130 }, { "epoch": 0.9873054120465854, "grad_norm": 0.5154353236629954, "learning_rate": 0.00010690800833270002, "loss": 12.0826, "step": 18131 }, { "epoch": 0.9873598660431684, "grad_norm": 0.6143806323295387, "learning_rate": 0.00010689921111346943, "loss": 12.1745, "step": 18132 }, { "epoch": 0.9874143200397514, "grad_norm": 0.6140008287528899, "learning_rate": 0.00010689041384058944, "loss": 12.3005, "step": 18133 }, { "epoch": 0.9874687740363345, "grad_norm": 0.5372926978927672, "learning_rate": 0.00010688161651412843, "loss": 12.0937, "step": 18134 }, { "epoch": 0.9875232280329175, "grad_norm": 0.6016048547083367, "learning_rate": 0.00010687281913415477, "loss": 12.0777, "step": 18135 }, { "epoch": 0.9875776820295005, "grad_norm": 0.5706361887179785, "learning_rate": 0.00010686402170073695, "loss": 12.2219, "step": 18136 }, { "epoch": 0.9876321360260835, "grad_norm": 0.5111482874190844, "learning_rate": 0.00010685522421394334, "loss": 12.0913, "step": 18137 }, { "epoch": 0.9876865900226665, "grad_norm": 0.5407132927064728, "learning_rate": 0.00010684642667384239, "loss": 12.2046, "step": 18138 }, { "epoch": 0.9877410440192494, "grad_norm": 0.5551703354684117, "learning_rate": 0.00010683762908050243, "loss": 12.0568, "step": 18139 }, { "epoch": 0.9877954980158326, "grad_norm": 0.556144476717898, "learning_rate": 0.00010682883143399194, "loss": 12.1016, "step": 18140 }, { "epoch": 0.9878499520124155, "grad_norm": 0.52610775539807, "learning_rate": 0.0001068200337343793, "loss": 12.0702, "step": 18141 }, { "epoch": 0.9879044060089985, "grad_norm": 0.5839966922936672, "learning_rate": 0.00010681123598173295, "loss": 12.2517, "step": 18142 }, { "epoch": 0.9879588600055815, "grad_norm": 0.5289322739960025, "learning_rate": 0.0001068024381761213, "loss": 12.1082, "step": 18143 }, { "epoch": 0.9880133140021645, "grad_norm": 0.5662415515881658, "learning_rate": 0.00010679364031761273, "loss": 11.9966, "step": 18144 }, { "epoch": 0.9880677679987475, "grad_norm": 0.595066729336303, "learning_rate": 0.00010678484240627566, "loss": 12.0954, "step": 18145 }, { "epoch": 0.9881222219953306, "grad_norm": 0.756702540809058, "learning_rate": 0.00010677604444217853, "loss": 12.0219, "step": 18146 }, { "epoch": 0.9881766759919136, "grad_norm": 0.584760628442418, "learning_rate": 0.00010676724642538973, "loss": 12.1959, "step": 18147 }, { "epoch": 0.9882311299884966, "grad_norm": 0.5955713714862642, "learning_rate": 0.00010675844835597772, "loss": 12.1417, "step": 18148 }, { "epoch": 0.9882855839850796, "grad_norm": 0.545147343129215, "learning_rate": 0.00010674965023401087, "loss": 12.1022, "step": 18149 }, { "epoch": 0.9883400379816626, "grad_norm": 0.5710069833773501, "learning_rate": 0.00010674085205955759, "loss": 12.1044, "step": 18150 }, { "epoch": 0.9883944919782456, "grad_norm": 0.5329622577089048, "learning_rate": 0.00010673205383268632, "loss": 12.0673, "step": 18151 }, { "epoch": 0.9884489459748287, "grad_norm": 0.5487041818413038, "learning_rate": 0.00010672325555346545, "loss": 12.1085, "step": 18152 }, { "epoch": 0.9885033999714117, "grad_norm": 0.5415157448620397, "learning_rate": 0.00010671445722196346, "loss": 12.1328, "step": 18153 }, { "epoch": 0.9885578539679947, "grad_norm": 0.5624921937625975, "learning_rate": 0.00010670565883824872, "loss": 12.1548, "step": 18154 }, { "epoch": 0.9886123079645777, "grad_norm": 0.6212541456400029, "learning_rate": 0.00010669686040238964, "loss": 12.1634, "step": 18155 }, { "epoch": 0.9886667619611607, "grad_norm": 0.5194000136149667, "learning_rate": 0.00010668806191445466, "loss": 11.9366, "step": 18156 }, { "epoch": 0.9887212159577436, "grad_norm": 0.5155042191634405, "learning_rate": 0.00010667926337451217, "loss": 12.1547, "step": 18157 }, { "epoch": 0.9887756699543268, "grad_norm": 0.5246555161359897, "learning_rate": 0.0001066704647826306, "loss": 12.0819, "step": 18158 }, { "epoch": 0.9888301239509097, "grad_norm": 0.5078959519576165, "learning_rate": 0.0001066616661388784, "loss": 12.1861, "step": 18159 }, { "epoch": 0.9888845779474927, "grad_norm": 0.6152159808564477, "learning_rate": 0.00010665286744332397, "loss": 12.2136, "step": 18160 }, { "epoch": 0.9889390319440757, "grad_norm": 0.5111939428258314, "learning_rate": 0.00010664406869603572, "loss": 12.0874, "step": 18161 }, { "epoch": 0.9889934859406587, "grad_norm": 0.5119214118859521, "learning_rate": 0.00010663526989708209, "loss": 12.0325, "step": 18162 }, { "epoch": 0.9890479399372417, "grad_norm": 0.5219377048092392, "learning_rate": 0.00010662647104653146, "loss": 11.8204, "step": 18163 }, { "epoch": 0.9891023939338248, "grad_norm": 0.5443243507868772, "learning_rate": 0.00010661767214445229, "loss": 12.1334, "step": 18164 }, { "epoch": 0.9891568479304078, "grad_norm": 0.5892688335860076, "learning_rate": 0.000106608873190913, "loss": 12.0278, "step": 18165 }, { "epoch": 0.9892113019269908, "grad_norm": 0.5884705795429934, "learning_rate": 0.00010660007418598199, "loss": 12.082, "step": 18166 }, { "epoch": 0.9892657559235738, "grad_norm": 0.5458190704317927, "learning_rate": 0.00010659127512972771, "loss": 12.1603, "step": 18167 }, { "epoch": 0.9893202099201568, "grad_norm": 0.6312072324698195, "learning_rate": 0.00010658247602221855, "loss": 12.1664, "step": 18168 }, { "epoch": 0.9893746639167399, "grad_norm": 0.580064155012794, "learning_rate": 0.00010657367686352298, "loss": 12.0865, "step": 18169 }, { "epoch": 0.9894291179133229, "grad_norm": 0.5564038444545667, "learning_rate": 0.0001065648776537094, "loss": 12.2043, "step": 18170 }, { "epoch": 0.9894835719099059, "grad_norm": 0.5270218157556361, "learning_rate": 0.0001065560783928462, "loss": 12.0985, "step": 18171 }, { "epoch": 0.9895380259064889, "grad_norm": 0.5923197475586851, "learning_rate": 0.00010654727908100183, "loss": 12.0379, "step": 18172 }, { "epoch": 0.9895924799030719, "grad_norm": 0.5829247033872216, "learning_rate": 0.00010653847971824476, "loss": 12.0139, "step": 18173 }, { "epoch": 0.9896469338996549, "grad_norm": 0.5516682687822828, "learning_rate": 0.00010652968030464334, "loss": 12.0776, "step": 18174 }, { "epoch": 0.989701387896238, "grad_norm": 0.5639592853444282, "learning_rate": 0.00010652088084026606, "loss": 12.1019, "step": 18175 }, { "epoch": 0.989755841892821, "grad_norm": 0.5384906436819519, "learning_rate": 0.00010651208132518129, "loss": 11.9862, "step": 18176 }, { "epoch": 0.989810295889404, "grad_norm": 0.6070968546473495, "learning_rate": 0.0001065032817594575, "loss": 12.0275, "step": 18177 }, { "epoch": 0.9898647498859869, "grad_norm": 0.5808379207583481, "learning_rate": 0.00010649448214316314, "loss": 12.1359, "step": 18178 }, { "epoch": 0.9899192038825699, "grad_norm": 0.4988094591452025, "learning_rate": 0.00010648568247636653, "loss": 12.0005, "step": 18179 }, { "epoch": 0.9899736578791529, "grad_norm": 0.5838907201723186, "learning_rate": 0.00010647688275913616, "loss": 12.172, "step": 18180 }, { "epoch": 0.990028111875736, "grad_norm": 0.6488156091959372, "learning_rate": 0.00010646808299154049, "loss": 12.1838, "step": 18181 }, { "epoch": 0.990082565872319, "grad_norm": 0.53625159665213, "learning_rate": 0.00010645928317364795, "loss": 12.1533, "step": 18182 }, { "epoch": 0.990137019868902, "grad_norm": 0.5081836936590858, "learning_rate": 0.00010645048330552692, "loss": 12.0303, "step": 18183 }, { "epoch": 0.990191473865485, "grad_norm": 0.5390404382885886, "learning_rate": 0.00010644168338724583, "loss": 12.1266, "step": 18184 }, { "epoch": 0.990245927862068, "grad_norm": 0.5996580270093308, "learning_rate": 0.00010643288341887314, "loss": 11.9662, "step": 18185 }, { "epoch": 0.990300381858651, "grad_norm": 0.4910316126595332, "learning_rate": 0.00010642408340047728, "loss": 12.1151, "step": 18186 }, { "epoch": 0.9903548358552341, "grad_norm": 0.5270594120462088, "learning_rate": 0.00010641528333212667, "loss": 12.1097, "step": 18187 }, { "epoch": 0.9904092898518171, "grad_norm": 0.5193287254087474, "learning_rate": 0.00010640648321388976, "loss": 12.0473, "step": 18188 }, { "epoch": 0.9904637438484001, "grad_norm": 0.5753604760633858, "learning_rate": 0.00010639768304583493, "loss": 12.1701, "step": 18189 }, { "epoch": 0.9905181978449831, "grad_norm": 0.5739489111380263, "learning_rate": 0.00010638888282803064, "loss": 12.1724, "step": 18190 }, { "epoch": 0.9905726518415661, "grad_norm": 0.5255039712114434, "learning_rate": 0.00010638008256054532, "loss": 11.9917, "step": 18191 }, { "epoch": 0.9906271058381491, "grad_norm": 0.5672165055545463, "learning_rate": 0.00010637128224344743, "loss": 12.1203, "step": 18192 }, { "epoch": 0.9906815598347322, "grad_norm": 0.5776361743452018, "learning_rate": 0.0001063624818768054, "loss": 12.0376, "step": 18193 }, { "epoch": 0.9907360138313152, "grad_norm": 0.510166317980021, "learning_rate": 0.00010635368146068763, "loss": 11.9939, "step": 18194 }, { "epoch": 0.9907904678278981, "grad_norm": 0.5041406687352171, "learning_rate": 0.00010634488099516253, "loss": 11.9797, "step": 18195 }, { "epoch": 0.9908449218244811, "grad_norm": 0.5805858677346144, "learning_rate": 0.00010633608048029859, "loss": 11.9986, "step": 18196 }, { "epoch": 0.9908993758210641, "grad_norm": 0.5529385195231652, "learning_rate": 0.00010632727991616425, "loss": 12.0752, "step": 18197 }, { "epoch": 0.9909538298176471, "grad_norm": 0.5922940201271124, "learning_rate": 0.00010631847930282792, "loss": 12.1256, "step": 18198 }, { "epoch": 0.9910082838142302, "grad_norm": 0.5176892169025592, "learning_rate": 0.00010630967864035801, "loss": 12.1186, "step": 18199 }, { "epoch": 0.9910627378108132, "grad_norm": 0.5392799211225574, "learning_rate": 0.00010630087792882299, "loss": 12.0985, "step": 18200 }, { "epoch": 0.9911171918073962, "grad_norm": 0.5741306146407585, "learning_rate": 0.00010629207716829128, "loss": 12.2897, "step": 18201 }, { "epoch": 0.9911716458039792, "grad_norm": 0.6588728596304212, "learning_rate": 0.00010628327635883133, "loss": 12.1349, "step": 18202 }, { "epoch": 0.9912260998005622, "grad_norm": 0.5016770517506026, "learning_rate": 0.0001062744755005116, "loss": 12.1715, "step": 18203 }, { "epoch": 0.9912805537971453, "grad_norm": 0.6018119933793505, "learning_rate": 0.00010626567459340047, "loss": 12.0232, "step": 18204 }, { "epoch": 0.9913350077937283, "grad_norm": 0.5748866193437475, "learning_rate": 0.00010625687363756638, "loss": 12.0883, "step": 18205 }, { "epoch": 0.9913894617903113, "grad_norm": 0.5671183525950302, "learning_rate": 0.0001062480726330778, "loss": 12.1136, "step": 18206 }, { "epoch": 0.9914439157868943, "grad_norm": 0.5512127695258885, "learning_rate": 0.00010623927158000318, "loss": 12.1586, "step": 18207 }, { "epoch": 0.9914983697834773, "grad_norm": 0.5306245357987395, "learning_rate": 0.00010623047047841095, "loss": 12.0574, "step": 18208 }, { "epoch": 0.9915528237800603, "grad_norm": 0.7030287098542315, "learning_rate": 0.0001062216693283695, "loss": 12.1599, "step": 18209 }, { "epoch": 0.9916072777766434, "grad_norm": 0.5550678850782707, "learning_rate": 0.00010621286812994733, "loss": 12.0858, "step": 18210 }, { "epoch": 0.9916617317732264, "grad_norm": 0.614911713623915, "learning_rate": 0.00010620406688321285, "loss": 12.0705, "step": 18211 }, { "epoch": 0.9917161857698094, "grad_norm": 0.5689402836679145, "learning_rate": 0.00010619526558823447, "loss": 12.0661, "step": 18212 }, { "epoch": 0.9917706397663923, "grad_norm": 0.601418914315275, "learning_rate": 0.00010618646424508072, "loss": 12.0915, "step": 18213 }, { "epoch": 0.9918250937629753, "grad_norm": 0.5290614927342719, "learning_rate": 0.00010617766285381997, "loss": 12.0135, "step": 18214 }, { "epoch": 0.9918795477595583, "grad_norm": 0.6662437714409607, "learning_rate": 0.00010616886141452066, "loss": 12.1376, "step": 18215 }, { "epoch": 0.9919340017561414, "grad_norm": 0.5554799207212264, "learning_rate": 0.00010616005992725127, "loss": 12.0578, "step": 18216 }, { "epoch": 0.9919884557527244, "grad_norm": 0.5894407616829986, "learning_rate": 0.0001061512583920802, "loss": 12.1633, "step": 18217 }, { "epoch": 0.9920429097493074, "grad_norm": 0.6647002522545795, "learning_rate": 0.00010614245680907593, "loss": 12.0707, "step": 18218 }, { "epoch": 0.9920973637458904, "grad_norm": 0.5547368028447971, "learning_rate": 0.0001061336551783069, "loss": 12.1337, "step": 18219 }, { "epoch": 0.9921518177424734, "grad_norm": 0.5780326738983894, "learning_rate": 0.00010612485349984151, "loss": 12.1128, "step": 18220 }, { "epoch": 0.9922062717390564, "grad_norm": 0.6561917013664642, "learning_rate": 0.00010611605177374824, "loss": 12.2249, "step": 18221 }, { "epoch": 0.9922607257356395, "grad_norm": 0.5224015433566719, "learning_rate": 0.00010610725000009551, "loss": 12.0916, "step": 18222 }, { "epoch": 0.9923151797322225, "grad_norm": 0.7322683937666953, "learning_rate": 0.00010609844817895181, "loss": 12.0303, "step": 18223 }, { "epoch": 0.9923696337288055, "grad_norm": 0.5827006291050971, "learning_rate": 0.00010608964631038553, "loss": 12.1149, "step": 18224 }, { "epoch": 0.9924240877253885, "grad_norm": 0.6341172939816072, "learning_rate": 0.00010608084439446517, "loss": 12.2159, "step": 18225 }, { "epoch": 0.9924785417219715, "grad_norm": 0.5055789506140991, "learning_rate": 0.00010607204243125912, "loss": 12.0186, "step": 18226 }, { "epoch": 0.9925329957185545, "grad_norm": 0.5937480331497839, "learning_rate": 0.00010606324042083586, "loss": 12.2173, "step": 18227 }, { "epoch": 0.9925874497151376, "grad_norm": 0.5765533212067099, "learning_rate": 0.0001060544383632638, "loss": 12.1039, "step": 18228 }, { "epoch": 0.9926419037117206, "grad_norm": 0.5856380091206774, "learning_rate": 0.00010604563625861146, "loss": 12.1383, "step": 18229 }, { "epoch": 0.9926963577083036, "grad_norm": 0.5355559798841263, "learning_rate": 0.00010603683410694721, "loss": 12.0303, "step": 18230 }, { "epoch": 0.9927508117048865, "grad_norm": 0.5242059372670049, "learning_rate": 0.00010602803190833952, "loss": 12.0785, "step": 18231 }, { "epoch": 0.9928052657014695, "grad_norm": 0.5529125861572977, "learning_rate": 0.00010601922966285685, "loss": 12.1323, "step": 18232 }, { "epoch": 0.9928597196980525, "grad_norm": 0.579570952487471, "learning_rate": 0.00010601042737056763, "loss": 12.1193, "step": 18233 }, { "epoch": 0.9929141736946356, "grad_norm": 0.5615623403966941, "learning_rate": 0.00010600162503154034, "loss": 12.063, "step": 18234 }, { "epoch": 0.9929686276912186, "grad_norm": 0.5263312129752604, "learning_rate": 0.00010599282264584338, "loss": 12.0795, "step": 18235 }, { "epoch": 0.9930230816878016, "grad_norm": 0.5741760977770437, "learning_rate": 0.00010598402021354525, "loss": 12.1506, "step": 18236 }, { "epoch": 0.9930775356843846, "grad_norm": 0.5453528232582568, "learning_rate": 0.00010597521773471437, "loss": 12.1111, "step": 18237 }, { "epoch": 0.9931319896809676, "grad_norm": 0.5848245007509505, "learning_rate": 0.00010596641520941921, "loss": 12.0653, "step": 18238 }, { "epoch": 0.9931864436775507, "grad_norm": 0.5734984559056954, "learning_rate": 0.00010595761263772816, "loss": 12.0619, "step": 18239 }, { "epoch": 0.9932408976741337, "grad_norm": 0.5634072989729753, "learning_rate": 0.00010594881001970975, "loss": 11.9756, "step": 18240 }, { "epoch": 0.9932953516707167, "grad_norm": 0.5894397628042625, "learning_rate": 0.00010594000735543239, "loss": 11.9566, "step": 18241 }, { "epoch": 0.9933498056672997, "grad_norm": 0.5321180965263821, "learning_rate": 0.00010593120464496453, "loss": 12.0611, "step": 18242 }, { "epoch": 0.9934042596638827, "grad_norm": 0.5994931595328526, "learning_rate": 0.00010592240188837465, "loss": 12.105, "step": 18243 }, { "epoch": 0.9934587136604657, "grad_norm": 0.5939063125313075, "learning_rate": 0.00010591359908573115, "loss": 12.0165, "step": 18244 }, { "epoch": 0.9935131676570488, "grad_norm": 0.5686234838614362, "learning_rate": 0.00010590479623710252, "loss": 12.0928, "step": 18245 }, { "epoch": 0.9935676216536318, "grad_norm": 0.5747729490831621, "learning_rate": 0.00010589599334255722, "loss": 12.141, "step": 18246 }, { "epoch": 0.9936220756502148, "grad_norm": 0.5933524183797961, "learning_rate": 0.00010588719040216366, "loss": 12.0861, "step": 18247 }, { "epoch": 0.9936765296467978, "grad_norm": 0.5578922147546961, "learning_rate": 0.00010587838741599037, "loss": 12.107, "step": 18248 }, { "epoch": 0.9937309836433807, "grad_norm": 0.5309388163475087, "learning_rate": 0.00010586958438410572, "loss": 12.052, "step": 18249 }, { "epoch": 0.9937854376399637, "grad_norm": 0.5308965616404674, "learning_rate": 0.00010586078130657817, "loss": 12.0533, "step": 18250 }, { "epoch": 0.9938398916365468, "grad_norm": 0.5162989002287177, "learning_rate": 0.00010585197818347624, "loss": 11.9956, "step": 18251 }, { "epoch": 0.9938943456331298, "grad_norm": 0.5915427121879998, "learning_rate": 0.00010584317501486833, "loss": 12.0785, "step": 18252 }, { "epoch": 0.9939487996297128, "grad_norm": 0.5787166950197778, "learning_rate": 0.00010583437180082293, "loss": 12.0896, "step": 18253 }, { "epoch": 0.9940032536262958, "grad_norm": 0.5140441644242902, "learning_rate": 0.00010582556854140846, "loss": 12.0014, "step": 18254 }, { "epoch": 0.9940577076228788, "grad_norm": 0.5961023277506549, "learning_rate": 0.00010581676523669339, "loss": 12.1662, "step": 18255 }, { "epoch": 0.9941121616194618, "grad_norm": 0.5563828940257974, "learning_rate": 0.00010580796188674618, "loss": 12.09, "step": 18256 }, { "epoch": 0.9941666156160449, "grad_norm": 0.527189994076118, "learning_rate": 0.00010579915849163528, "loss": 11.9866, "step": 18257 }, { "epoch": 0.9942210696126279, "grad_norm": 0.6035817247554295, "learning_rate": 0.00010579035505142918, "loss": 12.2226, "step": 18258 }, { "epoch": 0.9942755236092109, "grad_norm": 0.5345851780364875, "learning_rate": 0.00010578155156619629, "loss": 12.1269, "step": 18259 }, { "epoch": 0.9943299776057939, "grad_norm": 0.7405022779870131, "learning_rate": 0.00010577274803600508, "loss": 12.0807, "step": 18260 }, { "epoch": 0.9943844316023769, "grad_norm": 0.5171137351541494, "learning_rate": 0.00010576394446092399, "loss": 12.0452, "step": 18261 }, { "epoch": 0.9944388855989599, "grad_norm": 0.5749189436715378, "learning_rate": 0.00010575514084102151, "loss": 12.1096, "step": 18262 }, { "epoch": 0.994493339595543, "grad_norm": 0.7449553375505649, "learning_rate": 0.00010574633717636615, "loss": 12.1099, "step": 18263 }, { "epoch": 0.994547793592126, "grad_norm": 0.6323078374538844, "learning_rate": 0.00010573753346702626, "loss": 12.2157, "step": 18264 }, { "epoch": 0.994602247588709, "grad_norm": 0.5005928734970015, "learning_rate": 0.00010572872971307035, "loss": 11.8993, "step": 18265 }, { "epoch": 0.994656701585292, "grad_norm": 0.5487925587482815, "learning_rate": 0.00010571992591456687, "loss": 12.1289, "step": 18266 }, { "epoch": 0.994711155581875, "grad_norm": 0.5645526783870632, "learning_rate": 0.00010571112207158429, "loss": 12.1062, "step": 18267 }, { "epoch": 0.994765609578458, "grad_norm": 0.5660600084508958, "learning_rate": 0.00010570231818419106, "loss": 12.0204, "step": 18268 }, { "epoch": 0.994820063575041, "grad_norm": 0.5105849321119167, "learning_rate": 0.00010569351425245569, "loss": 12.1643, "step": 18269 }, { "epoch": 0.994874517571624, "grad_norm": 0.5828619360755012, "learning_rate": 0.00010568471027644656, "loss": 12.1449, "step": 18270 }, { "epoch": 0.994928971568207, "grad_norm": 0.5563200390594023, "learning_rate": 0.00010567590625623219, "loss": 12.0673, "step": 18271 }, { "epoch": 0.99498342556479, "grad_norm": 0.5429151672763892, "learning_rate": 0.00010566710219188098, "loss": 12.1319, "step": 18272 }, { "epoch": 0.995037879561373, "grad_norm": 0.5584770559140745, "learning_rate": 0.00010565829808346146, "loss": 12.1062, "step": 18273 }, { "epoch": 0.9950923335579561, "grad_norm": 0.6520903283099112, "learning_rate": 0.00010564949393104208, "loss": 12.1104, "step": 18274 }, { "epoch": 0.9951467875545391, "grad_norm": 0.5716805206843671, "learning_rate": 0.00010564068973469128, "loss": 12.1117, "step": 18275 }, { "epoch": 0.9952012415511221, "grad_norm": 1.2849329251298771, "learning_rate": 0.00010563188549447754, "loss": 12.2314, "step": 18276 }, { "epoch": 0.9952556955477051, "grad_norm": 0.571927857807694, "learning_rate": 0.00010562308121046929, "loss": 12.098, "step": 18277 }, { "epoch": 0.9953101495442881, "grad_norm": 0.6030794560881759, "learning_rate": 0.000105614276882735, "loss": 11.971, "step": 18278 }, { "epoch": 0.9953646035408711, "grad_norm": 0.5171068645879546, "learning_rate": 0.0001056054725113432, "loss": 12.124, "step": 18279 }, { "epoch": 0.9954190575374542, "grad_norm": 0.5842618670819876, "learning_rate": 0.00010559666809636229, "loss": 12.0035, "step": 18280 }, { "epoch": 0.9954735115340372, "grad_norm": 0.5434906527951233, "learning_rate": 0.00010558786363786075, "loss": 12.1142, "step": 18281 }, { "epoch": 0.9955279655306202, "grad_norm": 0.49536206412147676, "learning_rate": 0.00010557905913590704, "loss": 12.0497, "step": 18282 }, { "epoch": 0.9955824195272032, "grad_norm": 0.585149941217784, "learning_rate": 0.00010557025459056962, "loss": 12.1341, "step": 18283 }, { "epoch": 0.9956368735237862, "grad_norm": 0.631751785017183, "learning_rate": 0.00010556145000191697, "loss": 12.0925, "step": 18284 }, { "epoch": 0.9956913275203692, "grad_norm": 0.5918976363038114, "learning_rate": 0.00010555264537001757, "loss": 12.293, "step": 18285 }, { "epoch": 0.9957457815169523, "grad_norm": 0.6808319682134729, "learning_rate": 0.00010554384069493985, "loss": 12.1553, "step": 18286 }, { "epoch": 0.9958002355135352, "grad_norm": 0.5531333942652283, "learning_rate": 0.00010553503597675231, "loss": 12.0735, "step": 18287 }, { "epoch": 0.9958546895101182, "grad_norm": 0.5628731217928776, "learning_rate": 0.00010552623121552339, "loss": 12.106, "step": 18288 }, { "epoch": 0.9959091435067012, "grad_norm": 0.5609251496824058, "learning_rate": 0.00010551742641132159, "loss": 12.0956, "step": 18289 }, { "epoch": 0.9959635975032842, "grad_norm": 0.568234454533744, "learning_rate": 0.00010550862156421532, "loss": 12.0748, "step": 18290 }, { "epoch": 0.9960180514998672, "grad_norm": 0.580822952517534, "learning_rate": 0.00010549981667427312, "loss": 12.0833, "step": 18291 }, { "epoch": 0.9960725054964503, "grad_norm": 0.5632507217341308, "learning_rate": 0.0001054910117415634, "loss": 12.1432, "step": 18292 }, { "epoch": 0.9961269594930333, "grad_norm": 0.6988838802306254, "learning_rate": 0.00010548220676615466, "loss": 12.3317, "step": 18293 }, { "epoch": 0.9961814134896163, "grad_norm": 0.6437110816396651, "learning_rate": 0.00010547340174811538, "loss": 11.9467, "step": 18294 }, { "epoch": 0.9962358674861993, "grad_norm": 0.5156467182743514, "learning_rate": 0.00010546459668751398, "loss": 11.8644, "step": 18295 }, { "epoch": 0.9962903214827823, "grad_norm": 0.5651016554038113, "learning_rate": 0.00010545579158441896, "loss": 12.0975, "step": 18296 }, { "epoch": 0.9963447754793653, "grad_norm": 0.547738898561608, "learning_rate": 0.0001054469864388988, "loss": 12.0015, "step": 18297 }, { "epoch": 0.9963992294759484, "grad_norm": 0.5631191104540555, "learning_rate": 0.00010543818125102197, "loss": 12.1064, "step": 18298 }, { "epoch": 0.9964536834725314, "grad_norm": 0.5402298000145086, "learning_rate": 0.00010542937602085692, "loss": 12.0611, "step": 18299 }, { "epoch": 0.9965081374691144, "grad_norm": 0.54779694513099, "learning_rate": 0.00010542057074847214, "loss": 12.1614, "step": 18300 }, { "epoch": 0.9965625914656974, "grad_norm": 0.5866882211911401, "learning_rate": 0.00010541176543393607, "loss": 12.1319, "step": 18301 }, { "epoch": 0.9966170454622804, "grad_norm": 0.5580272799539157, "learning_rate": 0.00010540296007731723, "loss": 12.1469, "step": 18302 }, { "epoch": 0.9966714994588635, "grad_norm": 0.604361212709162, "learning_rate": 0.00010539415467868406, "loss": 12.1564, "step": 18303 }, { "epoch": 0.9967259534554465, "grad_norm": 0.569177199760398, "learning_rate": 0.00010538534923810506, "loss": 12.1793, "step": 18304 }, { "epoch": 0.9967804074520294, "grad_norm": 0.5338852867265473, "learning_rate": 0.00010537654375564862, "loss": 12.0523, "step": 18305 }, { "epoch": 0.9968348614486124, "grad_norm": 0.5389476611207479, "learning_rate": 0.00010536773823138333, "loss": 12.1469, "step": 18306 }, { "epoch": 0.9968893154451954, "grad_norm": 0.5520982290746783, "learning_rate": 0.00010535893266537758, "loss": 12.0407, "step": 18307 }, { "epoch": 0.9969437694417784, "grad_norm": 0.5762786107442041, "learning_rate": 0.00010535012705769989, "loss": 11.9832, "step": 18308 }, { "epoch": 0.9969982234383615, "grad_norm": 0.5447738058091682, "learning_rate": 0.00010534132140841873, "loss": 12.2752, "step": 18309 }, { "epoch": 0.9970526774349445, "grad_norm": 0.48809500004019324, "learning_rate": 0.0001053325157176025, "loss": 12.0917, "step": 18310 }, { "epoch": 0.9971071314315275, "grad_norm": 0.5557349364671941, "learning_rate": 0.00010532370998531976, "loss": 12.106, "step": 18311 }, { "epoch": 0.9971615854281105, "grad_norm": 0.5191673464952495, "learning_rate": 0.00010531490421163897, "loss": 12.1023, "step": 18312 }, { "epoch": 0.9972160394246935, "grad_norm": 0.5318529763885843, "learning_rate": 0.00010530609839662857, "loss": 11.9875, "step": 18313 }, { "epoch": 0.9972704934212765, "grad_norm": 0.5989152240444041, "learning_rate": 0.00010529729254035712, "loss": 12.1512, "step": 18314 }, { "epoch": 0.9973249474178596, "grad_norm": 0.5312593750250028, "learning_rate": 0.00010528848664289299, "loss": 12.023, "step": 18315 }, { "epoch": 0.9973794014144426, "grad_norm": 0.6016810621628746, "learning_rate": 0.00010527968070430467, "loss": 12.1681, "step": 18316 }, { "epoch": 0.9974338554110256, "grad_norm": 0.5905204779838882, "learning_rate": 0.0001052708747246607, "loss": 11.9806, "step": 18317 }, { "epoch": 0.9974883094076086, "grad_norm": 0.5281817931669279, "learning_rate": 0.00010526206870402952, "loss": 12.0172, "step": 18318 }, { "epoch": 0.9975427634041916, "grad_norm": 0.5710793478121307, "learning_rate": 0.00010525326264247965, "loss": 12.0452, "step": 18319 }, { "epoch": 0.9975972174007746, "grad_norm": 0.5690343274241164, "learning_rate": 0.00010524445654007949, "loss": 12.1181, "step": 18320 }, { "epoch": 0.9976516713973577, "grad_norm": 0.6045292433790345, "learning_rate": 0.00010523565039689753, "loss": 12.1459, "step": 18321 }, { "epoch": 0.9977061253939407, "grad_norm": 0.5120165329316765, "learning_rate": 0.0001052268442130023, "loss": 12.0556, "step": 18322 }, { "epoch": 0.9977605793905236, "grad_norm": 0.5256668955838202, "learning_rate": 0.00010521803798846228, "loss": 12.1167, "step": 18323 }, { "epoch": 0.9978150333871066, "grad_norm": 0.5949637649102439, "learning_rate": 0.00010520923172334592, "loss": 12.2264, "step": 18324 }, { "epoch": 0.9978694873836896, "grad_norm": 0.49523772762042173, "learning_rate": 0.00010520042541772168, "loss": 12.122, "step": 18325 }, { "epoch": 0.9979239413802726, "grad_norm": 0.5778216801940866, "learning_rate": 0.00010519161907165806, "loss": 12.1547, "step": 18326 }, { "epoch": 0.9979783953768557, "grad_norm": 0.5770705735686229, "learning_rate": 0.00010518281268522352, "loss": 12.0834, "step": 18327 }, { "epoch": 0.9980328493734387, "grad_norm": 0.5596684489682436, "learning_rate": 0.00010517400625848657, "loss": 12.2114, "step": 18328 }, { "epoch": 0.9980873033700217, "grad_norm": 0.5670008931842118, "learning_rate": 0.00010516519979151572, "loss": 12.1024, "step": 18329 }, { "epoch": 0.9981417573666047, "grad_norm": 0.5357328828073912, "learning_rate": 0.00010515639328437938, "loss": 12.0188, "step": 18330 }, { "epoch": 0.9981962113631877, "grad_norm": 0.515364442144104, "learning_rate": 0.00010514758673714607, "loss": 12.1352, "step": 18331 }, { "epoch": 0.9982506653597707, "grad_norm": 0.5324150196422278, "learning_rate": 0.00010513878014988422, "loss": 12.0376, "step": 18332 }, { "epoch": 0.9983051193563538, "grad_norm": 0.7198694397915397, "learning_rate": 0.00010512997352266239, "loss": 12.2379, "step": 18333 }, { "epoch": 0.9983595733529368, "grad_norm": 0.7596864730164228, "learning_rate": 0.00010512116685554904, "loss": 12.2487, "step": 18334 }, { "epoch": 0.9984140273495198, "grad_norm": 0.5273094451393162, "learning_rate": 0.00010511236014861261, "loss": 12.0907, "step": 18335 }, { "epoch": 0.9984684813461028, "grad_norm": 0.6108561774057857, "learning_rate": 0.00010510355340192162, "loss": 12.0449, "step": 18336 }, { "epoch": 0.9985229353426858, "grad_norm": 0.5945808666942504, "learning_rate": 0.00010509474661554454, "loss": 12.0426, "step": 18337 }, { "epoch": 0.9985773893392689, "grad_norm": 0.6075217557210957, "learning_rate": 0.00010508593978954984, "loss": 12.0566, "step": 18338 }, { "epoch": 0.9986318433358519, "grad_norm": 0.5271617568745623, "learning_rate": 0.00010507713292400607, "loss": 12.1659, "step": 18339 }, { "epoch": 0.9986862973324349, "grad_norm": 0.6434501091325255, "learning_rate": 0.00010506832601898162, "loss": 12.1409, "step": 18340 }, { "epoch": 0.9987407513290179, "grad_norm": 0.601408971484078, "learning_rate": 0.00010505951907454504, "loss": 12.2454, "step": 18341 }, { "epoch": 0.9987952053256008, "grad_norm": 0.5887687221593112, "learning_rate": 0.00010505071209076478, "loss": 12.1978, "step": 18342 }, { "epoch": 0.9988496593221838, "grad_norm": 0.5492925724499284, "learning_rate": 0.00010504190506770932, "loss": 12.0751, "step": 18343 }, { "epoch": 0.9989041133187669, "grad_norm": 0.5594962061512018, "learning_rate": 0.00010503309800544718, "loss": 12.0993, "step": 18344 }, { "epoch": 0.9989585673153499, "grad_norm": 0.537376461163246, "learning_rate": 0.00010502429090404681, "loss": 12.1009, "step": 18345 }, { "epoch": 0.9990130213119329, "grad_norm": 0.5483466664034863, "learning_rate": 0.00010501548376357673, "loss": 12.0368, "step": 18346 }, { "epoch": 0.9990674753085159, "grad_norm": 0.5815370462057136, "learning_rate": 0.00010500667658410541, "loss": 12.2623, "step": 18347 }, { "epoch": 0.9991219293050989, "grad_norm": 0.5212947122431155, "learning_rate": 0.00010499786936570133, "loss": 12.0358, "step": 18348 }, { "epoch": 0.9991763833016819, "grad_norm": 0.543887849801361, "learning_rate": 0.00010498906210843296, "loss": 12.0294, "step": 18349 }, { "epoch": 0.999230837298265, "grad_norm": 0.5967475053340418, "learning_rate": 0.00010498025481236881, "loss": 12.0394, "step": 18350 }, { "epoch": 0.999285291294848, "grad_norm": 0.6079438630113693, "learning_rate": 0.0001049714474775774, "loss": 12.112, "step": 18351 }, { "epoch": 0.999339745291431, "grad_norm": 0.5832927223163846, "learning_rate": 0.00010496264010412714, "loss": 12.1601, "step": 18352 }, { "epoch": 0.999394199288014, "grad_norm": 0.5199728927111259, "learning_rate": 0.00010495383269208656, "loss": 12.1181, "step": 18353 }, { "epoch": 0.999448653284597, "grad_norm": 0.5941050390356781, "learning_rate": 0.00010494502524152417, "loss": 12.2261, "step": 18354 }, { "epoch": 0.99950310728118, "grad_norm": 0.6533739538334977, "learning_rate": 0.00010493621775250842, "loss": 12.1423, "step": 18355 }, { "epoch": 0.9995575612777631, "grad_norm": 0.6397750626797618, "learning_rate": 0.00010492741022510781, "loss": 11.9695, "step": 18356 }, { "epoch": 0.9996120152743461, "grad_norm": 0.615535482329696, "learning_rate": 0.00010491860265939084, "loss": 11.9663, "step": 18357 }, { "epoch": 0.9996664692709291, "grad_norm": 0.5402599506044298, "learning_rate": 0.000104909795055426, "loss": 11.8675, "step": 18358 }, { "epoch": 0.999720923267512, "grad_norm": 0.5627393779909873, "learning_rate": 0.00010490098741328174, "loss": 12.0418, "step": 18359 }, { "epoch": 0.999775377264095, "grad_norm": 0.5455322046325075, "learning_rate": 0.00010489217973302661, "loss": 12.0932, "step": 18360 }, { "epoch": 0.999829831260678, "grad_norm": 0.5342616361648647, "learning_rate": 0.00010488337201472905, "loss": 11.9299, "step": 18361 }, { "epoch": 0.9998842852572611, "grad_norm": 0.5799942188983298, "learning_rate": 0.00010487456425845758, "loss": 11.969, "step": 18362 }, { "epoch": 0.9999387392538441, "grad_norm": 0.583225236752225, "learning_rate": 0.00010486575646428067, "loss": 12.1362, "step": 18363 }, { "epoch": 0.9999931932504271, "grad_norm": 0.5327575410878634, "learning_rate": 0.00010485694863226687, "loss": 12.0343, "step": 18364 }, { "epoch": 1.0000476472470101, "grad_norm": 0.6505595880771092, "learning_rate": 0.00010484814076248455, "loss": 12.085, "step": 18365 }, { "epoch": 1.0001021012435931, "grad_norm": 0.5183267779782971, "learning_rate": 0.00010483933285500231, "loss": 11.9666, "step": 18366 }, { "epoch": 1.000156555240176, "grad_norm": 0.6106950132925038, "learning_rate": 0.00010483052490988861, "loss": 11.988, "step": 18367 }, { "epoch": 1.000211009236759, "grad_norm": 0.5487647852412104, "learning_rate": 0.00010482171692721192, "loss": 12.043, "step": 18368 }, { "epoch": 1.000265463233342, "grad_norm": 0.547568729580315, "learning_rate": 0.00010481290890704079, "loss": 12.086, "step": 18369 }, { "epoch": 1.0003199172299253, "grad_norm": 0.5356534198690128, "learning_rate": 0.00010480410084944363, "loss": 12.1115, "step": 18370 }, { "epoch": 1.0003743712265083, "grad_norm": 0.56201989986331, "learning_rate": 0.00010479529275448899, "loss": 11.7968, "step": 18371 }, { "epoch": 1.0004288252230913, "grad_norm": 0.5862078200930146, "learning_rate": 0.00010478648462224536, "loss": 12.098, "step": 18372 }, { "epoch": 1.0004832792196743, "grad_norm": 0.5426265159006913, "learning_rate": 0.0001047776764527812, "loss": 12.15, "step": 18373 }, { "epoch": 1.0005377332162573, "grad_norm": 0.6361834878239455, "learning_rate": 0.00010476886824616508, "loss": 12.1512, "step": 18374 }, { "epoch": 1.0005921872128403, "grad_norm": 0.643370746183964, "learning_rate": 0.00010476006000246539, "loss": 12.2027, "step": 18375 }, { "epoch": 1.0006466412094233, "grad_norm": 0.5831748434881041, "learning_rate": 0.00010475125172175066, "loss": 12.0607, "step": 18376 }, { "epoch": 1.0007010952060063, "grad_norm": 0.5546440422407036, "learning_rate": 0.00010474244340408943, "loss": 12.0811, "step": 18377 }, { "epoch": 1.0007555492025892, "grad_norm": 0.5409337616191884, "learning_rate": 0.00010473363504955017, "loss": 11.9853, "step": 18378 }, { "epoch": 1.0008100031991722, "grad_norm": 0.5619493544416494, "learning_rate": 0.00010472482665820139, "loss": 12.109, "step": 18379 }, { "epoch": 1.0008644571957552, "grad_norm": 0.4948552519989432, "learning_rate": 0.00010471601823011152, "loss": 12.121, "step": 18380 }, { "epoch": 1.0009189111923382, "grad_norm": 0.6170624624020086, "learning_rate": 0.00010470720976534913, "loss": 12.2022, "step": 18381 }, { "epoch": 1.0009733651889214, "grad_norm": 0.5417170613625275, "learning_rate": 0.00010469840126398265, "loss": 12.1306, "step": 18382 }, { "epoch": 1.0010278191855044, "grad_norm": 0.4905086632246415, "learning_rate": 0.00010468959272608063, "loss": 12.0468, "step": 18383 }, { "epoch": 1.0010822731820874, "grad_norm": 0.6156457670311729, "learning_rate": 0.00010468078415171159, "loss": 12.1791, "step": 18384 }, { "epoch": 1.0011367271786704, "grad_norm": 0.5629794884599503, "learning_rate": 0.00010467197554094397, "loss": 12.2719, "step": 18385 }, { "epoch": 1.0011911811752534, "grad_norm": 0.5730873010065706, "learning_rate": 0.00010466316689384625, "loss": 12.0505, "step": 18386 }, { "epoch": 1.0012456351718364, "grad_norm": 0.5959155320820246, "learning_rate": 0.00010465435821048697, "loss": 12.1176, "step": 18387 }, { "epoch": 1.0013000891684194, "grad_norm": 0.5252390152725271, "learning_rate": 0.00010464554949093463, "loss": 12.0478, "step": 18388 }, { "epoch": 1.0013545431650024, "grad_norm": 0.5594494652330558, "learning_rate": 0.00010463674073525775, "loss": 12.0372, "step": 18389 }, { "epoch": 1.0014089971615854, "grad_norm": 0.5531813407378738, "learning_rate": 0.00010462793194352478, "loss": 12.0106, "step": 18390 }, { "epoch": 1.0014634511581684, "grad_norm": 0.6363738001080059, "learning_rate": 0.00010461912311580422, "loss": 12.1426, "step": 18391 }, { "epoch": 1.0015179051547514, "grad_norm": 0.5505332682820571, "learning_rate": 0.00010461031425216459, "loss": 12.1038, "step": 18392 }, { "epoch": 1.0015723591513344, "grad_norm": 0.5347841437757687, "learning_rate": 0.00010460150535267436, "loss": 11.9116, "step": 18393 }, { "epoch": 1.0016268131479176, "grad_norm": 0.6161254610542731, "learning_rate": 0.0001045926964174021, "loss": 12.1878, "step": 18394 }, { "epoch": 1.0016812671445006, "grad_norm": 0.5473230783030792, "learning_rate": 0.00010458388744641622, "loss": 11.9945, "step": 18395 }, { "epoch": 1.0017357211410836, "grad_norm": 0.5361957484183014, "learning_rate": 0.00010457507843978529, "loss": 12.0654, "step": 18396 }, { "epoch": 1.0017901751376665, "grad_norm": 0.564843165650648, "learning_rate": 0.00010456626939757779, "loss": 12.0611, "step": 18397 }, { "epoch": 1.0018446291342495, "grad_norm": 0.49246724425305344, "learning_rate": 0.00010455746031986215, "loss": 12.0085, "step": 18398 }, { "epoch": 1.0018990831308325, "grad_norm": 0.517952796975952, "learning_rate": 0.00010454865120670701, "loss": 11.9902, "step": 18399 }, { "epoch": 1.0019535371274155, "grad_norm": 0.5715093319395536, "learning_rate": 0.00010453984205818078, "loss": 12.0314, "step": 18400 }, { "epoch": 1.0020079911239985, "grad_norm": 0.6381694403156342, "learning_rate": 0.00010453103287435196, "loss": 12.1184, "step": 18401 }, { "epoch": 1.0020624451205815, "grad_norm": 0.5189112439951066, "learning_rate": 0.00010452222365528906, "loss": 12.1006, "step": 18402 }, { "epoch": 1.0021168991171645, "grad_norm": 0.5427153553870643, "learning_rate": 0.00010451341440106059, "loss": 12.1646, "step": 18403 }, { "epoch": 1.0021713531137475, "grad_norm": 0.5508173660075053, "learning_rate": 0.00010450460511173508, "loss": 11.9587, "step": 18404 }, { "epoch": 1.0022258071103307, "grad_norm": 0.5125258189425049, "learning_rate": 0.00010449579578738097, "loss": 12.0517, "step": 18405 }, { "epoch": 1.0022802611069137, "grad_norm": 0.5640314476131568, "learning_rate": 0.00010448698642806682, "loss": 11.9638, "step": 18406 }, { "epoch": 1.0023347151034967, "grad_norm": 0.5247936949494446, "learning_rate": 0.0001044781770338611, "loss": 12.1338, "step": 18407 }, { "epoch": 1.0023891691000797, "grad_norm": 0.5699248662268073, "learning_rate": 0.00010446936760483235, "loss": 12.0554, "step": 18408 }, { "epoch": 1.0024436230966627, "grad_norm": 0.5832846353753555, "learning_rate": 0.00010446055814104903, "loss": 12.1558, "step": 18409 }, { "epoch": 1.0024980770932457, "grad_norm": 0.5888257898048711, "learning_rate": 0.00010445174864257967, "loss": 11.9725, "step": 18410 }, { "epoch": 1.0025525310898287, "grad_norm": 0.5342655938104394, "learning_rate": 0.00010444293910949277, "loss": 12.1343, "step": 18411 }, { "epoch": 1.0026069850864117, "grad_norm": 0.5222292673682406, "learning_rate": 0.00010443412954185681, "loss": 12.01, "step": 18412 }, { "epoch": 1.0026614390829947, "grad_norm": 0.5605641546372595, "learning_rate": 0.00010442531993974031, "loss": 12.1621, "step": 18413 }, { "epoch": 1.0027158930795776, "grad_norm": 0.5506957085219449, "learning_rate": 0.0001044165103032118, "loss": 12.017, "step": 18414 }, { "epoch": 1.0027703470761606, "grad_norm": 0.5362068520464105, "learning_rate": 0.00010440770063233976, "loss": 12.0291, "step": 18415 }, { "epoch": 1.0028248010727436, "grad_norm": 0.6547642284338772, "learning_rate": 0.00010439889092719271, "loss": 12.3063, "step": 18416 }, { "epoch": 1.0028792550693268, "grad_norm": 0.5953052514900333, "learning_rate": 0.00010439008118783913, "loss": 12.0355, "step": 18417 }, { "epoch": 1.0029337090659098, "grad_norm": 0.5622270585981601, "learning_rate": 0.00010438127141434754, "loss": 12.1138, "step": 18418 }, { "epoch": 1.0029881630624928, "grad_norm": 0.6166438221046772, "learning_rate": 0.00010437246160678647, "loss": 12.173, "step": 18419 }, { "epoch": 1.0030426170590758, "grad_norm": 0.6443790331998598, "learning_rate": 0.00010436365176522438, "loss": 12.1626, "step": 18420 }, { "epoch": 1.0030970710556588, "grad_norm": 0.5064232929602658, "learning_rate": 0.00010435484188972982, "loss": 12.036, "step": 18421 }, { "epoch": 1.0031515250522418, "grad_norm": 0.6420988468737352, "learning_rate": 0.00010434603198037127, "loss": 12.2683, "step": 18422 }, { "epoch": 1.0032059790488248, "grad_norm": 0.5796531182415882, "learning_rate": 0.00010433722203721725, "loss": 12.0797, "step": 18423 }, { "epoch": 1.0032604330454078, "grad_norm": 0.7296537883602282, "learning_rate": 0.00010432841206033627, "loss": 12.3333, "step": 18424 }, { "epoch": 1.0033148870419908, "grad_norm": 0.5213973256119997, "learning_rate": 0.00010431960204979683, "loss": 12.0892, "step": 18425 }, { "epoch": 1.0033693410385738, "grad_norm": 0.5898352075567528, "learning_rate": 0.00010431079200566745, "loss": 11.9569, "step": 18426 }, { "epoch": 1.0034237950351568, "grad_norm": 0.6532708033340328, "learning_rate": 0.00010430198192801662, "loss": 12.0374, "step": 18427 }, { "epoch": 1.0034782490317398, "grad_norm": 0.5499115261605702, "learning_rate": 0.00010429317181691283, "loss": 11.9641, "step": 18428 }, { "epoch": 1.003532703028323, "grad_norm": 0.6449371239041375, "learning_rate": 0.00010428436167242464, "loss": 12.0602, "step": 18429 }, { "epoch": 1.003587157024906, "grad_norm": 0.7144262127444471, "learning_rate": 0.00010427555149462053, "loss": 12.108, "step": 18430 }, { "epoch": 1.003641611021489, "grad_norm": 0.577597272843995, "learning_rate": 0.00010426674128356904, "loss": 12.0951, "step": 18431 }, { "epoch": 1.003696065018072, "grad_norm": 0.5852692927340701, "learning_rate": 0.0001042579310393386, "loss": 12.0938, "step": 18432 }, { "epoch": 1.003750519014655, "grad_norm": 0.7541386181334564, "learning_rate": 0.00010424912076199782, "loss": 12.1857, "step": 18433 }, { "epoch": 1.003804973011238, "grad_norm": 0.5903454039399789, "learning_rate": 0.00010424031045161516, "loss": 12.0776, "step": 18434 }, { "epoch": 1.003859427007821, "grad_norm": 0.5015008043754426, "learning_rate": 0.00010423150010825915, "loss": 12.0856, "step": 18435 }, { "epoch": 1.003913881004404, "grad_norm": 0.6864607542844916, "learning_rate": 0.00010422268973199822, "loss": 12.1423, "step": 18436 }, { "epoch": 1.003968335000987, "grad_norm": 0.576314090636686, "learning_rate": 0.000104213879322901, "loss": 12.1007, "step": 18437 }, { "epoch": 1.00402278899757, "grad_norm": 0.5434246964817178, "learning_rate": 0.00010420506888103593, "loss": 11.9121, "step": 18438 }, { "epoch": 1.004077242994153, "grad_norm": 0.6407559040439097, "learning_rate": 0.00010419625840647156, "loss": 12.1781, "step": 18439 }, { "epoch": 1.0041316969907361, "grad_norm": 0.6164183897597848, "learning_rate": 0.00010418744789927637, "loss": 12.1224, "step": 18440 }, { "epoch": 1.0041861509873191, "grad_norm": 0.57202267820432, "learning_rate": 0.0001041786373595189, "loss": 12.081, "step": 18441 }, { "epoch": 1.004240604983902, "grad_norm": 0.5641499945102079, "learning_rate": 0.0001041698267872676, "loss": 12.1598, "step": 18442 }, { "epoch": 1.004295058980485, "grad_norm": 0.6088130538427488, "learning_rate": 0.00010416101618259104, "loss": 12.1856, "step": 18443 }, { "epoch": 1.004349512977068, "grad_norm": 0.8869373652399809, "learning_rate": 0.00010415220554555774, "loss": 12.061, "step": 18444 }, { "epoch": 1.004403966973651, "grad_norm": 0.5206873460568164, "learning_rate": 0.00010414339487623618, "loss": 12.047, "step": 18445 }, { "epoch": 1.004458420970234, "grad_norm": 0.525736502612232, "learning_rate": 0.00010413458417469491, "loss": 12.0695, "step": 18446 }, { "epoch": 1.004512874966817, "grad_norm": 0.5385043026808839, "learning_rate": 0.00010412577344100239, "loss": 12.1558, "step": 18447 }, { "epoch": 1.0045673289634, "grad_norm": 0.5321986529369179, "learning_rate": 0.00010411696267522718, "loss": 12.0883, "step": 18448 }, { "epoch": 1.004621782959983, "grad_norm": 0.5432885165476395, "learning_rate": 0.00010410815187743776, "loss": 12.0963, "step": 18449 }, { "epoch": 1.004676236956566, "grad_norm": 0.5153007684808616, "learning_rate": 0.00010409934104770269, "loss": 12.1426, "step": 18450 }, { "epoch": 1.004730690953149, "grad_norm": 0.7251781770255962, "learning_rate": 0.00010409053018609045, "loss": 12.091, "step": 18451 }, { "epoch": 1.0047851449497323, "grad_norm": 0.5546233647379865, "learning_rate": 0.00010408171929266954, "loss": 12.0329, "step": 18452 }, { "epoch": 1.0048395989463152, "grad_norm": 0.5691353512399754, "learning_rate": 0.00010407290836750849, "loss": 12.0922, "step": 18453 }, { "epoch": 1.0048940529428982, "grad_norm": 0.5097120315588252, "learning_rate": 0.00010406409741067584, "loss": 12.0595, "step": 18454 }, { "epoch": 1.0049485069394812, "grad_norm": 0.5588728055036118, "learning_rate": 0.00010405528642224011, "loss": 12.2999, "step": 18455 }, { "epoch": 1.0050029609360642, "grad_norm": 0.5455730220948464, "learning_rate": 0.00010404647540226977, "loss": 12.0505, "step": 18456 }, { "epoch": 1.0050574149326472, "grad_norm": 0.4835546737187294, "learning_rate": 0.00010403766435083337, "loss": 12.0044, "step": 18457 }, { "epoch": 1.0051118689292302, "grad_norm": 0.5266537781447859, "learning_rate": 0.0001040288532679994, "loss": 12.0936, "step": 18458 }, { "epoch": 1.0051663229258132, "grad_norm": 0.7211166245977295, "learning_rate": 0.00010402004215383638, "loss": 12.2525, "step": 18459 }, { "epoch": 1.0052207769223962, "grad_norm": 0.5973374180082196, "learning_rate": 0.00010401123100841288, "loss": 12.1424, "step": 18460 }, { "epoch": 1.0052752309189792, "grad_norm": 0.5170532918173177, "learning_rate": 0.00010400241983179735, "loss": 12.0298, "step": 18461 }, { "epoch": 1.0053296849155622, "grad_norm": 0.5375349396750642, "learning_rate": 0.00010399360862405832, "loss": 12.1485, "step": 18462 }, { "epoch": 1.0053841389121452, "grad_norm": 0.520035242682927, "learning_rate": 0.00010398479738526434, "loss": 12.1356, "step": 18463 }, { "epoch": 1.0054385929087284, "grad_norm": 0.5432332946684317, "learning_rate": 0.00010397598611548387, "loss": 11.8634, "step": 18464 }, { "epoch": 1.0054930469053114, "grad_norm": 0.5893141249318277, "learning_rate": 0.00010396717481478551, "loss": 12.1584, "step": 18465 }, { "epoch": 1.0055475009018944, "grad_norm": 0.5408766358330444, "learning_rate": 0.00010395836348323771, "loss": 12.0764, "step": 18466 }, { "epoch": 1.0056019548984774, "grad_norm": 0.5598581105351644, "learning_rate": 0.00010394955212090903, "loss": 11.7836, "step": 18467 }, { "epoch": 1.0056564088950604, "grad_norm": 0.538544943045943, "learning_rate": 0.00010394074072786794, "loss": 11.8081, "step": 18468 }, { "epoch": 1.0057108628916434, "grad_norm": 0.5679894467120316, "learning_rate": 0.00010393192930418302, "loss": 11.9564, "step": 18469 }, { "epoch": 1.0057653168882263, "grad_norm": 0.6323405377769352, "learning_rate": 0.00010392311784992275, "loss": 12.0496, "step": 18470 }, { "epoch": 1.0058197708848093, "grad_norm": 0.5766824622562673, "learning_rate": 0.00010391430636515565, "loss": 12.0748, "step": 18471 }, { "epoch": 1.0058742248813923, "grad_norm": 0.5564550229807947, "learning_rate": 0.00010390549484995024, "loss": 11.993, "step": 18472 }, { "epoch": 1.0059286788779753, "grad_norm": 0.6228642070002696, "learning_rate": 0.00010389668330437507, "loss": 11.996, "step": 18473 }, { "epoch": 1.0059831328745583, "grad_norm": 0.5529858077664811, "learning_rate": 0.00010388787172849863, "loss": 12.076, "step": 18474 }, { "epoch": 1.0060375868711415, "grad_norm": 0.5923726170221378, "learning_rate": 0.00010387906012238943, "loss": 12.1028, "step": 18475 }, { "epoch": 1.0060920408677245, "grad_norm": 0.5525567253092625, "learning_rate": 0.00010387024848611604, "loss": 11.9582, "step": 18476 }, { "epoch": 1.0061464948643075, "grad_norm": 0.5672762174972753, "learning_rate": 0.00010386143681974692, "loss": 11.9318, "step": 18477 }, { "epoch": 1.0062009488608905, "grad_norm": 0.5536384723037703, "learning_rate": 0.00010385262512335063, "loss": 12.0877, "step": 18478 }, { "epoch": 1.0062554028574735, "grad_norm": 0.5384526911923556, "learning_rate": 0.00010384381339699567, "loss": 12.0977, "step": 18479 }, { "epoch": 1.0063098568540565, "grad_norm": 0.5593960088882626, "learning_rate": 0.00010383500164075059, "loss": 12.0818, "step": 18480 }, { "epoch": 1.0063643108506395, "grad_norm": 0.6027909589961592, "learning_rate": 0.00010382618985468389, "loss": 12.0996, "step": 18481 }, { "epoch": 1.0064187648472225, "grad_norm": 0.6295374486803064, "learning_rate": 0.00010381737803886409, "loss": 12.3242, "step": 18482 }, { "epoch": 1.0064732188438055, "grad_norm": 0.5972378296093238, "learning_rate": 0.00010380856619335973, "loss": 12.1621, "step": 18483 }, { "epoch": 1.0065276728403885, "grad_norm": 0.6392123771019983, "learning_rate": 0.0001037997543182393, "loss": 12.1677, "step": 18484 }, { "epoch": 1.0065821268369715, "grad_norm": 0.5159467258469377, "learning_rate": 0.00010379094241357134, "loss": 12.0815, "step": 18485 }, { "epoch": 1.0066365808335545, "grad_norm": 0.5290614198308999, "learning_rate": 0.0001037821304794244, "loss": 12.0834, "step": 18486 }, { "epoch": 1.0066910348301377, "grad_norm": 0.5618735098496637, "learning_rate": 0.00010377331851586699, "loss": 12.0976, "step": 18487 }, { "epoch": 1.0067454888267207, "grad_norm": 0.5436998168386749, "learning_rate": 0.00010376450652296759, "loss": 12.0281, "step": 18488 }, { "epoch": 1.0067999428233037, "grad_norm": 0.5783917074189285, "learning_rate": 0.00010375569450079476, "loss": 12.0415, "step": 18489 }, { "epoch": 1.0068543968198866, "grad_norm": 0.5613412995931357, "learning_rate": 0.00010374688244941707, "loss": 12.0988, "step": 18490 }, { "epoch": 1.0069088508164696, "grad_norm": 0.5748374790527079, "learning_rate": 0.00010373807036890291, "loss": 11.9973, "step": 18491 }, { "epoch": 1.0069633048130526, "grad_norm": 0.512362412059344, "learning_rate": 0.00010372925825932093, "loss": 11.956, "step": 18492 }, { "epoch": 1.0070177588096356, "grad_norm": 0.6758198926200745, "learning_rate": 0.00010372044612073961, "loss": 12.0425, "step": 18493 }, { "epoch": 1.0070722128062186, "grad_norm": 0.5579764231238112, "learning_rate": 0.00010371163395322749, "loss": 12.06, "step": 18494 }, { "epoch": 1.0071266668028016, "grad_norm": 0.5418374459029155, "learning_rate": 0.00010370282175685308, "loss": 12.021, "step": 18495 }, { "epoch": 1.0071811207993846, "grad_norm": 0.6048706028105221, "learning_rate": 0.00010369400953168489, "loss": 12.1112, "step": 18496 }, { "epoch": 1.0072355747959676, "grad_norm": 0.5569148287664402, "learning_rate": 0.00010368519727779147, "loss": 12.1173, "step": 18497 }, { "epoch": 1.0072900287925506, "grad_norm": 0.527313019239876, "learning_rate": 0.00010367638499524132, "loss": 12.1017, "step": 18498 }, { "epoch": 1.0073444827891338, "grad_norm": 0.5977487624522466, "learning_rate": 0.00010366757268410302, "loss": 12.112, "step": 18499 }, { "epoch": 1.0073989367857168, "grad_norm": 0.5721359655878242, "learning_rate": 0.00010365876034444506, "loss": 12.0808, "step": 18500 }, { "epoch": 1.0074533907822998, "grad_norm": 0.5107380598881754, "learning_rate": 0.00010364994797633594, "loss": 11.9403, "step": 18501 }, { "epoch": 1.0075078447788828, "grad_norm": 0.6051001091539683, "learning_rate": 0.0001036411355798442, "loss": 12.0673, "step": 18502 }, { "epoch": 1.0075622987754658, "grad_norm": 0.6471543229360646, "learning_rate": 0.00010363232315503841, "loss": 12.1068, "step": 18503 }, { "epoch": 1.0076167527720488, "grad_norm": 0.564946503043798, "learning_rate": 0.00010362351070198705, "loss": 11.9981, "step": 18504 }, { "epoch": 1.0076712067686318, "grad_norm": 0.5337812757312993, "learning_rate": 0.00010361469822075869, "loss": 12.1306, "step": 18505 }, { "epoch": 1.0077256607652147, "grad_norm": 0.6579308245165749, "learning_rate": 0.0001036058857114218, "loss": 12.0056, "step": 18506 }, { "epoch": 1.0077801147617977, "grad_norm": 0.5874164251985708, "learning_rate": 0.00010359707317404494, "loss": 12.1955, "step": 18507 }, { "epoch": 1.0078345687583807, "grad_norm": 0.5816155901461761, "learning_rate": 0.00010358826060869664, "loss": 12.006, "step": 18508 }, { "epoch": 1.0078890227549637, "grad_norm": 0.6768919139281958, "learning_rate": 0.00010357944801544541, "loss": 11.9988, "step": 18509 }, { "epoch": 1.007943476751547, "grad_norm": 0.6709855862271281, "learning_rate": 0.00010357063539435985, "loss": 12.206, "step": 18510 }, { "epoch": 1.00799793074813, "grad_norm": 0.5917606577492475, "learning_rate": 0.00010356182274550838, "loss": 12.0946, "step": 18511 }, { "epoch": 1.008052384744713, "grad_norm": 0.5531900919101767, "learning_rate": 0.00010355301006895958, "loss": 12.1235, "step": 18512 }, { "epoch": 1.008106838741296, "grad_norm": 0.5723874950368045, "learning_rate": 0.00010354419736478198, "loss": 12.0916, "step": 18513 }, { "epoch": 1.008161292737879, "grad_norm": 0.6002824915363437, "learning_rate": 0.00010353538463304411, "loss": 12.1344, "step": 18514 }, { "epoch": 1.008215746734462, "grad_norm": 0.5513697032116269, "learning_rate": 0.00010352657187381451, "loss": 12.0326, "step": 18515 }, { "epoch": 1.008270200731045, "grad_norm": 0.5574745845879414, "learning_rate": 0.0001035177590871617, "loss": 12.0332, "step": 18516 }, { "epoch": 1.008324654727628, "grad_norm": 0.5538227245458262, "learning_rate": 0.00010350894627315417, "loss": 12.177, "step": 18517 }, { "epoch": 1.0083791087242109, "grad_norm": 0.6256207524064054, "learning_rate": 0.00010350013343186051, "loss": 12.1336, "step": 18518 }, { "epoch": 1.0084335627207939, "grad_norm": 0.544668977596636, "learning_rate": 0.0001034913205633492, "loss": 12.1373, "step": 18519 }, { "epoch": 1.0084880167173769, "grad_norm": 0.5971507286826666, "learning_rate": 0.00010348250766768885, "loss": 12.0926, "step": 18520 }, { "epoch": 1.0085424707139599, "grad_norm": 0.5554077923661265, "learning_rate": 0.00010347369474494791, "loss": 12.0171, "step": 18521 }, { "epoch": 1.008596924710543, "grad_norm": 0.6661562125947798, "learning_rate": 0.00010346488179519491, "loss": 12.0987, "step": 18522 }, { "epoch": 1.008651378707126, "grad_norm": 0.5218887040607149, "learning_rate": 0.00010345606881849843, "loss": 12.1033, "step": 18523 }, { "epoch": 1.008705832703709, "grad_norm": 0.5641752186109523, "learning_rate": 0.00010344725581492695, "loss": 12.2255, "step": 18524 }, { "epoch": 1.008760286700292, "grad_norm": 0.5770632410257892, "learning_rate": 0.00010343844278454908, "loss": 12.1212, "step": 18525 }, { "epoch": 1.008814740696875, "grad_norm": 0.5254078477638074, "learning_rate": 0.00010342962972743329, "loss": 11.8766, "step": 18526 }, { "epoch": 1.008869194693458, "grad_norm": 0.5831145330438583, "learning_rate": 0.00010342081664364811, "loss": 11.9643, "step": 18527 }, { "epoch": 1.008923648690041, "grad_norm": 0.556557972163656, "learning_rate": 0.00010341200353326211, "loss": 12.1719, "step": 18528 }, { "epoch": 1.008978102686624, "grad_norm": 0.5579480008859713, "learning_rate": 0.00010340319039634376, "loss": 12.1567, "step": 18529 }, { "epoch": 1.009032556683207, "grad_norm": 0.5204330978413887, "learning_rate": 0.00010339437723296166, "loss": 12.0736, "step": 18530 }, { "epoch": 1.00908701067979, "grad_norm": 0.5512177356789631, "learning_rate": 0.0001033855640431843, "loss": 12.0599, "step": 18531 }, { "epoch": 1.009141464676373, "grad_norm": 0.5358028471647213, "learning_rate": 0.00010337675082708023, "loss": 11.9859, "step": 18532 }, { "epoch": 1.009195918672956, "grad_norm": 0.5478092408227072, "learning_rate": 0.00010336793758471797, "loss": 11.9727, "step": 18533 }, { "epoch": 1.0092503726695392, "grad_norm": 0.5514440248075692, "learning_rate": 0.00010335912431616608, "loss": 12.0721, "step": 18534 }, { "epoch": 1.0093048266661222, "grad_norm": 0.659187372167803, "learning_rate": 0.00010335031102149306, "loss": 12.1512, "step": 18535 }, { "epoch": 1.0093592806627052, "grad_norm": 0.539932060291416, "learning_rate": 0.00010334149770076747, "loss": 12.1408, "step": 18536 }, { "epoch": 1.0094137346592882, "grad_norm": 0.6274236836397442, "learning_rate": 0.00010333268435405783, "loss": 12.2776, "step": 18537 }, { "epoch": 1.0094681886558712, "grad_norm": 0.6185688062471368, "learning_rate": 0.00010332387098143267, "loss": 12.1801, "step": 18538 }, { "epoch": 1.0095226426524542, "grad_norm": 0.5242210851813076, "learning_rate": 0.00010331505758296054, "loss": 12.0495, "step": 18539 }, { "epoch": 1.0095770966490372, "grad_norm": 0.5033737270364694, "learning_rate": 0.00010330624415870998, "loss": 11.9722, "step": 18540 }, { "epoch": 1.0096315506456202, "grad_norm": 0.5950935511479065, "learning_rate": 0.00010329743070874949, "loss": 12.1449, "step": 18541 }, { "epoch": 1.0096860046422031, "grad_norm": 0.5280528480410401, "learning_rate": 0.00010328861723314763, "loss": 11.964, "step": 18542 }, { "epoch": 1.0097404586387861, "grad_norm": 0.5309463025005629, "learning_rate": 0.00010327980373197294, "loss": 12.0875, "step": 18543 }, { "epoch": 1.0097949126353691, "grad_norm": 0.5320408860846423, "learning_rate": 0.00010327099020529393, "loss": 12.0683, "step": 18544 }, { "epoch": 1.0098493666319523, "grad_norm": 0.6038898483784096, "learning_rate": 0.00010326217665317916, "loss": 11.9718, "step": 18545 }, { "epoch": 1.0099038206285353, "grad_norm": 0.6014477134507331, "learning_rate": 0.00010325336307569717, "loss": 12.13, "step": 18546 }, { "epoch": 1.0099582746251183, "grad_norm": 0.5495599267243306, "learning_rate": 0.00010324454947291647, "loss": 12.045, "step": 18547 }, { "epoch": 1.0100127286217013, "grad_norm": 0.5280405649469392, "learning_rate": 0.00010323573584490561, "loss": 12.0402, "step": 18548 }, { "epoch": 1.0100671826182843, "grad_norm": 0.5590605689991598, "learning_rate": 0.00010322692219173314, "loss": 12.1249, "step": 18549 }, { "epoch": 1.0101216366148673, "grad_norm": 0.6570155279799302, "learning_rate": 0.00010321810851346758, "loss": 12.0431, "step": 18550 }, { "epoch": 1.0101760906114503, "grad_norm": 0.5111924942415713, "learning_rate": 0.00010320929481017743, "loss": 12.0129, "step": 18551 }, { "epoch": 1.0102305446080333, "grad_norm": 0.5239726843256711, "learning_rate": 0.0001032004810819313, "loss": 12.0665, "step": 18552 }, { "epoch": 1.0102849986046163, "grad_norm": 0.5345809725825313, "learning_rate": 0.00010319166732879768, "loss": 12.0353, "step": 18553 }, { "epoch": 1.0103394526011993, "grad_norm": 0.630011898885438, "learning_rate": 0.00010318285355084512, "loss": 12.1699, "step": 18554 }, { "epoch": 1.0103939065977823, "grad_norm": 0.5191116007882293, "learning_rate": 0.00010317403974814217, "loss": 11.9667, "step": 18555 }, { "epoch": 1.0104483605943653, "grad_norm": 0.4831786103221667, "learning_rate": 0.00010316522592075734, "loss": 12.1, "step": 18556 }, { "epoch": 1.0105028145909485, "grad_norm": 0.5561397048347925, "learning_rate": 0.00010315641206875919, "loss": 12.0128, "step": 18557 }, { "epoch": 1.0105572685875315, "grad_norm": 0.6440097086515711, "learning_rate": 0.00010314759819221624, "loss": 12.2757, "step": 18558 }, { "epoch": 1.0106117225841145, "grad_norm": 0.656110245218873, "learning_rate": 0.00010313878429119705, "loss": 12.1454, "step": 18559 }, { "epoch": 1.0106661765806975, "grad_norm": 0.5903269479289363, "learning_rate": 0.00010312997036577014, "loss": 11.9893, "step": 18560 }, { "epoch": 1.0107206305772805, "grad_norm": 0.6022232729710705, "learning_rate": 0.00010312115641600408, "loss": 12.0534, "step": 18561 }, { "epoch": 1.0107750845738634, "grad_norm": 0.5060077336455366, "learning_rate": 0.00010311234244196735, "loss": 11.9423, "step": 18562 }, { "epoch": 1.0108295385704464, "grad_norm": 0.5435731159339288, "learning_rate": 0.00010310352844372855, "loss": 12.089, "step": 18563 }, { "epoch": 1.0108839925670294, "grad_norm": 0.5674002940175599, "learning_rate": 0.00010309471442135617, "loss": 12.0693, "step": 18564 }, { "epoch": 1.0109384465636124, "grad_norm": 0.5404684656688796, "learning_rate": 0.0001030859003749188, "loss": 12.1342, "step": 18565 }, { "epoch": 1.0109929005601954, "grad_norm": 0.5961366588591839, "learning_rate": 0.00010307708630448494, "loss": 11.9892, "step": 18566 }, { "epoch": 1.0110473545567784, "grad_norm": 0.5454753540434794, "learning_rate": 0.00010306827221012312, "loss": 12.0481, "step": 18567 }, { "epoch": 1.0111018085533616, "grad_norm": 0.566266637646021, "learning_rate": 0.0001030594580919019, "loss": 12.1063, "step": 18568 }, { "epoch": 1.0111562625499446, "grad_norm": 0.5533092897930931, "learning_rate": 0.00010305064394988984, "loss": 12.0457, "step": 18569 }, { "epoch": 1.0112107165465276, "grad_norm": 0.5386943447678215, "learning_rate": 0.00010304182978415544, "loss": 11.8717, "step": 18570 }, { "epoch": 1.0112651705431106, "grad_norm": 0.5470324120297019, "learning_rate": 0.0001030330155947673, "loss": 12.0962, "step": 18571 }, { "epoch": 1.0113196245396936, "grad_norm": 0.5911071622271485, "learning_rate": 0.00010302420138179391, "loss": 12.2112, "step": 18572 }, { "epoch": 1.0113740785362766, "grad_norm": 0.5854015202959052, "learning_rate": 0.00010301538714530379, "loss": 12.0793, "step": 18573 }, { "epoch": 1.0114285325328596, "grad_norm": 0.5698593275883355, "learning_rate": 0.00010300657288536553, "loss": 12.0904, "step": 18574 }, { "epoch": 1.0114829865294426, "grad_norm": 0.5832187817899874, "learning_rate": 0.00010299775860204768, "loss": 12.1363, "step": 18575 }, { "epoch": 1.0115374405260256, "grad_norm": 0.5679029416512514, "learning_rate": 0.00010298894429541874, "loss": 12.1225, "step": 18576 }, { "epoch": 1.0115918945226086, "grad_norm": 0.5673043538062682, "learning_rate": 0.00010298012996554727, "loss": 11.9131, "step": 18577 }, { "epoch": 1.0116463485191916, "grad_norm": 0.5366736456338547, "learning_rate": 0.00010297131561250182, "loss": 11.7861, "step": 18578 }, { "epoch": 1.0117008025157745, "grad_norm": 0.6059117847345783, "learning_rate": 0.00010296250123635087, "loss": 12.0755, "step": 18579 }, { "epoch": 1.0117552565123578, "grad_norm": 0.5766746731730145, "learning_rate": 0.00010295368683716305, "loss": 12.1941, "step": 18580 }, { "epoch": 1.0118097105089408, "grad_norm": 0.5975853286218374, "learning_rate": 0.0001029448724150069, "loss": 12.2667, "step": 18581 }, { "epoch": 1.0118641645055237, "grad_norm": 0.5813723381540811, "learning_rate": 0.0001029360579699509, "loss": 12.0985, "step": 18582 }, { "epoch": 1.0119186185021067, "grad_norm": 0.5144480498398242, "learning_rate": 0.00010292724350206359, "loss": 12.1034, "step": 18583 }, { "epoch": 1.0119730724986897, "grad_norm": 0.5496274656727532, "learning_rate": 0.00010291842901141357, "loss": 11.8125, "step": 18584 }, { "epoch": 1.0120275264952727, "grad_norm": 0.5870109527864531, "learning_rate": 0.00010290961449806935, "loss": 12.1074, "step": 18585 }, { "epoch": 1.0120819804918557, "grad_norm": 0.5517661081173563, "learning_rate": 0.00010290079996209949, "loss": 12.1656, "step": 18586 }, { "epoch": 1.0121364344884387, "grad_norm": 0.5204688912020631, "learning_rate": 0.00010289198540357252, "loss": 12.1468, "step": 18587 }, { "epoch": 1.0121908884850217, "grad_norm": 0.6109212134761495, "learning_rate": 0.00010288317082255698, "loss": 12.0229, "step": 18588 }, { "epoch": 1.0122453424816047, "grad_norm": 0.50168078275934, "learning_rate": 0.00010287435621912145, "loss": 12.0959, "step": 18589 }, { "epoch": 1.0122997964781877, "grad_norm": 0.5870154397372992, "learning_rate": 0.00010286554159333439, "loss": 12.1773, "step": 18590 }, { "epoch": 1.0123542504747707, "grad_norm": 0.5661339588060216, "learning_rate": 0.00010285672694526445, "loss": 12.1347, "step": 18591 }, { "epoch": 1.012408704471354, "grad_norm": 0.5685607270196777, "learning_rate": 0.0001028479122749801, "loss": 11.9809, "step": 18592 }, { "epoch": 1.0124631584679369, "grad_norm": 0.586910468464697, "learning_rate": 0.0001028390975825499, "loss": 12.1174, "step": 18593 }, { "epoch": 1.0125176124645199, "grad_norm": 0.5493072224736333, "learning_rate": 0.00010283028286804241, "loss": 12.0765, "step": 18594 }, { "epoch": 1.0125720664611029, "grad_norm": 0.6860285133901204, "learning_rate": 0.00010282146813152616, "loss": 12.1214, "step": 18595 }, { "epoch": 1.0126265204576859, "grad_norm": 0.5578553443266105, "learning_rate": 0.00010281265337306971, "loss": 12.1132, "step": 18596 }, { "epoch": 1.0126809744542689, "grad_norm": 0.760650444662143, "learning_rate": 0.00010280383859274159, "loss": 12.1466, "step": 18597 }, { "epoch": 1.0127354284508518, "grad_norm": 0.5341634657264303, "learning_rate": 0.00010279502379061035, "loss": 12.0617, "step": 18598 }, { "epoch": 1.0127898824474348, "grad_norm": 0.598461962481969, "learning_rate": 0.00010278620896674453, "loss": 12.0937, "step": 18599 }, { "epoch": 1.0128443364440178, "grad_norm": 0.6275053519413482, "learning_rate": 0.00010277739412121267, "loss": 12.0502, "step": 18600 }, { "epoch": 1.0128987904406008, "grad_norm": 0.5627921254857454, "learning_rate": 0.00010276857925408337, "loss": 12.0195, "step": 18601 }, { "epoch": 1.0129532444371838, "grad_norm": 0.6277134432781751, "learning_rate": 0.00010275976436542509, "loss": 12.0657, "step": 18602 }, { "epoch": 1.0130076984337668, "grad_norm": 0.552236707841139, "learning_rate": 0.00010275094945530645, "loss": 12.087, "step": 18603 }, { "epoch": 1.01306215243035, "grad_norm": 0.5339410393649697, "learning_rate": 0.00010274213452379595, "loss": 12.0573, "step": 18604 }, { "epoch": 1.013116606426933, "grad_norm": 0.5199770355858867, "learning_rate": 0.00010273331957096215, "loss": 12.0424, "step": 18605 }, { "epoch": 1.013171060423516, "grad_norm": 0.6059916556226567, "learning_rate": 0.00010272450459687362, "loss": 12.1755, "step": 18606 }, { "epoch": 1.013225514420099, "grad_norm": 0.6439764951819686, "learning_rate": 0.00010271568960159887, "loss": 11.9937, "step": 18607 }, { "epoch": 1.013279968416682, "grad_norm": 0.5616246688610501, "learning_rate": 0.00010270687458520645, "loss": 12.2604, "step": 18608 }, { "epoch": 1.013334422413265, "grad_norm": 0.5976513162085014, "learning_rate": 0.00010269805954776495, "loss": 12.0994, "step": 18609 }, { "epoch": 1.013388876409848, "grad_norm": 0.5814974973020262, "learning_rate": 0.00010268924448934285, "loss": 12.225, "step": 18610 }, { "epoch": 1.013443330406431, "grad_norm": 0.5557409579034941, "learning_rate": 0.00010268042941000874, "loss": 12.1234, "step": 18611 }, { "epoch": 1.013497784403014, "grad_norm": 0.5961948506538783, "learning_rate": 0.00010267161430983119, "loss": 12.1489, "step": 18612 }, { "epoch": 1.013552238399597, "grad_norm": 0.5275476176618449, "learning_rate": 0.00010266279918887872, "loss": 12.0462, "step": 18613 }, { "epoch": 1.01360669239618, "grad_norm": 0.46270763533338205, "learning_rate": 0.00010265398404721984, "loss": 12.0466, "step": 18614 }, { "epoch": 1.0136611463927632, "grad_norm": 0.5396062804897083, "learning_rate": 0.00010264516888492315, "loss": 12.1478, "step": 18615 }, { "epoch": 1.0137156003893462, "grad_norm": 0.5603888801196434, "learning_rate": 0.00010263635370205722, "loss": 12.0761, "step": 18616 }, { "epoch": 1.0137700543859292, "grad_norm": 0.5949006213362621, "learning_rate": 0.00010262753849869051, "loss": 12.1037, "step": 18617 }, { "epoch": 1.0138245083825121, "grad_norm": 0.5478553417772944, "learning_rate": 0.00010261872327489164, "loss": 12.0866, "step": 18618 }, { "epoch": 1.0138789623790951, "grad_norm": 0.5663167078664715, "learning_rate": 0.00010260990803072915, "loss": 12.1534, "step": 18619 }, { "epoch": 1.0139334163756781, "grad_norm": 0.5931662652583294, "learning_rate": 0.00010260109276627154, "loss": 12.0215, "step": 18620 }, { "epoch": 1.0139878703722611, "grad_norm": 0.5409318674949393, "learning_rate": 0.00010259227748158747, "loss": 12.1054, "step": 18621 }, { "epoch": 1.0140423243688441, "grad_norm": 0.5896010595760806, "learning_rate": 0.00010258346217674532, "loss": 12.1124, "step": 18622 }, { "epoch": 1.014096778365427, "grad_norm": 0.5106183988735412, "learning_rate": 0.0001025746468518138, "loss": 12.1734, "step": 18623 }, { "epoch": 1.01415123236201, "grad_norm": 0.5312656153615478, "learning_rate": 0.00010256583150686136, "loss": 11.9229, "step": 18624 }, { "epoch": 1.014205686358593, "grad_norm": 0.5194900895636492, "learning_rate": 0.00010255701614195661, "loss": 12.1093, "step": 18625 }, { "epoch": 1.014260140355176, "grad_norm": 0.5859684979836088, "learning_rate": 0.00010254820075716809, "loss": 12.1531, "step": 18626 }, { "epoch": 1.0143145943517593, "grad_norm": 0.5551928335979637, "learning_rate": 0.00010253938535256431, "loss": 12.1345, "step": 18627 }, { "epoch": 1.0143690483483423, "grad_norm": 0.5775510465028477, "learning_rate": 0.00010253056992821382, "loss": 12.1252, "step": 18628 }, { "epoch": 1.0144235023449253, "grad_norm": 0.5427232453071151, "learning_rate": 0.0001025217544841852, "loss": 12.0609, "step": 18629 }, { "epoch": 1.0144779563415083, "grad_norm": 0.6115163583493086, "learning_rate": 0.00010251293902054701, "loss": 12.0571, "step": 18630 }, { "epoch": 1.0145324103380913, "grad_norm": 0.5386722267689895, "learning_rate": 0.0001025041235373678, "loss": 12.0379, "step": 18631 }, { "epoch": 1.0145868643346743, "grad_norm": 0.5506371185172241, "learning_rate": 0.0001024953080347161, "loss": 12.0624, "step": 18632 }, { "epoch": 1.0146413183312573, "grad_norm": 0.5827098744078735, "learning_rate": 0.00010248649251266042, "loss": 11.9455, "step": 18633 }, { "epoch": 1.0146957723278403, "grad_norm": 0.592762604673711, "learning_rate": 0.0001024776769712694, "loss": 12.078, "step": 18634 }, { "epoch": 1.0147502263244232, "grad_norm": 0.5656728108497389, "learning_rate": 0.00010246886141061154, "loss": 12.1246, "step": 18635 }, { "epoch": 1.0148046803210062, "grad_norm": 0.5517144152095224, "learning_rate": 0.00010246004583075544, "loss": 12.0933, "step": 18636 }, { "epoch": 1.0148591343175892, "grad_norm": 0.5596194562044612, "learning_rate": 0.00010245123023176957, "loss": 12.0172, "step": 18637 }, { "epoch": 1.0149135883141724, "grad_norm": 0.5771751091706115, "learning_rate": 0.00010244241461372252, "loss": 12.2037, "step": 18638 }, { "epoch": 1.0149680423107554, "grad_norm": 0.5211343834215023, "learning_rate": 0.00010243359897668283, "loss": 12.0722, "step": 18639 }, { "epoch": 1.0150224963073384, "grad_norm": 0.5699645617576364, "learning_rate": 0.00010242478332071907, "loss": 12.1202, "step": 18640 }, { "epoch": 1.0150769503039214, "grad_norm": 0.5044244164856309, "learning_rate": 0.00010241596764589985, "loss": 12.0559, "step": 18641 }, { "epoch": 1.0151314043005044, "grad_norm": 0.534380669527365, "learning_rate": 0.0001024071519522936, "loss": 11.8102, "step": 18642 }, { "epoch": 1.0151858582970874, "grad_norm": 0.5889098771521126, "learning_rate": 0.00010239833623996895, "loss": 12.0705, "step": 18643 }, { "epoch": 1.0152403122936704, "grad_norm": 0.541823910174673, "learning_rate": 0.00010238952050899442, "loss": 12.1835, "step": 18644 }, { "epoch": 1.0152947662902534, "grad_norm": 0.6260545936225633, "learning_rate": 0.00010238070475943857, "loss": 12.0786, "step": 18645 }, { "epoch": 1.0153492202868364, "grad_norm": 0.51878762670923, "learning_rate": 0.00010237188899137, "loss": 11.8656, "step": 18646 }, { "epoch": 1.0154036742834194, "grad_norm": 0.5707399005959253, "learning_rate": 0.00010236307320485721, "loss": 12.0872, "step": 18647 }, { "epoch": 1.0154581282800024, "grad_norm": 0.5309342136202445, "learning_rate": 0.00010235425739996876, "loss": 12.2071, "step": 18648 }, { "epoch": 1.0155125822765854, "grad_norm": 0.5721763583756236, "learning_rate": 0.00010234544157677322, "loss": 12.201, "step": 18649 }, { "epoch": 1.0155670362731686, "grad_norm": 0.5475461071889699, "learning_rate": 0.00010233662573533909, "loss": 12.103, "step": 18650 }, { "epoch": 1.0156214902697516, "grad_norm": 0.5219852763402135, "learning_rate": 0.00010232780987573502, "loss": 12.017, "step": 18651 }, { "epoch": 1.0156759442663346, "grad_norm": 0.5092900645153045, "learning_rate": 0.0001023189939980295, "loss": 12.0555, "step": 18652 }, { "epoch": 1.0157303982629176, "grad_norm": 0.5388537015061275, "learning_rate": 0.00010231017810229108, "loss": 12.0605, "step": 18653 }, { "epoch": 1.0157848522595005, "grad_norm": 0.588907815625058, "learning_rate": 0.00010230136218858832, "loss": 12.0845, "step": 18654 }, { "epoch": 1.0158393062560835, "grad_norm": 0.5230904598736458, "learning_rate": 0.00010229254625698981, "loss": 12.1564, "step": 18655 }, { "epoch": 1.0158937602526665, "grad_norm": 0.5401076147941122, "learning_rate": 0.00010228373030756404, "loss": 12.0952, "step": 18656 }, { "epoch": 1.0159482142492495, "grad_norm": 0.5646841759125969, "learning_rate": 0.00010227491434037963, "loss": 12.0907, "step": 18657 }, { "epoch": 1.0160026682458325, "grad_norm": 0.5290137046555882, "learning_rate": 0.0001022660983555051, "loss": 12.0885, "step": 18658 }, { "epoch": 1.0160571222424155, "grad_norm": 0.5215509143252841, "learning_rate": 0.000102257282353009, "loss": 12.0678, "step": 18659 }, { "epoch": 1.0161115762389985, "grad_norm": 0.5440424781375254, "learning_rate": 0.00010224846633295988, "loss": 12.1164, "step": 18660 }, { "epoch": 1.0161660302355815, "grad_norm": 0.6125029465965208, "learning_rate": 0.00010223965029542632, "loss": 12.1944, "step": 18661 }, { "epoch": 1.0162204842321647, "grad_norm": 0.5361094072719735, "learning_rate": 0.00010223083424047689, "loss": 11.9944, "step": 18662 }, { "epoch": 1.0162749382287477, "grad_norm": 0.5382271270974258, "learning_rate": 0.00010222201816818009, "loss": 12.0014, "step": 18663 }, { "epoch": 1.0163293922253307, "grad_norm": 0.5677736707067792, "learning_rate": 0.00010221320207860452, "loss": 11.8384, "step": 18664 }, { "epoch": 1.0163838462219137, "grad_norm": 0.5571622510638461, "learning_rate": 0.0001022043859718187, "loss": 12.0366, "step": 18665 }, { "epoch": 1.0164383002184967, "grad_norm": 0.562284739796698, "learning_rate": 0.00010219556984789123, "loss": 12.062, "step": 18666 }, { "epoch": 1.0164927542150797, "grad_norm": 0.5145086992467447, "learning_rate": 0.00010218675370689061, "loss": 11.8804, "step": 18667 }, { "epoch": 1.0165472082116627, "grad_norm": 0.6198656528706378, "learning_rate": 0.00010217793754888544, "loss": 12.1277, "step": 18668 }, { "epoch": 1.0166016622082457, "grad_norm": 0.5516678091316137, "learning_rate": 0.00010216912137394428, "loss": 12.085, "step": 18669 }, { "epoch": 1.0166561162048287, "grad_norm": 0.576048071907269, "learning_rate": 0.00010216030518213564, "loss": 12.0683, "step": 18670 }, { "epoch": 1.0167105702014116, "grad_norm": 0.5530427825086842, "learning_rate": 0.00010215148897352814, "loss": 12.06, "step": 18671 }, { "epoch": 1.0167650241979946, "grad_norm": 0.5391895765062983, "learning_rate": 0.00010214267274819027, "loss": 12.0372, "step": 18672 }, { "epoch": 1.0168194781945776, "grad_norm": 0.5363936955345292, "learning_rate": 0.00010213385650619063, "loss": 11.9683, "step": 18673 }, { "epoch": 1.0168739321911608, "grad_norm": 0.5527397413340591, "learning_rate": 0.00010212504024759775, "loss": 12.1035, "step": 18674 }, { "epoch": 1.0169283861877438, "grad_norm": 0.5391156205400923, "learning_rate": 0.00010211622397248022, "loss": 12.0273, "step": 18675 }, { "epoch": 1.0169828401843268, "grad_norm": 0.5325203017680954, "learning_rate": 0.00010210740768090659, "loss": 12.134, "step": 18676 }, { "epoch": 1.0170372941809098, "grad_norm": 0.5224397247187977, "learning_rate": 0.00010209859137294535, "loss": 12.087, "step": 18677 }, { "epoch": 1.0170917481774928, "grad_norm": 0.508210641030574, "learning_rate": 0.00010208977504866514, "loss": 12.1013, "step": 18678 }, { "epoch": 1.0171462021740758, "grad_norm": 0.5594443000847564, "learning_rate": 0.0001020809587081345, "loss": 12.2038, "step": 18679 }, { "epoch": 1.0172006561706588, "grad_norm": 0.5325988145437173, "learning_rate": 0.00010207214235142197, "loss": 12.0305, "step": 18680 }, { "epoch": 1.0172551101672418, "grad_norm": 0.46681354969671174, "learning_rate": 0.00010206332597859614, "loss": 12.0177, "step": 18681 }, { "epoch": 1.0173095641638248, "grad_norm": 0.55466047261685, "learning_rate": 0.00010205450958972549, "loss": 12.1903, "step": 18682 }, { "epoch": 1.0173640181604078, "grad_norm": 0.5211680837345727, "learning_rate": 0.00010204569318487867, "loss": 12.0376, "step": 18683 }, { "epoch": 1.0174184721569908, "grad_norm": 0.531284352544976, "learning_rate": 0.00010203687676412416, "loss": 12.1672, "step": 18684 }, { "epoch": 1.017472926153574, "grad_norm": 0.5212183307590827, "learning_rate": 0.00010202806032753059, "loss": 12.1094, "step": 18685 }, { "epoch": 1.017527380150157, "grad_norm": 0.5192248511028463, "learning_rate": 0.00010201924387516648, "loss": 11.8798, "step": 18686 }, { "epoch": 1.01758183414674, "grad_norm": 0.5739008702696496, "learning_rate": 0.00010201042740710039, "loss": 12.0077, "step": 18687 }, { "epoch": 1.017636288143323, "grad_norm": 0.5127739004057067, "learning_rate": 0.00010200161092340083, "loss": 12.0929, "step": 18688 }, { "epoch": 1.017690742139906, "grad_norm": 0.5732873555817004, "learning_rate": 0.00010199279442413645, "loss": 12.0487, "step": 18689 }, { "epoch": 1.017745196136489, "grad_norm": 0.609518732694126, "learning_rate": 0.00010198397790937577, "loss": 12.1473, "step": 18690 }, { "epoch": 1.017799650133072, "grad_norm": 0.5678661229413832, "learning_rate": 0.00010197516137918734, "loss": 11.9684, "step": 18691 }, { "epoch": 1.017854104129655, "grad_norm": 0.6481102039488043, "learning_rate": 0.00010196634483363974, "loss": 12.0967, "step": 18692 }, { "epoch": 1.017908558126238, "grad_norm": 0.6070736689133404, "learning_rate": 0.00010195752827280149, "loss": 12.0106, "step": 18693 }, { "epoch": 1.017963012122821, "grad_norm": 0.5671805438971573, "learning_rate": 0.00010194871169674117, "loss": 12.1093, "step": 18694 }, { "epoch": 1.018017466119404, "grad_norm": 0.5596878343691645, "learning_rate": 0.00010193989510552732, "loss": 12.1828, "step": 18695 }, { "epoch": 1.018071920115987, "grad_norm": 0.5731959237727028, "learning_rate": 0.00010193107849922859, "loss": 12.1028, "step": 18696 }, { "epoch": 1.0181263741125701, "grad_norm": 0.557951890413181, "learning_rate": 0.0001019222618779134, "loss": 12.0865, "step": 18697 }, { "epoch": 1.0181808281091531, "grad_norm": 0.5547917029109849, "learning_rate": 0.00010191344524165043, "loss": 12.0514, "step": 18698 }, { "epoch": 1.018235282105736, "grad_norm": 0.536152548658443, "learning_rate": 0.00010190462859050813, "loss": 12.0461, "step": 18699 }, { "epoch": 1.018289736102319, "grad_norm": 0.5658531788317116, "learning_rate": 0.00010189581192455515, "loss": 12.0915, "step": 18700 }, { "epoch": 1.018344190098902, "grad_norm": 0.5396720468549974, "learning_rate": 0.00010188699524386003, "loss": 12.14, "step": 18701 }, { "epoch": 1.018398644095485, "grad_norm": 0.5452850946768681, "learning_rate": 0.00010187817854849133, "loss": 12.179, "step": 18702 }, { "epoch": 1.018453098092068, "grad_norm": 0.5293811185081714, "learning_rate": 0.00010186936183851759, "loss": 12.0484, "step": 18703 }, { "epoch": 1.018507552088651, "grad_norm": 0.5231718692384336, "learning_rate": 0.00010186054511400735, "loss": 11.9543, "step": 18704 }, { "epoch": 1.018562006085234, "grad_norm": 0.537399970978767, "learning_rate": 0.00010185172837502921, "loss": 12.0999, "step": 18705 }, { "epoch": 1.018616460081817, "grad_norm": 0.5760471061648441, "learning_rate": 0.00010184291162165172, "loss": 12.0845, "step": 18706 }, { "epoch": 1.0186709140784, "grad_norm": 0.5970680416849219, "learning_rate": 0.00010183409485394348, "loss": 12.0448, "step": 18707 }, { "epoch": 1.0187253680749833, "grad_norm": 0.5310220685462248, "learning_rate": 0.00010182527807197297, "loss": 12.1472, "step": 18708 }, { "epoch": 1.0187798220715663, "grad_norm": 0.4984097673992301, "learning_rate": 0.0001018164612758088, "loss": 12.0098, "step": 18709 }, { "epoch": 1.0188342760681492, "grad_norm": 0.5165077518009882, "learning_rate": 0.0001018076444655195, "loss": 12.1112, "step": 18710 }, { "epoch": 1.0188887300647322, "grad_norm": 0.48983985957564, "learning_rate": 0.00010179882764117368, "loss": 12.1085, "step": 18711 }, { "epoch": 1.0189431840613152, "grad_norm": 0.5714553058215357, "learning_rate": 0.00010179001080283989, "loss": 12.0102, "step": 18712 }, { "epoch": 1.0189976380578982, "grad_norm": 0.5127940910981188, "learning_rate": 0.00010178119395058665, "loss": 12.0016, "step": 18713 }, { "epoch": 1.0190520920544812, "grad_norm": 0.5399543294573546, "learning_rate": 0.00010177237708448255, "loss": 12.0651, "step": 18714 }, { "epoch": 1.0191065460510642, "grad_norm": 0.5470640580069777, "learning_rate": 0.00010176356020459617, "loss": 12.1846, "step": 18715 }, { "epoch": 1.0191610000476472, "grad_norm": 0.6234479197958068, "learning_rate": 0.000101754743310996, "loss": 11.93, "step": 18716 }, { "epoch": 1.0192154540442302, "grad_norm": 0.5405988025274492, "learning_rate": 0.00010174592640375072, "loss": 12.1231, "step": 18717 }, { "epoch": 1.0192699080408132, "grad_norm": 0.5078125142188712, "learning_rate": 0.00010173710948292878, "loss": 12.0405, "step": 18718 }, { "epoch": 1.0193243620373962, "grad_norm": 0.6023465117020487, "learning_rate": 0.00010172829254859879, "loss": 11.965, "step": 18719 }, { "epoch": 1.0193788160339794, "grad_norm": 0.5971529182156567, "learning_rate": 0.0001017194756008293, "loss": 12.27, "step": 18720 }, { "epoch": 1.0194332700305624, "grad_norm": 0.5185990422077906, "learning_rate": 0.00010171065863968889, "loss": 12.0221, "step": 18721 }, { "epoch": 1.0194877240271454, "grad_norm": 0.5241390327715381, "learning_rate": 0.00010170184166524612, "loss": 12.1332, "step": 18722 }, { "epoch": 1.0195421780237284, "grad_norm": 0.5010651696477758, "learning_rate": 0.00010169302467756953, "loss": 12.0712, "step": 18723 }, { "epoch": 1.0195966320203114, "grad_norm": 0.49437860750760554, "learning_rate": 0.0001016842076767277, "loss": 12.1145, "step": 18724 }, { "epoch": 1.0196510860168944, "grad_norm": 0.6015474759820658, "learning_rate": 0.00010167539066278919, "loss": 12.1264, "step": 18725 }, { "epoch": 1.0197055400134774, "grad_norm": 0.5337388508705605, "learning_rate": 0.00010166657363582257, "loss": 12.0994, "step": 18726 }, { "epoch": 1.0197599940100603, "grad_norm": 0.5894897986921385, "learning_rate": 0.00010165775659589639, "loss": 12.1271, "step": 18727 }, { "epoch": 1.0198144480066433, "grad_norm": 0.5502165812525377, "learning_rate": 0.00010164893954307919, "loss": 12.0668, "step": 18728 }, { "epoch": 1.0198689020032263, "grad_norm": 0.5448571962174019, "learning_rate": 0.00010164012247743959, "loss": 12.0644, "step": 18729 }, { "epoch": 1.0199233559998093, "grad_norm": 0.590115771224414, "learning_rate": 0.0001016313053990461, "loss": 12.1188, "step": 18730 }, { "epoch": 1.0199778099963923, "grad_norm": 0.5500449590839104, "learning_rate": 0.00010162248830796733, "loss": 12.0466, "step": 18731 }, { "epoch": 1.0200322639929755, "grad_norm": 0.5412151787779529, "learning_rate": 0.00010161367120427181, "loss": 12.033, "step": 18732 }, { "epoch": 1.0200867179895585, "grad_norm": 0.6254430498979263, "learning_rate": 0.00010160485408802811, "loss": 12.2435, "step": 18733 }, { "epoch": 1.0201411719861415, "grad_norm": 0.5336524786885203, "learning_rate": 0.00010159603695930479, "loss": 12.0075, "step": 18734 }, { "epoch": 1.0201956259827245, "grad_norm": 0.5837363686218118, "learning_rate": 0.00010158721981817044, "loss": 12.0391, "step": 18735 }, { "epoch": 1.0202500799793075, "grad_norm": 0.6079896229766106, "learning_rate": 0.00010157840266469359, "loss": 12.0356, "step": 18736 }, { "epoch": 1.0203045339758905, "grad_norm": 0.5245603346550712, "learning_rate": 0.0001015695854989428, "loss": 12.1001, "step": 18737 }, { "epoch": 1.0203589879724735, "grad_norm": 0.5537582378652263, "learning_rate": 0.00010156076832098666, "loss": 11.9315, "step": 18738 }, { "epoch": 1.0204134419690565, "grad_norm": 0.5868965655185666, "learning_rate": 0.00010155195113089373, "loss": 12.0819, "step": 18739 }, { "epoch": 1.0204678959656395, "grad_norm": 0.5751484799267724, "learning_rate": 0.00010154313392873257, "loss": 12.0688, "step": 18740 }, { "epoch": 1.0205223499622225, "grad_norm": 0.5558567163544844, "learning_rate": 0.00010153431671457174, "loss": 12.0395, "step": 18741 }, { "epoch": 1.0205768039588055, "grad_norm": 0.5417134858944368, "learning_rate": 0.00010152549948847982, "loss": 11.9737, "step": 18742 }, { "epoch": 1.0206312579553887, "grad_norm": 0.5436549049238402, "learning_rate": 0.0001015166822505253, "loss": 12.0564, "step": 18743 }, { "epoch": 1.0206857119519717, "grad_norm": 0.5613973847463412, "learning_rate": 0.00010150786500077687, "loss": 12.0953, "step": 18744 }, { "epoch": 1.0207401659485547, "grad_norm": 0.6256121230482002, "learning_rate": 0.00010149904773930301, "loss": 12.0651, "step": 18745 }, { "epoch": 1.0207946199451376, "grad_norm": 0.563324983194387, "learning_rate": 0.0001014902304661723, "loss": 11.9725, "step": 18746 }, { "epoch": 1.0208490739417206, "grad_norm": 0.5449957795585058, "learning_rate": 0.00010148141318145333, "loss": 12.0534, "step": 18747 }, { "epoch": 1.0209035279383036, "grad_norm": 0.5355158319275114, "learning_rate": 0.00010147259588521458, "loss": 12.0776, "step": 18748 }, { "epoch": 1.0209579819348866, "grad_norm": 0.5237347803738321, "learning_rate": 0.00010146377857752472, "loss": 12.1556, "step": 18749 }, { "epoch": 1.0210124359314696, "grad_norm": 0.6142868047989184, "learning_rate": 0.00010145496125845227, "loss": 12.0608, "step": 18750 }, { "epoch": 1.0210668899280526, "grad_norm": 0.5820946703122036, "learning_rate": 0.0001014461439280658, "loss": 11.9877, "step": 18751 }, { "epoch": 1.0211213439246356, "grad_norm": 0.5053531335963286, "learning_rate": 0.0001014373265864339, "loss": 11.9562, "step": 18752 }, { "epoch": 1.0211757979212186, "grad_norm": 0.6387350861190165, "learning_rate": 0.00010142850923362505, "loss": 12.2674, "step": 18753 }, { "epoch": 1.0212302519178016, "grad_norm": 0.555291955522916, "learning_rate": 0.0001014196918697079, "loss": 12.0657, "step": 18754 }, { "epoch": 1.0212847059143848, "grad_norm": 0.5306074304395978, "learning_rate": 0.00010141087449475098, "loss": 11.9807, "step": 18755 }, { "epoch": 1.0213391599109678, "grad_norm": 0.6337599413581059, "learning_rate": 0.00010140205710882287, "loss": 12.1726, "step": 18756 }, { "epoch": 1.0213936139075508, "grad_norm": 0.5208889541758751, "learning_rate": 0.00010139323971199216, "loss": 11.9897, "step": 18757 }, { "epoch": 1.0214480679041338, "grad_norm": 0.5865241119324267, "learning_rate": 0.00010138442230432735, "loss": 11.9329, "step": 18758 }, { "epoch": 1.0215025219007168, "grad_norm": 0.5366785199446498, "learning_rate": 0.00010137560488589701, "loss": 12.0962, "step": 18759 }, { "epoch": 1.0215569758972998, "grad_norm": 0.5554973337117057, "learning_rate": 0.00010136678745676977, "loss": 12.098, "step": 18760 }, { "epoch": 1.0216114298938828, "grad_norm": 0.5298973012350927, "learning_rate": 0.00010135797001701417, "loss": 12.1539, "step": 18761 }, { "epoch": 1.0216658838904658, "grad_norm": 0.589512390220508, "learning_rate": 0.00010134915256669878, "loss": 12.0498, "step": 18762 }, { "epoch": 1.0217203378870487, "grad_norm": 0.5424917531098438, "learning_rate": 0.00010134033510589213, "loss": 11.9644, "step": 18763 }, { "epoch": 1.0217747918836317, "grad_norm": 0.5703553316374649, "learning_rate": 0.00010133151763466282, "loss": 12.0504, "step": 18764 }, { "epoch": 1.0218292458802147, "grad_norm": 0.4877443637565837, "learning_rate": 0.00010132270015307937, "loss": 12.0059, "step": 18765 }, { "epoch": 1.0218836998767977, "grad_norm": 0.5437625770251306, "learning_rate": 0.00010131388266121041, "loss": 12.0672, "step": 18766 }, { "epoch": 1.021938153873381, "grad_norm": 0.5800226829041949, "learning_rate": 0.0001013050651591245, "loss": 12.0847, "step": 18767 }, { "epoch": 1.021992607869964, "grad_norm": 0.5554761728053199, "learning_rate": 0.00010129624764689016, "loss": 12.0638, "step": 18768 }, { "epoch": 1.022047061866547, "grad_norm": 0.5443212101981026, "learning_rate": 0.00010128743012457598, "loss": 11.9379, "step": 18769 }, { "epoch": 1.02210151586313, "grad_norm": 0.5293439211571511, "learning_rate": 0.00010127861259225053, "loss": 12.105, "step": 18770 }, { "epoch": 1.022155969859713, "grad_norm": 0.5387399325359294, "learning_rate": 0.00010126979504998235, "loss": 12.1094, "step": 18771 }, { "epoch": 1.022210423856296, "grad_norm": 0.5950910700409785, "learning_rate": 0.00010126097749784007, "loss": 12.0754, "step": 18772 }, { "epoch": 1.022264877852879, "grad_norm": 0.57269072448735, "learning_rate": 0.0001012521599358922, "loss": 11.9847, "step": 18773 }, { "epoch": 1.0223193318494619, "grad_norm": 0.4811092820120146, "learning_rate": 0.00010124334236420734, "loss": 11.9244, "step": 18774 }, { "epoch": 1.0223737858460449, "grad_norm": 0.5758684597581273, "learning_rate": 0.00010123452478285403, "loss": 12.1405, "step": 18775 }, { "epoch": 1.0224282398426279, "grad_norm": 0.5886928417106222, "learning_rate": 0.0001012257071919008, "loss": 11.9732, "step": 18776 }, { "epoch": 1.0224826938392109, "grad_norm": 0.5781547324240595, "learning_rate": 0.00010121688959141635, "loss": 12.1145, "step": 18777 }, { "epoch": 1.022537147835794, "grad_norm": 0.6275044757931225, "learning_rate": 0.00010120807198146914, "loss": 12.2266, "step": 18778 }, { "epoch": 1.022591601832377, "grad_norm": 0.494586053572155, "learning_rate": 0.00010119925436212772, "loss": 12.0316, "step": 18779 }, { "epoch": 1.02264605582896, "grad_norm": 0.587922190618471, "learning_rate": 0.00010119043673346071, "loss": 12.0634, "step": 18780 }, { "epoch": 1.022700509825543, "grad_norm": 0.5239084659528589, "learning_rate": 0.0001011816190955367, "loss": 12.0887, "step": 18781 }, { "epoch": 1.022754963822126, "grad_norm": 0.516350361404884, "learning_rate": 0.00010117280144842419, "loss": 12.0924, "step": 18782 }, { "epoch": 1.022809417818709, "grad_norm": 0.552523659378862, "learning_rate": 0.00010116398379219179, "loss": 12.1637, "step": 18783 }, { "epoch": 1.022863871815292, "grad_norm": 0.5703124789098535, "learning_rate": 0.00010115516612690805, "loss": 12.0717, "step": 18784 }, { "epoch": 1.022918325811875, "grad_norm": 0.5249871002893897, "learning_rate": 0.00010114634845264155, "loss": 12.1681, "step": 18785 }, { "epoch": 1.022972779808458, "grad_norm": 0.5586287033768093, "learning_rate": 0.00010113753076946084, "loss": 12.0204, "step": 18786 }, { "epoch": 1.023027233805041, "grad_norm": 0.5640718775388597, "learning_rate": 0.0001011287130774345, "loss": 12.1506, "step": 18787 }, { "epoch": 1.023081687801624, "grad_norm": 0.6064181594035376, "learning_rate": 0.0001011198953766311, "loss": 12.2754, "step": 18788 }, { "epoch": 1.023136141798207, "grad_norm": 0.6162582884740045, "learning_rate": 0.00010111107766711922, "loss": 12.0792, "step": 18789 }, { "epoch": 1.0231905957947902, "grad_norm": 0.5912703805811506, "learning_rate": 0.0001011022599489674, "loss": 12.0633, "step": 18790 }, { "epoch": 1.0232450497913732, "grad_norm": 0.5679337295473877, "learning_rate": 0.00010109344222224425, "loss": 12.0239, "step": 18791 }, { "epoch": 1.0232995037879562, "grad_norm": 0.661333811706787, "learning_rate": 0.00010108462448701827, "loss": 12.0685, "step": 18792 }, { "epoch": 1.0233539577845392, "grad_norm": 0.5829655380196324, "learning_rate": 0.0001010758067433581, "loss": 12.16, "step": 18793 }, { "epoch": 1.0234084117811222, "grad_norm": 0.5264206204636439, "learning_rate": 0.00010106698899133227, "loss": 12.02, "step": 18794 }, { "epoch": 1.0234628657777052, "grad_norm": 0.5469072976668753, "learning_rate": 0.00010105817123100933, "loss": 11.9848, "step": 18795 }, { "epoch": 1.0235173197742882, "grad_norm": 0.5762653358726504, "learning_rate": 0.0001010493534624579, "loss": 12.1183, "step": 18796 }, { "epoch": 1.0235717737708712, "grad_norm": 0.5336393034489133, "learning_rate": 0.0001010405356857465, "loss": 11.9188, "step": 18797 }, { "epoch": 1.0236262277674542, "grad_norm": 0.5976938336576505, "learning_rate": 0.00010103171790094375, "loss": 12.0602, "step": 18798 }, { "epoch": 1.0236806817640371, "grad_norm": 0.5245667416753186, "learning_rate": 0.00010102290010811816, "loss": 12.0888, "step": 18799 }, { "epoch": 1.0237351357606201, "grad_norm": 0.5591741725535495, "learning_rate": 0.00010101408230733833, "loss": 12.0078, "step": 18800 }, { "epoch": 1.0237895897572034, "grad_norm": 0.555771938556028, "learning_rate": 0.00010100526449867284, "loss": 12.2226, "step": 18801 }, { "epoch": 1.0238440437537863, "grad_norm": 0.5595947083970808, "learning_rate": 0.00010099644668219027, "loss": 12.1012, "step": 18802 }, { "epoch": 1.0238984977503693, "grad_norm": 0.604175891713645, "learning_rate": 0.00010098762885795909, "loss": 12.1059, "step": 18803 }, { "epoch": 1.0239529517469523, "grad_norm": 0.5114344969036151, "learning_rate": 0.00010097881102604798, "loss": 12.0845, "step": 18804 }, { "epoch": 1.0240074057435353, "grad_norm": 0.5097780349521573, "learning_rate": 0.0001009699931865255, "loss": 12.03, "step": 18805 }, { "epoch": 1.0240618597401183, "grad_norm": 0.530155162660626, "learning_rate": 0.00010096117533946014, "loss": 12.0368, "step": 18806 }, { "epoch": 1.0241163137367013, "grad_norm": 0.589263072886528, "learning_rate": 0.00010095235748492058, "loss": 11.9318, "step": 18807 }, { "epoch": 1.0241707677332843, "grad_norm": 0.5603419836453702, "learning_rate": 0.00010094353962297526, "loss": 12.0558, "step": 18808 }, { "epoch": 1.0242252217298673, "grad_norm": 0.5514549651704579, "learning_rate": 0.00010093472175369286, "loss": 12.0309, "step": 18809 }, { "epoch": 1.0242796757264503, "grad_norm": 0.5324272182449006, "learning_rate": 0.00010092590387714189, "loss": 12.1358, "step": 18810 }, { "epoch": 1.0243341297230333, "grad_norm": 0.5853805115201112, "learning_rate": 0.00010091708599339095, "loss": 11.9852, "step": 18811 }, { "epoch": 1.0243885837196163, "grad_norm": 0.4729042685800001, "learning_rate": 0.00010090826810250862, "loss": 11.9887, "step": 18812 }, { "epoch": 1.0244430377161995, "grad_norm": 0.6148014443636458, "learning_rate": 0.00010089945020456342, "loss": 12.0331, "step": 18813 }, { "epoch": 1.0244974917127825, "grad_norm": 0.635639106078519, "learning_rate": 0.0001008906322996239, "loss": 12.1934, "step": 18814 }, { "epoch": 1.0245519457093655, "grad_norm": 0.5633292596161977, "learning_rate": 0.00010088181438775873, "loss": 12.0455, "step": 18815 }, { "epoch": 1.0246063997059485, "grad_norm": 0.5560741591155846, "learning_rate": 0.00010087299646903639, "loss": 12.0702, "step": 18816 }, { "epoch": 1.0246608537025315, "grad_norm": 0.5651794701755701, "learning_rate": 0.00010086417854352552, "loss": 11.9584, "step": 18817 }, { "epoch": 1.0247153076991145, "grad_norm": 0.7044712598019117, "learning_rate": 0.00010085536061129463, "loss": 12.2901, "step": 18818 }, { "epoch": 1.0247697616956974, "grad_norm": 0.5552101311602395, "learning_rate": 0.00010084654267241231, "loss": 12.1134, "step": 18819 }, { "epoch": 1.0248242156922804, "grad_norm": 0.5905702158058971, "learning_rate": 0.00010083772472694713, "loss": 12.0672, "step": 18820 }, { "epoch": 1.0248786696888634, "grad_norm": 0.5573521983669069, "learning_rate": 0.00010082890677496766, "loss": 12.0905, "step": 18821 }, { "epoch": 1.0249331236854464, "grad_norm": 0.6824700731125648, "learning_rate": 0.0001008200888165425, "loss": 12.2232, "step": 18822 }, { "epoch": 1.0249875776820294, "grad_norm": 0.5409647664250958, "learning_rate": 0.00010081127085174019, "loss": 12.1013, "step": 18823 }, { "epoch": 1.0250420316786124, "grad_norm": 0.5736042445755989, "learning_rate": 0.00010080245288062928, "loss": 12.0641, "step": 18824 }, { "epoch": 1.0250964856751956, "grad_norm": 0.6756140383975451, "learning_rate": 0.00010079363490327833, "loss": 12.0934, "step": 18825 }, { "epoch": 1.0251509396717786, "grad_norm": 0.536914119748739, "learning_rate": 0.00010078481691975599, "loss": 11.9342, "step": 18826 }, { "epoch": 1.0252053936683616, "grad_norm": 0.5894137031872616, "learning_rate": 0.00010077599893013079, "loss": 12.0131, "step": 18827 }, { "epoch": 1.0252598476649446, "grad_norm": 0.5786068936267161, "learning_rate": 0.00010076718093447126, "loss": 12.047, "step": 18828 }, { "epoch": 1.0253143016615276, "grad_norm": 0.5762402343191538, "learning_rate": 0.00010075836293284602, "loss": 12.0729, "step": 18829 }, { "epoch": 1.0253687556581106, "grad_norm": 0.6076074032551696, "learning_rate": 0.00010074954492532362, "loss": 12.1409, "step": 18830 }, { "epoch": 1.0254232096546936, "grad_norm": 0.5985679684566773, "learning_rate": 0.0001007407269119726, "loss": 12.1766, "step": 18831 }, { "epoch": 1.0254776636512766, "grad_norm": 0.5925485044810468, "learning_rate": 0.00010073190889286164, "loss": 12.2808, "step": 18832 }, { "epoch": 1.0255321176478596, "grad_norm": 0.5589669324917635, "learning_rate": 0.00010072309086805918, "loss": 12.131, "step": 18833 }, { "epoch": 1.0255865716444426, "grad_norm": 0.5891691454120911, "learning_rate": 0.00010071427283763385, "loss": 12.1005, "step": 18834 }, { "epoch": 1.0256410256410255, "grad_norm": 0.5507771665067509, "learning_rate": 0.00010070545480165422, "loss": 12.1302, "step": 18835 }, { "epoch": 1.0256954796376085, "grad_norm": 0.5988299559898301, "learning_rate": 0.0001006966367601888, "loss": 12.0119, "step": 18836 }, { "epoch": 1.0257499336341918, "grad_norm": 0.6153467427206969, "learning_rate": 0.0001006878187133063, "loss": 12.1839, "step": 18837 }, { "epoch": 1.0258043876307747, "grad_norm": 0.5110678266933942, "learning_rate": 0.00010067900066107519, "loss": 12.0238, "step": 18838 }, { "epoch": 1.0258588416273577, "grad_norm": 0.5701816809723554, "learning_rate": 0.00010067018260356402, "loss": 12.0576, "step": 18839 }, { "epoch": 1.0259132956239407, "grad_norm": 0.5255673439636622, "learning_rate": 0.00010066136454084143, "loss": 11.9858, "step": 18840 }, { "epoch": 1.0259677496205237, "grad_norm": 0.5161129919900771, "learning_rate": 0.00010065254647297595, "loss": 11.9873, "step": 18841 }, { "epoch": 1.0260222036171067, "grad_norm": 0.5421377360540655, "learning_rate": 0.00010064372840003615, "loss": 12.1362, "step": 18842 }, { "epoch": 1.0260766576136897, "grad_norm": 0.5521988833133786, "learning_rate": 0.00010063491032209063, "loss": 12.0913, "step": 18843 }, { "epoch": 1.0261311116102727, "grad_norm": 0.5272891891303668, "learning_rate": 0.00010062609223920791, "loss": 12.1025, "step": 18844 }, { "epoch": 1.0261855656068557, "grad_norm": 0.5198128569718347, "learning_rate": 0.00010061727415145661, "loss": 12.0384, "step": 18845 }, { "epoch": 1.0262400196034387, "grad_norm": 0.6355999518648248, "learning_rate": 0.00010060845605890528, "loss": 12.0885, "step": 18846 }, { "epoch": 1.0262944736000217, "grad_norm": 0.5310977995229271, "learning_rate": 0.00010059963796162251, "loss": 12.136, "step": 18847 }, { "epoch": 1.026348927596605, "grad_norm": 0.5305271542221838, "learning_rate": 0.00010059081985967682, "loss": 12.1, "step": 18848 }, { "epoch": 1.026403381593188, "grad_norm": 0.5428545717662631, "learning_rate": 0.00010058200175313684, "loss": 12.0657, "step": 18849 }, { "epoch": 1.0264578355897709, "grad_norm": 0.5361102279776829, "learning_rate": 0.00010057318364207111, "loss": 12.0446, "step": 18850 }, { "epoch": 1.0265122895863539, "grad_norm": 0.5577618963549263, "learning_rate": 0.00010056436552654822, "loss": 12.1281, "step": 18851 }, { "epoch": 1.0265667435829369, "grad_norm": 0.5283476355851188, "learning_rate": 0.0001005555474066367, "loss": 12.0621, "step": 18852 }, { "epoch": 1.0266211975795199, "grad_norm": 0.6257579941479259, "learning_rate": 0.00010054672928240519, "loss": 12.1508, "step": 18853 }, { "epoch": 1.0266756515761029, "grad_norm": 0.603240107097262, "learning_rate": 0.0001005379111539222, "loss": 12.156, "step": 18854 }, { "epoch": 1.0267301055726858, "grad_norm": 0.5314831448689977, "learning_rate": 0.0001005290930212563, "loss": 12.087, "step": 18855 }, { "epoch": 1.0267845595692688, "grad_norm": 0.5783292631175788, "learning_rate": 0.00010052027488447612, "loss": 12.0822, "step": 18856 }, { "epoch": 1.0268390135658518, "grad_norm": 0.5412065536058184, "learning_rate": 0.00010051145674365019, "loss": 12.103, "step": 18857 }, { "epoch": 1.0268934675624348, "grad_norm": 0.5276334499808119, "learning_rate": 0.00010050263859884708, "loss": 12.1075, "step": 18858 }, { "epoch": 1.0269479215590178, "grad_norm": 0.5480083335352859, "learning_rate": 0.00010049382045013536, "loss": 12.0958, "step": 18859 }, { "epoch": 1.027002375555601, "grad_norm": 0.5588973912889202, "learning_rate": 0.00010048500229758362, "loss": 12.0568, "step": 18860 }, { "epoch": 1.027056829552184, "grad_norm": 0.6231732824018359, "learning_rate": 0.0001004761841412604, "loss": 12.0312, "step": 18861 }, { "epoch": 1.027111283548767, "grad_norm": 0.5221917786578817, "learning_rate": 0.00010046736598123432, "loss": 11.9448, "step": 18862 }, { "epoch": 1.02716573754535, "grad_norm": 0.585005556567255, "learning_rate": 0.00010045854781757392, "loss": 12.1332, "step": 18863 }, { "epoch": 1.027220191541933, "grad_norm": 0.5552817641655828, "learning_rate": 0.00010044972965034775, "loss": 11.9784, "step": 18864 }, { "epoch": 1.027274645538516, "grad_norm": 0.5862540149650539, "learning_rate": 0.00010044091147962442, "loss": 12.1105, "step": 18865 }, { "epoch": 1.027329099535099, "grad_norm": 0.5233113635404432, "learning_rate": 0.0001004320933054725, "loss": 12.0754, "step": 18866 }, { "epoch": 1.027383553531682, "grad_norm": 0.6066314094699042, "learning_rate": 0.00010042327512796055, "loss": 12.1417, "step": 18867 }, { "epoch": 1.027438007528265, "grad_norm": 0.5895423082417945, "learning_rate": 0.00010041445694715716, "loss": 11.9777, "step": 18868 }, { "epoch": 1.027492461524848, "grad_norm": 0.5582136229584224, "learning_rate": 0.00010040563876313082, "loss": 12.0938, "step": 18869 }, { "epoch": 1.027546915521431, "grad_norm": 0.746659076517137, "learning_rate": 0.00010039682057595021, "loss": 12.1253, "step": 18870 }, { "epoch": 1.0276013695180142, "grad_norm": 0.5547433612075412, "learning_rate": 0.00010038800238568384, "loss": 12.044, "step": 18871 }, { "epoch": 1.0276558235145972, "grad_norm": 0.6296806052435052, "learning_rate": 0.00010037918419240033, "loss": 12.1935, "step": 18872 }, { "epoch": 1.0277102775111802, "grad_norm": 0.5703231434418233, "learning_rate": 0.0001003703659961682, "loss": 12.1033, "step": 18873 }, { "epoch": 1.0277647315077632, "grad_norm": 0.6215786874718152, "learning_rate": 0.00010036154779705602, "loss": 12.0414, "step": 18874 }, { "epoch": 1.0278191855043461, "grad_norm": 0.5927630134487893, "learning_rate": 0.00010035272959513243, "loss": 12.06, "step": 18875 }, { "epoch": 1.0278736395009291, "grad_norm": 0.6420508848971314, "learning_rate": 0.0001003439113904659, "loss": 12.0871, "step": 18876 }, { "epoch": 1.0279280934975121, "grad_norm": 0.5103978927575662, "learning_rate": 0.00010033509318312511, "loss": 12.0255, "step": 18877 }, { "epoch": 1.0279825474940951, "grad_norm": 0.552259339180842, "learning_rate": 0.00010032627497317857, "loss": 12.0814, "step": 18878 }, { "epoch": 1.0280370014906781, "grad_norm": 0.5748723134775683, "learning_rate": 0.00010031745676069484, "loss": 12.0785, "step": 18879 }, { "epoch": 1.028091455487261, "grad_norm": 0.6033855987766538, "learning_rate": 0.0001003086385457425, "loss": 12.0556, "step": 18880 }, { "epoch": 1.028145909483844, "grad_norm": 0.5400001704991296, "learning_rate": 0.00010029982032839016, "loss": 12.1273, "step": 18881 }, { "epoch": 1.028200363480427, "grad_norm": 0.5946102420376946, "learning_rate": 0.00010029100210870636, "loss": 12.0104, "step": 18882 }, { "epoch": 1.0282548174770103, "grad_norm": 0.5710579850850007, "learning_rate": 0.0001002821838867597, "loss": 12.0413, "step": 18883 }, { "epoch": 1.0283092714735933, "grad_norm": 0.5396354673695882, "learning_rate": 0.00010027336566261871, "loss": 11.9725, "step": 18884 }, { "epoch": 1.0283637254701763, "grad_norm": 0.5580340497124632, "learning_rate": 0.00010026454743635196, "loss": 12.0463, "step": 18885 }, { "epoch": 1.0284181794667593, "grad_norm": 0.5317955365880455, "learning_rate": 0.00010025572920802808, "loss": 11.9057, "step": 18886 }, { "epoch": 1.0284726334633423, "grad_norm": 0.5387542121345305, "learning_rate": 0.00010024691097771559, "loss": 12.178, "step": 18887 }, { "epoch": 1.0285270874599253, "grad_norm": 0.5915490733558898, "learning_rate": 0.0001002380927454831, "loss": 12.0586, "step": 18888 }, { "epoch": 1.0285815414565083, "grad_norm": 0.5333617342298406, "learning_rate": 0.00010022927451139915, "loss": 12.0333, "step": 18889 }, { "epoch": 1.0286359954530913, "grad_norm": 0.5935844742153724, "learning_rate": 0.00010022045627553232, "loss": 12.1081, "step": 18890 }, { "epoch": 1.0286904494496742, "grad_norm": 0.6277507616477012, "learning_rate": 0.00010021163803795115, "loss": 12.0982, "step": 18891 }, { "epoch": 1.0287449034462572, "grad_norm": 0.5572110632178765, "learning_rate": 0.00010020281979872431, "loss": 12.1265, "step": 18892 }, { "epoch": 1.0287993574428402, "grad_norm": 0.5242220196092897, "learning_rate": 0.00010019400155792029, "loss": 12.0825, "step": 18893 }, { "epoch": 1.0288538114394232, "grad_norm": 0.5328799708456232, "learning_rate": 0.00010018518331560767, "loss": 11.9993, "step": 18894 }, { "epoch": 1.0289082654360064, "grad_norm": 0.5325890831838296, "learning_rate": 0.00010017636507185504, "loss": 12.1233, "step": 18895 }, { "epoch": 1.0289627194325894, "grad_norm": 0.5734016280637351, "learning_rate": 0.00010016754682673096, "loss": 11.9025, "step": 18896 }, { "epoch": 1.0290171734291724, "grad_norm": 0.6176216690609465, "learning_rate": 0.00010015872858030397, "loss": 12.2049, "step": 18897 }, { "epoch": 1.0290716274257554, "grad_norm": 0.6092051065623323, "learning_rate": 0.00010014991033264274, "loss": 12.2688, "step": 18898 }, { "epoch": 1.0291260814223384, "grad_norm": 0.508883902672158, "learning_rate": 0.00010014109208381577, "loss": 12.005, "step": 18899 }, { "epoch": 1.0291805354189214, "grad_norm": 0.5493388982078555, "learning_rate": 0.00010013227383389163, "loss": 12.0163, "step": 18900 }, { "epoch": 1.0292349894155044, "grad_norm": 0.5805459862865998, "learning_rate": 0.00010012345558293892, "loss": 12.0681, "step": 18901 }, { "epoch": 1.0292894434120874, "grad_norm": 0.5456664730677401, "learning_rate": 0.00010011463733102615, "loss": 11.9971, "step": 18902 }, { "epoch": 1.0293438974086704, "grad_norm": 0.5602136876295947, "learning_rate": 0.00010010581907822201, "loss": 12.0039, "step": 18903 }, { "epoch": 1.0293983514052534, "grad_norm": 0.5162878149882081, "learning_rate": 0.00010009700082459496, "loss": 12.0213, "step": 18904 }, { "epoch": 1.0294528054018364, "grad_norm": 0.5679069808387381, "learning_rate": 0.00010008818257021363, "loss": 12.2285, "step": 18905 }, { "epoch": 1.0295072593984194, "grad_norm": 0.5750738974008015, "learning_rate": 0.0001000793643151466, "loss": 12.1621, "step": 18906 }, { "epoch": 1.0295617133950026, "grad_norm": 0.5284897092297695, "learning_rate": 0.00010007054605946237, "loss": 11.9923, "step": 18907 }, { "epoch": 1.0296161673915856, "grad_norm": 0.5325368772512918, "learning_rate": 0.0001000617278032296, "loss": 12.012, "step": 18908 }, { "epoch": 1.0296706213881686, "grad_norm": 0.6267462450949988, "learning_rate": 0.00010005290954651681, "loss": 11.9871, "step": 18909 }, { "epoch": 1.0297250753847516, "grad_norm": 0.632236511368181, "learning_rate": 0.00010004409128939258, "loss": 12.2269, "step": 18910 }, { "epoch": 1.0297795293813345, "grad_norm": 0.6192054578441146, "learning_rate": 0.0001000352730319255, "loss": 12.0524, "step": 18911 }, { "epoch": 1.0298339833779175, "grad_norm": 0.5384892591463919, "learning_rate": 0.00010002645477418413, "loss": 11.9744, "step": 18912 }, { "epoch": 1.0298884373745005, "grad_norm": 0.5472096567294882, "learning_rate": 0.00010001763651623706, "loss": 11.8775, "step": 18913 }, { "epoch": 1.0299428913710835, "grad_norm": 0.5666239111993764, "learning_rate": 0.00010000881825815283, "loss": 11.9846, "step": 18914 }, { "epoch": 1.0299973453676665, "grad_norm": 0.5516408627753739, "learning_rate": 0.0001, "loss": 12.0715, "step": 18915 }, { "epoch": 1.0300517993642495, "grad_norm": 0.5649090063726684, "learning_rate": 9.999118174184721e-05, "loss": 12.0079, "step": 18916 }, { "epoch": 1.0301062533608325, "grad_norm": 0.6835071626704871, "learning_rate": 9.998236348376298e-05, "loss": 12.1264, "step": 18917 }, { "epoch": 1.0301607073574157, "grad_norm": 0.6434222873091657, "learning_rate": 9.997354522581588e-05, "loss": 11.988, "step": 18918 }, { "epoch": 1.0302151613539987, "grad_norm": 0.52579603385314, "learning_rate": 9.996472696807449e-05, "loss": 12.023, "step": 18919 }, { "epoch": 1.0302696153505817, "grad_norm": 0.5374831986440027, "learning_rate": 9.99559087106074e-05, "loss": 11.9577, "step": 18920 }, { "epoch": 1.0303240693471647, "grad_norm": 0.6623159749593205, "learning_rate": 9.994709045348323e-05, "loss": 12.0303, "step": 18921 }, { "epoch": 1.0303785233437477, "grad_norm": 0.5375802379687675, "learning_rate": 9.993827219677044e-05, "loss": 12.0215, "step": 18922 }, { "epoch": 1.0304329773403307, "grad_norm": 0.5363274067085633, "learning_rate": 9.992945394053764e-05, "loss": 12.1377, "step": 18923 }, { "epoch": 1.0304874313369137, "grad_norm": 0.5387464707574885, "learning_rate": 9.992063568485344e-05, "loss": 12.1371, "step": 18924 }, { "epoch": 1.0305418853334967, "grad_norm": 0.5193837127488434, "learning_rate": 9.991181742978638e-05, "loss": 12.1071, "step": 18925 }, { "epoch": 1.0305963393300797, "grad_norm": 0.5706090335186389, "learning_rate": 9.990299917540506e-05, "loss": 12.0525, "step": 18926 }, { "epoch": 1.0306507933266627, "grad_norm": 0.5737329560214532, "learning_rate": 9.989418092177801e-05, "loss": 12.0354, "step": 18927 }, { "epoch": 1.0307052473232456, "grad_norm": 0.5472470873820268, "learning_rate": 9.988536266897384e-05, "loss": 11.9458, "step": 18928 }, { "epoch": 1.0307597013198286, "grad_norm": 0.5789577964659437, "learning_rate": 9.987654441706112e-05, "loss": 12.2022, "step": 18929 }, { "epoch": 1.0308141553164119, "grad_norm": 0.6027020474997213, "learning_rate": 9.986772616610838e-05, "loss": 12.0335, "step": 18930 }, { "epoch": 1.0308686093129948, "grad_norm": 0.559245253812309, "learning_rate": 9.985890791618428e-05, "loss": 11.9799, "step": 18931 }, { "epoch": 1.0309230633095778, "grad_norm": 0.5994855825821289, "learning_rate": 9.985008966735731e-05, "loss": 12.1266, "step": 18932 }, { "epoch": 1.0309775173061608, "grad_norm": 0.5604127136129978, "learning_rate": 9.984127141969605e-05, "loss": 12.0725, "step": 18933 }, { "epoch": 1.0310319713027438, "grad_norm": 0.584477924604607, "learning_rate": 9.983245317326907e-05, "loss": 12.0621, "step": 18934 }, { "epoch": 1.0310864252993268, "grad_norm": 0.543314089318705, "learning_rate": 9.9823634928145e-05, "loss": 12.0443, "step": 18935 }, { "epoch": 1.0311408792959098, "grad_norm": 0.6018410254304223, "learning_rate": 9.981481668439236e-05, "loss": 12.1504, "step": 18936 }, { "epoch": 1.0311953332924928, "grad_norm": 0.6363326342899834, "learning_rate": 9.980599844207974e-05, "loss": 11.9964, "step": 18937 }, { "epoch": 1.0312497872890758, "grad_norm": 0.5265196045905317, "learning_rate": 9.97971802012757e-05, "loss": 12.1074, "step": 18938 }, { "epoch": 1.0313042412856588, "grad_norm": 0.577653919376557, "learning_rate": 9.978836196204884e-05, "loss": 12.0779, "step": 18939 }, { "epoch": 1.0313586952822418, "grad_norm": 0.6271410352203819, "learning_rate": 9.97795437244677e-05, "loss": 12.024, "step": 18940 }, { "epoch": 1.031413149278825, "grad_norm": 0.545593795326789, "learning_rate": 9.977072548860089e-05, "loss": 12.2078, "step": 18941 }, { "epoch": 1.031467603275408, "grad_norm": 0.7223263909651635, "learning_rate": 9.976190725451694e-05, "loss": 12.1842, "step": 18942 }, { "epoch": 1.031522057271991, "grad_norm": 0.5921282937729481, "learning_rate": 9.975308902228444e-05, "loss": 11.9853, "step": 18943 }, { "epoch": 1.031576511268574, "grad_norm": 0.5496087592341936, "learning_rate": 9.974427079197195e-05, "loss": 12.0496, "step": 18944 }, { "epoch": 1.031630965265157, "grad_norm": 0.5592203365582061, "learning_rate": 9.973545256364806e-05, "loss": 12.1286, "step": 18945 }, { "epoch": 1.03168541926174, "grad_norm": 0.4917921990182803, "learning_rate": 9.972663433738132e-05, "loss": 12.021, "step": 18946 }, { "epoch": 1.031739873258323, "grad_norm": 0.6193108216780243, "learning_rate": 9.971781611324031e-05, "loss": 12.0472, "step": 18947 }, { "epoch": 1.031794327254906, "grad_norm": 0.6255273614029856, "learning_rate": 9.970899789129365e-05, "loss": 12.0162, "step": 18948 }, { "epoch": 1.031848781251489, "grad_norm": 0.5431678806221594, "learning_rate": 9.970017967160984e-05, "loss": 12.0859, "step": 18949 }, { "epoch": 1.031903235248072, "grad_norm": 0.5535537159302271, "learning_rate": 9.96913614542575e-05, "loss": 12.0325, "step": 18950 }, { "epoch": 1.031957689244655, "grad_norm": 0.5821790594525964, "learning_rate": 9.968254323930521e-05, "loss": 12.0404, "step": 18951 }, { "epoch": 1.032012143241238, "grad_norm": 0.5588029126194511, "learning_rate": 9.967372502682148e-05, "loss": 12.0284, "step": 18952 }, { "epoch": 1.0320665972378211, "grad_norm": 0.5647439710683829, "learning_rate": 9.966490681687493e-05, "loss": 12.0481, "step": 18953 }, { "epoch": 1.0321210512344041, "grad_norm": 0.5671428492368455, "learning_rate": 9.965608860953411e-05, "loss": 12.0792, "step": 18954 }, { "epoch": 1.0321755052309871, "grad_norm": 0.7456889785914095, "learning_rate": 9.964727040486761e-05, "loss": 12.1077, "step": 18955 }, { "epoch": 1.03222995922757, "grad_norm": 0.6149926470951985, "learning_rate": 9.963845220294399e-05, "loss": 12.1788, "step": 18956 }, { "epoch": 1.032284413224153, "grad_norm": 0.5729471131619598, "learning_rate": 9.96296340038318e-05, "loss": 12.0537, "step": 18957 }, { "epoch": 1.032338867220736, "grad_norm": 0.5583344837668531, "learning_rate": 9.962081580759968e-05, "loss": 12.2213, "step": 18958 }, { "epoch": 1.032393321217319, "grad_norm": 0.5723927892301177, "learning_rate": 9.961199761431614e-05, "loss": 12.0922, "step": 18959 }, { "epoch": 1.032447775213902, "grad_norm": 0.5593927752480166, "learning_rate": 9.960317942404978e-05, "loss": 12.1068, "step": 18960 }, { "epoch": 1.032502229210485, "grad_norm": 0.5554236215804222, "learning_rate": 9.959436123686919e-05, "loss": 12.114, "step": 18961 }, { "epoch": 1.032556683207068, "grad_norm": 0.5022972277314655, "learning_rate": 9.958554305284289e-05, "loss": 12.1123, "step": 18962 }, { "epoch": 1.032611137203651, "grad_norm": 0.5686257319727192, "learning_rate": 9.957672487203948e-05, "loss": 12.0303, "step": 18963 }, { "epoch": 1.032665591200234, "grad_norm": 0.5288932357435517, "learning_rate": 9.956790669452751e-05, "loss": 12.1181, "step": 18964 }, { "epoch": 1.0327200451968173, "grad_norm": 0.5488837938781982, "learning_rate": 9.955908852037559e-05, "loss": 12.1645, "step": 18965 }, { "epoch": 1.0327744991934003, "grad_norm": 0.5616232895228722, "learning_rate": 9.955027034965226e-05, "loss": 12.1661, "step": 18966 }, { "epoch": 1.0328289531899832, "grad_norm": 0.7209270444507411, "learning_rate": 9.954145218242611e-05, "loss": 12.1788, "step": 18967 }, { "epoch": 1.0328834071865662, "grad_norm": 0.5679915308447578, "learning_rate": 9.953263401876568e-05, "loss": 11.8683, "step": 18968 }, { "epoch": 1.0329378611831492, "grad_norm": 0.5261119171360865, "learning_rate": 9.952381585873959e-05, "loss": 12.0068, "step": 18969 }, { "epoch": 1.0329923151797322, "grad_norm": 0.5270703981807149, "learning_rate": 9.95149977024164e-05, "loss": 12.1285, "step": 18970 }, { "epoch": 1.0330467691763152, "grad_norm": 0.5303365022994876, "learning_rate": 9.950617954986469e-05, "loss": 12.0673, "step": 18971 }, { "epoch": 1.0331012231728982, "grad_norm": 0.5403425930205259, "learning_rate": 9.949736140115295e-05, "loss": 12.0178, "step": 18972 }, { "epoch": 1.0331556771694812, "grad_norm": 0.5136572749686279, "learning_rate": 9.948854325634984e-05, "loss": 12.0827, "step": 18973 }, { "epoch": 1.0332101311660642, "grad_norm": 0.5512873047640114, "learning_rate": 9.94797251155239e-05, "loss": 11.9257, "step": 18974 }, { "epoch": 1.0332645851626472, "grad_norm": 0.5574323799652663, "learning_rate": 9.94709069787437e-05, "loss": 12.092, "step": 18975 }, { "epoch": 1.0333190391592302, "grad_norm": 0.576200540979803, "learning_rate": 9.946208884607782e-05, "loss": 12.2042, "step": 18976 }, { "epoch": 1.0333734931558134, "grad_norm": 0.6078327664055824, "learning_rate": 9.945327071759484e-05, "loss": 12.1118, "step": 18977 }, { "epoch": 1.0334279471523964, "grad_norm": 0.5972974214912442, "learning_rate": 9.944445259336332e-05, "loss": 12.0486, "step": 18978 }, { "epoch": 1.0334824011489794, "grad_norm": 0.5406042584852266, "learning_rate": 9.943563447345179e-05, "loss": 12.1173, "step": 18979 }, { "epoch": 1.0335368551455624, "grad_norm": 0.5917761282289588, "learning_rate": 9.942681635792888e-05, "loss": 12.2366, "step": 18980 }, { "epoch": 1.0335913091421454, "grad_norm": 0.5517644699994737, "learning_rate": 9.94179982468632e-05, "loss": 12.1436, "step": 18981 }, { "epoch": 1.0336457631387284, "grad_norm": 0.6317805706296912, "learning_rate": 9.940918014032323e-05, "loss": 12.1843, "step": 18982 }, { "epoch": 1.0337002171353113, "grad_norm": 0.5568818214034049, "learning_rate": 9.940036203837753e-05, "loss": 12.0803, "step": 18983 }, { "epoch": 1.0337546711318943, "grad_norm": 0.5646030978093064, "learning_rate": 9.939154394109473e-05, "loss": 12.0041, "step": 18984 }, { "epoch": 1.0338091251284773, "grad_norm": 0.5578569716580203, "learning_rate": 9.938272584854341e-05, "loss": 12.1064, "step": 18985 }, { "epoch": 1.0338635791250603, "grad_norm": 0.4875324723148824, "learning_rate": 9.937390776079211e-05, "loss": 11.9227, "step": 18986 }, { "epoch": 1.0339180331216433, "grad_norm": 0.5457733707355378, "learning_rate": 9.936508967790941e-05, "loss": 12.1069, "step": 18987 }, { "epoch": 1.0339724871182265, "grad_norm": 0.5743029530179137, "learning_rate": 9.935627159996386e-05, "loss": 12.0204, "step": 18988 }, { "epoch": 1.0340269411148095, "grad_norm": 0.5679480383613924, "learning_rate": 9.934745352702406e-05, "loss": 12.0522, "step": 18989 }, { "epoch": 1.0340813951113925, "grad_norm": 0.6754449658587147, "learning_rate": 9.933863545915856e-05, "loss": 12.2621, "step": 18990 }, { "epoch": 1.0341358491079755, "grad_norm": 0.5515893171453582, "learning_rate": 9.9329817396436e-05, "loss": 12.139, "step": 18991 }, { "epoch": 1.0341903031045585, "grad_norm": 0.544087538047309, "learning_rate": 9.932099933892486e-05, "loss": 11.9348, "step": 18992 }, { "epoch": 1.0342447571011415, "grad_norm": 0.5359449039020331, "learning_rate": 9.931218128669373e-05, "loss": 12.0435, "step": 18993 }, { "epoch": 1.0342992110977245, "grad_norm": 0.5110707846928103, "learning_rate": 9.93033632398112e-05, "loss": 12.0414, "step": 18994 }, { "epoch": 1.0343536650943075, "grad_norm": 0.4865654922492864, "learning_rate": 9.929454519834581e-05, "loss": 12.0509, "step": 18995 }, { "epoch": 1.0344081190908905, "grad_norm": 0.5841905001779324, "learning_rate": 9.928572716236616e-05, "loss": 12.0051, "step": 18996 }, { "epoch": 1.0344625730874735, "grad_norm": 0.5291241370961548, "learning_rate": 9.927690913194085e-05, "loss": 12.0463, "step": 18997 }, { "epoch": 1.0345170270840565, "grad_norm": 0.5640243804897475, "learning_rate": 9.92680911071384e-05, "loss": 11.8798, "step": 18998 }, { "epoch": 1.0345714810806395, "grad_norm": 0.542482064559819, "learning_rate": 9.925927308802738e-05, "loss": 12.117, "step": 18999 }, { "epoch": 1.0346259350772227, "grad_norm": 0.5547007371518616, "learning_rate": 9.92504550746764e-05, "loss": 12.1063, "step": 19000 }, { "epoch": 1.0346803890738057, "grad_norm": 0.5291276435062658, "learning_rate": 9.924163706715402e-05, "loss": 12.0121, "step": 19001 }, { "epoch": 1.0347348430703887, "grad_norm": 0.54012404073616, "learning_rate": 9.923281906552877e-05, "loss": 12.0397, "step": 19002 }, { "epoch": 1.0347892970669716, "grad_norm": 0.562145421207624, "learning_rate": 9.922400106986926e-05, "loss": 12.1357, "step": 19003 }, { "epoch": 1.0348437510635546, "grad_norm": 0.5854892169410031, "learning_rate": 9.921518308024403e-05, "loss": 12.1699, "step": 19004 }, { "epoch": 1.0348982050601376, "grad_norm": 0.5723611719763151, "learning_rate": 9.920636509672168e-05, "loss": 12.0045, "step": 19005 }, { "epoch": 1.0349526590567206, "grad_norm": 0.5125566985009139, "learning_rate": 9.919754711937074e-05, "loss": 12.0007, "step": 19006 }, { "epoch": 1.0350071130533036, "grad_norm": 0.500166941294284, "learning_rate": 9.918872914825984e-05, "loss": 11.9961, "step": 19007 }, { "epoch": 1.0350615670498866, "grad_norm": 0.5550410627066128, "learning_rate": 9.917991118345751e-05, "loss": 11.9788, "step": 19008 }, { "epoch": 1.0351160210464696, "grad_norm": 0.5116309465811975, "learning_rate": 9.917109322503232e-05, "loss": 12.0869, "step": 19009 }, { "epoch": 1.0351704750430526, "grad_norm": 0.5259939680912251, "learning_rate": 9.916227527305287e-05, "loss": 12.066, "step": 19010 }, { "epoch": 1.0352249290396358, "grad_norm": 0.5247178368056492, "learning_rate": 9.915345732758772e-05, "loss": 12.0876, "step": 19011 }, { "epoch": 1.0352793830362188, "grad_norm": 0.5336971277089876, "learning_rate": 9.91446393887054e-05, "loss": 12.0686, "step": 19012 }, { "epoch": 1.0353338370328018, "grad_norm": 0.5337462776018592, "learning_rate": 9.913582145647452e-05, "loss": 11.9633, "step": 19013 }, { "epoch": 1.0353882910293848, "grad_norm": 0.5420632483238385, "learning_rate": 9.912700353096362e-05, "loss": 12.1343, "step": 19014 }, { "epoch": 1.0354427450259678, "grad_norm": 0.5160256288543138, "learning_rate": 9.91181856122413e-05, "loss": 12.128, "step": 19015 }, { "epoch": 1.0354971990225508, "grad_norm": 0.478344528213043, "learning_rate": 9.91093677003761e-05, "loss": 12.0558, "step": 19016 }, { "epoch": 1.0355516530191338, "grad_norm": 0.4993093672597959, "learning_rate": 9.910054979543662e-05, "loss": 12.1166, "step": 19017 }, { "epoch": 1.0356061070157168, "grad_norm": 0.6508014470582698, "learning_rate": 9.90917318974914e-05, "loss": 12.1159, "step": 19018 }, { "epoch": 1.0356605610122998, "grad_norm": 0.532951712132628, "learning_rate": 9.908291400660906e-05, "loss": 12.1033, "step": 19019 }, { "epoch": 1.0357150150088827, "grad_norm": 0.5157335528993278, "learning_rate": 9.90740961228581e-05, "loss": 11.9439, "step": 19020 }, { "epoch": 1.0357694690054657, "grad_norm": 0.5211195795040291, "learning_rate": 9.906527824630715e-05, "loss": 12.0271, "step": 19021 }, { "epoch": 1.0358239230020487, "grad_norm": 0.4964343356487457, "learning_rate": 9.905646037702475e-05, "loss": 12.0784, "step": 19022 }, { "epoch": 1.035878376998632, "grad_norm": 0.5121292915204849, "learning_rate": 9.904764251507946e-05, "loss": 11.8588, "step": 19023 }, { "epoch": 1.035932830995215, "grad_norm": 0.5395009713858713, "learning_rate": 9.903882466053987e-05, "loss": 12.078, "step": 19024 }, { "epoch": 1.035987284991798, "grad_norm": 0.5724794808973698, "learning_rate": 9.903000681347453e-05, "loss": 12.1624, "step": 19025 }, { "epoch": 1.036041738988381, "grad_norm": 0.528610169588521, "learning_rate": 9.902118897395203e-05, "loss": 12.1195, "step": 19026 }, { "epoch": 1.036096192984964, "grad_norm": 0.5874990058259184, "learning_rate": 9.901237114204092e-05, "loss": 12.0297, "step": 19027 }, { "epoch": 1.036150646981547, "grad_norm": 0.5577128810521385, "learning_rate": 9.900355331780976e-05, "loss": 12.1688, "step": 19028 }, { "epoch": 1.03620510097813, "grad_norm": 0.5359707995224262, "learning_rate": 9.899473550132717e-05, "loss": 12.0934, "step": 19029 }, { "epoch": 1.036259554974713, "grad_norm": 0.5320585447586443, "learning_rate": 9.898591769266166e-05, "loss": 12.0178, "step": 19030 }, { "epoch": 1.0363140089712959, "grad_norm": 0.5996530740109022, "learning_rate": 9.897709989188189e-05, "loss": 12.1333, "step": 19031 }, { "epoch": 1.0363684629678789, "grad_norm": 0.530592100026717, "learning_rate": 9.896828209905629e-05, "loss": 11.8981, "step": 19032 }, { "epoch": 1.0364229169644619, "grad_norm": 0.5071047991797685, "learning_rate": 9.895946431425351e-05, "loss": 12.0599, "step": 19033 }, { "epoch": 1.036477370961045, "grad_norm": 0.5045948386650949, "learning_rate": 9.895064653754212e-05, "loss": 12.0018, "step": 19034 }, { "epoch": 1.036531824957628, "grad_norm": 0.5357652354755498, "learning_rate": 9.894182876899069e-05, "loss": 12.106, "step": 19035 }, { "epoch": 1.036586278954211, "grad_norm": 0.49056228823431847, "learning_rate": 9.893301100866777e-05, "loss": 12.0361, "step": 19036 }, { "epoch": 1.036640732950794, "grad_norm": 0.514162932443665, "learning_rate": 9.892419325664193e-05, "loss": 11.9886, "step": 19037 }, { "epoch": 1.036695186947377, "grad_norm": 0.48106338028908796, "learning_rate": 9.891537551298175e-05, "loss": 12.0299, "step": 19038 }, { "epoch": 1.03674964094396, "grad_norm": 0.5403447290973654, "learning_rate": 9.890655777775576e-05, "loss": 11.9933, "step": 19039 }, { "epoch": 1.036804094940543, "grad_norm": 0.5711878205368841, "learning_rate": 9.889774005103258e-05, "loss": 12.0667, "step": 19040 }, { "epoch": 1.036858548937126, "grad_norm": 0.5227090801811834, "learning_rate": 9.888892233288081e-05, "loss": 11.9956, "step": 19041 }, { "epoch": 1.036913002933709, "grad_norm": 0.5509038475517687, "learning_rate": 9.888010462336893e-05, "loss": 12.0979, "step": 19042 }, { "epoch": 1.036967456930292, "grad_norm": 0.5564037998168841, "learning_rate": 9.887128692256554e-05, "loss": 12.0903, "step": 19043 }, { "epoch": 1.037021910926875, "grad_norm": 0.5489284125232368, "learning_rate": 9.886246923053918e-05, "loss": 12.124, "step": 19044 }, { "epoch": 1.037076364923458, "grad_norm": 0.48695931999911213, "learning_rate": 9.885365154735849e-05, "loss": 11.9784, "step": 19045 }, { "epoch": 1.0371308189200412, "grad_norm": 0.5119116175650932, "learning_rate": 9.884483387309197e-05, "loss": 12.0831, "step": 19046 }, { "epoch": 1.0371852729166242, "grad_norm": 0.5402343458051497, "learning_rate": 9.883601620780825e-05, "loss": 12.0994, "step": 19047 }, { "epoch": 1.0372397269132072, "grad_norm": 0.5443656623920647, "learning_rate": 9.882719855157584e-05, "loss": 12.0989, "step": 19048 }, { "epoch": 1.0372941809097902, "grad_norm": 0.5613854849086186, "learning_rate": 9.881838090446334e-05, "loss": 11.9348, "step": 19049 }, { "epoch": 1.0373486349063732, "grad_norm": 0.5716948056406087, "learning_rate": 9.88095632665393e-05, "loss": 11.9324, "step": 19050 }, { "epoch": 1.0374030889029562, "grad_norm": 0.5615163943071505, "learning_rate": 9.880074563787232e-05, "loss": 12.079, "step": 19051 }, { "epoch": 1.0374575428995392, "grad_norm": 0.5416536736159172, "learning_rate": 9.879192801853093e-05, "loss": 11.9152, "step": 19052 }, { "epoch": 1.0375119968961222, "grad_norm": 0.5257042329245045, "learning_rate": 9.87831104085837e-05, "loss": 11.9454, "step": 19053 }, { "epoch": 1.0375664508927052, "grad_norm": 0.5153521266220805, "learning_rate": 9.87742928080992e-05, "loss": 12.022, "step": 19054 }, { "epoch": 1.0376209048892882, "grad_norm": 0.532221667724164, "learning_rate": 9.8765475217146e-05, "loss": 12.061, "step": 19055 }, { "epoch": 1.0376753588858711, "grad_norm": 0.5538100715675632, "learning_rate": 9.875665763579269e-05, "loss": 12.1585, "step": 19056 }, { "epoch": 1.0377298128824541, "grad_norm": 0.5054629450823357, "learning_rate": 9.874784006410782e-05, "loss": 11.7964, "step": 19057 }, { "epoch": 1.0377842668790374, "grad_norm": 0.5457479810413797, "learning_rate": 9.873902250215994e-05, "loss": 11.8406, "step": 19058 }, { "epoch": 1.0378387208756203, "grad_norm": 0.5415794466342914, "learning_rate": 9.873020495001765e-05, "loss": 12.1593, "step": 19059 }, { "epoch": 1.0378931748722033, "grad_norm": 0.5725519633500826, "learning_rate": 9.87213874077495e-05, "loss": 12.055, "step": 19060 }, { "epoch": 1.0379476288687863, "grad_norm": 0.5763961304813658, "learning_rate": 9.871256987542404e-05, "loss": 12.1032, "step": 19061 }, { "epoch": 1.0380020828653693, "grad_norm": 0.5393060127149656, "learning_rate": 9.870375235310989e-05, "loss": 11.9136, "step": 19062 }, { "epoch": 1.0380565368619523, "grad_norm": 0.5573715002208469, "learning_rate": 9.869493484087556e-05, "loss": 12.0489, "step": 19063 }, { "epoch": 1.0381109908585353, "grad_norm": 0.5628184000735703, "learning_rate": 9.868611733878961e-05, "loss": 12.0815, "step": 19064 }, { "epoch": 1.0381654448551183, "grad_norm": 0.6216920499794681, "learning_rate": 9.867729984692065e-05, "loss": 12.0082, "step": 19065 }, { "epoch": 1.0382198988517013, "grad_norm": 0.5540984827577488, "learning_rate": 9.86684823653372e-05, "loss": 12.0654, "step": 19066 }, { "epoch": 1.0382743528482843, "grad_norm": 0.5134707959928382, "learning_rate": 9.865966489410789e-05, "loss": 12.0314, "step": 19067 }, { "epoch": 1.0383288068448673, "grad_norm": 0.5443577726995507, "learning_rate": 9.865084743330124e-05, "loss": 12.0544, "step": 19068 }, { "epoch": 1.0383832608414503, "grad_norm": 0.5897961064460123, "learning_rate": 9.864202998298583e-05, "loss": 12.0468, "step": 19069 }, { "epoch": 1.0384377148380335, "grad_norm": 0.5367005604858965, "learning_rate": 9.863321254323022e-05, "loss": 11.9909, "step": 19070 }, { "epoch": 1.0384921688346165, "grad_norm": 0.571728376343309, "learning_rate": 9.862439511410297e-05, "loss": 11.984, "step": 19071 }, { "epoch": 1.0385466228311995, "grad_norm": 0.5030556817084691, "learning_rate": 9.86155776956727e-05, "loss": 12.0831, "step": 19072 }, { "epoch": 1.0386010768277825, "grad_norm": 0.5938392851995647, "learning_rate": 9.86067602880079e-05, "loss": 12.0718, "step": 19073 }, { "epoch": 1.0386555308243655, "grad_norm": 0.5332649327413911, "learning_rate": 9.859794289117716e-05, "loss": 11.9327, "step": 19074 }, { "epoch": 1.0387099848209485, "grad_norm": 0.5293914447218716, "learning_rate": 9.858912550524903e-05, "loss": 12.0487, "step": 19075 }, { "epoch": 1.0387644388175314, "grad_norm": 0.5238701352832031, "learning_rate": 9.858030813029214e-05, "loss": 12.1218, "step": 19076 }, { "epoch": 1.0388188928141144, "grad_norm": 0.6132977437798783, "learning_rate": 9.857149076637496e-05, "loss": 12.1199, "step": 19077 }, { "epoch": 1.0388733468106974, "grad_norm": 0.5851041481376358, "learning_rate": 9.856267341356613e-05, "loss": 11.8652, "step": 19078 }, { "epoch": 1.0389278008072804, "grad_norm": 0.5249538310080266, "learning_rate": 9.85538560719342e-05, "loss": 11.9839, "step": 19079 }, { "epoch": 1.0389822548038634, "grad_norm": 0.5606023303923444, "learning_rate": 9.854503874154773e-05, "loss": 11.9515, "step": 19080 }, { "epoch": 1.0390367088004466, "grad_norm": 0.5288047906674928, "learning_rate": 9.853622142247528e-05, "loss": 12.0651, "step": 19081 }, { "epoch": 1.0390911627970296, "grad_norm": 0.5558862569835413, "learning_rate": 9.852740411478543e-05, "loss": 12.0179, "step": 19082 }, { "epoch": 1.0391456167936126, "grad_norm": 0.605587366926134, "learning_rate": 9.851858681854673e-05, "loss": 12.1148, "step": 19083 }, { "epoch": 1.0392000707901956, "grad_norm": 0.6323635269478186, "learning_rate": 9.850976953382773e-05, "loss": 12.0581, "step": 19084 }, { "epoch": 1.0392545247867786, "grad_norm": 0.5735279356919927, "learning_rate": 9.850095226069702e-05, "loss": 12.2183, "step": 19085 }, { "epoch": 1.0393089787833616, "grad_norm": 0.5211586953071619, "learning_rate": 9.849213499922316e-05, "loss": 11.8532, "step": 19086 }, { "epoch": 1.0393634327799446, "grad_norm": 0.5557305775491236, "learning_rate": 9.848331774947471e-05, "loss": 11.9333, "step": 19087 }, { "epoch": 1.0394178867765276, "grad_norm": 0.5457468580264888, "learning_rate": 9.84745005115202e-05, "loss": 12.0708, "step": 19088 }, { "epoch": 1.0394723407731106, "grad_norm": 0.5179863842829507, "learning_rate": 9.846568328542827e-05, "loss": 12.0847, "step": 19089 }, { "epoch": 1.0395267947696936, "grad_norm": 0.5237475254373459, "learning_rate": 9.845686607126744e-05, "loss": 12.0979, "step": 19090 }, { "epoch": 1.0395812487662766, "grad_norm": 0.6168798677338446, "learning_rate": 9.844804886910627e-05, "loss": 12.062, "step": 19091 }, { "epoch": 1.0396357027628595, "grad_norm": 0.5484336017930553, "learning_rate": 9.843923167901336e-05, "loss": 12.0649, "step": 19092 }, { "epoch": 1.0396901567594428, "grad_norm": 0.5006446798431007, "learning_rate": 9.843041450105722e-05, "loss": 12.0172, "step": 19093 }, { "epoch": 1.0397446107560258, "grad_norm": 0.5387552497608356, "learning_rate": 9.842159733530645e-05, "loss": 12.0331, "step": 19094 }, { "epoch": 1.0397990647526087, "grad_norm": 0.5992092342972867, "learning_rate": 9.841278018182959e-05, "loss": 12.042, "step": 19095 }, { "epoch": 1.0398535187491917, "grad_norm": 0.5518016802549429, "learning_rate": 9.840396304069522e-05, "loss": 12.1501, "step": 19096 }, { "epoch": 1.0399079727457747, "grad_norm": 0.5231586092041085, "learning_rate": 9.839514591197191e-05, "loss": 12.1477, "step": 19097 }, { "epoch": 1.0399624267423577, "grad_norm": 0.5250451595601056, "learning_rate": 9.838632879572821e-05, "loss": 11.9877, "step": 19098 }, { "epoch": 1.0400168807389407, "grad_norm": 0.5751355650493677, "learning_rate": 9.837751169203268e-05, "loss": 12.0432, "step": 19099 }, { "epoch": 1.0400713347355237, "grad_norm": 0.5475822383731445, "learning_rate": 9.836869460095388e-05, "loss": 12.1132, "step": 19100 }, { "epoch": 1.0401257887321067, "grad_norm": 0.5095680052981686, "learning_rate": 9.83598775225604e-05, "loss": 12.0877, "step": 19101 }, { "epoch": 1.0401802427286897, "grad_norm": 0.5203026601536467, "learning_rate": 9.835106045692084e-05, "loss": 12.1754, "step": 19102 }, { "epoch": 1.0402346967252727, "grad_norm": 0.5393357415466559, "learning_rate": 9.834224340410366e-05, "loss": 12.0501, "step": 19103 }, { "epoch": 1.040289150721856, "grad_norm": 0.5147226504600757, "learning_rate": 9.833342636417747e-05, "loss": 12.2127, "step": 19104 }, { "epoch": 1.040343604718439, "grad_norm": 0.5151805541245005, "learning_rate": 9.832460933721083e-05, "loss": 12.0432, "step": 19105 }, { "epoch": 1.040398058715022, "grad_norm": 0.5298712966251693, "learning_rate": 9.831579232327231e-05, "loss": 11.9219, "step": 19106 }, { "epoch": 1.0404525127116049, "grad_norm": 0.5028227447111524, "learning_rate": 9.830697532243049e-05, "loss": 12.0594, "step": 19107 }, { "epoch": 1.0405069667081879, "grad_norm": 0.5437150776845209, "learning_rate": 9.82981583347539e-05, "loss": 12.0402, "step": 19108 }, { "epoch": 1.0405614207047709, "grad_norm": 0.5714209694702647, "learning_rate": 9.828934136031113e-05, "loss": 12.0347, "step": 19109 }, { "epoch": 1.0406158747013539, "grad_norm": 0.5090023205310441, "learning_rate": 9.828052439917071e-05, "loss": 11.9786, "step": 19110 }, { "epoch": 1.0406703286979369, "grad_norm": 0.5052478240697585, "learning_rate": 9.827170745140121e-05, "loss": 11.9894, "step": 19111 }, { "epoch": 1.0407247826945198, "grad_norm": 0.520875318006523, "learning_rate": 9.826289051707127e-05, "loss": 12.0789, "step": 19112 }, { "epoch": 1.0407792366911028, "grad_norm": 0.5776766893725211, "learning_rate": 9.825407359624935e-05, "loss": 11.9286, "step": 19113 }, { "epoch": 1.0408336906876858, "grad_norm": 0.5587302106616825, "learning_rate": 9.824525668900402e-05, "loss": 11.995, "step": 19114 }, { "epoch": 1.0408881446842688, "grad_norm": 0.5382811136105777, "learning_rate": 9.823643979540386e-05, "loss": 11.9325, "step": 19115 }, { "epoch": 1.040942598680852, "grad_norm": 0.5148025263395594, "learning_rate": 9.822762291551746e-05, "loss": 11.9106, "step": 19116 }, { "epoch": 1.040997052677435, "grad_norm": 0.5838594177462733, "learning_rate": 9.821880604941337e-05, "loss": 12.2477, "step": 19117 }, { "epoch": 1.041051506674018, "grad_norm": 0.5123221411956282, "learning_rate": 9.820998919716013e-05, "loss": 12.0669, "step": 19118 }, { "epoch": 1.041105960670601, "grad_norm": 0.565955948169352, "learning_rate": 9.820117235882633e-05, "loss": 12.0751, "step": 19119 }, { "epoch": 1.041160414667184, "grad_norm": 0.5621022639430323, "learning_rate": 9.81923555344805e-05, "loss": 12.0712, "step": 19120 }, { "epoch": 1.041214868663767, "grad_norm": 0.5499786944827505, "learning_rate": 9.818353872419121e-05, "loss": 12.1968, "step": 19121 }, { "epoch": 1.04126932266035, "grad_norm": 0.5583103952439287, "learning_rate": 9.817472192802707e-05, "loss": 12.1014, "step": 19122 }, { "epoch": 1.041323776656933, "grad_norm": 0.5163468250550807, "learning_rate": 9.816590514605657e-05, "loss": 11.9431, "step": 19123 }, { "epoch": 1.041378230653516, "grad_norm": 0.5665414309500235, "learning_rate": 9.815708837834829e-05, "loss": 12.0765, "step": 19124 }, { "epoch": 1.041432684650099, "grad_norm": 0.5285343488866202, "learning_rate": 9.814827162497082e-05, "loss": 12.055, "step": 19125 }, { "epoch": 1.041487138646682, "grad_norm": 0.5315934905705026, "learning_rate": 9.813945488599266e-05, "loss": 12.0311, "step": 19126 }, { "epoch": 1.041541592643265, "grad_norm": 0.6469743773818489, "learning_rate": 9.813063816148244e-05, "loss": 12.1042, "step": 19127 }, { "epoch": 1.0415960466398482, "grad_norm": 0.5832980168406593, "learning_rate": 9.81218214515087e-05, "loss": 12.1441, "step": 19128 }, { "epoch": 1.0416505006364312, "grad_norm": 0.5807553796234487, "learning_rate": 9.811300475613997e-05, "loss": 12.1559, "step": 19129 }, { "epoch": 1.0417049546330142, "grad_norm": 0.5623617827311373, "learning_rate": 9.810418807544483e-05, "loss": 12.1247, "step": 19130 }, { "epoch": 1.0417594086295972, "grad_norm": 0.5565142904968856, "learning_rate": 9.809537140949187e-05, "loss": 12.0773, "step": 19131 }, { "epoch": 1.0418138626261801, "grad_norm": 0.48345586368369114, "learning_rate": 9.808655475834962e-05, "loss": 12.0419, "step": 19132 }, { "epoch": 1.0418683166227631, "grad_norm": 0.6031622305737758, "learning_rate": 9.807773812208662e-05, "loss": 12.1528, "step": 19133 }, { "epoch": 1.0419227706193461, "grad_norm": 0.5163447645287528, "learning_rate": 9.806892150077147e-05, "loss": 12.037, "step": 19134 }, { "epoch": 1.0419772246159291, "grad_norm": 0.5209695862658643, "learning_rate": 9.80601048944727e-05, "loss": 12.0789, "step": 19135 }, { "epoch": 1.0420316786125121, "grad_norm": 0.5494787522018224, "learning_rate": 9.805128830325887e-05, "loss": 12.1595, "step": 19136 }, { "epoch": 1.042086132609095, "grad_norm": 0.5891751228617196, "learning_rate": 9.804247172719854e-05, "loss": 12.1231, "step": 19137 }, { "epoch": 1.042140586605678, "grad_norm": 0.5817300733494469, "learning_rate": 9.803365516636028e-05, "loss": 12.1236, "step": 19138 }, { "epoch": 1.042195040602261, "grad_norm": 0.5814731823831777, "learning_rate": 9.802483862081267e-05, "loss": 12.1709, "step": 19139 }, { "epoch": 1.0422494945988443, "grad_norm": 0.5579691487335727, "learning_rate": 9.801602209062424e-05, "loss": 12.1035, "step": 19140 }, { "epoch": 1.0423039485954273, "grad_norm": 0.5740579702293666, "learning_rate": 9.800720557586354e-05, "loss": 11.904, "step": 19141 }, { "epoch": 1.0423584025920103, "grad_norm": 0.5928448752546925, "learning_rate": 9.799838907659918e-05, "loss": 12.1609, "step": 19142 }, { "epoch": 1.0424128565885933, "grad_norm": 0.5212588888353008, "learning_rate": 9.798957259289966e-05, "loss": 12.1121, "step": 19143 }, { "epoch": 1.0424673105851763, "grad_norm": 0.5754309059415892, "learning_rate": 9.798075612483356e-05, "loss": 12.0057, "step": 19144 }, { "epoch": 1.0425217645817593, "grad_norm": 0.544196322848585, "learning_rate": 9.797193967246943e-05, "loss": 12.0195, "step": 19145 }, { "epoch": 1.0425762185783423, "grad_norm": 0.5588146386938192, "learning_rate": 9.796312323587585e-05, "loss": 12.0907, "step": 19146 }, { "epoch": 1.0426306725749253, "grad_norm": 0.567915320166216, "learning_rate": 9.795430681512137e-05, "loss": 12.0485, "step": 19147 }, { "epoch": 1.0426851265715082, "grad_norm": 0.5346818621860815, "learning_rate": 9.794549041027454e-05, "loss": 12.1685, "step": 19148 }, { "epoch": 1.0427395805680912, "grad_norm": 0.5617888190170663, "learning_rate": 9.793667402140388e-05, "loss": 12.1387, "step": 19149 }, { "epoch": 1.0427940345646742, "grad_norm": 0.524658144114567, "learning_rate": 9.792785764857802e-05, "loss": 12.0566, "step": 19150 }, { "epoch": 1.0428484885612574, "grad_norm": 0.5713117601184278, "learning_rate": 9.79190412918655e-05, "loss": 12.0968, "step": 19151 }, { "epoch": 1.0429029425578404, "grad_norm": 0.49936954976258013, "learning_rate": 9.791022495133489e-05, "loss": 12.0575, "step": 19152 }, { "epoch": 1.0429573965544234, "grad_norm": 0.6129223975829453, "learning_rate": 9.790140862705468e-05, "loss": 12.193, "step": 19153 }, { "epoch": 1.0430118505510064, "grad_norm": 0.6142847898671409, "learning_rate": 9.789259231909346e-05, "loss": 12.1012, "step": 19154 }, { "epoch": 1.0430663045475894, "grad_norm": 0.5700271812475715, "learning_rate": 9.788377602751982e-05, "loss": 12.015, "step": 19155 }, { "epoch": 1.0431207585441724, "grad_norm": 0.4888156128269216, "learning_rate": 9.787495975240227e-05, "loss": 11.9991, "step": 19156 }, { "epoch": 1.0431752125407554, "grad_norm": 0.5083256571510111, "learning_rate": 9.78661434938094e-05, "loss": 11.995, "step": 19157 }, { "epoch": 1.0432296665373384, "grad_norm": 0.6171669719782117, "learning_rate": 9.785732725180977e-05, "loss": 12.0293, "step": 19158 }, { "epoch": 1.0432841205339214, "grad_norm": 0.585761777581188, "learning_rate": 9.78485110264719e-05, "loss": 11.9625, "step": 19159 }, { "epoch": 1.0433385745305044, "grad_norm": 0.6041261370127157, "learning_rate": 9.783969481786435e-05, "loss": 12.0765, "step": 19160 }, { "epoch": 1.0433930285270874, "grad_norm": 0.6467978619240164, "learning_rate": 9.783087862605572e-05, "loss": 12.1038, "step": 19161 }, { "epoch": 1.0434474825236704, "grad_norm": 0.5517345887780721, "learning_rate": 9.782206245111459e-05, "loss": 12.184, "step": 19162 }, { "epoch": 1.0435019365202536, "grad_norm": 0.5585662895914788, "learning_rate": 9.781324629310942e-05, "loss": 12.1267, "step": 19163 }, { "epoch": 1.0435563905168366, "grad_norm": 0.5072476663086728, "learning_rate": 9.780443015210881e-05, "loss": 12.1028, "step": 19164 }, { "epoch": 1.0436108445134196, "grad_norm": 0.5149472102231276, "learning_rate": 9.779561402818131e-05, "loss": 12.1073, "step": 19165 }, { "epoch": 1.0436652985100026, "grad_norm": 0.5470801062193369, "learning_rate": 9.778679792139552e-05, "loss": 12.0336, "step": 19166 }, { "epoch": 1.0437197525065856, "grad_norm": 0.4981163730176472, "learning_rate": 9.777798183181993e-05, "loss": 12.0814, "step": 19167 }, { "epoch": 1.0437742065031685, "grad_norm": 0.5134029700065394, "learning_rate": 9.776916575952314e-05, "loss": 12.08, "step": 19168 }, { "epoch": 1.0438286604997515, "grad_norm": 0.5832556991628526, "learning_rate": 9.776034970457369e-05, "loss": 12.0783, "step": 19169 }, { "epoch": 1.0438831144963345, "grad_norm": 0.5377942104959453, "learning_rate": 9.775153366704013e-05, "loss": 12.1176, "step": 19170 }, { "epoch": 1.0439375684929175, "grad_norm": 0.6218772734868826, "learning_rate": 9.774271764699101e-05, "loss": 11.987, "step": 19171 }, { "epoch": 1.0439920224895005, "grad_norm": 0.5466535332473306, "learning_rate": 9.773390164449495e-05, "loss": 11.9507, "step": 19172 }, { "epoch": 1.0440464764860835, "grad_norm": 0.5297762922881801, "learning_rate": 9.772508565962042e-05, "loss": 11.9809, "step": 19173 }, { "epoch": 1.0441009304826667, "grad_norm": 0.5582817533417825, "learning_rate": 9.7716269692436e-05, "loss": 12.1064, "step": 19174 }, { "epoch": 1.0441553844792497, "grad_norm": 0.5852479088644624, "learning_rate": 9.770745374301022e-05, "loss": 12.076, "step": 19175 }, { "epoch": 1.0442098384758327, "grad_norm": 0.5511628923438155, "learning_rate": 9.76986378114117e-05, "loss": 12.1843, "step": 19176 }, { "epoch": 1.0442642924724157, "grad_norm": 0.5528512384448054, "learning_rate": 9.768982189770894e-05, "loss": 12.042, "step": 19177 }, { "epoch": 1.0443187464689987, "grad_norm": 0.5579554993906317, "learning_rate": 9.768100600197053e-05, "loss": 12.0521, "step": 19178 }, { "epoch": 1.0443732004655817, "grad_norm": 0.5920345224342375, "learning_rate": 9.7672190124265e-05, "loss": 12.1227, "step": 19179 }, { "epoch": 1.0444276544621647, "grad_norm": 0.5402265066864046, "learning_rate": 9.76633742646609e-05, "loss": 12.1896, "step": 19180 }, { "epoch": 1.0444821084587477, "grad_norm": 0.5230294482354321, "learning_rate": 9.76545584232268e-05, "loss": 11.9813, "step": 19181 }, { "epoch": 1.0445365624553307, "grad_norm": 0.5513612958958378, "learning_rate": 9.764574260003128e-05, "loss": 12.1628, "step": 19182 }, { "epoch": 1.0445910164519137, "grad_norm": 0.5647767986230641, "learning_rate": 9.763692679514284e-05, "loss": 11.8818, "step": 19183 }, { "epoch": 1.0446454704484966, "grad_norm": 0.5521198979994846, "learning_rate": 9.762811100863003e-05, "loss": 12.1333, "step": 19184 }, { "epoch": 1.0446999244450796, "grad_norm": 0.5230827615522323, "learning_rate": 9.761929524056145e-05, "loss": 12.1484, "step": 19185 }, { "epoch": 1.0447543784416629, "grad_norm": 0.5349816199825447, "learning_rate": 9.761047949100558e-05, "loss": 12.0065, "step": 19186 }, { "epoch": 1.0448088324382458, "grad_norm": 0.5682286337147697, "learning_rate": 9.760166376003107e-05, "loss": 12.1066, "step": 19187 }, { "epoch": 1.0448632864348288, "grad_norm": 0.533617795780643, "learning_rate": 9.759284804770642e-05, "loss": 12.0198, "step": 19188 }, { "epoch": 1.0449177404314118, "grad_norm": 0.6171098731477254, "learning_rate": 9.758403235410019e-05, "loss": 12.1067, "step": 19189 }, { "epoch": 1.0449721944279948, "grad_norm": 0.5329021938234568, "learning_rate": 9.757521667928092e-05, "loss": 11.9268, "step": 19190 }, { "epoch": 1.0450266484245778, "grad_norm": 0.5366953787561417, "learning_rate": 9.756640102331718e-05, "loss": 12.0649, "step": 19191 }, { "epoch": 1.0450811024211608, "grad_norm": 0.5661101853878987, "learning_rate": 9.755758538627753e-05, "loss": 12.1772, "step": 19192 }, { "epoch": 1.0451355564177438, "grad_norm": 0.6285858479349945, "learning_rate": 9.754876976823049e-05, "loss": 12.073, "step": 19193 }, { "epoch": 1.0451900104143268, "grad_norm": 0.5891952748044051, "learning_rate": 9.753995416924462e-05, "loss": 11.997, "step": 19194 }, { "epoch": 1.0452444644109098, "grad_norm": 0.5354460728775402, "learning_rate": 9.753113858938847e-05, "loss": 12.0828, "step": 19195 }, { "epoch": 1.0452989184074928, "grad_norm": 0.6363409217279798, "learning_rate": 9.752232302873061e-05, "loss": 12.0971, "step": 19196 }, { "epoch": 1.0453533724040758, "grad_norm": 0.6408417171455034, "learning_rate": 9.751350748733959e-05, "loss": 12.0337, "step": 19197 }, { "epoch": 1.045407826400659, "grad_norm": 0.5498651446757598, "learning_rate": 9.750469196528392e-05, "loss": 12.0722, "step": 19198 }, { "epoch": 1.045462280397242, "grad_norm": 0.6273062786380214, "learning_rate": 9.749587646263221e-05, "loss": 11.9887, "step": 19199 }, { "epoch": 1.045516734393825, "grad_norm": 0.631159313500319, "learning_rate": 9.748706097945298e-05, "loss": 12.0544, "step": 19200 }, { "epoch": 1.045571188390408, "grad_norm": 0.5390729842486164, "learning_rate": 9.74782455158148e-05, "loss": 12.036, "step": 19201 }, { "epoch": 1.045625642386991, "grad_norm": 0.5481233693458728, "learning_rate": 9.746943007178622e-05, "loss": 12.1109, "step": 19202 }, { "epoch": 1.045680096383574, "grad_norm": 0.6065527736780751, "learning_rate": 9.746061464743575e-05, "loss": 12.1451, "step": 19203 }, { "epoch": 1.045734550380157, "grad_norm": 0.573328558430312, "learning_rate": 9.745179924283196e-05, "loss": 12.1437, "step": 19204 }, { "epoch": 1.04578900437674, "grad_norm": 0.5542725518096085, "learning_rate": 9.744298385804341e-05, "loss": 11.9381, "step": 19205 }, { "epoch": 1.045843458373323, "grad_norm": 0.5887718192429499, "learning_rate": 9.743416849313866e-05, "loss": 12.1066, "step": 19206 }, { "epoch": 1.045897912369906, "grad_norm": 0.5810414666499816, "learning_rate": 9.742535314818624e-05, "loss": 12.1456, "step": 19207 }, { "epoch": 1.045952366366489, "grad_norm": 0.5934364387216299, "learning_rate": 9.74165378232547e-05, "loss": 12.07, "step": 19208 }, { "epoch": 1.046006820363072, "grad_norm": 0.544803947709447, "learning_rate": 9.740772251841257e-05, "loss": 11.9487, "step": 19209 }, { "epoch": 1.0460612743596551, "grad_norm": 0.6212074752348654, "learning_rate": 9.739890723372845e-05, "loss": 12.0921, "step": 19210 }, { "epoch": 1.0461157283562381, "grad_norm": 0.6278018299328106, "learning_rate": 9.739009196927086e-05, "loss": 12.0741, "step": 19211 }, { "epoch": 1.046170182352821, "grad_norm": 0.5454639466797658, "learning_rate": 9.738127672510836e-05, "loss": 12.0252, "step": 19212 }, { "epoch": 1.046224636349404, "grad_norm": 0.5713008151891188, "learning_rate": 9.737246150130951e-05, "loss": 11.949, "step": 19213 }, { "epoch": 1.046279090345987, "grad_norm": 0.5813303234196573, "learning_rate": 9.736364629794283e-05, "loss": 12.1801, "step": 19214 }, { "epoch": 1.04633354434257, "grad_norm": 0.563521297845805, "learning_rate": 9.735483111507686e-05, "loss": 11.8658, "step": 19215 }, { "epoch": 1.046387998339153, "grad_norm": 0.5257352010378813, "learning_rate": 9.734601595278018e-05, "loss": 12.0067, "step": 19216 }, { "epoch": 1.046442452335736, "grad_norm": 0.5098668625026491, "learning_rate": 9.733720081112132e-05, "loss": 12.052, "step": 19217 }, { "epoch": 1.046496906332319, "grad_norm": 0.5296134836923844, "learning_rate": 9.732838569016884e-05, "loss": 12.1223, "step": 19218 }, { "epoch": 1.046551360328902, "grad_norm": 0.5534850423144859, "learning_rate": 9.731957058999127e-05, "loss": 12.0633, "step": 19219 }, { "epoch": 1.046605814325485, "grad_norm": 0.5026348269791794, "learning_rate": 9.731075551065714e-05, "loss": 12.0089, "step": 19220 }, { "epoch": 1.0466602683220683, "grad_norm": 0.5766132676570002, "learning_rate": 9.730194045223506e-05, "loss": 12.0963, "step": 19221 }, { "epoch": 1.0467147223186513, "grad_norm": 0.5168019245496915, "learning_rate": 9.729312541479355e-05, "loss": 12.0861, "step": 19222 }, { "epoch": 1.0467691763152343, "grad_norm": 0.6158430966438531, "learning_rate": 9.728431039840118e-05, "loss": 12.0681, "step": 19223 }, { "epoch": 1.0468236303118172, "grad_norm": 0.5121067618505272, "learning_rate": 9.72754954031264e-05, "loss": 11.8811, "step": 19224 }, { "epoch": 1.0468780843084002, "grad_norm": 0.5995318782549505, "learning_rate": 9.726668042903786e-05, "loss": 12.1476, "step": 19225 }, { "epoch": 1.0469325383049832, "grad_norm": 0.5251005441098441, "learning_rate": 9.725786547620407e-05, "loss": 12.0328, "step": 19226 }, { "epoch": 1.0469869923015662, "grad_norm": 0.5925345936070446, "learning_rate": 9.724905054469357e-05, "loss": 12.2029, "step": 19227 }, { "epoch": 1.0470414462981492, "grad_norm": 0.5644629534226009, "learning_rate": 9.724023563457492e-05, "loss": 12.0922, "step": 19228 }, { "epoch": 1.0470959002947322, "grad_norm": 0.5145965892903692, "learning_rate": 9.723142074591665e-05, "loss": 12.0152, "step": 19229 }, { "epoch": 1.0471503542913152, "grad_norm": 0.617468919722614, "learning_rate": 9.722260587878734e-05, "loss": 11.9832, "step": 19230 }, { "epoch": 1.0472048082878982, "grad_norm": 0.5774638227556544, "learning_rate": 9.721379103325548e-05, "loss": 12.1204, "step": 19231 }, { "epoch": 1.0472592622844812, "grad_norm": 0.48206586144768443, "learning_rate": 9.720497620938965e-05, "loss": 12.013, "step": 19232 }, { "epoch": 1.0473137162810644, "grad_norm": 0.6548010509130806, "learning_rate": 9.719616140725846e-05, "loss": 12.1303, "step": 19233 }, { "epoch": 1.0473681702776474, "grad_norm": 0.698867251761497, "learning_rate": 9.718734662693034e-05, "loss": 12.2486, "step": 19234 }, { "epoch": 1.0474226242742304, "grad_norm": 0.5256828253879882, "learning_rate": 9.717853186847386e-05, "loss": 12.0381, "step": 19235 }, { "epoch": 1.0474770782708134, "grad_norm": 0.6207702632861498, "learning_rate": 9.716971713195762e-05, "loss": 12.1398, "step": 19236 }, { "epoch": 1.0475315322673964, "grad_norm": 0.5712375661066159, "learning_rate": 9.716090241745012e-05, "loss": 12.0401, "step": 19237 }, { "epoch": 1.0475859862639794, "grad_norm": 0.4935113312443913, "learning_rate": 9.715208772501992e-05, "loss": 12.0417, "step": 19238 }, { "epoch": 1.0476404402605624, "grad_norm": 0.6217792785204247, "learning_rate": 9.714327305473558e-05, "loss": 12.1523, "step": 19239 }, { "epoch": 1.0476948942571453, "grad_norm": 0.5723809438569106, "learning_rate": 9.713445840666562e-05, "loss": 12.0664, "step": 19240 }, { "epoch": 1.0477493482537283, "grad_norm": 0.5677739931138212, "learning_rate": 9.712564378087858e-05, "loss": 12.0922, "step": 19241 }, { "epoch": 1.0478038022503113, "grad_norm": 0.5640126414664229, "learning_rate": 9.7116829177443e-05, "loss": 12.131, "step": 19242 }, { "epoch": 1.0478582562468943, "grad_norm": 0.6640791811526993, "learning_rate": 9.710801459642751e-05, "loss": 11.9364, "step": 19243 }, { "epoch": 1.0479127102434775, "grad_norm": 0.5250140126089315, "learning_rate": 9.709920003790054e-05, "loss": 12.0516, "step": 19244 }, { "epoch": 1.0479671642400605, "grad_norm": 0.6142053809536904, "learning_rate": 9.709038550193068e-05, "loss": 12.0713, "step": 19245 }, { "epoch": 1.0480216182366435, "grad_norm": 0.5986476016795224, "learning_rate": 9.708157098858645e-05, "loss": 12.1929, "step": 19246 }, { "epoch": 1.0480760722332265, "grad_norm": 0.6491176720117351, "learning_rate": 9.707275649793642e-05, "loss": 12.0925, "step": 19247 }, { "epoch": 1.0481305262298095, "grad_norm": 0.6034636611147448, "learning_rate": 9.706394203004914e-05, "loss": 11.9917, "step": 19248 }, { "epoch": 1.0481849802263925, "grad_norm": 0.5886294219976741, "learning_rate": 9.705512758499313e-05, "loss": 12.1267, "step": 19249 }, { "epoch": 1.0482394342229755, "grad_norm": 0.6028214189036069, "learning_rate": 9.704631316283695e-05, "loss": 11.9418, "step": 19250 }, { "epoch": 1.0482938882195585, "grad_norm": 0.5716779343276686, "learning_rate": 9.703749876364913e-05, "loss": 12.0819, "step": 19251 }, { "epoch": 1.0483483422161415, "grad_norm": 0.5664468715715333, "learning_rate": 9.702868438749822e-05, "loss": 12.1577, "step": 19252 }, { "epoch": 1.0484027962127245, "grad_norm": 0.6202810352599408, "learning_rate": 9.701987003445278e-05, "loss": 11.9958, "step": 19253 }, { "epoch": 1.0484572502093075, "grad_norm": 0.6287441056132557, "learning_rate": 9.70110557045813e-05, "loss": 12.074, "step": 19254 }, { "epoch": 1.0485117042058905, "grad_norm": 0.6081310291659571, "learning_rate": 9.700224139795236e-05, "loss": 12.0567, "step": 19255 }, { "epoch": 1.0485661582024737, "grad_norm": 0.6344891453275684, "learning_rate": 9.699342711463448e-05, "loss": 12.0007, "step": 19256 }, { "epoch": 1.0486206121990567, "grad_norm": 0.5084448846445512, "learning_rate": 9.698461285469624e-05, "loss": 12.0539, "step": 19257 }, { "epoch": 1.0486750661956397, "grad_norm": 0.5475980659381137, "learning_rate": 9.697579861820611e-05, "loss": 12.1256, "step": 19258 }, { "epoch": 1.0487295201922227, "grad_norm": 0.523600751764549, "learning_rate": 9.696698440523271e-05, "loss": 12.1141, "step": 19259 }, { "epoch": 1.0487839741888056, "grad_norm": 0.6069424425625963, "learning_rate": 9.695817021584454e-05, "loss": 12.0484, "step": 19260 }, { "epoch": 1.0488384281853886, "grad_norm": 0.5742100551630694, "learning_rate": 9.694935605011017e-05, "loss": 12.0797, "step": 19261 }, { "epoch": 1.0488928821819716, "grad_norm": 0.5941016295730495, "learning_rate": 9.69405419080981e-05, "loss": 12.1006, "step": 19262 }, { "epoch": 1.0489473361785546, "grad_norm": 0.554492287310076, "learning_rate": 9.693172778987692e-05, "loss": 12.0692, "step": 19263 }, { "epoch": 1.0490017901751376, "grad_norm": 0.6211903334798932, "learning_rate": 9.69229136955151e-05, "loss": 11.9765, "step": 19264 }, { "epoch": 1.0490562441717206, "grad_norm": 0.6354330720514915, "learning_rate": 9.691409962508124e-05, "loss": 12.0981, "step": 19265 }, { "epoch": 1.0491106981683036, "grad_norm": 0.581561168575633, "learning_rate": 9.690528557864386e-05, "loss": 12.0893, "step": 19266 }, { "epoch": 1.0491651521648866, "grad_norm": 0.5602093649184966, "learning_rate": 9.689647155627149e-05, "loss": 11.992, "step": 19267 }, { "epoch": 1.0492196061614698, "grad_norm": 0.541758484464551, "learning_rate": 9.688765755803268e-05, "loss": 11.9762, "step": 19268 }, { "epoch": 1.0492740601580528, "grad_norm": 0.5567906003713667, "learning_rate": 9.687884358399594e-05, "loss": 12.0616, "step": 19269 }, { "epoch": 1.0493285141546358, "grad_norm": 0.5468525484934957, "learning_rate": 9.687002963422986e-05, "loss": 12.0693, "step": 19270 }, { "epoch": 1.0493829681512188, "grad_norm": 0.6748688401673388, "learning_rate": 9.686121570880294e-05, "loss": 12.0306, "step": 19271 }, { "epoch": 1.0494374221478018, "grad_norm": 0.6129731420684097, "learning_rate": 9.685240180778376e-05, "loss": 12.0475, "step": 19272 }, { "epoch": 1.0494918761443848, "grad_norm": 0.5224925834604115, "learning_rate": 9.684358793124084e-05, "loss": 12.1006, "step": 19273 }, { "epoch": 1.0495463301409678, "grad_norm": 0.525354978290664, "learning_rate": 9.683477407924268e-05, "loss": 12.0718, "step": 19274 }, { "epoch": 1.0496007841375508, "grad_norm": 0.5501231338286172, "learning_rate": 9.682596025185786e-05, "loss": 12.037, "step": 19275 }, { "epoch": 1.0496552381341338, "grad_norm": 0.5525824228493883, "learning_rate": 9.68171464491549e-05, "loss": 11.8471, "step": 19276 }, { "epoch": 1.0497096921307167, "grad_norm": 0.5237595322292766, "learning_rate": 9.680833267120234e-05, "loss": 12.1528, "step": 19277 }, { "epoch": 1.0497641461272997, "grad_norm": 0.547492121864182, "learning_rate": 9.679951891806873e-05, "loss": 12.1126, "step": 19278 }, { "epoch": 1.0498186001238827, "grad_norm": 0.5423175265745502, "learning_rate": 9.679070518982259e-05, "loss": 12.0847, "step": 19279 }, { "epoch": 1.049873054120466, "grad_norm": 0.5409640984493687, "learning_rate": 9.678189148653246e-05, "loss": 12.018, "step": 19280 }, { "epoch": 1.049927508117049, "grad_norm": 0.5342685429477202, "learning_rate": 9.677307780826687e-05, "loss": 12.0387, "step": 19281 }, { "epoch": 1.049981962113632, "grad_norm": 0.5299288139375267, "learning_rate": 9.676426415509439e-05, "loss": 12.0441, "step": 19282 }, { "epoch": 1.050036416110215, "grad_norm": 0.664810509725584, "learning_rate": 9.675545052708358e-05, "loss": 12.095, "step": 19283 }, { "epoch": 1.050090870106798, "grad_norm": 0.5478869296306361, "learning_rate": 9.674663692430286e-05, "loss": 12.0243, "step": 19284 }, { "epoch": 1.050145324103381, "grad_norm": 0.5575254940100597, "learning_rate": 9.673782334682085e-05, "loss": 12.0398, "step": 19285 }, { "epoch": 1.050199778099964, "grad_norm": 0.5672195448002826, "learning_rate": 9.672900979470608e-05, "loss": 12.0406, "step": 19286 }, { "epoch": 1.050254232096547, "grad_norm": 0.5550502418984661, "learning_rate": 9.672019626802708e-05, "loss": 12.0973, "step": 19287 }, { "epoch": 1.0503086860931299, "grad_norm": 0.5527939947632422, "learning_rate": 9.671138276685238e-05, "loss": 12.1604, "step": 19288 }, { "epoch": 1.0503631400897129, "grad_norm": 0.524206438490076, "learning_rate": 9.670256929125053e-05, "loss": 12.1217, "step": 19289 }, { "epoch": 1.0504175940862959, "grad_norm": 0.56160257257029, "learning_rate": 9.669375584129005e-05, "loss": 12.0312, "step": 19290 }, { "epoch": 1.050472048082879, "grad_norm": 0.5204986254117848, "learning_rate": 9.668494241703945e-05, "loss": 12.0738, "step": 19291 }, { "epoch": 1.050526502079462, "grad_norm": 0.5324262785130692, "learning_rate": 9.667612901856732e-05, "loss": 11.7358, "step": 19292 }, { "epoch": 1.050580956076045, "grad_norm": 0.5772614596923383, "learning_rate": 9.666731564594222e-05, "loss": 12.0383, "step": 19293 }, { "epoch": 1.050635410072628, "grad_norm": 0.5660892623770573, "learning_rate": 9.665850229923258e-05, "loss": 12.0707, "step": 19294 }, { "epoch": 1.050689864069211, "grad_norm": 0.5317718569733886, "learning_rate": 9.664968897850695e-05, "loss": 12.0076, "step": 19295 }, { "epoch": 1.050744318065794, "grad_norm": 0.5517253525771574, "learning_rate": 9.664087568383394e-05, "loss": 12.115, "step": 19296 }, { "epoch": 1.050798772062377, "grad_norm": 0.5468316584293257, "learning_rate": 9.663206241528204e-05, "loss": 12.1009, "step": 19297 }, { "epoch": 1.05085322605896, "grad_norm": 0.539461242898282, "learning_rate": 9.662324917291979e-05, "loss": 12.1092, "step": 19298 }, { "epoch": 1.050907680055543, "grad_norm": 0.4899975306952434, "learning_rate": 9.661443595681573e-05, "loss": 11.9413, "step": 19299 }, { "epoch": 1.050962134052126, "grad_norm": 0.5716632962528633, "learning_rate": 9.660562276703838e-05, "loss": 12.0186, "step": 19300 }, { "epoch": 1.051016588048709, "grad_norm": 0.531144636239472, "learning_rate": 9.659680960365626e-05, "loss": 11.9862, "step": 19301 }, { "epoch": 1.051071042045292, "grad_norm": 0.5140450260765325, "learning_rate": 9.658799646673793e-05, "loss": 12.1339, "step": 19302 }, { "epoch": 1.0511254960418752, "grad_norm": 0.5405131973007933, "learning_rate": 9.657918335635194e-05, "loss": 12.002, "step": 19303 }, { "epoch": 1.0511799500384582, "grad_norm": 0.5421925293191863, "learning_rate": 9.657037027256676e-05, "loss": 12.0472, "step": 19304 }, { "epoch": 1.0512344040350412, "grad_norm": 0.5010776098536464, "learning_rate": 9.656155721545094e-05, "loss": 12.0539, "step": 19305 }, { "epoch": 1.0512888580316242, "grad_norm": 0.5373765483696279, "learning_rate": 9.655274418507307e-05, "loss": 12.0694, "step": 19306 }, { "epoch": 1.0513433120282072, "grad_norm": 0.5229203708756343, "learning_rate": 9.654393118150159e-05, "loss": 11.9288, "step": 19307 }, { "epoch": 1.0513977660247902, "grad_norm": 0.523050323587943, "learning_rate": 9.653511820480511e-05, "loss": 12.0839, "step": 19308 }, { "epoch": 1.0514522200213732, "grad_norm": 0.52587795112077, "learning_rate": 9.652630525505213e-05, "loss": 12.0395, "step": 19309 }, { "epoch": 1.0515066740179562, "grad_norm": 0.5440393969364299, "learning_rate": 9.651749233231117e-05, "loss": 12.0127, "step": 19310 }, { "epoch": 1.0515611280145392, "grad_norm": 0.5877520952516384, "learning_rate": 9.65086794366508e-05, "loss": 12.1713, "step": 19311 }, { "epoch": 1.0516155820111222, "grad_norm": 0.5626253843201025, "learning_rate": 9.649986656813951e-05, "loss": 12.0419, "step": 19312 }, { "epoch": 1.0516700360077051, "grad_norm": 0.560972630053483, "learning_rate": 9.649105372684586e-05, "loss": 11.9527, "step": 19313 }, { "epoch": 1.0517244900042884, "grad_norm": 0.5201536054810402, "learning_rate": 9.648224091283835e-05, "loss": 11.8398, "step": 19314 }, { "epoch": 1.0517789440008714, "grad_norm": 0.5708413242075481, "learning_rate": 9.647342812618553e-05, "loss": 12.0981, "step": 19315 }, { "epoch": 1.0518333979974543, "grad_norm": 0.538158698770974, "learning_rate": 9.646461536695591e-05, "loss": 11.9265, "step": 19316 }, { "epoch": 1.0518878519940373, "grad_norm": 0.5379127553390638, "learning_rate": 9.645580263521805e-05, "loss": 12.1438, "step": 19317 }, { "epoch": 1.0519423059906203, "grad_norm": 0.5904144811653973, "learning_rate": 9.644698993104044e-05, "loss": 11.9552, "step": 19318 }, { "epoch": 1.0519967599872033, "grad_norm": 0.5484578526745109, "learning_rate": 9.643817725449163e-05, "loss": 12.0061, "step": 19319 }, { "epoch": 1.0520512139837863, "grad_norm": 0.5452909707857968, "learning_rate": 9.642936460564019e-05, "loss": 12.0431, "step": 19320 }, { "epoch": 1.0521056679803693, "grad_norm": 0.5896543186751604, "learning_rate": 9.642055198455457e-05, "loss": 12.1793, "step": 19321 }, { "epoch": 1.0521601219769523, "grad_norm": 0.5424352016462574, "learning_rate": 9.641173939130337e-05, "loss": 11.9724, "step": 19322 }, { "epoch": 1.0522145759735353, "grad_norm": 0.548154158162294, "learning_rate": 9.640292682595508e-05, "loss": 11.9925, "step": 19323 }, { "epoch": 1.0522690299701183, "grad_norm": 0.5113758994292995, "learning_rate": 9.639411428857823e-05, "loss": 12.145, "step": 19324 }, { "epoch": 1.0523234839667013, "grad_norm": 0.5444394344892941, "learning_rate": 9.638530177924136e-05, "loss": 12.011, "step": 19325 }, { "epoch": 1.0523779379632845, "grad_norm": 0.5504481687682757, "learning_rate": 9.637648929801297e-05, "loss": 11.9842, "step": 19326 }, { "epoch": 1.0524323919598675, "grad_norm": 0.5950719535763734, "learning_rate": 9.636767684496162e-05, "loss": 12.0425, "step": 19327 }, { "epoch": 1.0524868459564505, "grad_norm": 0.5807610266139246, "learning_rate": 9.635886442015582e-05, "loss": 12.0858, "step": 19328 }, { "epoch": 1.0525412999530335, "grad_norm": 0.5517142506957727, "learning_rate": 9.635005202366407e-05, "loss": 11.974, "step": 19329 }, { "epoch": 1.0525957539496165, "grad_norm": 0.6235125416785774, "learning_rate": 9.634123965555495e-05, "loss": 12.0126, "step": 19330 }, { "epoch": 1.0526502079461995, "grad_norm": 0.5510346855094569, "learning_rate": 9.633242731589698e-05, "loss": 12.0586, "step": 19331 }, { "epoch": 1.0527046619427824, "grad_norm": 0.5484858157775377, "learning_rate": 9.632361500475866e-05, "loss": 12.1546, "step": 19332 }, { "epoch": 1.0527591159393654, "grad_norm": 0.5569662093016531, "learning_rate": 9.631480272220855e-05, "loss": 12.0479, "step": 19333 }, { "epoch": 1.0528135699359484, "grad_norm": 0.5913187816809812, "learning_rate": 9.630599046831513e-05, "loss": 12.048, "step": 19334 }, { "epoch": 1.0528680239325314, "grad_norm": 0.5415003726459723, "learning_rate": 9.629717824314696e-05, "loss": 12.1657, "step": 19335 }, { "epoch": 1.0529224779291144, "grad_norm": 0.5880368853026926, "learning_rate": 9.628836604677253e-05, "loss": 11.9169, "step": 19336 }, { "epoch": 1.0529769319256976, "grad_norm": 0.6214821322491036, "learning_rate": 9.627955387926041e-05, "loss": 11.9572, "step": 19337 }, { "epoch": 1.0530313859222806, "grad_norm": 0.5374898237994633, "learning_rate": 9.627074174067909e-05, "loss": 12.0442, "step": 19338 }, { "epoch": 1.0530858399188636, "grad_norm": 0.5723645363983014, "learning_rate": 9.62619296310971e-05, "loss": 12.2039, "step": 19339 }, { "epoch": 1.0531402939154466, "grad_norm": 0.5839935285049328, "learning_rate": 9.625311755058296e-05, "loss": 11.9861, "step": 19340 }, { "epoch": 1.0531947479120296, "grad_norm": 0.5062275376127757, "learning_rate": 9.624430549920523e-05, "loss": 11.994, "step": 19341 }, { "epoch": 1.0532492019086126, "grad_norm": 0.5318769916007122, "learning_rate": 9.62354934770324e-05, "loss": 12.105, "step": 19342 }, { "epoch": 1.0533036559051956, "grad_norm": 0.5984575688586973, "learning_rate": 9.622668148413306e-05, "loss": 12.076, "step": 19343 }, { "epoch": 1.0533581099017786, "grad_norm": 0.5588173460010458, "learning_rate": 9.621786952057561e-05, "loss": 11.9369, "step": 19344 }, { "epoch": 1.0534125638983616, "grad_norm": 0.5773820580009359, "learning_rate": 9.620905758642867e-05, "loss": 12.0063, "step": 19345 }, { "epoch": 1.0534670178949446, "grad_norm": 0.5589950202886801, "learning_rate": 9.620024568176071e-05, "loss": 12.0991, "step": 19346 }, { "epoch": 1.0535214718915276, "grad_norm": 0.6144528426187391, "learning_rate": 9.61914338066403e-05, "loss": 11.9937, "step": 19347 }, { "epoch": 1.0535759258881106, "grad_norm": 0.5543121445619258, "learning_rate": 9.618262196113594e-05, "loss": 12.1332, "step": 19348 }, { "epoch": 1.0536303798846938, "grad_norm": 0.5227719779637813, "learning_rate": 9.617381014531614e-05, "loss": 12.1155, "step": 19349 }, { "epoch": 1.0536848338812768, "grad_norm": 0.47408434348923856, "learning_rate": 9.616499835924943e-05, "loss": 12.0586, "step": 19350 }, { "epoch": 1.0537392878778598, "grad_norm": 0.5539712618336022, "learning_rate": 9.615618660300434e-05, "loss": 12.0447, "step": 19351 }, { "epoch": 1.0537937418744427, "grad_norm": 0.5532620334707203, "learning_rate": 9.614737487664938e-05, "loss": 12.1215, "step": 19352 }, { "epoch": 1.0538481958710257, "grad_norm": 0.4965213600162703, "learning_rate": 9.613856318025308e-05, "loss": 11.9894, "step": 19353 }, { "epoch": 1.0539026498676087, "grad_norm": 0.5440742933537539, "learning_rate": 9.612975151388401e-05, "loss": 12.0088, "step": 19354 }, { "epoch": 1.0539571038641917, "grad_norm": 0.6165878924064676, "learning_rate": 9.61209398776106e-05, "loss": 11.9843, "step": 19355 }, { "epoch": 1.0540115578607747, "grad_norm": 0.5577108598869567, "learning_rate": 9.61121282715014e-05, "loss": 12.0244, "step": 19356 }, { "epoch": 1.0540660118573577, "grad_norm": 0.5275445677344449, "learning_rate": 9.610331669562495e-05, "loss": 12.0602, "step": 19357 }, { "epoch": 1.0541204658539407, "grad_norm": 0.5336427825182262, "learning_rate": 9.609450515004977e-05, "loss": 12.1088, "step": 19358 }, { "epoch": 1.0541749198505237, "grad_norm": 0.5544962996409228, "learning_rate": 9.608569363484436e-05, "loss": 12.0743, "step": 19359 }, { "epoch": 1.0542293738471067, "grad_norm": 0.5712553238000525, "learning_rate": 9.607688215007728e-05, "loss": 12.0085, "step": 19360 }, { "epoch": 1.05428382784369, "grad_norm": 0.5872483044221241, "learning_rate": 9.6068070695817e-05, "loss": 12.0825, "step": 19361 }, { "epoch": 1.054338281840273, "grad_norm": 0.5301262680488378, "learning_rate": 9.605925927213207e-05, "loss": 12.0546, "step": 19362 }, { "epoch": 1.0543927358368559, "grad_norm": 0.5941182422544317, "learning_rate": 9.605044787909098e-05, "loss": 11.9446, "step": 19363 }, { "epoch": 1.0544471898334389, "grad_norm": 0.5981957575214851, "learning_rate": 9.604163651676232e-05, "loss": 11.9136, "step": 19364 }, { "epoch": 1.0545016438300219, "grad_norm": 0.5661166063853473, "learning_rate": 9.603282518521453e-05, "loss": 12.0079, "step": 19365 }, { "epoch": 1.0545560978266049, "grad_norm": 0.5799740773054626, "learning_rate": 9.602401388451615e-05, "loss": 11.96, "step": 19366 }, { "epoch": 1.0546105518231879, "grad_norm": 0.5895977707861102, "learning_rate": 9.60152026147357e-05, "loss": 12.0868, "step": 19367 }, { "epoch": 1.0546650058197709, "grad_norm": 0.5138707350489121, "learning_rate": 9.60063913759417e-05, "loss": 12.1091, "step": 19368 }, { "epoch": 1.0547194598163538, "grad_norm": 0.5883943166266282, "learning_rate": 9.599758016820269e-05, "loss": 11.9937, "step": 19369 }, { "epoch": 1.0547739138129368, "grad_norm": 0.5673055198369863, "learning_rate": 9.598876899158715e-05, "loss": 12.0495, "step": 19370 }, { "epoch": 1.0548283678095198, "grad_norm": 0.5766062835107026, "learning_rate": 9.597995784616363e-05, "loss": 12.0353, "step": 19371 }, { "epoch": 1.0548828218061028, "grad_norm": 0.6058495704831871, "learning_rate": 9.597114673200062e-05, "loss": 12.0471, "step": 19372 }, { "epoch": 1.054937275802686, "grad_norm": 0.5255673156686416, "learning_rate": 9.596233564916665e-05, "loss": 12.0796, "step": 19373 }, { "epoch": 1.054991729799269, "grad_norm": 0.6016153150932537, "learning_rate": 9.595352459773025e-05, "loss": 12.1089, "step": 19374 }, { "epoch": 1.055046183795852, "grad_norm": 0.556265804799959, "learning_rate": 9.594471357775993e-05, "loss": 12.0073, "step": 19375 }, { "epoch": 1.055100637792435, "grad_norm": 0.5608704283257167, "learning_rate": 9.593590258932417e-05, "loss": 11.8563, "step": 19376 }, { "epoch": 1.055155091789018, "grad_norm": 0.5781513393423386, "learning_rate": 9.592709163249153e-05, "loss": 12.1517, "step": 19377 }, { "epoch": 1.055209545785601, "grad_norm": 0.5657138345497488, "learning_rate": 9.591828070733047e-05, "loss": 12.0803, "step": 19378 }, { "epoch": 1.055263999782184, "grad_norm": 0.4916113789165632, "learning_rate": 9.590946981390958e-05, "loss": 12.1075, "step": 19379 }, { "epoch": 1.055318453778767, "grad_norm": 0.5017788759661065, "learning_rate": 9.590065895229732e-05, "loss": 12.0225, "step": 19380 }, { "epoch": 1.05537290777535, "grad_norm": 0.6423824315364451, "learning_rate": 9.589184812256225e-05, "loss": 12.0609, "step": 19381 }, { "epoch": 1.055427361771933, "grad_norm": 0.5300731974853954, "learning_rate": 9.588303732477283e-05, "loss": 12.0407, "step": 19382 }, { "epoch": 1.055481815768516, "grad_norm": 0.533408299576154, "learning_rate": 9.587422655899762e-05, "loss": 12.114, "step": 19383 }, { "epoch": 1.0555362697650992, "grad_norm": 0.565065946695608, "learning_rate": 9.586541582530514e-05, "loss": 12.0338, "step": 19384 }, { "epoch": 1.0555907237616822, "grad_norm": 0.5949280397479372, "learning_rate": 9.585660512376384e-05, "loss": 11.9603, "step": 19385 }, { "epoch": 1.0556451777582652, "grad_norm": 0.5447417047295078, "learning_rate": 9.58477944544423e-05, "loss": 11.9043, "step": 19386 }, { "epoch": 1.0556996317548482, "grad_norm": 0.6134685780708565, "learning_rate": 9.583898381740898e-05, "loss": 12.0248, "step": 19387 }, { "epoch": 1.0557540857514311, "grad_norm": 0.5944890783600187, "learning_rate": 9.583017321273243e-05, "loss": 11.9876, "step": 19388 }, { "epoch": 1.0558085397480141, "grad_norm": 0.5340596628731121, "learning_rate": 9.582136264048114e-05, "loss": 12.1036, "step": 19389 }, { "epoch": 1.0558629937445971, "grad_norm": 0.6800523990437837, "learning_rate": 9.581255210072364e-05, "loss": 12.0343, "step": 19390 }, { "epoch": 1.0559174477411801, "grad_norm": 0.5345874595919592, "learning_rate": 9.580374159352845e-05, "loss": 12.071, "step": 19391 }, { "epoch": 1.0559719017377631, "grad_norm": 0.48246863244610083, "learning_rate": 9.579493111896406e-05, "loss": 11.9095, "step": 19392 }, { "epoch": 1.0560263557343461, "grad_norm": 0.564840799839841, "learning_rate": 9.5786120677099e-05, "loss": 11.8749, "step": 19393 }, { "epoch": 1.056080809730929, "grad_norm": 0.5859494509249186, "learning_rate": 9.577731026800179e-05, "loss": 12.0816, "step": 19394 }, { "epoch": 1.056135263727512, "grad_norm": 0.5356116104743887, "learning_rate": 9.57684998917409e-05, "loss": 11.9958, "step": 19395 }, { "epoch": 1.0561897177240953, "grad_norm": 0.602266790571894, "learning_rate": 9.575968954838487e-05, "loss": 12.1512, "step": 19396 }, { "epoch": 1.0562441717206783, "grad_norm": 0.6157757269371643, "learning_rate": 9.57508792380022e-05, "loss": 12.1429, "step": 19397 }, { "epoch": 1.0562986257172613, "grad_norm": 0.5980005309795724, "learning_rate": 9.57420689606614e-05, "loss": 12.0587, "step": 19398 }, { "epoch": 1.0563530797138443, "grad_norm": 0.5908034979075625, "learning_rate": 9.5733258716431e-05, "loss": 12.1337, "step": 19399 }, { "epoch": 1.0564075337104273, "grad_norm": 0.5343902534124586, "learning_rate": 9.572444850537948e-05, "loss": 11.9634, "step": 19400 }, { "epoch": 1.0564619877070103, "grad_norm": 0.5842238118117028, "learning_rate": 9.571563832757536e-05, "loss": 12.0509, "step": 19401 }, { "epoch": 1.0565164417035933, "grad_norm": 0.61275874507477, "learning_rate": 9.570682818308715e-05, "loss": 12.0451, "step": 19402 }, { "epoch": 1.0565708957001763, "grad_norm": 0.5382671440730098, "learning_rate": 9.56980180719834e-05, "loss": 12.1103, "step": 19403 }, { "epoch": 1.0566253496967593, "grad_norm": 0.5133640065547999, "learning_rate": 9.568920799433261e-05, "loss": 11.9877, "step": 19404 }, { "epoch": 1.0566798036933422, "grad_norm": 0.5625235678126909, "learning_rate": 9.568039795020319e-05, "loss": 12.07, "step": 19405 }, { "epoch": 1.0567342576899252, "grad_norm": 0.6284570348095425, "learning_rate": 9.567158793966374e-05, "loss": 12.1412, "step": 19406 }, { "epoch": 1.0567887116865085, "grad_norm": 0.576328893838415, "learning_rate": 9.566277796278276e-05, "loss": 11.9983, "step": 19407 }, { "epoch": 1.0568431656830914, "grad_norm": 0.5531351910635469, "learning_rate": 9.565396801962874e-05, "loss": 12.0894, "step": 19408 }, { "epoch": 1.0568976196796744, "grad_norm": 0.5862286076121059, "learning_rate": 9.56451581102702e-05, "loss": 12.0713, "step": 19409 }, { "epoch": 1.0569520736762574, "grad_norm": 0.49995967750380066, "learning_rate": 9.563634823477563e-05, "loss": 12.0698, "step": 19410 }, { "epoch": 1.0570065276728404, "grad_norm": 0.5108491936131714, "learning_rate": 9.562753839321355e-05, "loss": 12.0247, "step": 19411 }, { "epoch": 1.0570609816694234, "grad_norm": 0.5810846071878304, "learning_rate": 9.561872858565245e-05, "loss": 12.1781, "step": 19412 }, { "epoch": 1.0571154356660064, "grad_norm": 0.5720799743612933, "learning_rate": 9.560991881216088e-05, "loss": 11.911, "step": 19413 }, { "epoch": 1.0571698896625894, "grad_norm": 0.5087805100179055, "learning_rate": 9.560110907280734e-05, "loss": 11.9394, "step": 19414 }, { "epoch": 1.0572243436591724, "grad_norm": 0.5364730959626036, "learning_rate": 9.559229936766028e-05, "loss": 12.0287, "step": 19415 }, { "epoch": 1.0572787976557554, "grad_norm": 0.5284840458321186, "learning_rate": 9.558348969678822e-05, "loss": 11.9812, "step": 19416 }, { "epoch": 1.0573332516523384, "grad_norm": 0.5570402363204487, "learning_rate": 9.55746800602597e-05, "loss": 12.0449, "step": 19417 }, { "epoch": 1.0573877056489214, "grad_norm": 0.6044717380725996, "learning_rate": 9.556587045814321e-05, "loss": 12.1534, "step": 19418 }, { "epoch": 1.0574421596455046, "grad_norm": 0.5526742651145066, "learning_rate": 9.555706089050727e-05, "loss": 12.1023, "step": 19419 }, { "epoch": 1.0574966136420876, "grad_norm": 0.5613785787803098, "learning_rate": 9.554825135742037e-05, "loss": 12.0306, "step": 19420 }, { "epoch": 1.0575510676386706, "grad_norm": 0.5217897384186181, "learning_rate": 9.553944185895098e-05, "loss": 11.9021, "step": 19421 }, { "epoch": 1.0576055216352536, "grad_norm": 0.5383082673278936, "learning_rate": 9.553063239516766e-05, "loss": 12.214, "step": 19422 }, { "epoch": 1.0576599756318366, "grad_norm": 0.5102142305765428, "learning_rate": 9.552182296613888e-05, "loss": 12.0834, "step": 19423 }, { "epoch": 1.0577144296284196, "grad_norm": 0.5830261124845366, "learning_rate": 9.55130135719332e-05, "loss": 12.0077, "step": 19424 }, { "epoch": 1.0577688836250025, "grad_norm": 0.5591125259697896, "learning_rate": 9.550420421261905e-05, "loss": 12.0461, "step": 19425 }, { "epoch": 1.0578233376215855, "grad_norm": 0.5584932430347865, "learning_rate": 9.549539488826497e-05, "loss": 12.0933, "step": 19426 }, { "epoch": 1.0578777916181685, "grad_norm": 0.606281460145651, "learning_rate": 9.548658559893942e-05, "loss": 12.1265, "step": 19427 }, { "epoch": 1.0579322456147515, "grad_norm": 0.5216271627094825, "learning_rate": 9.547777634471095e-05, "loss": 12.0587, "step": 19428 }, { "epoch": 1.0579866996113345, "grad_norm": 0.6148456316966616, "learning_rate": 9.546896712564807e-05, "loss": 12.3049, "step": 19429 }, { "epoch": 1.0580411536079175, "grad_norm": 0.5183933982480431, "learning_rate": 9.546015794181925e-05, "loss": 12.1366, "step": 19430 }, { "epoch": 1.0580956076045007, "grad_norm": 0.5372059927978043, "learning_rate": 9.5451348793293e-05, "loss": 11.9844, "step": 19431 }, { "epoch": 1.0581500616010837, "grad_norm": 0.5360406190489272, "learning_rate": 9.544253968013784e-05, "loss": 11.9618, "step": 19432 }, { "epoch": 1.0582045155976667, "grad_norm": 0.5348052474035279, "learning_rate": 9.543373060242225e-05, "loss": 12.1427, "step": 19433 }, { "epoch": 1.0582589695942497, "grad_norm": 0.5252043494377767, "learning_rate": 9.542492156021475e-05, "loss": 12.0432, "step": 19434 }, { "epoch": 1.0583134235908327, "grad_norm": 0.5148590361233946, "learning_rate": 9.541611255358381e-05, "loss": 11.8975, "step": 19435 }, { "epoch": 1.0583678775874157, "grad_norm": 0.5501657885838557, "learning_rate": 9.540730358259795e-05, "loss": 12.0395, "step": 19436 }, { "epoch": 1.0584223315839987, "grad_norm": 0.5489019089616707, "learning_rate": 9.539849464732566e-05, "loss": 12.0335, "step": 19437 }, { "epoch": 1.0584767855805817, "grad_norm": 0.6431949386899316, "learning_rate": 9.538968574783543e-05, "loss": 12.0754, "step": 19438 }, { "epoch": 1.0585312395771647, "grad_norm": 0.6179053784918723, "learning_rate": 9.53808768841958e-05, "loss": 12.1651, "step": 19439 }, { "epoch": 1.0585856935737477, "grad_norm": 0.529575356971183, "learning_rate": 9.537206805647524e-05, "loss": 11.9781, "step": 19440 }, { "epoch": 1.0586401475703306, "grad_norm": 0.5669109622144446, "learning_rate": 9.536325926474227e-05, "loss": 12.092, "step": 19441 }, { "epoch": 1.0586946015669136, "grad_norm": 0.5637886966334247, "learning_rate": 9.535445050906536e-05, "loss": 12.0313, "step": 19442 }, { "epoch": 1.0587490555634969, "grad_norm": 0.5354476191036248, "learning_rate": 9.534564178951302e-05, "loss": 11.9178, "step": 19443 }, { "epoch": 1.0588035095600798, "grad_norm": 0.5809895908083332, "learning_rate": 9.533683310615378e-05, "loss": 12.1999, "step": 19444 }, { "epoch": 1.0588579635566628, "grad_norm": 0.5156859117911462, "learning_rate": 9.532802445905608e-05, "loss": 12.0573, "step": 19445 }, { "epoch": 1.0589124175532458, "grad_norm": 0.524969301738632, "learning_rate": 9.531921584828845e-05, "loss": 12.1403, "step": 19446 }, { "epoch": 1.0589668715498288, "grad_norm": 0.5506294772032159, "learning_rate": 9.531040727391938e-05, "loss": 11.8896, "step": 19447 }, { "epoch": 1.0590213255464118, "grad_norm": 0.515800456877427, "learning_rate": 9.530159873601738e-05, "loss": 12.0135, "step": 19448 }, { "epoch": 1.0590757795429948, "grad_norm": 0.5434180280161715, "learning_rate": 9.529279023465089e-05, "loss": 12.0846, "step": 19449 }, { "epoch": 1.0591302335395778, "grad_norm": 0.49115390638875817, "learning_rate": 9.528398176988849e-05, "loss": 11.9628, "step": 19450 }, { "epoch": 1.0591846875361608, "grad_norm": 0.5960538189920022, "learning_rate": 9.527517334179864e-05, "loss": 12.1663, "step": 19451 }, { "epoch": 1.0592391415327438, "grad_norm": 0.6018235777881578, "learning_rate": 9.526636495044984e-05, "loss": 12.0909, "step": 19452 }, { "epoch": 1.0592935955293268, "grad_norm": 0.5492354738252602, "learning_rate": 9.525755659591057e-05, "loss": 12.0349, "step": 19453 }, { "epoch": 1.05934804952591, "grad_norm": 0.5318644500366964, "learning_rate": 9.524874827824936e-05, "loss": 12.1293, "step": 19454 }, { "epoch": 1.059402503522493, "grad_norm": 0.5377986794027186, "learning_rate": 9.523993999753466e-05, "loss": 12.0167, "step": 19455 }, { "epoch": 1.059456957519076, "grad_norm": 0.5683297293895401, "learning_rate": 9.523113175383498e-05, "loss": 12.1201, "step": 19456 }, { "epoch": 1.059511411515659, "grad_norm": 0.5281439523592487, "learning_rate": 9.522232354721882e-05, "loss": 12.0985, "step": 19457 }, { "epoch": 1.059565865512242, "grad_norm": 0.5574099817183141, "learning_rate": 9.521351537775467e-05, "loss": 12.0668, "step": 19458 }, { "epoch": 1.059620319508825, "grad_norm": 0.5481013815660997, "learning_rate": 9.520470724551104e-05, "loss": 12.0165, "step": 19459 }, { "epoch": 1.059674773505408, "grad_norm": 0.5258555008954695, "learning_rate": 9.51958991505564e-05, "loss": 12.1447, "step": 19460 }, { "epoch": 1.059729227501991, "grad_norm": 0.5913586523355205, "learning_rate": 9.518709109295922e-05, "loss": 11.9954, "step": 19461 }, { "epoch": 1.059783681498574, "grad_norm": 0.5426523361113232, "learning_rate": 9.517828307278807e-05, "loss": 12.1235, "step": 19462 }, { "epoch": 1.059838135495157, "grad_norm": 0.6265948071629064, "learning_rate": 9.51694750901114e-05, "loss": 12.0963, "step": 19463 }, { "epoch": 1.05989258949174, "grad_norm": 0.5890047993985151, "learning_rate": 9.516066714499772e-05, "loss": 12.1072, "step": 19464 }, { "epoch": 1.059947043488323, "grad_norm": 0.5556535862287739, "learning_rate": 9.515185923751547e-05, "loss": 12.0839, "step": 19465 }, { "epoch": 1.0600014974849061, "grad_norm": 0.5251302373355244, "learning_rate": 9.514305136773318e-05, "loss": 12.1077, "step": 19466 }, { "epoch": 1.0600559514814891, "grad_norm": 0.5924405189573182, "learning_rate": 9.513424353571934e-05, "loss": 12.1408, "step": 19467 }, { "epoch": 1.0601104054780721, "grad_norm": 0.533802556666249, "learning_rate": 9.512543574154245e-05, "loss": 11.9806, "step": 19468 }, { "epoch": 1.060164859474655, "grad_norm": 0.5166721009870676, "learning_rate": 9.511662798527096e-05, "loss": 12.1354, "step": 19469 }, { "epoch": 1.060219313471238, "grad_norm": 0.5407636437450513, "learning_rate": 9.510782026697343e-05, "loss": 11.9844, "step": 19470 }, { "epoch": 1.060273767467821, "grad_norm": 0.5331792045706529, "learning_rate": 9.509901258671827e-05, "loss": 12.0391, "step": 19471 }, { "epoch": 1.060328221464404, "grad_norm": 0.5508183535166898, "learning_rate": 9.5090204944574e-05, "loss": 12.0949, "step": 19472 }, { "epoch": 1.060382675460987, "grad_norm": 0.5688078405882937, "learning_rate": 9.508139734060915e-05, "loss": 12.086, "step": 19473 }, { "epoch": 1.06043712945757, "grad_norm": 0.5733624994938668, "learning_rate": 9.507258977489223e-05, "loss": 12.0644, "step": 19474 }, { "epoch": 1.060491583454153, "grad_norm": 0.5443580440120191, "learning_rate": 9.506378224749163e-05, "loss": 12.0725, "step": 19475 }, { "epoch": 1.060546037450736, "grad_norm": 0.5061060765819786, "learning_rate": 9.505497475847586e-05, "loss": 12.0051, "step": 19476 }, { "epoch": 1.0606004914473193, "grad_norm": 0.571105398310208, "learning_rate": 9.504616730791345e-05, "loss": 12.1534, "step": 19477 }, { "epoch": 1.0606549454439023, "grad_norm": 0.5546744851328956, "learning_rate": 9.503735989587289e-05, "loss": 12.0144, "step": 19478 }, { "epoch": 1.0607093994404853, "grad_norm": 0.5460818596781322, "learning_rate": 9.502855252242264e-05, "loss": 12.0555, "step": 19479 }, { "epoch": 1.0607638534370682, "grad_norm": 0.545806618163049, "learning_rate": 9.50197451876312e-05, "loss": 11.9989, "step": 19480 }, { "epoch": 1.0608183074336512, "grad_norm": 0.5690009421971667, "learning_rate": 9.501093789156706e-05, "loss": 12.1087, "step": 19481 }, { "epoch": 1.0608727614302342, "grad_norm": 0.5247933770362788, "learning_rate": 9.50021306342987e-05, "loss": 12.0453, "step": 19482 }, { "epoch": 1.0609272154268172, "grad_norm": 0.5369592685857938, "learning_rate": 9.49933234158946e-05, "loss": 11.8929, "step": 19483 }, { "epoch": 1.0609816694234002, "grad_norm": 0.5544839201165852, "learning_rate": 9.49845162364233e-05, "loss": 12.0165, "step": 19484 }, { "epoch": 1.0610361234199832, "grad_norm": 0.5717511692715508, "learning_rate": 9.497570909595322e-05, "loss": 12.03, "step": 19485 }, { "epoch": 1.0610905774165662, "grad_norm": 0.5651487710115926, "learning_rate": 9.496690199455286e-05, "loss": 12.047, "step": 19486 }, { "epoch": 1.0611450314131492, "grad_norm": 0.5318155068592443, "learning_rate": 9.49580949322907e-05, "loss": 12.0402, "step": 19487 }, { "epoch": 1.0611994854097322, "grad_norm": 0.5704015631935876, "learning_rate": 9.494928790923525e-05, "loss": 12.0697, "step": 19488 }, { "epoch": 1.0612539394063154, "grad_norm": 0.513769491953973, "learning_rate": 9.494048092545499e-05, "loss": 11.8972, "step": 19489 }, { "epoch": 1.0613083934028984, "grad_norm": 0.5308279098688354, "learning_rate": 9.49316739810184e-05, "loss": 11.9496, "step": 19490 }, { "epoch": 1.0613628473994814, "grad_norm": 0.5652421659166454, "learning_rate": 9.492286707599395e-05, "loss": 12.0402, "step": 19491 }, { "epoch": 1.0614173013960644, "grad_norm": 0.6012514551528869, "learning_rate": 9.491406021045016e-05, "loss": 12.0058, "step": 19492 }, { "epoch": 1.0614717553926474, "grad_norm": 0.6147753858557873, "learning_rate": 9.490525338445547e-05, "loss": 12.1222, "step": 19493 }, { "epoch": 1.0615262093892304, "grad_norm": 0.4778765331953231, "learning_rate": 9.489644659807842e-05, "loss": 12.003, "step": 19494 }, { "epoch": 1.0615806633858134, "grad_norm": 0.5490182527436342, "learning_rate": 9.488763985138742e-05, "loss": 12.0958, "step": 19495 }, { "epoch": 1.0616351173823964, "grad_norm": 0.6310603682268788, "learning_rate": 9.487883314445101e-05, "loss": 12.0279, "step": 19496 }, { "epoch": 1.0616895713789793, "grad_norm": 0.5287781068237357, "learning_rate": 9.487002647733763e-05, "loss": 11.9563, "step": 19497 }, { "epoch": 1.0617440253755623, "grad_norm": 0.5139150948204092, "learning_rate": 9.486121985011579e-05, "loss": 12.152, "step": 19498 }, { "epoch": 1.0617984793721453, "grad_norm": 0.537475871099728, "learning_rate": 9.485241326285397e-05, "loss": 11.9787, "step": 19499 }, { "epoch": 1.0618529333687283, "grad_norm": 0.5714777270599907, "learning_rate": 9.484360671562064e-05, "loss": 11.9879, "step": 19500 }, { "epoch": 1.0619073873653115, "grad_norm": 0.6072144152856551, "learning_rate": 9.483480020848431e-05, "loss": 12.0891, "step": 19501 }, { "epoch": 1.0619618413618945, "grad_norm": 0.5306079632416387, "learning_rate": 9.482599374151342e-05, "loss": 12.0473, "step": 19502 }, { "epoch": 1.0620162953584775, "grad_norm": 0.571602416194433, "learning_rate": 9.481718731477648e-05, "loss": 12.0993, "step": 19503 }, { "epoch": 1.0620707493550605, "grad_norm": 0.5274166829278563, "learning_rate": 9.480838092834196e-05, "loss": 12.0607, "step": 19504 }, { "epoch": 1.0621252033516435, "grad_norm": 0.5308149700387995, "learning_rate": 9.479957458227836e-05, "loss": 12.0445, "step": 19505 }, { "epoch": 1.0621796573482265, "grad_norm": 0.5719164937398165, "learning_rate": 9.479076827665413e-05, "loss": 12.2446, "step": 19506 }, { "epoch": 1.0622341113448095, "grad_norm": 0.5484840373867881, "learning_rate": 9.478196201153775e-05, "loss": 12.0868, "step": 19507 }, { "epoch": 1.0622885653413925, "grad_norm": 0.5174545724860756, "learning_rate": 9.47731557869977e-05, "loss": 12.0515, "step": 19508 }, { "epoch": 1.0623430193379755, "grad_norm": 0.5325860096749648, "learning_rate": 9.476434960310248e-05, "loss": 12.0681, "step": 19509 }, { "epoch": 1.0623974733345585, "grad_norm": 0.5766542534429969, "learning_rate": 9.475554345992052e-05, "loss": 12.1705, "step": 19510 }, { "epoch": 1.0624519273311415, "grad_norm": 0.6170470995755548, "learning_rate": 9.474673735752038e-05, "loss": 12.1246, "step": 19511 }, { "epoch": 1.0625063813277245, "grad_norm": 0.5433823077381589, "learning_rate": 9.473793129597047e-05, "loss": 12.1055, "step": 19512 }, { "epoch": 1.0625608353243077, "grad_norm": 0.5693997414772987, "learning_rate": 9.472912527533929e-05, "loss": 12.0046, "step": 19513 }, { "epoch": 1.0626152893208907, "grad_norm": 0.5397460210874416, "learning_rate": 9.472031929569533e-05, "loss": 12.1269, "step": 19514 }, { "epoch": 1.0626697433174737, "grad_norm": 0.6686676208401305, "learning_rate": 9.471151335710706e-05, "loss": 12.0209, "step": 19515 }, { "epoch": 1.0627241973140567, "grad_norm": 0.590749113523507, "learning_rate": 9.470270745964294e-05, "loss": 12.106, "step": 19516 }, { "epoch": 1.0627786513106396, "grad_norm": 0.5271313705140186, "learning_rate": 9.469390160337144e-05, "loss": 12.0992, "step": 19517 }, { "epoch": 1.0628331053072226, "grad_norm": 0.623477010706432, "learning_rate": 9.468509578836105e-05, "loss": 12.0731, "step": 19518 }, { "epoch": 1.0628875593038056, "grad_norm": 0.6143950276182006, "learning_rate": 9.467629001468025e-05, "loss": 12.1269, "step": 19519 }, { "epoch": 1.0629420133003886, "grad_norm": 0.5382089392313393, "learning_rate": 9.466748428239753e-05, "loss": 12.0799, "step": 19520 }, { "epoch": 1.0629964672969716, "grad_norm": 0.5141725536001891, "learning_rate": 9.465867859158131e-05, "loss": 12.1073, "step": 19521 }, { "epoch": 1.0630509212935546, "grad_norm": 0.5260665309132682, "learning_rate": 9.464987294230012e-05, "loss": 11.9465, "step": 19522 }, { "epoch": 1.0631053752901376, "grad_norm": 0.5607901580338629, "learning_rate": 9.464106733462242e-05, "loss": 11.952, "step": 19523 }, { "epoch": 1.0631598292867208, "grad_norm": 0.5205969784510579, "learning_rate": 9.463226176861668e-05, "loss": 12.0901, "step": 19524 }, { "epoch": 1.0632142832833038, "grad_norm": 0.5739424072770057, "learning_rate": 9.46234562443514e-05, "loss": 12.0407, "step": 19525 }, { "epoch": 1.0632687372798868, "grad_norm": 0.6787278281315261, "learning_rate": 9.461465076189499e-05, "loss": 12.1001, "step": 19526 }, { "epoch": 1.0633231912764698, "grad_norm": 0.541509223022238, "learning_rate": 9.460584532131596e-05, "loss": 12.0837, "step": 19527 }, { "epoch": 1.0633776452730528, "grad_norm": 0.5348501043810973, "learning_rate": 9.45970399226828e-05, "loss": 12.1429, "step": 19528 }, { "epoch": 1.0634320992696358, "grad_norm": 0.5568295755065166, "learning_rate": 9.458823456606394e-05, "loss": 11.994, "step": 19529 }, { "epoch": 1.0634865532662188, "grad_norm": 0.5496086327124223, "learning_rate": 9.457942925152788e-05, "loss": 12.0708, "step": 19530 }, { "epoch": 1.0635410072628018, "grad_norm": 0.4676056507372799, "learning_rate": 9.45706239791431e-05, "loss": 11.9249, "step": 19531 }, { "epoch": 1.0635954612593848, "grad_norm": 0.6022564596587495, "learning_rate": 9.456181874897803e-05, "loss": 12.0366, "step": 19532 }, { "epoch": 1.0636499152559677, "grad_norm": 0.5478683764357231, "learning_rate": 9.455301356110119e-05, "loss": 12.0423, "step": 19533 }, { "epoch": 1.0637043692525507, "grad_norm": 0.6722146474969767, "learning_rate": 9.454420841558103e-05, "loss": 12.206, "step": 19534 }, { "epoch": 1.0637588232491337, "grad_norm": 0.6034418312173684, "learning_rate": 9.453540331248607e-05, "loss": 12.155, "step": 19535 }, { "epoch": 1.063813277245717, "grad_norm": 0.5906543027720849, "learning_rate": 9.452659825188466e-05, "loss": 12.0325, "step": 19536 }, { "epoch": 1.0638677312423, "grad_norm": 0.6082275877099078, "learning_rate": 9.451779323384535e-05, "loss": 12.0325, "step": 19537 }, { "epoch": 1.063922185238883, "grad_norm": 0.5894290320323842, "learning_rate": 9.450898825843662e-05, "loss": 12.0888, "step": 19538 }, { "epoch": 1.063976639235466, "grad_norm": 0.5957966361615813, "learning_rate": 9.45001833257269e-05, "loss": 12.115, "step": 19539 }, { "epoch": 1.064031093232049, "grad_norm": 0.5244965725214917, "learning_rate": 9.449137843578469e-05, "loss": 12.0886, "step": 19540 }, { "epoch": 1.064085547228632, "grad_norm": 0.5694397091625615, "learning_rate": 9.448257358867845e-05, "loss": 12.0047, "step": 19541 }, { "epoch": 1.064140001225215, "grad_norm": 0.5498986514812435, "learning_rate": 9.447376878447662e-05, "loss": 12.0668, "step": 19542 }, { "epoch": 1.064194455221798, "grad_norm": 0.5324227496696069, "learning_rate": 9.44649640232477e-05, "loss": 12.0771, "step": 19543 }, { "epoch": 1.064248909218381, "grad_norm": 0.5567550837917793, "learning_rate": 9.445615930506014e-05, "loss": 12.1119, "step": 19544 }, { "epoch": 1.0643033632149639, "grad_norm": 0.4977052343332378, "learning_rate": 9.444735462998248e-05, "loss": 12.0982, "step": 19545 }, { "epoch": 1.0643578172115469, "grad_norm": 0.5370782475408601, "learning_rate": 9.443854999808305e-05, "loss": 12.0679, "step": 19546 }, { "epoch": 1.06441227120813, "grad_norm": 0.5201187509950764, "learning_rate": 9.442974540943039e-05, "loss": 12.0481, "step": 19547 }, { "epoch": 1.064466725204713, "grad_norm": 0.5978019918197744, "learning_rate": 9.442094086409298e-05, "loss": 12.0072, "step": 19548 }, { "epoch": 1.064521179201296, "grad_norm": 0.491584250591522, "learning_rate": 9.441213636213928e-05, "loss": 12.0287, "step": 19549 }, { "epoch": 1.064575633197879, "grad_norm": 0.5829670045537616, "learning_rate": 9.440333190363772e-05, "loss": 12.1143, "step": 19550 }, { "epoch": 1.064630087194462, "grad_norm": 0.4979406577192298, "learning_rate": 9.439452748865682e-05, "loss": 11.9809, "step": 19551 }, { "epoch": 1.064684541191045, "grad_norm": 0.5641474829559183, "learning_rate": 9.438572311726499e-05, "loss": 11.993, "step": 19552 }, { "epoch": 1.064738995187628, "grad_norm": 0.509601444512635, "learning_rate": 9.437691878953074e-05, "loss": 12.0681, "step": 19553 }, { "epoch": 1.064793449184211, "grad_norm": 0.5973256500028669, "learning_rate": 9.436811450552248e-05, "loss": 12.0173, "step": 19554 }, { "epoch": 1.064847903180794, "grad_norm": 0.48301332542954895, "learning_rate": 9.435931026530876e-05, "loss": 12.0674, "step": 19555 }, { "epoch": 1.064902357177377, "grad_norm": 0.6157513310070719, "learning_rate": 9.435050606895796e-05, "loss": 12.0662, "step": 19556 }, { "epoch": 1.06495681117396, "grad_norm": 0.5259273499843691, "learning_rate": 9.434170191653856e-05, "loss": 12.1298, "step": 19557 }, { "epoch": 1.065011265170543, "grad_norm": 0.533012420826072, "learning_rate": 9.433289780811905e-05, "loss": 11.9811, "step": 19558 }, { "epoch": 1.0650657191671262, "grad_norm": 0.5566711277744718, "learning_rate": 9.432409374376783e-05, "loss": 12.0475, "step": 19559 }, { "epoch": 1.0651201731637092, "grad_norm": 0.5622517247526756, "learning_rate": 9.431528972355345e-05, "loss": 12.0318, "step": 19560 }, { "epoch": 1.0651746271602922, "grad_norm": 0.5727603606850494, "learning_rate": 9.430648574754433e-05, "loss": 11.9535, "step": 19561 }, { "epoch": 1.0652290811568752, "grad_norm": 0.5337964597823531, "learning_rate": 9.429768181580894e-05, "loss": 11.9784, "step": 19562 }, { "epoch": 1.0652835351534582, "grad_norm": 0.598288206977741, "learning_rate": 9.428887792841572e-05, "loss": 12.0688, "step": 19563 }, { "epoch": 1.0653379891500412, "grad_norm": 0.5484710879790106, "learning_rate": 9.428007408543314e-05, "loss": 12.0379, "step": 19564 }, { "epoch": 1.0653924431466242, "grad_norm": 0.6066171274342483, "learning_rate": 9.42712702869297e-05, "loss": 12.0907, "step": 19565 }, { "epoch": 1.0654468971432072, "grad_norm": 0.5275822158642726, "learning_rate": 9.426246653297379e-05, "loss": 12.0258, "step": 19566 }, { "epoch": 1.0655013511397902, "grad_norm": 0.5733642999382029, "learning_rate": 9.42536628236339e-05, "loss": 12.0058, "step": 19567 }, { "epoch": 1.0655558051363732, "grad_norm": 0.47405334283595374, "learning_rate": 9.42448591589785e-05, "loss": 12.043, "step": 19568 }, { "epoch": 1.0656102591329562, "grad_norm": 0.6533793324086261, "learning_rate": 9.423605553907603e-05, "loss": 12.2868, "step": 19569 }, { "epoch": 1.0656647131295394, "grad_norm": 0.5733888302247439, "learning_rate": 9.422725196399495e-05, "loss": 12.0235, "step": 19570 }, { "epoch": 1.0657191671261224, "grad_norm": 0.5723354138462762, "learning_rate": 9.421844843380375e-05, "loss": 12.1644, "step": 19571 }, { "epoch": 1.0657736211227054, "grad_norm": 0.5991701179514328, "learning_rate": 9.420964494857085e-05, "loss": 12.0841, "step": 19572 }, { "epoch": 1.0658280751192883, "grad_norm": 0.558710862217451, "learning_rate": 9.420084150836473e-05, "loss": 12.0514, "step": 19573 }, { "epoch": 1.0658825291158713, "grad_norm": 0.5469144969444083, "learning_rate": 9.419203811325383e-05, "loss": 12.2402, "step": 19574 }, { "epoch": 1.0659369831124543, "grad_norm": 0.5620895209488639, "learning_rate": 9.418323476330664e-05, "loss": 11.9929, "step": 19575 }, { "epoch": 1.0659914371090373, "grad_norm": 0.5949008351746472, "learning_rate": 9.417443145859159e-05, "loss": 12.1162, "step": 19576 }, { "epoch": 1.0660458911056203, "grad_norm": 0.6035968787730213, "learning_rate": 9.416562819917712e-05, "loss": 12.0362, "step": 19577 }, { "epoch": 1.0661003451022033, "grad_norm": 0.5390471506547698, "learning_rate": 9.415682498513169e-05, "loss": 12.1288, "step": 19578 }, { "epoch": 1.0661547990987863, "grad_norm": 0.5114014464809856, "learning_rate": 9.414802181652379e-05, "loss": 12.0689, "step": 19579 }, { "epoch": 1.0662092530953693, "grad_norm": 0.5633583605680194, "learning_rate": 9.413921869342185e-05, "loss": 12.0915, "step": 19580 }, { "epoch": 1.0662637070919523, "grad_norm": 0.5469194789493036, "learning_rate": 9.41304156158943e-05, "loss": 12.1314, "step": 19581 }, { "epoch": 1.0663181610885353, "grad_norm": 0.5300734025899072, "learning_rate": 9.412161258400966e-05, "loss": 12.055, "step": 19582 }, { "epoch": 1.0663726150851185, "grad_norm": 0.5632702110680303, "learning_rate": 9.411280959783633e-05, "loss": 12.0251, "step": 19583 }, { "epoch": 1.0664270690817015, "grad_norm": 0.5470144743292025, "learning_rate": 9.410400665744279e-05, "loss": 12.028, "step": 19584 }, { "epoch": 1.0664815230782845, "grad_norm": 0.6196978189107037, "learning_rate": 9.40952037628975e-05, "loss": 12.0663, "step": 19585 }, { "epoch": 1.0665359770748675, "grad_norm": 0.5865783159570598, "learning_rate": 9.408640091426887e-05, "loss": 12.1339, "step": 19586 }, { "epoch": 1.0665904310714505, "grad_norm": 0.5862795288555418, "learning_rate": 9.407759811162539e-05, "loss": 12.0186, "step": 19587 }, { "epoch": 1.0666448850680335, "grad_norm": 0.619000559758314, "learning_rate": 9.406879535503549e-05, "loss": 12.2063, "step": 19588 }, { "epoch": 1.0666993390646164, "grad_norm": 0.5430701053644917, "learning_rate": 9.405999264456763e-05, "loss": 12.0267, "step": 19589 }, { "epoch": 1.0667537930611994, "grad_norm": 0.5631622351629713, "learning_rate": 9.405118998029027e-05, "loss": 12.1148, "step": 19590 }, { "epoch": 1.0668082470577824, "grad_norm": 0.5454245133367549, "learning_rate": 9.404238736227186e-05, "loss": 12.12, "step": 19591 }, { "epoch": 1.0668627010543654, "grad_norm": 0.5889022062111674, "learning_rate": 9.403358479058081e-05, "loss": 12.0937, "step": 19592 }, { "epoch": 1.0669171550509484, "grad_norm": 0.5065637392389071, "learning_rate": 9.402478226528562e-05, "loss": 12.0159, "step": 19593 }, { "epoch": 1.0669716090475316, "grad_norm": 0.5709809758938452, "learning_rate": 9.401597978645474e-05, "loss": 12.0441, "step": 19594 }, { "epoch": 1.0670260630441146, "grad_norm": 0.5713043581230659, "learning_rate": 9.400717735415665e-05, "loss": 12.1235, "step": 19595 }, { "epoch": 1.0670805170406976, "grad_norm": 0.5593388188584824, "learning_rate": 9.399837496845969e-05, "loss": 12.026, "step": 19596 }, { "epoch": 1.0671349710372806, "grad_norm": 0.5719142697400577, "learning_rate": 9.398957262943238e-05, "loss": 12.0984, "step": 19597 }, { "epoch": 1.0671894250338636, "grad_norm": 0.557749710207554, "learning_rate": 9.398077033714317e-05, "loss": 12.1424, "step": 19598 }, { "epoch": 1.0672438790304466, "grad_norm": 0.5740652249237118, "learning_rate": 9.397196809166052e-05, "loss": 11.9395, "step": 19599 }, { "epoch": 1.0672983330270296, "grad_norm": 0.5598491479754933, "learning_rate": 9.396316589305283e-05, "loss": 12.0671, "step": 19600 }, { "epoch": 1.0673527870236126, "grad_norm": 0.5444547240942728, "learning_rate": 9.395436374138857e-05, "loss": 12.0203, "step": 19601 }, { "epoch": 1.0674072410201956, "grad_norm": 0.5634520180772729, "learning_rate": 9.39455616367362e-05, "loss": 12.0253, "step": 19602 }, { "epoch": 1.0674616950167786, "grad_norm": 0.5152166816141512, "learning_rate": 9.393675957916415e-05, "loss": 12.0868, "step": 19603 }, { "epoch": 1.0675161490133616, "grad_norm": 0.5936970924799284, "learning_rate": 9.392795756874088e-05, "loss": 12.1438, "step": 19604 }, { "epoch": 1.0675706030099446, "grad_norm": 0.6169697993657006, "learning_rate": 9.391915560553488e-05, "loss": 12.1226, "step": 19605 }, { "epoch": 1.0676250570065278, "grad_norm": 0.5925611242588401, "learning_rate": 9.391035368961449e-05, "loss": 11.9379, "step": 19606 }, { "epoch": 1.0676795110031108, "grad_norm": 0.53909222039229, "learning_rate": 9.390155182104823e-05, "loss": 11.9986, "step": 19607 }, { "epoch": 1.0677339649996938, "grad_norm": 0.532438590720537, "learning_rate": 9.38927499999045e-05, "loss": 11.9909, "step": 19608 }, { "epoch": 1.0677884189962767, "grad_norm": 0.4970209959955968, "learning_rate": 9.388394822625179e-05, "loss": 12.0488, "step": 19609 }, { "epoch": 1.0678428729928597, "grad_norm": 0.5998770171857966, "learning_rate": 9.387514650015851e-05, "loss": 12.19, "step": 19610 }, { "epoch": 1.0678973269894427, "grad_norm": 0.552797692326681, "learning_rate": 9.386634482169313e-05, "loss": 12.1439, "step": 19611 }, { "epoch": 1.0679517809860257, "grad_norm": 0.501143711536902, "learning_rate": 9.385754319092409e-05, "loss": 11.9222, "step": 19612 }, { "epoch": 1.0680062349826087, "grad_norm": 0.524867903996969, "learning_rate": 9.384874160791981e-05, "loss": 11.9817, "step": 19613 }, { "epoch": 1.0680606889791917, "grad_norm": 0.4905934833221118, "learning_rate": 9.383994007274875e-05, "loss": 12.0082, "step": 19614 }, { "epoch": 1.0681151429757747, "grad_norm": 0.5459065117734525, "learning_rate": 9.383113858547939e-05, "loss": 12.0647, "step": 19615 }, { "epoch": 1.0681695969723577, "grad_norm": 0.5203237520036577, "learning_rate": 9.382233714618008e-05, "loss": 11.8745, "step": 19616 }, { "epoch": 1.068224050968941, "grad_norm": 0.5411184863716206, "learning_rate": 9.381353575491933e-05, "loss": 12.0235, "step": 19617 }, { "epoch": 1.068278504965524, "grad_norm": 0.517806378865651, "learning_rate": 9.380473441176554e-05, "loss": 12.0839, "step": 19618 }, { "epoch": 1.068332958962107, "grad_norm": 0.5317189778110568, "learning_rate": 9.379593311678719e-05, "loss": 12.0236, "step": 19619 }, { "epoch": 1.0683874129586899, "grad_norm": 0.6585334192721659, "learning_rate": 9.378713187005271e-05, "loss": 12.1009, "step": 19620 }, { "epoch": 1.0684418669552729, "grad_norm": 0.5453745591164424, "learning_rate": 9.377833067163052e-05, "loss": 12.1599, "step": 19621 }, { "epoch": 1.0684963209518559, "grad_norm": 0.5446659967144174, "learning_rate": 9.376952952158908e-05, "loss": 12.0764, "step": 19622 }, { "epoch": 1.0685507749484389, "grad_norm": 0.6447871707801308, "learning_rate": 9.376072841999683e-05, "loss": 12.1909, "step": 19623 }, { "epoch": 1.0686052289450219, "grad_norm": 0.5668415135101286, "learning_rate": 9.37519273669222e-05, "loss": 11.9781, "step": 19624 }, { "epoch": 1.0686596829416048, "grad_norm": 0.5501112084761463, "learning_rate": 9.374312636243366e-05, "loss": 12.0072, "step": 19625 }, { "epoch": 1.0687141369381878, "grad_norm": 0.5165365292243039, "learning_rate": 9.373432540659958e-05, "loss": 11.9746, "step": 19626 }, { "epoch": 1.0687685909347708, "grad_norm": 0.5392483614694529, "learning_rate": 9.372552449948845e-05, "loss": 12.0356, "step": 19627 }, { "epoch": 1.0688230449313538, "grad_norm": 0.5047342672491422, "learning_rate": 9.37167236411687e-05, "loss": 12.137, "step": 19628 }, { "epoch": 1.068877498927937, "grad_norm": 0.511943413415665, "learning_rate": 9.370792283170874e-05, "loss": 12.0181, "step": 19629 }, { "epoch": 1.06893195292452, "grad_norm": 0.5675433984161167, "learning_rate": 9.369912207117702e-05, "loss": 12.0602, "step": 19630 }, { "epoch": 1.068986406921103, "grad_norm": 0.5173814895325556, "learning_rate": 9.3690321359642e-05, "loss": 12.0295, "step": 19631 }, { "epoch": 1.069040860917686, "grad_norm": 0.5028811051182804, "learning_rate": 9.368152069717209e-05, "loss": 11.9977, "step": 19632 }, { "epoch": 1.069095314914269, "grad_norm": 0.5410129930454582, "learning_rate": 9.367272008383575e-05, "loss": 12.0648, "step": 19633 }, { "epoch": 1.069149768910852, "grad_norm": 0.525174263316461, "learning_rate": 9.36639195197014e-05, "loss": 12.0833, "step": 19634 }, { "epoch": 1.069204222907435, "grad_norm": 0.5199187033738576, "learning_rate": 9.365511900483749e-05, "loss": 11.9703, "step": 19635 }, { "epoch": 1.069258676904018, "grad_norm": 0.5663549514936985, "learning_rate": 9.364631853931242e-05, "loss": 12.0634, "step": 19636 }, { "epoch": 1.069313130900601, "grad_norm": 0.539456448316862, "learning_rate": 9.363751812319463e-05, "loss": 12.0389, "step": 19637 }, { "epoch": 1.069367584897184, "grad_norm": 0.6009446430801567, "learning_rate": 9.362871775655259e-05, "loss": 12.1115, "step": 19638 }, { "epoch": 1.069422038893767, "grad_norm": 0.5332866736126232, "learning_rate": 9.36199174394547e-05, "loss": 12.0605, "step": 19639 }, { "epoch": 1.0694764928903502, "grad_norm": 0.4817335547024529, "learning_rate": 9.361111717196939e-05, "loss": 11.9135, "step": 19640 }, { "epoch": 1.0695309468869332, "grad_norm": 0.6048174574996448, "learning_rate": 9.360231695416509e-05, "loss": 11.8708, "step": 19641 }, { "epoch": 1.0695854008835162, "grad_norm": 0.6008411408181712, "learning_rate": 9.359351678611027e-05, "loss": 12.1587, "step": 19642 }, { "epoch": 1.0696398548800992, "grad_norm": 0.5591678637215574, "learning_rate": 9.358471666787334e-05, "loss": 12.0818, "step": 19643 }, { "epoch": 1.0696943088766822, "grad_norm": 0.5587664758147319, "learning_rate": 9.357591659952272e-05, "loss": 12.1245, "step": 19644 }, { "epoch": 1.0697487628732651, "grad_norm": 0.5643472269024349, "learning_rate": 9.356711658112685e-05, "loss": 12.2275, "step": 19645 }, { "epoch": 1.0698032168698481, "grad_norm": 0.5681908835197387, "learning_rate": 9.355831661275419e-05, "loss": 12.1445, "step": 19646 }, { "epoch": 1.0698576708664311, "grad_norm": 0.5659950063808108, "learning_rate": 9.354951669447312e-05, "loss": 12.0002, "step": 19647 }, { "epoch": 1.0699121248630141, "grad_norm": 0.5655190560505993, "learning_rate": 9.354071682635208e-05, "loss": 12.124, "step": 19648 }, { "epoch": 1.0699665788595971, "grad_norm": 0.5745424362482439, "learning_rate": 9.353191700845952e-05, "loss": 12.0382, "step": 19649 }, { "epoch": 1.07002103285618, "grad_norm": 0.559601469047529, "learning_rate": 9.352311724086385e-05, "loss": 12.0086, "step": 19650 }, { "epoch": 1.070075486852763, "grad_norm": 0.6435323007808978, "learning_rate": 9.351431752363351e-05, "loss": 12.0784, "step": 19651 }, { "epoch": 1.070129940849346, "grad_norm": 0.5525163137115525, "learning_rate": 9.35055178568369e-05, "loss": 12.0001, "step": 19652 }, { "epoch": 1.0701843948459293, "grad_norm": 0.6056573547042106, "learning_rate": 9.34967182405425e-05, "loss": 12.1013, "step": 19653 }, { "epoch": 1.0702388488425123, "grad_norm": 0.5763798225266978, "learning_rate": 9.34879186748187e-05, "loss": 11.9068, "step": 19654 }, { "epoch": 1.0702933028390953, "grad_norm": 0.5364143506435711, "learning_rate": 9.347911915973394e-05, "loss": 11.9713, "step": 19655 }, { "epoch": 1.0703477568356783, "grad_norm": 0.5208839549174432, "learning_rate": 9.347031969535669e-05, "loss": 12.0338, "step": 19656 }, { "epoch": 1.0704022108322613, "grad_norm": 0.5959918016455371, "learning_rate": 9.346152028175527e-05, "loss": 12.0198, "step": 19657 }, { "epoch": 1.0704566648288443, "grad_norm": 0.5705087335878948, "learning_rate": 9.345272091899818e-05, "loss": 11.9041, "step": 19658 }, { "epoch": 1.0705111188254273, "grad_norm": 0.5065772424078389, "learning_rate": 9.344392160715383e-05, "loss": 12.0587, "step": 19659 }, { "epoch": 1.0705655728220103, "grad_norm": 0.54577926969694, "learning_rate": 9.343512234629064e-05, "loss": 12.167, "step": 19660 }, { "epoch": 1.0706200268185933, "grad_norm": 0.7992702878849013, "learning_rate": 9.342632313647703e-05, "loss": 12.2066, "step": 19661 }, { "epoch": 1.0706744808151762, "grad_norm": 0.5313366529744018, "learning_rate": 9.341752397778146e-05, "loss": 11.997, "step": 19662 }, { "epoch": 1.0707289348117595, "grad_norm": 0.5810677183782393, "learning_rate": 9.340872487027231e-05, "loss": 12.1089, "step": 19663 }, { "epoch": 1.0707833888083425, "grad_norm": 0.5265076902015842, "learning_rate": 9.339992581401801e-05, "loss": 12.012, "step": 19664 }, { "epoch": 1.0708378428049254, "grad_norm": 0.5335293416359094, "learning_rate": 9.339112680908701e-05, "loss": 12.0478, "step": 19665 }, { "epoch": 1.0708922968015084, "grad_norm": 0.6252614505693662, "learning_rate": 9.338232785554776e-05, "loss": 12.0812, "step": 19666 }, { "epoch": 1.0709467507980914, "grad_norm": 0.5053589716009665, "learning_rate": 9.337352895346858e-05, "loss": 12.057, "step": 19667 }, { "epoch": 1.0710012047946744, "grad_norm": 0.5472130868707699, "learning_rate": 9.336473010291795e-05, "loss": 12.1641, "step": 19668 }, { "epoch": 1.0710556587912574, "grad_norm": 0.6263777805974757, "learning_rate": 9.33559313039643e-05, "loss": 11.9064, "step": 19669 }, { "epoch": 1.0711101127878404, "grad_norm": 0.6581749372028808, "learning_rate": 9.334713255667606e-05, "loss": 12.1449, "step": 19670 }, { "epoch": 1.0711645667844234, "grad_norm": 0.5375481306366297, "learning_rate": 9.33383338611216e-05, "loss": 12.1026, "step": 19671 }, { "epoch": 1.0712190207810064, "grad_norm": 0.5738863975371193, "learning_rate": 9.332953521736941e-05, "loss": 12.0464, "step": 19672 }, { "epoch": 1.0712734747775894, "grad_norm": 0.5699091332528314, "learning_rate": 9.332073662548784e-05, "loss": 12.1012, "step": 19673 }, { "epoch": 1.0713279287741724, "grad_norm": 0.5551052554943668, "learning_rate": 9.331193808554538e-05, "loss": 12.0756, "step": 19674 }, { "epoch": 1.0713823827707554, "grad_norm": 0.5517571887946263, "learning_rate": 9.330313959761035e-05, "loss": 12.1347, "step": 19675 }, { "epoch": 1.0714368367673386, "grad_norm": 0.5738099283189709, "learning_rate": 9.329434116175132e-05, "loss": 12.0, "step": 19676 }, { "epoch": 1.0714912907639216, "grad_norm": 0.6123625032427639, "learning_rate": 9.328554277803657e-05, "loss": 12.1277, "step": 19677 }, { "epoch": 1.0715457447605046, "grad_norm": 0.555374077530613, "learning_rate": 9.327674444653456e-05, "loss": 12.1037, "step": 19678 }, { "epoch": 1.0716001987570876, "grad_norm": 0.5859849512211, "learning_rate": 9.326794616731369e-05, "loss": 12.2179, "step": 19679 }, { "epoch": 1.0716546527536706, "grad_norm": 0.5651225046719894, "learning_rate": 9.325914794044243e-05, "loss": 12.0778, "step": 19680 }, { "epoch": 1.0717091067502535, "grad_norm": 0.5450966905251793, "learning_rate": 9.325034976598916e-05, "loss": 12.0184, "step": 19681 }, { "epoch": 1.0717635607468365, "grad_norm": 0.5909705718224748, "learning_rate": 9.32415516440223e-05, "loss": 11.9641, "step": 19682 }, { "epoch": 1.0718180147434195, "grad_norm": 0.5573469872729605, "learning_rate": 9.323275357461028e-05, "loss": 11.9704, "step": 19683 }, { "epoch": 1.0718724687400025, "grad_norm": 0.5780723863034695, "learning_rate": 9.322395555782148e-05, "loss": 12.0434, "step": 19684 }, { "epoch": 1.0719269227365855, "grad_norm": 0.5458575677248596, "learning_rate": 9.321515759372436e-05, "loss": 12.0696, "step": 19685 }, { "epoch": 1.0719813767331685, "grad_norm": 0.5381775132127713, "learning_rate": 9.320635968238732e-05, "loss": 11.8941, "step": 19686 }, { "epoch": 1.0720358307297517, "grad_norm": 0.5852552369318467, "learning_rate": 9.319756182387876e-05, "loss": 12.1267, "step": 19687 }, { "epoch": 1.0720902847263347, "grad_norm": 0.495255603248781, "learning_rate": 9.318876401826708e-05, "loss": 12.0333, "step": 19688 }, { "epoch": 1.0721447387229177, "grad_norm": 0.6171089323945832, "learning_rate": 9.317996626562074e-05, "loss": 11.9859, "step": 19689 }, { "epoch": 1.0721991927195007, "grad_norm": 0.5408401519883336, "learning_rate": 9.317116856600807e-05, "loss": 11.9493, "step": 19690 }, { "epoch": 1.0722536467160837, "grad_norm": 0.5010606561302176, "learning_rate": 9.316237091949758e-05, "loss": 11.9302, "step": 19691 }, { "epoch": 1.0723081007126667, "grad_norm": 0.5705628538375778, "learning_rate": 9.315357332615763e-05, "loss": 12.0055, "step": 19692 }, { "epoch": 1.0723625547092497, "grad_norm": 0.5563152377068212, "learning_rate": 9.314477578605665e-05, "loss": 12.1444, "step": 19693 }, { "epoch": 1.0724170087058327, "grad_norm": 0.5762005531402592, "learning_rate": 9.313597829926306e-05, "loss": 12.1609, "step": 19694 }, { "epoch": 1.0724714627024157, "grad_norm": 0.5241867847401332, "learning_rate": 9.312718086584523e-05, "loss": 11.9909, "step": 19695 }, { "epoch": 1.0725259166989987, "grad_norm": 0.5294744951317413, "learning_rate": 9.311838348587162e-05, "loss": 12.113, "step": 19696 }, { "epoch": 1.0725803706955817, "grad_norm": 0.48081367114560963, "learning_rate": 9.31095861594106e-05, "loss": 12.0423, "step": 19697 }, { "epoch": 1.0726348246921646, "grad_norm": 0.529069169475715, "learning_rate": 9.310078888653059e-05, "loss": 12.0427, "step": 19698 }, { "epoch": 1.0726892786887479, "grad_norm": 0.5197802975156901, "learning_rate": 9.30919916673e-05, "loss": 12.0114, "step": 19699 }, { "epoch": 1.0727437326853309, "grad_norm": 0.5195023067243941, "learning_rate": 9.308319450178724e-05, "loss": 11.9279, "step": 19700 }, { "epoch": 1.0727981866819138, "grad_norm": 0.5350785743310953, "learning_rate": 9.30743973900607e-05, "loss": 12.0655, "step": 19701 }, { "epoch": 1.0728526406784968, "grad_norm": 0.54928519418668, "learning_rate": 9.306560033218883e-05, "loss": 12.0851, "step": 19702 }, { "epoch": 1.0729070946750798, "grad_norm": 0.5802036630071491, "learning_rate": 9.305680332824001e-05, "loss": 12.1587, "step": 19703 }, { "epoch": 1.0729615486716628, "grad_norm": 0.6146438445621326, "learning_rate": 9.304800637828266e-05, "loss": 11.8956, "step": 19704 }, { "epoch": 1.0730160026682458, "grad_norm": 0.519056383374783, "learning_rate": 9.303920948238518e-05, "loss": 11.9627, "step": 19705 }, { "epoch": 1.0730704566648288, "grad_norm": 0.5166818684415007, "learning_rate": 9.3030412640616e-05, "loss": 12.0785, "step": 19706 }, { "epoch": 1.0731249106614118, "grad_norm": 0.550748553582585, "learning_rate": 9.302161585304345e-05, "loss": 11.9352, "step": 19707 }, { "epoch": 1.0731793646579948, "grad_norm": 0.5569639363473886, "learning_rate": 9.301281911973601e-05, "loss": 11.9928, "step": 19708 }, { "epoch": 1.0732338186545778, "grad_norm": 0.565818920261101, "learning_rate": 9.300402244076206e-05, "loss": 12.042, "step": 19709 }, { "epoch": 1.073288272651161, "grad_norm": 0.5144223908841118, "learning_rate": 9.299522581619e-05, "loss": 11.9998, "step": 19710 }, { "epoch": 1.073342726647744, "grad_norm": 0.5705112569405207, "learning_rate": 9.298642924608824e-05, "loss": 12.1002, "step": 19711 }, { "epoch": 1.073397180644327, "grad_norm": 0.5158525970327664, "learning_rate": 9.297763273052517e-05, "loss": 12.0533, "step": 19712 }, { "epoch": 1.07345163464091, "grad_norm": 0.5697894517000133, "learning_rate": 9.29688362695692e-05, "loss": 11.9829, "step": 19713 }, { "epoch": 1.073506088637493, "grad_norm": 0.5632348411887524, "learning_rate": 9.296003986328875e-05, "loss": 12.1041, "step": 19714 }, { "epoch": 1.073560542634076, "grad_norm": 0.6554950222699993, "learning_rate": 9.295124351175222e-05, "loss": 12.1818, "step": 19715 }, { "epoch": 1.073614996630659, "grad_norm": 0.5770111055480366, "learning_rate": 9.294244721502804e-05, "loss": 11.9797, "step": 19716 }, { "epoch": 1.073669450627242, "grad_norm": 0.528664251870247, "learning_rate": 9.293365097318452e-05, "loss": 12.0613, "step": 19717 }, { "epoch": 1.073723904623825, "grad_norm": 0.6015470481259096, "learning_rate": 9.292485478629011e-05, "loss": 12.034, "step": 19718 }, { "epoch": 1.073778358620408, "grad_norm": 0.5302372269520177, "learning_rate": 9.291605865441324e-05, "loss": 12.0271, "step": 19719 }, { "epoch": 1.073832812616991, "grad_norm": 0.5323550202865295, "learning_rate": 9.290726257762228e-05, "loss": 12.022, "step": 19720 }, { "epoch": 1.073887266613574, "grad_norm": 0.5080055405797005, "learning_rate": 9.289846655598564e-05, "loss": 11.9097, "step": 19721 }, { "epoch": 1.0739417206101571, "grad_norm": 0.5355357619919747, "learning_rate": 9.28896705895717e-05, "loss": 12.0039, "step": 19722 }, { "epoch": 1.0739961746067401, "grad_norm": 0.5721031071639637, "learning_rate": 9.288087467844888e-05, "loss": 11.9958, "step": 19723 }, { "epoch": 1.0740506286033231, "grad_norm": 0.5302677196758343, "learning_rate": 9.287207882268556e-05, "loss": 11.977, "step": 19724 }, { "epoch": 1.0741050825999061, "grad_norm": 0.513418394157247, "learning_rate": 9.286328302235015e-05, "loss": 12.0055, "step": 19725 }, { "epoch": 1.074159536596489, "grad_norm": 0.5404659909582347, "learning_rate": 9.285448727751111e-05, "loss": 12.1113, "step": 19726 }, { "epoch": 1.074213990593072, "grad_norm": 0.5523398496251313, "learning_rate": 9.284569158823673e-05, "loss": 11.9773, "step": 19727 }, { "epoch": 1.074268444589655, "grad_norm": 0.5637373287061666, "learning_rate": 9.283689595459544e-05, "loss": 12.1073, "step": 19728 }, { "epoch": 1.074322898586238, "grad_norm": 0.4880685056636543, "learning_rate": 9.282810037665566e-05, "loss": 12.0374, "step": 19729 }, { "epoch": 1.074377352582821, "grad_norm": 0.5277581048098522, "learning_rate": 9.281930485448576e-05, "loss": 12.1272, "step": 19730 }, { "epoch": 1.074431806579404, "grad_norm": 0.5349540016588795, "learning_rate": 9.281050938815416e-05, "loss": 11.9587, "step": 19731 }, { "epoch": 1.074486260575987, "grad_norm": 0.5629341828892994, "learning_rate": 9.280171397772926e-05, "loss": 12.0396, "step": 19732 }, { "epoch": 1.0745407145725703, "grad_norm": 0.513798232372738, "learning_rate": 9.279291862327943e-05, "loss": 12.0225, "step": 19733 }, { "epoch": 1.0745951685691533, "grad_norm": 0.5353011570995032, "learning_rate": 9.278412332487306e-05, "loss": 12.1895, "step": 19734 }, { "epoch": 1.0746496225657363, "grad_norm": 0.5271662973407337, "learning_rate": 9.277532808257855e-05, "loss": 12.1193, "step": 19735 }, { "epoch": 1.0747040765623193, "grad_norm": 0.5634901604870521, "learning_rate": 9.276653289646437e-05, "loss": 12.0464, "step": 19736 }, { "epoch": 1.0747585305589022, "grad_norm": 0.5648214953375595, "learning_rate": 9.275773776659878e-05, "loss": 12.1405, "step": 19737 }, { "epoch": 1.0748129845554852, "grad_norm": 0.5593975129303271, "learning_rate": 9.274894269305025e-05, "loss": 11.97, "step": 19738 }, { "epoch": 1.0748674385520682, "grad_norm": 0.5080976573341622, "learning_rate": 9.274014767588714e-05, "loss": 11.9913, "step": 19739 }, { "epoch": 1.0749218925486512, "grad_norm": 0.5214783380460148, "learning_rate": 9.273135271517787e-05, "loss": 12.1178, "step": 19740 }, { "epoch": 1.0749763465452342, "grad_norm": 0.5308172619512298, "learning_rate": 9.272255781099082e-05, "loss": 12.0744, "step": 19741 }, { "epoch": 1.0750308005418172, "grad_norm": 0.6115011038726479, "learning_rate": 9.271376296339439e-05, "loss": 12.1452, "step": 19742 }, { "epoch": 1.0750852545384002, "grad_norm": 0.5792122936650925, "learning_rate": 9.270496817245696e-05, "loss": 11.9555, "step": 19743 }, { "epoch": 1.0751397085349832, "grad_norm": 0.5638891886366614, "learning_rate": 9.26961734382469e-05, "loss": 12.0211, "step": 19744 }, { "epoch": 1.0751941625315662, "grad_norm": 0.5014330119294012, "learning_rate": 9.268737876083265e-05, "loss": 11.9881, "step": 19745 }, { "epoch": 1.0752486165281494, "grad_norm": 0.5313886809459771, "learning_rate": 9.267858414028257e-05, "loss": 12.0741, "step": 19746 }, { "epoch": 1.0753030705247324, "grad_norm": 0.5412319133024731, "learning_rate": 9.266978957666504e-05, "loss": 11.9375, "step": 19747 }, { "epoch": 1.0753575245213154, "grad_norm": 0.560446843590823, "learning_rate": 9.266099507004844e-05, "loss": 12.0861, "step": 19748 }, { "epoch": 1.0754119785178984, "grad_norm": 0.5805220017073658, "learning_rate": 9.265220062050119e-05, "loss": 12.1195, "step": 19749 }, { "epoch": 1.0754664325144814, "grad_norm": 0.504645492060845, "learning_rate": 9.264340622809163e-05, "loss": 11.993, "step": 19750 }, { "epoch": 1.0755208865110644, "grad_norm": 0.5617676971852157, "learning_rate": 9.263461189288819e-05, "loss": 12.0088, "step": 19751 }, { "epoch": 1.0755753405076474, "grad_norm": 0.6004664230819657, "learning_rate": 9.262581761495926e-05, "loss": 12.0947, "step": 19752 }, { "epoch": 1.0756297945042304, "grad_norm": 0.5362974399259468, "learning_rate": 9.261702339437319e-05, "loss": 11.9674, "step": 19753 }, { "epoch": 1.0756842485008133, "grad_norm": 0.536708197993625, "learning_rate": 9.26082292311984e-05, "loss": 12.1122, "step": 19754 }, { "epoch": 1.0757387024973963, "grad_norm": 0.6283518034842255, "learning_rate": 9.259943512550325e-05, "loss": 12.0162, "step": 19755 }, { "epoch": 1.0757931564939793, "grad_norm": 0.6175713448932344, "learning_rate": 9.259064107735616e-05, "loss": 12.0244, "step": 19756 }, { "epoch": 1.0758476104905625, "grad_norm": 0.5444838556408125, "learning_rate": 9.258184708682546e-05, "loss": 12.0978, "step": 19757 }, { "epoch": 1.0759020644871455, "grad_norm": 0.5624415326003953, "learning_rate": 9.257305315397957e-05, "loss": 11.9287, "step": 19758 }, { "epoch": 1.0759565184837285, "grad_norm": 0.734909947442244, "learning_rate": 9.256425927888687e-05, "loss": 12.1097, "step": 19759 }, { "epoch": 1.0760109724803115, "grad_norm": 0.52136453544778, "learning_rate": 9.255546546161573e-05, "loss": 12.0123, "step": 19760 }, { "epoch": 1.0760654264768945, "grad_norm": 0.5185993467141862, "learning_rate": 9.254667170223453e-05, "loss": 12.0284, "step": 19761 }, { "epoch": 1.0761198804734775, "grad_norm": 0.5748138736745205, "learning_rate": 9.253787800081163e-05, "loss": 12.0021, "step": 19762 }, { "epoch": 1.0761743344700605, "grad_norm": 0.6109488906780046, "learning_rate": 9.25290843574155e-05, "loss": 12.1826, "step": 19763 }, { "epoch": 1.0762287884666435, "grad_norm": 0.6420538097109626, "learning_rate": 9.252029077211444e-05, "loss": 12.1369, "step": 19764 }, { "epoch": 1.0762832424632265, "grad_norm": 0.8507627928391415, "learning_rate": 9.251149724497685e-05, "loss": 12.0548, "step": 19765 }, { "epoch": 1.0763376964598095, "grad_norm": 0.6035701910113489, "learning_rate": 9.250270377607114e-05, "loss": 12.0962, "step": 19766 }, { "epoch": 1.0763921504563925, "grad_norm": 0.6450246044835068, "learning_rate": 9.249391036546564e-05, "loss": 12.0165, "step": 19767 }, { "epoch": 1.0764466044529755, "grad_norm": 0.697372254910513, "learning_rate": 9.248511701322876e-05, "loss": 11.9212, "step": 19768 }, { "epoch": 1.0765010584495587, "grad_norm": 0.5828957964868062, "learning_rate": 9.247632371942885e-05, "loss": 12.078, "step": 19769 }, { "epoch": 1.0765555124461417, "grad_norm": 0.6521946979037693, "learning_rate": 9.246753048413433e-05, "loss": 11.9964, "step": 19770 }, { "epoch": 1.0766099664427247, "grad_norm": 0.6143374194357522, "learning_rate": 9.245873730741356e-05, "loss": 11.9747, "step": 19771 }, { "epoch": 1.0766644204393077, "grad_norm": 0.5431569085936767, "learning_rate": 9.24499441893349e-05, "loss": 12.0608, "step": 19772 }, { "epoch": 1.0767188744358906, "grad_norm": 0.54729633266687, "learning_rate": 9.244115112996671e-05, "loss": 12.0335, "step": 19773 }, { "epoch": 1.0767733284324736, "grad_norm": 0.5452416566310941, "learning_rate": 9.243235812937743e-05, "loss": 12.1001, "step": 19774 }, { "epoch": 1.0768277824290566, "grad_norm": 0.5694708937075351, "learning_rate": 9.24235651876354e-05, "loss": 12.0374, "step": 19775 }, { "epoch": 1.0768822364256396, "grad_norm": 0.611623532142073, "learning_rate": 9.241477230480904e-05, "loss": 12.0619, "step": 19776 }, { "epoch": 1.0769366904222226, "grad_norm": 0.6025778412048199, "learning_rate": 9.240597948096662e-05, "loss": 12.1174, "step": 19777 }, { "epoch": 1.0769911444188056, "grad_norm": 0.5934774686655594, "learning_rate": 9.23971867161766e-05, "loss": 11.984, "step": 19778 }, { "epoch": 1.0770455984153886, "grad_norm": 0.5202670150688955, "learning_rate": 9.238839401050732e-05, "loss": 12.087, "step": 19779 }, { "epoch": 1.0771000524119718, "grad_norm": 0.5626755625868136, "learning_rate": 9.237960136402718e-05, "loss": 12.0306, "step": 19780 }, { "epoch": 1.0771545064085548, "grad_norm": 0.5285808303053771, "learning_rate": 9.237080877680453e-05, "loss": 12.0357, "step": 19781 }, { "epoch": 1.0772089604051378, "grad_norm": 0.4884744313035158, "learning_rate": 9.236201624890776e-05, "loss": 11.9146, "step": 19782 }, { "epoch": 1.0772634144017208, "grad_norm": 0.5199914902622657, "learning_rate": 9.235322378040522e-05, "loss": 12.0246, "step": 19783 }, { "epoch": 1.0773178683983038, "grad_norm": 0.5091585427844985, "learning_rate": 9.234443137136529e-05, "loss": 11.8571, "step": 19784 }, { "epoch": 1.0773723223948868, "grad_norm": 0.5342246407419041, "learning_rate": 9.233563902185633e-05, "loss": 12.1116, "step": 19785 }, { "epoch": 1.0774267763914698, "grad_norm": 0.5571358933957491, "learning_rate": 9.232684673194676e-05, "loss": 12.1752, "step": 19786 }, { "epoch": 1.0774812303880528, "grad_norm": 0.5312270871426307, "learning_rate": 9.231805450170495e-05, "loss": 12.0943, "step": 19787 }, { "epoch": 1.0775356843846358, "grad_norm": 0.5564105200114353, "learning_rate": 9.230926233119918e-05, "loss": 12.0351, "step": 19788 }, { "epoch": 1.0775901383812188, "grad_norm": 0.5238471065822412, "learning_rate": 9.230047022049788e-05, "loss": 11.9625, "step": 19789 }, { "epoch": 1.0776445923778017, "grad_norm": 0.6448027773800971, "learning_rate": 9.229167816966943e-05, "loss": 11.8557, "step": 19790 }, { "epoch": 1.0776990463743847, "grad_norm": 0.5475912973962153, "learning_rate": 9.228288617878217e-05, "loss": 11.902, "step": 19791 }, { "epoch": 1.077753500370968, "grad_norm": 0.48479578615231733, "learning_rate": 9.227409424790449e-05, "loss": 11.7981, "step": 19792 }, { "epoch": 1.077807954367551, "grad_norm": 0.4969317117219396, "learning_rate": 9.226530237710474e-05, "loss": 11.9856, "step": 19793 }, { "epoch": 1.077862408364134, "grad_norm": 0.5843041316087823, "learning_rate": 9.22565105664513e-05, "loss": 11.9749, "step": 19794 }, { "epoch": 1.077916862360717, "grad_norm": 0.4976117563615809, "learning_rate": 9.224771881601252e-05, "loss": 12.003, "step": 19795 }, { "epoch": 1.0779713163573, "grad_norm": 0.5603380367776644, "learning_rate": 9.22389271258568e-05, "loss": 11.9834, "step": 19796 }, { "epoch": 1.078025770353883, "grad_norm": 0.5859491969938188, "learning_rate": 9.223013549605252e-05, "loss": 12.2125, "step": 19797 }, { "epoch": 1.078080224350466, "grad_norm": 0.5718941113825561, "learning_rate": 9.222134392666797e-05, "loss": 12.162, "step": 19798 }, { "epoch": 1.078134678347049, "grad_norm": 0.4832940013238779, "learning_rate": 9.221255241777152e-05, "loss": 12.0352, "step": 19799 }, { "epoch": 1.078189132343632, "grad_norm": 0.5301172166326132, "learning_rate": 9.22037609694316e-05, "loss": 12.0537, "step": 19800 }, { "epoch": 1.0782435863402149, "grad_norm": 0.5360919272697344, "learning_rate": 9.219496958171656e-05, "loss": 12.0751, "step": 19801 }, { "epoch": 1.0782980403367979, "grad_norm": 0.5142437128335051, "learning_rate": 9.218617825469471e-05, "loss": 12.0602, "step": 19802 }, { "epoch": 1.078352494333381, "grad_norm": 0.573524430103686, "learning_rate": 9.217738698843447e-05, "loss": 12.0742, "step": 19803 }, { "epoch": 1.078406948329964, "grad_norm": 0.5571919577366389, "learning_rate": 9.216859578300418e-05, "loss": 11.9993, "step": 19804 }, { "epoch": 1.078461402326547, "grad_norm": 0.5056902483771932, "learning_rate": 9.215980463847221e-05, "loss": 12.0203, "step": 19805 }, { "epoch": 1.07851585632313, "grad_norm": 0.5503370149624551, "learning_rate": 9.215101355490688e-05, "loss": 12.0564, "step": 19806 }, { "epoch": 1.078570310319713, "grad_norm": 0.613107159969414, "learning_rate": 9.214222253237664e-05, "loss": 11.9812, "step": 19807 }, { "epoch": 1.078624764316296, "grad_norm": 0.5653136291345044, "learning_rate": 9.213343157094976e-05, "loss": 12.0614, "step": 19808 }, { "epoch": 1.078679218312879, "grad_norm": 0.5599890854147888, "learning_rate": 9.212464067069464e-05, "loss": 12.0691, "step": 19809 }, { "epoch": 1.078733672309462, "grad_norm": 0.5443108479194124, "learning_rate": 9.211584983167963e-05, "loss": 12.139, "step": 19810 }, { "epoch": 1.078788126306045, "grad_norm": 0.5315181193854013, "learning_rate": 9.210705905397307e-05, "loss": 12.0453, "step": 19811 }, { "epoch": 1.078842580302628, "grad_norm": 0.5482503737684693, "learning_rate": 9.209826833764338e-05, "loss": 12.0683, "step": 19812 }, { "epoch": 1.078897034299211, "grad_norm": 0.513803567111454, "learning_rate": 9.208947768275886e-05, "loss": 12.0799, "step": 19813 }, { "epoch": 1.078951488295794, "grad_norm": 0.4985326492421896, "learning_rate": 9.208068708938791e-05, "loss": 12.0466, "step": 19814 }, { "epoch": 1.079005942292377, "grad_norm": 0.525518283907291, "learning_rate": 9.207189655759885e-05, "loss": 11.9293, "step": 19815 }, { "epoch": 1.0790603962889602, "grad_norm": 0.5482022188079608, "learning_rate": 9.206310608746005e-05, "loss": 12.0235, "step": 19816 }, { "epoch": 1.0791148502855432, "grad_norm": 0.51705141705295, "learning_rate": 9.20543156790399e-05, "loss": 12.1233, "step": 19817 }, { "epoch": 1.0791693042821262, "grad_norm": 0.5997363988760009, "learning_rate": 9.20455253324067e-05, "loss": 12.0288, "step": 19818 }, { "epoch": 1.0792237582787092, "grad_norm": 0.5943740067115404, "learning_rate": 9.203673504762881e-05, "loss": 12.0617, "step": 19819 }, { "epoch": 1.0792782122752922, "grad_norm": 0.5372814630032169, "learning_rate": 9.202794482477464e-05, "loss": 11.9594, "step": 19820 }, { "epoch": 1.0793326662718752, "grad_norm": 0.5385223976676541, "learning_rate": 9.201915466391248e-05, "loss": 11.9173, "step": 19821 }, { "epoch": 1.0793871202684582, "grad_norm": 0.630652030358931, "learning_rate": 9.201036456511069e-05, "loss": 12.0707, "step": 19822 }, { "epoch": 1.0794415742650412, "grad_norm": 0.5256875959058562, "learning_rate": 9.200157452843768e-05, "loss": 12.0294, "step": 19823 }, { "epoch": 1.0794960282616242, "grad_norm": 0.5809380055850841, "learning_rate": 9.199278455396176e-05, "loss": 12.1731, "step": 19824 }, { "epoch": 1.0795504822582072, "grad_norm": 0.4861395432745022, "learning_rate": 9.19839946417513e-05, "loss": 11.9302, "step": 19825 }, { "epoch": 1.0796049362547901, "grad_norm": 0.5560515010457268, "learning_rate": 9.197520479187463e-05, "loss": 12.0977, "step": 19826 }, { "epoch": 1.0796593902513734, "grad_norm": 0.5504295389034873, "learning_rate": 9.196641500440014e-05, "loss": 11.9997, "step": 19827 }, { "epoch": 1.0797138442479564, "grad_norm": 0.5612610027101926, "learning_rate": 9.195762527939613e-05, "loss": 12.1131, "step": 19828 }, { "epoch": 1.0797682982445393, "grad_norm": 0.5119241540906272, "learning_rate": 9.194883561693098e-05, "loss": 11.9476, "step": 19829 }, { "epoch": 1.0798227522411223, "grad_norm": 0.5424708040339065, "learning_rate": 9.194004601707304e-05, "loss": 11.9638, "step": 19830 }, { "epoch": 1.0798772062377053, "grad_norm": 0.5738978063293178, "learning_rate": 9.193125647989064e-05, "loss": 12.1109, "step": 19831 }, { "epoch": 1.0799316602342883, "grad_norm": 0.6388819616332108, "learning_rate": 9.192246700545215e-05, "loss": 12.0414, "step": 19832 }, { "epoch": 1.0799861142308713, "grad_norm": 0.5863079235322, "learning_rate": 9.191367759382587e-05, "loss": 12.1259, "step": 19833 }, { "epoch": 1.0800405682274543, "grad_norm": 0.5865267660978265, "learning_rate": 9.190488824508024e-05, "loss": 11.9783, "step": 19834 }, { "epoch": 1.0800950222240373, "grad_norm": 0.5942721031349165, "learning_rate": 9.189609895928353e-05, "loss": 12.07, "step": 19835 }, { "epoch": 1.0801494762206203, "grad_norm": 0.5580497812786545, "learning_rate": 9.188730973650414e-05, "loss": 12.0397, "step": 19836 }, { "epoch": 1.0802039302172033, "grad_norm": 0.5574042050126845, "learning_rate": 9.18785205768104e-05, "loss": 12.1398, "step": 19837 }, { "epoch": 1.0802583842137863, "grad_norm": 0.5082332192849419, "learning_rate": 9.186973148027063e-05, "loss": 11.935, "step": 19838 }, { "epoch": 1.0803128382103695, "grad_norm": 0.506636418655922, "learning_rate": 9.186094244695318e-05, "loss": 12.0906, "step": 19839 }, { "epoch": 1.0803672922069525, "grad_norm": 0.5550175858574048, "learning_rate": 9.185215347692641e-05, "loss": 12.0529, "step": 19840 }, { "epoch": 1.0804217462035355, "grad_norm": 0.5306856622699136, "learning_rate": 9.184336457025866e-05, "loss": 12.0439, "step": 19841 }, { "epoch": 1.0804762002001185, "grad_norm": 0.5751168937192283, "learning_rate": 9.183457572701828e-05, "loss": 12.0151, "step": 19842 }, { "epoch": 1.0805306541967015, "grad_norm": 0.5028561230990091, "learning_rate": 9.18257869472736e-05, "loss": 11.926, "step": 19843 }, { "epoch": 1.0805851081932845, "grad_norm": 0.5415314966630953, "learning_rate": 9.181699823109296e-05, "loss": 12.0274, "step": 19844 }, { "epoch": 1.0806395621898675, "grad_norm": 0.5355038008075436, "learning_rate": 9.180820957854473e-05, "loss": 12.0452, "step": 19845 }, { "epoch": 1.0806940161864504, "grad_norm": 0.5102761512889469, "learning_rate": 9.179942098969725e-05, "loss": 12.0176, "step": 19846 }, { "epoch": 1.0807484701830334, "grad_norm": 0.5606942033044104, "learning_rate": 9.179063246461887e-05, "loss": 12.0285, "step": 19847 }, { "epoch": 1.0808029241796164, "grad_norm": 0.5991012448887261, "learning_rate": 9.178184400337786e-05, "loss": 12.1098, "step": 19848 }, { "epoch": 1.0808573781761994, "grad_norm": 0.5367535375112495, "learning_rate": 9.177305560604261e-05, "loss": 11.9596, "step": 19849 }, { "epoch": 1.0809118321727826, "grad_norm": 0.5004396697476243, "learning_rate": 9.176426727268148e-05, "loss": 12.0197, "step": 19850 }, { "epoch": 1.0809662861693656, "grad_norm": 0.5799104998920007, "learning_rate": 9.175547900336279e-05, "loss": 12.2113, "step": 19851 }, { "epoch": 1.0810207401659486, "grad_norm": 0.5749851243210843, "learning_rate": 9.174669079815486e-05, "loss": 12.086, "step": 19852 }, { "epoch": 1.0810751941625316, "grad_norm": 0.5086492814233681, "learning_rate": 9.173790265712605e-05, "loss": 11.9306, "step": 19853 }, { "epoch": 1.0811296481591146, "grad_norm": 0.5366057011087302, "learning_rate": 9.172911458034471e-05, "loss": 12.0747, "step": 19854 }, { "epoch": 1.0811841021556976, "grad_norm": 0.5403876564087411, "learning_rate": 9.172032656787913e-05, "loss": 11.9863, "step": 19855 }, { "epoch": 1.0812385561522806, "grad_norm": 0.5950587307869056, "learning_rate": 9.171153861979769e-05, "loss": 12.1185, "step": 19856 }, { "epoch": 1.0812930101488636, "grad_norm": 0.6135176279796881, "learning_rate": 9.170275073616877e-05, "loss": 12.0065, "step": 19857 }, { "epoch": 1.0813474641454466, "grad_norm": 0.5083830167400131, "learning_rate": 9.169396291706061e-05, "loss": 11.8831, "step": 19858 }, { "epoch": 1.0814019181420296, "grad_norm": 0.5703483834579192, "learning_rate": 9.168517516254158e-05, "loss": 12.0295, "step": 19859 }, { "epoch": 1.0814563721386126, "grad_norm": 0.6074469083028137, "learning_rate": 9.167638747268002e-05, "loss": 12.0883, "step": 19860 }, { "epoch": 1.0815108261351956, "grad_norm": 0.5191785966022419, "learning_rate": 9.166759984754428e-05, "loss": 12.0195, "step": 19861 }, { "epoch": 1.0815652801317788, "grad_norm": 0.5605617016454646, "learning_rate": 9.165881228720267e-05, "loss": 12.1311, "step": 19862 }, { "epoch": 1.0816197341283618, "grad_norm": 0.5390198212079476, "learning_rate": 9.165002479172354e-05, "loss": 12.1054, "step": 19863 }, { "epoch": 1.0816741881249448, "grad_norm": 0.5659962448785403, "learning_rate": 9.164123736117523e-05, "loss": 12.1215, "step": 19864 }, { "epoch": 1.0817286421215278, "grad_norm": 0.5921829218396504, "learning_rate": 9.163244999562604e-05, "loss": 12.0794, "step": 19865 }, { "epoch": 1.0817830961181107, "grad_norm": 0.5304361683476481, "learning_rate": 9.162366269514432e-05, "loss": 12.1065, "step": 19866 }, { "epoch": 1.0818375501146937, "grad_norm": 0.5807102749268369, "learning_rate": 9.161487545979844e-05, "loss": 12.0703, "step": 19867 }, { "epoch": 1.0818920041112767, "grad_norm": 0.5276436081169286, "learning_rate": 9.160608828965666e-05, "loss": 11.9785, "step": 19868 }, { "epoch": 1.0819464581078597, "grad_norm": 0.5661969343941075, "learning_rate": 9.159730118478737e-05, "loss": 12.0878, "step": 19869 }, { "epoch": 1.0820009121044427, "grad_norm": 0.5362217126007136, "learning_rate": 9.158851414525886e-05, "loss": 11.8735, "step": 19870 }, { "epoch": 1.0820553661010257, "grad_norm": 0.5508481037378804, "learning_rate": 9.157972717113945e-05, "loss": 12.1429, "step": 19871 }, { "epoch": 1.0821098200976087, "grad_norm": 0.562342947545884, "learning_rate": 9.15709402624975e-05, "loss": 12.1656, "step": 19872 }, { "epoch": 1.082164274094192, "grad_norm": 0.500928640737876, "learning_rate": 9.156215341940136e-05, "loss": 12.081, "step": 19873 }, { "epoch": 1.082218728090775, "grad_norm": 0.4966378820375146, "learning_rate": 9.155336664191932e-05, "loss": 12.1059, "step": 19874 }, { "epoch": 1.082273182087358, "grad_norm": 0.5528438165037192, "learning_rate": 9.154457993011972e-05, "loss": 11.9159, "step": 19875 }, { "epoch": 1.082327636083941, "grad_norm": 0.5484776550801352, "learning_rate": 9.153579328407088e-05, "loss": 11.9934, "step": 19876 }, { "epoch": 1.0823820900805239, "grad_norm": 0.5575050542309941, "learning_rate": 9.152700670384116e-05, "loss": 12.0534, "step": 19877 }, { "epoch": 1.0824365440771069, "grad_norm": 0.5426585602757926, "learning_rate": 9.151822018949881e-05, "loss": 12.0549, "step": 19878 }, { "epoch": 1.0824909980736899, "grad_norm": 0.6123470415390558, "learning_rate": 9.150943374111222e-05, "loss": 11.9764, "step": 19879 }, { "epoch": 1.0825454520702729, "grad_norm": 0.5821696164344822, "learning_rate": 9.15006473587497e-05, "loss": 12.0439, "step": 19880 }, { "epoch": 1.0825999060668559, "grad_norm": 0.5430943652819135, "learning_rate": 9.149186104247958e-05, "loss": 12.1013, "step": 19881 }, { "epoch": 1.0826543600634388, "grad_norm": 0.6327773300958239, "learning_rate": 9.148307479237014e-05, "loss": 12.0013, "step": 19882 }, { "epoch": 1.0827088140600218, "grad_norm": 0.518913600156964, "learning_rate": 9.147428860848977e-05, "loss": 12.094, "step": 19883 }, { "epoch": 1.0827632680566048, "grad_norm": 0.5645919083115407, "learning_rate": 9.146550249090675e-05, "loss": 11.8218, "step": 19884 }, { "epoch": 1.0828177220531878, "grad_norm": 0.5441439766884042, "learning_rate": 9.145671643968942e-05, "loss": 12.0403, "step": 19885 }, { "epoch": 1.082872176049771, "grad_norm": 0.5940575936371498, "learning_rate": 9.14479304549061e-05, "loss": 12.0329, "step": 19886 }, { "epoch": 1.082926630046354, "grad_norm": 0.6043211414787795, "learning_rate": 9.143914453662512e-05, "loss": 12.0626, "step": 19887 }, { "epoch": 1.082981084042937, "grad_norm": 0.541390862693401, "learning_rate": 9.143035868491476e-05, "loss": 12.048, "step": 19888 }, { "epoch": 1.08303553803952, "grad_norm": 0.531445579848505, "learning_rate": 9.142157289984337e-05, "loss": 12.0976, "step": 19889 }, { "epoch": 1.083089992036103, "grad_norm": 0.513369597722478, "learning_rate": 9.141278718147929e-05, "loss": 11.9035, "step": 19890 }, { "epoch": 1.083144446032686, "grad_norm": 0.5891037711902334, "learning_rate": 9.140400152989079e-05, "loss": 12.0648, "step": 19891 }, { "epoch": 1.083198900029269, "grad_norm": 0.5543595431977636, "learning_rate": 9.139521594514623e-05, "loss": 12.0995, "step": 19892 }, { "epoch": 1.083253354025852, "grad_norm": 0.5438529356820038, "learning_rate": 9.138643042731389e-05, "loss": 12.0272, "step": 19893 }, { "epoch": 1.083307808022435, "grad_norm": 0.5467179470834783, "learning_rate": 9.137764497646213e-05, "loss": 12.1245, "step": 19894 }, { "epoch": 1.083362262019018, "grad_norm": 0.4989757185609027, "learning_rate": 9.136885959265926e-05, "loss": 12.0709, "step": 19895 }, { "epoch": 1.083416716015601, "grad_norm": 0.54996556335291, "learning_rate": 9.136007427597358e-05, "loss": 11.9735, "step": 19896 }, { "epoch": 1.0834711700121842, "grad_norm": 0.5670794449956671, "learning_rate": 9.135128902647344e-05, "loss": 12.0571, "step": 19897 }, { "epoch": 1.0835256240087672, "grad_norm": 0.5164709777065184, "learning_rate": 9.134250384422708e-05, "loss": 12.0582, "step": 19898 }, { "epoch": 1.0835800780053502, "grad_norm": 0.5613349135182275, "learning_rate": 9.133371872930288e-05, "loss": 12.0784, "step": 19899 }, { "epoch": 1.0836345320019332, "grad_norm": 0.5563800171494543, "learning_rate": 9.132493368176913e-05, "loss": 12.08, "step": 19900 }, { "epoch": 1.0836889859985162, "grad_norm": 0.5206282421692374, "learning_rate": 9.131614870169416e-05, "loss": 12.1176, "step": 19901 }, { "epoch": 1.0837434399950991, "grad_norm": 0.5123339744790743, "learning_rate": 9.130736378914627e-05, "loss": 12.1279, "step": 19902 }, { "epoch": 1.0837978939916821, "grad_norm": 0.5556191526608115, "learning_rate": 9.129857894419377e-05, "loss": 11.9749, "step": 19903 }, { "epoch": 1.0838523479882651, "grad_norm": 0.5499176741013869, "learning_rate": 9.128979416690497e-05, "loss": 11.9974, "step": 19904 }, { "epoch": 1.0839068019848481, "grad_norm": 0.49136687224716824, "learning_rate": 9.12810094573482e-05, "loss": 11.9636, "step": 19905 }, { "epoch": 1.0839612559814311, "grad_norm": 0.5667764010346874, "learning_rate": 9.127222481559176e-05, "loss": 12.1023, "step": 19906 }, { "epoch": 1.084015709978014, "grad_norm": 0.5326825165723065, "learning_rate": 9.126344024170402e-05, "loss": 12.0809, "step": 19907 }, { "epoch": 1.084070163974597, "grad_norm": 0.5370302873191101, "learning_rate": 9.125465573575316e-05, "loss": 12.0619, "step": 19908 }, { "epoch": 1.0841246179711803, "grad_norm": 0.5560791563957217, "learning_rate": 9.12458712978076e-05, "loss": 12.0018, "step": 19909 }, { "epoch": 1.0841790719677633, "grad_norm": 0.5058743738074365, "learning_rate": 9.12370869279356e-05, "loss": 12.0217, "step": 19910 }, { "epoch": 1.0842335259643463, "grad_norm": 0.5990810630139142, "learning_rate": 9.122830262620547e-05, "loss": 12.1044, "step": 19911 }, { "epoch": 1.0842879799609293, "grad_norm": 0.5410580811988184, "learning_rate": 9.121951839268556e-05, "loss": 11.9739, "step": 19912 }, { "epoch": 1.0843424339575123, "grad_norm": 0.5568019602049695, "learning_rate": 9.121073422744413e-05, "loss": 12.1421, "step": 19913 }, { "epoch": 1.0843968879540953, "grad_norm": 0.5569345235690699, "learning_rate": 9.12019501305495e-05, "loss": 11.8679, "step": 19914 }, { "epoch": 1.0844513419506783, "grad_norm": 0.5183950269777624, "learning_rate": 9.119316610206998e-05, "loss": 12.0276, "step": 19915 }, { "epoch": 1.0845057959472613, "grad_norm": 0.5064129400906593, "learning_rate": 9.118438214207387e-05, "loss": 12.1255, "step": 19916 }, { "epoch": 1.0845602499438443, "grad_norm": 0.6021752218151646, "learning_rate": 9.117559825062953e-05, "loss": 12.0236, "step": 19917 }, { "epoch": 1.0846147039404272, "grad_norm": 0.5157551802055335, "learning_rate": 9.116681442780519e-05, "loss": 11.9216, "step": 19918 }, { "epoch": 1.0846691579370102, "grad_norm": 0.5436451505997936, "learning_rate": 9.115803067366918e-05, "loss": 12.1423, "step": 19919 }, { "epoch": 1.0847236119335935, "grad_norm": 0.5973381170371647, "learning_rate": 9.114924698828978e-05, "loss": 12.0881, "step": 19920 }, { "epoch": 1.0847780659301764, "grad_norm": 0.5206950377698941, "learning_rate": 9.114046337173534e-05, "loss": 12.1696, "step": 19921 }, { "epoch": 1.0848325199267594, "grad_norm": 0.5343122491422189, "learning_rate": 9.113167982407414e-05, "loss": 12.0945, "step": 19922 }, { "epoch": 1.0848869739233424, "grad_norm": 0.5405889694086997, "learning_rate": 9.11228963453745e-05, "loss": 12.1126, "step": 19923 }, { "epoch": 1.0849414279199254, "grad_norm": 0.6113488144750007, "learning_rate": 9.11141129357047e-05, "loss": 11.9141, "step": 19924 }, { "epoch": 1.0849958819165084, "grad_norm": 0.5452989191660556, "learning_rate": 9.110532959513304e-05, "loss": 12.0923, "step": 19925 }, { "epoch": 1.0850503359130914, "grad_norm": 0.5544969239297456, "learning_rate": 9.109654632372784e-05, "loss": 12.088, "step": 19926 }, { "epoch": 1.0851047899096744, "grad_norm": 0.6536920844173818, "learning_rate": 9.10877631215574e-05, "loss": 12.079, "step": 19927 }, { "epoch": 1.0851592439062574, "grad_norm": 0.5271331003375122, "learning_rate": 9.107897998869e-05, "loss": 12.1115, "step": 19928 }, { "epoch": 1.0852136979028404, "grad_norm": 0.49071808736334793, "learning_rate": 9.107019692519393e-05, "loss": 11.9722, "step": 19929 }, { "epoch": 1.0852681518994234, "grad_norm": 0.6372190248928601, "learning_rate": 9.106141393113752e-05, "loss": 12.0689, "step": 19930 }, { "epoch": 1.0853226058960064, "grad_norm": 0.5980830574240124, "learning_rate": 9.105263100658902e-05, "loss": 12.0512, "step": 19931 }, { "epoch": 1.0853770598925896, "grad_norm": 0.5493450308605987, "learning_rate": 9.10438481516168e-05, "loss": 12.0894, "step": 19932 }, { "epoch": 1.0854315138891726, "grad_norm": 0.6556303700259074, "learning_rate": 9.10350653662891e-05, "loss": 12.1051, "step": 19933 }, { "epoch": 1.0854859678857556, "grad_norm": 0.5177009660696416, "learning_rate": 9.102628265067425e-05, "loss": 11.9703, "step": 19934 }, { "epoch": 1.0855404218823386, "grad_norm": 0.5046612680720217, "learning_rate": 9.101750000484052e-05, "loss": 12.0555, "step": 19935 }, { "epoch": 1.0855948758789216, "grad_norm": 0.5705460610848567, "learning_rate": 9.100871742885622e-05, "loss": 11.8679, "step": 19936 }, { "epoch": 1.0856493298755046, "grad_norm": 0.5619400076838802, "learning_rate": 9.099993492278965e-05, "loss": 12.1102, "step": 19937 }, { "epoch": 1.0857037838720875, "grad_norm": 0.5665782307003279, "learning_rate": 9.09911524867091e-05, "loss": 12.0628, "step": 19938 }, { "epoch": 1.0857582378686705, "grad_norm": 0.5035529583625047, "learning_rate": 9.098237012068286e-05, "loss": 11.9725, "step": 19939 }, { "epoch": 1.0858126918652535, "grad_norm": 0.5110637395954961, "learning_rate": 9.09735878247792e-05, "loss": 11.9253, "step": 19940 }, { "epoch": 1.0858671458618365, "grad_norm": 0.539311907986121, "learning_rate": 9.096480559906645e-05, "loss": 12.0985, "step": 19941 }, { "epoch": 1.0859215998584195, "grad_norm": 0.6409624825959144, "learning_rate": 9.095602344361286e-05, "loss": 12.023, "step": 19942 }, { "epoch": 1.0859760538550027, "grad_norm": 0.5882799322296413, "learning_rate": 9.094724135848677e-05, "loss": 12.0073, "step": 19943 }, { "epoch": 1.0860305078515857, "grad_norm": 0.546907140635376, "learning_rate": 9.093845934375645e-05, "loss": 11.8597, "step": 19944 }, { "epoch": 1.0860849618481687, "grad_norm": 0.5879607208007526, "learning_rate": 9.092967739949019e-05, "loss": 11.9852, "step": 19945 }, { "epoch": 1.0861394158447517, "grad_norm": 0.5450063255763744, "learning_rate": 9.092089552575628e-05, "loss": 12.0052, "step": 19946 }, { "epoch": 1.0861938698413347, "grad_norm": 0.5314778428648255, "learning_rate": 9.0912113722623e-05, "loss": 12.0105, "step": 19947 }, { "epoch": 1.0862483238379177, "grad_norm": 0.5585952771522082, "learning_rate": 9.090333199015868e-05, "loss": 12.2198, "step": 19948 }, { "epoch": 1.0863027778345007, "grad_norm": 0.5732565799501155, "learning_rate": 9.089455032843155e-05, "loss": 12.0317, "step": 19949 }, { "epoch": 1.0863572318310837, "grad_norm": 0.5765505756080281, "learning_rate": 9.088576873750992e-05, "loss": 12.2115, "step": 19950 }, { "epoch": 1.0864116858276667, "grad_norm": 0.5513923575328709, "learning_rate": 9.087698721746209e-05, "loss": 12.103, "step": 19951 }, { "epoch": 1.0864661398242497, "grad_norm": 0.5820435479206023, "learning_rate": 9.086820576835634e-05, "loss": 12.0404, "step": 19952 }, { "epoch": 1.0865205938208327, "grad_norm": 0.6022803029144533, "learning_rate": 9.085942439026092e-05, "loss": 12.0666, "step": 19953 }, { "epoch": 1.0865750478174157, "grad_norm": 0.5037362536968005, "learning_rate": 9.085064308324418e-05, "loss": 11.9326, "step": 19954 }, { "epoch": 1.0866295018139986, "grad_norm": 0.5629619369554842, "learning_rate": 9.084186184737437e-05, "loss": 12.075, "step": 19955 }, { "epoch": 1.0866839558105819, "grad_norm": 0.7293732390996722, "learning_rate": 9.083308068271977e-05, "loss": 12.0475, "step": 19956 }, { "epoch": 1.0867384098071649, "grad_norm": 0.6011848720393436, "learning_rate": 9.082429958934869e-05, "loss": 11.9534, "step": 19957 }, { "epoch": 1.0867928638037478, "grad_norm": 0.6314019833993705, "learning_rate": 9.08155185673294e-05, "loss": 12.1621, "step": 19958 }, { "epoch": 1.0868473178003308, "grad_norm": 0.5321012509526907, "learning_rate": 9.080673761673016e-05, "loss": 11.9822, "step": 19959 }, { "epoch": 1.0869017717969138, "grad_norm": 0.5358458161111209, "learning_rate": 9.079795673761927e-05, "loss": 12.0879, "step": 19960 }, { "epoch": 1.0869562257934968, "grad_norm": 0.5516116169410465, "learning_rate": 9.078917593006502e-05, "loss": 12.0031, "step": 19961 }, { "epoch": 1.0870106797900798, "grad_norm": 0.5659572430474552, "learning_rate": 9.078039519413567e-05, "loss": 12.1379, "step": 19962 }, { "epoch": 1.0870651337866628, "grad_norm": 0.5606247963131835, "learning_rate": 9.077161452989952e-05, "loss": 11.9215, "step": 19963 }, { "epoch": 1.0871195877832458, "grad_norm": 0.5748818529484704, "learning_rate": 9.076283393742484e-05, "loss": 11.9057, "step": 19964 }, { "epoch": 1.0871740417798288, "grad_norm": 0.6397330396077927, "learning_rate": 9.075405341677989e-05, "loss": 12.0713, "step": 19965 }, { "epoch": 1.087228495776412, "grad_norm": 0.5587576937613958, "learning_rate": 9.0745272968033e-05, "loss": 12.0238, "step": 19966 }, { "epoch": 1.087282949772995, "grad_norm": 0.5372686915159008, "learning_rate": 9.073649259125242e-05, "loss": 11.7746, "step": 19967 }, { "epoch": 1.087337403769578, "grad_norm": 0.5633905760899219, "learning_rate": 9.072771228650646e-05, "loss": 11.9728, "step": 19968 }, { "epoch": 1.087391857766161, "grad_norm": 0.5949007043908271, "learning_rate": 9.071893205386331e-05, "loss": 12.115, "step": 19969 }, { "epoch": 1.087446311762744, "grad_norm": 0.5807917281893856, "learning_rate": 9.071015189339131e-05, "loss": 11.9429, "step": 19970 }, { "epoch": 1.087500765759327, "grad_norm": 0.5756543238824169, "learning_rate": 9.070137180515875e-05, "loss": 12.053, "step": 19971 }, { "epoch": 1.08755521975591, "grad_norm": 0.5297283885852245, "learning_rate": 9.069259178923386e-05, "loss": 11.8516, "step": 19972 }, { "epoch": 1.087609673752493, "grad_norm": 0.5551320857290687, "learning_rate": 9.068381184568494e-05, "loss": 12.0597, "step": 19973 }, { "epoch": 1.087664127749076, "grad_norm": 0.5625803842858186, "learning_rate": 9.067503197458027e-05, "loss": 11.9823, "step": 19974 }, { "epoch": 1.087718581745659, "grad_norm": 0.5795953180190437, "learning_rate": 9.066625217598812e-05, "loss": 12.1733, "step": 19975 }, { "epoch": 1.087773035742242, "grad_norm": 0.5473935443239211, "learning_rate": 9.06574724499767e-05, "loss": 12.0024, "step": 19976 }, { "epoch": 1.087827489738825, "grad_norm": 0.5667668328190282, "learning_rate": 9.064869279661439e-05, "loss": 11.9631, "step": 19977 }, { "epoch": 1.087881943735408, "grad_norm": 0.5266704522455978, "learning_rate": 9.063991321596948e-05, "loss": 12.045, "step": 19978 }, { "epoch": 1.0879363977319911, "grad_norm": 0.5844567425175714, "learning_rate": 9.063113370811009e-05, "loss": 12.139, "step": 19979 }, { "epoch": 1.0879908517285741, "grad_norm": 0.5825710730257588, "learning_rate": 9.062235427310457e-05, "loss": 12.0948, "step": 19980 }, { "epoch": 1.0880453057251571, "grad_norm": 0.6299771286484094, "learning_rate": 9.061357491102122e-05, "loss": 12.0189, "step": 19981 }, { "epoch": 1.0880997597217401, "grad_norm": 0.5363310138015335, "learning_rate": 9.060479562192829e-05, "loss": 11.8844, "step": 19982 }, { "epoch": 1.088154213718323, "grad_norm": 0.5707402502112603, "learning_rate": 9.059601640589403e-05, "loss": 12.0814, "step": 19983 }, { "epoch": 1.088208667714906, "grad_norm": 0.5639069057481662, "learning_rate": 9.058723726298673e-05, "loss": 11.9652, "step": 19984 }, { "epoch": 1.088263121711489, "grad_norm": 0.738216354293828, "learning_rate": 9.057845819327466e-05, "loss": 12.0537, "step": 19985 }, { "epoch": 1.088317575708072, "grad_norm": 0.5441504601844046, "learning_rate": 9.056967919682608e-05, "loss": 12.0143, "step": 19986 }, { "epoch": 1.088372029704655, "grad_norm": 0.6504410720166462, "learning_rate": 9.056090027370923e-05, "loss": 12.1874, "step": 19987 }, { "epoch": 1.088426483701238, "grad_norm": 0.6195252656867826, "learning_rate": 9.055212142399245e-05, "loss": 12.0596, "step": 19988 }, { "epoch": 1.088480937697821, "grad_norm": 0.5673916365171481, "learning_rate": 9.054334264774394e-05, "loss": 12.1216, "step": 19989 }, { "epoch": 1.0885353916944043, "grad_norm": 0.5562905035399163, "learning_rate": 9.053456394503197e-05, "loss": 11.995, "step": 19990 }, { "epoch": 1.0885898456909873, "grad_norm": 0.6864049145436131, "learning_rate": 9.052578531592479e-05, "loss": 11.9271, "step": 19991 }, { "epoch": 1.0886442996875703, "grad_norm": 0.554609993264955, "learning_rate": 9.051700676049073e-05, "loss": 11.9662, "step": 19992 }, { "epoch": 1.0886987536841533, "grad_norm": 0.5390644716978882, "learning_rate": 9.050822827879801e-05, "loss": 11.9687, "step": 19993 }, { "epoch": 1.0887532076807362, "grad_norm": 0.6197942093873294, "learning_rate": 9.04994498709149e-05, "loss": 11.9846, "step": 19994 }, { "epoch": 1.0888076616773192, "grad_norm": 0.5532516314359079, "learning_rate": 9.049067153690965e-05, "loss": 12.0499, "step": 19995 }, { "epoch": 1.0888621156739022, "grad_norm": 0.5730859665342325, "learning_rate": 9.048189327685055e-05, "loss": 12.216, "step": 19996 }, { "epoch": 1.0889165696704852, "grad_norm": 0.5500219671907687, "learning_rate": 9.047311509080584e-05, "loss": 12.0319, "step": 19997 }, { "epoch": 1.0889710236670682, "grad_norm": 0.5921074346229978, "learning_rate": 9.04643369788438e-05, "loss": 12.1958, "step": 19998 }, { "epoch": 1.0890254776636512, "grad_norm": 0.6028458643295544, "learning_rate": 9.045555894103265e-05, "loss": 11.9421, "step": 19999 }, { "epoch": 1.0890799316602342, "grad_norm": 0.5073641549341452, "learning_rate": 9.044678097744068e-05, "loss": 12.0276, "step": 20000 }, { "epoch": 1.0891343856568172, "grad_norm": 0.6065992995903755, "learning_rate": 9.043800308813614e-05, "loss": 12.1435, "step": 20001 }, { "epoch": 1.0891888396534004, "grad_norm": 0.5643750133310085, "learning_rate": 9.042922527318728e-05, "loss": 12.0401, "step": 20002 }, { "epoch": 1.0892432936499834, "grad_norm": 0.6550555665516066, "learning_rate": 9.042044753266238e-05, "loss": 12.1827, "step": 20003 }, { "epoch": 1.0892977476465664, "grad_norm": 0.4899034343825559, "learning_rate": 9.04116698666297e-05, "loss": 12.0562, "step": 20004 }, { "epoch": 1.0893522016431494, "grad_norm": 0.6185593649271679, "learning_rate": 9.040289227515745e-05, "loss": 12.0924, "step": 20005 }, { "epoch": 1.0894066556397324, "grad_norm": 0.570663308596088, "learning_rate": 9.039411475831395e-05, "loss": 11.9767, "step": 20006 }, { "epoch": 1.0894611096363154, "grad_norm": 0.5268150422309706, "learning_rate": 9.038533731616741e-05, "loss": 12.0193, "step": 20007 }, { "epoch": 1.0895155636328984, "grad_norm": 0.5258947574793097, "learning_rate": 9.037655994878614e-05, "loss": 12.1018, "step": 20008 }, { "epoch": 1.0895700176294814, "grad_norm": 0.575274588880538, "learning_rate": 9.036778265623832e-05, "loss": 11.9754, "step": 20009 }, { "epoch": 1.0896244716260644, "grad_norm": 0.5802939253572615, "learning_rate": 9.035900543859224e-05, "loss": 12.0168, "step": 20010 }, { "epoch": 1.0896789256226473, "grad_norm": 0.5122724331256518, "learning_rate": 9.035022829591613e-05, "loss": 11.9511, "step": 20011 }, { "epoch": 1.0897333796192303, "grad_norm": 0.4769146188379691, "learning_rate": 9.03414512282783e-05, "loss": 11.9888, "step": 20012 }, { "epoch": 1.0897878336158136, "grad_norm": 0.5265505779491405, "learning_rate": 9.033267423574693e-05, "loss": 11.9005, "step": 20013 }, { "epoch": 1.0898422876123965, "grad_norm": 0.5333724765858119, "learning_rate": 9.032389731839031e-05, "loss": 12.0755, "step": 20014 }, { "epoch": 1.0898967416089795, "grad_norm": 0.5193352601069574, "learning_rate": 9.03151204762767e-05, "loss": 12.1701, "step": 20015 }, { "epoch": 1.0899511956055625, "grad_norm": 0.49464098071032003, "learning_rate": 9.030634370947433e-05, "loss": 12.0579, "step": 20016 }, { "epoch": 1.0900056496021455, "grad_norm": 0.568910824507174, "learning_rate": 9.029756701805147e-05, "loss": 12.0425, "step": 20017 }, { "epoch": 1.0900601035987285, "grad_norm": 0.5670081222423411, "learning_rate": 9.028879040207638e-05, "loss": 11.9655, "step": 20018 }, { "epoch": 1.0901145575953115, "grad_norm": 0.5209534940742387, "learning_rate": 9.028001386161724e-05, "loss": 12.0386, "step": 20019 }, { "epoch": 1.0901690115918945, "grad_norm": 0.5257222292295778, "learning_rate": 9.027123739674236e-05, "loss": 12.1165, "step": 20020 }, { "epoch": 1.0902234655884775, "grad_norm": 0.562692073697708, "learning_rate": 9.026246100751996e-05, "loss": 12.1326, "step": 20021 }, { "epoch": 1.0902779195850605, "grad_norm": 0.5356455064032006, "learning_rate": 9.02536846940183e-05, "loss": 12.1093, "step": 20022 }, { "epoch": 1.0903323735816435, "grad_norm": 0.6178913806743136, "learning_rate": 9.024490845630564e-05, "loss": 12.0731, "step": 20023 }, { "epoch": 1.0903868275782265, "grad_norm": 0.5086643420023287, "learning_rate": 9.023613229445018e-05, "loss": 12.0523, "step": 20024 }, { "epoch": 1.0904412815748097, "grad_norm": 0.5219656284534158, "learning_rate": 9.022735620852019e-05, "loss": 11.7372, "step": 20025 }, { "epoch": 1.0904957355713927, "grad_norm": 0.5162106532165133, "learning_rate": 9.021858019858393e-05, "loss": 11.9562, "step": 20026 }, { "epoch": 1.0905501895679757, "grad_norm": 0.5000648210155602, "learning_rate": 9.020980426470963e-05, "loss": 11.9445, "step": 20027 }, { "epoch": 1.0906046435645587, "grad_norm": 0.534566777693027, "learning_rate": 9.020102840696558e-05, "loss": 12.0463, "step": 20028 }, { "epoch": 1.0906590975611417, "grad_norm": 0.5624082349442754, "learning_rate": 9.01922526254199e-05, "loss": 11.9304, "step": 20029 }, { "epoch": 1.0907135515577246, "grad_norm": 0.551904556369177, "learning_rate": 9.018347692014095e-05, "loss": 11.9993, "step": 20030 }, { "epoch": 1.0907680055543076, "grad_norm": 0.5942305762390805, "learning_rate": 9.017470129119692e-05, "loss": 12.092, "step": 20031 }, { "epoch": 1.0908224595508906, "grad_norm": 0.5024243307789622, "learning_rate": 9.016592573865606e-05, "loss": 12.0821, "step": 20032 }, { "epoch": 1.0908769135474736, "grad_norm": 0.52087478799685, "learning_rate": 9.01571502625866e-05, "loss": 12.0875, "step": 20033 }, { "epoch": 1.0909313675440566, "grad_norm": 0.49630439482687516, "learning_rate": 9.014837486305682e-05, "loss": 12.0044, "step": 20034 }, { "epoch": 1.0909858215406396, "grad_norm": 0.5379210037700731, "learning_rate": 9.013959954013492e-05, "loss": 12.0714, "step": 20035 }, { "epoch": 1.0910402755372228, "grad_norm": 0.5401713498441595, "learning_rate": 9.01308242938891e-05, "loss": 11.8857, "step": 20036 }, { "epoch": 1.0910947295338058, "grad_norm": 0.5356267210334869, "learning_rate": 9.012204912438769e-05, "loss": 12.1584, "step": 20037 }, { "epoch": 1.0911491835303888, "grad_norm": 0.5685396376260402, "learning_rate": 9.011327403169891e-05, "loss": 12.0256, "step": 20038 }, { "epoch": 1.0912036375269718, "grad_norm": 0.6397245614974457, "learning_rate": 9.010449901589094e-05, "loss": 12.0091, "step": 20039 }, { "epoch": 1.0912580915235548, "grad_norm": 0.5338238370933304, "learning_rate": 9.009572407703201e-05, "loss": 11.9764, "step": 20040 }, { "epoch": 1.0913125455201378, "grad_norm": 0.5576473803063768, "learning_rate": 9.008694921519044e-05, "loss": 11.9742, "step": 20041 }, { "epoch": 1.0913669995167208, "grad_norm": 0.5860825346362053, "learning_rate": 9.00781744304344e-05, "loss": 12.0667, "step": 20042 }, { "epoch": 1.0914214535133038, "grad_norm": 0.5144441272948119, "learning_rate": 9.006939972283213e-05, "loss": 11.9995, "step": 20043 }, { "epoch": 1.0914759075098868, "grad_norm": 0.5611980194921362, "learning_rate": 9.006062509245188e-05, "loss": 12.1328, "step": 20044 }, { "epoch": 1.0915303615064698, "grad_norm": 0.5431351711512413, "learning_rate": 9.005185053936186e-05, "loss": 12.0958, "step": 20045 }, { "epoch": 1.0915848155030528, "grad_norm": 0.6682242249022382, "learning_rate": 9.004307606363033e-05, "loss": 12.0687, "step": 20046 }, { "epoch": 1.0916392694996357, "grad_norm": 0.5671285226395476, "learning_rate": 9.00343016653255e-05, "loss": 11.931, "step": 20047 }, { "epoch": 1.0916937234962187, "grad_norm": 0.5021756500744794, "learning_rate": 9.002552734451566e-05, "loss": 11.944, "step": 20048 }, { "epoch": 1.091748177492802, "grad_norm": 0.5825252285222624, "learning_rate": 9.001675310126897e-05, "loss": 12.0314, "step": 20049 }, { "epoch": 1.091802631489385, "grad_norm": 0.608147473703928, "learning_rate": 9.000797893565367e-05, "loss": 12.222, "step": 20050 }, { "epoch": 1.091857085485968, "grad_norm": 0.5639127355682383, "learning_rate": 8.999920484773798e-05, "loss": 11.8921, "step": 20051 }, { "epoch": 1.091911539482551, "grad_norm": 0.5647725739831364, "learning_rate": 8.999043083759017e-05, "loss": 11.9675, "step": 20052 }, { "epoch": 1.091965993479134, "grad_norm": 0.5480108093055222, "learning_rate": 8.998165690527846e-05, "loss": 12.0654, "step": 20053 }, { "epoch": 1.092020447475717, "grad_norm": 0.6067634857647759, "learning_rate": 8.997288305087104e-05, "loss": 12.0903, "step": 20054 }, { "epoch": 1.0920749014723, "grad_norm": 0.5585022925172852, "learning_rate": 8.996410927443619e-05, "loss": 11.9205, "step": 20055 }, { "epoch": 1.092129355468883, "grad_norm": 0.5333906347604619, "learning_rate": 8.99553355760421e-05, "loss": 12.0811, "step": 20056 }, { "epoch": 1.092183809465466, "grad_norm": 0.5919546618318178, "learning_rate": 8.994656195575699e-05, "loss": 12.0538, "step": 20057 }, { "epoch": 1.0922382634620489, "grad_norm": 0.5386646282360394, "learning_rate": 8.993778841364915e-05, "loss": 12.0325, "step": 20058 }, { "epoch": 1.0922927174586319, "grad_norm": 0.5627313682717267, "learning_rate": 8.992901494978671e-05, "loss": 12.0302, "step": 20059 }, { "epoch": 1.092347171455215, "grad_norm": 0.66080896523592, "learning_rate": 8.992024156423795e-05, "loss": 12.0468, "step": 20060 }, { "epoch": 1.092401625451798, "grad_norm": 0.5483386811294285, "learning_rate": 8.991146825707107e-05, "loss": 12.0382, "step": 20061 }, { "epoch": 1.092456079448381, "grad_norm": 0.5698024277752847, "learning_rate": 8.99026950283543e-05, "loss": 12.0269, "step": 20062 }, { "epoch": 1.092510533444964, "grad_norm": 0.5034213710347741, "learning_rate": 8.989392187815587e-05, "loss": 11.9569, "step": 20063 }, { "epoch": 1.092564987441547, "grad_norm": 0.5252457182569386, "learning_rate": 8.988514880654402e-05, "loss": 12.0722, "step": 20064 }, { "epoch": 1.09261944143813, "grad_norm": 0.5485424380270981, "learning_rate": 8.987637581358693e-05, "loss": 12.1475, "step": 20065 }, { "epoch": 1.092673895434713, "grad_norm": 0.5563476844267424, "learning_rate": 8.986760289935285e-05, "loss": 12.0159, "step": 20066 }, { "epoch": 1.092728349431296, "grad_norm": 0.5586996576538787, "learning_rate": 8.985883006390999e-05, "loss": 12.0525, "step": 20067 }, { "epoch": 1.092782803427879, "grad_norm": 0.5824339016281458, "learning_rate": 8.985005730732658e-05, "loss": 12.0977, "step": 20068 }, { "epoch": 1.092837257424462, "grad_norm": 0.5784505890381453, "learning_rate": 8.984128462967081e-05, "loss": 12.0047, "step": 20069 }, { "epoch": 1.092891711421045, "grad_norm": 0.52087938675949, "learning_rate": 8.983251203101092e-05, "loss": 12.0008, "step": 20070 }, { "epoch": 1.092946165417628, "grad_norm": 0.6487274880530316, "learning_rate": 8.982373951141511e-05, "loss": 12.0405, "step": 20071 }, { "epoch": 1.0930006194142112, "grad_norm": 0.5389941667346818, "learning_rate": 8.981496707095162e-05, "loss": 12.1138, "step": 20072 }, { "epoch": 1.0930550734107942, "grad_norm": 0.5928294869034778, "learning_rate": 8.980619470968865e-05, "loss": 11.8825, "step": 20073 }, { "epoch": 1.0931095274073772, "grad_norm": 0.5458634722915138, "learning_rate": 8.97974224276944e-05, "loss": 11.9042, "step": 20074 }, { "epoch": 1.0931639814039602, "grad_norm": 0.5636647294429769, "learning_rate": 8.978865022503712e-05, "loss": 12.1298, "step": 20075 }, { "epoch": 1.0932184354005432, "grad_norm": 0.5938311159587132, "learning_rate": 8.9779878101785e-05, "loss": 12.1066, "step": 20076 }, { "epoch": 1.0932728893971262, "grad_norm": 0.5960768360565969, "learning_rate": 8.977110605800628e-05, "loss": 12.0255, "step": 20077 }, { "epoch": 1.0933273433937092, "grad_norm": 0.576698533575002, "learning_rate": 8.976233409376916e-05, "loss": 11.9549, "step": 20078 }, { "epoch": 1.0933817973902922, "grad_norm": 0.6343789106509875, "learning_rate": 8.975356220914184e-05, "loss": 11.9698, "step": 20079 }, { "epoch": 1.0934362513868752, "grad_norm": 0.5420024279986068, "learning_rate": 8.974479040419254e-05, "loss": 11.9848, "step": 20080 }, { "epoch": 1.0934907053834582, "grad_norm": 0.580584398832918, "learning_rate": 8.973601867898946e-05, "loss": 12.1149, "step": 20081 }, { "epoch": 1.0935451593800412, "grad_norm": 0.6080394022495313, "learning_rate": 8.972724703360083e-05, "loss": 12.0821, "step": 20082 }, { "epoch": 1.0935996133766244, "grad_norm": 0.6053854430232528, "learning_rate": 8.971847546809482e-05, "loss": 11.8476, "step": 20083 }, { "epoch": 1.0936540673732074, "grad_norm": 0.5395166590259115, "learning_rate": 8.970970398253971e-05, "loss": 12.0407, "step": 20084 }, { "epoch": 1.0937085213697904, "grad_norm": 0.5644145067321656, "learning_rate": 8.970093257700362e-05, "loss": 12.0285, "step": 20085 }, { "epoch": 1.0937629753663733, "grad_norm": 0.5903288855279883, "learning_rate": 8.969216125155483e-05, "loss": 11.9965, "step": 20086 }, { "epoch": 1.0938174293629563, "grad_norm": 0.5131401448105591, "learning_rate": 8.968339000626154e-05, "loss": 12.0213, "step": 20087 }, { "epoch": 1.0938718833595393, "grad_norm": 0.5098649236324263, "learning_rate": 8.967461884119191e-05, "loss": 12.0144, "step": 20088 }, { "epoch": 1.0939263373561223, "grad_norm": 0.6312531703837619, "learning_rate": 8.966584775641423e-05, "loss": 12.0738, "step": 20089 }, { "epoch": 1.0939807913527053, "grad_norm": 0.5581019802511759, "learning_rate": 8.96570767519966e-05, "loss": 11.9121, "step": 20090 }, { "epoch": 1.0940352453492883, "grad_norm": 0.5632930161470753, "learning_rate": 8.96483058280073e-05, "loss": 11.974, "step": 20091 }, { "epoch": 1.0940896993458713, "grad_norm": 0.6088835188583688, "learning_rate": 8.963953498451449e-05, "loss": 12.0442, "step": 20092 }, { "epoch": 1.0941441533424543, "grad_norm": 0.5602940826984238, "learning_rate": 8.963076422158641e-05, "loss": 12.0847, "step": 20093 }, { "epoch": 1.0941986073390373, "grad_norm": 0.5516614152175064, "learning_rate": 8.962199353929123e-05, "loss": 12.0685, "step": 20094 }, { "epoch": 1.0942530613356205, "grad_norm": 0.5263358727348857, "learning_rate": 8.961322293769718e-05, "loss": 11.9879, "step": 20095 }, { "epoch": 1.0943075153322035, "grad_norm": 0.5990840897682311, "learning_rate": 8.960445241687242e-05, "loss": 12.176, "step": 20096 }, { "epoch": 1.0943619693287865, "grad_norm": 0.6522974451215476, "learning_rate": 8.95956819768852e-05, "loss": 12.2024, "step": 20097 }, { "epoch": 1.0944164233253695, "grad_norm": 0.501404330370195, "learning_rate": 8.958691161780371e-05, "loss": 12.0772, "step": 20098 }, { "epoch": 1.0944708773219525, "grad_norm": 0.5666126899727636, "learning_rate": 8.95781413396962e-05, "loss": 11.9306, "step": 20099 }, { "epoch": 1.0945253313185355, "grad_norm": 0.5762612189338195, "learning_rate": 8.956937114263072e-05, "loss": 12.0253, "step": 20100 }, { "epoch": 1.0945797853151185, "grad_norm": 0.5554150959529307, "learning_rate": 8.956060102667559e-05, "loss": 12.0569, "step": 20101 }, { "epoch": 1.0946342393117015, "grad_norm": 0.5361180229130249, "learning_rate": 8.955183099189897e-05, "loss": 12.0235, "step": 20102 }, { "epoch": 1.0946886933082844, "grad_norm": 0.5243410510388125, "learning_rate": 8.954306103836908e-05, "loss": 11.9835, "step": 20103 }, { "epoch": 1.0947431473048674, "grad_norm": 0.5157913590340827, "learning_rate": 8.95342911661541e-05, "loss": 12.0156, "step": 20104 }, { "epoch": 1.0947976013014504, "grad_norm": 0.5460772186703869, "learning_rate": 8.952552137532222e-05, "loss": 12.1232, "step": 20105 }, { "epoch": 1.0948520552980336, "grad_norm": 0.685412340208087, "learning_rate": 8.951675166594165e-05, "loss": 12.0335, "step": 20106 }, { "epoch": 1.0949065092946166, "grad_norm": 0.6091407511085071, "learning_rate": 8.950798203808054e-05, "loss": 11.9644, "step": 20107 }, { "epoch": 1.0949609632911996, "grad_norm": 0.6472951916502481, "learning_rate": 8.949921249180715e-05, "loss": 12.1011, "step": 20108 }, { "epoch": 1.0950154172877826, "grad_norm": 0.5905049314827779, "learning_rate": 8.94904430271897e-05, "loss": 12.1145, "step": 20109 }, { "epoch": 1.0950698712843656, "grad_norm": 0.5224146984482988, "learning_rate": 8.948167364429628e-05, "loss": 12.0614, "step": 20110 }, { "epoch": 1.0951243252809486, "grad_norm": 0.619051476455258, "learning_rate": 8.947290434319508e-05, "loss": 12.1789, "step": 20111 }, { "epoch": 1.0951787792775316, "grad_norm": 0.5208124207888019, "learning_rate": 8.94641351239544e-05, "loss": 12.024, "step": 20112 }, { "epoch": 1.0952332332741146, "grad_norm": 0.5526687975045929, "learning_rate": 8.945536598664235e-05, "loss": 12.1497, "step": 20113 }, { "epoch": 1.0952876872706976, "grad_norm": 0.5252404891630289, "learning_rate": 8.944659693132715e-05, "loss": 12.0619, "step": 20114 }, { "epoch": 1.0953421412672806, "grad_norm": 0.5319056381621993, "learning_rate": 8.943782795807698e-05, "loss": 12.0097, "step": 20115 }, { "epoch": 1.0953965952638636, "grad_norm": 0.49652797801671555, "learning_rate": 8.942905906696004e-05, "loss": 11.9446, "step": 20116 }, { "epoch": 1.0954510492604466, "grad_norm": 0.5454351264879337, "learning_rate": 8.94202902580445e-05, "loss": 11.8903, "step": 20117 }, { "epoch": 1.0955055032570296, "grad_norm": 0.5506251747120456, "learning_rate": 8.941152153139854e-05, "loss": 12.1033, "step": 20118 }, { "epoch": 1.0955599572536128, "grad_norm": 0.5990370360918348, "learning_rate": 8.940275288709041e-05, "loss": 12.1675, "step": 20119 }, { "epoch": 1.0956144112501958, "grad_norm": 0.5337433802798721, "learning_rate": 8.939398432518823e-05, "loss": 12.0019, "step": 20120 }, { "epoch": 1.0956688652467788, "grad_norm": 0.5445019440378159, "learning_rate": 8.93852158457602e-05, "loss": 12.0999, "step": 20121 }, { "epoch": 1.0957233192433617, "grad_norm": 0.6067101126084533, "learning_rate": 8.937644744887451e-05, "loss": 12.1753, "step": 20122 }, { "epoch": 1.0957777732399447, "grad_norm": 0.6552684164311637, "learning_rate": 8.936767913459932e-05, "loss": 12.1914, "step": 20123 }, { "epoch": 1.0958322272365277, "grad_norm": 0.5696952036299877, "learning_rate": 8.935891090300288e-05, "loss": 12.0084, "step": 20124 }, { "epoch": 1.0958866812331107, "grad_norm": 0.5644830707317485, "learning_rate": 8.935014275415332e-05, "loss": 12.0161, "step": 20125 }, { "epoch": 1.0959411352296937, "grad_norm": 0.5801519224720506, "learning_rate": 8.934137468811883e-05, "loss": 12.0711, "step": 20126 }, { "epoch": 1.0959955892262767, "grad_norm": 0.5415007310440003, "learning_rate": 8.93326067049676e-05, "loss": 12.0049, "step": 20127 }, { "epoch": 1.0960500432228597, "grad_norm": 0.6284333256617635, "learning_rate": 8.932383880476782e-05, "loss": 12.0675, "step": 20128 }, { "epoch": 1.0961044972194427, "grad_norm": 0.620463081209787, "learning_rate": 8.931507098758768e-05, "loss": 12.1533, "step": 20129 }, { "epoch": 1.096158951216026, "grad_norm": 0.5116319195529034, "learning_rate": 8.930630325349531e-05, "loss": 12.0879, "step": 20130 }, { "epoch": 1.096213405212609, "grad_norm": 0.5817236198966054, "learning_rate": 8.929753560255892e-05, "loss": 11.973, "step": 20131 }, { "epoch": 1.096267859209192, "grad_norm": 0.5274798141762556, "learning_rate": 8.928876803484669e-05, "loss": 11.9422, "step": 20132 }, { "epoch": 1.096322313205775, "grad_norm": 0.5738998062283146, "learning_rate": 8.92800005504268e-05, "loss": 12.0354, "step": 20133 }, { "epoch": 1.0963767672023579, "grad_norm": 0.5315282150985224, "learning_rate": 8.92712331493674e-05, "loss": 11.9889, "step": 20134 }, { "epoch": 1.0964312211989409, "grad_norm": 0.5160836765505249, "learning_rate": 8.926246583173672e-05, "loss": 11.7833, "step": 20135 }, { "epoch": 1.0964856751955239, "grad_norm": 0.539779799869873, "learning_rate": 8.925369859760288e-05, "loss": 12.0612, "step": 20136 }, { "epoch": 1.0965401291921069, "grad_norm": 0.5651266964591511, "learning_rate": 8.924493144703411e-05, "loss": 12.0528, "step": 20137 }, { "epoch": 1.0965945831886899, "grad_norm": 0.606502159416733, "learning_rate": 8.923616438009855e-05, "loss": 12.1592, "step": 20138 }, { "epoch": 1.0966490371852728, "grad_norm": 0.5998599114540937, "learning_rate": 8.92273973968644e-05, "loss": 11.994, "step": 20139 }, { "epoch": 1.0967034911818558, "grad_norm": 0.5697227085835541, "learning_rate": 8.921863049739979e-05, "loss": 12.0578, "step": 20140 }, { "epoch": 1.0967579451784388, "grad_norm": 0.5481736629362859, "learning_rate": 8.920986368177292e-05, "loss": 12.0411, "step": 20141 }, { "epoch": 1.096812399175022, "grad_norm": 0.5980455811264401, "learning_rate": 8.920109695005195e-05, "loss": 12.0798, "step": 20142 }, { "epoch": 1.096866853171605, "grad_norm": 0.5560172118990516, "learning_rate": 8.919233030230507e-05, "loss": 12.1112, "step": 20143 }, { "epoch": 1.096921307168188, "grad_norm": 0.5615406718987744, "learning_rate": 8.918356373860044e-05, "loss": 11.9977, "step": 20144 }, { "epoch": 1.096975761164771, "grad_norm": 0.5818790254722687, "learning_rate": 8.917479725900622e-05, "loss": 12.1381, "step": 20145 }, { "epoch": 1.097030215161354, "grad_norm": 0.6037929523246859, "learning_rate": 8.91660308635906e-05, "loss": 12.0707, "step": 20146 }, { "epoch": 1.097084669157937, "grad_norm": 0.5516240368100374, "learning_rate": 8.915726455242174e-05, "loss": 12.0374, "step": 20147 }, { "epoch": 1.09713912315452, "grad_norm": 0.5310517505703545, "learning_rate": 8.914849832556782e-05, "loss": 12.1701, "step": 20148 }, { "epoch": 1.097193577151103, "grad_norm": 0.6207564108102965, "learning_rate": 8.913973218309702e-05, "loss": 12.1254, "step": 20149 }, { "epoch": 1.097248031147686, "grad_norm": 0.5907823547374166, "learning_rate": 8.913096612507745e-05, "loss": 12.0863, "step": 20150 }, { "epoch": 1.097302485144269, "grad_norm": 0.5575367026254822, "learning_rate": 8.912220015157732e-05, "loss": 11.9744, "step": 20151 }, { "epoch": 1.097356939140852, "grad_norm": 0.6751885967052378, "learning_rate": 8.911343426266478e-05, "loss": 12.0798, "step": 20152 }, { "epoch": 1.0974113931374352, "grad_norm": 0.5988546304632669, "learning_rate": 8.9104668458408e-05, "loss": 12.0581, "step": 20153 }, { "epoch": 1.0974658471340182, "grad_norm": 0.5312345113132421, "learning_rate": 8.909590273887516e-05, "loss": 12.0057, "step": 20154 }, { "epoch": 1.0975203011306012, "grad_norm": 0.6099930599315821, "learning_rate": 8.908713710413438e-05, "loss": 12.2494, "step": 20155 }, { "epoch": 1.0975747551271842, "grad_norm": 0.614170068332695, "learning_rate": 8.907837155425385e-05, "loss": 12.0462, "step": 20156 }, { "epoch": 1.0976292091237672, "grad_norm": 0.5822291739120788, "learning_rate": 8.906960608930176e-05, "loss": 12.0543, "step": 20157 }, { "epoch": 1.0976836631203502, "grad_norm": 0.5188234845750623, "learning_rate": 8.906084070934623e-05, "loss": 12.0245, "step": 20158 }, { "epoch": 1.0977381171169331, "grad_norm": 0.5011474986009845, "learning_rate": 8.905207541445551e-05, "loss": 12.0592, "step": 20159 }, { "epoch": 1.0977925711135161, "grad_norm": 0.6494712131666551, "learning_rate": 8.904331020469759e-05, "loss": 12.1785, "step": 20160 }, { "epoch": 1.0978470251100991, "grad_norm": 0.5733330915908963, "learning_rate": 8.903454508014076e-05, "loss": 12.0438, "step": 20161 }, { "epoch": 1.0979014791066821, "grad_norm": 0.5601787541031239, "learning_rate": 8.902578004085315e-05, "loss": 12.0481, "step": 20162 }, { "epoch": 1.0979559331032651, "grad_norm": 0.6770310032193882, "learning_rate": 8.901701508690291e-05, "loss": 12.0976, "step": 20163 }, { "epoch": 1.098010387099848, "grad_norm": 0.6271802627011732, "learning_rate": 8.900825021835821e-05, "loss": 12.1657, "step": 20164 }, { "epoch": 1.0980648410964313, "grad_norm": 0.5596518808132004, "learning_rate": 8.89994854352872e-05, "loss": 11.9854, "step": 20165 }, { "epoch": 1.0981192950930143, "grad_norm": 0.5020805826317156, "learning_rate": 8.899072073775802e-05, "loss": 11.9333, "step": 20166 }, { "epoch": 1.0981737490895973, "grad_norm": 0.6042882510332298, "learning_rate": 8.898195612583886e-05, "loss": 12.1209, "step": 20167 }, { "epoch": 1.0982282030861803, "grad_norm": 0.652798989112454, "learning_rate": 8.897319159959783e-05, "loss": 12.0498, "step": 20168 }, { "epoch": 1.0982826570827633, "grad_norm": 0.5933847046017819, "learning_rate": 8.896442715910317e-05, "loss": 12.1471, "step": 20169 }, { "epoch": 1.0983371110793463, "grad_norm": 0.602944215692327, "learning_rate": 8.895566280442294e-05, "loss": 11.9518, "step": 20170 }, { "epoch": 1.0983915650759293, "grad_norm": 0.5666090980810462, "learning_rate": 8.894689853562532e-05, "loss": 12.0612, "step": 20171 }, { "epoch": 1.0984460190725123, "grad_norm": 0.5955716178058724, "learning_rate": 8.893813435277845e-05, "loss": 12.0099, "step": 20172 }, { "epoch": 1.0985004730690953, "grad_norm": 0.630128554425275, "learning_rate": 8.892937025595053e-05, "loss": 12.0716, "step": 20173 }, { "epoch": 1.0985549270656783, "grad_norm": 0.56439065043191, "learning_rate": 8.892060624520968e-05, "loss": 11.9565, "step": 20174 }, { "epoch": 1.0986093810622612, "grad_norm": 0.5866824579384988, "learning_rate": 8.891184232062405e-05, "loss": 11.9924, "step": 20175 }, { "epoch": 1.0986638350588445, "grad_norm": 0.7665723124085866, "learning_rate": 8.89030784822618e-05, "loss": 12.2208, "step": 20176 }, { "epoch": 1.0987182890554275, "grad_norm": 0.6276742135752813, "learning_rate": 8.889431473019108e-05, "loss": 12.0264, "step": 20177 }, { "epoch": 1.0987727430520104, "grad_norm": 0.579773130352321, "learning_rate": 8.888555106448e-05, "loss": 12.1391, "step": 20178 }, { "epoch": 1.0988271970485934, "grad_norm": 0.5597768164792385, "learning_rate": 8.88767874851968e-05, "loss": 12.1392, "step": 20179 }, { "epoch": 1.0988816510451764, "grad_norm": 0.5509950255852855, "learning_rate": 8.886802399240952e-05, "loss": 11.9973, "step": 20180 }, { "epoch": 1.0989361050417594, "grad_norm": 0.5743486501236731, "learning_rate": 8.885926058618636e-05, "loss": 12.0458, "step": 20181 }, { "epoch": 1.0989905590383424, "grad_norm": 0.5308475307375142, "learning_rate": 8.885049726659546e-05, "loss": 11.9559, "step": 20182 }, { "epoch": 1.0990450130349254, "grad_norm": 0.5065825279705609, "learning_rate": 8.884173403370494e-05, "loss": 11.9881, "step": 20183 }, { "epoch": 1.0990994670315084, "grad_norm": 0.5652712077381974, "learning_rate": 8.883297088758298e-05, "loss": 12.0564, "step": 20184 }, { "epoch": 1.0991539210280914, "grad_norm": 0.5415480303899827, "learning_rate": 8.882420782829772e-05, "loss": 12.0772, "step": 20185 }, { "epoch": 1.0992083750246744, "grad_norm": 0.5352570980250407, "learning_rate": 8.881544485591729e-05, "loss": 12.0056, "step": 20186 }, { "epoch": 1.0992628290212574, "grad_norm": 0.5173298551932756, "learning_rate": 8.880668197050984e-05, "loss": 12.0798, "step": 20187 }, { "epoch": 1.0993172830178404, "grad_norm": 0.48717873604380874, "learning_rate": 8.87979191721435e-05, "loss": 12.0184, "step": 20188 }, { "epoch": 1.0993717370144236, "grad_norm": 0.5597341952123152, "learning_rate": 8.878915646088646e-05, "loss": 11.9295, "step": 20189 }, { "epoch": 1.0994261910110066, "grad_norm": 0.5615709176867464, "learning_rate": 8.878039383680678e-05, "loss": 12.1507, "step": 20190 }, { "epoch": 1.0994806450075896, "grad_norm": 0.5740671509788972, "learning_rate": 8.877163129997265e-05, "loss": 12.022, "step": 20191 }, { "epoch": 1.0995350990041726, "grad_norm": 0.5352406945605978, "learning_rate": 8.876286885045218e-05, "loss": 12.1071, "step": 20192 }, { "epoch": 1.0995895530007556, "grad_norm": 0.5373223953618459, "learning_rate": 8.875410648831355e-05, "loss": 11.9739, "step": 20193 }, { "epoch": 1.0996440069973386, "grad_norm": 0.5296057941726166, "learning_rate": 8.874534421362484e-05, "loss": 11.9395, "step": 20194 }, { "epoch": 1.0996984609939215, "grad_norm": 0.5577670315621532, "learning_rate": 8.873658202645424e-05, "loss": 11.9845, "step": 20195 }, { "epoch": 1.0997529149905045, "grad_norm": 0.5335575364798968, "learning_rate": 8.872781992686987e-05, "loss": 12.0861, "step": 20196 }, { "epoch": 1.0998073689870875, "grad_norm": 0.5508209498312792, "learning_rate": 8.871905791493987e-05, "loss": 11.9597, "step": 20197 }, { "epoch": 1.0998618229836705, "grad_norm": 0.6250421100736349, "learning_rate": 8.871029599073235e-05, "loss": 12.1618, "step": 20198 }, { "epoch": 1.0999162769802535, "grad_norm": 0.591381183464453, "learning_rate": 8.87015341543155e-05, "loss": 12.1627, "step": 20199 }, { "epoch": 1.0999707309768367, "grad_norm": 0.5787438375914769, "learning_rate": 8.869277240575738e-05, "loss": 12.0613, "step": 20200 }, { "epoch": 1.1000251849734197, "grad_norm": 0.5910180724776823, "learning_rate": 8.868401074512616e-05, "loss": 11.9308, "step": 20201 }, { "epoch": 1.1000796389700027, "grad_norm": 0.5395654718912095, "learning_rate": 8.867524917248999e-05, "loss": 12.0461, "step": 20202 }, { "epoch": 1.1001340929665857, "grad_norm": 0.5436651477781418, "learning_rate": 8.866648768791697e-05, "loss": 12.1008, "step": 20203 }, { "epoch": 1.1001885469631687, "grad_norm": 0.6733763075191992, "learning_rate": 8.865772629147523e-05, "loss": 11.9532, "step": 20204 }, { "epoch": 1.1002430009597517, "grad_norm": 0.5931005392577273, "learning_rate": 8.86489649832329e-05, "loss": 12.0734, "step": 20205 }, { "epoch": 1.1002974549563347, "grad_norm": 0.5793400162860026, "learning_rate": 8.864020376325814e-05, "loss": 12.0362, "step": 20206 }, { "epoch": 1.1003519089529177, "grad_norm": 0.5345822138575493, "learning_rate": 8.863144263161906e-05, "loss": 12.0126, "step": 20207 }, { "epoch": 1.1004063629495007, "grad_norm": 0.5150319586228077, "learning_rate": 8.86226815883838e-05, "loss": 12.0538, "step": 20208 }, { "epoch": 1.1004608169460837, "grad_norm": 0.5139144427714485, "learning_rate": 8.86139206336205e-05, "loss": 11.9446, "step": 20209 }, { "epoch": 1.1005152709426667, "grad_norm": 0.5159596684044622, "learning_rate": 8.860515976739722e-05, "loss": 12.0148, "step": 20210 }, { "epoch": 1.1005697249392496, "grad_norm": 0.5954957669807992, "learning_rate": 8.859639898978213e-05, "loss": 11.9208, "step": 20211 }, { "epoch": 1.1006241789358329, "grad_norm": 0.47558291302665023, "learning_rate": 8.858763830084338e-05, "loss": 12.0234, "step": 20212 }, { "epoch": 1.1006786329324159, "grad_norm": 0.5723861581364824, "learning_rate": 8.857887770064905e-05, "loss": 12.1122, "step": 20213 }, { "epoch": 1.1007330869289988, "grad_norm": 0.5300311766221824, "learning_rate": 8.857011718926728e-05, "loss": 11.9661, "step": 20214 }, { "epoch": 1.1007875409255818, "grad_norm": 0.4804128022172825, "learning_rate": 8.85613567667662e-05, "loss": 11.9646, "step": 20215 }, { "epoch": 1.1008419949221648, "grad_norm": 0.6411895825078691, "learning_rate": 8.855259643321391e-05, "loss": 12.0086, "step": 20216 }, { "epoch": 1.1008964489187478, "grad_norm": 0.5045659544266304, "learning_rate": 8.854383618867857e-05, "loss": 11.9535, "step": 20217 }, { "epoch": 1.1009509029153308, "grad_norm": 0.5546269037658237, "learning_rate": 8.853507603322828e-05, "loss": 12.0156, "step": 20218 }, { "epoch": 1.1010053569119138, "grad_norm": 0.5472239519449394, "learning_rate": 8.852631596693115e-05, "loss": 12.0827, "step": 20219 }, { "epoch": 1.1010598109084968, "grad_norm": 0.6151265780247128, "learning_rate": 8.851755598985537e-05, "loss": 12.1356, "step": 20220 }, { "epoch": 1.1011142649050798, "grad_norm": 0.5877863943068412, "learning_rate": 8.850879610206894e-05, "loss": 12.0763, "step": 20221 }, { "epoch": 1.1011687189016628, "grad_norm": 0.5374816326611753, "learning_rate": 8.850003630364005e-05, "loss": 11.9959, "step": 20222 }, { "epoch": 1.101223172898246, "grad_norm": 0.6347521513899049, "learning_rate": 8.849127659463679e-05, "loss": 11.9279, "step": 20223 }, { "epoch": 1.101277626894829, "grad_norm": 0.5497226963963572, "learning_rate": 8.848251697512732e-05, "loss": 11.9248, "step": 20224 }, { "epoch": 1.101332080891412, "grad_norm": 0.5671760158929101, "learning_rate": 8.847375744517972e-05, "loss": 11.9878, "step": 20225 }, { "epoch": 1.101386534887995, "grad_norm": 0.505449844315798, "learning_rate": 8.846499800486211e-05, "loss": 12.0726, "step": 20226 }, { "epoch": 1.101440988884578, "grad_norm": 0.5733129507632411, "learning_rate": 8.845623865424262e-05, "loss": 12.1238, "step": 20227 }, { "epoch": 1.101495442881161, "grad_norm": 0.5792222536553667, "learning_rate": 8.844747939338933e-05, "loss": 12.0608, "step": 20228 }, { "epoch": 1.101549896877744, "grad_norm": 0.5357805512260676, "learning_rate": 8.843872022237039e-05, "loss": 12.0475, "step": 20229 }, { "epoch": 1.101604350874327, "grad_norm": 0.5568764403483965, "learning_rate": 8.842996114125395e-05, "loss": 11.9596, "step": 20230 }, { "epoch": 1.10165880487091, "grad_norm": 0.5652795661893997, "learning_rate": 8.842120215010803e-05, "loss": 12.1226, "step": 20231 }, { "epoch": 1.101713258867493, "grad_norm": 0.5696300592102768, "learning_rate": 8.841244324900076e-05, "loss": 12.1162, "step": 20232 }, { "epoch": 1.101767712864076, "grad_norm": 0.6205151314076434, "learning_rate": 8.84036844380003e-05, "loss": 12.1801, "step": 20233 }, { "epoch": 1.101822166860659, "grad_norm": 0.5466304713585389, "learning_rate": 8.839492571717473e-05, "loss": 11.9161, "step": 20234 }, { "epoch": 1.1018766208572421, "grad_norm": 0.5140270883111085, "learning_rate": 8.838616708659217e-05, "loss": 12.0871, "step": 20235 }, { "epoch": 1.1019310748538251, "grad_norm": 0.5832650218878539, "learning_rate": 8.83774085463207e-05, "loss": 12.0507, "step": 20236 }, { "epoch": 1.1019855288504081, "grad_norm": 0.6361237961683173, "learning_rate": 8.836865009642848e-05, "loss": 12.1832, "step": 20237 }, { "epoch": 1.1020399828469911, "grad_norm": 0.5433931408566341, "learning_rate": 8.835989173698358e-05, "loss": 12.112, "step": 20238 }, { "epoch": 1.102094436843574, "grad_norm": 0.531281605006899, "learning_rate": 8.835113346805408e-05, "loss": 12.1103, "step": 20239 }, { "epoch": 1.102148890840157, "grad_norm": 0.547987291454372, "learning_rate": 8.83423752897082e-05, "loss": 12.0016, "step": 20240 }, { "epoch": 1.10220334483674, "grad_norm": 0.6196430697296458, "learning_rate": 8.833361720201391e-05, "loss": 12.106, "step": 20241 }, { "epoch": 1.102257798833323, "grad_norm": 0.5075520452259732, "learning_rate": 8.832485920503937e-05, "loss": 12.064, "step": 20242 }, { "epoch": 1.102312252829906, "grad_norm": 0.573113397993567, "learning_rate": 8.831610129885266e-05, "loss": 12.1014, "step": 20243 }, { "epoch": 1.102366706826489, "grad_norm": 0.5467387171808178, "learning_rate": 8.830734348352195e-05, "loss": 12.0631, "step": 20244 }, { "epoch": 1.102421160823072, "grad_norm": 0.5386865259364804, "learning_rate": 8.829858575911527e-05, "loss": 12.0966, "step": 20245 }, { "epoch": 1.1024756148196553, "grad_norm": 0.5252737553681605, "learning_rate": 8.828982812570075e-05, "loss": 11.9106, "step": 20246 }, { "epoch": 1.1025300688162383, "grad_norm": 0.48756803933840664, "learning_rate": 8.82810705833465e-05, "loss": 11.9954, "step": 20247 }, { "epoch": 1.1025845228128213, "grad_norm": 0.5584157079026932, "learning_rate": 8.827231313212061e-05, "loss": 12.0073, "step": 20248 }, { "epoch": 1.1026389768094043, "grad_norm": 0.5613385083370157, "learning_rate": 8.826355577209118e-05, "loss": 12.0158, "step": 20249 }, { "epoch": 1.1026934308059873, "grad_norm": 0.5457529803685589, "learning_rate": 8.825479850332633e-05, "loss": 12.097, "step": 20250 }, { "epoch": 1.1027478848025702, "grad_norm": 0.567677049333743, "learning_rate": 8.824604132589412e-05, "loss": 12.0287, "step": 20251 }, { "epoch": 1.1028023387991532, "grad_norm": 0.4833128160398218, "learning_rate": 8.823728423986266e-05, "loss": 12.0636, "step": 20252 }, { "epoch": 1.1028567927957362, "grad_norm": 0.573142902524311, "learning_rate": 8.822852724530004e-05, "loss": 11.9064, "step": 20253 }, { "epoch": 1.1029112467923192, "grad_norm": 0.5058465021018069, "learning_rate": 8.821977034227435e-05, "loss": 12.0118, "step": 20254 }, { "epoch": 1.1029657007889022, "grad_norm": 0.5663852923558932, "learning_rate": 8.821101353085374e-05, "loss": 12.1247, "step": 20255 }, { "epoch": 1.1030201547854852, "grad_norm": 0.6070677912259818, "learning_rate": 8.820225681110624e-05, "loss": 12.1287, "step": 20256 }, { "epoch": 1.1030746087820682, "grad_norm": 0.541915999624938, "learning_rate": 8.819350018309999e-05, "loss": 12.1196, "step": 20257 }, { "epoch": 1.1031290627786514, "grad_norm": 0.5319353360682426, "learning_rate": 8.818474364690306e-05, "loss": 11.7641, "step": 20258 }, { "epoch": 1.1031835167752344, "grad_norm": 0.5757522620881451, "learning_rate": 8.817598720258353e-05, "loss": 11.9708, "step": 20259 }, { "epoch": 1.1032379707718174, "grad_norm": 0.6081238647231033, "learning_rate": 8.816723085020954e-05, "loss": 12.0519, "step": 20260 }, { "epoch": 1.1032924247684004, "grad_norm": 0.5427343785399987, "learning_rate": 8.815847458984911e-05, "loss": 12.0829, "step": 20261 }, { "epoch": 1.1033468787649834, "grad_norm": 0.5652734711236881, "learning_rate": 8.814971842157039e-05, "loss": 12.0157, "step": 20262 }, { "epoch": 1.1034013327615664, "grad_norm": 0.5534906612787915, "learning_rate": 8.814096234544143e-05, "loss": 12.0162, "step": 20263 }, { "epoch": 1.1034557867581494, "grad_norm": 0.5036252440792517, "learning_rate": 8.813220636153035e-05, "loss": 12.009, "step": 20264 }, { "epoch": 1.1035102407547324, "grad_norm": 0.5257087999044628, "learning_rate": 8.812345046990519e-05, "loss": 11.9534, "step": 20265 }, { "epoch": 1.1035646947513154, "grad_norm": 0.49793946160611324, "learning_rate": 8.81146946706341e-05, "loss": 12.03, "step": 20266 }, { "epoch": 1.1036191487478983, "grad_norm": 0.5546572372244591, "learning_rate": 8.810593896378513e-05, "loss": 11.9098, "step": 20267 }, { "epoch": 1.1036736027444813, "grad_norm": 0.5424649818390285, "learning_rate": 8.809718334942639e-05, "loss": 11.9215, "step": 20268 }, { "epoch": 1.1037280567410646, "grad_norm": 0.5866022695322437, "learning_rate": 8.808842782762592e-05, "loss": 12.0288, "step": 20269 }, { "epoch": 1.1037825107376475, "grad_norm": 0.5200616494258773, "learning_rate": 8.807967239845187e-05, "loss": 12.028, "step": 20270 }, { "epoch": 1.1038369647342305, "grad_norm": 0.471892641538079, "learning_rate": 8.807091706197228e-05, "loss": 11.9984, "step": 20271 }, { "epoch": 1.1038914187308135, "grad_norm": 0.5069317963591853, "learning_rate": 8.806216181825522e-05, "loss": 12.0616, "step": 20272 }, { "epoch": 1.1039458727273965, "grad_norm": 0.5181631910018965, "learning_rate": 8.805340666736878e-05, "loss": 12.1398, "step": 20273 }, { "epoch": 1.1040003267239795, "grad_norm": 0.5026001587635697, "learning_rate": 8.804465160938108e-05, "loss": 12.0038, "step": 20274 }, { "epoch": 1.1040547807205625, "grad_norm": 0.5208749547424139, "learning_rate": 8.803589664436017e-05, "loss": 11.9452, "step": 20275 }, { "epoch": 1.1041092347171455, "grad_norm": 0.5329006436594417, "learning_rate": 8.802714177237412e-05, "loss": 12.0868, "step": 20276 }, { "epoch": 1.1041636887137285, "grad_norm": 0.6238998929891365, "learning_rate": 8.801838699349101e-05, "loss": 12.08, "step": 20277 }, { "epoch": 1.1042181427103115, "grad_norm": 0.5713533790144977, "learning_rate": 8.800963230777896e-05, "loss": 12.0738, "step": 20278 }, { "epoch": 1.1042725967068945, "grad_norm": 0.5067501866499987, "learning_rate": 8.8000877715306e-05, "loss": 12.1064, "step": 20279 }, { "epoch": 1.1043270507034775, "grad_norm": 0.5696655009372665, "learning_rate": 8.799212321614029e-05, "loss": 12.0327, "step": 20280 }, { "epoch": 1.1043815047000605, "grad_norm": 0.5907525642848902, "learning_rate": 8.798336881034976e-05, "loss": 11.9661, "step": 20281 }, { "epoch": 1.1044359586966437, "grad_norm": 0.5029806148791618, "learning_rate": 8.797461449800262e-05, "loss": 12.0543, "step": 20282 }, { "epoch": 1.1044904126932267, "grad_norm": 0.5740037503429757, "learning_rate": 8.796586027916686e-05, "loss": 12.0909, "step": 20283 }, { "epoch": 1.1045448666898097, "grad_norm": 0.5359098765190763, "learning_rate": 8.795710615391061e-05, "loss": 12.0901, "step": 20284 }, { "epoch": 1.1045993206863927, "grad_norm": 0.5381826336912698, "learning_rate": 8.794835212230193e-05, "loss": 11.9385, "step": 20285 }, { "epoch": 1.1046537746829757, "grad_norm": 0.5164361531574544, "learning_rate": 8.793959818440887e-05, "loss": 11.9999, "step": 20286 }, { "epoch": 1.1047082286795586, "grad_norm": 0.4952044177832422, "learning_rate": 8.793084434029952e-05, "loss": 11.934, "step": 20287 }, { "epoch": 1.1047626826761416, "grad_norm": 0.5353442979609403, "learning_rate": 8.792209059004193e-05, "loss": 11.9958, "step": 20288 }, { "epoch": 1.1048171366727246, "grad_norm": 0.5198804593022757, "learning_rate": 8.79133369337042e-05, "loss": 11.8735, "step": 20289 }, { "epoch": 1.1048715906693076, "grad_norm": 0.5539164060775061, "learning_rate": 8.790458337135444e-05, "loss": 11.9442, "step": 20290 }, { "epoch": 1.1049260446658906, "grad_norm": 0.5149487649125211, "learning_rate": 8.789582990306062e-05, "loss": 12.0169, "step": 20291 }, { "epoch": 1.1049804986624736, "grad_norm": 0.5593324766953811, "learning_rate": 8.788707652889084e-05, "loss": 12.0518, "step": 20292 }, { "epoch": 1.1050349526590568, "grad_norm": 0.5223788666070568, "learning_rate": 8.78783232489132e-05, "loss": 12.0239, "step": 20293 }, { "epoch": 1.1050894066556398, "grad_norm": 0.6409900434279258, "learning_rate": 8.786957006319577e-05, "loss": 11.9321, "step": 20294 }, { "epoch": 1.1051438606522228, "grad_norm": 0.5991656646274143, "learning_rate": 8.786081697180659e-05, "loss": 12.1513, "step": 20295 }, { "epoch": 1.1051983146488058, "grad_norm": 0.5437244510373981, "learning_rate": 8.785206397481371e-05, "loss": 11.9969, "step": 20296 }, { "epoch": 1.1052527686453888, "grad_norm": 0.5410626905453222, "learning_rate": 8.784331107228525e-05, "loss": 12.117, "step": 20297 }, { "epoch": 1.1053072226419718, "grad_norm": 0.5280928701320662, "learning_rate": 8.783455826428921e-05, "loss": 12.0798, "step": 20298 }, { "epoch": 1.1053616766385548, "grad_norm": 0.5852564727198065, "learning_rate": 8.782580555089368e-05, "loss": 11.9185, "step": 20299 }, { "epoch": 1.1054161306351378, "grad_norm": 0.5640739042641488, "learning_rate": 8.78170529321668e-05, "loss": 12.045, "step": 20300 }, { "epoch": 1.1054705846317208, "grad_norm": 0.5711766805877603, "learning_rate": 8.780830040817651e-05, "loss": 12.0574, "step": 20301 }, { "epoch": 1.1055250386283038, "grad_norm": 0.5487375604738145, "learning_rate": 8.779954797899091e-05, "loss": 12.0689, "step": 20302 }, { "epoch": 1.1055794926248868, "grad_norm": 0.5463485657321694, "learning_rate": 8.779079564467807e-05, "loss": 12.0732, "step": 20303 }, { "epoch": 1.1056339466214697, "grad_norm": 0.5958010094516646, "learning_rate": 8.778204340530606e-05, "loss": 12.14, "step": 20304 }, { "epoch": 1.105688400618053, "grad_norm": 0.555501224415568, "learning_rate": 8.777329126094292e-05, "loss": 11.9945, "step": 20305 }, { "epoch": 1.105742854614636, "grad_norm": 0.5719709735434838, "learning_rate": 8.776453921165674e-05, "loss": 12.0651, "step": 20306 }, { "epoch": 1.105797308611219, "grad_norm": 0.5703447668093605, "learning_rate": 8.775578725751553e-05, "loss": 11.9732, "step": 20307 }, { "epoch": 1.105851762607802, "grad_norm": 0.584597127790104, "learning_rate": 8.77470353985874e-05, "loss": 12.0707, "step": 20308 }, { "epoch": 1.105906216604385, "grad_norm": 0.5099775442129576, "learning_rate": 8.773828363494036e-05, "loss": 11.9749, "step": 20309 }, { "epoch": 1.105960670600968, "grad_norm": 0.5249223983386117, "learning_rate": 8.77295319666425e-05, "loss": 11.933, "step": 20310 }, { "epoch": 1.106015124597551, "grad_norm": 0.5734136019444562, "learning_rate": 8.772078039376184e-05, "loss": 11.9393, "step": 20311 }, { "epoch": 1.106069578594134, "grad_norm": 0.6383360418108145, "learning_rate": 8.771202891636646e-05, "loss": 12.2153, "step": 20312 }, { "epoch": 1.106124032590717, "grad_norm": 0.564871378026721, "learning_rate": 8.770327753452438e-05, "loss": 12.0592, "step": 20313 }, { "epoch": 1.1061784865873, "grad_norm": 0.5003922451111757, "learning_rate": 8.769452624830367e-05, "loss": 11.9846, "step": 20314 }, { "epoch": 1.1062329405838829, "grad_norm": 0.6299781577491396, "learning_rate": 8.768577505777242e-05, "loss": 11.9441, "step": 20315 }, { "epoch": 1.106287394580466, "grad_norm": 0.6763607365341504, "learning_rate": 8.767702396299864e-05, "loss": 12.0107, "step": 20316 }, { "epoch": 1.106341848577049, "grad_norm": 0.5623560295924026, "learning_rate": 8.766827296405039e-05, "loss": 12.0481, "step": 20317 }, { "epoch": 1.106396302573632, "grad_norm": 0.5965173681242213, "learning_rate": 8.765952206099572e-05, "loss": 12.0934, "step": 20318 }, { "epoch": 1.106450756570215, "grad_norm": 0.5279141459995098, "learning_rate": 8.765077125390266e-05, "loss": 12.0243, "step": 20319 }, { "epoch": 1.106505210566798, "grad_norm": 0.5381397115637279, "learning_rate": 8.76420205428393e-05, "loss": 12.0341, "step": 20320 }, { "epoch": 1.106559664563381, "grad_norm": 0.5484374520949024, "learning_rate": 8.763326992787365e-05, "loss": 11.9527, "step": 20321 }, { "epoch": 1.106614118559964, "grad_norm": 0.5771416993957864, "learning_rate": 8.762451940907376e-05, "loss": 12.0113, "step": 20322 }, { "epoch": 1.106668572556547, "grad_norm": 0.5563674089393889, "learning_rate": 8.761576898650768e-05, "loss": 11.9658, "step": 20323 }, { "epoch": 1.10672302655313, "grad_norm": 0.5536476891827287, "learning_rate": 8.760701866024347e-05, "loss": 12.0788, "step": 20324 }, { "epoch": 1.106777480549713, "grad_norm": 0.5891056273868874, "learning_rate": 8.759826843034915e-05, "loss": 12.0831, "step": 20325 }, { "epoch": 1.106831934546296, "grad_norm": 0.599718056726197, "learning_rate": 8.758951829689275e-05, "loss": 12.0573, "step": 20326 }, { "epoch": 1.106886388542879, "grad_norm": 0.5371038737374279, "learning_rate": 8.758076825994237e-05, "loss": 12.0321, "step": 20327 }, { "epoch": 1.1069408425394622, "grad_norm": 0.6004552767530613, "learning_rate": 8.7572018319566e-05, "loss": 12.0596, "step": 20328 }, { "epoch": 1.1069952965360452, "grad_norm": 0.5865194360324422, "learning_rate": 8.756326847583171e-05, "loss": 12.0869, "step": 20329 }, { "epoch": 1.1070497505326282, "grad_norm": 0.5048031034073145, "learning_rate": 8.755451872880757e-05, "loss": 12.0092, "step": 20330 }, { "epoch": 1.1071042045292112, "grad_norm": 0.5322675844630659, "learning_rate": 8.754576907856154e-05, "loss": 12.0208, "step": 20331 }, { "epoch": 1.1071586585257942, "grad_norm": 0.5441646557030124, "learning_rate": 8.753701952516169e-05, "loss": 12.0182, "step": 20332 }, { "epoch": 1.1072131125223772, "grad_norm": 0.5608449926792899, "learning_rate": 8.752827006867607e-05, "loss": 11.9662, "step": 20333 }, { "epoch": 1.1072675665189602, "grad_norm": 0.5723882401594803, "learning_rate": 8.751952070917273e-05, "loss": 11.997, "step": 20334 }, { "epoch": 1.1073220205155432, "grad_norm": 0.5455646117400574, "learning_rate": 8.751077144671968e-05, "loss": 11.9964, "step": 20335 }, { "epoch": 1.1073764745121262, "grad_norm": 0.5869649085404693, "learning_rate": 8.750202228138497e-05, "loss": 12.0137, "step": 20336 }, { "epoch": 1.1074309285087092, "grad_norm": 0.4929235067216394, "learning_rate": 8.749327321323659e-05, "loss": 11.9886, "step": 20337 }, { "epoch": 1.1074853825052922, "grad_norm": 0.542015727952765, "learning_rate": 8.748452424234266e-05, "loss": 12.0229, "step": 20338 }, { "epoch": 1.1075398365018754, "grad_norm": 0.5877668144850737, "learning_rate": 8.747577536877117e-05, "loss": 11.9163, "step": 20339 }, { "epoch": 1.1075942904984584, "grad_norm": 0.5608190697937362, "learning_rate": 8.746702659259017e-05, "loss": 12.0286, "step": 20340 }, { "epoch": 1.1076487444950414, "grad_norm": 0.513750153259443, "learning_rate": 8.745827791386762e-05, "loss": 12.0426, "step": 20341 }, { "epoch": 1.1077031984916244, "grad_norm": 0.5838775241989291, "learning_rate": 8.744952933267163e-05, "loss": 11.9967, "step": 20342 }, { "epoch": 1.1077576524882073, "grad_norm": 0.5905911772599802, "learning_rate": 8.744078084907021e-05, "loss": 12.1397, "step": 20343 }, { "epoch": 1.1078121064847903, "grad_norm": 0.49872395232447336, "learning_rate": 8.743203246313136e-05, "loss": 11.8628, "step": 20344 }, { "epoch": 1.1078665604813733, "grad_norm": 0.5638960798286776, "learning_rate": 8.742328417492316e-05, "loss": 12.0169, "step": 20345 }, { "epoch": 1.1079210144779563, "grad_norm": 0.5880102201495002, "learning_rate": 8.74145359845136e-05, "loss": 12.0852, "step": 20346 }, { "epoch": 1.1079754684745393, "grad_norm": 0.5782451800864982, "learning_rate": 8.740578789197071e-05, "loss": 12.0772, "step": 20347 }, { "epoch": 1.1080299224711223, "grad_norm": 0.5032096969628987, "learning_rate": 8.739703989736252e-05, "loss": 11.8333, "step": 20348 }, { "epoch": 1.1080843764677053, "grad_norm": 0.5392127032883857, "learning_rate": 8.738829200075707e-05, "loss": 12.1177, "step": 20349 }, { "epoch": 1.1081388304642883, "grad_norm": 0.5569545293518601, "learning_rate": 8.737954420222243e-05, "loss": 11.9352, "step": 20350 }, { "epoch": 1.1081932844608713, "grad_norm": 0.5571004661020185, "learning_rate": 8.737079650182653e-05, "loss": 12.1027, "step": 20351 }, { "epoch": 1.1082477384574545, "grad_norm": 0.5202757639842748, "learning_rate": 8.73620488996374e-05, "loss": 11.9588, "step": 20352 }, { "epoch": 1.1083021924540375, "grad_norm": 0.5326218707584636, "learning_rate": 8.735330139572312e-05, "loss": 12.1588, "step": 20353 }, { "epoch": 1.1083566464506205, "grad_norm": 0.581581715781423, "learning_rate": 8.73445539901517e-05, "loss": 12.0124, "step": 20354 }, { "epoch": 1.1084111004472035, "grad_norm": 0.5643813781203146, "learning_rate": 8.733580668299113e-05, "loss": 12.0239, "step": 20355 }, { "epoch": 1.1084655544437865, "grad_norm": 0.4929896961613901, "learning_rate": 8.732705947430948e-05, "loss": 11.9148, "step": 20356 }, { "epoch": 1.1085200084403695, "grad_norm": 0.5223543752886799, "learning_rate": 8.731831236417472e-05, "loss": 11.9633, "step": 20357 }, { "epoch": 1.1085744624369525, "grad_norm": 0.5729833090573728, "learning_rate": 8.73095653526549e-05, "loss": 12.0722, "step": 20358 }, { "epoch": 1.1086289164335354, "grad_norm": 0.5251201813942938, "learning_rate": 8.7300818439818e-05, "loss": 12.0149, "step": 20359 }, { "epoch": 1.1086833704301184, "grad_norm": 0.558311215109325, "learning_rate": 8.729207162573214e-05, "loss": 11.9108, "step": 20360 }, { "epoch": 1.1087378244267014, "grad_norm": 0.5365779165147309, "learning_rate": 8.72833249104652e-05, "loss": 12.0024, "step": 20361 }, { "epoch": 1.1087922784232844, "grad_norm": 0.5151686326710411, "learning_rate": 8.727457829408527e-05, "loss": 11.9575, "step": 20362 }, { "epoch": 1.1088467324198676, "grad_norm": 0.576146484496607, "learning_rate": 8.726583177666034e-05, "loss": 12.146, "step": 20363 }, { "epoch": 1.1089011864164506, "grad_norm": 0.5604576831869068, "learning_rate": 8.725708535825845e-05, "loss": 12.0227, "step": 20364 }, { "epoch": 1.1089556404130336, "grad_norm": 0.5488182443925755, "learning_rate": 8.724833903894761e-05, "loss": 12.0595, "step": 20365 }, { "epoch": 1.1090100944096166, "grad_norm": 0.5702859955842696, "learning_rate": 8.72395928187958e-05, "loss": 12.0216, "step": 20366 }, { "epoch": 1.1090645484061996, "grad_norm": 0.630035859859341, "learning_rate": 8.723084669787107e-05, "loss": 12.0802, "step": 20367 }, { "epoch": 1.1091190024027826, "grad_norm": 0.5346963959972917, "learning_rate": 8.722210067624143e-05, "loss": 12.0565, "step": 20368 }, { "epoch": 1.1091734563993656, "grad_norm": 0.5638355168601141, "learning_rate": 8.721335475397486e-05, "loss": 12.0975, "step": 20369 }, { "epoch": 1.1092279103959486, "grad_norm": 0.544604177941654, "learning_rate": 8.72046089311394e-05, "loss": 11.9492, "step": 20370 }, { "epoch": 1.1092823643925316, "grad_norm": 0.5112490088018798, "learning_rate": 8.719586320780307e-05, "loss": 11.96, "step": 20371 }, { "epoch": 1.1093368183891146, "grad_norm": 0.5328271332370599, "learning_rate": 8.718711758403382e-05, "loss": 11.9347, "step": 20372 }, { "epoch": 1.1093912723856976, "grad_norm": 0.5674785663412966, "learning_rate": 8.717837205989969e-05, "loss": 11.9964, "step": 20373 }, { "epoch": 1.1094457263822806, "grad_norm": 0.5953098610929797, "learning_rate": 8.71696266354687e-05, "loss": 12.0257, "step": 20374 }, { "epoch": 1.1095001803788638, "grad_norm": 0.5529267917614739, "learning_rate": 8.716088131080882e-05, "loss": 11.9219, "step": 20375 }, { "epoch": 1.1095546343754468, "grad_norm": 0.612605982175686, "learning_rate": 8.715213608598811e-05, "loss": 12.0591, "step": 20376 }, { "epoch": 1.1096090883720298, "grad_norm": 0.6502316142012194, "learning_rate": 8.714339096107454e-05, "loss": 12.023, "step": 20377 }, { "epoch": 1.1096635423686128, "grad_norm": 0.5152773212699467, "learning_rate": 8.713464593613612e-05, "loss": 11.94, "step": 20378 }, { "epoch": 1.1097179963651957, "grad_norm": 0.5519647029298497, "learning_rate": 8.712590101124084e-05, "loss": 12.0978, "step": 20379 }, { "epoch": 1.1097724503617787, "grad_norm": 0.5142008477824004, "learning_rate": 8.711715618645671e-05, "loss": 12.1463, "step": 20380 }, { "epoch": 1.1098269043583617, "grad_norm": 0.5431164123937956, "learning_rate": 8.710841146185177e-05, "loss": 12.0085, "step": 20381 }, { "epoch": 1.1098813583549447, "grad_norm": 0.5465098198668166, "learning_rate": 8.709966683749396e-05, "loss": 11.8521, "step": 20382 }, { "epoch": 1.1099358123515277, "grad_norm": 0.5771607951376807, "learning_rate": 8.70909223134513e-05, "loss": 12.0074, "step": 20383 }, { "epoch": 1.1099902663481107, "grad_norm": 0.5732513893159404, "learning_rate": 8.70821778897918e-05, "loss": 12.0577, "step": 20384 }, { "epoch": 1.1100447203446937, "grad_norm": 0.5687566895392488, "learning_rate": 8.707343356658344e-05, "loss": 12.0488, "step": 20385 }, { "epoch": 1.110099174341277, "grad_norm": 0.5436380291903499, "learning_rate": 8.706468934389421e-05, "loss": 11.9659, "step": 20386 }, { "epoch": 1.11015362833786, "grad_norm": 0.5525146804435588, "learning_rate": 8.705594522179214e-05, "loss": 12.0789, "step": 20387 }, { "epoch": 1.110208082334443, "grad_norm": 0.5607722918179316, "learning_rate": 8.704720120034523e-05, "loss": 12.0246, "step": 20388 }, { "epoch": 1.110262536331026, "grad_norm": 0.5643884542944015, "learning_rate": 8.703845727962144e-05, "loss": 12.0696, "step": 20389 }, { "epoch": 1.1103169903276089, "grad_norm": 0.5559613977033397, "learning_rate": 8.702971345968879e-05, "loss": 12.0368, "step": 20390 }, { "epoch": 1.1103714443241919, "grad_norm": 0.5926001588421552, "learning_rate": 8.702096974061527e-05, "loss": 11.9943, "step": 20391 }, { "epoch": 1.1104258983207749, "grad_norm": 0.5782246297613625, "learning_rate": 8.701222612246887e-05, "loss": 12.0798, "step": 20392 }, { "epoch": 1.1104803523173579, "grad_norm": 0.5650841524659677, "learning_rate": 8.700348260531756e-05, "loss": 12.1178, "step": 20393 }, { "epoch": 1.1105348063139409, "grad_norm": 0.527666922247384, "learning_rate": 8.699473918922934e-05, "loss": 11.9583, "step": 20394 }, { "epoch": 1.1105892603105239, "grad_norm": 0.4831398814838757, "learning_rate": 8.698599587427223e-05, "loss": 12.0063, "step": 20395 }, { "epoch": 1.1106437143071068, "grad_norm": 0.6126566647506558, "learning_rate": 8.697725266051419e-05, "loss": 12.1569, "step": 20396 }, { "epoch": 1.1106981683036898, "grad_norm": 0.6469344452197978, "learning_rate": 8.696850954802319e-05, "loss": 12.0177, "step": 20397 }, { "epoch": 1.110752622300273, "grad_norm": 0.5665602869832791, "learning_rate": 8.695976653686726e-05, "loss": 12.167, "step": 20398 }, { "epoch": 1.110807076296856, "grad_norm": 0.5185074852028833, "learning_rate": 8.695102362711439e-05, "loss": 12.0196, "step": 20399 }, { "epoch": 1.110861530293439, "grad_norm": 0.5260885913422563, "learning_rate": 8.694228081883254e-05, "loss": 12.0168, "step": 20400 }, { "epoch": 1.110915984290022, "grad_norm": 0.6708326897002365, "learning_rate": 8.693353811208973e-05, "loss": 12.0019, "step": 20401 }, { "epoch": 1.110970438286605, "grad_norm": 0.5639695878205355, "learning_rate": 8.69247955069539e-05, "loss": 12.1039, "step": 20402 }, { "epoch": 1.111024892283188, "grad_norm": 0.5449293478224777, "learning_rate": 8.691605300349304e-05, "loss": 12.0094, "step": 20403 }, { "epoch": 1.111079346279771, "grad_norm": 0.5740721272416982, "learning_rate": 8.690731060177515e-05, "loss": 12.112, "step": 20404 }, { "epoch": 1.111133800276354, "grad_norm": 0.4930037984290305, "learning_rate": 8.68985683018682e-05, "loss": 12.0775, "step": 20405 }, { "epoch": 1.111188254272937, "grad_norm": 0.6027153909312933, "learning_rate": 8.68898261038402e-05, "loss": 11.9441, "step": 20406 }, { "epoch": 1.11124270826952, "grad_norm": 0.5260394731253033, "learning_rate": 8.68810840077591e-05, "loss": 11.9547, "step": 20407 }, { "epoch": 1.111297162266103, "grad_norm": 0.604684947929935, "learning_rate": 8.687234201369287e-05, "loss": 12.1559, "step": 20408 }, { "epoch": 1.1113516162626862, "grad_norm": 0.5853957597714421, "learning_rate": 8.686360012170954e-05, "loss": 11.8884, "step": 20409 }, { "epoch": 1.1114060702592692, "grad_norm": 0.5204564010716333, "learning_rate": 8.685485833187706e-05, "loss": 12.0701, "step": 20410 }, { "epoch": 1.1114605242558522, "grad_norm": 0.554571373235644, "learning_rate": 8.684611664426344e-05, "loss": 12.0565, "step": 20411 }, { "epoch": 1.1115149782524352, "grad_norm": 0.49278774796167696, "learning_rate": 8.683737505893655e-05, "loss": 11.9681, "step": 20412 }, { "epoch": 1.1115694322490182, "grad_norm": 0.5799618073088181, "learning_rate": 8.682863357596447e-05, "loss": 11.8597, "step": 20413 }, { "epoch": 1.1116238862456012, "grad_norm": 0.6174819718064491, "learning_rate": 8.681989219541516e-05, "loss": 12.0611, "step": 20414 }, { "epoch": 1.1116783402421841, "grad_norm": 0.5563880126339784, "learning_rate": 8.681115091735654e-05, "loss": 12.0307, "step": 20415 }, { "epoch": 1.1117327942387671, "grad_norm": 0.5428688058215628, "learning_rate": 8.680240974185665e-05, "loss": 11.9264, "step": 20416 }, { "epoch": 1.1117872482353501, "grad_norm": 0.6190313780390646, "learning_rate": 8.679366866898343e-05, "loss": 12.1029, "step": 20417 }, { "epoch": 1.1118417022319331, "grad_norm": 0.5624661924655168, "learning_rate": 8.678492769880486e-05, "loss": 12.0339, "step": 20418 }, { "epoch": 1.1118961562285161, "grad_norm": 0.5296270992929764, "learning_rate": 8.677618683138889e-05, "loss": 12.0566, "step": 20419 }, { "epoch": 1.1119506102250991, "grad_norm": 0.6563129787591534, "learning_rate": 8.676744606680352e-05, "loss": 12.0153, "step": 20420 }, { "epoch": 1.112005064221682, "grad_norm": 0.47750180191668606, "learning_rate": 8.675870540511675e-05, "loss": 12.0877, "step": 20421 }, { "epoch": 1.1120595182182653, "grad_norm": 0.5971189211479879, "learning_rate": 8.674996484639647e-05, "loss": 12.052, "step": 20422 }, { "epoch": 1.1121139722148483, "grad_norm": 0.5647061566072635, "learning_rate": 8.674122439071069e-05, "loss": 11.9557, "step": 20423 }, { "epoch": 1.1121684262114313, "grad_norm": 0.5718603926874921, "learning_rate": 8.673248403812735e-05, "loss": 12.0426, "step": 20424 }, { "epoch": 1.1122228802080143, "grad_norm": 0.5862622020485386, "learning_rate": 8.672374378871445e-05, "loss": 11.9336, "step": 20425 }, { "epoch": 1.1122773342045973, "grad_norm": 0.5319568509209703, "learning_rate": 8.671500364253995e-05, "loss": 12.0574, "step": 20426 }, { "epoch": 1.1123317882011803, "grad_norm": 0.5581882484166962, "learning_rate": 8.670626359967181e-05, "loss": 11.901, "step": 20427 }, { "epoch": 1.1123862421977633, "grad_norm": 0.5718910216535872, "learning_rate": 8.669752366017799e-05, "loss": 11.9544, "step": 20428 }, { "epoch": 1.1124406961943463, "grad_norm": 0.535119723211312, "learning_rate": 8.668878382412646e-05, "loss": 11.8905, "step": 20429 }, { "epoch": 1.1124951501909293, "grad_norm": 0.5585066425693939, "learning_rate": 8.668004409158519e-05, "loss": 12.0553, "step": 20430 }, { "epoch": 1.1125496041875123, "grad_norm": 0.5887672763553066, "learning_rate": 8.667130446262214e-05, "loss": 12.0295, "step": 20431 }, { "epoch": 1.1126040581840952, "grad_norm": 0.5512357232765116, "learning_rate": 8.666256493730525e-05, "loss": 12.0979, "step": 20432 }, { "epoch": 1.1126585121806785, "grad_norm": 0.5512854382094922, "learning_rate": 8.665382551570248e-05, "loss": 12.1492, "step": 20433 }, { "epoch": 1.1127129661772615, "grad_norm": 0.5461053454512196, "learning_rate": 8.664508619788181e-05, "loss": 12.1001, "step": 20434 }, { "epoch": 1.1127674201738444, "grad_norm": 0.6048445467025007, "learning_rate": 8.663634698391117e-05, "loss": 12.1017, "step": 20435 }, { "epoch": 1.1128218741704274, "grad_norm": 0.5082271795316599, "learning_rate": 8.662760787385854e-05, "loss": 12.0915, "step": 20436 }, { "epoch": 1.1128763281670104, "grad_norm": 0.5304286711573969, "learning_rate": 8.661886886779189e-05, "loss": 12.0, "step": 20437 }, { "epoch": 1.1129307821635934, "grad_norm": 0.641587832548868, "learning_rate": 8.661012996577915e-05, "loss": 12.0823, "step": 20438 }, { "epoch": 1.1129852361601764, "grad_norm": 0.5334425328832866, "learning_rate": 8.66013911678883e-05, "loss": 12.0591, "step": 20439 }, { "epoch": 1.1130396901567594, "grad_norm": 0.5508297466926332, "learning_rate": 8.659265247418727e-05, "loss": 11.9763, "step": 20440 }, { "epoch": 1.1130941441533424, "grad_norm": 0.6130747054086031, "learning_rate": 8.658391388474404e-05, "loss": 12.0046, "step": 20441 }, { "epoch": 1.1131485981499254, "grad_norm": 0.5395795817917738, "learning_rate": 8.657517539962654e-05, "loss": 12.0152, "step": 20442 }, { "epoch": 1.1132030521465084, "grad_norm": 0.6118794450591564, "learning_rate": 8.65664370189027e-05, "loss": 11.8971, "step": 20443 }, { "epoch": 1.1132575061430914, "grad_norm": 0.5236304783024495, "learning_rate": 8.655769874264052e-05, "loss": 11.9936, "step": 20444 }, { "epoch": 1.1133119601396746, "grad_norm": 0.6092612193110597, "learning_rate": 8.654896057090792e-05, "loss": 12.0868, "step": 20445 }, { "epoch": 1.1133664141362576, "grad_norm": 0.5321936221086799, "learning_rate": 8.654022250377283e-05, "loss": 12.0753, "step": 20446 }, { "epoch": 1.1134208681328406, "grad_norm": 0.6038692861905606, "learning_rate": 8.653148454130327e-05, "loss": 12.0654, "step": 20447 }, { "epoch": 1.1134753221294236, "grad_norm": 0.5325827501991652, "learning_rate": 8.652274668356713e-05, "loss": 12.0671, "step": 20448 }, { "epoch": 1.1135297761260066, "grad_norm": 0.5699565426641002, "learning_rate": 8.651400893063238e-05, "loss": 11.9932, "step": 20449 }, { "epoch": 1.1135842301225896, "grad_norm": 0.5456973476350043, "learning_rate": 8.650527128256695e-05, "loss": 12.0736, "step": 20450 }, { "epoch": 1.1136386841191726, "grad_norm": 0.6364466397889185, "learning_rate": 8.649653373943883e-05, "loss": 12.1013, "step": 20451 }, { "epoch": 1.1136931381157555, "grad_norm": 0.5958478372373719, "learning_rate": 8.648779630131589e-05, "loss": 11.9619, "step": 20452 }, { "epoch": 1.1137475921123385, "grad_norm": 0.5976355951406671, "learning_rate": 8.647905896826611e-05, "loss": 12.0252, "step": 20453 }, { "epoch": 1.1138020461089215, "grad_norm": 0.5255235963552749, "learning_rate": 8.647032174035744e-05, "loss": 12.146, "step": 20454 }, { "epoch": 1.1138565001055045, "grad_norm": 0.6238917781714524, "learning_rate": 8.646158461765782e-05, "loss": 12.003, "step": 20455 }, { "epoch": 1.1139109541020877, "grad_norm": 0.5990554357556224, "learning_rate": 8.645284760023519e-05, "loss": 11.9492, "step": 20456 }, { "epoch": 1.1139654080986707, "grad_norm": 0.5639341509059039, "learning_rate": 8.644411068815747e-05, "loss": 11.8785, "step": 20457 }, { "epoch": 1.1140198620952537, "grad_norm": 0.5225938502442502, "learning_rate": 8.643537388149263e-05, "loss": 12.0676, "step": 20458 }, { "epoch": 1.1140743160918367, "grad_norm": 0.6080400007278929, "learning_rate": 8.64266371803086e-05, "loss": 12.0214, "step": 20459 }, { "epoch": 1.1141287700884197, "grad_norm": 0.5962382006537994, "learning_rate": 8.641790058467332e-05, "loss": 12.0873, "step": 20460 }, { "epoch": 1.1141832240850027, "grad_norm": 0.5414322662812325, "learning_rate": 8.640916409465474e-05, "loss": 12.0977, "step": 20461 }, { "epoch": 1.1142376780815857, "grad_norm": 0.5442969491055653, "learning_rate": 8.640042771032076e-05, "loss": 11.9356, "step": 20462 }, { "epoch": 1.1142921320781687, "grad_norm": 0.5857955462241692, "learning_rate": 8.639169143173934e-05, "loss": 11.9875, "step": 20463 }, { "epoch": 1.1143465860747517, "grad_norm": 0.5722991266071655, "learning_rate": 8.63829552589784e-05, "loss": 11.9408, "step": 20464 }, { "epoch": 1.1144010400713347, "grad_norm": 0.5745187959531379, "learning_rate": 8.637421919210588e-05, "loss": 12.0425, "step": 20465 }, { "epoch": 1.1144554940679177, "grad_norm": 0.5737292188169556, "learning_rate": 8.636548323118974e-05, "loss": 12.011, "step": 20466 }, { "epoch": 1.1145099480645007, "grad_norm": 0.5953915731831864, "learning_rate": 8.635674737629786e-05, "loss": 11.9588, "step": 20467 }, { "epoch": 1.1145644020610839, "grad_norm": 0.6105710976926062, "learning_rate": 8.634801162749819e-05, "loss": 12.0818, "step": 20468 }, { "epoch": 1.1146188560576669, "grad_norm": 0.4860264760757827, "learning_rate": 8.63392759848587e-05, "loss": 11.9673, "step": 20469 }, { "epoch": 1.1146733100542499, "grad_norm": 0.4796813075837099, "learning_rate": 8.633054044844729e-05, "loss": 12.024, "step": 20470 }, { "epoch": 1.1147277640508328, "grad_norm": 0.5950842721789306, "learning_rate": 8.632180501833192e-05, "loss": 12.0347, "step": 20471 }, { "epoch": 1.1147822180474158, "grad_norm": 0.6542190907390353, "learning_rate": 8.631306969458047e-05, "loss": 12.1151, "step": 20472 }, { "epoch": 1.1148366720439988, "grad_norm": 0.5149019425783683, "learning_rate": 8.630433447726084e-05, "loss": 12.0373, "step": 20473 }, { "epoch": 1.1148911260405818, "grad_norm": 0.5401207092106911, "learning_rate": 8.629559936644103e-05, "loss": 12.0794, "step": 20474 }, { "epoch": 1.1149455800371648, "grad_norm": 0.5253189533374024, "learning_rate": 8.628686436218894e-05, "loss": 11.6059, "step": 20475 }, { "epoch": 1.1150000340337478, "grad_norm": 0.5998630256837638, "learning_rate": 8.627812946457249e-05, "loss": 12.0531, "step": 20476 }, { "epoch": 1.1150544880303308, "grad_norm": 0.5713336693894561, "learning_rate": 8.626939467365961e-05, "loss": 12.1183, "step": 20477 }, { "epoch": 1.1151089420269138, "grad_norm": 0.6539845428263784, "learning_rate": 8.626065998951821e-05, "loss": 12.0565, "step": 20478 }, { "epoch": 1.115163396023497, "grad_norm": 0.5795015131651489, "learning_rate": 8.625192541221623e-05, "loss": 12.1167, "step": 20479 }, { "epoch": 1.11521785002008, "grad_norm": 0.6687968959099096, "learning_rate": 8.624319094182157e-05, "loss": 11.9152, "step": 20480 }, { "epoch": 1.115272304016663, "grad_norm": 0.5662463661833551, "learning_rate": 8.623445657840222e-05, "loss": 12.0687, "step": 20481 }, { "epoch": 1.115326758013246, "grad_norm": 0.5569974547007831, "learning_rate": 8.622572232202599e-05, "loss": 12.0152, "step": 20482 }, { "epoch": 1.115381212009829, "grad_norm": 0.5356611435949286, "learning_rate": 8.621698817276087e-05, "loss": 11.987, "step": 20483 }, { "epoch": 1.115435666006412, "grad_norm": 0.5740871642686105, "learning_rate": 8.620825413067473e-05, "loss": 12.1727, "step": 20484 }, { "epoch": 1.115490120002995, "grad_norm": 0.5397304764024144, "learning_rate": 8.619952019583555e-05, "loss": 11.9573, "step": 20485 }, { "epoch": 1.115544573999578, "grad_norm": 0.5413113226753937, "learning_rate": 8.61907863683112e-05, "loss": 12.1108, "step": 20486 }, { "epoch": 1.115599027996161, "grad_norm": 0.5224978649881019, "learning_rate": 8.618205264816962e-05, "loss": 12.0286, "step": 20487 }, { "epoch": 1.115653481992744, "grad_norm": 0.49798079977913895, "learning_rate": 8.617331903547872e-05, "loss": 11.9539, "step": 20488 }, { "epoch": 1.115707935989327, "grad_norm": 0.5374865498581153, "learning_rate": 8.616458553030641e-05, "loss": 11.9546, "step": 20489 }, { "epoch": 1.11576238998591, "grad_norm": 0.5421550574793342, "learning_rate": 8.615585213272059e-05, "loss": 12.0558, "step": 20490 }, { "epoch": 1.115816843982493, "grad_norm": 0.5236490567634463, "learning_rate": 8.614711884278922e-05, "loss": 11.924, "step": 20491 }, { "epoch": 1.1158712979790761, "grad_norm": 0.5894706823009638, "learning_rate": 8.613838566058014e-05, "loss": 12.1306, "step": 20492 }, { "epoch": 1.1159257519756591, "grad_norm": 0.4902113230346238, "learning_rate": 8.612965258616133e-05, "loss": 11.9747, "step": 20493 }, { "epoch": 1.1159802059722421, "grad_norm": 0.5215407998834232, "learning_rate": 8.612091961960064e-05, "loss": 12.0714, "step": 20494 }, { "epoch": 1.1160346599688251, "grad_norm": 0.5243379429072624, "learning_rate": 8.611218676096599e-05, "loss": 12.0208, "step": 20495 }, { "epoch": 1.116089113965408, "grad_norm": 0.6471210179145048, "learning_rate": 8.610345401032532e-05, "loss": 12.1623, "step": 20496 }, { "epoch": 1.116143567961991, "grad_norm": 0.4908431841804648, "learning_rate": 8.609472136774654e-05, "loss": 11.8585, "step": 20497 }, { "epoch": 1.116198021958574, "grad_norm": 0.5336784621895109, "learning_rate": 8.608598883329752e-05, "loss": 11.7578, "step": 20498 }, { "epoch": 1.116252475955157, "grad_norm": 0.539822673026537, "learning_rate": 8.60772564070462e-05, "loss": 12.0355, "step": 20499 }, { "epoch": 1.11630692995174, "grad_norm": 0.5368551058329707, "learning_rate": 8.606852408906047e-05, "loss": 11.9642, "step": 20500 }, { "epoch": 1.116361383948323, "grad_norm": 0.5519632231326893, "learning_rate": 8.605979187940823e-05, "loss": 12.0173, "step": 20501 }, { "epoch": 1.1164158379449063, "grad_norm": 0.558058323641093, "learning_rate": 8.605105977815739e-05, "loss": 11.9624, "step": 20502 }, { "epoch": 1.1164702919414893, "grad_norm": 0.5412411084680292, "learning_rate": 8.604232778537584e-05, "loss": 12.0976, "step": 20503 }, { "epoch": 1.1165247459380723, "grad_norm": 0.6807011047904982, "learning_rate": 8.60335959011315e-05, "loss": 12.1143, "step": 20504 }, { "epoch": 1.1165791999346553, "grad_norm": 0.5167678272691687, "learning_rate": 8.602486412549225e-05, "loss": 11.9778, "step": 20505 }, { "epoch": 1.1166336539312383, "grad_norm": 0.5476282127282881, "learning_rate": 8.601613245852597e-05, "loss": 12.1037, "step": 20506 }, { "epoch": 1.1166881079278212, "grad_norm": 0.4953667512885385, "learning_rate": 8.600740090030062e-05, "loss": 11.962, "step": 20507 }, { "epoch": 1.1167425619244042, "grad_norm": 0.5890134000418888, "learning_rate": 8.599866945088406e-05, "loss": 12.0408, "step": 20508 }, { "epoch": 1.1167970159209872, "grad_norm": 0.5883335810102944, "learning_rate": 8.59899381103442e-05, "loss": 12.1385, "step": 20509 }, { "epoch": 1.1168514699175702, "grad_norm": 0.5502270776658903, "learning_rate": 8.598120687874893e-05, "loss": 11.9543, "step": 20510 }, { "epoch": 1.1169059239141532, "grad_norm": 0.5549793460829717, "learning_rate": 8.597247575616615e-05, "loss": 11.8679, "step": 20511 }, { "epoch": 1.1169603779107362, "grad_norm": 0.5659023150716123, "learning_rate": 8.596374474266378e-05, "loss": 12.1417, "step": 20512 }, { "epoch": 1.1170148319073192, "grad_norm": 0.5642468541772573, "learning_rate": 8.595501383830963e-05, "loss": 12.0823, "step": 20513 }, { "epoch": 1.1170692859039022, "grad_norm": 0.5123625037887571, "learning_rate": 8.594628304317168e-05, "loss": 12.0234, "step": 20514 }, { "epoch": 1.1171237399004854, "grad_norm": 0.47432507338245566, "learning_rate": 8.593755235731779e-05, "loss": 11.8732, "step": 20515 }, { "epoch": 1.1171781938970684, "grad_norm": 0.5304811960593719, "learning_rate": 8.592882178081584e-05, "loss": 11.974, "step": 20516 }, { "epoch": 1.1172326478936514, "grad_norm": 0.5517583033909015, "learning_rate": 8.59200913137337e-05, "loss": 12.0905, "step": 20517 }, { "epoch": 1.1172871018902344, "grad_norm": 0.46713763306512546, "learning_rate": 8.591136095613934e-05, "loss": 11.9744, "step": 20518 }, { "epoch": 1.1173415558868174, "grad_norm": 0.5486187992330056, "learning_rate": 8.590263070810058e-05, "loss": 12.0381, "step": 20519 }, { "epoch": 1.1173960098834004, "grad_norm": 0.5527723804788405, "learning_rate": 8.589390056968534e-05, "loss": 11.9795, "step": 20520 }, { "epoch": 1.1174504638799834, "grad_norm": 0.5018470446533754, "learning_rate": 8.588517054096147e-05, "loss": 12.0284, "step": 20521 }, { "epoch": 1.1175049178765664, "grad_norm": 0.5263256875552247, "learning_rate": 8.587644062199694e-05, "loss": 11.9745, "step": 20522 }, { "epoch": 1.1175593718731494, "grad_norm": 0.5830871320910996, "learning_rate": 8.586771081285952e-05, "loss": 12.161, "step": 20523 }, { "epoch": 1.1176138258697323, "grad_norm": 0.5812253637558997, "learning_rate": 8.585898111361716e-05, "loss": 11.9857, "step": 20524 }, { "epoch": 1.1176682798663153, "grad_norm": 0.6240263377907601, "learning_rate": 8.585025152433775e-05, "loss": 12.0903, "step": 20525 }, { "epoch": 1.1177227338628986, "grad_norm": 0.5347853323347552, "learning_rate": 8.584152204508916e-05, "loss": 12.0122, "step": 20526 }, { "epoch": 1.1177771878594815, "grad_norm": 0.5142875269130167, "learning_rate": 8.583279267593924e-05, "loss": 12.0679, "step": 20527 }, { "epoch": 1.1178316418560645, "grad_norm": 0.524234472729522, "learning_rate": 8.582406341695591e-05, "loss": 11.9171, "step": 20528 }, { "epoch": 1.1178860958526475, "grad_norm": 0.6411909173864685, "learning_rate": 8.581533426820703e-05, "loss": 11.9259, "step": 20529 }, { "epoch": 1.1179405498492305, "grad_norm": 0.5997938318227984, "learning_rate": 8.580660522976051e-05, "loss": 12.0067, "step": 20530 }, { "epoch": 1.1179950038458135, "grad_norm": 0.5543244796967985, "learning_rate": 8.57978763016842e-05, "loss": 11.9591, "step": 20531 }, { "epoch": 1.1180494578423965, "grad_norm": 0.511095836273182, "learning_rate": 8.578914748404603e-05, "loss": 11.9835, "step": 20532 }, { "epoch": 1.1181039118389795, "grad_norm": 0.527984273724554, "learning_rate": 8.578041877691376e-05, "loss": 12.0705, "step": 20533 }, { "epoch": 1.1181583658355625, "grad_norm": 0.5387854188171488, "learning_rate": 8.577169018035537e-05, "loss": 11.9265, "step": 20534 }, { "epoch": 1.1182128198321455, "grad_norm": 0.5687718892618461, "learning_rate": 8.576296169443872e-05, "loss": 11.8669, "step": 20535 }, { "epoch": 1.1182672738287285, "grad_norm": 0.5026518080973875, "learning_rate": 8.575423331923164e-05, "loss": 12.0234, "step": 20536 }, { "epoch": 1.1183217278253115, "grad_norm": 0.5401622858273115, "learning_rate": 8.574550505480204e-05, "loss": 12.0075, "step": 20537 }, { "epoch": 1.1183761818218947, "grad_norm": 0.5619456287560358, "learning_rate": 8.573677690121779e-05, "loss": 11.9651, "step": 20538 }, { "epoch": 1.1184306358184777, "grad_norm": 0.8599802351019422, "learning_rate": 8.572804885854676e-05, "loss": 12.0924, "step": 20539 }, { "epoch": 1.1184850898150607, "grad_norm": 0.5401716468800819, "learning_rate": 8.571932092685676e-05, "loss": 12.0439, "step": 20540 }, { "epoch": 1.1185395438116437, "grad_norm": 0.5613658674342403, "learning_rate": 8.571059310621577e-05, "loss": 11.9226, "step": 20541 }, { "epoch": 1.1185939978082267, "grad_norm": 0.5119920663394302, "learning_rate": 8.570186539669163e-05, "loss": 12.002, "step": 20542 }, { "epoch": 1.1186484518048097, "grad_norm": 0.5920825355337451, "learning_rate": 8.569313779835215e-05, "loss": 12.1506, "step": 20543 }, { "epoch": 1.1187029058013926, "grad_norm": 0.6027706362517788, "learning_rate": 8.568441031126519e-05, "loss": 11.9428, "step": 20544 }, { "epoch": 1.1187573597979756, "grad_norm": 0.5351833490287161, "learning_rate": 8.56756829354987e-05, "loss": 12.0721, "step": 20545 }, { "epoch": 1.1188118137945586, "grad_norm": 0.5572626660345444, "learning_rate": 8.56669556711205e-05, "loss": 11.9794, "step": 20546 }, { "epoch": 1.1188662677911416, "grad_norm": 0.5189188827745314, "learning_rate": 8.565822851819845e-05, "loss": 11.9398, "step": 20547 }, { "epoch": 1.1189207217877246, "grad_norm": 0.5874501284539313, "learning_rate": 8.564950147680043e-05, "loss": 12.1981, "step": 20548 }, { "epoch": 1.1189751757843078, "grad_norm": 0.5324668275406199, "learning_rate": 8.564077454699428e-05, "loss": 11.9317, "step": 20549 }, { "epoch": 1.1190296297808908, "grad_norm": 0.6009991727214898, "learning_rate": 8.563204772884787e-05, "loss": 12.2401, "step": 20550 }, { "epoch": 1.1190840837774738, "grad_norm": 0.5977241246014698, "learning_rate": 8.562332102242905e-05, "loss": 12.0362, "step": 20551 }, { "epoch": 1.1191385377740568, "grad_norm": 0.6377908932742733, "learning_rate": 8.561459442780578e-05, "loss": 12.2194, "step": 20552 }, { "epoch": 1.1191929917706398, "grad_norm": 0.6035991811431012, "learning_rate": 8.560586794504577e-05, "loss": 12.0193, "step": 20553 }, { "epoch": 1.1192474457672228, "grad_norm": 0.5325660866753731, "learning_rate": 8.559714157421695e-05, "loss": 12.063, "step": 20554 }, { "epoch": 1.1193018997638058, "grad_norm": 0.7824485461261439, "learning_rate": 8.558841531538715e-05, "loss": 12.02, "step": 20555 }, { "epoch": 1.1193563537603888, "grad_norm": 0.55243015110621, "learning_rate": 8.557968916862428e-05, "loss": 12.1466, "step": 20556 }, { "epoch": 1.1194108077569718, "grad_norm": 0.5050461586089697, "learning_rate": 8.557096313399615e-05, "loss": 11.994, "step": 20557 }, { "epoch": 1.1194652617535548, "grad_norm": 0.5906472124260127, "learning_rate": 8.556223721157064e-05, "loss": 12.1666, "step": 20558 }, { "epoch": 1.1195197157501378, "grad_norm": 0.5649664453950213, "learning_rate": 8.55535114014156e-05, "loss": 12.1033, "step": 20559 }, { "epoch": 1.1195741697467207, "grad_norm": 0.5679386653316959, "learning_rate": 8.554478570359887e-05, "loss": 12.0274, "step": 20560 }, { "epoch": 1.119628623743304, "grad_norm": 0.5260415529671344, "learning_rate": 8.553606011818832e-05, "loss": 12.1302, "step": 20561 }, { "epoch": 1.119683077739887, "grad_norm": 0.48550420063201516, "learning_rate": 8.55273346452518e-05, "loss": 11.9974, "step": 20562 }, { "epoch": 1.11973753173647, "grad_norm": 0.5596068466457854, "learning_rate": 8.551860928485715e-05, "loss": 11.95, "step": 20563 }, { "epoch": 1.119791985733053, "grad_norm": 0.5320204569567574, "learning_rate": 8.550988403707221e-05, "loss": 11.9891, "step": 20564 }, { "epoch": 1.119846439729636, "grad_norm": 0.5272038792017315, "learning_rate": 8.550115890196484e-05, "loss": 12.0433, "step": 20565 }, { "epoch": 1.119900893726219, "grad_norm": 0.5367106092079147, "learning_rate": 8.549243387960286e-05, "loss": 12.0138, "step": 20566 }, { "epoch": 1.119955347722802, "grad_norm": 0.5226903680437649, "learning_rate": 8.548370897005418e-05, "loss": 11.9826, "step": 20567 }, { "epoch": 1.120009801719385, "grad_norm": 0.6286936405364363, "learning_rate": 8.547498417338661e-05, "loss": 11.9526, "step": 20568 }, { "epoch": 1.120064255715968, "grad_norm": 0.5376852340755028, "learning_rate": 8.5466259489668e-05, "loss": 12.0677, "step": 20569 }, { "epoch": 1.120118709712551, "grad_norm": 0.5371857089677221, "learning_rate": 8.54575349189662e-05, "loss": 11.8697, "step": 20570 }, { "epoch": 1.120173163709134, "grad_norm": 0.589717476056914, "learning_rate": 8.544881046134905e-05, "loss": 11.8035, "step": 20571 }, { "epoch": 1.120227617705717, "grad_norm": 0.521908997324979, "learning_rate": 8.54400861168844e-05, "loss": 11.9903, "step": 20572 }, { "epoch": 1.1202820717023, "grad_norm": 0.5720192903917372, "learning_rate": 8.543136188564007e-05, "loss": 12.0403, "step": 20573 }, { "epoch": 1.120336525698883, "grad_norm": 0.6255945238675611, "learning_rate": 8.542263776768392e-05, "loss": 12.0523, "step": 20574 }, { "epoch": 1.120390979695466, "grad_norm": 0.569480533231531, "learning_rate": 8.541391376308376e-05, "loss": 12.0133, "step": 20575 }, { "epoch": 1.120445433692049, "grad_norm": 0.533932773695763, "learning_rate": 8.540518987190746e-05, "loss": 11.9363, "step": 20576 }, { "epoch": 1.120499887688632, "grad_norm": 0.5475647975327231, "learning_rate": 8.539646609422285e-05, "loss": 12.0669, "step": 20577 }, { "epoch": 1.120554341685215, "grad_norm": 0.5928832029061398, "learning_rate": 8.538774243009775e-05, "loss": 11.9212, "step": 20578 }, { "epoch": 1.120608795681798, "grad_norm": 0.5038069420499145, "learning_rate": 8.537901887960004e-05, "loss": 11.9198, "step": 20579 }, { "epoch": 1.120663249678381, "grad_norm": 0.5096724960488348, "learning_rate": 8.537029544279754e-05, "loss": 11.927, "step": 20580 }, { "epoch": 1.120717703674964, "grad_norm": 0.5598120008559261, "learning_rate": 8.536157211975806e-05, "loss": 12.149, "step": 20581 }, { "epoch": 1.120772157671547, "grad_norm": 0.564549478719887, "learning_rate": 8.535284891054947e-05, "loss": 12.011, "step": 20582 }, { "epoch": 1.12082661166813, "grad_norm": 0.5757046759921594, "learning_rate": 8.534412581523959e-05, "loss": 12.0404, "step": 20583 }, { "epoch": 1.120881065664713, "grad_norm": 0.5195550332959707, "learning_rate": 8.533540283389621e-05, "loss": 12.0446, "step": 20584 }, { "epoch": 1.1209355196612962, "grad_norm": 0.537669835620897, "learning_rate": 8.53266799665872e-05, "loss": 11.986, "step": 20585 }, { "epoch": 1.1209899736578792, "grad_norm": 0.5266346732329061, "learning_rate": 8.531795721338041e-05, "loss": 12.0639, "step": 20586 }, { "epoch": 1.1210444276544622, "grad_norm": 0.6063960080528453, "learning_rate": 8.530923457434364e-05, "loss": 12.0502, "step": 20587 }, { "epoch": 1.1210988816510452, "grad_norm": 0.5711788447519252, "learning_rate": 8.530051204954472e-05, "loss": 12.0723, "step": 20588 }, { "epoch": 1.1211533356476282, "grad_norm": 0.5400089161958422, "learning_rate": 8.529178963905147e-05, "loss": 12.1502, "step": 20589 }, { "epoch": 1.1212077896442112, "grad_norm": 0.5623434008351368, "learning_rate": 8.528306734293174e-05, "loss": 11.9905, "step": 20590 }, { "epoch": 1.1212622436407942, "grad_norm": 0.5020169980727249, "learning_rate": 8.527434516125335e-05, "loss": 12.04, "step": 20591 }, { "epoch": 1.1213166976373772, "grad_norm": 0.5083796763533629, "learning_rate": 8.526562309408417e-05, "loss": 12.041, "step": 20592 }, { "epoch": 1.1213711516339602, "grad_norm": 0.5386733757095622, "learning_rate": 8.525690114149191e-05, "loss": 12.0352, "step": 20593 }, { "epoch": 1.1214256056305432, "grad_norm": 0.5225178827581798, "learning_rate": 8.524817930354447e-05, "loss": 11.9339, "step": 20594 }, { "epoch": 1.1214800596271262, "grad_norm": 0.5441174805016625, "learning_rate": 8.523945758030966e-05, "loss": 11.9746, "step": 20595 }, { "epoch": 1.1215345136237094, "grad_norm": 0.5150008167747349, "learning_rate": 8.523073597185533e-05, "loss": 12.0508, "step": 20596 }, { "epoch": 1.1215889676202924, "grad_norm": 0.5307939347724793, "learning_rate": 8.522201447824925e-05, "loss": 11.5901, "step": 20597 }, { "epoch": 1.1216434216168754, "grad_norm": 0.5660549221776581, "learning_rate": 8.521329309955927e-05, "loss": 11.9772, "step": 20598 }, { "epoch": 1.1216978756134584, "grad_norm": 0.5287968192433967, "learning_rate": 8.520457183585321e-05, "loss": 12.0573, "step": 20599 }, { "epoch": 1.1217523296100413, "grad_norm": 0.6005800707822906, "learning_rate": 8.519585068719884e-05, "loss": 12.1486, "step": 20600 }, { "epoch": 1.1218067836066243, "grad_norm": 0.58718529211763, "learning_rate": 8.518712965366406e-05, "loss": 11.9406, "step": 20601 }, { "epoch": 1.1218612376032073, "grad_norm": 0.5926879722142123, "learning_rate": 8.517840873531669e-05, "loss": 12.0648, "step": 20602 }, { "epoch": 1.1219156915997903, "grad_norm": 0.555690531355676, "learning_rate": 8.516968793222443e-05, "loss": 12.0557, "step": 20603 }, { "epoch": 1.1219701455963733, "grad_norm": 0.5334045336469574, "learning_rate": 8.516096724445516e-05, "loss": 12.0405, "step": 20604 }, { "epoch": 1.1220245995929563, "grad_norm": 0.6051364508916515, "learning_rate": 8.515224667207671e-05, "loss": 11.9985, "step": 20605 }, { "epoch": 1.1220790535895393, "grad_norm": 0.5152068370069078, "learning_rate": 8.514352621515689e-05, "loss": 11.9404, "step": 20606 }, { "epoch": 1.1221335075861223, "grad_norm": 0.5700140228021777, "learning_rate": 8.51348058737635e-05, "loss": 12.0488, "step": 20607 }, { "epoch": 1.1221879615827055, "grad_norm": 0.5311689879860269, "learning_rate": 8.512608564796435e-05, "loss": 11.9787, "step": 20608 }, { "epoch": 1.1222424155792885, "grad_norm": 0.6004770577091992, "learning_rate": 8.511736553782725e-05, "loss": 11.8855, "step": 20609 }, { "epoch": 1.1222968695758715, "grad_norm": 0.572080055635242, "learning_rate": 8.510864554342e-05, "loss": 11.9507, "step": 20610 }, { "epoch": 1.1223513235724545, "grad_norm": 0.5223463898176213, "learning_rate": 8.509992566481042e-05, "loss": 11.9969, "step": 20611 }, { "epoch": 1.1224057775690375, "grad_norm": 0.5549006948857823, "learning_rate": 8.509120590206637e-05, "loss": 12.1299, "step": 20612 }, { "epoch": 1.1224602315656205, "grad_norm": 0.568669376047195, "learning_rate": 8.508248625525557e-05, "loss": 11.8601, "step": 20613 }, { "epoch": 1.1225146855622035, "grad_norm": 0.5619581181952749, "learning_rate": 8.507376672444585e-05, "loss": 12.1542, "step": 20614 }, { "epoch": 1.1225691395587865, "grad_norm": 0.5640475607492639, "learning_rate": 8.506504730970501e-05, "loss": 11.9929, "step": 20615 }, { "epoch": 1.1226235935553694, "grad_norm": 0.5495919073592045, "learning_rate": 8.505632801110087e-05, "loss": 11.9254, "step": 20616 }, { "epoch": 1.1226780475519524, "grad_norm": 0.5614813120214723, "learning_rate": 8.504760882870124e-05, "loss": 12.0447, "step": 20617 }, { "epoch": 1.1227325015485354, "grad_norm": 0.6016034729577902, "learning_rate": 8.503888976257392e-05, "loss": 12.0285, "step": 20618 }, { "epoch": 1.1227869555451186, "grad_norm": 0.5867644357431405, "learning_rate": 8.503017081278668e-05, "loss": 11.9198, "step": 20619 }, { "epoch": 1.1228414095417016, "grad_norm": 0.5127568851116143, "learning_rate": 8.502145197940736e-05, "loss": 11.9742, "step": 20620 }, { "epoch": 1.1228958635382846, "grad_norm": 0.5337297068088316, "learning_rate": 8.501273326250374e-05, "loss": 11.9856, "step": 20621 }, { "epoch": 1.1229503175348676, "grad_norm": 0.5461410367404895, "learning_rate": 8.500401466214364e-05, "loss": 12.0635, "step": 20622 }, { "epoch": 1.1230047715314506, "grad_norm": 0.5617180325957596, "learning_rate": 8.49952961783948e-05, "loss": 11.954, "step": 20623 }, { "epoch": 1.1230592255280336, "grad_norm": 0.510976442238758, "learning_rate": 8.498657781132509e-05, "loss": 11.8703, "step": 20624 }, { "epoch": 1.1231136795246166, "grad_norm": 0.5383942493352548, "learning_rate": 8.497785956100223e-05, "loss": 12.0471, "step": 20625 }, { "epoch": 1.1231681335211996, "grad_norm": 0.5160260825142511, "learning_rate": 8.496914142749407e-05, "loss": 11.9466, "step": 20626 }, { "epoch": 1.1232225875177826, "grad_norm": 0.5590712471310877, "learning_rate": 8.496042341086836e-05, "loss": 12.0509, "step": 20627 }, { "epoch": 1.1232770415143656, "grad_norm": 0.5524489743549429, "learning_rate": 8.495170551119296e-05, "loss": 11.9801, "step": 20628 }, { "epoch": 1.1233314955109486, "grad_norm": 0.5421061679085142, "learning_rate": 8.49429877285356e-05, "loss": 11.9793, "step": 20629 }, { "epoch": 1.1233859495075316, "grad_norm": 0.5565855504551699, "learning_rate": 8.49342700629641e-05, "loss": 12.1138, "step": 20630 }, { "epoch": 1.1234404035041148, "grad_norm": 0.5393647732434288, "learning_rate": 8.492555251454623e-05, "loss": 12.0277, "step": 20631 }, { "epoch": 1.1234948575006978, "grad_norm": 0.5653211907581519, "learning_rate": 8.491683508334983e-05, "loss": 12.0809, "step": 20632 }, { "epoch": 1.1235493114972808, "grad_norm": 0.5566333487947905, "learning_rate": 8.490811776944263e-05, "loss": 11.9899, "step": 20633 }, { "epoch": 1.1236037654938638, "grad_norm": 0.5489336043672067, "learning_rate": 8.489940057289243e-05, "loss": 12.0761, "step": 20634 }, { "epoch": 1.1236582194904468, "grad_norm": 0.5431071514195215, "learning_rate": 8.489068349376702e-05, "loss": 12.0434, "step": 20635 }, { "epoch": 1.1237126734870297, "grad_norm": 0.537853868515468, "learning_rate": 8.48819665321342e-05, "loss": 12.0849, "step": 20636 }, { "epoch": 1.1237671274836127, "grad_norm": 0.5511346778647371, "learning_rate": 8.487324968806173e-05, "loss": 11.9328, "step": 20637 }, { "epoch": 1.1238215814801957, "grad_norm": 0.5667308329210314, "learning_rate": 8.486453296161739e-05, "loss": 12.0361, "step": 20638 }, { "epoch": 1.1238760354767787, "grad_norm": 0.5471565466242325, "learning_rate": 8.485581635286901e-05, "loss": 12.0199, "step": 20639 }, { "epoch": 1.1239304894733617, "grad_norm": 0.6623247915833094, "learning_rate": 8.484709986188433e-05, "loss": 11.9894, "step": 20640 }, { "epoch": 1.1239849434699447, "grad_norm": 0.5511119629493311, "learning_rate": 8.483838348873116e-05, "loss": 12.0032, "step": 20641 }, { "epoch": 1.124039397466528, "grad_norm": 0.5424432658011794, "learning_rate": 8.482966723347726e-05, "loss": 12.0568, "step": 20642 }, { "epoch": 1.124093851463111, "grad_norm": 0.5191386336139402, "learning_rate": 8.48209510961904e-05, "loss": 11.7916, "step": 20643 }, { "epoch": 1.124148305459694, "grad_norm": 0.6078510380658935, "learning_rate": 8.481223507693838e-05, "loss": 12.0313, "step": 20644 }, { "epoch": 1.124202759456277, "grad_norm": 0.5488151658308369, "learning_rate": 8.480351917578896e-05, "loss": 12.1066, "step": 20645 }, { "epoch": 1.12425721345286, "grad_norm": 0.5545155623942513, "learning_rate": 8.479480339280992e-05, "loss": 12.0689, "step": 20646 }, { "epoch": 1.1243116674494429, "grad_norm": 0.5300148673499622, "learning_rate": 8.478608772806904e-05, "loss": 11.94, "step": 20647 }, { "epoch": 1.1243661214460259, "grad_norm": 0.6296302325770019, "learning_rate": 8.47773721816341e-05, "loss": 12.1124, "step": 20648 }, { "epoch": 1.1244205754426089, "grad_norm": 0.5481045776550506, "learning_rate": 8.476865675357284e-05, "loss": 11.9786, "step": 20649 }, { "epoch": 1.1244750294391919, "grad_norm": 0.5632361699445835, "learning_rate": 8.475994144395307e-05, "loss": 11.9742, "step": 20650 }, { "epoch": 1.1245294834357749, "grad_norm": 0.6013238189165129, "learning_rate": 8.475122625284257e-05, "loss": 12.0224, "step": 20651 }, { "epoch": 1.1245839374323578, "grad_norm": 0.6487355626787903, "learning_rate": 8.474251118030912e-05, "loss": 11.9843, "step": 20652 }, { "epoch": 1.1246383914289408, "grad_norm": 0.5609178175685393, "learning_rate": 8.47337962264204e-05, "loss": 11.9898, "step": 20653 }, { "epoch": 1.1246928454255238, "grad_norm": 0.5067012783158299, "learning_rate": 8.472508139124426e-05, "loss": 12.0245, "step": 20654 }, { "epoch": 1.124747299422107, "grad_norm": 0.5781508273221143, "learning_rate": 8.471636667484846e-05, "loss": 12.078, "step": 20655 }, { "epoch": 1.12480175341869, "grad_norm": 0.5277975985823132, "learning_rate": 8.470765207730075e-05, "loss": 12.0828, "step": 20656 }, { "epoch": 1.124856207415273, "grad_norm": 0.5802282833173706, "learning_rate": 8.469893759866892e-05, "loss": 11.9441, "step": 20657 }, { "epoch": 1.124910661411856, "grad_norm": 0.5611172893829233, "learning_rate": 8.46902232390207e-05, "loss": 12.1023, "step": 20658 }, { "epoch": 1.124965115408439, "grad_norm": 0.5192570995241735, "learning_rate": 8.468150899842387e-05, "loss": 12.0198, "step": 20659 }, { "epoch": 1.125019569405022, "grad_norm": 0.6822053457142676, "learning_rate": 8.467279487694617e-05, "loss": 12.0171, "step": 20660 }, { "epoch": 1.125074023401605, "grad_norm": 0.5516464625410687, "learning_rate": 8.46640808746554e-05, "loss": 12.0902, "step": 20661 }, { "epoch": 1.125128477398188, "grad_norm": 0.5654967257334075, "learning_rate": 8.465536699161934e-05, "loss": 12.095, "step": 20662 }, { "epoch": 1.125182931394771, "grad_norm": 0.5408078521278947, "learning_rate": 8.464665322790574e-05, "loss": 12.0886, "step": 20663 }, { "epoch": 1.125237385391354, "grad_norm": 0.595730130403624, "learning_rate": 8.463793958358228e-05, "loss": 12.0437, "step": 20664 }, { "epoch": 1.1252918393879372, "grad_norm": 0.6521803950094307, "learning_rate": 8.46292260587168e-05, "loss": 12.06, "step": 20665 }, { "epoch": 1.1253462933845202, "grad_norm": 0.4951956217474917, "learning_rate": 8.462051265337702e-05, "loss": 11.9553, "step": 20666 }, { "epoch": 1.1254007473811032, "grad_norm": 0.5791223500643684, "learning_rate": 8.461179936763074e-05, "loss": 11.9232, "step": 20667 }, { "epoch": 1.1254552013776862, "grad_norm": 0.5697549963250519, "learning_rate": 8.460308620154566e-05, "loss": 11.9406, "step": 20668 }, { "epoch": 1.1255096553742692, "grad_norm": 0.6110789914736281, "learning_rate": 8.459437315518959e-05, "loss": 12.1223, "step": 20669 }, { "epoch": 1.1255641093708522, "grad_norm": 0.5377100319564458, "learning_rate": 8.458566022863026e-05, "loss": 12.0876, "step": 20670 }, { "epoch": 1.1256185633674352, "grad_norm": 0.5596473554488341, "learning_rate": 8.457694742193538e-05, "loss": 12.0281, "step": 20671 }, { "epoch": 1.1256730173640181, "grad_norm": 0.6157363408101525, "learning_rate": 8.456823473517277e-05, "loss": 12.0079, "step": 20672 }, { "epoch": 1.1257274713606011, "grad_norm": 0.5958012714277305, "learning_rate": 8.45595221684102e-05, "loss": 12.0952, "step": 20673 }, { "epoch": 1.1257819253571841, "grad_norm": 0.5909040796004863, "learning_rate": 8.455080972171535e-05, "loss": 11.9936, "step": 20674 }, { "epoch": 1.1258363793537671, "grad_norm": 0.5259277364480356, "learning_rate": 8.454209739515594e-05, "loss": 11.9397, "step": 20675 }, { "epoch": 1.1258908333503501, "grad_norm": 0.6289245786484194, "learning_rate": 8.453338518879981e-05, "loss": 12.0402, "step": 20676 }, { "epoch": 1.125945287346933, "grad_norm": 0.5463792922285373, "learning_rate": 8.452467310271467e-05, "loss": 11.9932, "step": 20677 }, { "epoch": 1.1259997413435163, "grad_norm": 0.5498803039109769, "learning_rate": 8.451596113696827e-05, "loss": 12.1414, "step": 20678 }, { "epoch": 1.1260541953400993, "grad_norm": 0.5195443857787331, "learning_rate": 8.450724929162834e-05, "loss": 11.9772, "step": 20679 }, { "epoch": 1.1261086493366823, "grad_norm": 0.6088749495130134, "learning_rate": 8.449853756676265e-05, "loss": 12.1435, "step": 20680 }, { "epoch": 1.1261631033332653, "grad_norm": 0.4979402048421249, "learning_rate": 8.448982596243893e-05, "loss": 11.9464, "step": 20681 }, { "epoch": 1.1262175573298483, "grad_norm": 0.5471764648632118, "learning_rate": 8.448111447872493e-05, "loss": 11.9167, "step": 20682 }, { "epoch": 1.1262720113264313, "grad_norm": 0.5347362131973291, "learning_rate": 8.44724031156884e-05, "loss": 11.9314, "step": 20683 }, { "epoch": 1.1263264653230143, "grad_norm": 0.5300495252964539, "learning_rate": 8.446369187339704e-05, "loss": 11.9897, "step": 20684 }, { "epoch": 1.1263809193195973, "grad_norm": 0.4901735506020483, "learning_rate": 8.445498075191863e-05, "loss": 12.0088, "step": 20685 }, { "epoch": 1.1264353733161803, "grad_norm": 0.5439219481947628, "learning_rate": 8.44462697513209e-05, "loss": 12.0971, "step": 20686 }, { "epoch": 1.1264898273127633, "grad_norm": 0.5318345497439173, "learning_rate": 8.443755887167155e-05, "loss": 12.0603, "step": 20687 }, { "epoch": 1.1265442813093463, "grad_norm": 0.5827985748435938, "learning_rate": 8.442884811303837e-05, "loss": 11.9276, "step": 20688 }, { "epoch": 1.1265987353059295, "grad_norm": 0.5364340700027024, "learning_rate": 8.442013747548909e-05, "loss": 12.1521, "step": 20689 }, { "epoch": 1.1266531893025125, "grad_norm": 0.5170281003949356, "learning_rate": 8.441142695909143e-05, "loss": 11.9498, "step": 20690 }, { "epoch": 1.1267076432990955, "grad_norm": 0.5055257982311351, "learning_rate": 8.440271656391313e-05, "loss": 12.0405, "step": 20691 }, { "epoch": 1.1267620972956784, "grad_norm": 0.5428997872547985, "learning_rate": 8.439400629002192e-05, "loss": 12.1165, "step": 20692 }, { "epoch": 1.1268165512922614, "grad_norm": 0.5820501425587059, "learning_rate": 8.438529613748556e-05, "loss": 11.9797, "step": 20693 }, { "epoch": 1.1268710052888444, "grad_norm": 0.5406008973033072, "learning_rate": 8.437658610637172e-05, "loss": 11.9376, "step": 20694 }, { "epoch": 1.1269254592854274, "grad_norm": 0.5758219611985749, "learning_rate": 8.436787619674819e-05, "loss": 12.0318, "step": 20695 }, { "epoch": 1.1269799132820104, "grad_norm": 0.4994890911792227, "learning_rate": 8.435916640868266e-05, "loss": 11.9158, "step": 20696 }, { "epoch": 1.1270343672785934, "grad_norm": 0.575546458026418, "learning_rate": 8.435045674224287e-05, "loss": 11.7971, "step": 20697 }, { "epoch": 1.1270888212751764, "grad_norm": 0.5398853059585484, "learning_rate": 8.434174719749654e-05, "loss": 11.9874, "step": 20698 }, { "epoch": 1.1271432752717594, "grad_norm": 0.6294282540302789, "learning_rate": 8.433303777451143e-05, "loss": 12.1287, "step": 20699 }, { "epoch": 1.1271977292683424, "grad_norm": 0.7472791478523968, "learning_rate": 8.432432847335525e-05, "loss": 11.9709, "step": 20700 }, { "epoch": 1.1272521832649254, "grad_norm": 0.5856051668569312, "learning_rate": 8.431561929409571e-05, "loss": 11.9004, "step": 20701 }, { "epoch": 1.1273066372615086, "grad_norm": 0.5889737421383973, "learning_rate": 8.430691023680055e-05, "loss": 12.041, "step": 20702 }, { "epoch": 1.1273610912580916, "grad_norm": 0.5480485563764228, "learning_rate": 8.429820130153752e-05, "loss": 12.0473, "step": 20703 }, { "epoch": 1.1274155452546746, "grad_norm": 0.5528813501864324, "learning_rate": 8.428949248837428e-05, "loss": 12.1747, "step": 20704 }, { "epoch": 1.1274699992512576, "grad_norm": 0.5446607362507545, "learning_rate": 8.428078379737858e-05, "loss": 11.9402, "step": 20705 }, { "epoch": 1.1275244532478406, "grad_norm": 0.584286209918372, "learning_rate": 8.427207522861813e-05, "loss": 12.1177, "step": 20706 }, { "epoch": 1.1275789072444236, "grad_norm": 0.5559404672854191, "learning_rate": 8.426336678216066e-05, "loss": 12.0242, "step": 20707 }, { "epoch": 1.1276333612410065, "grad_norm": 0.6205962003625162, "learning_rate": 8.425465845807392e-05, "loss": 12.0569, "step": 20708 }, { "epoch": 1.1276878152375895, "grad_norm": 0.5098974570827257, "learning_rate": 8.424595025642555e-05, "loss": 12.053, "step": 20709 }, { "epoch": 1.1277422692341725, "grad_norm": 0.5212660381706562, "learning_rate": 8.423724217728334e-05, "loss": 12.102, "step": 20710 }, { "epoch": 1.1277967232307555, "grad_norm": 0.5943423464276083, "learning_rate": 8.422853422071497e-05, "loss": 12.042, "step": 20711 }, { "epoch": 1.1278511772273387, "grad_norm": 0.559493213992451, "learning_rate": 8.421982638678818e-05, "loss": 11.9594, "step": 20712 }, { "epoch": 1.1279056312239217, "grad_norm": 0.5556647353810512, "learning_rate": 8.421111867557068e-05, "loss": 12.1456, "step": 20713 }, { "epoch": 1.1279600852205047, "grad_norm": 0.6328214803425803, "learning_rate": 8.420241108713013e-05, "loss": 12.1421, "step": 20714 }, { "epoch": 1.1280145392170877, "grad_norm": 0.5926666590503891, "learning_rate": 8.419370362153431e-05, "loss": 12.0015, "step": 20715 }, { "epoch": 1.1280689932136707, "grad_norm": 0.5315547798326242, "learning_rate": 8.418499627885089e-05, "loss": 11.9904, "step": 20716 }, { "epoch": 1.1281234472102537, "grad_norm": 0.6096039674322927, "learning_rate": 8.417628905914758e-05, "loss": 12.0197, "step": 20717 }, { "epoch": 1.1281779012068367, "grad_norm": 0.5584602220091498, "learning_rate": 8.416758196249211e-05, "loss": 12.0328, "step": 20718 }, { "epoch": 1.1282323552034197, "grad_norm": 0.5960657696185826, "learning_rate": 8.415887498895219e-05, "loss": 11.9276, "step": 20719 }, { "epoch": 1.1282868092000027, "grad_norm": 0.6119079673118717, "learning_rate": 8.415016813859548e-05, "loss": 11.9412, "step": 20720 }, { "epoch": 1.1283412631965857, "grad_norm": 0.5498559512074462, "learning_rate": 8.414146141148972e-05, "loss": 11.9704, "step": 20721 }, { "epoch": 1.1283957171931687, "grad_norm": 0.542946266885336, "learning_rate": 8.413275480770266e-05, "loss": 12.0014, "step": 20722 }, { "epoch": 1.1284501711897517, "grad_norm": 0.5427884476921939, "learning_rate": 8.412404832730197e-05, "loss": 12.0812, "step": 20723 }, { "epoch": 1.1285046251863347, "grad_norm": 0.5462960786118958, "learning_rate": 8.411534197035529e-05, "loss": 11.9141, "step": 20724 }, { "epoch": 1.1285590791829179, "grad_norm": 0.565910848925007, "learning_rate": 8.41066357369304e-05, "loss": 11.9949, "step": 20725 }, { "epoch": 1.1286135331795009, "grad_norm": 0.5239689661926308, "learning_rate": 8.409792962709497e-05, "loss": 11.7701, "step": 20726 }, { "epoch": 1.1286679871760839, "grad_norm": 0.5462626931379434, "learning_rate": 8.40892236409167e-05, "loss": 12.043, "step": 20727 }, { "epoch": 1.1287224411726668, "grad_norm": 0.5312671667088449, "learning_rate": 8.40805177784633e-05, "loss": 12.0121, "step": 20728 }, { "epoch": 1.1287768951692498, "grad_norm": 0.6380397818351647, "learning_rate": 8.407181203980247e-05, "loss": 12.1194, "step": 20729 }, { "epoch": 1.1288313491658328, "grad_norm": 0.578529542017685, "learning_rate": 8.406310642500189e-05, "loss": 12.1208, "step": 20730 }, { "epoch": 1.1288858031624158, "grad_norm": 0.5497148691261249, "learning_rate": 8.405440093412927e-05, "loss": 11.9895, "step": 20731 }, { "epoch": 1.1289402571589988, "grad_norm": 0.5100078242558772, "learning_rate": 8.404569556725229e-05, "loss": 12.0079, "step": 20732 }, { "epoch": 1.1289947111555818, "grad_norm": 0.5535915308511297, "learning_rate": 8.403699032443871e-05, "loss": 12.0246, "step": 20733 }, { "epoch": 1.1290491651521648, "grad_norm": 0.6122926920072559, "learning_rate": 8.402828520575614e-05, "loss": 11.9979, "step": 20734 }, { "epoch": 1.129103619148748, "grad_norm": 0.5094572839458141, "learning_rate": 8.40195802112723e-05, "loss": 11.7763, "step": 20735 }, { "epoch": 1.129158073145331, "grad_norm": 0.5970999949646029, "learning_rate": 8.401087534105485e-05, "loss": 12.0442, "step": 20736 }, { "epoch": 1.129212527141914, "grad_norm": 0.5621315848332539, "learning_rate": 8.400217059517155e-05, "loss": 12.0884, "step": 20737 }, { "epoch": 1.129266981138497, "grad_norm": 0.6562930582146015, "learning_rate": 8.399346597369005e-05, "loss": 12.0669, "step": 20738 }, { "epoch": 1.12932143513508, "grad_norm": 0.574893916287573, "learning_rate": 8.398476147667803e-05, "loss": 12.0122, "step": 20739 }, { "epoch": 1.129375889131663, "grad_norm": 0.6457764055167019, "learning_rate": 8.39760571042032e-05, "loss": 11.9435, "step": 20740 }, { "epoch": 1.129430343128246, "grad_norm": 0.5280157277237386, "learning_rate": 8.396735285633324e-05, "loss": 12.0004, "step": 20741 }, { "epoch": 1.129484797124829, "grad_norm": 0.5123033419856629, "learning_rate": 8.395864873313584e-05, "loss": 11.9824, "step": 20742 }, { "epoch": 1.129539251121412, "grad_norm": 0.4986953868214987, "learning_rate": 8.394994473467869e-05, "loss": 11.8879, "step": 20743 }, { "epoch": 1.129593705117995, "grad_norm": 0.5713935244026771, "learning_rate": 8.394124086102943e-05, "loss": 11.8317, "step": 20744 }, { "epoch": 1.129648159114578, "grad_norm": 0.5064476313565307, "learning_rate": 8.393253711225579e-05, "loss": 11.8824, "step": 20745 }, { "epoch": 1.129702613111161, "grad_norm": 0.5383477103141034, "learning_rate": 8.392383348842543e-05, "loss": 12.062, "step": 20746 }, { "epoch": 1.129757067107744, "grad_norm": 0.6857506568266057, "learning_rate": 8.3915129989606e-05, "loss": 12.0129, "step": 20747 }, { "epoch": 1.1298115211043271, "grad_norm": 0.6066521891673776, "learning_rate": 8.390642661586528e-05, "loss": 12.1521, "step": 20748 }, { "epoch": 1.1298659751009101, "grad_norm": 0.604582705656562, "learning_rate": 8.389772336727084e-05, "loss": 12.0386, "step": 20749 }, { "epoch": 1.1299204290974931, "grad_norm": 0.545887271751831, "learning_rate": 8.388902024389042e-05, "loss": 12.0592, "step": 20750 }, { "epoch": 1.1299748830940761, "grad_norm": 0.6514439487333721, "learning_rate": 8.388031724579169e-05, "loss": 12.2167, "step": 20751 }, { "epoch": 1.1300293370906591, "grad_norm": 0.544262019270203, "learning_rate": 8.387161437304232e-05, "loss": 11.9321, "step": 20752 }, { "epoch": 1.130083791087242, "grad_norm": 0.5831675179655477, "learning_rate": 8.386291162570998e-05, "loss": 11.9923, "step": 20753 }, { "epoch": 1.130138245083825, "grad_norm": 0.5468630280688331, "learning_rate": 8.385420900386234e-05, "loss": 11.8361, "step": 20754 }, { "epoch": 1.130192699080408, "grad_norm": 0.5239437358619802, "learning_rate": 8.384550650756707e-05, "loss": 11.8683, "step": 20755 }, { "epoch": 1.130247153076991, "grad_norm": 0.5770978379625384, "learning_rate": 8.383680413689186e-05, "loss": 11.9685, "step": 20756 }, { "epoch": 1.130301607073574, "grad_norm": 0.5528615214271068, "learning_rate": 8.382810189190438e-05, "loss": 12.027, "step": 20757 }, { "epoch": 1.1303560610701573, "grad_norm": 0.5572352510964343, "learning_rate": 8.381939977267225e-05, "loss": 11.9764, "step": 20758 }, { "epoch": 1.1304105150667403, "grad_norm": 0.5438921781892165, "learning_rate": 8.38106977792632e-05, "loss": 11.9362, "step": 20759 }, { "epoch": 1.1304649690633233, "grad_norm": 0.6162761724338172, "learning_rate": 8.38019959117449e-05, "loss": 11.9992, "step": 20760 }, { "epoch": 1.1305194230599063, "grad_norm": 0.5644696651643153, "learning_rate": 8.379329417018497e-05, "loss": 11.9646, "step": 20761 }, { "epoch": 1.1305738770564893, "grad_norm": 0.661556556635499, "learning_rate": 8.378459255465112e-05, "loss": 12.0489, "step": 20762 }, { "epoch": 1.1306283310530723, "grad_norm": 0.5901163540090555, "learning_rate": 8.377589106521101e-05, "loss": 12.0038, "step": 20763 }, { "epoch": 1.1306827850496552, "grad_norm": 0.5887311343504869, "learning_rate": 8.376718970193229e-05, "loss": 12.1154, "step": 20764 }, { "epoch": 1.1307372390462382, "grad_norm": 0.6280187554043807, "learning_rate": 8.375848846488262e-05, "loss": 12.0463, "step": 20765 }, { "epoch": 1.1307916930428212, "grad_norm": 0.5887668368648779, "learning_rate": 8.374978735412965e-05, "loss": 12.0367, "step": 20766 }, { "epoch": 1.1308461470394042, "grad_norm": 0.5647871665134128, "learning_rate": 8.374108636974107e-05, "loss": 11.8751, "step": 20767 }, { "epoch": 1.1309006010359872, "grad_norm": 0.5375661928587627, "learning_rate": 8.373238551178453e-05, "loss": 12.0289, "step": 20768 }, { "epoch": 1.1309550550325702, "grad_norm": 0.5365046984815546, "learning_rate": 8.372368478032765e-05, "loss": 12.0516, "step": 20769 }, { "epoch": 1.1310095090291532, "grad_norm": 0.57074124799673, "learning_rate": 8.371498417543817e-05, "loss": 12.1869, "step": 20770 }, { "epoch": 1.1310639630257362, "grad_norm": 0.5485043911729385, "learning_rate": 8.37062836971837e-05, "loss": 12.1736, "step": 20771 }, { "epoch": 1.1311184170223194, "grad_norm": 0.5538806730895031, "learning_rate": 8.369758334563189e-05, "loss": 12.04, "step": 20772 }, { "epoch": 1.1311728710189024, "grad_norm": 0.5623222575053048, "learning_rate": 8.368888312085043e-05, "loss": 12.0119, "step": 20773 }, { "epoch": 1.1312273250154854, "grad_norm": 0.5860184149076423, "learning_rate": 8.368018302290694e-05, "loss": 12.0345, "step": 20774 }, { "epoch": 1.1312817790120684, "grad_norm": 0.5368386044729148, "learning_rate": 8.367148305186907e-05, "loss": 12.0709, "step": 20775 }, { "epoch": 1.1313362330086514, "grad_norm": 0.6503054542888462, "learning_rate": 8.366278320780449e-05, "loss": 12.1014, "step": 20776 }, { "epoch": 1.1313906870052344, "grad_norm": 0.5586135419144223, "learning_rate": 8.365408349078085e-05, "loss": 12.0123, "step": 20777 }, { "epoch": 1.1314451410018174, "grad_norm": 0.5508012796026294, "learning_rate": 8.36453839008658e-05, "loss": 11.9524, "step": 20778 }, { "epoch": 1.1314995949984004, "grad_norm": 0.5687890564258543, "learning_rate": 8.3636684438127e-05, "loss": 12.0393, "step": 20779 }, { "epoch": 1.1315540489949834, "grad_norm": 0.5505202177610155, "learning_rate": 8.362798510263205e-05, "loss": 11.9455, "step": 20780 }, { "epoch": 1.1316085029915663, "grad_norm": 0.5414750858393832, "learning_rate": 8.361928589444865e-05, "loss": 11.9601, "step": 20781 }, { "epoch": 1.1316629569881496, "grad_norm": 0.5258501960877228, "learning_rate": 8.361058681364442e-05, "loss": 11.9788, "step": 20782 }, { "epoch": 1.1317174109847326, "grad_norm": 0.5440550629269573, "learning_rate": 8.360188786028707e-05, "loss": 11.9825, "step": 20783 }, { "epoch": 1.1317718649813155, "grad_norm": 0.5351544035011582, "learning_rate": 8.359318903444416e-05, "loss": 12.0982, "step": 20784 }, { "epoch": 1.1318263189778985, "grad_norm": 0.5323293259840147, "learning_rate": 8.358449033618334e-05, "loss": 11.9676, "step": 20785 }, { "epoch": 1.1318807729744815, "grad_norm": 0.5529529749133446, "learning_rate": 8.357579176557228e-05, "loss": 11.8974, "step": 20786 }, { "epoch": 1.1319352269710645, "grad_norm": 0.590287551039442, "learning_rate": 8.356709332267863e-05, "loss": 11.9511, "step": 20787 }, { "epoch": 1.1319896809676475, "grad_norm": 0.5125296807104094, "learning_rate": 8.355839500757002e-05, "loss": 12.0024, "step": 20788 }, { "epoch": 1.1320441349642305, "grad_norm": 0.5804900466222254, "learning_rate": 8.354969682031407e-05, "loss": 12.1384, "step": 20789 }, { "epoch": 1.1320985889608135, "grad_norm": 0.5368063847193392, "learning_rate": 8.354099876097845e-05, "loss": 11.9719, "step": 20790 }, { "epoch": 1.1321530429573965, "grad_norm": 0.5779207309703169, "learning_rate": 8.353230082963078e-05, "loss": 11.9777, "step": 20791 }, { "epoch": 1.1322074969539795, "grad_norm": 0.5739429398866446, "learning_rate": 8.352360302633868e-05, "loss": 12.0905, "step": 20792 }, { "epoch": 1.1322619509505625, "grad_norm": 0.5300896274934617, "learning_rate": 8.351490535116987e-05, "loss": 12.0226, "step": 20793 }, { "epoch": 1.1323164049471455, "grad_norm": 0.571155766038941, "learning_rate": 8.35062078041919e-05, "loss": 12.0976, "step": 20794 }, { "epoch": 1.1323708589437287, "grad_norm": 0.564676725194143, "learning_rate": 8.349751038547239e-05, "loss": 11.9845, "step": 20795 }, { "epoch": 1.1324253129403117, "grad_norm": 0.5337945908674252, "learning_rate": 8.3488813095079e-05, "loss": 11.9669, "step": 20796 }, { "epoch": 1.1324797669368947, "grad_norm": 0.5344172893573053, "learning_rate": 8.348011593307939e-05, "loss": 11.9563, "step": 20797 }, { "epoch": 1.1325342209334777, "grad_norm": 0.5806123347477514, "learning_rate": 8.347141889954117e-05, "loss": 12.061, "step": 20798 }, { "epoch": 1.1325886749300607, "grad_norm": 0.5690743516430177, "learning_rate": 8.346272199453196e-05, "loss": 12.024, "step": 20799 }, { "epoch": 1.1326431289266436, "grad_norm": 0.5225427525492036, "learning_rate": 8.34540252181194e-05, "loss": 11.9932, "step": 20800 }, { "epoch": 1.1326975829232266, "grad_norm": 0.5668045251543957, "learning_rate": 8.344532857037113e-05, "loss": 11.9542, "step": 20801 }, { "epoch": 1.1327520369198096, "grad_norm": 0.591275603933892, "learning_rate": 8.343663205135474e-05, "loss": 12.0476, "step": 20802 }, { "epoch": 1.1328064909163926, "grad_norm": 0.5238304872807422, "learning_rate": 8.342793566113787e-05, "loss": 12.0662, "step": 20803 }, { "epoch": 1.1328609449129756, "grad_norm": 0.5531231281251932, "learning_rate": 8.341923939978821e-05, "loss": 11.9729, "step": 20804 }, { "epoch": 1.1329153989095588, "grad_norm": 0.5525939622220033, "learning_rate": 8.341054326737327e-05, "loss": 11.9551, "step": 20805 }, { "epoch": 1.1329698529061418, "grad_norm": 0.5020694928463673, "learning_rate": 8.340184726396076e-05, "loss": 11.9396, "step": 20806 }, { "epoch": 1.1330243069027248, "grad_norm": 0.4834336628662161, "learning_rate": 8.339315138961821e-05, "loss": 11.9399, "step": 20807 }, { "epoch": 1.1330787608993078, "grad_norm": 0.5434950420977097, "learning_rate": 8.338445564441335e-05, "loss": 12.0291, "step": 20808 }, { "epoch": 1.1331332148958908, "grad_norm": 0.5607777847547193, "learning_rate": 8.337576002841375e-05, "loss": 12.1495, "step": 20809 }, { "epoch": 1.1331876688924738, "grad_norm": 0.5272975127627831, "learning_rate": 8.336706454168701e-05, "loss": 12.1168, "step": 20810 }, { "epoch": 1.1332421228890568, "grad_norm": 0.5269370953237635, "learning_rate": 8.335836918430075e-05, "loss": 12.1105, "step": 20811 }, { "epoch": 1.1332965768856398, "grad_norm": 0.5280620247681213, "learning_rate": 8.334967395632264e-05, "loss": 12.0228, "step": 20812 }, { "epoch": 1.1333510308822228, "grad_norm": 0.5604435021674699, "learning_rate": 8.334097885782024e-05, "loss": 12.0042, "step": 20813 }, { "epoch": 1.1334054848788058, "grad_norm": 0.4949261064367269, "learning_rate": 8.333228388886121e-05, "loss": 11.9785, "step": 20814 }, { "epoch": 1.1334599388753888, "grad_norm": 0.5435580938905246, "learning_rate": 8.33235890495131e-05, "loss": 12.0545, "step": 20815 }, { "epoch": 1.1335143928719718, "grad_norm": 0.5391843622721241, "learning_rate": 8.331489433984357e-05, "loss": 12.1, "step": 20816 }, { "epoch": 1.1335688468685547, "grad_norm": 0.5577771887455933, "learning_rate": 8.330619975992021e-05, "loss": 12.0138, "step": 20817 }, { "epoch": 1.133623300865138, "grad_norm": 0.5249232630778278, "learning_rate": 8.329750530981064e-05, "loss": 11.9539, "step": 20818 }, { "epoch": 1.133677754861721, "grad_norm": 0.5104218897688741, "learning_rate": 8.328881098958246e-05, "loss": 12.0681, "step": 20819 }, { "epoch": 1.133732208858304, "grad_norm": 0.5857135309788942, "learning_rate": 8.32801167993033e-05, "loss": 11.8939, "step": 20820 }, { "epoch": 1.133786662854887, "grad_norm": 0.6147682545125815, "learning_rate": 8.327142273904078e-05, "loss": 12.0024, "step": 20821 }, { "epoch": 1.13384111685147, "grad_norm": 0.5800537403969274, "learning_rate": 8.326272880886245e-05, "loss": 12.0978, "step": 20822 }, { "epoch": 1.133895570848053, "grad_norm": 0.5212552774853718, "learning_rate": 8.325403500883597e-05, "loss": 11.96, "step": 20823 }, { "epoch": 1.133950024844636, "grad_norm": 0.5668164666074186, "learning_rate": 8.324534133902892e-05, "loss": 11.9927, "step": 20824 }, { "epoch": 1.134004478841219, "grad_norm": 0.5557225954644414, "learning_rate": 8.32366477995089e-05, "loss": 12.0414, "step": 20825 }, { "epoch": 1.134058932837802, "grad_norm": 0.536066984690846, "learning_rate": 8.322795439034352e-05, "loss": 12.0549, "step": 20826 }, { "epoch": 1.134113386834385, "grad_norm": 0.5094212987549769, "learning_rate": 8.321926111160038e-05, "loss": 11.9891, "step": 20827 }, { "epoch": 1.134167840830968, "grad_norm": 0.6266210587123217, "learning_rate": 8.321056796334707e-05, "loss": 12.0174, "step": 20828 }, { "epoch": 1.134222294827551, "grad_norm": 0.5920350735747615, "learning_rate": 8.320187494565121e-05, "loss": 12.1067, "step": 20829 }, { "epoch": 1.134276748824134, "grad_norm": 0.5157900799792909, "learning_rate": 8.319318205858036e-05, "loss": 11.9817, "step": 20830 }, { "epoch": 1.134331202820717, "grad_norm": 0.5484848096232319, "learning_rate": 8.318448930220216e-05, "loss": 11.996, "step": 20831 }, { "epoch": 1.1343856568173, "grad_norm": 0.4911315591287539, "learning_rate": 8.31757966765842e-05, "loss": 11.7924, "step": 20832 }, { "epoch": 1.134440110813883, "grad_norm": 0.48866603006549925, "learning_rate": 8.316710418179406e-05, "loss": 11.8883, "step": 20833 }, { "epoch": 1.134494564810466, "grad_norm": 0.5355135372081358, "learning_rate": 8.315841181789937e-05, "loss": 11.9627, "step": 20834 }, { "epoch": 1.134549018807049, "grad_norm": 0.5275092246816174, "learning_rate": 8.314971958496766e-05, "loss": 11.9631, "step": 20835 }, { "epoch": 1.134603472803632, "grad_norm": 0.5295825837600786, "learning_rate": 8.314102748306659e-05, "loss": 11.9989, "step": 20836 }, { "epoch": 1.134657926800215, "grad_norm": 0.5321704154501747, "learning_rate": 8.313233551226369e-05, "loss": 12.0102, "step": 20837 }, { "epoch": 1.134712380796798, "grad_norm": 0.5254003594455219, "learning_rate": 8.312364367262658e-05, "loss": 12.021, "step": 20838 }, { "epoch": 1.134766834793381, "grad_norm": 0.5216226383713083, "learning_rate": 8.311495196422284e-05, "loss": 11.9733, "step": 20839 }, { "epoch": 1.134821288789964, "grad_norm": 0.5459613177918216, "learning_rate": 8.310626038712007e-05, "loss": 12.0244, "step": 20840 }, { "epoch": 1.1348757427865472, "grad_norm": 0.5404943272558749, "learning_rate": 8.309756894138583e-05, "loss": 12.1336, "step": 20841 }, { "epoch": 1.1349301967831302, "grad_norm": 0.48839452700605696, "learning_rate": 8.308887762708776e-05, "loss": 12.013, "step": 20842 }, { "epoch": 1.1349846507797132, "grad_norm": 0.5520083087994961, "learning_rate": 8.30801864442934e-05, "loss": 12.0592, "step": 20843 }, { "epoch": 1.1350391047762962, "grad_norm": 0.5462084428929227, "learning_rate": 8.30714953930704e-05, "loss": 11.9144, "step": 20844 }, { "epoch": 1.1350935587728792, "grad_norm": 0.5471507247040064, "learning_rate": 8.306280447348622e-05, "loss": 11.8395, "step": 20845 }, { "epoch": 1.1351480127694622, "grad_norm": 0.5501453700212303, "learning_rate": 8.305411368560854e-05, "loss": 11.9186, "step": 20846 }, { "epoch": 1.1352024667660452, "grad_norm": 0.5454816436293293, "learning_rate": 8.304542302950491e-05, "loss": 11.9424, "step": 20847 }, { "epoch": 1.1352569207626282, "grad_norm": 0.4800147172461387, "learning_rate": 8.303673250524293e-05, "loss": 11.9729, "step": 20848 }, { "epoch": 1.1353113747592112, "grad_norm": 0.5395301056753729, "learning_rate": 8.302804211289015e-05, "loss": 11.9276, "step": 20849 }, { "epoch": 1.1353658287557942, "grad_norm": 0.545924304880344, "learning_rate": 8.301935185251415e-05, "loss": 11.9468, "step": 20850 }, { "epoch": 1.1354202827523772, "grad_norm": 0.5531911619321243, "learning_rate": 8.301066172418252e-05, "loss": 12.0504, "step": 20851 }, { "epoch": 1.1354747367489604, "grad_norm": 0.5295766318287469, "learning_rate": 8.300197172796283e-05, "loss": 12.0351, "step": 20852 }, { "epoch": 1.1355291907455434, "grad_norm": 0.5322049728341919, "learning_rate": 8.299328186392266e-05, "loss": 11.9966, "step": 20853 }, { "epoch": 1.1355836447421264, "grad_norm": 0.5081287053833177, "learning_rate": 8.298459213212964e-05, "loss": 11.9673, "step": 20854 }, { "epoch": 1.1356380987387094, "grad_norm": 0.6124065673217175, "learning_rate": 8.297590253265125e-05, "loss": 11.983, "step": 20855 }, { "epoch": 1.1356925527352923, "grad_norm": 0.53898433722122, "learning_rate": 8.296721306555505e-05, "loss": 11.9183, "step": 20856 }, { "epoch": 1.1357470067318753, "grad_norm": 0.5382650722310977, "learning_rate": 8.295852373090869e-05, "loss": 12.0799, "step": 20857 }, { "epoch": 1.1358014607284583, "grad_norm": 0.561854909778086, "learning_rate": 8.294983452877971e-05, "loss": 11.9602, "step": 20858 }, { "epoch": 1.1358559147250413, "grad_norm": 0.6108217648046498, "learning_rate": 8.294114545923567e-05, "loss": 11.9697, "step": 20859 }, { "epoch": 1.1359103687216243, "grad_norm": 0.5682286780819156, "learning_rate": 8.293245652234415e-05, "loss": 12.0881, "step": 20860 }, { "epoch": 1.1359648227182073, "grad_norm": 0.6408087879625746, "learning_rate": 8.29237677181727e-05, "loss": 12.0791, "step": 20861 }, { "epoch": 1.1360192767147903, "grad_norm": 0.5605205521100582, "learning_rate": 8.291507904678892e-05, "loss": 12.1151, "step": 20862 }, { "epoch": 1.1360737307113733, "grad_norm": 0.5605707280569578, "learning_rate": 8.29063905082603e-05, "loss": 12.0089, "step": 20863 }, { "epoch": 1.1361281847079563, "grad_norm": 0.5012292664038457, "learning_rate": 8.289770210265453e-05, "loss": 11.9938, "step": 20864 }, { "epoch": 1.1361826387045395, "grad_norm": 0.5684573322543377, "learning_rate": 8.288901383003907e-05, "loss": 12.0373, "step": 20865 }, { "epoch": 1.1362370927011225, "grad_norm": 0.6351742069265385, "learning_rate": 8.288032569048148e-05, "loss": 12.0249, "step": 20866 }, { "epoch": 1.1362915466977055, "grad_norm": 0.5222002852865439, "learning_rate": 8.287163768404934e-05, "loss": 12.0471, "step": 20867 }, { "epoch": 1.1363460006942885, "grad_norm": 0.5203810318401516, "learning_rate": 8.286294981081024e-05, "loss": 11.9294, "step": 20868 }, { "epoch": 1.1364004546908715, "grad_norm": 0.57972478770493, "learning_rate": 8.285426207083171e-05, "loss": 12.0424, "step": 20869 }, { "epoch": 1.1364549086874545, "grad_norm": 0.5544463610879068, "learning_rate": 8.284557446418133e-05, "loss": 12.0597, "step": 20870 }, { "epoch": 1.1365093626840375, "grad_norm": 0.5035943721139681, "learning_rate": 8.283688699092662e-05, "loss": 11.9878, "step": 20871 }, { "epoch": 1.1365638166806205, "grad_norm": 0.5344768186437175, "learning_rate": 8.282819965113516e-05, "loss": 12.0697, "step": 20872 }, { "epoch": 1.1366182706772034, "grad_norm": 0.5616838941972153, "learning_rate": 8.281951244487452e-05, "loss": 11.989, "step": 20873 }, { "epoch": 1.1366727246737864, "grad_norm": 0.5391340768095988, "learning_rate": 8.281082537221223e-05, "loss": 11.9472, "step": 20874 }, { "epoch": 1.1367271786703697, "grad_norm": 0.6130211524475987, "learning_rate": 8.280213843321583e-05, "loss": 12.1204, "step": 20875 }, { "epoch": 1.1367816326669526, "grad_norm": 0.522993663207442, "learning_rate": 8.279345162795288e-05, "loss": 12.0767, "step": 20876 }, { "epoch": 1.1368360866635356, "grad_norm": 0.5704096775661565, "learning_rate": 8.278476495649094e-05, "loss": 12.0927, "step": 20877 }, { "epoch": 1.1368905406601186, "grad_norm": 0.5182424211696456, "learning_rate": 8.277607841889754e-05, "loss": 11.9097, "step": 20878 }, { "epoch": 1.1369449946567016, "grad_norm": 0.5664499558712293, "learning_rate": 8.276739201524026e-05, "loss": 11.9551, "step": 20879 }, { "epoch": 1.1369994486532846, "grad_norm": 0.5124961895977534, "learning_rate": 8.275870574558661e-05, "loss": 11.9613, "step": 20880 }, { "epoch": 1.1370539026498676, "grad_norm": 0.49355180719984976, "learning_rate": 8.275001961000418e-05, "loss": 12.0667, "step": 20881 }, { "epoch": 1.1371083566464506, "grad_norm": 0.5412642353635919, "learning_rate": 8.27413336085605e-05, "loss": 12.0697, "step": 20882 }, { "epoch": 1.1371628106430336, "grad_norm": 0.5478756978199564, "learning_rate": 8.273264774132308e-05, "loss": 11.9211, "step": 20883 }, { "epoch": 1.1372172646396166, "grad_norm": 0.5521736725057819, "learning_rate": 8.272396200835952e-05, "loss": 12.0213, "step": 20884 }, { "epoch": 1.1372717186361996, "grad_norm": 0.5305067708369225, "learning_rate": 8.27152764097373e-05, "loss": 11.9613, "step": 20885 }, { "epoch": 1.1373261726327826, "grad_norm": 0.5280367029753679, "learning_rate": 8.270659094552399e-05, "loss": 12.0564, "step": 20886 }, { "epoch": 1.1373806266293656, "grad_norm": 0.523282340775394, "learning_rate": 8.269790561578713e-05, "loss": 11.9566, "step": 20887 }, { "epoch": 1.1374350806259488, "grad_norm": 0.5073888625187748, "learning_rate": 8.268922042059426e-05, "loss": 11.9844, "step": 20888 }, { "epoch": 1.1374895346225318, "grad_norm": 0.5630654298574181, "learning_rate": 8.268053536001291e-05, "loss": 11.9883, "step": 20889 }, { "epoch": 1.1375439886191148, "grad_norm": 0.49276678646697886, "learning_rate": 8.26718504341106e-05, "loss": 11.952, "step": 20890 }, { "epoch": 1.1375984426156978, "grad_norm": 0.6022868989355634, "learning_rate": 8.266316564295492e-05, "loss": 12.0448, "step": 20891 }, { "epoch": 1.1376528966122808, "grad_norm": 0.4980652688408364, "learning_rate": 8.265448098661337e-05, "loss": 12.0342, "step": 20892 }, { "epoch": 1.1377073506088637, "grad_norm": 0.4817094395889994, "learning_rate": 8.264579646515347e-05, "loss": 11.9591, "step": 20893 }, { "epoch": 1.1377618046054467, "grad_norm": 0.5647523062277882, "learning_rate": 8.26371120786428e-05, "loss": 12.1397, "step": 20894 }, { "epoch": 1.1378162586020297, "grad_norm": 0.5375555758113099, "learning_rate": 8.262842782714884e-05, "loss": 11.9929, "step": 20895 }, { "epoch": 1.1378707125986127, "grad_norm": 0.49082208075832834, "learning_rate": 8.261974371073913e-05, "loss": 11.9572, "step": 20896 }, { "epoch": 1.1379251665951957, "grad_norm": 0.5972417861121767, "learning_rate": 8.26110597294812e-05, "loss": 12.2294, "step": 20897 }, { "epoch": 1.137979620591779, "grad_norm": 0.5170383175147765, "learning_rate": 8.26023758834426e-05, "loss": 12.0637, "step": 20898 }, { "epoch": 1.138034074588362, "grad_norm": 0.5175739439103323, "learning_rate": 8.259369217269084e-05, "loss": 11.8461, "step": 20899 }, { "epoch": 1.138088528584945, "grad_norm": 0.6240727278872582, "learning_rate": 8.258500859729345e-05, "loss": 12.1108, "step": 20900 }, { "epoch": 1.138142982581528, "grad_norm": 0.5173425148012402, "learning_rate": 8.257632515731793e-05, "loss": 12.1129, "step": 20901 }, { "epoch": 1.138197436578111, "grad_norm": 0.5384766030862483, "learning_rate": 8.256764185283184e-05, "loss": 12.031, "step": 20902 }, { "epoch": 1.138251890574694, "grad_norm": 0.4990048614018597, "learning_rate": 8.25589586839027e-05, "loss": 11.9201, "step": 20903 }, { "epoch": 1.1383063445712769, "grad_norm": 0.5347219549756849, "learning_rate": 8.255027565059806e-05, "loss": 12.0611, "step": 20904 }, { "epoch": 1.1383607985678599, "grad_norm": 0.523376663435599, "learning_rate": 8.254159275298533e-05, "loss": 11.9876, "step": 20905 }, { "epoch": 1.1384152525644429, "grad_norm": 0.49233654599900334, "learning_rate": 8.253290999113215e-05, "loss": 12.0099, "step": 20906 }, { "epoch": 1.1384697065610259, "grad_norm": 0.5117814584178179, "learning_rate": 8.252422736510597e-05, "loss": 12.0075, "step": 20907 }, { "epoch": 1.1385241605576089, "grad_norm": 0.6113436907536076, "learning_rate": 8.251554487497436e-05, "loss": 12.0207, "step": 20908 }, { "epoch": 1.1385786145541918, "grad_norm": 0.5237525973981209, "learning_rate": 8.250686252080478e-05, "loss": 12.0911, "step": 20909 }, { "epoch": 1.1386330685507748, "grad_norm": 0.5453306120977369, "learning_rate": 8.249818030266476e-05, "loss": 11.9071, "step": 20910 }, { "epoch": 1.138687522547358, "grad_norm": 0.5957861253600286, "learning_rate": 8.248949822062182e-05, "loss": 12.0551, "step": 20911 }, { "epoch": 1.138741976543941, "grad_norm": 0.5245725217612074, "learning_rate": 8.248081627474349e-05, "loss": 12.0185, "step": 20912 }, { "epoch": 1.138796430540524, "grad_norm": 0.5930235919132725, "learning_rate": 8.247213446509728e-05, "loss": 11.9996, "step": 20913 }, { "epoch": 1.138850884537107, "grad_norm": 0.5450954577191609, "learning_rate": 8.246345279175073e-05, "loss": 11.9504, "step": 20914 }, { "epoch": 1.13890533853369, "grad_norm": 0.5530840617194773, "learning_rate": 8.245477125477125e-05, "loss": 12.01, "step": 20915 }, { "epoch": 1.138959792530273, "grad_norm": 0.5633213277208992, "learning_rate": 8.244608985422641e-05, "loss": 12.0457, "step": 20916 }, { "epoch": 1.139014246526856, "grad_norm": 0.6085785006143246, "learning_rate": 8.243740859018375e-05, "loss": 12.1239, "step": 20917 }, { "epoch": 1.139068700523439, "grad_norm": 0.5330646862283069, "learning_rate": 8.242872746271073e-05, "loss": 11.9485, "step": 20918 }, { "epoch": 1.139123154520022, "grad_norm": 0.5364267288916039, "learning_rate": 8.242004647187489e-05, "loss": 12.0183, "step": 20919 }, { "epoch": 1.139177608516605, "grad_norm": 0.5496380663520917, "learning_rate": 8.24113656177437e-05, "loss": 12.0552, "step": 20920 }, { "epoch": 1.139232062513188, "grad_norm": 0.5371979806699659, "learning_rate": 8.240268490038469e-05, "loss": 12.0438, "step": 20921 }, { "epoch": 1.1392865165097712, "grad_norm": 0.5498816405632554, "learning_rate": 8.239400431986535e-05, "loss": 11.9895, "step": 20922 }, { "epoch": 1.1393409705063542, "grad_norm": 0.5625453952844264, "learning_rate": 8.238532387625315e-05, "loss": 11.9807, "step": 20923 }, { "epoch": 1.1393954245029372, "grad_norm": 0.5611986634358086, "learning_rate": 8.23766435696157e-05, "loss": 12.0085, "step": 20924 }, { "epoch": 1.1394498784995202, "grad_norm": 0.5377483954179323, "learning_rate": 8.236796340002038e-05, "loss": 12.0947, "step": 20925 }, { "epoch": 1.1395043324961032, "grad_norm": 0.5262368616779615, "learning_rate": 8.235928336753475e-05, "loss": 11.9465, "step": 20926 }, { "epoch": 1.1395587864926862, "grad_norm": 0.5223377240065364, "learning_rate": 8.235060347222625e-05, "loss": 12.0398, "step": 20927 }, { "epoch": 1.1396132404892692, "grad_norm": 0.5642763894207888, "learning_rate": 8.234192371416245e-05, "loss": 11.8982, "step": 20928 }, { "epoch": 1.1396676944858521, "grad_norm": 0.5019674705192412, "learning_rate": 8.233324409341081e-05, "loss": 12.0971, "step": 20929 }, { "epoch": 1.1397221484824351, "grad_norm": 0.5285775151462369, "learning_rate": 8.232456461003882e-05, "loss": 11.9557, "step": 20930 }, { "epoch": 1.1397766024790181, "grad_norm": 0.5729624412586914, "learning_rate": 8.231588526411398e-05, "loss": 12.1298, "step": 20931 }, { "epoch": 1.1398310564756011, "grad_norm": 0.5606389022362214, "learning_rate": 8.230720605570379e-05, "loss": 11.9898, "step": 20932 }, { "epoch": 1.1398855104721841, "grad_norm": 0.5903555006233688, "learning_rate": 8.229852698487572e-05, "loss": 11.9642, "step": 20933 }, { "epoch": 1.139939964468767, "grad_norm": 0.6299819004174931, "learning_rate": 8.228984805169732e-05, "loss": 12.1306, "step": 20934 }, { "epoch": 1.1399944184653503, "grad_norm": 0.5577541227970585, "learning_rate": 8.228116925623599e-05, "loss": 12.029, "step": 20935 }, { "epoch": 1.1400488724619333, "grad_norm": 0.5175393691618557, "learning_rate": 8.227249059855926e-05, "loss": 12.1277, "step": 20936 }, { "epoch": 1.1401033264585163, "grad_norm": 0.5606078093504049, "learning_rate": 8.226381207873462e-05, "loss": 12.153, "step": 20937 }, { "epoch": 1.1401577804550993, "grad_norm": 0.5168465111345226, "learning_rate": 8.225513369682954e-05, "loss": 11.9966, "step": 20938 }, { "epoch": 1.1402122344516823, "grad_norm": 0.6631339416457368, "learning_rate": 8.224645545291151e-05, "loss": 12.0144, "step": 20939 }, { "epoch": 1.1402666884482653, "grad_norm": 0.6520044289618595, "learning_rate": 8.223777734704804e-05, "loss": 12.0726, "step": 20940 }, { "epoch": 1.1403211424448483, "grad_norm": 0.5234933027330751, "learning_rate": 8.222909937930658e-05, "loss": 11.9941, "step": 20941 }, { "epoch": 1.1403755964414313, "grad_norm": 0.5772608521354804, "learning_rate": 8.222042154975464e-05, "loss": 12.0001, "step": 20942 }, { "epoch": 1.1404300504380143, "grad_norm": 0.5533461011161972, "learning_rate": 8.221174385845967e-05, "loss": 11.9857, "step": 20943 }, { "epoch": 1.1404845044345973, "grad_norm": 0.5251306865625073, "learning_rate": 8.220306630548917e-05, "loss": 11.9284, "step": 20944 }, { "epoch": 1.1405389584311805, "grad_norm": 0.5049067693066229, "learning_rate": 8.219438889091062e-05, "loss": 11.9974, "step": 20945 }, { "epoch": 1.1405934124277635, "grad_norm": 0.562717881292853, "learning_rate": 8.218571161479148e-05, "loss": 12.0331, "step": 20946 }, { "epoch": 1.1406478664243465, "grad_norm": 0.5125144286918268, "learning_rate": 8.217703447719924e-05, "loss": 11.9266, "step": 20947 }, { "epoch": 1.1407023204209294, "grad_norm": 0.5769333365392472, "learning_rate": 8.216835747820135e-05, "loss": 12.0408, "step": 20948 }, { "epoch": 1.1407567744175124, "grad_norm": 0.5479203582995349, "learning_rate": 8.215968061786531e-05, "loss": 12.1124, "step": 20949 }, { "epoch": 1.1408112284140954, "grad_norm": 0.5596161916792809, "learning_rate": 8.215100389625857e-05, "loss": 12.0871, "step": 20950 }, { "epoch": 1.1408656824106784, "grad_norm": 0.5305381593496411, "learning_rate": 8.214232731344864e-05, "loss": 12.0144, "step": 20951 }, { "epoch": 1.1409201364072614, "grad_norm": 0.5416221276249467, "learning_rate": 8.213365086950296e-05, "loss": 12.0667, "step": 20952 }, { "epoch": 1.1409745904038444, "grad_norm": 0.5333267437537554, "learning_rate": 8.2124974564489e-05, "loss": 11.9662, "step": 20953 }, { "epoch": 1.1410290444004274, "grad_norm": 0.5099745896153601, "learning_rate": 8.211629839847426e-05, "loss": 11.9656, "step": 20954 }, { "epoch": 1.1410834983970104, "grad_norm": 0.5423766204134551, "learning_rate": 8.210762237152619e-05, "loss": 11.9546, "step": 20955 }, { "epoch": 1.1411379523935934, "grad_norm": 0.5532803593890093, "learning_rate": 8.209894648371222e-05, "loss": 12.0223, "step": 20956 }, { "epoch": 1.1411924063901764, "grad_norm": 0.5835951375321685, "learning_rate": 8.209027073509985e-05, "loss": 12.0279, "step": 20957 }, { "epoch": 1.1412468603867596, "grad_norm": 0.5905217931965142, "learning_rate": 8.208159512575654e-05, "loss": 12.1469, "step": 20958 }, { "epoch": 1.1413013143833426, "grad_norm": 0.5422334570641386, "learning_rate": 8.207291965574974e-05, "loss": 12.1288, "step": 20959 }, { "epoch": 1.1413557683799256, "grad_norm": 0.696890680767564, "learning_rate": 8.206424432514694e-05, "loss": 12.0775, "step": 20960 }, { "epoch": 1.1414102223765086, "grad_norm": 0.5585119847024153, "learning_rate": 8.205556913401555e-05, "loss": 11.99, "step": 20961 }, { "epoch": 1.1414646763730916, "grad_norm": 0.549365936991758, "learning_rate": 8.20468940824231e-05, "loss": 12.0516, "step": 20962 }, { "epoch": 1.1415191303696746, "grad_norm": 0.607436255760605, "learning_rate": 8.2038219170437e-05, "loss": 12.0653, "step": 20963 }, { "epoch": 1.1415735843662576, "grad_norm": 0.5655225055773133, "learning_rate": 8.202954439812472e-05, "loss": 12.1366, "step": 20964 }, { "epoch": 1.1416280383628405, "grad_norm": 0.5301801390339581, "learning_rate": 8.202086976555375e-05, "loss": 12.0433, "step": 20965 }, { "epoch": 1.1416824923594235, "grad_norm": 0.4744429712595387, "learning_rate": 8.201219527279147e-05, "loss": 11.8752, "step": 20966 }, { "epoch": 1.1417369463560065, "grad_norm": 0.507296115025727, "learning_rate": 8.200352091990539e-05, "loss": 11.9695, "step": 20967 }, { "epoch": 1.1417914003525897, "grad_norm": 0.5290589105091998, "learning_rate": 8.199484670696295e-05, "loss": 12.045, "step": 20968 }, { "epoch": 1.1418458543491727, "grad_norm": 0.7096101281605665, "learning_rate": 8.19861726340316e-05, "loss": 12.1148, "step": 20969 }, { "epoch": 1.1419003083457557, "grad_norm": 0.49424842042945244, "learning_rate": 8.197749870117879e-05, "loss": 12.0347, "step": 20970 }, { "epoch": 1.1419547623423387, "grad_norm": 0.5749252311480237, "learning_rate": 8.196882490847197e-05, "loss": 12.0663, "step": 20971 }, { "epoch": 1.1420092163389217, "grad_norm": 0.5670819687811587, "learning_rate": 8.196015125597858e-05, "loss": 12.0073, "step": 20972 }, { "epoch": 1.1420636703355047, "grad_norm": 0.555447687916874, "learning_rate": 8.195147774376609e-05, "loss": 12.0688, "step": 20973 }, { "epoch": 1.1421181243320877, "grad_norm": 0.5414378867694744, "learning_rate": 8.194280437190194e-05, "loss": 12.0326, "step": 20974 }, { "epoch": 1.1421725783286707, "grad_norm": 0.5773807343132444, "learning_rate": 8.19341311404536e-05, "loss": 12.0903, "step": 20975 }, { "epoch": 1.1422270323252537, "grad_norm": 0.5020329669878001, "learning_rate": 8.192545804948845e-05, "loss": 11.9684, "step": 20976 }, { "epoch": 1.1422814863218367, "grad_norm": 0.48234046727032104, "learning_rate": 8.191678509907396e-05, "loss": 11.9321, "step": 20977 }, { "epoch": 1.1423359403184197, "grad_norm": 0.567535834836397, "learning_rate": 8.190811228927761e-05, "loss": 12.0578, "step": 20978 }, { "epoch": 1.1423903943150027, "grad_norm": 0.5327119061845109, "learning_rate": 8.189943962016679e-05, "loss": 11.9767, "step": 20979 }, { "epoch": 1.1424448483115857, "grad_norm": 0.5632906184815956, "learning_rate": 8.189076709180898e-05, "loss": 12.0716, "step": 20980 }, { "epoch": 1.1424993023081689, "grad_norm": 0.593895236471705, "learning_rate": 8.188209470427159e-05, "loss": 12.1147, "step": 20981 }, { "epoch": 1.1425537563047519, "grad_norm": 0.520442931732901, "learning_rate": 8.187342245762209e-05, "loss": 11.8542, "step": 20982 }, { "epoch": 1.1426082103013349, "grad_norm": 0.5405669907945672, "learning_rate": 8.186475035192788e-05, "loss": 12.0936, "step": 20983 }, { "epoch": 1.1426626642979179, "grad_norm": 0.5492648899230916, "learning_rate": 8.185607838725639e-05, "loss": 11.9907, "step": 20984 }, { "epoch": 1.1427171182945008, "grad_norm": 0.5803230662974547, "learning_rate": 8.184740656367515e-05, "loss": 12.0568, "step": 20985 }, { "epoch": 1.1427715722910838, "grad_norm": 0.46560360122414524, "learning_rate": 8.183873488125147e-05, "loss": 11.9337, "step": 20986 }, { "epoch": 1.1428260262876668, "grad_norm": 0.49780376200500465, "learning_rate": 8.183006334005283e-05, "loss": 12.0872, "step": 20987 }, { "epoch": 1.1428804802842498, "grad_norm": 0.6016666680327781, "learning_rate": 8.182139194014665e-05, "loss": 12.0181, "step": 20988 }, { "epoch": 1.1429349342808328, "grad_norm": 0.6388919810710112, "learning_rate": 8.18127206816004e-05, "loss": 12.1541, "step": 20989 }, { "epoch": 1.1429893882774158, "grad_norm": 0.5375566325314137, "learning_rate": 8.180404956448147e-05, "loss": 12.0993, "step": 20990 }, { "epoch": 1.1430438422739988, "grad_norm": 0.5732752135944795, "learning_rate": 8.179537858885731e-05, "loss": 12.0784, "step": 20991 }, { "epoch": 1.143098296270582, "grad_norm": 0.5518683111626997, "learning_rate": 8.178670775479534e-05, "loss": 12.0159, "step": 20992 }, { "epoch": 1.143152750267165, "grad_norm": 0.5556360373222765, "learning_rate": 8.177803706236299e-05, "loss": 11.9894, "step": 20993 }, { "epoch": 1.143207204263748, "grad_norm": 0.5298063978603275, "learning_rate": 8.176936651162767e-05, "loss": 11.9655, "step": 20994 }, { "epoch": 1.143261658260331, "grad_norm": 0.5793338571832891, "learning_rate": 8.176069610265684e-05, "loss": 12.0498, "step": 20995 }, { "epoch": 1.143316112256914, "grad_norm": 0.6264256406282895, "learning_rate": 8.175202583551787e-05, "loss": 11.9412, "step": 20996 }, { "epoch": 1.143370566253497, "grad_norm": 0.5361244133679585, "learning_rate": 8.174335571027823e-05, "loss": 12.0836, "step": 20997 }, { "epoch": 1.14342502025008, "grad_norm": 0.5556546496980178, "learning_rate": 8.173468572700529e-05, "loss": 11.8692, "step": 20998 }, { "epoch": 1.143479474246663, "grad_norm": 0.5293104875373195, "learning_rate": 8.172601588576648e-05, "loss": 11.9413, "step": 20999 }, { "epoch": 1.143533928243246, "grad_norm": 0.5472832595633416, "learning_rate": 8.171734618662927e-05, "loss": 12.0546, "step": 21000 }, { "epoch": 1.143588382239829, "grad_norm": 0.5230902969254605, "learning_rate": 8.170867662966102e-05, "loss": 12.0118, "step": 21001 }, { "epoch": 1.143642836236412, "grad_norm": 0.5418456221500304, "learning_rate": 8.170000721492918e-05, "loss": 11.9179, "step": 21002 }, { "epoch": 1.143697290232995, "grad_norm": 0.5203268480355566, "learning_rate": 8.169133794250116e-05, "loss": 12.0247, "step": 21003 }, { "epoch": 1.143751744229578, "grad_norm": 0.5229551837980176, "learning_rate": 8.168266881244436e-05, "loss": 11.903, "step": 21004 }, { "epoch": 1.1438061982261611, "grad_norm": 0.5344584978853519, "learning_rate": 8.167399982482622e-05, "loss": 11.9609, "step": 21005 }, { "epoch": 1.1438606522227441, "grad_norm": 0.5228388886315154, "learning_rate": 8.166533097971412e-05, "loss": 12.05, "step": 21006 }, { "epoch": 1.1439151062193271, "grad_norm": 0.5711820104447795, "learning_rate": 8.165666227717546e-05, "loss": 11.9169, "step": 21007 }, { "epoch": 1.1439695602159101, "grad_norm": 0.5198358688515019, "learning_rate": 8.164799371727768e-05, "loss": 11.9617, "step": 21008 }, { "epoch": 1.1440240142124931, "grad_norm": 0.5458365819539345, "learning_rate": 8.163932530008817e-05, "loss": 12.09, "step": 21009 }, { "epoch": 1.144078468209076, "grad_norm": 0.5959144261997894, "learning_rate": 8.163065702567433e-05, "loss": 12.0294, "step": 21010 }, { "epoch": 1.144132922205659, "grad_norm": 0.5189776173020441, "learning_rate": 8.162198889410362e-05, "loss": 12.0019, "step": 21011 }, { "epoch": 1.144187376202242, "grad_norm": 0.5838636030572146, "learning_rate": 8.161332090544339e-05, "loss": 11.9223, "step": 21012 }, { "epoch": 1.144241830198825, "grad_norm": 0.553351221664354, "learning_rate": 8.160465305976107e-05, "loss": 12.0005, "step": 21013 }, { "epoch": 1.144296284195408, "grad_norm": 0.5587968080469131, "learning_rate": 8.159598535712405e-05, "loss": 11.8973, "step": 21014 }, { "epoch": 1.1443507381919913, "grad_norm": 0.6187991735080954, "learning_rate": 8.158731779759975e-05, "loss": 12.2249, "step": 21015 }, { "epoch": 1.1444051921885743, "grad_norm": 0.5533867559389424, "learning_rate": 8.157865038125552e-05, "loss": 12.02, "step": 21016 }, { "epoch": 1.1444596461851573, "grad_norm": 0.6128098583652383, "learning_rate": 8.156998310815882e-05, "loss": 12.2211, "step": 21017 }, { "epoch": 1.1445141001817403, "grad_norm": 0.5387386573489695, "learning_rate": 8.156131597837701e-05, "loss": 12.0796, "step": 21018 }, { "epoch": 1.1445685541783233, "grad_norm": 0.5328584040496055, "learning_rate": 8.15526489919775e-05, "loss": 11.837, "step": 21019 }, { "epoch": 1.1446230081749063, "grad_norm": 0.5710270942995136, "learning_rate": 8.154398214902769e-05, "loss": 12.1224, "step": 21020 }, { "epoch": 1.1446774621714892, "grad_norm": 0.5795974385270815, "learning_rate": 8.153531544959494e-05, "loss": 12.0274, "step": 21021 }, { "epoch": 1.1447319161680722, "grad_norm": 0.5319926295155886, "learning_rate": 8.15266488937467e-05, "loss": 11.9125, "step": 21022 }, { "epoch": 1.1447863701646552, "grad_norm": 0.5604516086391089, "learning_rate": 8.151798248155032e-05, "loss": 11.8868, "step": 21023 }, { "epoch": 1.1448408241612382, "grad_norm": 0.5489005774116194, "learning_rate": 8.150931621307323e-05, "loss": 11.9741, "step": 21024 }, { "epoch": 1.1448952781578212, "grad_norm": 0.5389533652488021, "learning_rate": 8.150065008838281e-05, "loss": 12.0303, "step": 21025 }, { "epoch": 1.1449497321544042, "grad_norm": 0.5405954217513335, "learning_rate": 8.149198410754641e-05, "loss": 11.9794, "step": 21026 }, { "epoch": 1.1450041861509872, "grad_norm": 0.5810190019682795, "learning_rate": 8.148331827063147e-05, "loss": 11.935, "step": 21027 }, { "epoch": 1.1450586401475704, "grad_norm": 0.7319128147122567, "learning_rate": 8.147465257770532e-05, "loss": 12.0482, "step": 21028 }, { "epoch": 1.1451130941441534, "grad_norm": 0.5822036562162385, "learning_rate": 8.14659870288354e-05, "loss": 12.0907, "step": 21029 }, { "epoch": 1.1451675481407364, "grad_norm": 0.5532352398438115, "learning_rate": 8.145732162408907e-05, "loss": 11.9685, "step": 21030 }, { "epoch": 1.1452220021373194, "grad_norm": 0.5712018466987027, "learning_rate": 8.144865636353371e-05, "loss": 12.1183, "step": 21031 }, { "epoch": 1.1452764561339024, "grad_norm": 0.54110084551623, "learning_rate": 8.14399912472367e-05, "loss": 12.0274, "step": 21032 }, { "epoch": 1.1453309101304854, "grad_norm": 0.5363305858709243, "learning_rate": 8.143132627526545e-05, "loss": 12.0609, "step": 21033 }, { "epoch": 1.1453853641270684, "grad_norm": 0.5843397847758113, "learning_rate": 8.142266144768729e-05, "loss": 12.0073, "step": 21034 }, { "epoch": 1.1454398181236514, "grad_norm": 0.5350188464651414, "learning_rate": 8.141399676456972e-05, "loss": 12.0521, "step": 21035 }, { "epoch": 1.1454942721202344, "grad_norm": 0.5420875012653492, "learning_rate": 8.140533222597995e-05, "loss": 12.0326, "step": 21036 }, { "epoch": 1.1455487261168174, "grad_norm": 0.5653638168090159, "learning_rate": 8.139666783198542e-05, "loss": 11.9954, "step": 21037 }, { "epoch": 1.1456031801134006, "grad_norm": 0.5723196320718857, "learning_rate": 8.138800358265354e-05, "loss": 12.0604, "step": 21038 }, { "epoch": 1.1456576341099836, "grad_norm": 0.5773998459245281, "learning_rate": 8.137933947805169e-05, "loss": 12.0177, "step": 21039 }, { "epoch": 1.1457120881065666, "grad_norm": 0.51146925626736, "learning_rate": 8.13706755182472e-05, "loss": 12.0252, "step": 21040 }, { "epoch": 1.1457665421031495, "grad_norm": 0.6265872521892802, "learning_rate": 8.136201170330746e-05, "loss": 12.0123, "step": 21041 }, { "epoch": 1.1458209960997325, "grad_norm": 0.4907310093920785, "learning_rate": 8.135334803329983e-05, "loss": 11.9729, "step": 21042 }, { "epoch": 1.1458754500963155, "grad_norm": 0.5463913693585858, "learning_rate": 8.134468450829172e-05, "loss": 11.9928, "step": 21043 }, { "epoch": 1.1459299040928985, "grad_norm": 0.5455569596044371, "learning_rate": 8.133602112835043e-05, "loss": 11.9847, "step": 21044 }, { "epoch": 1.1459843580894815, "grad_norm": 0.5316381424131947, "learning_rate": 8.132735789354346e-05, "loss": 11.9869, "step": 21045 }, { "epoch": 1.1460388120860645, "grad_norm": 0.5544940775590294, "learning_rate": 8.131869480393803e-05, "loss": 12.0745, "step": 21046 }, { "epoch": 1.1460932660826475, "grad_norm": 0.6004156000299287, "learning_rate": 8.131003185960154e-05, "loss": 12.0626, "step": 21047 }, { "epoch": 1.1461477200792305, "grad_norm": 0.5218071904391417, "learning_rate": 8.130136906060137e-05, "loss": 12.0363, "step": 21048 }, { "epoch": 1.1462021740758135, "grad_norm": 0.5389146853602708, "learning_rate": 8.129270640700492e-05, "loss": 12.006, "step": 21049 }, { "epoch": 1.1462566280723965, "grad_norm": 0.6203417650257257, "learning_rate": 8.128404389887953e-05, "loss": 12.0354, "step": 21050 }, { "epoch": 1.1463110820689797, "grad_norm": 0.5902856739990013, "learning_rate": 8.127538153629253e-05, "loss": 12.0125, "step": 21051 }, { "epoch": 1.1463655360655627, "grad_norm": 0.5306534686815397, "learning_rate": 8.126671931931131e-05, "loss": 12.1156, "step": 21052 }, { "epoch": 1.1464199900621457, "grad_norm": 0.5373704830394005, "learning_rate": 8.125805724800323e-05, "loss": 11.9522, "step": 21053 }, { "epoch": 1.1464744440587287, "grad_norm": 0.5377695949027106, "learning_rate": 8.124939532243564e-05, "loss": 11.9527, "step": 21054 }, { "epoch": 1.1465288980553117, "grad_norm": 0.5911281005314784, "learning_rate": 8.12407335426759e-05, "loss": 12.1034, "step": 21055 }, { "epoch": 1.1465833520518947, "grad_norm": 0.4977620166418643, "learning_rate": 8.123207190879136e-05, "loss": 11.9416, "step": 21056 }, { "epoch": 1.1466378060484776, "grad_norm": 0.5116839969984828, "learning_rate": 8.122341042084938e-05, "loss": 12.0125, "step": 21057 }, { "epoch": 1.1466922600450606, "grad_norm": 0.537829407364425, "learning_rate": 8.12147490789173e-05, "loss": 12.0088, "step": 21058 }, { "epoch": 1.1467467140416436, "grad_norm": 0.5566024102714832, "learning_rate": 8.120608788306245e-05, "loss": 11.992, "step": 21059 }, { "epoch": 1.1468011680382266, "grad_norm": 0.5172239653070049, "learning_rate": 8.119742683335225e-05, "loss": 12.038, "step": 21060 }, { "epoch": 1.1468556220348098, "grad_norm": 0.5081733170513308, "learning_rate": 8.1188765929854e-05, "loss": 11.9034, "step": 21061 }, { "epoch": 1.1469100760313928, "grad_norm": 0.6079477794406521, "learning_rate": 8.118010517263506e-05, "loss": 11.9627, "step": 21062 }, { "epoch": 1.1469645300279758, "grad_norm": 0.5826173534014737, "learning_rate": 8.11714445617628e-05, "loss": 12.0594, "step": 21063 }, { "epoch": 1.1470189840245588, "grad_norm": 0.5632061995460318, "learning_rate": 8.116278409730452e-05, "loss": 11.9167, "step": 21064 }, { "epoch": 1.1470734380211418, "grad_norm": 0.5485729104136255, "learning_rate": 8.115412377932762e-05, "loss": 12.1227, "step": 21065 }, { "epoch": 1.1471278920177248, "grad_norm": 0.5528861036138745, "learning_rate": 8.11454636078994e-05, "loss": 12.0376, "step": 21066 }, { "epoch": 1.1471823460143078, "grad_norm": 0.5574868688652987, "learning_rate": 8.11368035830872e-05, "loss": 11.8677, "step": 21067 }, { "epoch": 1.1472368000108908, "grad_norm": 0.5818010753479786, "learning_rate": 8.112814370495839e-05, "loss": 12.0715, "step": 21068 }, { "epoch": 1.1472912540074738, "grad_norm": 0.6798929789492493, "learning_rate": 8.11194839735803e-05, "loss": 12.1723, "step": 21069 }, { "epoch": 1.1473457080040568, "grad_norm": 0.5422378399543559, "learning_rate": 8.111082438902025e-05, "loss": 12.065, "step": 21070 }, { "epoch": 1.1474001620006398, "grad_norm": 0.5122511501381658, "learning_rate": 8.110216495134562e-05, "loss": 11.9232, "step": 21071 }, { "epoch": 1.1474546159972228, "grad_norm": 0.5453350705386038, "learning_rate": 8.10935056606237e-05, "loss": 12.0262, "step": 21072 }, { "epoch": 1.1475090699938058, "grad_norm": 0.6438196027922032, "learning_rate": 8.108484651692188e-05, "loss": 12.0422, "step": 21073 }, { "epoch": 1.1475635239903887, "grad_norm": 0.6528455938916412, "learning_rate": 8.107618752030745e-05, "loss": 12.0682, "step": 21074 }, { "epoch": 1.147617977986972, "grad_norm": 0.5451997878687775, "learning_rate": 8.10675286708478e-05, "loss": 12.0776, "step": 21075 }, { "epoch": 1.147672431983555, "grad_norm": 0.520908444699066, "learning_rate": 8.105886996861017e-05, "loss": 11.988, "step": 21076 }, { "epoch": 1.147726885980138, "grad_norm": 0.5852947218268076, "learning_rate": 8.105021141366196e-05, "loss": 12.0616, "step": 21077 }, { "epoch": 1.147781339976721, "grad_norm": 0.5432544667041285, "learning_rate": 8.104155300607049e-05, "loss": 11.9157, "step": 21078 }, { "epoch": 1.147835793973304, "grad_norm": 0.4872885499050939, "learning_rate": 8.103289474590308e-05, "loss": 11.9543, "step": 21079 }, { "epoch": 1.147890247969887, "grad_norm": 0.5648118091473893, "learning_rate": 8.102423663322704e-05, "loss": 11.9183, "step": 21080 }, { "epoch": 1.14794470196647, "grad_norm": 0.5579987798738413, "learning_rate": 8.101557866810972e-05, "loss": 12.1045, "step": 21081 }, { "epoch": 1.147999155963053, "grad_norm": 0.5483380124450635, "learning_rate": 8.100692085061847e-05, "loss": 12.0214, "step": 21082 }, { "epoch": 1.148053609959636, "grad_norm": 0.532668117450102, "learning_rate": 8.099826318082057e-05, "loss": 12.1756, "step": 21083 }, { "epoch": 1.148108063956219, "grad_norm": 0.5636987922499963, "learning_rate": 8.098960565878337e-05, "loss": 11.9614, "step": 21084 }, { "epoch": 1.148162517952802, "grad_norm": 0.5606659244042904, "learning_rate": 8.098094828457424e-05, "loss": 12.1564, "step": 21085 }, { "epoch": 1.148216971949385, "grad_norm": 0.5411349597727095, "learning_rate": 8.097229105826036e-05, "loss": 11.9888, "step": 21086 }, { "epoch": 1.148271425945968, "grad_norm": 0.6505933994179495, "learning_rate": 8.096363397990917e-05, "loss": 11.8961, "step": 21087 }, { "epoch": 1.148325879942551, "grad_norm": 0.495973865804788, "learning_rate": 8.095497704958795e-05, "loss": 11.969, "step": 21088 }, { "epoch": 1.148380333939134, "grad_norm": 0.48686591604475277, "learning_rate": 8.094632026736403e-05, "loss": 11.9074, "step": 21089 }, { "epoch": 1.148434787935717, "grad_norm": 0.5323346380652924, "learning_rate": 8.093766363330471e-05, "loss": 11.9727, "step": 21090 }, { "epoch": 1.1484892419323, "grad_norm": 0.5178261071161707, "learning_rate": 8.092900714747731e-05, "loss": 11.9483, "step": 21091 }, { "epoch": 1.148543695928883, "grad_norm": 0.543103451995716, "learning_rate": 8.092035080994917e-05, "loss": 11.9492, "step": 21092 }, { "epoch": 1.148598149925466, "grad_norm": 0.5291390005538827, "learning_rate": 8.091169462078754e-05, "loss": 12.0143, "step": 21093 }, { "epoch": 1.148652603922049, "grad_norm": 0.560070717878981, "learning_rate": 8.09030385800598e-05, "loss": 12.1458, "step": 21094 }, { "epoch": 1.148707057918632, "grad_norm": 0.5588331491242884, "learning_rate": 8.089438268783323e-05, "loss": 12.0294, "step": 21095 }, { "epoch": 1.148761511915215, "grad_norm": 0.5007083159102311, "learning_rate": 8.08857269441752e-05, "loss": 11.9746, "step": 21096 }, { "epoch": 1.148815965911798, "grad_norm": 0.5677994203029244, "learning_rate": 8.087707134915288e-05, "loss": 11.9238, "step": 21097 }, { "epoch": 1.1488704199083812, "grad_norm": 0.5813209840921494, "learning_rate": 8.08684159028337e-05, "loss": 12.088, "step": 21098 }, { "epoch": 1.1489248739049642, "grad_norm": 0.5525302957048495, "learning_rate": 8.085976060528491e-05, "loss": 12.0606, "step": 21099 }, { "epoch": 1.1489793279015472, "grad_norm": 0.5496389271612488, "learning_rate": 8.085110545657385e-05, "loss": 12.0706, "step": 21100 }, { "epoch": 1.1490337818981302, "grad_norm": 0.5482635604991649, "learning_rate": 8.084245045676779e-05, "loss": 12.0023, "step": 21101 }, { "epoch": 1.1490882358947132, "grad_norm": 0.5469133095471433, "learning_rate": 8.083379560593406e-05, "loss": 12.0108, "step": 21102 }, { "epoch": 1.1491426898912962, "grad_norm": 0.5482534608450617, "learning_rate": 8.082514090413994e-05, "loss": 12.0259, "step": 21103 }, { "epoch": 1.1491971438878792, "grad_norm": 0.5767821949269134, "learning_rate": 8.081648635145272e-05, "loss": 12.1142, "step": 21104 }, { "epoch": 1.1492515978844622, "grad_norm": 0.5878543688318114, "learning_rate": 8.080783194793975e-05, "loss": 11.9575, "step": 21105 }, { "epoch": 1.1493060518810452, "grad_norm": 0.537615416805045, "learning_rate": 8.079917769366833e-05, "loss": 11.9365, "step": 21106 }, { "epoch": 1.1493605058776282, "grad_norm": 0.5902894231903612, "learning_rate": 8.079052358870568e-05, "loss": 12.0805, "step": 21107 }, { "epoch": 1.1494149598742114, "grad_norm": 0.5797267215268892, "learning_rate": 8.078186963311912e-05, "loss": 12.0357, "step": 21108 }, { "epoch": 1.1494694138707944, "grad_norm": 0.5578950816383108, "learning_rate": 8.0773215826976e-05, "loss": 12.0792, "step": 21109 }, { "epoch": 1.1495238678673774, "grad_norm": 0.531064236981189, "learning_rate": 8.076456217034356e-05, "loss": 12.0386, "step": 21110 }, { "epoch": 1.1495783218639604, "grad_norm": 0.508623729429057, "learning_rate": 8.075590866328911e-05, "loss": 12.0248, "step": 21111 }, { "epoch": 1.1496327758605434, "grad_norm": 0.5843775193424045, "learning_rate": 8.074725530587996e-05, "loss": 12.0742, "step": 21112 }, { "epoch": 1.1496872298571263, "grad_norm": 0.4926905519642264, "learning_rate": 8.073860209818336e-05, "loss": 11.9017, "step": 21113 }, { "epoch": 1.1497416838537093, "grad_norm": 0.5328678072904144, "learning_rate": 8.072994904026663e-05, "loss": 11.9381, "step": 21114 }, { "epoch": 1.1497961378502923, "grad_norm": 0.526885307689961, "learning_rate": 8.072129613219703e-05, "loss": 11.9485, "step": 21115 }, { "epoch": 1.1498505918468753, "grad_norm": 0.5497004056654098, "learning_rate": 8.071264337404192e-05, "loss": 12.0153, "step": 21116 }, { "epoch": 1.1499050458434583, "grad_norm": 0.5863572104234963, "learning_rate": 8.070399076586849e-05, "loss": 11.9572, "step": 21117 }, { "epoch": 1.1499594998400413, "grad_norm": 0.5615088944244939, "learning_rate": 8.069533830774407e-05, "loss": 12.0666, "step": 21118 }, { "epoch": 1.1500139538366243, "grad_norm": 0.7323211513084971, "learning_rate": 8.06866859997359e-05, "loss": 12.0178, "step": 21119 }, { "epoch": 1.1500684078332073, "grad_norm": 0.5678232451995301, "learning_rate": 8.067803384191133e-05, "loss": 11.9026, "step": 21120 }, { "epoch": 1.1501228618297905, "grad_norm": 0.5446422610668087, "learning_rate": 8.066938183433762e-05, "loss": 11.9061, "step": 21121 }, { "epoch": 1.1501773158263735, "grad_norm": 0.537432196594231, "learning_rate": 8.066072997708203e-05, "loss": 11.9063, "step": 21122 }, { "epoch": 1.1502317698229565, "grad_norm": 0.557170520357956, "learning_rate": 8.065207827021184e-05, "loss": 11.9433, "step": 21123 }, { "epoch": 1.1502862238195395, "grad_norm": 0.7040948029689192, "learning_rate": 8.064342671379435e-05, "loss": 12.1554, "step": 21124 }, { "epoch": 1.1503406778161225, "grad_norm": 0.6032339334585413, "learning_rate": 8.063477530789681e-05, "loss": 11.9106, "step": 21125 }, { "epoch": 1.1503951318127055, "grad_norm": 0.5399728029226735, "learning_rate": 8.062612405258652e-05, "loss": 11.9057, "step": 21126 }, { "epoch": 1.1504495858092885, "grad_norm": 0.4955261796241731, "learning_rate": 8.061747294793071e-05, "loss": 12.0238, "step": 21127 }, { "epoch": 1.1505040398058715, "grad_norm": 0.5855436553356044, "learning_rate": 8.06088219939967e-05, "loss": 12.0454, "step": 21128 }, { "epoch": 1.1505584938024545, "grad_norm": 0.605434000788639, "learning_rate": 8.060017119085173e-05, "loss": 12.1447, "step": 21129 }, { "epoch": 1.1506129477990374, "grad_norm": 0.5567938188227343, "learning_rate": 8.059152053856307e-05, "loss": 11.985, "step": 21130 }, { "epoch": 1.1506674017956207, "grad_norm": 0.5553514637675814, "learning_rate": 8.058287003719802e-05, "loss": 12.0672, "step": 21131 }, { "epoch": 1.1507218557922037, "grad_norm": 0.5184625714277747, "learning_rate": 8.057421968682383e-05, "loss": 12.0448, "step": 21132 }, { "epoch": 1.1507763097887866, "grad_norm": 0.5204256390067515, "learning_rate": 8.056556948750777e-05, "loss": 11.9548, "step": 21133 }, { "epoch": 1.1508307637853696, "grad_norm": 0.5953661138703211, "learning_rate": 8.055691943931707e-05, "loss": 12.0341, "step": 21134 }, { "epoch": 1.1508852177819526, "grad_norm": 0.8378579415423998, "learning_rate": 8.054826954231906e-05, "loss": 12.039, "step": 21135 }, { "epoch": 1.1509396717785356, "grad_norm": 0.5715107785392818, "learning_rate": 8.053961979658098e-05, "loss": 12.1688, "step": 21136 }, { "epoch": 1.1509941257751186, "grad_norm": 0.563690352731225, "learning_rate": 8.053097020217006e-05, "loss": 12.0174, "step": 21137 }, { "epoch": 1.1510485797717016, "grad_norm": 0.5488959381633189, "learning_rate": 8.052232075915359e-05, "loss": 12.0471, "step": 21138 }, { "epoch": 1.1511030337682846, "grad_norm": 0.5663448934394109, "learning_rate": 8.05136714675988e-05, "loss": 12.1247, "step": 21139 }, { "epoch": 1.1511574877648676, "grad_norm": 0.5685910922950765, "learning_rate": 8.050502232757297e-05, "loss": 12.0227, "step": 21140 }, { "epoch": 1.1512119417614506, "grad_norm": 0.5900570812695328, "learning_rate": 8.049637333914336e-05, "loss": 11.9444, "step": 21141 }, { "epoch": 1.1512663957580336, "grad_norm": 0.5252079331707762, "learning_rate": 8.04877245023772e-05, "loss": 12.0593, "step": 21142 }, { "epoch": 1.1513208497546166, "grad_norm": 0.5511587861500431, "learning_rate": 8.047907581734178e-05, "loss": 12.0923, "step": 21143 }, { "epoch": 1.1513753037511998, "grad_norm": 0.47982691738561195, "learning_rate": 8.047042728410436e-05, "loss": 11.9816, "step": 21144 }, { "epoch": 1.1514297577477828, "grad_norm": 0.5415394760446558, "learning_rate": 8.046177890273216e-05, "loss": 12.122, "step": 21145 }, { "epoch": 1.1514842117443658, "grad_norm": 0.5198899441470982, "learning_rate": 8.045313067329248e-05, "loss": 11.7322, "step": 21146 }, { "epoch": 1.1515386657409488, "grad_norm": 0.5429851464257295, "learning_rate": 8.044448259585249e-05, "loss": 12.0108, "step": 21147 }, { "epoch": 1.1515931197375318, "grad_norm": 0.5622638318610981, "learning_rate": 8.043583467047949e-05, "loss": 12.1389, "step": 21148 }, { "epoch": 1.1516475737341147, "grad_norm": 0.530104001255593, "learning_rate": 8.042718689724072e-05, "loss": 11.9547, "step": 21149 }, { "epoch": 1.1517020277306977, "grad_norm": 0.5883781883269091, "learning_rate": 8.041853927620345e-05, "loss": 12.0862, "step": 21150 }, { "epoch": 1.1517564817272807, "grad_norm": 0.5161862523816715, "learning_rate": 8.040989180743487e-05, "loss": 11.8887, "step": 21151 }, { "epoch": 1.1518109357238637, "grad_norm": 0.5728392481592719, "learning_rate": 8.040124449100226e-05, "loss": 12.0205, "step": 21152 }, { "epoch": 1.1518653897204467, "grad_norm": 0.5858138955307554, "learning_rate": 8.039259732697286e-05, "loss": 12.0949, "step": 21153 }, { "epoch": 1.1519198437170297, "grad_norm": 0.5524384241816089, "learning_rate": 8.038395031541392e-05, "loss": 12.0203, "step": 21154 }, { "epoch": 1.151974297713613, "grad_norm": 0.535014389337564, "learning_rate": 8.037530345639267e-05, "loss": 11.9085, "step": 21155 }, { "epoch": 1.152028751710196, "grad_norm": 0.6554376807816443, "learning_rate": 8.036665674997639e-05, "loss": 11.9767, "step": 21156 }, { "epoch": 1.152083205706779, "grad_norm": 0.5947689449903816, "learning_rate": 8.035801019623224e-05, "loss": 12.0815, "step": 21157 }, { "epoch": 1.152137659703362, "grad_norm": 0.548992716856938, "learning_rate": 8.034936379522749e-05, "loss": 11.892, "step": 21158 }, { "epoch": 1.152192113699945, "grad_norm": 0.5515032063781492, "learning_rate": 8.034071754702938e-05, "loss": 11.9991, "step": 21159 }, { "epoch": 1.152246567696528, "grad_norm": 0.5111353487376212, "learning_rate": 8.033207145170516e-05, "loss": 11.8936, "step": 21160 }, { "epoch": 1.1523010216931109, "grad_norm": 0.6442684090397887, "learning_rate": 8.032342550932206e-05, "loss": 12.0178, "step": 21161 }, { "epoch": 1.1523554756896939, "grad_norm": 0.5372293081498746, "learning_rate": 8.03147797199473e-05, "loss": 11.7815, "step": 21162 }, { "epoch": 1.1524099296862769, "grad_norm": 0.531260849518373, "learning_rate": 8.030613408364812e-05, "loss": 12.1365, "step": 21163 }, { "epoch": 1.1524643836828599, "grad_norm": 0.5124011686686147, "learning_rate": 8.029748860049168e-05, "loss": 12.0414, "step": 21164 }, { "epoch": 1.1525188376794429, "grad_norm": 0.5123375927676078, "learning_rate": 8.028884327054534e-05, "loss": 11.8998, "step": 21165 }, { "epoch": 1.1525732916760258, "grad_norm": 0.5245392110966306, "learning_rate": 8.028019809387629e-05, "loss": 11.9568, "step": 21166 }, { "epoch": 1.1526277456726088, "grad_norm": 0.5271637670116032, "learning_rate": 8.027155307055167e-05, "loss": 11.9273, "step": 21167 }, { "epoch": 1.152682199669192, "grad_norm": 0.4852455642741703, "learning_rate": 8.026290820063876e-05, "loss": 11.9551, "step": 21168 }, { "epoch": 1.152736653665775, "grad_norm": 0.5851320866655585, "learning_rate": 8.02542634842048e-05, "loss": 11.9567, "step": 21169 }, { "epoch": 1.152791107662358, "grad_norm": 0.5351084480055083, "learning_rate": 8.024561892131699e-05, "loss": 11.8094, "step": 21170 }, { "epoch": 1.152845561658941, "grad_norm": 0.5882382168396529, "learning_rate": 8.023697451204258e-05, "loss": 12.1339, "step": 21171 }, { "epoch": 1.152900015655524, "grad_norm": 0.6477972692190181, "learning_rate": 8.022833025644875e-05, "loss": 12.2011, "step": 21172 }, { "epoch": 1.152954469652107, "grad_norm": 0.5532746213673208, "learning_rate": 8.021968615460275e-05, "loss": 12.0372, "step": 21173 }, { "epoch": 1.15300892364869, "grad_norm": 0.5286103616233467, "learning_rate": 8.021104220657178e-05, "loss": 11.9014, "step": 21174 }, { "epoch": 1.153063377645273, "grad_norm": 0.57629068999438, "learning_rate": 8.020239841242305e-05, "loss": 12.0307, "step": 21175 }, { "epoch": 1.153117831641856, "grad_norm": 0.5705025655303789, "learning_rate": 8.019375477222386e-05, "loss": 12.0665, "step": 21176 }, { "epoch": 1.153172285638439, "grad_norm": 0.5119161902042083, "learning_rate": 8.01851112860413e-05, "loss": 12.0077, "step": 21177 }, { "epoch": 1.1532267396350222, "grad_norm": 0.5084787593407395, "learning_rate": 8.017646795394264e-05, "loss": 11.99, "step": 21178 }, { "epoch": 1.1532811936316052, "grad_norm": 0.5625113833150053, "learning_rate": 8.016782477599507e-05, "loss": 11.9331, "step": 21179 }, { "epoch": 1.1533356476281882, "grad_norm": 0.5683958156635728, "learning_rate": 8.015918175226584e-05, "loss": 12.0022, "step": 21180 }, { "epoch": 1.1533901016247712, "grad_norm": 0.552218824644041, "learning_rate": 8.015053888282215e-05, "loss": 11.9492, "step": 21181 }, { "epoch": 1.1534445556213542, "grad_norm": 0.538202368181314, "learning_rate": 8.014189616773117e-05, "loss": 11.9867, "step": 21182 }, { "epoch": 1.1534990096179372, "grad_norm": 0.5772816653059827, "learning_rate": 8.013325360706017e-05, "loss": 11.9636, "step": 21183 }, { "epoch": 1.1535534636145202, "grad_norm": 0.5305500868198957, "learning_rate": 8.012461120087631e-05, "loss": 11.9883, "step": 21184 }, { "epoch": 1.1536079176111032, "grad_norm": 0.6269183943377432, "learning_rate": 8.01159689492468e-05, "loss": 12.0116, "step": 21185 }, { "epoch": 1.1536623716076861, "grad_norm": 0.5043439783473882, "learning_rate": 8.010732685223888e-05, "loss": 11.9738, "step": 21186 }, { "epoch": 1.1537168256042691, "grad_norm": 0.5478230975249527, "learning_rate": 8.009868490991969e-05, "loss": 12.1364, "step": 21187 }, { "epoch": 1.1537712796008521, "grad_norm": 0.5167076642854395, "learning_rate": 8.009004312235648e-05, "loss": 11.9932, "step": 21188 }, { "epoch": 1.1538257335974351, "grad_norm": 0.591628232371505, "learning_rate": 8.008140148961641e-05, "loss": 12.0235, "step": 21189 }, { "epoch": 1.1538801875940181, "grad_norm": 0.5232239089879509, "learning_rate": 8.007276001176672e-05, "loss": 12.0338, "step": 21190 }, { "epoch": 1.1539346415906013, "grad_norm": 0.5218900289117727, "learning_rate": 8.006411868887456e-05, "loss": 12.0667, "step": 21191 }, { "epoch": 1.1539890955871843, "grad_norm": 0.5002955849688884, "learning_rate": 8.005547752100718e-05, "loss": 12.0313, "step": 21192 }, { "epoch": 1.1540435495837673, "grad_norm": 0.6418628227847089, "learning_rate": 8.004683650823175e-05, "loss": 12.0277, "step": 21193 }, { "epoch": 1.1540980035803503, "grad_norm": 0.5421520459976056, "learning_rate": 8.003819565061548e-05, "loss": 12.0242, "step": 21194 }, { "epoch": 1.1541524575769333, "grad_norm": 0.5854748086041517, "learning_rate": 8.002955494822553e-05, "loss": 11.9826, "step": 21195 }, { "epoch": 1.1542069115735163, "grad_norm": 0.5257715022701855, "learning_rate": 8.002091440112914e-05, "loss": 11.9302, "step": 21196 }, { "epoch": 1.1542613655700993, "grad_norm": 0.546327156552604, "learning_rate": 8.001227400939345e-05, "loss": 11.973, "step": 21197 }, { "epoch": 1.1543158195666823, "grad_norm": 0.594397566181094, "learning_rate": 8.000363377308566e-05, "loss": 12.0479, "step": 21198 }, { "epoch": 1.1543702735632653, "grad_norm": 0.5339239788357752, "learning_rate": 7.999499369227298e-05, "loss": 11.9646, "step": 21199 }, { "epoch": 1.1544247275598483, "grad_norm": 0.5500296085494902, "learning_rate": 7.998635376702257e-05, "loss": 11.9813, "step": 21200 }, { "epoch": 1.1544791815564315, "grad_norm": 0.5488862208409117, "learning_rate": 7.997771399740163e-05, "loss": 12.0284, "step": 21201 }, { "epoch": 1.1545336355530145, "grad_norm": 0.5354108211709937, "learning_rate": 7.996907438347734e-05, "loss": 11.8456, "step": 21202 }, { "epoch": 1.1545880895495975, "grad_norm": 0.5203275691147052, "learning_rate": 7.99604349253169e-05, "loss": 12.0175, "step": 21203 }, { "epoch": 1.1546425435461805, "grad_norm": 0.5707911160642656, "learning_rate": 7.995179562298746e-05, "loss": 11.8751, "step": 21204 }, { "epoch": 1.1546969975427634, "grad_norm": 0.5927001165041338, "learning_rate": 7.994315647655624e-05, "loss": 12.0275, "step": 21205 }, { "epoch": 1.1547514515393464, "grad_norm": 0.5277214276087671, "learning_rate": 7.993451748609042e-05, "loss": 11.9848, "step": 21206 }, { "epoch": 1.1548059055359294, "grad_norm": 0.5769471969029595, "learning_rate": 7.992587865165713e-05, "loss": 11.9488, "step": 21207 }, { "epoch": 1.1548603595325124, "grad_norm": 0.5496266310399047, "learning_rate": 7.991723997332358e-05, "loss": 12.0007, "step": 21208 }, { "epoch": 1.1549148135290954, "grad_norm": 0.6594062965515542, "learning_rate": 7.990860145115694e-05, "loss": 12.0828, "step": 21209 }, { "epoch": 1.1549692675256784, "grad_norm": 0.5677854047819902, "learning_rate": 7.989996308522437e-05, "loss": 12.0297, "step": 21210 }, { "epoch": 1.1550237215222614, "grad_norm": 0.551883355920494, "learning_rate": 7.989132487559307e-05, "loss": 12.0298, "step": 21211 }, { "epoch": 1.1550781755188444, "grad_norm": 0.5323634440159634, "learning_rate": 7.98826868223302e-05, "loss": 11.9089, "step": 21212 }, { "epoch": 1.1551326295154274, "grad_norm": 0.5037670116250631, "learning_rate": 7.987404892550289e-05, "loss": 11.9592, "step": 21213 }, { "epoch": 1.1551870835120106, "grad_norm": 0.5199204506116677, "learning_rate": 7.98654111851784e-05, "loss": 11.9533, "step": 21214 }, { "epoch": 1.1552415375085936, "grad_norm": 0.6107623890788886, "learning_rate": 7.985677360142384e-05, "loss": 12.0708, "step": 21215 }, { "epoch": 1.1552959915051766, "grad_norm": 0.5511599905622395, "learning_rate": 7.984813617430644e-05, "loss": 12.1261, "step": 21216 }, { "epoch": 1.1553504455017596, "grad_norm": 0.5115863465619226, "learning_rate": 7.983949890389322e-05, "loss": 11.9577, "step": 21217 }, { "epoch": 1.1554048994983426, "grad_norm": 0.5637422061740728, "learning_rate": 7.983086179025148e-05, "loss": 12.0071, "step": 21218 }, { "epoch": 1.1554593534949256, "grad_norm": 0.4981769207547077, "learning_rate": 7.982222483344834e-05, "loss": 11.9628, "step": 21219 }, { "epoch": 1.1555138074915086, "grad_norm": 0.5414525482918751, "learning_rate": 7.981358803355095e-05, "loss": 11.975, "step": 21220 }, { "epoch": 1.1555682614880916, "grad_norm": 0.5530668572217571, "learning_rate": 7.980495139062649e-05, "loss": 12.0245, "step": 21221 }, { "epoch": 1.1556227154846745, "grad_norm": 0.5786923404746341, "learning_rate": 7.979631490474213e-05, "loss": 11.9814, "step": 21222 }, { "epoch": 1.1556771694812575, "grad_norm": 0.6240826110107114, "learning_rate": 7.978767857596499e-05, "loss": 12.1326, "step": 21223 }, { "epoch": 1.1557316234778405, "grad_norm": 0.49542183088818065, "learning_rate": 7.977904240436224e-05, "loss": 11.7954, "step": 21224 }, { "epoch": 1.1557860774744237, "grad_norm": 0.556239010444873, "learning_rate": 7.977040639000107e-05, "loss": 12.0706, "step": 21225 }, { "epoch": 1.1558405314710067, "grad_norm": 0.5166307537336219, "learning_rate": 7.976177053294867e-05, "loss": 11.9495, "step": 21226 }, { "epoch": 1.1558949854675897, "grad_norm": 0.5862827674070573, "learning_rate": 7.975313483327206e-05, "loss": 12.0701, "step": 21227 }, { "epoch": 1.1559494394641727, "grad_norm": 0.5799460978782308, "learning_rate": 7.974449929103847e-05, "loss": 11.9343, "step": 21228 }, { "epoch": 1.1560038934607557, "grad_norm": 0.5799848983304122, "learning_rate": 7.973586390631508e-05, "loss": 11.9795, "step": 21229 }, { "epoch": 1.1560583474573387, "grad_norm": 0.5479810690267198, "learning_rate": 7.9727228679169e-05, "loss": 12.0038, "step": 21230 }, { "epoch": 1.1561128014539217, "grad_norm": 0.5268258755091737, "learning_rate": 7.971859360966739e-05, "loss": 11.9109, "step": 21231 }, { "epoch": 1.1561672554505047, "grad_norm": 0.5399960113306216, "learning_rate": 7.970995869787738e-05, "loss": 12.1343, "step": 21232 }, { "epoch": 1.1562217094470877, "grad_norm": 0.5416405261216394, "learning_rate": 7.970132394386616e-05, "loss": 11.9387, "step": 21233 }, { "epoch": 1.1562761634436707, "grad_norm": 0.4994698027199779, "learning_rate": 7.969268934770084e-05, "loss": 11.9743, "step": 21234 }, { "epoch": 1.1563306174402537, "grad_norm": 0.5605141229754358, "learning_rate": 7.968405490944855e-05, "loss": 11.9807, "step": 21235 }, { "epoch": 1.1563850714368367, "grad_norm": 0.5164187375636196, "learning_rate": 7.967542062917648e-05, "loss": 11.9675, "step": 21236 }, { "epoch": 1.1564395254334197, "grad_norm": 0.6047170965949826, "learning_rate": 7.966678650695179e-05, "loss": 12.0729, "step": 21237 }, { "epoch": 1.1564939794300029, "grad_norm": 0.5670926295472087, "learning_rate": 7.965815254284152e-05, "loss": 12.0425, "step": 21238 }, { "epoch": 1.1565484334265859, "grad_norm": 0.5591787087438296, "learning_rate": 7.964951873691289e-05, "loss": 12.0772, "step": 21239 }, { "epoch": 1.1566028874231689, "grad_norm": 0.5418566161904471, "learning_rate": 7.964088508923297e-05, "loss": 12.067, "step": 21240 }, { "epoch": 1.1566573414197518, "grad_norm": 0.60068859839871, "learning_rate": 7.963225159986899e-05, "loss": 12.1196, "step": 21241 }, { "epoch": 1.1567117954163348, "grad_norm": 0.6292034462025742, "learning_rate": 7.962361826888802e-05, "loss": 12.1183, "step": 21242 }, { "epoch": 1.1567662494129178, "grad_norm": 0.5440244217598444, "learning_rate": 7.961498509635722e-05, "loss": 11.9312, "step": 21243 }, { "epoch": 1.1568207034095008, "grad_norm": 0.6557745520530629, "learning_rate": 7.96063520823437e-05, "loss": 12.0664, "step": 21244 }, { "epoch": 1.1568751574060838, "grad_norm": 0.51055205754822, "learning_rate": 7.959771922691463e-05, "loss": 12.0452, "step": 21245 }, { "epoch": 1.1569296114026668, "grad_norm": 0.5914900864544089, "learning_rate": 7.95890865301371e-05, "loss": 11.9728, "step": 21246 }, { "epoch": 1.1569840653992498, "grad_norm": 0.5354573851902568, "learning_rate": 7.958045399207827e-05, "loss": 11.9417, "step": 21247 }, { "epoch": 1.157038519395833, "grad_norm": 0.5204337816259289, "learning_rate": 7.957182161280526e-05, "loss": 12.0514, "step": 21248 }, { "epoch": 1.157092973392416, "grad_norm": 0.5252946339072717, "learning_rate": 7.956318939238517e-05, "loss": 11.989, "step": 21249 }, { "epoch": 1.157147427388999, "grad_norm": 0.5091175709706328, "learning_rate": 7.955455733088516e-05, "loss": 12.0704, "step": 21250 }, { "epoch": 1.157201881385582, "grad_norm": 0.5174848837304137, "learning_rate": 7.954592542837229e-05, "loss": 12.0009, "step": 21251 }, { "epoch": 1.157256335382165, "grad_norm": 0.5023318740844385, "learning_rate": 7.953729368491378e-05, "loss": 11.8673, "step": 21252 }, { "epoch": 1.157310789378748, "grad_norm": 0.6140836736504344, "learning_rate": 7.95286621005767e-05, "loss": 11.9944, "step": 21253 }, { "epoch": 1.157365243375331, "grad_norm": 0.5182919984248456, "learning_rate": 7.952003067542818e-05, "loss": 12.1002, "step": 21254 }, { "epoch": 1.157419697371914, "grad_norm": 0.5287698514499525, "learning_rate": 7.951139940953533e-05, "loss": 12.0127, "step": 21255 }, { "epoch": 1.157474151368497, "grad_norm": 0.5765625554102265, "learning_rate": 7.950276830296527e-05, "loss": 12.0541, "step": 21256 }, { "epoch": 1.15752860536508, "grad_norm": 0.5937487107232046, "learning_rate": 7.949413735578517e-05, "loss": 12.071, "step": 21257 }, { "epoch": 1.157583059361663, "grad_norm": 0.5589663206974158, "learning_rate": 7.948550656806205e-05, "loss": 11.8881, "step": 21258 }, { "epoch": 1.157637513358246, "grad_norm": 0.666793727274845, "learning_rate": 7.947687593986308e-05, "loss": 12.0645, "step": 21259 }, { "epoch": 1.157691967354829, "grad_norm": 0.6111311427044318, "learning_rate": 7.946824547125536e-05, "loss": 11.9878, "step": 21260 }, { "epoch": 1.1577464213514121, "grad_norm": 0.5227747121282392, "learning_rate": 7.945961516230601e-05, "loss": 11.9656, "step": 21261 }, { "epoch": 1.1578008753479951, "grad_norm": 0.5919891131184604, "learning_rate": 7.94509850130821e-05, "loss": 11.9137, "step": 21262 }, { "epoch": 1.1578553293445781, "grad_norm": 0.5526761338976475, "learning_rate": 7.944235502365083e-05, "loss": 11.9691, "step": 21263 }, { "epoch": 1.1579097833411611, "grad_norm": 0.5191002217963598, "learning_rate": 7.943372519407924e-05, "loss": 11.9662, "step": 21264 }, { "epoch": 1.1579642373377441, "grad_norm": 0.5784555927342834, "learning_rate": 7.942509552443445e-05, "loss": 12.0409, "step": 21265 }, { "epoch": 1.158018691334327, "grad_norm": 0.5888502247943441, "learning_rate": 7.941646601478357e-05, "loss": 12.0134, "step": 21266 }, { "epoch": 1.15807314533091, "grad_norm": 0.5631901823699048, "learning_rate": 7.940783666519372e-05, "loss": 11.9675, "step": 21267 }, { "epoch": 1.158127599327493, "grad_norm": 0.5360246408255919, "learning_rate": 7.939920747573195e-05, "loss": 12.0723, "step": 21268 }, { "epoch": 1.158182053324076, "grad_norm": 0.5450568280593008, "learning_rate": 7.939057844646542e-05, "loss": 11.9873, "step": 21269 }, { "epoch": 1.158236507320659, "grad_norm": 0.7460199505110907, "learning_rate": 7.93819495774612e-05, "loss": 12.0658, "step": 21270 }, { "epoch": 1.1582909613172423, "grad_norm": 0.6202295260594721, "learning_rate": 7.937332086878639e-05, "loss": 12.0706, "step": 21271 }, { "epoch": 1.1583454153138253, "grad_norm": 0.5186259869149088, "learning_rate": 7.93646923205081e-05, "loss": 12.0008, "step": 21272 }, { "epoch": 1.1583998693104083, "grad_norm": 0.5389499880625561, "learning_rate": 7.935606393269341e-05, "loss": 12.0292, "step": 21273 }, { "epoch": 1.1584543233069913, "grad_norm": 0.6026633532668297, "learning_rate": 7.934743570540944e-05, "loss": 11.9039, "step": 21274 }, { "epoch": 1.1585087773035743, "grad_norm": 0.5740250909525888, "learning_rate": 7.933880763872328e-05, "loss": 12.0317, "step": 21275 }, { "epoch": 1.1585632313001573, "grad_norm": 0.5241001314895766, "learning_rate": 7.933017973270202e-05, "loss": 12.0434, "step": 21276 }, { "epoch": 1.1586176852967403, "grad_norm": 0.5617167280592977, "learning_rate": 7.932155198741276e-05, "loss": 12.1402, "step": 21277 }, { "epoch": 1.1586721392933232, "grad_norm": 0.5273417370388322, "learning_rate": 7.931292440292258e-05, "loss": 11.9136, "step": 21278 }, { "epoch": 1.1587265932899062, "grad_norm": 0.5187379134264419, "learning_rate": 7.930429697929855e-05, "loss": 12.0329, "step": 21279 }, { "epoch": 1.1587810472864892, "grad_norm": 0.5208629925119909, "learning_rate": 7.929566971660777e-05, "loss": 11.9753, "step": 21280 }, { "epoch": 1.1588355012830722, "grad_norm": 0.5019973934481978, "learning_rate": 7.928704261491735e-05, "loss": 11.9225, "step": 21281 }, { "epoch": 1.1588899552796552, "grad_norm": 0.5206683386613279, "learning_rate": 7.927841567429435e-05, "loss": 12.0117, "step": 21282 }, { "epoch": 1.1589444092762382, "grad_norm": 0.5106389297711984, "learning_rate": 7.926978889480587e-05, "loss": 12.0175, "step": 21283 }, { "epoch": 1.1589988632728214, "grad_norm": 0.5863089332292762, "learning_rate": 7.926116227651896e-05, "loss": 12.0695, "step": 21284 }, { "epoch": 1.1590533172694044, "grad_norm": 0.5314155203167451, "learning_rate": 7.925253581950077e-05, "loss": 11.9583, "step": 21285 }, { "epoch": 1.1591077712659874, "grad_norm": 0.5579858506104858, "learning_rate": 7.924390952381832e-05, "loss": 12.0124, "step": 21286 }, { "epoch": 1.1591622252625704, "grad_norm": 0.5483341561174498, "learning_rate": 7.923528338953876e-05, "loss": 12.0567, "step": 21287 }, { "epoch": 1.1592166792591534, "grad_norm": 0.5681173540784744, "learning_rate": 7.922665741672905e-05, "loss": 12.078, "step": 21288 }, { "epoch": 1.1592711332557364, "grad_norm": 0.5228607695356412, "learning_rate": 7.921803160545637e-05, "loss": 12.0183, "step": 21289 }, { "epoch": 1.1593255872523194, "grad_norm": 0.5857548834210604, "learning_rate": 7.920940595578775e-05, "loss": 12.0019, "step": 21290 }, { "epoch": 1.1593800412489024, "grad_norm": 0.523380359704805, "learning_rate": 7.920078046779028e-05, "loss": 12.037, "step": 21291 }, { "epoch": 1.1594344952454854, "grad_norm": 0.5778553722466274, "learning_rate": 7.919215514153103e-05, "loss": 12.0122, "step": 21292 }, { "epoch": 1.1594889492420684, "grad_norm": 0.5528023586460237, "learning_rate": 7.918352997707708e-05, "loss": 12.0816, "step": 21293 }, { "epoch": 1.1595434032386516, "grad_norm": 0.5587668613138254, "learning_rate": 7.917490497449547e-05, "loss": 11.8789, "step": 21294 }, { "epoch": 1.1595978572352346, "grad_norm": 0.555394070804435, "learning_rate": 7.916628013385331e-05, "loss": 12.0878, "step": 21295 }, { "epoch": 1.1596523112318176, "grad_norm": 0.5488890422522659, "learning_rate": 7.915765545521761e-05, "loss": 12.0984, "step": 21296 }, { "epoch": 1.1597067652284005, "grad_norm": 0.546272098096521, "learning_rate": 7.914903093865555e-05, "loss": 11.9192, "step": 21297 }, { "epoch": 1.1597612192249835, "grad_norm": 0.6543254243942339, "learning_rate": 7.91404065842341e-05, "loss": 12.0496, "step": 21298 }, { "epoch": 1.1598156732215665, "grad_norm": 0.6017006398399886, "learning_rate": 7.913178239202032e-05, "loss": 12.1064, "step": 21299 }, { "epoch": 1.1598701272181495, "grad_norm": 0.5230995730140959, "learning_rate": 7.912315836208128e-05, "loss": 12.0087, "step": 21300 }, { "epoch": 1.1599245812147325, "grad_norm": 0.5614196050577605, "learning_rate": 7.911453449448409e-05, "loss": 12.0527, "step": 21301 }, { "epoch": 1.1599790352113155, "grad_norm": 0.4539513549397306, "learning_rate": 7.910591078929578e-05, "loss": 12.0398, "step": 21302 }, { "epoch": 1.1600334892078985, "grad_norm": 0.5356462699840768, "learning_rate": 7.909728724658342e-05, "loss": 12.0782, "step": 21303 }, { "epoch": 1.1600879432044815, "grad_norm": 0.5066520142058429, "learning_rate": 7.908866386641404e-05, "loss": 11.807, "step": 21304 }, { "epoch": 1.1601423972010645, "grad_norm": 0.5668869110468674, "learning_rate": 7.908004064885475e-05, "loss": 12.0233, "step": 21305 }, { "epoch": 1.1601968511976475, "grad_norm": 0.5285924976149727, "learning_rate": 7.907141759397255e-05, "loss": 12.1045, "step": 21306 }, { "epoch": 1.1602513051942305, "grad_norm": 0.546543507842577, "learning_rate": 7.906279470183453e-05, "loss": 12.0654, "step": 21307 }, { "epoch": 1.1603057591908137, "grad_norm": 0.5385015810317186, "learning_rate": 7.905417197250772e-05, "loss": 12.1201, "step": 21308 }, { "epoch": 1.1603602131873967, "grad_norm": 0.5577951474519199, "learning_rate": 7.904554940605918e-05, "loss": 11.9945, "step": 21309 }, { "epoch": 1.1604146671839797, "grad_norm": 0.5469466081823766, "learning_rate": 7.903692700255596e-05, "loss": 12.0259, "step": 21310 }, { "epoch": 1.1604691211805627, "grad_norm": 0.5346637133607479, "learning_rate": 7.902830476206509e-05, "loss": 11.9273, "step": 21311 }, { "epoch": 1.1605235751771457, "grad_norm": 0.5918326644363558, "learning_rate": 7.901968268465366e-05, "loss": 12.0651, "step": 21312 }, { "epoch": 1.1605780291737287, "grad_norm": 0.5384818229036822, "learning_rate": 7.90110607703887e-05, "loss": 11.9657, "step": 21313 }, { "epoch": 1.1606324831703116, "grad_norm": 0.6664454169675025, "learning_rate": 7.900243901933726e-05, "loss": 12.014, "step": 21314 }, { "epoch": 1.1606869371668946, "grad_norm": 0.5937989905265902, "learning_rate": 7.899381743156636e-05, "loss": 12.1148, "step": 21315 }, { "epoch": 1.1607413911634776, "grad_norm": 0.5574320242817628, "learning_rate": 7.898519600714304e-05, "loss": 11.9253, "step": 21316 }, { "epoch": 1.1607958451600606, "grad_norm": 0.669922264965814, "learning_rate": 7.897657474613442e-05, "loss": 11.993, "step": 21317 }, { "epoch": 1.1608502991566438, "grad_norm": 0.5635964816746843, "learning_rate": 7.896795364860743e-05, "loss": 11.9512, "step": 21318 }, { "epoch": 1.1609047531532268, "grad_norm": 0.5431889182245252, "learning_rate": 7.895933271462919e-05, "loss": 12.103, "step": 21319 }, { "epoch": 1.1609592071498098, "grad_norm": 0.5465032975915614, "learning_rate": 7.895071194426669e-05, "loss": 11.995, "step": 21320 }, { "epoch": 1.1610136611463928, "grad_norm": 0.6067239504686526, "learning_rate": 7.894209133758698e-05, "loss": 12.0264, "step": 21321 }, { "epoch": 1.1610681151429758, "grad_norm": 0.5230913732754815, "learning_rate": 7.893347089465707e-05, "loss": 11.9428, "step": 21322 }, { "epoch": 1.1611225691395588, "grad_norm": 0.554245263055647, "learning_rate": 7.892485061554407e-05, "loss": 11.9681, "step": 21323 }, { "epoch": 1.1611770231361418, "grad_norm": 0.5982935869902283, "learning_rate": 7.891623050031495e-05, "loss": 11.9538, "step": 21324 }, { "epoch": 1.1612314771327248, "grad_norm": 0.6120429744966326, "learning_rate": 7.890761054903675e-05, "loss": 11.9878, "step": 21325 }, { "epoch": 1.1612859311293078, "grad_norm": 0.5260012624394336, "learning_rate": 7.889899076177651e-05, "loss": 12.005, "step": 21326 }, { "epoch": 1.1613403851258908, "grad_norm": 0.5592786523396323, "learning_rate": 7.88903711386013e-05, "loss": 11.9804, "step": 21327 }, { "epoch": 1.1613948391224738, "grad_norm": 0.641811322354292, "learning_rate": 7.888175167957807e-05, "loss": 12.0675, "step": 21328 }, { "epoch": 1.1614492931190568, "grad_norm": 0.5965510411636518, "learning_rate": 7.887313238477387e-05, "loss": 11.9095, "step": 21329 }, { "epoch": 1.1615037471156398, "grad_norm": 0.5483269925942232, "learning_rate": 7.886451325425574e-05, "loss": 12.0477, "step": 21330 }, { "epoch": 1.161558201112223, "grad_norm": 0.5454992293707681, "learning_rate": 7.88558942880907e-05, "loss": 11.9792, "step": 21331 }, { "epoch": 1.161612655108806, "grad_norm": 0.5596179934384802, "learning_rate": 7.884727548634578e-05, "loss": 12.11, "step": 21332 }, { "epoch": 1.161667109105389, "grad_norm": 0.6544838598092451, "learning_rate": 7.883865684908797e-05, "loss": 11.9894, "step": 21333 }, { "epoch": 1.161721563101972, "grad_norm": 0.5976716142108864, "learning_rate": 7.883003837638433e-05, "loss": 12.1143, "step": 21334 }, { "epoch": 1.161776017098555, "grad_norm": 0.6820796469692685, "learning_rate": 7.882142006830186e-05, "loss": 12.1575, "step": 21335 }, { "epoch": 1.161830471095138, "grad_norm": 0.5891571395628682, "learning_rate": 7.881280192490759e-05, "loss": 11.9877, "step": 21336 }, { "epoch": 1.161884925091721, "grad_norm": 0.6234461921548158, "learning_rate": 7.880418394626852e-05, "loss": 12.0753, "step": 21337 }, { "epoch": 1.161939379088304, "grad_norm": 0.5503187924707625, "learning_rate": 7.879556613245168e-05, "loss": 11.9641, "step": 21338 }, { "epoch": 1.161993833084887, "grad_norm": 0.5876566938983689, "learning_rate": 7.878694848352406e-05, "loss": 12.0456, "step": 21339 }, { "epoch": 1.16204828708147, "grad_norm": 0.5923178089296206, "learning_rate": 7.877833099955269e-05, "loss": 12.075, "step": 21340 }, { "epoch": 1.1621027410780531, "grad_norm": 0.5391073852811503, "learning_rate": 7.876971368060457e-05, "loss": 11.9976, "step": 21341 }, { "epoch": 1.162157195074636, "grad_norm": 0.5005271848541565, "learning_rate": 7.876109652674672e-05, "loss": 11.874, "step": 21342 }, { "epoch": 1.162211649071219, "grad_norm": 0.6154958135301781, "learning_rate": 7.875247953804615e-05, "loss": 12.0391, "step": 21343 }, { "epoch": 1.162266103067802, "grad_norm": 0.5388557819299863, "learning_rate": 7.874386271456986e-05, "loss": 11.933, "step": 21344 }, { "epoch": 1.162320557064385, "grad_norm": 0.576313255775912, "learning_rate": 7.873524605638483e-05, "loss": 12.0524, "step": 21345 }, { "epoch": 1.162375011060968, "grad_norm": 0.5233283920183274, "learning_rate": 7.872662956355812e-05, "loss": 12.0269, "step": 21346 }, { "epoch": 1.162429465057551, "grad_norm": 0.5535799199008398, "learning_rate": 7.871801323615675e-05, "loss": 12.0384, "step": 21347 }, { "epoch": 1.162483919054134, "grad_norm": 0.5528278765162272, "learning_rate": 7.870939707424762e-05, "loss": 12.034, "step": 21348 }, { "epoch": 1.162538373050717, "grad_norm": 0.5317526454774004, "learning_rate": 7.870078107789778e-05, "loss": 12.0472, "step": 21349 }, { "epoch": 1.1625928270473, "grad_norm": 0.5714501615963714, "learning_rate": 7.869216524717426e-05, "loss": 11.973, "step": 21350 }, { "epoch": 1.162647281043883, "grad_norm": 0.5215241127983764, "learning_rate": 7.868354958214404e-05, "loss": 11.9012, "step": 21351 }, { "epoch": 1.162701735040466, "grad_norm": 0.4966222070787855, "learning_rate": 7.867493408287412e-05, "loss": 11.972, "step": 21352 }, { "epoch": 1.162756189037049, "grad_norm": 0.580673890672549, "learning_rate": 7.866631874943148e-05, "loss": 11.9781, "step": 21353 }, { "epoch": 1.1628106430336322, "grad_norm": 0.6109334189838569, "learning_rate": 7.865770358188312e-05, "loss": 12.0469, "step": 21354 }, { "epoch": 1.1628650970302152, "grad_norm": 0.6331103900132437, "learning_rate": 7.864908858029604e-05, "loss": 11.9192, "step": 21355 }, { "epoch": 1.1629195510267982, "grad_norm": 0.5767142908888633, "learning_rate": 7.86404737447372e-05, "loss": 12.213, "step": 21356 }, { "epoch": 1.1629740050233812, "grad_norm": 0.6159866567963135, "learning_rate": 7.863185907527369e-05, "loss": 12.0352, "step": 21357 }, { "epoch": 1.1630284590199642, "grad_norm": 0.5060434680232894, "learning_rate": 7.862324457197237e-05, "loss": 11.8988, "step": 21358 }, { "epoch": 1.1630829130165472, "grad_norm": 0.660432155580146, "learning_rate": 7.86146302349003e-05, "loss": 12.1318, "step": 21359 }, { "epoch": 1.1631373670131302, "grad_norm": 0.6432927532773697, "learning_rate": 7.860601606412444e-05, "loss": 11.8871, "step": 21360 }, { "epoch": 1.1631918210097132, "grad_norm": 0.5569170269303703, "learning_rate": 7.859740205971178e-05, "loss": 12.0168, "step": 21361 }, { "epoch": 1.1632462750062962, "grad_norm": 0.5579488273407743, "learning_rate": 7.858878822172933e-05, "loss": 12.0225, "step": 21362 }, { "epoch": 1.1633007290028792, "grad_norm": 0.5476491036133607, "learning_rate": 7.858017455024405e-05, "loss": 11.8573, "step": 21363 }, { "epoch": 1.1633551829994624, "grad_norm": 0.5435103348402449, "learning_rate": 7.857156104532293e-05, "loss": 11.998, "step": 21364 }, { "epoch": 1.1634096369960454, "grad_norm": 0.5526049723870691, "learning_rate": 7.856294770703292e-05, "loss": 11.9178, "step": 21365 }, { "epoch": 1.1634640909926284, "grad_norm": 0.6471772261019824, "learning_rate": 7.855433453544105e-05, "loss": 12.1013, "step": 21366 }, { "epoch": 1.1635185449892114, "grad_norm": 0.5763607212315821, "learning_rate": 7.854572153061428e-05, "loss": 11.945, "step": 21367 }, { "epoch": 1.1635729989857944, "grad_norm": 0.5876161848674404, "learning_rate": 7.853710869261957e-05, "loss": 11.9471, "step": 21368 }, { "epoch": 1.1636274529823774, "grad_norm": 0.5490189329583168, "learning_rate": 7.85284960215239e-05, "loss": 11.9542, "step": 21369 }, { "epoch": 1.1636819069789603, "grad_norm": 0.5655948032052398, "learning_rate": 7.851988351739423e-05, "loss": 12.0385, "step": 21370 }, { "epoch": 1.1637363609755433, "grad_norm": 0.6149743281232586, "learning_rate": 7.851127118029753e-05, "loss": 11.9412, "step": 21371 }, { "epoch": 1.1637908149721263, "grad_norm": 0.5610191324150361, "learning_rate": 7.850265901030081e-05, "loss": 11.9229, "step": 21372 }, { "epoch": 1.1638452689687093, "grad_norm": 0.5474582047453633, "learning_rate": 7.849404700747103e-05, "loss": 11.9991, "step": 21373 }, { "epoch": 1.1638997229652923, "grad_norm": 0.5261407730884615, "learning_rate": 7.848543517187514e-05, "loss": 11.9588, "step": 21374 }, { "epoch": 1.1639541769618753, "grad_norm": 0.6201605935308045, "learning_rate": 7.847682350358012e-05, "loss": 12.0262, "step": 21375 }, { "epoch": 1.1640086309584583, "grad_norm": 0.5875726654817186, "learning_rate": 7.846821200265292e-05, "loss": 12.1394, "step": 21376 }, { "epoch": 1.1640630849550415, "grad_norm": 0.545933145885558, "learning_rate": 7.845960066916052e-05, "loss": 11.8759, "step": 21377 }, { "epoch": 1.1641175389516245, "grad_norm": 0.5308942247364341, "learning_rate": 7.845098950316991e-05, "loss": 12.0471, "step": 21378 }, { "epoch": 1.1641719929482075, "grad_norm": 0.5420715926627245, "learning_rate": 7.844237850474798e-05, "loss": 11.8491, "step": 21379 }, { "epoch": 1.1642264469447905, "grad_norm": 0.5368358898850524, "learning_rate": 7.843376767396174e-05, "loss": 12.0462, "step": 21380 }, { "epoch": 1.1642809009413735, "grad_norm": 0.5109476748719622, "learning_rate": 7.842515701087813e-05, "loss": 12.0705, "step": 21381 }, { "epoch": 1.1643353549379565, "grad_norm": 0.528363230321364, "learning_rate": 7.841654651556409e-05, "loss": 11.8376, "step": 21382 }, { "epoch": 1.1643898089345395, "grad_norm": 0.5139255861100533, "learning_rate": 7.840793618808664e-05, "loss": 11.8097, "step": 21383 }, { "epoch": 1.1644442629311225, "grad_norm": 0.5236125064547095, "learning_rate": 7.839932602851269e-05, "loss": 12.0464, "step": 21384 }, { "epoch": 1.1644987169277055, "grad_norm": 0.5277906866132869, "learning_rate": 7.839071603690922e-05, "loss": 12.0775, "step": 21385 }, { "epoch": 1.1645531709242884, "grad_norm": 0.5543228153366013, "learning_rate": 7.838210621334316e-05, "loss": 12.0554, "step": 21386 }, { "epoch": 1.1646076249208714, "grad_norm": 0.5684060371693637, "learning_rate": 7.837349655788146e-05, "loss": 11.8817, "step": 21387 }, { "epoch": 1.1646620789174547, "grad_norm": 0.5108695795591632, "learning_rate": 7.836488707059109e-05, "loss": 11.7814, "step": 21388 }, { "epoch": 1.1647165329140376, "grad_norm": 0.5589237735708477, "learning_rate": 7.835627775153899e-05, "loss": 11.9772, "step": 21389 }, { "epoch": 1.1647709869106206, "grad_norm": 0.5192688732247119, "learning_rate": 7.834766860079208e-05, "loss": 11.8846, "step": 21390 }, { "epoch": 1.1648254409072036, "grad_norm": 0.6108269002553003, "learning_rate": 7.833905961841734e-05, "loss": 11.9929, "step": 21391 }, { "epoch": 1.1648798949037866, "grad_norm": 0.5471500568396419, "learning_rate": 7.833045080448172e-05, "loss": 12.1352, "step": 21392 }, { "epoch": 1.1649343489003696, "grad_norm": 0.6343554048826865, "learning_rate": 7.832184215905211e-05, "loss": 11.9481, "step": 21393 }, { "epoch": 1.1649888028969526, "grad_norm": 0.5065980631943604, "learning_rate": 7.831323368219551e-05, "loss": 11.9507, "step": 21394 }, { "epoch": 1.1650432568935356, "grad_norm": 0.5531899671964808, "learning_rate": 7.830462537397884e-05, "loss": 11.8348, "step": 21395 }, { "epoch": 1.1650977108901186, "grad_norm": 0.564211332494212, "learning_rate": 7.829601723446904e-05, "loss": 11.9069, "step": 21396 }, { "epoch": 1.1651521648867016, "grad_norm": 0.508681001390616, "learning_rate": 7.828740926373307e-05, "loss": 12.025, "step": 21397 }, { "epoch": 1.1652066188832846, "grad_norm": 0.5496652300910502, "learning_rate": 7.827880146183785e-05, "loss": 11.9943, "step": 21398 }, { "epoch": 1.1652610728798676, "grad_norm": 0.5495858512184852, "learning_rate": 7.82701938288503e-05, "loss": 12.0881, "step": 21399 }, { "epoch": 1.1653155268764506, "grad_norm": 0.558278490805566, "learning_rate": 7.826158636483736e-05, "loss": 11.9001, "step": 21400 }, { "epoch": 1.1653699808730338, "grad_norm": 0.5387885611485658, "learning_rate": 7.825297906986597e-05, "loss": 12.0711, "step": 21401 }, { "epoch": 1.1654244348696168, "grad_norm": 0.4769160878218986, "learning_rate": 7.824437194400307e-05, "loss": 11.9543, "step": 21402 }, { "epoch": 1.1654788888661998, "grad_norm": 0.5122308867949037, "learning_rate": 7.823576498731557e-05, "loss": 12.0465, "step": 21403 }, { "epoch": 1.1655333428627828, "grad_norm": 0.5677880671335251, "learning_rate": 7.822715819987042e-05, "loss": 11.9772, "step": 21404 }, { "epoch": 1.1655877968593658, "grad_norm": 0.49972644574935027, "learning_rate": 7.821855158173453e-05, "loss": 12.0451, "step": 21405 }, { "epoch": 1.1656422508559487, "grad_norm": 0.5070277872774278, "learning_rate": 7.820994513297484e-05, "loss": 12.0071, "step": 21406 }, { "epoch": 1.1656967048525317, "grad_norm": 0.5061112900393233, "learning_rate": 7.820133885365827e-05, "loss": 11.9472, "step": 21407 }, { "epoch": 1.1657511588491147, "grad_norm": 0.546498177730851, "learning_rate": 7.819273274385179e-05, "loss": 12.0763, "step": 21408 }, { "epoch": 1.1658056128456977, "grad_norm": 0.5124945710238951, "learning_rate": 7.818412680362222e-05, "loss": 12.0112, "step": 21409 }, { "epoch": 1.1658600668422807, "grad_norm": 0.5119162206929523, "learning_rate": 7.817552103303654e-05, "loss": 11.9779, "step": 21410 }, { "epoch": 1.165914520838864, "grad_norm": 0.5696860492088878, "learning_rate": 7.81669154321617e-05, "loss": 11.923, "step": 21411 }, { "epoch": 1.165968974835447, "grad_norm": 0.5315858939238846, "learning_rate": 7.815831000106457e-05, "loss": 12.0212, "step": 21412 }, { "epoch": 1.16602342883203, "grad_norm": 0.5426692120256107, "learning_rate": 7.814970473981208e-05, "loss": 11.9583, "step": 21413 }, { "epoch": 1.166077882828613, "grad_norm": 0.5719014979707006, "learning_rate": 7.814109964847115e-05, "loss": 11.8939, "step": 21414 }, { "epoch": 1.166132336825196, "grad_norm": 0.5993287668117351, "learning_rate": 7.81324947271087e-05, "loss": 11.9861, "step": 21415 }, { "epoch": 1.166186790821779, "grad_norm": 0.5455525230575496, "learning_rate": 7.812388997579161e-05, "loss": 11.9907, "step": 21416 }, { "epoch": 1.1662412448183619, "grad_norm": 0.5466803659336565, "learning_rate": 7.811528539458686e-05, "loss": 11.9992, "step": 21417 }, { "epoch": 1.1662956988149449, "grad_norm": 0.5648320572273463, "learning_rate": 7.810668098356134e-05, "loss": 11.9194, "step": 21418 }, { "epoch": 1.1663501528115279, "grad_norm": 0.5427405229639615, "learning_rate": 7.809807674278191e-05, "loss": 11.7762, "step": 21419 }, { "epoch": 1.1664046068081109, "grad_norm": 0.5612093245163017, "learning_rate": 7.808947267231549e-05, "loss": 12.1771, "step": 21420 }, { "epoch": 1.1664590608046939, "grad_norm": 0.5351677144029716, "learning_rate": 7.808086877222903e-05, "loss": 11.9764, "step": 21421 }, { "epoch": 1.1665135148012769, "grad_norm": 0.6642511035229242, "learning_rate": 7.80722650425894e-05, "loss": 12.0197, "step": 21422 }, { "epoch": 1.1665679687978598, "grad_norm": 0.5569335625516744, "learning_rate": 7.806366148346352e-05, "loss": 12.0209, "step": 21423 }, { "epoch": 1.166622422794443, "grad_norm": 0.5508548278692025, "learning_rate": 7.805505809491828e-05, "loss": 11.9193, "step": 21424 }, { "epoch": 1.166676876791026, "grad_norm": 0.5410126673008115, "learning_rate": 7.804645487702058e-05, "loss": 12.0918, "step": 21425 }, { "epoch": 1.166731330787609, "grad_norm": 0.5194402193255873, "learning_rate": 7.803785182983735e-05, "loss": 12.0509, "step": 21426 }, { "epoch": 1.166785784784192, "grad_norm": 0.5470937659480506, "learning_rate": 7.802924895343543e-05, "loss": 12.0272, "step": 21427 }, { "epoch": 1.166840238780775, "grad_norm": 0.5389157927800435, "learning_rate": 7.802064624788183e-05, "loss": 12.1264, "step": 21428 }, { "epoch": 1.166894692777358, "grad_norm": 0.5848160886153482, "learning_rate": 7.801204371324332e-05, "loss": 12.0118, "step": 21429 }, { "epoch": 1.166949146773941, "grad_norm": 0.489528793970145, "learning_rate": 7.800344134958685e-05, "loss": 11.9481, "step": 21430 }, { "epoch": 1.167003600770524, "grad_norm": 0.51385469906854, "learning_rate": 7.799483915697928e-05, "loss": 11.8606, "step": 21431 }, { "epoch": 1.167058054767107, "grad_norm": 0.6091393168735681, "learning_rate": 7.798623713548757e-05, "loss": 11.9637, "step": 21432 }, { "epoch": 1.16711250876369, "grad_norm": 0.5699559473013444, "learning_rate": 7.797763528517855e-05, "loss": 12.0566, "step": 21433 }, { "epoch": 1.1671669627602732, "grad_norm": 0.4878021996624549, "learning_rate": 7.796903360611915e-05, "loss": 11.9056, "step": 21434 }, { "epoch": 1.1672214167568562, "grad_norm": 0.5638929882084674, "learning_rate": 7.796043209837622e-05, "loss": 11.765, "step": 21435 }, { "epoch": 1.1672758707534392, "grad_norm": 0.5080449196689862, "learning_rate": 7.795183076201669e-05, "loss": 11.9961, "step": 21436 }, { "epoch": 1.1673303247500222, "grad_norm": 0.5320629353679006, "learning_rate": 7.794322959710741e-05, "loss": 11.9381, "step": 21437 }, { "epoch": 1.1673847787466052, "grad_norm": 0.5284954434762772, "learning_rate": 7.79346286037153e-05, "loss": 11.8977, "step": 21438 }, { "epoch": 1.1674392327431882, "grad_norm": 0.5356057188016835, "learning_rate": 7.792602778190717e-05, "loss": 11.9503, "step": 21439 }, { "epoch": 1.1674936867397712, "grad_norm": 0.5816844473371946, "learning_rate": 7.791742713174998e-05, "loss": 11.9861, "step": 21440 }, { "epoch": 1.1675481407363542, "grad_norm": 0.5891457658875092, "learning_rate": 7.790882665331057e-05, "loss": 12.0884, "step": 21441 }, { "epoch": 1.1676025947329371, "grad_norm": 0.5551175021627227, "learning_rate": 7.790022634665584e-05, "loss": 11.8205, "step": 21442 }, { "epoch": 1.1676570487295201, "grad_norm": 0.5438903206830921, "learning_rate": 7.789162621185263e-05, "loss": 12.1324, "step": 21443 }, { "epoch": 1.1677115027261031, "grad_norm": 0.5401067958423126, "learning_rate": 7.788302624896784e-05, "loss": 11.8797, "step": 21444 }, { "epoch": 1.1677659567226861, "grad_norm": 0.5592458681040414, "learning_rate": 7.787442645806837e-05, "loss": 11.9851, "step": 21445 }, { "epoch": 1.1678204107192691, "grad_norm": 0.575958398770358, "learning_rate": 7.786582683922107e-05, "loss": 11.9914, "step": 21446 }, { "epoch": 1.1678748647158523, "grad_norm": 0.5036211915335588, "learning_rate": 7.78572273924928e-05, "loss": 11.9179, "step": 21447 }, { "epoch": 1.1679293187124353, "grad_norm": 0.5415017112148952, "learning_rate": 7.784862811795048e-05, "loss": 12.0637, "step": 21448 }, { "epoch": 1.1679837727090183, "grad_norm": 0.6061407429736834, "learning_rate": 7.784002901566091e-05, "loss": 12.0024, "step": 21449 }, { "epoch": 1.1680382267056013, "grad_norm": 0.6181644418935153, "learning_rate": 7.783143008569099e-05, "loss": 12.0362, "step": 21450 }, { "epoch": 1.1680926807021843, "grad_norm": 0.5514534086004671, "learning_rate": 7.782283132810759e-05, "loss": 12.0524, "step": 21451 }, { "epoch": 1.1681471346987673, "grad_norm": 0.5047071015530673, "learning_rate": 7.781423274297757e-05, "loss": 11.9453, "step": 21452 }, { "epoch": 1.1682015886953503, "grad_norm": 0.52471953065119, "learning_rate": 7.78056343303678e-05, "loss": 11.9401, "step": 21453 }, { "epoch": 1.1682560426919333, "grad_norm": 0.6706785428933535, "learning_rate": 7.779703609034514e-05, "loss": 11.9634, "step": 21454 }, { "epoch": 1.1683104966885163, "grad_norm": 0.5778686607698854, "learning_rate": 7.778843802297645e-05, "loss": 12.1274, "step": 21455 }, { "epoch": 1.1683649506850993, "grad_norm": 0.5590734244778683, "learning_rate": 7.777984012832859e-05, "loss": 11.9344, "step": 21456 }, { "epoch": 1.1684194046816823, "grad_norm": 0.5578065982592294, "learning_rate": 7.777124240646842e-05, "loss": 12.0206, "step": 21457 }, { "epoch": 1.1684738586782655, "grad_norm": 0.5428385181545224, "learning_rate": 7.77626448574628e-05, "loss": 12.0342, "step": 21458 }, { "epoch": 1.1685283126748485, "grad_norm": 0.6147564819772413, "learning_rate": 7.77540474813786e-05, "loss": 12.1248, "step": 21459 }, { "epoch": 1.1685827666714315, "grad_norm": 0.5909814840434413, "learning_rate": 7.774545027828265e-05, "loss": 11.9908, "step": 21460 }, { "epoch": 1.1686372206680145, "grad_norm": 0.5652429534110385, "learning_rate": 7.773685324824178e-05, "loss": 11.8524, "step": 21461 }, { "epoch": 1.1686916746645974, "grad_norm": 0.6087531963234392, "learning_rate": 7.77282563913229e-05, "loss": 11.9695, "step": 21462 }, { "epoch": 1.1687461286611804, "grad_norm": 0.5512132667893598, "learning_rate": 7.771965970759281e-05, "loss": 12.0036, "step": 21463 }, { "epoch": 1.1688005826577634, "grad_norm": 0.5260982059537934, "learning_rate": 7.771106319711841e-05, "loss": 11.9306, "step": 21464 }, { "epoch": 1.1688550366543464, "grad_norm": 0.579546662874138, "learning_rate": 7.770246685996648e-05, "loss": 11.9179, "step": 21465 }, { "epoch": 1.1689094906509294, "grad_norm": 0.5552546566089984, "learning_rate": 7.769387069620394e-05, "loss": 12.0308, "step": 21466 }, { "epoch": 1.1689639446475124, "grad_norm": 0.5320251798810329, "learning_rate": 7.76852747058976e-05, "loss": 11.9921, "step": 21467 }, { "epoch": 1.1690183986440954, "grad_norm": 0.5515518523649794, "learning_rate": 7.767667888911434e-05, "loss": 12.0567, "step": 21468 }, { "epoch": 1.1690728526406784, "grad_norm": 0.5240191542993571, "learning_rate": 7.766808324592091e-05, "loss": 11.94, "step": 21469 }, { "epoch": 1.1691273066372614, "grad_norm": 0.5596577161314998, "learning_rate": 7.765948777638423e-05, "loss": 11.9289, "step": 21470 }, { "epoch": 1.1691817606338446, "grad_norm": 0.5554941562208867, "learning_rate": 7.765089248057114e-05, "loss": 11.9961, "step": 21471 }, { "epoch": 1.1692362146304276, "grad_norm": 0.5683779647278253, "learning_rate": 7.764229735854843e-05, "loss": 12.1373, "step": 21472 }, { "epoch": 1.1692906686270106, "grad_norm": 0.5305858502239906, "learning_rate": 7.763370241038297e-05, "loss": 12.0558, "step": 21473 }, { "epoch": 1.1693451226235936, "grad_norm": 0.5020052882068247, "learning_rate": 7.762510763614161e-05, "loss": 11.8852, "step": 21474 }, { "epoch": 1.1693995766201766, "grad_norm": 0.5547686098086152, "learning_rate": 7.761651303589114e-05, "loss": 11.9912, "step": 21475 }, { "epoch": 1.1694540306167596, "grad_norm": 0.5308770890161167, "learning_rate": 7.76079186096984e-05, "loss": 11.9896, "step": 21476 }, { "epoch": 1.1695084846133426, "grad_norm": 0.5203878529266085, "learning_rate": 7.759932435763027e-05, "loss": 11.8653, "step": 21477 }, { "epoch": 1.1695629386099256, "grad_norm": 0.5600808721754098, "learning_rate": 7.75907302797536e-05, "loss": 12.0303, "step": 21478 }, { "epoch": 1.1696173926065085, "grad_norm": 0.5237632925496599, "learning_rate": 7.758213637613512e-05, "loss": 11.7995, "step": 21479 }, { "epoch": 1.1696718466030915, "grad_norm": 0.5309656644064694, "learning_rate": 7.757354264684168e-05, "loss": 12.0239, "step": 21480 }, { "epoch": 1.1697263005996748, "grad_norm": 0.6012737482998177, "learning_rate": 7.756494909194016e-05, "loss": 12.0147, "step": 21481 }, { "epoch": 1.1697807545962577, "grad_norm": 0.5278493531239027, "learning_rate": 7.755635571149735e-05, "loss": 11.9673, "step": 21482 }, { "epoch": 1.1698352085928407, "grad_norm": 0.6371868709985388, "learning_rate": 7.75477625055801e-05, "loss": 11.9891, "step": 21483 }, { "epoch": 1.1698896625894237, "grad_norm": 0.5384168711632088, "learning_rate": 7.75391694742552e-05, "loss": 11.9979, "step": 21484 }, { "epoch": 1.1699441165860067, "grad_norm": 0.7231925061144907, "learning_rate": 7.753057661758949e-05, "loss": 11.9619, "step": 21485 }, { "epoch": 1.1699985705825897, "grad_norm": 0.6527073810977893, "learning_rate": 7.752198393564977e-05, "loss": 11.9728, "step": 21486 }, { "epoch": 1.1700530245791727, "grad_norm": 0.5188787105760849, "learning_rate": 7.751339142850288e-05, "loss": 12.0277, "step": 21487 }, { "epoch": 1.1701074785757557, "grad_norm": 0.4975516801920765, "learning_rate": 7.750479909621568e-05, "loss": 12.0173, "step": 21488 }, { "epoch": 1.1701619325723387, "grad_norm": 0.5568290775559681, "learning_rate": 7.749620693885489e-05, "loss": 11.9154, "step": 21489 }, { "epoch": 1.1702163865689217, "grad_norm": 0.592378744097072, "learning_rate": 7.748761495648736e-05, "loss": 11.9838, "step": 21490 }, { "epoch": 1.1702708405655047, "grad_norm": 0.5392249836439629, "learning_rate": 7.74790231491799e-05, "loss": 11.8422, "step": 21491 }, { "epoch": 1.1703252945620877, "grad_norm": 0.5828539964433967, "learning_rate": 7.747043151699935e-05, "loss": 12.0813, "step": 21492 }, { "epoch": 1.1703797485586707, "grad_norm": 0.5842192642892001, "learning_rate": 7.74618400600125e-05, "loss": 12.0662, "step": 21493 }, { "epoch": 1.1704342025552539, "grad_norm": 0.49210379808848115, "learning_rate": 7.745324877828617e-05, "loss": 11.9989, "step": 21494 }, { "epoch": 1.1704886565518369, "grad_norm": 0.5979440408864463, "learning_rate": 7.744465767188715e-05, "loss": 11.8663, "step": 21495 }, { "epoch": 1.1705431105484199, "grad_norm": 0.5539942505207179, "learning_rate": 7.743606674088227e-05, "loss": 12.0314, "step": 21496 }, { "epoch": 1.1705975645450029, "grad_norm": 0.548218170252454, "learning_rate": 7.74274759853383e-05, "loss": 12.1344, "step": 21497 }, { "epoch": 1.1706520185415858, "grad_norm": 0.5625011843254378, "learning_rate": 7.74188854053221e-05, "loss": 12.1762, "step": 21498 }, { "epoch": 1.1707064725381688, "grad_norm": 0.5460425827148898, "learning_rate": 7.74102950009004e-05, "loss": 12.0292, "step": 21499 }, { "epoch": 1.1707609265347518, "grad_norm": 0.5408142850430533, "learning_rate": 7.740170477214003e-05, "loss": 12.0924, "step": 21500 }, { "epoch": 1.1708153805313348, "grad_norm": 0.5781869069480234, "learning_rate": 7.739311471910781e-05, "loss": 12.073, "step": 21501 }, { "epoch": 1.1708698345279178, "grad_norm": 0.5392614362905274, "learning_rate": 7.738452484187052e-05, "loss": 11.9822, "step": 21502 }, { "epoch": 1.1709242885245008, "grad_norm": 0.5686113171382908, "learning_rate": 7.737593514049492e-05, "loss": 11.9301, "step": 21503 }, { "epoch": 1.170978742521084, "grad_norm": 0.6093378513520353, "learning_rate": 7.736734561504787e-05, "loss": 11.9898, "step": 21504 }, { "epoch": 1.171033196517667, "grad_norm": 0.5521503110204157, "learning_rate": 7.735875626559614e-05, "loss": 12.0431, "step": 21505 }, { "epoch": 1.17108765051425, "grad_norm": 0.5503166172575223, "learning_rate": 7.735016709220652e-05, "loss": 12.0454, "step": 21506 }, { "epoch": 1.171142104510833, "grad_norm": 0.5399224442906748, "learning_rate": 7.73415780949458e-05, "loss": 11.8754, "step": 21507 }, { "epoch": 1.171196558507416, "grad_norm": 0.6172229676242614, "learning_rate": 7.733298927388077e-05, "loss": 12.1151, "step": 21508 }, { "epoch": 1.171251012503999, "grad_norm": 0.5221733846267943, "learning_rate": 7.732440062907822e-05, "loss": 12.1128, "step": 21509 }, { "epoch": 1.171305466500582, "grad_norm": 0.5535158775303224, "learning_rate": 7.731581216060492e-05, "loss": 11.9696, "step": 21510 }, { "epoch": 1.171359920497165, "grad_norm": 0.5784357095092382, "learning_rate": 7.730722386852768e-05, "loss": 12.0313, "step": 21511 }, { "epoch": 1.171414374493748, "grad_norm": 0.5411237223474381, "learning_rate": 7.729863575291326e-05, "loss": 11.9306, "step": 21512 }, { "epoch": 1.171468828490331, "grad_norm": 0.5534941810446697, "learning_rate": 7.729004781382847e-05, "loss": 11.8688, "step": 21513 }, { "epoch": 1.171523282486914, "grad_norm": 0.5361318082877433, "learning_rate": 7.728146005134005e-05, "loss": 12.0888, "step": 21514 }, { "epoch": 1.171577736483497, "grad_norm": 0.5330867725931275, "learning_rate": 7.727287246551482e-05, "loss": 11.9465, "step": 21515 }, { "epoch": 1.17163219048008, "grad_norm": 0.6008539322265298, "learning_rate": 7.726428505641955e-05, "loss": 12.0014, "step": 21516 }, { "epoch": 1.1716866444766632, "grad_norm": 0.5886242340830706, "learning_rate": 7.725569782412102e-05, "loss": 11.9946, "step": 21517 }, { "epoch": 1.1717410984732461, "grad_norm": 0.5120914005227445, "learning_rate": 7.7247110768686e-05, "loss": 11.9638, "step": 21518 }, { "epoch": 1.1717955524698291, "grad_norm": 0.5123307090634064, "learning_rate": 7.723852389018126e-05, "loss": 11.9975, "step": 21519 }, { "epoch": 1.1718500064664121, "grad_norm": 0.5193406394637023, "learning_rate": 7.722993718867357e-05, "loss": 11.958, "step": 21520 }, { "epoch": 1.1719044604629951, "grad_norm": 0.5594636417028016, "learning_rate": 7.72213506642297e-05, "loss": 11.9261, "step": 21521 }, { "epoch": 1.1719589144595781, "grad_norm": 0.5427898649075381, "learning_rate": 7.721276431691643e-05, "loss": 12.0003, "step": 21522 }, { "epoch": 1.172013368456161, "grad_norm": 0.5094372058973505, "learning_rate": 7.720417814680052e-05, "loss": 12.0733, "step": 21523 }, { "epoch": 1.172067822452744, "grad_norm": 0.5052121062928325, "learning_rate": 7.719559215394875e-05, "loss": 12.0188, "step": 21524 }, { "epoch": 1.172122276449327, "grad_norm": 0.5406966806318048, "learning_rate": 7.718700633842787e-05, "loss": 11.9344, "step": 21525 }, { "epoch": 1.17217673044591, "grad_norm": 0.5726195194524218, "learning_rate": 7.717842070030467e-05, "loss": 11.9564, "step": 21526 }, { "epoch": 1.172231184442493, "grad_norm": 0.49501450545394016, "learning_rate": 7.71698352396459e-05, "loss": 11.8853, "step": 21527 }, { "epoch": 1.1722856384390763, "grad_norm": 0.5695613901911343, "learning_rate": 7.71612499565183e-05, "loss": 12.0181, "step": 21528 }, { "epoch": 1.1723400924356593, "grad_norm": 0.537949886667884, "learning_rate": 7.715266485098868e-05, "loss": 11.9117, "step": 21529 }, { "epoch": 1.1723945464322423, "grad_norm": 0.5456325926834007, "learning_rate": 7.714407992312376e-05, "loss": 11.9595, "step": 21530 }, { "epoch": 1.1724490004288253, "grad_norm": 0.600319765366184, "learning_rate": 7.71354951729903e-05, "loss": 12.0551, "step": 21531 }, { "epoch": 1.1725034544254083, "grad_norm": 0.6105793578343066, "learning_rate": 7.712691060065507e-05, "loss": 11.9822, "step": 21532 }, { "epoch": 1.1725579084219913, "grad_norm": 0.505282344779516, "learning_rate": 7.711832620618482e-05, "loss": 11.9356, "step": 21533 }, { "epoch": 1.1726123624185743, "grad_norm": 0.5079079065072811, "learning_rate": 7.710974198964629e-05, "loss": 11.9677, "step": 21534 }, { "epoch": 1.1726668164151572, "grad_norm": 0.5928270465117728, "learning_rate": 7.710115795110625e-05, "loss": 11.9835, "step": 21535 }, { "epoch": 1.1727212704117402, "grad_norm": 0.5729532760680833, "learning_rate": 7.709257409063142e-05, "loss": 11.918, "step": 21536 }, { "epoch": 1.1727757244083232, "grad_norm": 0.6133392232219819, "learning_rate": 7.70839904082886e-05, "loss": 12.0699, "step": 21537 }, { "epoch": 1.1728301784049062, "grad_norm": 0.5242315398613062, "learning_rate": 7.707540690414452e-05, "loss": 12.1721, "step": 21538 }, { "epoch": 1.1728846324014892, "grad_norm": 0.48245773539151876, "learning_rate": 7.706682357826595e-05, "loss": 11.9284, "step": 21539 }, { "epoch": 1.1729390863980722, "grad_norm": 0.5321526831624755, "learning_rate": 7.705824043071957e-05, "loss": 12.0253, "step": 21540 }, { "epoch": 1.1729935403946554, "grad_norm": 0.5277659031414554, "learning_rate": 7.704965746157215e-05, "loss": 11.9888, "step": 21541 }, { "epoch": 1.1730479943912384, "grad_norm": 0.5525367153991201, "learning_rate": 7.704107467089045e-05, "loss": 11.9003, "step": 21542 }, { "epoch": 1.1731024483878214, "grad_norm": 0.5292540902969628, "learning_rate": 7.703249205874121e-05, "loss": 11.7963, "step": 21543 }, { "epoch": 1.1731569023844044, "grad_norm": 0.4988636316086766, "learning_rate": 7.702390962519117e-05, "loss": 11.9743, "step": 21544 }, { "epoch": 1.1732113563809874, "grad_norm": 0.5287904304911054, "learning_rate": 7.701532737030706e-05, "loss": 11.9094, "step": 21545 }, { "epoch": 1.1732658103775704, "grad_norm": 0.5412664155879475, "learning_rate": 7.70067452941556e-05, "loss": 11.9634, "step": 21546 }, { "epoch": 1.1733202643741534, "grad_norm": 0.5867668093510661, "learning_rate": 7.699816339680357e-05, "loss": 12.0047, "step": 21547 }, { "epoch": 1.1733747183707364, "grad_norm": 0.5837210487251758, "learning_rate": 7.698958167831763e-05, "loss": 12.0087, "step": 21548 }, { "epoch": 1.1734291723673194, "grad_norm": 0.5097681136664706, "learning_rate": 7.698100013876465e-05, "loss": 12.0246, "step": 21549 }, { "epoch": 1.1734836263639024, "grad_norm": 0.5253903708182228, "learning_rate": 7.697241877821121e-05, "loss": 11.9348, "step": 21550 }, { "epoch": 1.1735380803604856, "grad_norm": 0.6293028844089644, "learning_rate": 7.696383759672412e-05, "loss": 11.9706, "step": 21551 }, { "epoch": 1.1735925343570686, "grad_norm": 0.5576519320191654, "learning_rate": 7.695525659437006e-05, "loss": 12.0808, "step": 21552 }, { "epoch": 1.1736469883536516, "grad_norm": 0.5181540665008444, "learning_rate": 7.694667577121582e-05, "loss": 12.0329, "step": 21553 }, { "epoch": 1.1737014423502345, "grad_norm": 0.5389763408112704, "learning_rate": 7.69380951273281e-05, "loss": 12.0049, "step": 21554 }, { "epoch": 1.1737558963468175, "grad_norm": 0.5413564993798322, "learning_rate": 7.69295146627736e-05, "loss": 12.0269, "step": 21555 }, { "epoch": 1.1738103503434005, "grad_norm": 0.7198102054487848, "learning_rate": 7.692093437761908e-05, "loss": 12.1316, "step": 21556 }, { "epoch": 1.1738648043399835, "grad_norm": 0.5786289001723626, "learning_rate": 7.691235427193123e-05, "loss": 12.0126, "step": 21557 }, { "epoch": 1.1739192583365665, "grad_norm": 0.513170917273, "learning_rate": 7.690377434577681e-05, "loss": 11.8932, "step": 21558 }, { "epoch": 1.1739737123331495, "grad_norm": 0.5374406423491991, "learning_rate": 7.68951945992225e-05, "loss": 12.0512, "step": 21559 }, { "epoch": 1.1740281663297325, "grad_norm": 0.5724407425139194, "learning_rate": 7.688661503233503e-05, "loss": 12.0738, "step": 21560 }, { "epoch": 1.1740826203263155, "grad_norm": 0.5209037313444472, "learning_rate": 7.687803564518112e-05, "loss": 11.9837, "step": 21561 }, { "epoch": 1.1741370743228985, "grad_norm": 0.5103557591440533, "learning_rate": 7.686945643782747e-05, "loss": 11.9613, "step": 21562 }, { "epoch": 1.1741915283194815, "grad_norm": 0.5317941678247606, "learning_rate": 7.68608774103408e-05, "loss": 11.9773, "step": 21563 }, { "epoch": 1.1742459823160647, "grad_norm": 0.5251832866116968, "learning_rate": 7.685229856278784e-05, "loss": 11.9801, "step": 21564 }, { "epoch": 1.1743004363126477, "grad_norm": 0.5508580766149924, "learning_rate": 7.684371989523528e-05, "loss": 12.0116, "step": 21565 }, { "epoch": 1.1743548903092307, "grad_norm": 0.541710490167256, "learning_rate": 7.683514140774985e-05, "loss": 12.0006, "step": 21566 }, { "epoch": 1.1744093443058137, "grad_norm": 0.5776584803130658, "learning_rate": 7.682656310039826e-05, "loss": 11.9537, "step": 21567 }, { "epoch": 1.1744637983023967, "grad_norm": 0.5629310797797762, "learning_rate": 7.681798497324716e-05, "loss": 11.9284, "step": 21568 }, { "epoch": 1.1745182522989797, "grad_norm": 0.5587841189061926, "learning_rate": 7.680940702636335e-05, "loss": 11.9966, "step": 21569 }, { "epoch": 1.1745727062955627, "grad_norm": 0.6449658253645129, "learning_rate": 7.680082925981346e-05, "loss": 12.0528, "step": 21570 }, { "epoch": 1.1746271602921456, "grad_norm": 0.5749180238364583, "learning_rate": 7.67922516736642e-05, "loss": 12.0674, "step": 21571 }, { "epoch": 1.1746816142887286, "grad_norm": 0.6287072461563051, "learning_rate": 7.678367426798228e-05, "loss": 12.0589, "step": 21572 }, { "epoch": 1.1747360682853116, "grad_norm": 0.6296938676638881, "learning_rate": 7.67750970428344e-05, "loss": 12.1065, "step": 21573 }, { "epoch": 1.1747905222818948, "grad_norm": 0.5412884507051381, "learning_rate": 7.676651999828726e-05, "loss": 11.9356, "step": 21574 }, { "epoch": 1.1748449762784778, "grad_norm": 0.5511691378546619, "learning_rate": 7.675794313440756e-05, "loss": 11.9952, "step": 21575 }, { "epoch": 1.1748994302750608, "grad_norm": 0.6065834705687223, "learning_rate": 7.6749366451262e-05, "loss": 11.9431, "step": 21576 }, { "epoch": 1.1749538842716438, "grad_norm": 0.5623202627458568, "learning_rate": 7.674078994891727e-05, "loss": 12.0089, "step": 21577 }, { "epoch": 1.1750083382682268, "grad_norm": 0.5780561703387903, "learning_rate": 7.673221362744005e-05, "loss": 11.9446, "step": 21578 }, { "epoch": 1.1750627922648098, "grad_norm": 0.502136544401422, "learning_rate": 7.672363748689706e-05, "loss": 11.9293, "step": 21579 }, { "epoch": 1.1751172462613928, "grad_norm": 0.494545000401578, "learning_rate": 7.671506152735495e-05, "loss": 11.9548, "step": 21580 }, { "epoch": 1.1751717002579758, "grad_norm": 0.5151063631831978, "learning_rate": 7.670648574888042e-05, "loss": 11.9267, "step": 21581 }, { "epoch": 1.1752261542545588, "grad_norm": 0.6079219359619696, "learning_rate": 7.669791015154017e-05, "loss": 11.9529, "step": 21582 }, { "epoch": 1.1752806082511418, "grad_norm": 0.5665895568151611, "learning_rate": 7.668933473540087e-05, "loss": 11.9499, "step": 21583 }, { "epoch": 1.1753350622477248, "grad_norm": 0.5909458742546837, "learning_rate": 7.668075950052922e-05, "loss": 12.0102, "step": 21584 }, { "epoch": 1.1753895162443078, "grad_norm": 0.5969256567130299, "learning_rate": 7.667218444699187e-05, "loss": 11.8862, "step": 21585 }, { "epoch": 1.1754439702408908, "grad_norm": 0.5237406253917345, "learning_rate": 7.666360957485554e-05, "loss": 11.9454, "step": 21586 }, { "epoch": 1.175498424237474, "grad_norm": 0.5921875596250283, "learning_rate": 7.66550348841869e-05, "loss": 12.0029, "step": 21587 }, { "epoch": 1.175552878234057, "grad_norm": 0.5465794547164644, "learning_rate": 7.664646037505263e-05, "loss": 11.8206, "step": 21588 }, { "epoch": 1.17560733223064, "grad_norm": 0.5875587815375578, "learning_rate": 7.663788604751943e-05, "loss": 12.0814, "step": 21589 }, { "epoch": 1.175661786227223, "grad_norm": 0.5642222664525203, "learning_rate": 7.66293119016539e-05, "loss": 12.0113, "step": 21590 }, { "epoch": 1.175716240223806, "grad_norm": 0.5204404510690026, "learning_rate": 7.662073793752278e-05, "loss": 11.9023, "step": 21591 }, { "epoch": 1.175770694220389, "grad_norm": 0.5556397579875381, "learning_rate": 7.661216415519273e-05, "loss": 11.9804, "step": 21592 }, { "epoch": 1.175825148216972, "grad_norm": 0.5640496495315787, "learning_rate": 7.660359055473039e-05, "loss": 12.0748, "step": 21593 }, { "epoch": 1.175879602213555, "grad_norm": 0.49745883549555725, "learning_rate": 7.659501713620246e-05, "loss": 11.9385, "step": 21594 }, { "epoch": 1.175934056210138, "grad_norm": 0.5404957254428031, "learning_rate": 7.65864438996756e-05, "loss": 11.916, "step": 21595 }, { "epoch": 1.175988510206721, "grad_norm": 0.5242902139557931, "learning_rate": 7.657787084521649e-05, "loss": 12.1823, "step": 21596 }, { "epoch": 1.1760429642033041, "grad_norm": 0.5657329966808164, "learning_rate": 7.656929797289177e-05, "loss": 12.0118, "step": 21597 }, { "epoch": 1.1760974181998871, "grad_norm": 0.5700382513723844, "learning_rate": 7.656072528276811e-05, "loss": 12.0433, "step": 21598 }, { "epoch": 1.17615187219647, "grad_norm": 0.5633965987301115, "learning_rate": 7.655215277491225e-05, "loss": 12.0989, "step": 21599 }, { "epoch": 1.176206326193053, "grad_norm": 0.5158507634434333, "learning_rate": 7.654358044939073e-05, "loss": 12.009, "step": 21600 }, { "epoch": 1.176260780189636, "grad_norm": 0.5670445956130047, "learning_rate": 7.653500830627023e-05, "loss": 12.0958, "step": 21601 }, { "epoch": 1.176315234186219, "grad_norm": 0.5698455091294612, "learning_rate": 7.652643634561748e-05, "loss": 12.001, "step": 21602 }, { "epoch": 1.176369688182802, "grad_norm": 0.5565254874646226, "learning_rate": 7.651786456749908e-05, "loss": 12.017, "step": 21603 }, { "epoch": 1.176424142179385, "grad_norm": 0.5624231475396476, "learning_rate": 7.650929297198171e-05, "loss": 11.9315, "step": 21604 }, { "epoch": 1.176478596175968, "grad_norm": 0.6829559344536329, "learning_rate": 7.650072155913203e-05, "loss": 11.9368, "step": 21605 }, { "epoch": 1.176533050172551, "grad_norm": 0.5266904437228023, "learning_rate": 7.649215032901666e-05, "loss": 12.1225, "step": 21606 }, { "epoch": 1.176587504169134, "grad_norm": 0.5355267032043709, "learning_rate": 7.648357928170228e-05, "loss": 11.8722, "step": 21607 }, { "epoch": 1.176641958165717, "grad_norm": 0.5974199310957993, "learning_rate": 7.647500841725553e-05, "loss": 11.9625, "step": 21608 }, { "epoch": 1.1766964121623, "grad_norm": 0.5469317279833944, "learning_rate": 7.646643773574309e-05, "loss": 11.9221, "step": 21609 }, { "epoch": 1.176750866158883, "grad_norm": 0.5394380790937495, "learning_rate": 7.645786723723156e-05, "loss": 11.8321, "step": 21610 }, { "epoch": 1.1768053201554662, "grad_norm": 0.5701565621650369, "learning_rate": 7.644929692178758e-05, "loss": 12.1174, "step": 21611 }, { "epoch": 1.1768597741520492, "grad_norm": 0.5291252400811727, "learning_rate": 7.644072678947781e-05, "loss": 12.0025, "step": 21612 }, { "epoch": 1.1769142281486322, "grad_norm": 0.5223918746204956, "learning_rate": 7.643215684036891e-05, "loss": 12.0317, "step": 21613 }, { "epoch": 1.1769686821452152, "grad_norm": 0.6607752055855449, "learning_rate": 7.642358707452752e-05, "loss": 12.2902, "step": 21614 }, { "epoch": 1.1770231361417982, "grad_norm": 0.5726191469782652, "learning_rate": 7.641501749202028e-05, "loss": 12.0658, "step": 21615 }, { "epoch": 1.1770775901383812, "grad_norm": 0.6062855462084796, "learning_rate": 7.640644809291381e-05, "loss": 12.0044, "step": 21616 }, { "epoch": 1.1771320441349642, "grad_norm": 0.5062403174202712, "learning_rate": 7.639787887727474e-05, "loss": 11.9501, "step": 21617 }, { "epoch": 1.1771864981315472, "grad_norm": 0.5440039427500988, "learning_rate": 7.638930984516975e-05, "loss": 12.0606, "step": 21618 }, { "epoch": 1.1772409521281302, "grad_norm": 0.6159668397552996, "learning_rate": 7.638074099666545e-05, "loss": 12.1079, "step": 21619 }, { "epoch": 1.1772954061247132, "grad_norm": 0.5651141397183372, "learning_rate": 7.637217233182845e-05, "loss": 12.1082, "step": 21620 }, { "epoch": 1.1773498601212964, "grad_norm": 0.5454780345859144, "learning_rate": 7.63636038507254e-05, "loss": 11.9098, "step": 21621 }, { "epoch": 1.1774043141178794, "grad_norm": 0.5504587016802348, "learning_rate": 7.635503555342294e-05, "loss": 11.9108, "step": 21622 }, { "epoch": 1.1774587681144624, "grad_norm": 0.5042810541540745, "learning_rate": 7.634646743998765e-05, "loss": 12.0666, "step": 21623 }, { "epoch": 1.1775132221110454, "grad_norm": 0.5973572988736263, "learning_rate": 7.633789951048622e-05, "loss": 12.0896, "step": 21624 }, { "epoch": 1.1775676761076284, "grad_norm": 0.5247466352235615, "learning_rate": 7.632933176498527e-05, "loss": 11.9067, "step": 21625 }, { "epoch": 1.1776221301042114, "grad_norm": 0.6310328034642694, "learning_rate": 7.632076420355139e-05, "loss": 12.0383, "step": 21626 }, { "epoch": 1.1776765841007943, "grad_norm": 0.515311925378349, "learning_rate": 7.631219682625123e-05, "loss": 12.053, "step": 21627 }, { "epoch": 1.1777310380973773, "grad_norm": 0.6070465029960924, "learning_rate": 7.630362963315138e-05, "loss": 12.006, "step": 21628 }, { "epoch": 1.1777854920939603, "grad_norm": 0.5652235188360191, "learning_rate": 7.629506262431852e-05, "loss": 11.9344, "step": 21629 }, { "epoch": 1.1778399460905433, "grad_norm": 0.5773397818903766, "learning_rate": 7.62864957998192e-05, "loss": 12.0128, "step": 21630 }, { "epoch": 1.1778944000871263, "grad_norm": 0.6079772690009896, "learning_rate": 7.627792915972006e-05, "loss": 12.0603, "step": 21631 }, { "epoch": 1.1779488540837093, "grad_norm": 0.5497045741866009, "learning_rate": 7.626936270408774e-05, "loss": 11.927, "step": 21632 }, { "epoch": 1.1780033080802923, "grad_norm": 0.5537199405150324, "learning_rate": 7.626079643298882e-05, "loss": 12.0391, "step": 21633 }, { "epoch": 1.1780577620768755, "grad_norm": 0.5762763130172146, "learning_rate": 7.62522303464899e-05, "loss": 12.0694, "step": 21634 }, { "epoch": 1.1781122160734585, "grad_norm": 0.5431762987688716, "learning_rate": 7.624366444465764e-05, "loss": 11.958, "step": 21635 }, { "epoch": 1.1781666700700415, "grad_norm": 0.5582454172540878, "learning_rate": 7.623509872755866e-05, "loss": 12.1113, "step": 21636 }, { "epoch": 1.1782211240666245, "grad_norm": 0.5281912605122874, "learning_rate": 7.622653319525951e-05, "loss": 12.1244, "step": 21637 }, { "epoch": 1.1782755780632075, "grad_norm": 0.5317912076200442, "learning_rate": 7.621796784782683e-05, "loss": 11.9328, "step": 21638 }, { "epoch": 1.1783300320597905, "grad_norm": 0.5226098405298144, "learning_rate": 7.620940268532724e-05, "loss": 11.9744, "step": 21639 }, { "epoch": 1.1783844860563735, "grad_norm": 0.5211359218380267, "learning_rate": 7.620083770782731e-05, "loss": 11.7664, "step": 21640 }, { "epoch": 1.1784389400529565, "grad_norm": 0.5195296694323369, "learning_rate": 7.619227291539364e-05, "loss": 11.9052, "step": 21641 }, { "epoch": 1.1784933940495395, "grad_norm": 0.5125149024680569, "learning_rate": 7.618370830809287e-05, "loss": 12.0138, "step": 21642 }, { "epoch": 1.1785478480461224, "grad_norm": 0.5779339418846191, "learning_rate": 7.617514388599158e-05, "loss": 12.0039, "step": 21643 }, { "epoch": 1.1786023020427057, "grad_norm": 0.6368765557088946, "learning_rate": 7.616657964915634e-05, "loss": 12.0219, "step": 21644 }, { "epoch": 1.1786567560392887, "grad_norm": 0.6600037004525121, "learning_rate": 7.615801559765378e-05, "loss": 11.9651, "step": 21645 }, { "epoch": 1.1787112100358716, "grad_norm": 0.5504352862620744, "learning_rate": 7.614945173155049e-05, "loss": 12.0223, "step": 21646 }, { "epoch": 1.1787656640324546, "grad_norm": 0.5182725241016743, "learning_rate": 7.614088805091308e-05, "loss": 12.006, "step": 21647 }, { "epoch": 1.1788201180290376, "grad_norm": 0.5330077841931283, "learning_rate": 7.613232455580811e-05, "loss": 11.8409, "step": 21648 }, { "epoch": 1.1788745720256206, "grad_norm": 0.5525951312144873, "learning_rate": 7.612376124630224e-05, "loss": 12.0381, "step": 21649 }, { "epoch": 1.1789290260222036, "grad_norm": 0.5190346790686747, "learning_rate": 7.611519812246194e-05, "loss": 11.9219, "step": 21650 }, { "epoch": 1.1789834800187866, "grad_norm": 0.5979838501324751, "learning_rate": 7.610663518435388e-05, "loss": 12.0164, "step": 21651 }, { "epoch": 1.1790379340153696, "grad_norm": 0.5470158345829987, "learning_rate": 7.609807243204464e-05, "loss": 12.0324, "step": 21652 }, { "epoch": 1.1790923880119526, "grad_norm": 0.5308636105202947, "learning_rate": 7.60895098656008e-05, "loss": 12.0483, "step": 21653 }, { "epoch": 1.1791468420085356, "grad_norm": 0.500336870950848, "learning_rate": 7.608094748508893e-05, "loss": 11.7895, "step": 21654 }, { "epoch": 1.1792012960051186, "grad_norm": 0.5666076552703959, "learning_rate": 7.607238529057563e-05, "loss": 11.9856, "step": 21655 }, { "epoch": 1.1792557500017016, "grad_norm": 0.5528134284284965, "learning_rate": 7.606382328212748e-05, "loss": 12.012, "step": 21656 }, { "epoch": 1.1793102039982848, "grad_norm": 0.5439674157238209, "learning_rate": 7.605526145981103e-05, "loss": 11.9978, "step": 21657 }, { "epoch": 1.1793646579948678, "grad_norm": 0.5572339329752212, "learning_rate": 7.604669982369289e-05, "loss": 11.9457, "step": 21658 }, { "epoch": 1.1794191119914508, "grad_norm": 0.5019183247288584, "learning_rate": 7.603813837383968e-05, "loss": 12.0303, "step": 21659 }, { "epoch": 1.1794735659880338, "grad_norm": 0.5234298019560105, "learning_rate": 7.602957711031788e-05, "loss": 12.0421, "step": 21660 }, { "epoch": 1.1795280199846168, "grad_norm": 0.5988081752598906, "learning_rate": 7.60210160331941e-05, "loss": 12.0151, "step": 21661 }, { "epoch": 1.1795824739811998, "grad_norm": 0.5503929227153116, "learning_rate": 7.601245514253494e-05, "loss": 12.022, "step": 21662 }, { "epoch": 1.1796369279777827, "grad_norm": 0.5875432886014211, "learning_rate": 7.600389443840694e-05, "loss": 12.1746, "step": 21663 }, { "epoch": 1.1796913819743657, "grad_norm": 0.5548592242777497, "learning_rate": 7.599533392087667e-05, "loss": 12.0658, "step": 21664 }, { "epoch": 1.1797458359709487, "grad_norm": 0.5441249909569249, "learning_rate": 7.598677359001074e-05, "loss": 12.0069, "step": 21665 }, { "epoch": 1.1798002899675317, "grad_norm": 0.5411189061569485, "learning_rate": 7.597821344587566e-05, "loss": 11.9706, "step": 21666 }, { "epoch": 1.179854743964115, "grad_norm": 0.5401778057740976, "learning_rate": 7.596965348853804e-05, "loss": 11.9824, "step": 21667 }, { "epoch": 1.179909197960698, "grad_norm": 0.5358619510051359, "learning_rate": 7.59610937180644e-05, "loss": 11.9398, "step": 21668 }, { "epoch": 1.179963651957281, "grad_norm": 0.5755059795425265, "learning_rate": 7.595253413452133e-05, "loss": 11.9483, "step": 21669 }, { "epoch": 1.180018105953864, "grad_norm": 0.6660613258187639, "learning_rate": 7.594397473797545e-05, "loss": 11.9965, "step": 21670 }, { "epoch": 1.180072559950447, "grad_norm": 0.5245059523204031, "learning_rate": 7.593541552849319e-05, "loss": 11.9452, "step": 21671 }, { "epoch": 1.18012701394703, "grad_norm": 0.5805989717286782, "learning_rate": 7.592685650614118e-05, "loss": 12.0659, "step": 21672 }, { "epoch": 1.180181467943613, "grad_norm": 0.5281261993262514, "learning_rate": 7.591829767098598e-05, "loss": 11.9192, "step": 21673 }, { "epoch": 1.1802359219401959, "grad_norm": 0.5152599095149386, "learning_rate": 7.590973902309413e-05, "loss": 12.0361, "step": 21674 }, { "epoch": 1.1802903759367789, "grad_norm": 0.5273053352868992, "learning_rate": 7.59011805625322e-05, "loss": 11.8206, "step": 21675 }, { "epoch": 1.1803448299333619, "grad_norm": 0.6876622776737558, "learning_rate": 7.589262228936674e-05, "loss": 11.9064, "step": 21676 }, { "epoch": 1.1803992839299449, "grad_norm": 0.517094720883418, "learning_rate": 7.588406420366427e-05, "loss": 11.9869, "step": 21677 }, { "epoch": 1.1804537379265279, "grad_norm": 0.6099536909062926, "learning_rate": 7.587550630549136e-05, "loss": 11.8845, "step": 21678 }, { "epoch": 1.1805081919231109, "grad_norm": 0.6056931162179835, "learning_rate": 7.586694859491455e-05, "loss": 12.1122, "step": 21679 }, { "epoch": 1.180562645919694, "grad_norm": 0.5603632374089449, "learning_rate": 7.585839107200046e-05, "loss": 11.96, "step": 21680 }, { "epoch": 1.180617099916277, "grad_norm": 0.5319769323960366, "learning_rate": 7.584983373681552e-05, "loss": 11.9465, "step": 21681 }, { "epoch": 1.18067155391286, "grad_norm": 0.5019727975318679, "learning_rate": 7.584127658942632e-05, "loss": 11.7859, "step": 21682 }, { "epoch": 1.180726007909443, "grad_norm": 0.5351146581359351, "learning_rate": 7.58327196298994e-05, "loss": 12.0504, "step": 21683 }, { "epoch": 1.180780461906026, "grad_norm": 0.5645373231965284, "learning_rate": 7.582416285830132e-05, "loss": 12.011, "step": 21684 }, { "epoch": 1.180834915902609, "grad_norm": 0.5979440946985238, "learning_rate": 7.58156062746986e-05, "loss": 12.0611, "step": 21685 }, { "epoch": 1.180889369899192, "grad_norm": 0.5055138933136387, "learning_rate": 7.580704987915777e-05, "loss": 12.0173, "step": 21686 }, { "epoch": 1.180943823895775, "grad_norm": 0.4761881341162523, "learning_rate": 7.579849367174539e-05, "loss": 11.9772, "step": 21687 }, { "epoch": 1.180998277892358, "grad_norm": 0.6189338825141933, "learning_rate": 7.578993765252798e-05, "loss": 11.8785, "step": 21688 }, { "epoch": 1.181052731888941, "grad_norm": 0.656185572498181, "learning_rate": 7.578138182157208e-05, "loss": 12.0732, "step": 21689 }, { "epoch": 1.181107185885524, "grad_norm": 0.5553444220352391, "learning_rate": 7.577282617894423e-05, "loss": 11.8689, "step": 21690 }, { "epoch": 1.1811616398821072, "grad_norm": 0.6184382676376969, "learning_rate": 7.576427072471093e-05, "loss": 12.1293, "step": 21691 }, { "epoch": 1.1812160938786902, "grad_norm": 0.5651102836980537, "learning_rate": 7.575571545893873e-05, "loss": 12.1049, "step": 21692 }, { "epoch": 1.1812705478752732, "grad_norm": 0.5838191518166306, "learning_rate": 7.574716038169414e-05, "loss": 12.0283, "step": 21693 }, { "epoch": 1.1813250018718562, "grad_norm": 0.5161780662387591, "learning_rate": 7.573860549304368e-05, "loss": 11.9599, "step": 21694 }, { "epoch": 1.1813794558684392, "grad_norm": 0.5102349093103432, "learning_rate": 7.573005079305392e-05, "loss": 11.8245, "step": 21695 }, { "epoch": 1.1814339098650222, "grad_norm": 0.6195600594406752, "learning_rate": 7.572149628179135e-05, "loss": 12.0737, "step": 21696 }, { "epoch": 1.1814883638616052, "grad_norm": 0.639473472962265, "learning_rate": 7.57129419593225e-05, "loss": 12.0344, "step": 21697 }, { "epoch": 1.1815428178581882, "grad_norm": 0.495696446874667, "learning_rate": 7.570438782571388e-05, "loss": 11.9619, "step": 21698 }, { "epoch": 1.1815972718547711, "grad_norm": 0.5520409378708824, "learning_rate": 7.569583388103201e-05, "loss": 11.9519, "step": 21699 }, { "epoch": 1.1816517258513541, "grad_norm": 0.5965402705952677, "learning_rate": 7.568728012534345e-05, "loss": 12.1446, "step": 21700 }, { "epoch": 1.1817061798479371, "grad_norm": 0.5520543923776201, "learning_rate": 7.567872655871464e-05, "loss": 11.9793, "step": 21701 }, { "epoch": 1.1817606338445201, "grad_norm": 0.512609167119209, "learning_rate": 7.567017318121214e-05, "loss": 11.9533, "step": 21702 }, { "epoch": 1.1818150878411031, "grad_norm": 0.5471941486705536, "learning_rate": 7.566161999290246e-05, "loss": 11.9136, "step": 21703 }, { "epoch": 1.1818695418376863, "grad_norm": 0.7079993458364057, "learning_rate": 7.565306699385208e-05, "loss": 11.9342, "step": 21704 }, { "epoch": 1.1819239958342693, "grad_norm": 0.560453505923944, "learning_rate": 7.564451418412756e-05, "loss": 11.854, "step": 21705 }, { "epoch": 1.1819784498308523, "grad_norm": 0.566423257294691, "learning_rate": 7.563596156379536e-05, "loss": 12.0045, "step": 21706 }, { "epoch": 1.1820329038274353, "grad_norm": 0.5686640800327653, "learning_rate": 7.562740913292201e-05, "loss": 11.8475, "step": 21707 }, { "epoch": 1.1820873578240183, "grad_norm": 0.5770219785563593, "learning_rate": 7.561885689157402e-05, "loss": 11.9858, "step": 21708 }, { "epoch": 1.1821418118206013, "grad_norm": 0.530537262039549, "learning_rate": 7.56103048398179e-05, "loss": 11.9835, "step": 21709 }, { "epoch": 1.1821962658171843, "grad_norm": 0.5525937311384657, "learning_rate": 7.560175297772016e-05, "loss": 11.9754, "step": 21710 }, { "epoch": 1.1822507198137673, "grad_norm": 0.5695581815367907, "learning_rate": 7.559320130534724e-05, "loss": 12.0579, "step": 21711 }, { "epoch": 1.1823051738103503, "grad_norm": 0.5483632717658697, "learning_rate": 7.558464982276569e-05, "loss": 11.8674, "step": 21712 }, { "epoch": 1.1823596278069333, "grad_norm": 0.6059415138577536, "learning_rate": 7.5576098530042e-05, "loss": 11.7938, "step": 21713 }, { "epoch": 1.1824140818035165, "grad_norm": 0.5610916969377091, "learning_rate": 7.556754742724267e-05, "loss": 11.7845, "step": 21714 }, { "epoch": 1.1824685358000995, "grad_norm": 0.5227998543254345, "learning_rate": 7.555899651443417e-05, "loss": 12.0602, "step": 21715 }, { "epoch": 1.1825229897966825, "grad_norm": 0.5181757698245862, "learning_rate": 7.555044579168303e-05, "loss": 11.9774, "step": 21716 }, { "epoch": 1.1825774437932655, "grad_norm": 0.5710367409027894, "learning_rate": 7.554189525905569e-05, "loss": 12.0563, "step": 21717 }, { "epoch": 1.1826318977898485, "grad_norm": 0.46707603485019517, "learning_rate": 7.553334491661871e-05, "loss": 11.9401, "step": 21718 }, { "epoch": 1.1826863517864314, "grad_norm": 0.557364000105476, "learning_rate": 7.552479476443854e-05, "loss": 12.0401, "step": 21719 }, { "epoch": 1.1827408057830144, "grad_norm": 0.5968173852566319, "learning_rate": 7.55162448025817e-05, "loss": 11.9308, "step": 21720 }, { "epoch": 1.1827952597795974, "grad_norm": 0.6053471368379904, "learning_rate": 7.550769503111459e-05, "loss": 12.148, "step": 21721 }, { "epoch": 1.1828497137761804, "grad_norm": 0.5733610961834373, "learning_rate": 7.549914545010377e-05, "loss": 11.9982, "step": 21722 }, { "epoch": 1.1829041677727634, "grad_norm": 0.521571558531158, "learning_rate": 7.54905960596157e-05, "loss": 12.0328, "step": 21723 }, { "epoch": 1.1829586217693464, "grad_norm": 0.563622430367173, "learning_rate": 7.548204685971688e-05, "loss": 12.0217, "step": 21724 }, { "epoch": 1.1830130757659294, "grad_norm": 0.5433801146102819, "learning_rate": 7.547349785047376e-05, "loss": 11.926, "step": 21725 }, { "epoch": 1.1830675297625124, "grad_norm": 0.5531502300311864, "learning_rate": 7.546494903195284e-05, "loss": 11.7903, "step": 21726 }, { "epoch": 1.1831219837590956, "grad_norm": 0.5920247200071598, "learning_rate": 7.54564004042206e-05, "loss": 12.1427, "step": 21727 }, { "epoch": 1.1831764377556786, "grad_norm": 0.5443411208538833, "learning_rate": 7.544785196734347e-05, "loss": 12.0634, "step": 21728 }, { "epoch": 1.1832308917522616, "grad_norm": 0.6103023491678193, "learning_rate": 7.543930372138799e-05, "loss": 12.0133, "step": 21729 }, { "epoch": 1.1832853457488446, "grad_norm": 0.5284839116957317, "learning_rate": 7.543075566642063e-05, "loss": 11.9542, "step": 21730 }, { "epoch": 1.1833397997454276, "grad_norm": 0.5370732737599623, "learning_rate": 7.542220780250781e-05, "loss": 11.9737, "step": 21731 }, { "epoch": 1.1833942537420106, "grad_norm": 0.5081119754958391, "learning_rate": 7.5413660129716e-05, "loss": 11.9504, "step": 21732 }, { "epoch": 1.1834487077385936, "grad_norm": 0.5198359969619085, "learning_rate": 7.540511264811172e-05, "loss": 11.9739, "step": 21733 }, { "epoch": 1.1835031617351766, "grad_norm": 0.5294198800766511, "learning_rate": 7.53965653577614e-05, "loss": 12.0196, "step": 21734 }, { "epoch": 1.1835576157317595, "grad_norm": 0.6708523712972554, "learning_rate": 7.538801825873151e-05, "loss": 12.0584, "step": 21735 }, { "epoch": 1.1836120697283425, "grad_norm": 0.5905873501532606, "learning_rate": 7.537947135108852e-05, "loss": 12.0055, "step": 21736 }, { "epoch": 1.1836665237249258, "grad_norm": 0.5280514428250044, "learning_rate": 7.537092463489888e-05, "loss": 11.9186, "step": 21737 }, { "epoch": 1.1837209777215087, "grad_norm": 0.511630308174885, "learning_rate": 7.536237811022908e-05, "loss": 11.8857, "step": 21738 }, { "epoch": 1.1837754317180917, "grad_norm": 0.5773849467589317, "learning_rate": 7.535383177714553e-05, "loss": 12.0339, "step": 21739 }, { "epoch": 1.1838298857146747, "grad_norm": 0.5902880598466377, "learning_rate": 7.534528563571478e-05, "loss": 12.0105, "step": 21740 }, { "epoch": 1.1838843397112577, "grad_norm": 0.5576658102997276, "learning_rate": 7.533673968600317e-05, "loss": 11.8993, "step": 21741 }, { "epoch": 1.1839387937078407, "grad_norm": 0.5011873542817606, "learning_rate": 7.532819392807723e-05, "loss": 11.9773, "step": 21742 }, { "epoch": 1.1839932477044237, "grad_norm": 0.5056615027385634, "learning_rate": 7.531964836200336e-05, "loss": 11.9591, "step": 21743 }, { "epoch": 1.1840477017010067, "grad_norm": 0.5902346509462059, "learning_rate": 7.531110298784807e-05, "loss": 12.0789, "step": 21744 }, { "epoch": 1.1841021556975897, "grad_norm": 0.5357587708500191, "learning_rate": 7.530255780567777e-05, "loss": 11.9819, "step": 21745 }, { "epoch": 1.1841566096941727, "grad_norm": 0.5714466400676492, "learning_rate": 7.529401281555892e-05, "loss": 11.9447, "step": 21746 }, { "epoch": 1.1842110636907557, "grad_norm": 0.6919689987784267, "learning_rate": 7.528546801755799e-05, "loss": 12.066, "step": 21747 }, { "epoch": 1.1842655176873387, "grad_norm": 0.569462809235025, "learning_rate": 7.52769234117414e-05, "loss": 12.0948, "step": 21748 }, { "epoch": 1.1843199716839217, "grad_norm": 0.5582325725628782, "learning_rate": 7.526837899817559e-05, "loss": 11.9375, "step": 21749 }, { "epoch": 1.1843744256805049, "grad_norm": 0.5830476014734609, "learning_rate": 7.525983477692703e-05, "loss": 12.0411, "step": 21750 }, { "epoch": 1.1844288796770879, "grad_norm": 0.6654803020952056, "learning_rate": 7.525129074806213e-05, "loss": 12.0759, "step": 21751 }, { "epoch": 1.1844833336736709, "grad_norm": 0.5128129143192934, "learning_rate": 7.524274691164734e-05, "loss": 11.9515, "step": 21752 }, { "epoch": 1.1845377876702539, "grad_norm": 0.6029074652028825, "learning_rate": 7.523420326774911e-05, "loss": 11.9985, "step": 21753 }, { "epoch": 1.1845922416668369, "grad_norm": 0.5067885544790296, "learning_rate": 7.522565981643387e-05, "loss": 11.8589, "step": 21754 }, { "epoch": 1.1846466956634198, "grad_norm": 0.5158502405510174, "learning_rate": 7.521711655776802e-05, "loss": 11.9687, "step": 21755 }, { "epoch": 1.1847011496600028, "grad_norm": 0.5380068678218914, "learning_rate": 7.520857349181806e-05, "loss": 12.0553, "step": 21756 }, { "epoch": 1.1847556036565858, "grad_norm": 0.5258209740523851, "learning_rate": 7.520003061865038e-05, "loss": 11.9678, "step": 21757 }, { "epoch": 1.1848100576531688, "grad_norm": 0.5674122484543469, "learning_rate": 7.519148793833143e-05, "loss": 12.027, "step": 21758 }, { "epoch": 1.1848645116497518, "grad_norm": 0.5415370607754529, "learning_rate": 7.518294545092763e-05, "loss": 11.8541, "step": 21759 }, { "epoch": 1.1849189656463348, "grad_norm": 0.5215861042928728, "learning_rate": 7.517440315650542e-05, "loss": 11.8873, "step": 21760 }, { "epoch": 1.184973419642918, "grad_norm": 0.7521342107961826, "learning_rate": 7.51658610551312e-05, "loss": 12.246, "step": 21761 }, { "epoch": 1.185027873639501, "grad_norm": 0.5522051040985905, "learning_rate": 7.51573191468714e-05, "loss": 11.9465, "step": 21762 }, { "epoch": 1.185082327636084, "grad_norm": 0.5986971920610508, "learning_rate": 7.514877743179248e-05, "loss": 12.0501, "step": 21763 }, { "epoch": 1.185136781632667, "grad_norm": 0.6577498856361116, "learning_rate": 7.514023590996081e-05, "loss": 12.0744, "step": 21764 }, { "epoch": 1.18519123562925, "grad_norm": 0.5709753816871418, "learning_rate": 7.513169458144284e-05, "loss": 11.8767, "step": 21765 }, { "epoch": 1.185245689625833, "grad_norm": 0.5424432614333545, "learning_rate": 7.512315344630496e-05, "loss": 11.942, "step": 21766 }, { "epoch": 1.185300143622416, "grad_norm": 0.5524916153826994, "learning_rate": 7.511461250461364e-05, "loss": 12.0522, "step": 21767 }, { "epoch": 1.185354597618999, "grad_norm": 0.5386178542301641, "learning_rate": 7.510607175643525e-05, "loss": 12.0999, "step": 21768 }, { "epoch": 1.185409051615582, "grad_norm": 0.5302354868198255, "learning_rate": 7.509753120183624e-05, "loss": 12.0432, "step": 21769 }, { "epoch": 1.185463505612165, "grad_norm": 0.5581106828952281, "learning_rate": 7.508899084088301e-05, "loss": 12.0233, "step": 21770 }, { "epoch": 1.185517959608748, "grad_norm": 0.5425977101660957, "learning_rate": 7.508045067364194e-05, "loss": 12.047, "step": 21771 }, { "epoch": 1.185572413605331, "grad_norm": 0.593887575565652, "learning_rate": 7.507191070017948e-05, "loss": 11.974, "step": 21772 }, { "epoch": 1.185626867601914, "grad_norm": 0.5225432690410278, "learning_rate": 7.506337092056202e-05, "loss": 12.0737, "step": 21773 }, { "epoch": 1.1856813215984972, "grad_norm": 0.6143012266055953, "learning_rate": 7.505483133485595e-05, "loss": 11.9907, "step": 21774 }, { "epoch": 1.1857357755950801, "grad_norm": 0.5629345433031676, "learning_rate": 7.504629194312773e-05, "loss": 11.9683, "step": 21775 }, { "epoch": 1.1857902295916631, "grad_norm": 0.581947804657824, "learning_rate": 7.50377527454437e-05, "loss": 11.9701, "step": 21776 }, { "epoch": 1.1858446835882461, "grad_norm": 0.5147110297629652, "learning_rate": 7.502921374187029e-05, "loss": 11.9844, "step": 21777 }, { "epoch": 1.1858991375848291, "grad_norm": 0.5407679106530497, "learning_rate": 7.50206749324739e-05, "loss": 11.8965, "step": 21778 }, { "epoch": 1.1859535915814121, "grad_norm": 0.5643712912136952, "learning_rate": 7.501213631732095e-05, "loss": 12.0641, "step": 21779 }, { "epoch": 1.186008045577995, "grad_norm": 0.6000764496420661, "learning_rate": 7.500359789647785e-05, "loss": 12.1549, "step": 21780 }, { "epoch": 1.186062499574578, "grad_norm": 0.5520801822640712, "learning_rate": 7.49950596700109e-05, "loss": 11.9133, "step": 21781 }, { "epoch": 1.186116953571161, "grad_norm": 0.6109658539324219, "learning_rate": 7.498652163798658e-05, "loss": 11.9836, "step": 21782 }, { "epoch": 1.186171407567744, "grad_norm": 0.5252949920892036, "learning_rate": 7.497798380047127e-05, "loss": 12.1093, "step": 21783 }, { "epoch": 1.1862258615643273, "grad_norm": 0.6179409760128146, "learning_rate": 7.496944615753136e-05, "loss": 12.0782, "step": 21784 }, { "epoch": 1.1862803155609103, "grad_norm": 0.5840217304614371, "learning_rate": 7.496090870923323e-05, "loss": 11.9846, "step": 21785 }, { "epoch": 1.1863347695574933, "grad_norm": 0.594856080206936, "learning_rate": 7.495237145564327e-05, "loss": 12.0495, "step": 21786 }, { "epoch": 1.1863892235540763, "grad_norm": 0.5039334530956467, "learning_rate": 7.494383439682787e-05, "loss": 11.902, "step": 21787 }, { "epoch": 1.1864436775506593, "grad_norm": 0.48203965829591866, "learning_rate": 7.493529753285339e-05, "loss": 11.7758, "step": 21788 }, { "epoch": 1.1864981315472423, "grad_norm": 0.5000380138956201, "learning_rate": 7.492676086378626e-05, "loss": 11.9807, "step": 21789 }, { "epoch": 1.1865525855438253, "grad_norm": 0.5560242956456776, "learning_rate": 7.491822438969289e-05, "loss": 11.9622, "step": 21790 }, { "epoch": 1.1866070395404082, "grad_norm": 0.561093459286848, "learning_rate": 7.490968811063956e-05, "loss": 11.9649, "step": 21791 }, { "epoch": 1.1866614935369912, "grad_norm": 0.569981991409385, "learning_rate": 7.49011520266927e-05, "loss": 12.2138, "step": 21792 }, { "epoch": 1.1867159475335742, "grad_norm": 0.5575985421141544, "learning_rate": 7.489261613791871e-05, "loss": 11.9822, "step": 21793 }, { "epoch": 1.1867704015301572, "grad_norm": 0.4961472956487318, "learning_rate": 7.488408044438393e-05, "loss": 12.0297, "step": 21794 }, { "epoch": 1.1868248555267402, "grad_norm": 0.5311876448727464, "learning_rate": 7.487554494615476e-05, "loss": 12.059, "step": 21795 }, { "epoch": 1.1868793095233232, "grad_norm": 0.5626886924773438, "learning_rate": 7.486700964329758e-05, "loss": 11.9969, "step": 21796 }, { "epoch": 1.1869337635199064, "grad_norm": 0.5843387177835994, "learning_rate": 7.485847453587873e-05, "loss": 12.0714, "step": 21797 }, { "epoch": 1.1869882175164894, "grad_norm": 0.5189952502279995, "learning_rate": 7.48499396239646e-05, "loss": 12.1539, "step": 21798 }, { "epoch": 1.1870426715130724, "grad_norm": 0.5209743899056666, "learning_rate": 7.484140490762158e-05, "loss": 11.8418, "step": 21799 }, { "epoch": 1.1870971255096554, "grad_norm": 0.4994690385578024, "learning_rate": 7.483287038691601e-05, "loss": 11.8865, "step": 21800 }, { "epoch": 1.1871515795062384, "grad_norm": 0.5424976862399479, "learning_rate": 7.482433606191426e-05, "loss": 12.0755, "step": 21801 }, { "epoch": 1.1872060335028214, "grad_norm": 0.5428216072561727, "learning_rate": 7.481580193268267e-05, "loss": 11.8406, "step": 21802 }, { "epoch": 1.1872604874994044, "grad_norm": 0.611819389257739, "learning_rate": 7.480726799928764e-05, "loss": 11.9912, "step": 21803 }, { "epoch": 1.1873149414959874, "grad_norm": 0.5115133464397964, "learning_rate": 7.47987342617955e-05, "loss": 12.0168, "step": 21804 }, { "epoch": 1.1873693954925704, "grad_norm": 0.6358308408573634, "learning_rate": 7.479020072027266e-05, "loss": 12.166, "step": 21805 }, { "epoch": 1.1874238494891534, "grad_norm": 0.5419860484051422, "learning_rate": 7.478166737478543e-05, "loss": 12.1022, "step": 21806 }, { "epoch": 1.1874783034857366, "grad_norm": 0.50538323000821, "learning_rate": 7.477313422540017e-05, "loss": 11.9044, "step": 21807 }, { "epoch": 1.1875327574823196, "grad_norm": 0.5468604627982997, "learning_rate": 7.476460127218328e-05, "loss": 12.081, "step": 21808 }, { "epoch": 1.1875872114789026, "grad_norm": 0.5568421073854486, "learning_rate": 7.475606851520107e-05, "loss": 12.0435, "step": 21809 }, { "epoch": 1.1876416654754856, "grad_norm": 0.6510994234584724, "learning_rate": 7.474753595451992e-05, "loss": 12.0943, "step": 21810 }, { "epoch": 1.1876961194720685, "grad_norm": 0.5522041966154742, "learning_rate": 7.473900359020615e-05, "loss": 12.0387, "step": 21811 }, { "epoch": 1.1877505734686515, "grad_norm": 0.6119962614850255, "learning_rate": 7.473047142232611e-05, "loss": 12.0829, "step": 21812 }, { "epoch": 1.1878050274652345, "grad_norm": 0.5305426444618092, "learning_rate": 7.472193945094619e-05, "loss": 12.0808, "step": 21813 }, { "epoch": 1.1878594814618175, "grad_norm": 0.6337365180981813, "learning_rate": 7.47134076761327e-05, "loss": 12.129, "step": 21814 }, { "epoch": 1.1879139354584005, "grad_norm": 0.5250699532031685, "learning_rate": 7.470487609795197e-05, "loss": 11.983, "step": 21815 }, { "epoch": 1.1879683894549835, "grad_norm": 0.5471512499498226, "learning_rate": 7.469634471647037e-05, "loss": 12.1291, "step": 21816 }, { "epoch": 1.1880228434515665, "grad_norm": 0.5467733525912786, "learning_rate": 7.468781353175425e-05, "loss": 11.9967, "step": 21817 }, { "epoch": 1.1880772974481495, "grad_norm": 0.50643789381646, "learning_rate": 7.467928254386993e-05, "loss": 11.8694, "step": 21818 }, { "epoch": 1.1881317514447325, "grad_norm": 0.6046764353226523, "learning_rate": 7.467075175288377e-05, "loss": 12.0005, "step": 21819 }, { "epoch": 1.1881862054413157, "grad_norm": 0.5516677753495105, "learning_rate": 7.466222115886208e-05, "loss": 11.8673, "step": 21820 }, { "epoch": 1.1882406594378987, "grad_norm": 0.5172722454661293, "learning_rate": 7.465369076187124e-05, "loss": 11.9521, "step": 21821 }, { "epoch": 1.1882951134344817, "grad_norm": 0.49449399673176725, "learning_rate": 7.46451605619775e-05, "loss": 11.9785, "step": 21822 }, { "epoch": 1.1883495674310647, "grad_norm": 0.6243888753730924, "learning_rate": 7.463663055924728e-05, "loss": 11.9585, "step": 21823 }, { "epoch": 1.1884040214276477, "grad_norm": 0.5698569554828076, "learning_rate": 7.462810075374685e-05, "loss": 11.8915, "step": 21824 }, { "epoch": 1.1884584754242307, "grad_norm": 0.5130897372640837, "learning_rate": 7.461957114554256e-05, "loss": 12.0606, "step": 21825 }, { "epoch": 1.1885129294208137, "grad_norm": 0.585214140568694, "learning_rate": 7.461104173470075e-05, "loss": 12.0466, "step": 21826 }, { "epoch": 1.1885673834173967, "grad_norm": 0.8057724923497462, "learning_rate": 7.460251252128774e-05, "loss": 12.0085, "step": 21827 }, { "epoch": 1.1886218374139796, "grad_norm": 0.5206311767301525, "learning_rate": 7.459398350536985e-05, "loss": 12.0089, "step": 21828 }, { "epoch": 1.1886762914105626, "grad_norm": 0.5250316044303713, "learning_rate": 7.458545468701341e-05, "loss": 12.0066, "step": 21829 }, { "epoch": 1.1887307454071459, "grad_norm": 0.5309614924213232, "learning_rate": 7.457692606628473e-05, "loss": 11.9824, "step": 21830 }, { "epoch": 1.1887851994037288, "grad_norm": 0.6201426355334324, "learning_rate": 7.456839764325016e-05, "loss": 11.9513, "step": 21831 }, { "epoch": 1.1888396534003118, "grad_norm": 0.6264038379079032, "learning_rate": 7.455986941797597e-05, "loss": 12.1016, "step": 21832 }, { "epoch": 1.1888941073968948, "grad_norm": 0.5442003806645258, "learning_rate": 7.45513413905285e-05, "loss": 12.0651, "step": 21833 }, { "epoch": 1.1889485613934778, "grad_norm": 0.5446554848843562, "learning_rate": 7.454281356097407e-05, "loss": 11.9691, "step": 21834 }, { "epoch": 1.1890030153900608, "grad_norm": 0.6018851851438473, "learning_rate": 7.453428592937901e-05, "loss": 12.0177, "step": 21835 }, { "epoch": 1.1890574693866438, "grad_norm": 0.5436708939061847, "learning_rate": 7.45257584958096e-05, "loss": 11.8384, "step": 21836 }, { "epoch": 1.1891119233832268, "grad_norm": 0.5096974488486813, "learning_rate": 7.451723126033214e-05, "loss": 11.8501, "step": 21837 }, { "epoch": 1.1891663773798098, "grad_norm": 0.6681927843726951, "learning_rate": 7.450870422301298e-05, "loss": 12.0757, "step": 21838 }, { "epoch": 1.1892208313763928, "grad_norm": 0.5688654042324359, "learning_rate": 7.450017738391841e-05, "loss": 11.7817, "step": 21839 }, { "epoch": 1.1892752853729758, "grad_norm": 0.5095351042546655, "learning_rate": 7.449165074311475e-05, "loss": 11.9059, "step": 21840 }, { "epoch": 1.1893297393695588, "grad_norm": 0.5143100203772302, "learning_rate": 7.448312430066831e-05, "loss": 12.0267, "step": 21841 }, { "epoch": 1.1893841933661418, "grad_norm": 0.5155825069420525, "learning_rate": 7.447459805664534e-05, "loss": 11.8781, "step": 21842 }, { "epoch": 1.1894386473627248, "grad_norm": 0.5223028192969036, "learning_rate": 7.446607201111219e-05, "loss": 12.0149, "step": 21843 }, { "epoch": 1.189493101359308, "grad_norm": 0.5689111220899553, "learning_rate": 7.445754616413514e-05, "loss": 11.7965, "step": 21844 }, { "epoch": 1.189547555355891, "grad_norm": 0.5745430197939587, "learning_rate": 7.444902051578049e-05, "loss": 12.1652, "step": 21845 }, { "epoch": 1.189602009352474, "grad_norm": 0.5448431461543952, "learning_rate": 7.444049506611454e-05, "loss": 11.8951, "step": 21846 }, { "epoch": 1.189656463349057, "grad_norm": 0.5872918584595909, "learning_rate": 7.44319698152036e-05, "loss": 12.0168, "step": 21847 }, { "epoch": 1.18971091734564, "grad_norm": 0.5226448915531637, "learning_rate": 7.442344476311393e-05, "loss": 12.0349, "step": 21848 }, { "epoch": 1.189765371342223, "grad_norm": 0.5432356020572908, "learning_rate": 7.441491990991185e-05, "loss": 11.9819, "step": 21849 }, { "epoch": 1.189819825338806, "grad_norm": 0.5628765091838128, "learning_rate": 7.440639525566365e-05, "loss": 12.068, "step": 21850 }, { "epoch": 1.189874279335389, "grad_norm": 0.531129064044447, "learning_rate": 7.439787080043565e-05, "loss": 11.977, "step": 21851 }, { "epoch": 1.189928733331972, "grad_norm": 0.5306473030453958, "learning_rate": 7.438934654429407e-05, "loss": 11.9767, "step": 21852 }, { "epoch": 1.189983187328555, "grad_norm": 0.6468496902013098, "learning_rate": 7.438082248730521e-05, "loss": 12.0752, "step": 21853 }, { "epoch": 1.1900376413251381, "grad_norm": 0.662395989981953, "learning_rate": 7.437229862953536e-05, "loss": 12.1451, "step": 21854 }, { "epoch": 1.190092095321721, "grad_norm": 0.5634303787723975, "learning_rate": 7.436377497105085e-05, "loss": 11.8939, "step": 21855 }, { "epoch": 1.190146549318304, "grad_norm": 0.5284039031704981, "learning_rate": 7.43552515119179e-05, "loss": 12.0463, "step": 21856 }, { "epoch": 1.190201003314887, "grad_norm": 0.5481202976593118, "learning_rate": 7.434672825220283e-05, "loss": 12.0324, "step": 21857 }, { "epoch": 1.19025545731147, "grad_norm": 0.5671166962659867, "learning_rate": 7.433820519197189e-05, "loss": 12.0328, "step": 21858 }, { "epoch": 1.190309911308053, "grad_norm": 0.552763651588234, "learning_rate": 7.432968233129139e-05, "loss": 12.0551, "step": 21859 }, { "epoch": 1.190364365304636, "grad_norm": 0.5391150052421613, "learning_rate": 7.432115967022754e-05, "loss": 11.999, "step": 21860 }, { "epoch": 1.190418819301219, "grad_norm": 0.5515344211903856, "learning_rate": 7.431263720884674e-05, "loss": 11.9639, "step": 21861 }, { "epoch": 1.190473273297802, "grad_norm": 0.4927736099365677, "learning_rate": 7.430411494721512e-05, "loss": 12.0217, "step": 21862 }, { "epoch": 1.190527727294385, "grad_norm": 0.6616589330031137, "learning_rate": 7.429559288539903e-05, "loss": 12.0721, "step": 21863 }, { "epoch": 1.190582181290968, "grad_norm": 0.5585836406806164, "learning_rate": 7.428707102346469e-05, "loss": 11.963, "step": 21864 }, { "epoch": 1.190636635287551, "grad_norm": 0.5778445956640136, "learning_rate": 7.427854936147841e-05, "loss": 12.1283, "step": 21865 }, { "epoch": 1.190691089284134, "grad_norm": 0.5773331365814527, "learning_rate": 7.427002789950645e-05, "loss": 12.102, "step": 21866 }, { "epoch": 1.1907455432807172, "grad_norm": 0.6090155689074539, "learning_rate": 7.426150663761508e-05, "loss": 12.058, "step": 21867 }, { "epoch": 1.1907999972773002, "grad_norm": 0.5268971066809944, "learning_rate": 7.425298557587054e-05, "loss": 11.9267, "step": 21868 }, { "epoch": 1.1908544512738832, "grad_norm": 0.5352024391379133, "learning_rate": 7.42444647143391e-05, "loss": 11.8219, "step": 21869 }, { "epoch": 1.1909089052704662, "grad_norm": 0.5267535550203765, "learning_rate": 7.423594405308703e-05, "loss": 11.9724, "step": 21870 }, { "epoch": 1.1909633592670492, "grad_norm": 0.5316513047581957, "learning_rate": 7.42274235921806e-05, "loss": 12.0106, "step": 21871 }, { "epoch": 1.1910178132636322, "grad_norm": 0.5899821522090289, "learning_rate": 7.421890333168602e-05, "loss": 12.1346, "step": 21872 }, { "epoch": 1.1910722672602152, "grad_norm": 0.5992321160668765, "learning_rate": 7.421038327166958e-05, "loss": 12.0992, "step": 21873 }, { "epoch": 1.1911267212567982, "grad_norm": 0.5220059104085887, "learning_rate": 7.420186341219751e-05, "loss": 12.0391, "step": 21874 }, { "epoch": 1.1911811752533812, "grad_norm": 0.5483952568272, "learning_rate": 7.419334375333606e-05, "loss": 12.0126, "step": 21875 }, { "epoch": 1.1912356292499642, "grad_norm": 0.5386623185595957, "learning_rate": 7.418482429515152e-05, "loss": 12.078, "step": 21876 }, { "epoch": 1.1912900832465474, "grad_norm": 0.5439532173277934, "learning_rate": 7.417630503771013e-05, "loss": 12.0004, "step": 21877 }, { "epoch": 1.1913445372431304, "grad_norm": 0.5736166552730142, "learning_rate": 7.41677859810781e-05, "loss": 12.0908, "step": 21878 }, { "epoch": 1.1913989912397134, "grad_norm": 0.5648809372426857, "learning_rate": 7.41592671253217e-05, "loss": 11.9778, "step": 21879 }, { "epoch": 1.1914534452362964, "grad_norm": 0.48510183035173565, "learning_rate": 7.41507484705072e-05, "loss": 11.9495, "step": 21880 }, { "epoch": 1.1915078992328794, "grad_norm": 0.5211179210723154, "learning_rate": 7.41422300167008e-05, "loss": 11.9781, "step": 21881 }, { "epoch": 1.1915623532294624, "grad_norm": 0.5299213249093413, "learning_rate": 7.413371176396876e-05, "loss": 12.0785, "step": 21882 }, { "epoch": 1.1916168072260453, "grad_norm": 0.49308108682965807, "learning_rate": 7.41251937123773e-05, "loss": 11.9841, "step": 21883 }, { "epoch": 1.1916712612226283, "grad_norm": 0.5342328033936905, "learning_rate": 7.41166758619927e-05, "loss": 11.8857, "step": 21884 }, { "epoch": 1.1917257152192113, "grad_norm": 0.5273168195257198, "learning_rate": 7.410815821288113e-05, "loss": 12.0555, "step": 21885 }, { "epoch": 1.1917801692157943, "grad_norm": 0.5528610747944571, "learning_rate": 7.409964076510886e-05, "loss": 11.9563, "step": 21886 }, { "epoch": 1.1918346232123773, "grad_norm": 0.5396212993556745, "learning_rate": 7.409112351874214e-05, "loss": 11.9265, "step": 21887 }, { "epoch": 1.1918890772089603, "grad_norm": 0.5604629835030344, "learning_rate": 7.40826064738472e-05, "loss": 11.9565, "step": 21888 }, { "epoch": 1.1919435312055433, "grad_norm": 0.5256247292380115, "learning_rate": 7.407408963049027e-05, "loss": 11.9664, "step": 21889 }, { "epoch": 1.1919979852021265, "grad_norm": 0.5554092422140184, "learning_rate": 7.406557298873754e-05, "loss": 11.9756, "step": 21890 }, { "epoch": 1.1920524391987095, "grad_norm": 0.5385375597876597, "learning_rate": 7.40570565486553e-05, "loss": 11.9556, "step": 21891 }, { "epoch": 1.1921068931952925, "grad_norm": 0.5466236276669121, "learning_rate": 7.404854031030971e-05, "loss": 11.9882, "step": 21892 }, { "epoch": 1.1921613471918755, "grad_norm": 0.610434010639925, "learning_rate": 7.404002427376703e-05, "loss": 11.9038, "step": 21893 }, { "epoch": 1.1922158011884585, "grad_norm": 0.5481300770223017, "learning_rate": 7.403150843909348e-05, "loss": 11.7977, "step": 21894 }, { "epoch": 1.1922702551850415, "grad_norm": 0.5432539067052593, "learning_rate": 7.402299280635526e-05, "loss": 11.8553, "step": 21895 }, { "epoch": 1.1923247091816245, "grad_norm": 0.5326296538994771, "learning_rate": 7.401447737561862e-05, "loss": 12.0045, "step": 21896 }, { "epoch": 1.1923791631782075, "grad_norm": 0.48761894352387997, "learning_rate": 7.400596214694973e-05, "loss": 11.9545, "step": 21897 }, { "epoch": 1.1924336171747905, "grad_norm": 0.5777590971349343, "learning_rate": 7.399744712041485e-05, "loss": 12.0196, "step": 21898 }, { "epoch": 1.1924880711713735, "grad_norm": 0.5979944522655282, "learning_rate": 7.398893229608019e-05, "loss": 11.9432, "step": 21899 }, { "epoch": 1.1925425251679567, "grad_norm": 0.5351970404514349, "learning_rate": 7.398041767401196e-05, "loss": 11.9262, "step": 21900 }, { "epoch": 1.1925969791645397, "grad_norm": 0.5851772163331859, "learning_rate": 7.397190325427641e-05, "loss": 12.0415, "step": 21901 }, { "epoch": 1.1926514331611227, "grad_norm": 0.5520233932528713, "learning_rate": 7.396338903693964e-05, "loss": 11.9481, "step": 21902 }, { "epoch": 1.1927058871577056, "grad_norm": 0.5674022158261073, "learning_rate": 7.395487502206794e-05, "loss": 11.8746, "step": 21903 }, { "epoch": 1.1927603411542886, "grad_norm": 0.5574839517552891, "learning_rate": 7.394636120972749e-05, "loss": 11.9236, "step": 21904 }, { "epoch": 1.1928147951508716, "grad_norm": 0.5526424355538554, "learning_rate": 7.393784759998452e-05, "loss": 11.8955, "step": 21905 }, { "epoch": 1.1928692491474546, "grad_norm": 0.5830260492488549, "learning_rate": 7.392933419290522e-05, "loss": 11.9274, "step": 21906 }, { "epoch": 1.1929237031440376, "grad_norm": 0.5597230753997932, "learning_rate": 7.392082098855577e-05, "loss": 12.0131, "step": 21907 }, { "epoch": 1.1929781571406206, "grad_norm": 0.6554421652823247, "learning_rate": 7.391230798700242e-05, "loss": 12.031, "step": 21908 }, { "epoch": 1.1930326111372036, "grad_norm": 0.5903428092902078, "learning_rate": 7.390379518831129e-05, "loss": 12.0472, "step": 21909 }, { "epoch": 1.1930870651337866, "grad_norm": 0.5491708507929751, "learning_rate": 7.389528259254866e-05, "loss": 11.9285, "step": 21910 }, { "epoch": 1.1931415191303696, "grad_norm": 0.5961736138488625, "learning_rate": 7.388677019978072e-05, "loss": 11.9876, "step": 21911 }, { "epoch": 1.1931959731269526, "grad_norm": 0.5362989632073789, "learning_rate": 7.387825801007359e-05, "loss": 11.9254, "step": 21912 }, { "epoch": 1.1932504271235358, "grad_norm": 0.59063688744146, "learning_rate": 7.38697460234935e-05, "loss": 12.0482, "step": 21913 }, { "epoch": 1.1933048811201188, "grad_norm": 0.538421744485904, "learning_rate": 7.386123424010667e-05, "loss": 11.9666, "step": 21914 }, { "epoch": 1.1933593351167018, "grad_norm": 0.5636419177521339, "learning_rate": 7.385272265997924e-05, "loss": 11.8589, "step": 21915 }, { "epoch": 1.1934137891132848, "grad_norm": 0.5653417044781114, "learning_rate": 7.384421128317746e-05, "loss": 11.9675, "step": 21916 }, { "epoch": 1.1934682431098678, "grad_norm": 0.5327041947186542, "learning_rate": 7.383570010976746e-05, "loss": 11.9112, "step": 21917 }, { "epoch": 1.1935226971064508, "grad_norm": 0.5864609024597708, "learning_rate": 7.382718913981543e-05, "loss": 11.9947, "step": 21918 }, { "epoch": 1.1935771511030338, "grad_norm": 0.6286781208678304, "learning_rate": 7.381867837338758e-05, "loss": 11.9866, "step": 21919 }, { "epoch": 1.1936316050996167, "grad_norm": 0.5571969791110689, "learning_rate": 7.381016781055007e-05, "loss": 11.8934, "step": 21920 }, { "epoch": 1.1936860590961997, "grad_norm": 0.5391164813349384, "learning_rate": 7.380165745136914e-05, "loss": 11.9263, "step": 21921 }, { "epoch": 1.1937405130927827, "grad_norm": 0.5685581972310029, "learning_rate": 7.379314729591086e-05, "loss": 11.9015, "step": 21922 }, { "epoch": 1.1937949670893657, "grad_norm": 0.5347922702222191, "learning_rate": 7.378463734424148e-05, "loss": 11.9034, "step": 21923 }, { "epoch": 1.193849421085949, "grad_norm": 0.5334756761875965, "learning_rate": 7.377612759642714e-05, "loss": 11.8916, "step": 21924 }, { "epoch": 1.193903875082532, "grad_norm": 0.5378746969796901, "learning_rate": 7.376761805253402e-05, "loss": 11.9487, "step": 21925 }, { "epoch": 1.193958329079115, "grad_norm": 0.5632364163634869, "learning_rate": 7.375910871262832e-05, "loss": 11.9742, "step": 21926 }, { "epoch": 1.194012783075698, "grad_norm": 0.5509891768269785, "learning_rate": 7.37505995767762e-05, "loss": 11.9957, "step": 21927 }, { "epoch": 1.194067237072281, "grad_norm": 0.5028240155737436, "learning_rate": 7.37420906450438e-05, "loss": 11.9756, "step": 21928 }, { "epoch": 1.194121691068864, "grad_norm": 0.5700726798776007, "learning_rate": 7.373358191749732e-05, "loss": 11.9226, "step": 21929 }, { "epoch": 1.194176145065447, "grad_norm": 0.640326226880041, "learning_rate": 7.372507339420291e-05, "loss": 12.0049, "step": 21930 }, { "epoch": 1.1942305990620299, "grad_norm": 0.5734539135195217, "learning_rate": 7.371656507522676e-05, "loss": 12.0835, "step": 21931 }, { "epoch": 1.1942850530586129, "grad_norm": 0.5266682529185589, "learning_rate": 7.370805696063499e-05, "loss": 11.9226, "step": 21932 }, { "epoch": 1.1943395070551959, "grad_norm": 0.5321589207592013, "learning_rate": 7.369954905049376e-05, "loss": 11.8316, "step": 21933 }, { "epoch": 1.1943939610517789, "grad_norm": 0.6018036908920806, "learning_rate": 7.369104134486926e-05, "loss": 11.9235, "step": 21934 }, { "epoch": 1.1944484150483619, "grad_norm": 0.5212117869171325, "learning_rate": 7.368253384382761e-05, "loss": 12.0144, "step": 21935 }, { "epoch": 1.1945028690449448, "grad_norm": 0.55760427495541, "learning_rate": 7.367402654743503e-05, "loss": 11.8912, "step": 21936 }, { "epoch": 1.194557323041528, "grad_norm": 0.5723866896650784, "learning_rate": 7.36655194557576e-05, "loss": 12.0242, "step": 21937 }, { "epoch": 1.194611777038111, "grad_norm": 0.6439026464612314, "learning_rate": 7.365701256886152e-05, "loss": 12.0879, "step": 21938 }, { "epoch": 1.194666231034694, "grad_norm": 0.6360830880730395, "learning_rate": 7.364850588681293e-05, "loss": 11.77, "step": 21939 }, { "epoch": 1.194720685031277, "grad_norm": 0.5790733971628051, "learning_rate": 7.363999940967799e-05, "loss": 12.0057, "step": 21940 }, { "epoch": 1.19477513902786, "grad_norm": 0.5885949248156588, "learning_rate": 7.363149313752284e-05, "loss": 11.979, "step": 21941 }, { "epoch": 1.194829593024443, "grad_norm": 0.5387787434352426, "learning_rate": 7.362298707041361e-05, "loss": 11.9989, "step": 21942 }, { "epoch": 1.194884047021026, "grad_norm": 0.5671488692927811, "learning_rate": 7.361448120841645e-05, "loss": 11.9839, "step": 21943 }, { "epoch": 1.194938501017609, "grad_norm": 0.5458994071998429, "learning_rate": 7.360597555159752e-05, "loss": 12.0155, "step": 21944 }, { "epoch": 1.194992955014192, "grad_norm": 0.5285626015856262, "learning_rate": 7.359747010002294e-05, "loss": 11.8917, "step": 21945 }, { "epoch": 1.195047409010775, "grad_norm": 0.5591984973781156, "learning_rate": 7.358896485375883e-05, "loss": 11.9563, "step": 21946 }, { "epoch": 1.1951018630073582, "grad_norm": 0.5750925795714951, "learning_rate": 7.358045981287141e-05, "loss": 12.0029, "step": 21947 }, { "epoch": 1.1951563170039412, "grad_norm": 0.5418965446382998, "learning_rate": 7.357195497742673e-05, "loss": 11.9467, "step": 21948 }, { "epoch": 1.1952107710005242, "grad_norm": 0.539027786051126, "learning_rate": 7.356345034749098e-05, "loss": 11.9311, "step": 21949 }, { "epoch": 1.1952652249971072, "grad_norm": 0.5456273462070516, "learning_rate": 7.355494592313026e-05, "loss": 12.0312, "step": 21950 }, { "epoch": 1.1953196789936902, "grad_norm": 0.5476710821233999, "learning_rate": 7.354644170441075e-05, "loss": 11.9479, "step": 21951 }, { "epoch": 1.1953741329902732, "grad_norm": 0.5823357085415194, "learning_rate": 7.353793769139851e-05, "loss": 12.0686, "step": 21952 }, { "epoch": 1.1954285869868562, "grad_norm": 0.5753955765326135, "learning_rate": 7.352943388415973e-05, "loss": 12.0287, "step": 21953 }, { "epoch": 1.1954830409834392, "grad_norm": 0.5315100817541516, "learning_rate": 7.35209302827605e-05, "loss": 11.9147, "step": 21954 }, { "epoch": 1.1955374949800222, "grad_norm": 0.541929125728084, "learning_rate": 7.351242688726693e-05, "loss": 11.989, "step": 21955 }, { "epoch": 1.1955919489766051, "grad_norm": 0.58048912274076, "learning_rate": 7.350392369774521e-05, "loss": 12.0564, "step": 21956 }, { "epoch": 1.1956464029731881, "grad_norm": 0.5415460020106003, "learning_rate": 7.34954207142614e-05, "loss": 11.9536, "step": 21957 }, { "epoch": 1.1957008569697711, "grad_norm": 0.5395732840520236, "learning_rate": 7.348691793688162e-05, "loss": 12.0654, "step": 21958 }, { "epoch": 1.1957553109663541, "grad_norm": 0.5234919368691676, "learning_rate": 7.347841536567205e-05, "loss": 11.7763, "step": 21959 }, { "epoch": 1.1958097649629373, "grad_norm": 0.5268100722474017, "learning_rate": 7.346991300069876e-05, "loss": 11.9582, "step": 21960 }, { "epoch": 1.1958642189595203, "grad_norm": 0.6006650936852125, "learning_rate": 7.346141084202787e-05, "loss": 12.0406, "step": 21961 }, { "epoch": 1.1959186729561033, "grad_norm": 0.5727018594013691, "learning_rate": 7.345290888972554e-05, "loss": 12.0073, "step": 21962 }, { "epoch": 1.1959731269526863, "grad_norm": 0.5304001919462159, "learning_rate": 7.344440714385781e-05, "loss": 12.0074, "step": 21963 }, { "epoch": 1.1960275809492693, "grad_norm": 0.5307128137436281, "learning_rate": 7.34359056044908e-05, "loss": 12.0686, "step": 21964 }, { "epoch": 1.1960820349458523, "grad_norm": 0.6126776099286598, "learning_rate": 7.342740427169068e-05, "loss": 12.0333, "step": 21965 }, { "epoch": 1.1961364889424353, "grad_norm": 0.5468384048153477, "learning_rate": 7.34189031455235e-05, "loss": 11.9079, "step": 21966 }, { "epoch": 1.1961909429390183, "grad_norm": 0.5427062902505094, "learning_rate": 7.341040222605539e-05, "loss": 11.9996, "step": 21967 }, { "epoch": 1.1962453969356013, "grad_norm": 0.5923454382568167, "learning_rate": 7.340190151335245e-05, "loss": 12.1062, "step": 21968 }, { "epoch": 1.1962998509321843, "grad_norm": 0.5215597390807284, "learning_rate": 7.339340100748078e-05, "loss": 12.0299, "step": 21969 }, { "epoch": 1.1963543049287675, "grad_norm": 0.5252418576237954, "learning_rate": 7.338490070850649e-05, "loss": 11.9611, "step": 21970 }, { "epoch": 1.1964087589253505, "grad_norm": 0.5642377871979222, "learning_rate": 7.337640061649566e-05, "loss": 12.0417, "step": 21971 }, { "epoch": 1.1964632129219335, "grad_norm": 0.5532303926034008, "learning_rate": 7.336790073151447e-05, "loss": 11.9488, "step": 21972 }, { "epoch": 1.1965176669185165, "grad_norm": 0.5866435522997976, "learning_rate": 7.335940105362888e-05, "loss": 12.0359, "step": 21973 }, { "epoch": 1.1965721209150995, "grad_norm": 0.5965417508798888, "learning_rate": 7.33509015829051e-05, "loss": 12.0077, "step": 21974 }, { "epoch": 1.1966265749116825, "grad_norm": 0.5517370879178353, "learning_rate": 7.334240231940914e-05, "loss": 11.9778, "step": 21975 }, { "epoch": 1.1966810289082654, "grad_norm": 0.5736355459938184, "learning_rate": 7.333390326320715e-05, "loss": 11.9716, "step": 21976 }, { "epoch": 1.1967354829048484, "grad_norm": 0.6115971430366769, "learning_rate": 7.332540441436519e-05, "loss": 12.0461, "step": 21977 }, { "epoch": 1.1967899369014314, "grad_norm": 0.5074682437089225, "learning_rate": 7.331690577294936e-05, "loss": 11.7933, "step": 21978 }, { "epoch": 1.1968443908980144, "grad_norm": 0.5400402430781936, "learning_rate": 7.330840733902575e-05, "loss": 11.9894, "step": 21979 }, { "epoch": 1.1968988448945974, "grad_norm": 0.5863845049108035, "learning_rate": 7.329990911266043e-05, "loss": 11.9397, "step": 21980 }, { "epoch": 1.1969532988911804, "grad_norm": 0.5672006204768686, "learning_rate": 7.32914110939195e-05, "loss": 12.021, "step": 21981 }, { "epoch": 1.1970077528877634, "grad_norm": 0.5649091205380448, "learning_rate": 7.32829132828691e-05, "loss": 11.9162, "step": 21982 }, { "epoch": 1.1970622068843466, "grad_norm": 0.5193042287535758, "learning_rate": 7.327441567957518e-05, "loss": 11.8243, "step": 21983 }, { "epoch": 1.1971166608809296, "grad_norm": 0.6211147748830858, "learning_rate": 7.326591828410388e-05, "loss": 12.1122, "step": 21984 }, { "epoch": 1.1971711148775126, "grad_norm": 0.5599456933873733, "learning_rate": 7.32574210965213e-05, "loss": 12.0533, "step": 21985 }, { "epoch": 1.1972255688740956, "grad_norm": 0.584362125676465, "learning_rate": 7.324892411689348e-05, "loss": 11.9424, "step": 21986 }, { "epoch": 1.1972800228706786, "grad_norm": 0.6199258810366467, "learning_rate": 7.324042734528653e-05, "loss": 11.9043, "step": 21987 }, { "epoch": 1.1973344768672616, "grad_norm": 0.566877086814113, "learning_rate": 7.32319307817665e-05, "loss": 11.9762, "step": 21988 }, { "epoch": 1.1973889308638446, "grad_norm": 0.5703254437265531, "learning_rate": 7.322343442639948e-05, "loss": 12.0712, "step": 21989 }, { "epoch": 1.1974433848604276, "grad_norm": 0.6513271552859072, "learning_rate": 7.32149382792515e-05, "loss": 11.9653, "step": 21990 }, { "epoch": 1.1974978388570106, "grad_norm": 0.5497910689981529, "learning_rate": 7.320644234038865e-05, "loss": 12.0185, "step": 21991 }, { "epoch": 1.1975522928535935, "grad_norm": 0.543022473176367, "learning_rate": 7.319794660987704e-05, "loss": 12.0448, "step": 21992 }, { "epoch": 1.1976067468501765, "grad_norm": 0.5516768398291159, "learning_rate": 7.318945108778267e-05, "loss": 12.062, "step": 21993 }, { "epoch": 1.1976612008467598, "grad_norm": 0.5261862423612065, "learning_rate": 7.318095577417161e-05, "loss": 11.9819, "step": 21994 }, { "epoch": 1.1977156548433427, "grad_norm": 0.5705506301206956, "learning_rate": 7.317246066910992e-05, "loss": 11.9227, "step": 21995 }, { "epoch": 1.1977701088399257, "grad_norm": 0.5288704172122437, "learning_rate": 7.31639657726637e-05, "loss": 12.0418, "step": 21996 }, { "epoch": 1.1978245628365087, "grad_norm": 0.5464414712091592, "learning_rate": 7.315547108489897e-05, "loss": 11.859, "step": 21997 }, { "epoch": 1.1978790168330917, "grad_norm": 0.5154675847793856, "learning_rate": 7.314697660588181e-05, "loss": 11.9805, "step": 21998 }, { "epoch": 1.1979334708296747, "grad_norm": 0.54948897474018, "learning_rate": 7.313848233567826e-05, "loss": 11.9353, "step": 21999 }, { "epoch": 1.1979879248262577, "grad_norm": 0.5794043846753852, "learning_rate": 7.312998827435438e-05, "loss": 11.8999, "step": 22000 }, { "epoch": 1.1980423788228407, "grad_norm": 0.5547427345883039, "learning_rate": 7.312149442197623e-05, "loss": 11.9735, "step": 22001 }, { "epoch": 1.1980968328194237, "grad_norm": 0.5325978083539448, "learning_rate": 7.311300077860986e-05, "loss": 12.0776, "step": 22002 }, { "epoch": 1.1981512868160067, "grad_norm": 0.6224752343369995, "learning_rate": 7.310450734432125e-05, "loss": 12.2011, "step": 22003 }, { "epoch": 1.1982057408125897, "grad_norm": 0.5198020646625031, "learning_rate": 7.309601411917655e-05, "loss": 11.8813, "step": 22004 }, { "epoch": 1.1982601948091727, "grad_norm": 0.5493944469541123, "learning_rate": 7.308752110324173e-05, "loss": 11.9463, "step": 22005 }, { "epoch": 1.1983146488057557, "grad_norm": 0.6173523081537328, "learning_rate": 7.307902829658286e-05, "loss": 12.0457, "step": 22006 }, { "epoch": 1.1983691028023389, "grad_norm": 0.567660112391515, "learning_rate": 7.307053569926597e-05, "loss": 12.0803, "step": 22007 }, { "epoch": 1.1984235567989219, "grad_norm": 0.5880222453600811, "learning_rate": 7.30620433113571e-05, "loss": 12.0014, "step": 22008 }, { "epoch": 1.1984780107955049, "grad_norm": 0.5611166320327582, "learning_rate": 7.305355113292233e-05, "loss": 11.9751, "step": 22009 }, { "epoch": 1.1985324647920879, "grad_norm": 0.5648240770131326, "learning_rate": 7.304505916402766e-05, "loss": 12.0712, "step": 22010 }, { "epoch": 1.1985869187886709, "grad_norm": 0.5093535437447665, "learning_rate": 7.303656740473914e-05, "loss": 11.9333, "step": 22011 }, { "epoch": 1.1986413727852538, "grad_norm": 0.5327392268729846, "learning_rate": 7.302807585512281e-05, "loss": 11.957, "step": 22012 }, { "epoch": 1.1986958267818368, "grad_norm": 0.559389433152678, "learning_rate": 7.301958451524464e-05, "loss": 12.0161, "step": 22013 }, { "epoch": 1.1987502807784198, "grad_norm": 0.5470471857605839, "learning_rate": 7.301109338517074e-05, "loss": 11.9553, "step": 22014 }, { "epoch": 1.1988047347750028, "grad_norm": 0.47837328899612946, "learning_rate": 7.300260246496708e-05, "loss": 11.8601, "step": 22015 }, { "epoch": 1.1988591887715858, "grad_norm": 0.5899237141298945, "learning_rate": 7.299411175469972e-05, "loss": 11.977, "step": 22016 }, { "epoch": 1.198913642768169, "grad_norm": 0.5375233053095325, "learning_rate": 7.298562125443466e-05, "loss": 11.9505, "step": 22017 }, { "epoch": 1.198968096764752, "grad_norm": 0.48675226641449054, "learning_rate": 7.297713096423794e-05, "loss": 12.0649, "step": 22018 }, { "epoch": 1.199022550761335, "grad_norm": 0.5555814473334991, "learning_rate": 7.296864088417559e-05, "loss": 12.0215, "step": 22019 }, { "epoch": 1.199077004757918, "grad_norm": 0.5859006106638064, "learning_rate": 7.296015101431362e-05, "loss": 12.0142, "step": 22020 }, { "epoch": 1.199131458754501, "grad_norm": 0.5139311204066148, "learning_rate": 7.295166135471807e-05, "loss": 11.8954, "step": 22021 }, { "epoch": 1.199185912751084, "grad_norm": 0.55179853803515, "learning_rate": 7.294317190545494e-05, "loss": 12.0594, "step": 22022 }, { "epoch": 1.199240366747667, "grad_norm": 0.5353701993641484, "learning_rate": 7.293468266659023e-05, "loss": 11.9732, "step": 22023 }, { "epoch": 1.19929482074425, "grad_norm": 0.5464661093525545, "learning_rate": 7.292619363818995e-05, "loss": 11.9928, "step": 22024 }, { "epoch": 1.199349274740833, "grad_norm": 0.6882576058037276, "learning_rate": 7.291770482032014e-05, "loss": 12.1068, "step": 22025 }, { "epoch": 1.199403728737416, "grad_norm": 0.5362359921119237, "learning_rate": 7.29092162130468e-05, "loss": 11.9675, "step": 22026 }, { "epoch": 1.199458182733999, "grad_norm": 0.560517441415461, "learning_rate": 7.290072781643595e-05, "loss": 11.9185, "step": 22027 }, { "epoch": 1.199512636730582, "grad_norm": 0.57787623729744, "learning_rate": 7.289223963055357e-05, "loss": 12.0093, "step": 22028 }, { "epoch": 1.199567090727165, "grad_norm": 0.5177218457333005, "learning_rate": 7.288375165546567e-05, "loss": 11.8983, "step": 22029 }, { "epoch": 1.1996215447237482, "grad_norm": 0.637719324734241, "learning_rate": 7.287526389123827e-05, "loss": 12.0996, "step": 22030 }, { "epoch": 1.1996759987203311, "grad_norm": 0.5255124147758452, "learning_rate": 7.286677633793737e-05, "loss": 11.9215, "step": 22031 }, { "epoch": 1.1997304527169141, "grad_norm": 0.48575137124148565, "learning_rate": 7.285828899562902e-05, "loss": 11.8506, "step": 22032 }, { "epoch": 1.1997849067134971, "grad_norm": 0.6130569886618931, "learning_rate": 7.28498018643791e-05, "loss": 11.9346, "step": 22033 }, { "epoch": 1.1998393607100801, "grad_norm": 0.5802945288033231, "learning_rate": 7.284131494425369e-05, "loss": 12.0002, "step": 22034 }, { "epoch": 1.1998938147066631, "grad_norm": 0.5819325035949174, "learning_rate": 7.283282823531877e-05, "loss": 12.0333, "step": 22035 }, { "epoch": 1.1999482687032461, "grad_norm": 0.5236939960500671, "learning_rate": 7.282434173764035e-05, "loss": 11.9424, "step": 22036 }, { "epoch": 1.200002722699829, "grad_norm": 0.5966003358843548, "learning_rate": 7.281585545128438e-05, "loss": 11.9131, "step": 22037 }, { "epoch": 1.200057176696412, "grad_norm": 0.5546101784497532, "learning_rate": 7.28073693763169e-05, "loss": 12.0133, "step": 22038 }, { "epoch": 1.200111630692995, "grad_norm": 0.5156972528773146, "learning_rate": 7.279888351280386e-05, "loss": 12.0682, "step": 22039 }, { "epoch": 1.2001660846895783, "grad_norm": 0.5116502255674945, "learning_rate": 7.279039786081124e-05, "loss": 12.0056, "step": 22040 }, { "epoch": 1.2002205386861613, "grad_norm": 0.55062344944069, "learning_rate": 7.278191242040508e-05, "loss": 12.006, "step": 22041 }, { "epoch": 1.2002749926827443, "grad_norm": 0.5618602417553507, "learning_rate": 7.277342719165137e-05, "loss": 12.0006, "step": 22042 }, { "epoch": 1.2003294466793273, "grad_norm": 0.5445278229766826, "learning_rate": 7.276494217461602e-05, "loss": 12.0291, "step": 22043 }, { "epoch": 1.2003839006759103, "grad_norm": 0.600400346533125, "learning_rate": 7.275645736936503e-05, "loss": 12.0862, "step": 22044 }, { "epoch": 1.2004383546724933, "grad_norm": 0.568299995987044, "learning_rate": 7.27479727759644e-05, "loss": 12.0462, "step": 22045 }, { "epoch": 1.2004928086690763, "grad_norm": 0.5273174219750538, "learning_rate": 7.273948839448011e-05, "loss": 12.0002, "step": 22046 }, { "epoch": 1.2005472626656593, "grad_norm": 0.568077861293959, "learning_rate": 7.273100422497813e-05, "loss": 11.8807, "step": 22047 }, { "epoch": 1.2006017166622422, "grad_norm": 0.5055822204763117, "learning_rate": 7.272252026752444e-05, "loss": 11.9585, "step": 22048 }, { "epoch": 1.2006561706588252, "grad_norm": 0.5276861482917721, "learning_rate": 7.271403652218501e-05, "loss": 11.9588, "step": 22049 }, { "epoch": 1.2007106246554082, "grad_norm": 0.4972628498060447, "learning_rate": 7.27055529890258e-05, "loss": 11.8181, "step": 22050 }, { "epoch": 1.2007650786519912, "grad_norm": 0.6470355406641413, "learning_rate": 7.269706966811278e-05, "loss": 11.9526, "step": 22051 }, { "epoch": 1.2008195326485742, "grad_norm": 0.556983911002417, "learning_rate": 7.268858655951196e-05, "loss": 11.9277, "step": 22052 }, { "epoch": 1.2008739866451574, "grad_norm": 0.5454720828432393, "learning_rate": 7.268010366328926e-05, "loss": 11.9055, "step": 22053 }, { "epoch": 1.2009284406417404, "grad_norm": 0.5822493836480481, "learning_rate": 7.267162097951063e-05, "loss": 12.0546, "step": 22054 }, { "epoch": 1.2009828946383234, "grad_norm": 0.5324998231694357, "learning_rate": 7.266313850824209e-05, "loss": 11.9716, "step": 22055 }, { "epoch": 1.2010373486349064, "grad_norm": 0.5819054785121974, "learning_rate": 7.26546562495495e-05, "loss": 12.0384, "step": 22056 }, { "epoch": 1.2010918026314894, "grad_norm": 0.5133980500777363, "learning_rate": 7.264617420349895e-05, "loss": 11.9353, "step": 22057 }, { "epoch": 1.2011462566280724, "grad_norm": 0.5658558153461438, "learning_rate": 7.263769237015631e-05, "loss": 11.9319, "step": 22058 }, { "epoch": 1.2012007106246554, "grad_norm": 0.47454789496767735, "learning_rate": 7.26292107495876e-05, "loss": 11.975, "step": 22059 }, { "epoch": 1.2012551646212384, "grad_norm": 0.5440066283558274, "learning_rate": 7.262072934185871e-05, "loss": 11.9386, "step": 22060 }, { "epoch": 1.2013096186178214, "grad_norm": 0.5391034435579533, "learning_rate": 7.261224814703562e-05, "loss": 12.0825, "step": 22061 }, { "epoch": 1.2013640726144044, "grad_norm": 0.584625249614327, "learning_rate": 7.260376716518431e-05, "loss": 11.8243, "step": 22062 }, { "epoch": 1.2014185266109874, "grad_norm": 0.5094079871757436, "learning_rate": 7.259528639637068e-05, "loss": 12.0372, "step": 22063 }, { "epoch": 1.2014729806075706, "grad_norm": 0.7009737554586154, "learning_rate": 7.258680584066069e-05, "loss": 12.017, "step": 22064 }, { "epoch": 1.2015274346041536, "grad_norm": 0.5222235715445752, "learning_rate": 7.25783254981203e-05, "loss": 12.0611, "step": 22065 }, { "epoch": 1.2015818886007366, "grad_norm": 0.6015498109875128, "learning_rate": 7.256984536881545e-05, "loss": 11.9995, "step": 22066 }, { "epoch": 1.2016363425973196, "grad_norm": 0.5289823743578922, "learning_rate": 7.256136545281207e-05, "loss": 11.8069, "step": 22067 }, { "epoch": 1.2016907965939025, "grad_norm": 0.5015003605636306, "learning_rate": 7.255288575017612e-05, "loss": 11.9141, "step": 22068 }, { "epoch": 1.2017452505904855, "grad_norm": 0.5378912269149257, "learning_rate": 7.254440626097354e-05, "loss": 11.9026, "step": 22069 }, { "epoch": 1.2017997045870685, "grad_norm": 0.6546457360339119, "learning_rate": 7.253592698527025e-05, "loss": 11.9585, "step": 22070 }, { "epoch": 1.2018541585836515, "grad_norm": 0.5534231465095167, "learning_rate": 7.252744792313223e-05, "loss": 11.9595, "step": 22071 }, { "epoch": 1.2019086125802345, "grad_norm": 0.5249925229316503, "learning_rate": 7.251896907462537e-05, "loss": 11.9611, "step": 22072 }, { "epoch": 1.2019630665768175, "grad_norm": 0.5746386431126431, "learning_rate": 7.25104904398156e-05, "loss": 12.0867, "step": 22073 }, { "epoch": 1.2020175205734005, "grad_norm": 0.5710153979610945, "learning_rate": 7.250201201876888e-05, "loss": 11.9604, "step": 22074 }, { "epoch": 1.2020719745699835, "grad_norm": 0.5717775825974136, "learning_rate": 7.249353381155111e-05, "loss": 12.0821, "step": 22075 }, { "epoch": 1.2021264285665665, "grad_norm": 0.5998918160103405, "learning_rate": 7.248505581822825e-05, "loss": 12.0195, "step": 22076 }, { "epoch": 1.2021808825631497, "grad_norm": 0.5254032594783522, "learning_rate": 7.247657803886619e-05, "loss": 11.8963, "step": 22077 }, { "epoch": 1.2022353365597327, "grad_norm": 0.5537859402263627, "learning_rate": 7.246810047353087e-05, "loss": 11.9807, "step": 22078 }, { "epoch": 1.2022897905563157, "grad_norm": 0.5903931312597357, "learning_rate": 7.245962312228823e-05, "loss": 11.9017, "step": 22079 }, { "epoch": 1.2023442445528987, "grad_norm": 0.5417485964314547, "learning_rate": 7.245114598520419e-05, "loss": 11.9962, "step": 22080 }, { "epoch": 1.2023986985494817, "grad_norm": 0.5291549933040715, "learning_rate": 7.244266906234465e-05, "loss": 11.8927, "step": 22081 }, { "epoch": 1.2024531525460647, "grad_norm": 0.609881952281548, "learning_rate": 7.243419235377556e-05, "loss": 11.9386, "step": 22082 }, { "epoch": 1.2025076065426477, "grad_norm": 0.5623469278131573, "learning_rate": 7.242571585956279e-05, "loss": 11.9458, "step": 22083 }, { "epoch": 1.2025620605392306, "grad_norm": 0.547022649623105, "learning_rate": 7.241723957977229e-05, "loss": 11.9944, "step": 22084 }, { "epoch": 1.2026165145358136, "grad_norm": 0.5352473947782485, "learning_rate": 7.240876351446995e-05, "loss": 11.8539, "step": 22085 }, { "epoch": 1.2026709685323966, "grad_norm": 0.6625965168116763, "learning_rate": 7.240028766372168e-05, "loss": 12.0785, "step": 22086 }, { "epoch": 1.2027254225289798, "grad_norm": 0.718071770523711, "learning_rate": 7.239181202759342e-05, "loss": 12.1267, "step": 22087 }, { "epoch": 1.2027798765255628, "grad_norm": 0.5455341048641585, "learning_rate": 7.238333660615105e-05, "loss": 11.9934, "step": 22088 }, { "epoch": 1.2028343305221458, "grad_norm": 0.5202972777844755, "learning_rate": 7.237486139946046e-05, "loss": 11.9052, "step": 22089 }, { "epoch": 1.2028887845187288, "grad_norm": 0.595001783655921, "learning_rate": 7.236638640758761e-05, "loss": 12.0565, "step": 22090 }, { "epoch": 1.2029432385153118, "grad_norm": 0.5074587400834651, "learning_rate": 7.235791163059839e-05, "loss": 12.0901, "step": 22091 }, { "epoch": 1.2029976925118948, "grad_norm": 0.7264258267848539, "learning_rate": 7.23494370685587e-05, "loss": 11.8703, "step": 22092 }, { "epoch": 1.2030521465084778, "grad_norm": 0.5209063478833043, "learning_rate": 7.234096272153438e-05, "loss": 11.948, "step": 22093 }, { "epoch": 1.2031066005050608, "grad_norm": 0.5365000809312424, "learning_rate": 7.233248858959139e-05, "loss": 11.9564, "step": 22094 }, { "epoch": 1.2031610545016438, "grad_norm": 0.5794642254238294, "learning_rate": 7.232401467279559e-05, "loss": 11.9069, "step": 22095 }, { "epoch": 1.2032155084982268, "grad_norm": 0.5413611195574631, "learning_rate": 7.231554097121291e-05, "loss": 12.0539, "step": 22096 }, { "epoch": 1.2032699624948098, "grad_norm": 0.5224053850158283, "learning_rate": 7.230706748490923e-05, "loss": 11.934, "step": 22097 }, { "epoch": 1.2033244164913928, "grad_norm": 0.509226677967835, "learning_rate": 7.229859421395042e-05, "loss": 11.9446, "step": 22098 }, { "epoch": 1.2033788704879758, "grad_norm": 0.5373899283698169, "learning_rate": 7.22901211584024e-05, "loss": 12.1334, "step": 22099 }, { "epoch": 1.203433324484559, "grad_norm": 0.5349429933355131, "learning_rate": 7.228164831833102e-05, "loss": 11.9687, "step": 22100 }, { "epoch": 1.203487778481142, "grad_norm": 0.5473400868933365, "learning_rate": 7.22731756938022e-05, "loss": 12.0422, "step": 22101 }, { "epoch": 1.203542232477725, "grad_norm": 0.5280914671988042, "learning_rate": 7.226470328488184e-05, "loss": 11.9748, "step": 22102 }, { "epoch": 1.203596686474308, "grad_norm": 0.5490625735743139, "learning_rate": 7.225623109163584e-05, "loss": 11.9687, "step": 22103 }, { "epoch": 1.203651140470891, "grad_norm": 0.5523105319659944, "learning_rate": 7.224775911412996e-05, "loss": 11.7874, "step": 22104 }, { "epoch": 1.203705594467474, "grad_norm": 0.5269022681343605, "learning_rate": 7.223928735243019e-05, "loss": 11.9339, "step": 22105 }, { "epoch": 1.203760048464057, "grad_norm": 0.5386341462858246, "learning_rate": 7.223081580660236e-05, "loss": 12.1385, "step": 22106 }, { "epoch": 1.20381450246064, "grad_norm": 0.5050447802853469, "learning_rate": 7.222234447671239e-05, "loss": 11.9921, "step": 22107 }, { "epoch": 1.203868956457223, "grad_norm": 0.5736470925018637, "learning_rate": 7.22138733628261e-05, "loss": 12.0373, "step": 22108 }, { "epoch": 1.203923410453806, "grad_norm": 0.5264139579520998, "learning_rate": 7.22054024650094e-05, "loss": 11.9781, "step": 22109 }, { "epoch": 1.2039778644503891, "grad_norm": 0.5299022741525633, "learning_rate": 7.219693178332816e-05, "loss": 11.8533, "step": 22110 }, { "epoch": 1.2040323184469721, "grad_norm": 0.5799698616022124, "learning_rate": 7.218846131784824e-05, "loss": 12.0601, "step": 22111 }, { "epoch": 1.204086772443555, "grad_norm": 0.5806676006289738, "learning_rate": 7.217999106863549e-05, "loss": 11.9913, "step": 22112 }, { "epoch": 1.204141226440138, "grad_norm": 0.5501912701320303, "learning_rate": 7.217152103575584e-05, "loss": 11.9818, "step": 22113 }, { "epoch": 1.204195680436721, "grad_norm": 0.5655415207289628, "learning_rate": 7.216305121927508e-05, "loss": 11.9584, "step": 22114 }, { "epoch": 1.204250134433304, "grad_norm": 0.592532109262495, "learning_rate": 7.21545816192591e-05, "loss": 11.9461, "step": 22115 }, { "epoch": 1.204304588429887, "grad_norm": 0.48483512667346723, "learning_rate": 7.214611223577375e-05, "loss": 12.1258, "step": 22116 }, { "epoch": 1.20435904242647, "grad_norm": 0.6021119418504816, "learning_rate": 7.213764306888492e-05, "loss": 11.9288, "step": 22117 }, { "epoch": 1.204413496423053, "grad_norm": 0.5611011265865317, "learning_rate": 7.212917411865844e-05, "loss": 12.0514, "step": 22118 }, { "epoch": 1.204467950419636, "grad_norm": 0.5305076937015334, "learning_rate": 7.212070538516017e-05, "loss": 11.9955, "step": 22119 }, { "epoch": 1.204522404416219, "grad_norm": 0.5534774058940248, "learning_rate": 7.2112236868456e-05, "loss": 12.0304, "step": 22120 }, { "epoch": 1.204576858412802, "grad_norm": 0.5845141801582963, "learning_rate": 7.210376856861175e-05, "loss": 12.0212, "step": 22121 }, { "epoch": 1.204631312409385, "grad_norm": 0.5383471489402331, "learning_rate": 7.209530048569325e-05, "loss": 11.99, "step": 22122 }, { "epoch": 1.2046857664059683, "grad_norm": 0.5803132674896115, "learning_rate": 7.208683261976641e-05, "loss": 12.0276, "step": 22123 }, { "epoch": 1.2047402204025512, "grad_norm": 0.5633793886050223, "learning_rate": 7.207836497089701e-05, "loss": 12.0332, "step": 22124 }, { "epoch": 1.2047946743991342, "grad_norm": 0.5597780825700696, "learning_rate": 7.206989753915092e-05, "loss": 12.1111, "step": 22125 }, { "epoch": 1.2048491283957172, "grad_norm": 0.5462479356952539, "learning_rate": 7.2061430324594e-05, "loss": 12.0149, "step": 22126 }, { "epoch": 1.2049035823923002, "grad_norm": 0.5657041967252572, "learning_rate": 7.205296332729206e-05, "loss": 11.9446, "step": 22127 }, { "epoch": 1.2049580363888832, "grad_norm": 0.560924352421728, "learning_rate": 7.2044496547311e-05, "loss": 12.0223, "step": 22128 }, { "epoch": 1.2050124903854662, "grad_norm": 0.5336889827650598, "learning_rate": 7.203602998471661e-05, "loss": 11.9153, "step": 22129 }, { "epoch": 1.2050669443820492, "grad_norm": 0.5347407201619592, "learning_rate": 7.202756363957473e-05, "loss": 11.9628, "step": 22130 }, { "epoch": 1.2051213983786322, "grad_norm": 0.5443814156617037, "learning_rate": 7.201909751195122e-05, "loss": 11.9808, "step": 22131 }, { "epoch": 1.2051758523752152, "grad_norm": 0.5346787885470732, "learning_rate": 7.201063160191191e-05, "loss": 11.9713, "step": 22132 }, { "epoch": 1.2052303063717984, "grad_norm": 0.5654284677731318, "learning_rate": 7.200216590952262e-05, "loss": 11.8919, "step": 22133 }, { "epoch": 1.2052847603683814, "grad_norm": 0.5169820110637174, "learning_rate": 7.199370043484917e-05, "loss": 11.9123, "step": 22134 }, { "epoch": 1.2053392143649644, "grad_norm": 0.5622142007958477, "learning_rate": 7.198523517795741e-05, "loss": 11.9989, "step": 22135 }, { "epoch": 1.2053936683615474, "grad_norm": 0.5471605534669074, "learning_rate": 7.197677013891315e-05, "loss": 11.9656, "step": 22136 }, { "epoch": 1.2054481223581304, "grad_norm": 0.555817997447976, "learning_rate": 7.196830531778222e-05, "loss": 12.0999, "step": 22137 }, { "epoch": 1.2055025763547134, "grad_norm": 0.5182694496488273, "learning_rate": 7.195984071463045e-05, "loss": 11.8154, "step": 22138 }, { "epoch": 1.2055570303512964, "grad_norm": 0.6090367274971842, "learning_rate": 7.195137632952367e-05, "loss": 12.0171, "step": 22139 }, { "epoch": 1.2056114843478793, "grad_norm": 0.529902810752365, "learning_rate": 7.194291216252769e-05, "loss": 11.9217, "step": 22140 }, { "epoch": 1.2056659383444623, "grad_norm": 0.6266915580456607, "learning_rate": 7.193444821370833e-05, "loss": 12.1702, "step": 22141 }, { "epoch": 1.2057203923410453, "grad_norm": 0.5324727183403394, "learning_rate": 7.192598448313141e-05, "loss": 12.0653, "step": 22142 }, { "epoch": 1.2057748463376283, "grad_norm": 0.5717854460507498, "learning_rate": 7.191752097086275e-05, "loss": 11.9419, "step": 22143 }, { "epoch": 1.2058293003342113, "grad_norm": 0.5500219032678746, "learning_rate": 7.190905767696816e-05, "loss": 11.9897, "step": 22144 }, { "epoch": 1.2058837543307943, "grad_norm": 0.6058155988404833, "learning_rate": 7.190059460151342e-05, "loss": 11.987, "step": 22145 }, { "epoch": 1.2059382083273773, "grad_norm": 0.5239431508726817, "learning_rate": 7.189213174456439e-05, "loss": 12.0315, "step": 22146 }, { "epoch": 1.2059926623239605, "grad_norm": 0.5485371883024152, "learning_rate": 7.188366910618684e-05, "loss": 11.9058, "step": 22147 }, { "epoch": 1.2060471163205435, "grad_norm": 0.5739704399628821, "learning_rate": 7.187520668644662e-05, "loss": 11.917, "step": 22148 }, { "epoch": 1.2061015703171265, "grad_norm": 0.5965933753043204, "learning_rate": 7.186674448540947e-05, "loss": 12.0307, "step": 22149 }, { "epoch": 1.2061560243137095, "grad_norm": 0.5515978531681733, "learning_rate": 7.185828250314126e-05, "loss": 12.0179, "step": 22150 }, { "epoch": 1.2062104783102925, "grad_norm": 0.5232068825150631, "learning_rate": 7.184982073970776e-05, "loss": 12.0243, "step": 22151 }, { "epoch": 1.2062649323068755, "grad_norm": 0.558244250566568, "learning_rate": 7.184135919517479e-05, "loss": 11.8912, "step": 22152 }, { "epoch": 1.2063193863034585, "grad_norm": 0.5243481543708333, "learning_rate": 7.183289786960813e-05, "loss": 12.0872, "step": 22153 }, { "epoch": 1.2063738403000415, "grad_norm": 0.5432256391753869, "learning_rate": 7.182443676307357e-05, "loss": 12.0196, "step": 22154 }, { "epoch": 1.2064282942966245, "grad_norm": 0.5113051679382367, "learning_rate": 7.181597587563691e-05, "loss": 11.969, "step": 22155 }, { "epoch": 1.2064827482932075, "grad_norm": 0.5187867259613202, "learning_rate": 7.180751520736395e-05, "loss": 11.9699, "step": 22156 }, { "epoch": 1.2065372022897907, "grad_norm": 0.536965619191231, "learning_rate": 7.17990547583205e-05, "loss": 11.9784, "step": 22157 }, { "epoch": 1.2065916562863737, "grad_norm": 0.6097347835717006, "learning_rate": 7.17905945285723e-05, "loss": 12.1263, "step": 22158 }, { "epoch": 1.2066461102829567, "grad_norm": 0.5507974083113052, "learning_rate": 7.178213451818519e-05, "loss": 11.9288, "step": 22159 }, { "epoch": 1.2067005642795396, "grad_norm": 0.5705949664999904, "learning_rate": 7.177367472722492e-05, "loss": 11.9414, "step": 22160 }, { "epoch": 1.2067550182761226, "grad_norm": 0.8062485571839195, "learning_rate": 7.176521515575725e-05, "loss": 11.9337, "step": 22161 }, { "epoch": 1.2068094722727056, "grad_norm": 0.5864169340682323, "learning_rate": 7.175675580384806e-05, "loss": 11.9364, "step": 22162 }, { "epoch": 1.2068639262692886, "grad_norm": 0.5337505307724522, "learning_rate": 7.17482966715631e-05, "loss": 11.997, "step": 22163 }, { "epoch": 1.2069183802658716, "grad_norm": 0.5436177172422525, "learning_rate": 7.173983775896807e-05, "loss": 12.0018, "step": 22164 }, { "epoch": 1.2069728342624546, "grad_norm": 0.5648042686347058, "learning_rate": 7.17313790661288e-05, "loss": 12.0179, "step": 22165 }, { "epoch": 1.2070272882590376, "grad_norm": 0.5700943268038059, "learning_rate": 7.172292059311108e-05, "loss": 12.0263, "step": 22166 }, { "epoch": 1.2070817422556206, "grad_norm": 0.5838511277616565, "learning_rate": 7.171446233998067e-05, "loss": 11.9884, "step": 22167 }, { "epoch": 1.2071361962522036, "grad_norm": 0.5887753007205588, "learning_rate": 7.170600430680335e-05, "loss": 12.0172, "step": 22168 }, { "epoch": 1.2071906502487866, "grad_norm": 0.5278608586343595, "learning_rate": 7.169754649364487e-05, "loss": 11.9377, "step": 22169 }, { "epoch": 1.2072451042453698, "grad_norm": 0.5147691660591388, "learning_rate": 7.168908890057102e-05, "loss": 11.9707, "step": 22170 }, { "epoch": 1.2072995582419528, "grad_norm": 0.5332753264935212, "learning_rate": 7.168063152764756e-05, "loss": 11.8652, "step": 22171 }, { "epoch": 1.2073540122385358, "grad_norm": 0.5971722709092365, "learning_rate": 7.167217437494024e-05, "loss": 11.9451, "step": 22172 }, { "epoch": 1.2074084662351188, "grad_norm": 0.5479329384980722, "learning_rate": 7.166371744251492e-05, "loss": 11.9568, "step": 22173 }, { "epoch": 1.2074629202317018, "grad_norm": 0.5476576886552974, "learning_rate": 7.165526073043723e-05, "loss": 11.9433, "step": 22174 }, { "epoch": 1.2075173742282848, "grad_norm": 0.5637277927302408, "learning_rate": 7.164680423877299e-05, "loss": 12.0464, "step": 22175 }, { "epoch": 1.2075718282248677, "grad_norm": 0.6468205685455182, "learning_rate": 7.163834796758794e-05, "loss": 12.0958, "step": 22176 }, { "epoch": 1.2076262822214507, "grad_norm": 0.5717610794146486, "learning_rate": 7.162989191694787e-05, "loss": 12.0327, "step": 22177 }, { "epoch": 1.2076807362180337, "grad_norm": 0.5335832001968199, "learning_rate": 7.162143608691851e-05, "loss": 12.0202, "step": 22178 }, { "epoch": 1.2077351902146167, "grad_norm": 0.6300336057485512, "learning_rate": 7.161298047756561e-05, "loss": 11.9623, "step": 22179 }, { "epoch": 1.2077896442112, "grad_norm": 0.7084449429386043, "learning_rate": 7.160452508895497e-05, "loss": 11.8882, "step": 22180 }, { "epoch": 1.207844098207783, "grad_norm": 0.6087104624616091, "learning_rate": 7.15960699211523e-05, "loss": 12.0099, "step": 22181 }, { "epoch": 1.207898552204366, "grad_norm": 0.550174407470673, "learning_rate": 7.158761497422331e-05, "loss": 11.918, "step": 22182 }, { "epoch": 1.207953006200949, "grad_norm": 0.5466120104557948, "learning_rate": 7.157916024823386e-05, "loss": 11.9994, "step": 22183 }, { "epoch": 1.208007460197532, "grad_norm": 0.541649744902221, "learning_rate": 7.157070574324957e-05, "loss": 12.0657, "step": 22184 }, { "epoch": 1.208061914194115, "grad_norm": 0.6087705722689702, "learning_rate": 7.156225145933625e-05, "loss": 11.9982, "step": 22185 }, { "epoch": 1.208116368190698, "grad_norm": 0.5725121504350018, "learning_rate": 7.155379739655965e-05, "loss": 12.0643, "step": 22186 }, { "epoch": 1.208170822187281, "grad_norm": 0.6066352387285385, "learning_rate": 7.154534355498545e-05, "loss": 12.0684, "step": 22187 }, { "epoch": 1.2082252761838639, "grad_norm": 0.57731847318181, "learning_rate": 7.153688993467946e-05, "loss": 11.9755, "step": 22188 }, { "epoch": 1.2082797301804469, "grad_norm": 0.6293896024435822, "learning_rate": 7.15284365357074e-05, "loss": 12.1648, "step": 22189 }, { "epoch": 1.2083341841770299, "grad_norm": 0.4920126587746231, "learning_rate": 7.151998335813497e-05, "loss": 12.0314, "step": 22190 }, { "epoch": 1.2083886381736129, "grad_norm": 0.5472210948432139, "learning_rate": 7.151153040202794e-05, "loss": 12.0244, "step": 22191 }, { "epoch": 1.2084430921701959, "grad_norm": 0.5024160976264329, "learning_rate": 7.150307766745202e-05, "loss": 11.8616, "step": 22192 }, { "epoch": 1.208497546166779, "grad_norm": 0.5253886343420642, "learning_rate": 7.149462515447296e-05, "loss": 11.9704, "step": 22193 }, { "epoch": 1.208552000163362, "grad_norm": 0.607827313687678, "learning_rate": 7.148617286315646e-05, "loss": 11.9671, "step": 22194 }, { "epoch": 1.208606454159945, "grad_norm": 0.5251209836074804, "learning_rate": 7.147772079356827e-05, "loss": 11.8412, "step": 22195 }, { "epoch": 1.208660908156528, "grad_norm": 0.67730418799882, "learning_rate": 7.14692689457741e-05, "loss": 12.0243, "step": 22196 }, { "epoch": 1.208715362153111, "grad_norm": 0.5731436442116857, "learning_rate": 7.14608173198397e-05, "loss": 11.9742, "step": 22197 }, { "epoch": 1.208769816149694, "grad_norm": 0.645929462247328, "learning_rate": 7.145236591583072e-05, "loss": 12.0492, "step": 22198 }, { "epoch": 1.208824270146277, "grad_norm": 0.6119640998397153, "learning_rate": 7.144391473381296e-05, "loss": 12.057, "step": 22199 }, { "epoch": 1.20887872414286, "grad_norm": 0.5738906898356876, "learning_rate": 7.143546377385211e-05, "loss": 12.0368, "step": 22200 }, { "epoch": 1.208933178139443, "grad_norm": 0.6179311492166093, "learning_rate": 7.142701303601388e-05, "loss": 11.8716, "step": 22201 }, { "epoch": 1.208987632136026, "grad_norm": 0.5294809640433394, "learning_rate": 7.1418562520364e-05, "loss": 11.9695, "step": 22202 }, { "epoch": 1.2090420861326092, "grad_norm": 0.5285947080877262, "learning_rate": 7.141011222696818e-05, "loss": 11.887, "step": 22203 }, { "epoch": 1.2090965401291922, "grad_norm": 0.5063114473792395, "learning_rate": 7.14016621558921e-05, "loss": 11.9013, "step": 22204 }, { "epoch": 1.2091509941257752, "grad_norm": 0.578170449929612, "learning_rate": 7.139321230720151e-05, "loss": 11.9329, "step": 22205 }, { "epoch": 1.2092054481223582, "grad_norm": 0.5967577483165094, "learning_rate": 7.138476268096208e-05, "loss": 12.0208, "step": 22206 }, { "epoch": 1.2092599021189412, "grad_norm": 0.5141582545857023, "learning_rate": 7.137631327723952e-05, "loss": 11.9588, "step": 22207 }, { "epoch": 1.2093143561155242, "grad_norm": 0.5621523592159323, "learning_rate": 7.136786409609957e-05, "loss": 11.9533, "step": 22208 }, { "epoch": 1.2093688101121072, "grad_norm": 0.5581547531966375, "learning_rate": 7.135941513760792e-05, "loss": 11.8932, "step": 22209 }, { "epoch": 1.2094232641086902, "grad_norm": 0.5387537878154146, "learning_rate": 7.135096640183022e-05, "loss": 12.1501, "step": 22210 }, { "epoch": 1.2094777181052732, "grad_norm": 0.6273124702833844, "learning_rate": 7.134251788883224e-05, "loss": 11.9529, "step": 22211 }, { "epoch": 1.2095321721018562, "grad_norm": 0.6198131612178198, "learning_rate": 7.133406959867965e-05, "loss": 12.0887, "step": 22212 }, { "epoch": 1.2095866260984391, "grad_norm": 0.5908403742200561, "learning_rate": 7.132562153143819e-05, "loss": 11.9164, "step": 22213 }, { "epoch": 1.2096410800950221, "grad_norm": 0.508816991417017, "learning_rate": 7.131717368717342e-05, "loss": 11.9498, "step": 22214 }, { "epoch": 1.2096955340916051, "grad_norm": 0.5326215958832874, "learning_rate": 7.130872606595114e-05, "loss": 11.9998, "step": 22215 }, { "epoch": 1.2097499880881883, "grad_norm": 0.5395288580198045, "learning_rate": 7.130027866783703e-05, "loss": 12.0351, "step": 22216 }, { "epoch": 1.2098044420847713, "grad_norm": 0.5535370494809941, "learning_rate": 7.129183149289677e-05, "loss": 12.0239, "step": 22217 }, { "epoch": 1.2098588960813543, "grad_norm": 0.5553002147851895, "learning_rate": 7.128338454119603e-05, "loss": 12.0336, "step": 22218 }, { "epoch": 1.2099133500779373, "grad_norm": 0.5585794211770257, "learning_rate": 7.127493781280052e-05, "loss": 11.8861, "step": 22219 }, { "epoch": 1.2099678040745203, "grad_norm": 0.5464541689008439, "learning_rate": 7.12664913077759e-05, "loss": 11.9994, "step": 22220 }, { "epoch": 1.2100222580711033, "grad_norm": 0.5440864382681991, "learning_rate": 7.125804502618784e-05, "loss": 12.0383, "step": 22221 }, { "epoch": 1.2100767120676863, "grad_norm": 0.6148614300199156, "learning_rate": 7.124959896810207e-05, "loss": 12.1397, "step": 22222 }, { "epoch": 1.2101311660642693, "grad_norm": 0.5157349583589322, "learning_rate": 7.124115313358428e-05, "loss": 12.0791, "step": 22223 }, { "epoch": 1.2101856200608523, "grad_norm": 0.5429489920842572, "learning_rate": 7.123270752270005e-05, "loss": 12.0118, "step": 22224 }, { "epoch": 1.2102400740574353, "grad_norm": 0.5254134460917859, "learning_rate": 7.122426213551513e-05, "loss": 11.9898, "step": 22225 }, { "epoch": 1.2102945280540183, "grad_norm": 0.5446806538940857, "learning_rate": 7.121581697209516e-05, "loss": 11.9785, "step": 22226 }, { "epoch": 1.2103489820506015, "grad_norm": 0.5197203705656492, "learning_rate": 7.120737203250582e-05, "loss": 11.9456, "step": 22227 }, { "epoch": 1.2104034360471845, "grad_norm": 0.5630397593922395, "learning_rate": 7.11989273168128e-05, "loss": 11.984, "step": 22228 }, { "epoch": 1.2104578900437675, "grad_norm": 0.6025670215102922, "learning_rate": 7.119048282508176e-05, "loss": 12.1266, "step": 22229 }, { "epoch": 1.2105123440403505, "grad_norm": 0.5154841225457294, "learning_rate": 7.118203855737833e-05, "loss": 11.9637, "step": 22230 }, { "epoch": 1.2105667980369335, "grad_norm": 0.5638156237527165, "learning_rate": 7.117359451376822e-05, "loss": 11.9354, "step": 22231 }, { "epoch": 1.2106212520335164, "grad_norm": 0.5660231263125333, "learning_rate": 7.116515069431704e-05, "loss": 11.9912, "step": 22232 }, { "epoch": 1.2106757060300994, "grad_norm": 0.6107715028659775, "learning_rate": 7.115670709909056e-05, "loss": 11.9319, "step": 22233 }, { "epoch": 1.2107301600266824, "grad_norm": 0.6017035343736808, "learning_rate": 7.114826372815432e-05, "loss": 11.9558, "step": 22234 }, { "epoch": 1.2107846140232654, "grad_norm": 0.5229652175836073, "learning_rate": 7.113982058157402e-05, "loss": 11.8972, "step": 22235 }, { "epoch": 1.2108390680198484, "grad_norm": 0.5701160898836272, "learning_rate": 7.113137765941528e-05, "loss": 11.9585, "step": 22236 }, { "epoch": 1.2108935220164314, "grad_norm": 0.553893458828715, "learning_rate": 7.112293496174381e-05, "loss": 11.9503, "step": 22237 }, { "epoch": 1.2109479760130144, "grad_norm": 0.4892356897325319, "learning_rate": 7.111449248862525e-05, "loss": 11.9463, "step": 22238 }, { "epoch": 1.2110024300095974, "grad_norm": 0.5822877339074625, "learning_rate": 7.110605024012524e-05, "loss": 11.9796, "step": 22239 }, { "epoch": 1.2110568840061806, "grad_norm": 0.5853273380034315, "learning_rate": 7.109760821630943e-05, "loss": 12.045, "step": 22240 }, { "epoch": 1.2111113380027636, "grad_norm": 0.5656663684305093, "learning_rate": 7.108916641724345e-05, "loss": 12.0361, "step": 22241 }, { "epoch": 1.2111657919993466, "grad_norm": 0.5540103396672027, "learning_rate": 7.108072484299299e-05, "loss": 11.9424, "step": 22242 }, { "epoch": 1.2112202459959296, "grad_norm": 0.5130192052115216, "learning_rate": 7.107228349362368e-05, "loss": 11.9045, "step": 22243 }, { "epoch": 1.2112746999925126, "grad_norm": 0.563121879580242, "learning_rate": 7.106384236920109e-05, "loss": 12.0127, "step": 22244 }, { "epoch": 1.2113291539890956, "grad_norm": 0.5140509319795811, "learning_rate": 7.105540146979095e-05, "loss": 11.9238, "step": 22245 }, { "epoch": 1.2113836079856786, "grad_norm": 0.5607506092057514, "learning_rate": 7.104696079545886e-05, "loss": 11.9958, "step": 22246 }, { "epoch": 1.2114380619822616, "grad_norm": 0.5316034142149118, "learning_rate": 7.10385203462704e-05, "loss": 11.6815, "step": 22247 }, { "epoch": 1.2114925159788446, "grad_norm": 0.5238273300317361, "learning_rate": 7.10300801222913e-05, "loss": 12.1356, "step": 22248 }, { "epoch": 1.2115469699754275, "grad_norm": 0.5737696562662198, "learning_rate": 7.102164012358719e-05, "loss": 11.9107, "step": 22249 }, { "epoch": 1.2116014239720108, "grad_norm": 0.4843855987877133, "learning_rate": 7.101320035022363e-05, "loss": 11.9216, "step": 22250 }, { "epoch": 1.2116558779685938, "grad_norm": 0.5321854172161506, "learning_rate": 7.10047608022663e-05, "loss": 12.0449, "step": 22251 }, { "epoch": 1.2117103319651767, "grad_norm": 0.542024674210292, "learning_rate": 7.099632147978081e-05, "loss": 11.9542, "step": 22252 }, { "epoch": 1.2117647859617597, "grad_norm": 0.5624187728852066, "learning_rate": 7.098788238283278e-05, "loss": 11.9009, "step": 22253 }, { "epoch": 1.2118192399583427, "grad_norm": 0.545018263851282, "learning_rate": 7.097944351148787e-05, "loss": 11.9881, "step": 22254 }, { "epoch": 1.2118736939549257, "grad_norm": 0.547322436937603, "learning_rate": 7.097100486581165e-05, "loss": 12.1616, "step": 22255 }, { "epoch": 1.2119281479515087, "grad_norm": 0.5854550211063693, "learning_rate": 7.096256644586976e-05, "loss": 12.0093, "step": 22256 }, { "epoch": 1.2119826019480917, "grad_norm": 0.5294357935560317, "learning_rate": 7.095412825172784e-05, "loss": 11.9794, "step": 22257 }, { "epoch": 1.2120370559446747, "grad_norm": 0.6303869108387259, "learning_rate": 7.094569028345146e-05, "loss": 12.0509, "step": 22258 }, { "epoch": 1.2120915099412577, "grad_norm": 0.5067586615831682, "learning_rate": 7.093725254110627e-05, "loss": 11.9189, "step": 22259 }, { "epoch": 1.2121459639378407, "grad_norm": 0.5578965016345673, "learning_rate": 7.09288150247579e-05, "loss": 11.9744, "step": 22260 }, { "epoch": 1.2122004179344237, "grad_norm": 0.7391896317516873, "learning_rate": 7.092037773447193e-05, "loss": 11.9811, "step": 22261 }, { "epoch": 1.2122548719310067, "grad_norm": 0.5437859337146264, "learning_rate": 7.091194067031398e-05, "loss": 11.9437, "step": 22262 }, { "epoch": 1.2123093259275899, "grad_norm": 0.5742713919066256, "learning_rate": 7.090350383234966e-05, "loss": 11.929, "step": 22263 }, { "epoch": 1.2123637799241729, "grad_norm": 0.5562476907318756, "learning_rate": 7.08950672206446e-05, "loss": 11.9208, "step": 22264 }, { "epoch": 1.2124182339207559, "grad_norm": 0.5611820438255171, "learning_rate": 7.088663083526434e-05, "loss": 11.9286, "step": 22265 }, { "epoch": 1.2124726879173389, "grad_norm": 0.5372397433175323, "learning_rate": 7.087819467627454e-05, "loss": 11.978, "step": 22266 }, { "epoch": 1.2125271419139219, "grad_norm": 0.5401658270660569, "learning_rate": 7.086975874374077e-05, "loss": 12.0468, "step": 22267 }, { "epoch": 1.2125815959105049, "grad_norm": 0.5468530284162061, "learning_rate": 7.086132303772864e-05, "loss": 11.9912, "step": 22268 }, { "epoch": 1.2126360499070878, "grad_norm": 0.5289231617253853, "learning_rate": 7.085288755830375e-05, "loss": 11.9488, "step": 22269 }, { "epoch": 1.2126905039036708, "grad_norm": 0.4974327342037205, "learning_rate": 7.084445230553167e-05, "loss": 12.022, "step": 22270 }, { "epoch": 1.2127449579002538, "grad_norm": 0.5586721468309652, "learning_rate": 7.083601727947806e-05, "loss": 12.0494, "step": 22271 }, { "epoch": 1.2127994118968368, "grad_norm": 0.5432637061339359, "learning_rate": 7.082758248020844e-05, "loss": 12.0238, "step": 22272 }, { "epoch": 1.21285386589342, "grad_norm": 0.5474105843631708, "learning_rate": 7.081914790778845e-05, "loss": 11.9483, "step": 22273 }, { "epoch": 1.212908319890003, "grad_norm": 0.5318146688778155, "learning_rate": 7.081071356228368e-05, "loss": 11.7935, "step": 22274 }, { "epoch": 1.212962773886586, "grad_norm": 0.5240733562038657, "learning_rate": 7.080227944375968e-05, "loss": 11.9969, "step": 22275 }, { "epoch": 1.213017227883169, "grad_norm": 0.5855271049242533, "learning_rate": 7.079384555228204e-05, "loss": 12.1057, "step": 22276 }, { "epoch": 1.213071681879752, "grad_norm": 0.5257915104221932, "learning_rate": 7.078541188791636e-05, "loss": 11.9402, "step": 22277 }, { "epoch": 1.213126135876335, "grad_norm": 0.5357458275395512, "learning_rate": 7.077697845072821e-05, "loss": 11.8799, "step": 22278 }, { "epoch": 1.213180589872918, "grad_norm": 0.5892448472689967, "learning_rate": 7.076854524078318e-05, "loss": 12.0626, "step": 22279 }, { "epoch": 1.213235043869501, "grad_norm": 0.5561518650997543, "learning_rate": 7.076011225814685e-05, "loss": 11.994, "step": 22280 }, { "epoch": 1.213289497866084, "grad_norm": 0.5671959921232491, "learning_rate": 7.075167950288477e-05, "loss": 11.9708, "step": 22281 }, { "epoch": 1.213343951862667, "grad_norm": 0.5241208392779223, "learning_rate": 7.074324697506255e-05, "loss": 11.8656, "step": 22282 }, { "epoch": 1.21339840585925, "grad_norm": 0.6521234757186432, "learning_rate": 7.073481467474575e-05, "loss": 12.013, "step": 22283 }, { "epoch": 1.213452859855833, "grad_norm": 0.48788085413657317, "learning_rate": 7.072638260199997e-05, "loss": 12.0519, "step": 22284 }, { "epoch": 1.213507313852416, "grad_norm": 0.563264647372284, "learning_rate": 7.07179507568907e-05, "loss": 11.8737, "step": 22285 }, { "epoch": 1.2135617678489992, "grad_norm": 0.594163666289845, "learning_rate": 7.070951913948359e-05, "loss": 12.0171, "step": 22286 }, { "epoch": 1.2136162218455822, "grad_norm": 0.5633618600797079, "learning_rate": 7.070108774984415e-05, "loss": 12.028, "step": 22287 }, { "epoch": 1.2136706758421651, "grad_norm": 0.5191886984552105, "learning_rate": 7.069265658803796e-05, "loss": 11.9681, "step": 22288 }, { "epoch": 1.2137251298387481, "grad_norm": 0.537810626953105, "learning_rate": 7.06842256541306e-05, "loss": 12.0174, "step": 22289 }, { "epoch": 1.2137795838353311, "grad_norm": 0.5692931708589785, "learning_rate": 7.067579494818761e-05, "loss": 11.9745, "step": 22290 }, { "epoch": 1.2138340378319141, "grad_norm": 0.6202625084016253, "learning_rate": 7.066736447027455e-05, "loss": 11.9476, "step": 22291 }, { "epoch": 1.2138884918284971, "grad_norm": 0.5324484456463837, "learning_rate": 7.065893422045698e-05, "loss": 12.1948, "step": 22292 }, { "epoch": 1.21394294582508, "grad_norm": 0.5021182461547399, "learning_rate": 7.065050419880046e-05, "loss": 11.9584, "step": 22293 }, { "epoch": 1.213997399821663, "grad_norm": 0.5939087172169644, "learning_rate": 7.064207440537061e-05, "loss": 12.0118, "step": 22294 }, { "epoch": 1.214051853818246, "grad_norm": 0.5634578786794503, "learning_rate": 7.063364484023285e-05, "loss": 11.9904, "step": 22295 }, { "epoch": 1.214106307814829, "grad_norm": 0.5484709194305013, "learning_rate": 7.062521550345277e-05, "loss": 11.8776, "step": 22296 }, { "epoch": 1.2141607618114123, "grad_norm": 0.5421099908982828, "learning_rate": 7.061678639509596e-05, "loss": 12.0019, "step": 22297 }, { "epoch": 1.2142152158079953, "grad_norm": 0.4991751723091259, "learning_rate": 7.060835751522797e-05, "loss": 11.9418, "step": 22298 }, { "epoch": 1.2142696698045783, "grad_norm": 0.577553957827333, "learning_rate": 7.059992886391428e-05, "loss": 12.0586, "step": 22299 }, { "epoch": 1.2143241238011613, "grad_norm": 0.5255331978796294, "learning_rate": 7.059150044122052e-05, "loss": 11.9671, "step": 22300 }, { "epoch": 1.2143785777977443, "grad_norm": 0.5196347588231244, "learning_rate": 7.058307224721216e-05, "loss": 11.8502, "step": 22301 }, { "epoch": 1.2144330317943273, "grad_norm": 0.5355706344964161, "learning_rate": 7.057464428195476e-05, "loss": 11.8549, "step": 22302 }, { "epoch": 1.2144874857909103, "grad_norm": 0.4955177547160181, "learning_rate": 7.056621654551385e-05, "loss": 11.8426, "step": 22303 }, { "epoch": 1.2145419397874933, "grad_norm": 0.523880147256863, "learning_rate": 7.055778903795502e-05, "loss": 11.9664, "step": 22304 }, { "epoch": 1.2145963937840762, "grad_norm": 0.5350504152097167, "learning_rate": 7.054936175934375e-05, "loss": 11.8836, "step": 22305 }, { "epoch": 1.2146508477806592, "grad_norm": 0.5548101050832339, "learning_rate": 7.054093470974557e-05, "loss": 11.903, "step": 22306 }, { "epoch": 1.2147053017772422, "grad_norm": 0.6393004098557902, "learning_rate": 7.053250788922599e-05, "loss": 12.1723, "step": 22307 }, { "epoch": 1.2147597557738252, "grad_norm": 0.5315787203549713, "learning_rate": 7.05240812978506e-05, "loss": 12.0701, "step": 22308 }, { "epoch": 1.2148142097704082, "grad_norm": 0.5644005983604934, "learning_rate": 7.051565493568488e-05, "loss": 11.8484, "step": 22309 }, { "epoch": 1.2148686637669914, "grad_norm": 0.5338038664794075, "learning_rate": 7.050722880279439e-05, "loss": 11.9557, "step": 22310 }, { "epoch": 1.2149231177635744, "grad_norm": 0.5581254134054306, "learning_rate": 7.049880289924464e-05, "loss": 11.9729, "step": 22311 }, { "epoch": 1.2149775717601574, "grad_norm": 0.5450984713680336, "learning_rate": 7.049037722510113e-05, "loss": 11.8956, "step": 22312 }, { "epoch": 1.2150320257567404, "grad_norm": 0.6100547288983047, "learning_rate": 7.04819517804294e-05, "loss": 12.0668, "step": 22313 }, { "epoch": 1.2150864797533234, "grad_norm": 0.5259666863737034, "learning_rate": 7.047352656529498e-05, "loss": 12.1377, "step": 22314 }, { "epoch": 1.2151409337499064, "grad_norm": 0.5224156702911438, "learning_rate": 7.046510157976336e-05, "loss": 11.9721, "step": 22315 }, { "epoch": 1.2151953877464894, "grad_norm": 0.5203314355568164, "learning_rate": 7.045667682390004e-05, "loss": 11.9608, "step": 22316 }, { "epoch": 1.2152498417430724, "grad_norm": 0.5803029169990991, "learning_rate": 7.04482522977706e-05, "loss": 11.9736, "step": 22317 }, { "epoch": 1.2153042957396554, "grad_norm": 0.55320366865983, "learning_rate": 7.043982800144046e-05, "loss": 11.9466, "step": 22318 }, { "epoch": 1.2153587497362384, "grad_norm": 0.5532418365233676, "learning_rate": 7.043140393497518e-05, "loss": 12.005, "step": 22319 }, { "epoch": 1.2154132037328216, "grad_norm": 0.5154581249122987, "learning_rate": 7.042298009844027e-05, "loss": 11.7698, "step": 22320 }, { "epoch": 1.2154676577294046, "grad_norm": 0.5570806186394629, "learning_rate": 7.041455649190123e-05, "loss": 12.0179, "step": 22321 }, { "epoch": 1.2155221117259876, "grad_norm": 0.6075919078474695, "learning_rate": 7.040613311542357e-05, "loss": 12.0396, "step": 22322 }, { "epoch": 1.2155765657225706, "grad_norm": 0.5666390942113456, "learning_rate": 7.039770996907277e-05, "loss": 11.9011, "step": 22323 }, { "epoch": 1.2156310197191535, "grad_norm": 0.5092742503468421, "learning_rate": 7.038928705291436e-05, "loss": 11.926, "step": 22324 }, { "epoch": 1.2156854737157365, "grad_norm": 0.5501225754048922, "learning_rate": 7.038086436701381e-05, "loss": 11.8644, "step": 22325 }, { "epoch": 1.2157399277123195, "grad_norm": 0.5719510745672549, "learning_rate": 7.037244191143661e-05, "loss": 12.0941, "step": 22326 }, { "epoch": 1.2157943817089025, "grad_norm": 0.5577233311053958, "learning_rate": 7.03640196862483e-05, "loss": 11.9658, "step": 22327 }, { "epoch": 1.2158488357054855, "grad_norm": 0.554705498761114, "learning_rate": 7.035559769151432e-05, "loss": 11.8951, "step": 22328 }, { "epoch": 1.2159032897020685, "grad_norm": 0.6049096053195613, "learning_rate": 7.034717592730018e-05, "loss": 12.1224, "step": 22329 }, { "epoch": 1.2159577436986515, "grad_norm": 0.5216970000678234, "learning_rate": 7.033875439367137e-05, "loss": 11.9001, "step": 22330 }, { "epoch": 1.2160121976952345, "grad_norm": 0.5569509677271596, "learning_rate": 7.033033309069339e-05, "loss": 12.0039, "step": 22331 }, { "epoch": 1.2160666516918175, "grad_norm": 0.5508933269962223, "learning_rate": 7.03219120184317e-05, "loss": 11.99, "step": 22332 }, { "epoch": 1.2161211056884007, "grad_norm": 0.5963243297822097, "learning_rate": 7.031349117695183e-05, "loss": 11.9617, "step": 22333 }, { "epoch": 1.2161755596849837, "grad_norm": 0.5379930064043972, "learning_rate": 7.030507056631923e-05, "loss": 11.9727, "step": 22334 }, { "epoch": 1.2162300136815667, "grad_norm": 0.5386747676124668, "learning_rate": 7.029665018659936e-05, "loss": 11.9948, "step": 22335 }, { "epoch": 1.2162844676781497, "grad_norm": 0.5723447515807047, "learning_rate": 7.028823003785774e-05, "loss": 11.9454, "step": 22336 }, { "epoch": 1.2163389216747327, "grad_norm": 0.583893023561163, "learning_rate": 7.027981012015981e-05, "loss": 11.9514, "step": 22337 }, { "epoch": 1.2163933756713157, "grad_norm": 0.5887967027117306, "learning_rate": 7.027139043357106e-05, "loss": 12.0417, "step": 22338 }, { "epoch": 1.2164478296678987, "grad_norm": 0.5689338247182932, "learning_rate": 7.026297097815697e-05, "loss": 12.0591, "step": 22339 }, { "epoch": 1.2165022836644817, "grad_norm": 0.5141804373708683, "learning_rate": 7.025455175398299e-05, "loss": 12.0649, "step": 22340 }, { "epoch": 1.2165567376610646, "grad_norm": 0.7292763687760996, "learning_rate": 7.02461327611146e-05, "loss": 12.0871, "step": 22341 }, { "epoch": 1.2166111916576476, "grad_norm": 0.5518115603447546, "learning_rate": 7.02377139996173e-05, "loss": 12.036, "step": 22342 }, { "epoch": 1.2166656456542309, "grad_norm": 0.5506875059426861, "learning_rate": 7.02292954695565e-05, "loss": 11.9148, "step": 22343 }, { "epoch": 1.2167200996508138, "grad_norm": 0.7162991465665424, "learning_rate": 7.022087717099775e-05, "loss": 11.9591, "step": 22344 }, { "epoch": 1.2167745536473968, "grad_norm": 0.5522233152721466, "learning_rate": 7.021245910400638e-05, "loss": 11.9098, "step": 22345 }, { "epoch": 1.2168290076439798, "grad_norm": 0.5137823090818378, "learning_rate": 7.020404126864794e-05, "loss": 11.9703, "step": 22346 }, { "epoch": 1.2168834616405628, "grad_norm": 0.5967318474798641, "learning_rate": 7.01956236649879e-05, "loss": 12.0042, "step": 22347 }, { "epoch": 1.2169379156371458, "grad_norm": 0.510429647743093, "learning_rate": 7.018720629309167e-05, "loss": 12.0425, "step": 22348 }, { "epoch": 1.2169923696337288, "grad_norm": 0.6660538355641454, "learning_rate": 7.017878915302471e-05, "loss": 11.9772, "step": 22349 }, { "epoch": 1.2170468236303118, "grad_norm": 0.5156984135756433, "learning_rate": 7.017037224485251e-05, "loss": 11.9955, "step": 22350 }, { "epoch": 1.2171012776268948, "grad_norm": 0.5515831340977696, "learning_rate": 7.016195556864049e-05, "loss": 12.0198, "step": 22351 }, { "epoch": 1.2171557316234778, "grad_norm": 0.5009435038304422, "learning_rate": 7.015353912445408e-05, "loss": 11.9356, "step": 22352 }, { "epoch": 1.2172101856200608, "grad_norm": 0.5027011911111109, "learning_rate": 7.01451229123588e-05, "loss": 11.8627, "step": 22353 }, { "epoch": 1.2172646396166438, "grad_norm": 0.5668826387820898, "learning_rate": 7.013670693242005e-05, "loss": 12.0749, "step": 22354 }, { "epoch": 1.2173190936132268, "grad_norm": 0.5515281227037341, "learning_rate": 7.012829118470328e-05, "loss": 12.139, "step": 22355 }, { "epoch": 1.21737354760981, "grad_norm": 0.4810495926302193, "learning_rate": 7.011987566927387e-05, "loss": 11.9472, "step": 22356 }, { "epoch": 1.217428001606393, "grad_norm": 0.5723272624958875, "learning_rate": 7.011146038619735e-05, "loss": 11.8951, "step": 22357 }, { "epoch": 1.217482455602976, "grad_norm": 0.5549262552776699, "learning_rate": 7.010304533553913e-05, "loss": 12.0505, "step": 22358 }, { "epoch": 1.217536909599559, "grad_norm": 0.5405482614457372, "learning_rate": 7.009463051736465e-05, "loss": 11.9682, "step": 22359 }, { "epoch": 1.217591363596142, "grad_norm": 0.5412814365790568, "learning_rate": 7.008621593173932e-05, "loss": 11.9465, "step": 22360 }, { "epoch": 1.217645817592725, "grad_norm": 0.5467583588804531, "learning_rate": 7.00778015787286e-05, "loss": 12.0006, "step": 22361 }, { "epoch": 1.217700271589308, "grad_norm": 0.4916924452821014, "learning_rate": 7.006938745839792e-05, "loss": 12.0715, "step": 22362 }, { "epoch": 1.217754725585891, "grad_norm": 0.5508482146314773, "learning_rate": 7.006097357081269e-05, "loss": 12.0779, "step": 22363 }, { "epoch": 1.217809179582474, "grad_norm": 0.5041038578527925, "learning_rate": 7.005255991603838e-05, "loss": 11.9602, "step": 22364 }, { "epoch": 1.217863633579057, "grad_norm": 0.5695379925221715, "learning_rate": 7.004414649414037e-05, "loss": 12.0427, "step": 22365 }, { "epoch": 1.2179180875756401, "grad_norm": 0.525114739195111, "learning_rate": 7.003573330518409e-05, "loss": 11.8928, "step": 22366 }, { "epoch": 1.2179725415722231, "grad_norm": 0.5566647811168054, "learning_rate": 7.002732034923499e-05, "loss": 12.0557, "step": 22367 }, { "epoch": 1.2180269955688061, "grad_norm": 0.49262671405552383, "learning_rate": 7.001890762635844e-05, "loss": 11.9055, "step": 22368 }, { "epoch": 1.218081449565389, "grad_norm": 0.5578951026885405, "learning_rate": 7.00104951366199e-05, "loss": 11.9872, "step": 22369 }, { "epoch": 1.218135903561972, "grad_norm": 0.5683924026785109, "learning_rate": 7.000208288008478e-05, "loss": 11.9625, "step": 22370 }, { "epoch": 1.218190357558555, "grad_norm": 0.5086219925298866, "learning_rate": 6.999367085681852e-05, "loss": 11.9946, "step": 22371 }, { "epoch": 1.218244811555138, "grad_norm": 0.515937880076869, "learning_rate": 6.998525906688649e-05, "loss": 12.0035, "step": 22372 }, { "epoch": 1.218299265551721, "grad_norm": 0.5588427097572062, "learning_rate": 6.997684751035411e-05, "loss": 11.9378, "step": 22373 }, { "epoch": 1.218353719548304, "grad_norm": 0.5279204411786923, "learning_rate": 6.996843618728684e-05, "loss": 11.9883, "step": 22374 }, { "epoch": 1.218408173544887, "grad_norm": 0.5212889295302302, "learning_rate": 6.996002509775001e-05, "loss": 11.9559, "step": 22375 }, { "epoch": 1.21846262754147, "grad_norm": 0.5420376501032255, "learning_rate": 6.995161424180908e-05, "loss": 11.8928, "step": 22376 }, { "epoch": 1.218517081538053, "grad_norm": 0.5166400531308509, "learning_rate": 6.994320361952943e-05, "loss": 11.8835, "step": 22377 }, { "epoch": 1.218571535534636, "grad_norm": 0.5292885786067021, "learning_rate": 6.993479323097647e-05, "loss": 11.993, "step": 22378 }, { "epoch": 1.218625989531219, "grad_norm": 0.583737031027247, "learning_rate": 6.992638307621557e-05, "loss": 11.8614, "step": 22379 }, { "epoch": 1.2186804435278022, "grad_norm": 0.561478994551792, "learning_rate": 6.99179731553122e-05, "loss": 12.0964, "step": 22380 }, { "epoch": 1.2187348975243852, "grad_norm": 0.5640655878626899, "learning_rate": 6.990956346833168e-05, "loss": 11.9127, "step": 22381 }, { "epoch": 1.2187893515209682, "grad_norm": 0.5762987678441256, "learning_rate": 6.990115401533946e-05, "loss": 12.0166, "step": 22382 }, { "epoch": 1.2188438055175512, "grad_norm": 0.6597124157320646, "learning_rate": 6.98927447964009e-05, "loss": 12.0442, "step": 22383 }, { "epoch": 1.2188982595141342, "grad_norm": 0.5609973227230155, "learning_rate": 6.988433581158145e-05, "loss": 11.9126, "step": 22384 }, { "epoch": 1.2189527135107172, "grad_norm": 0.5440578199406028, "learning_rate": 6.987592706094643e-05, "loss": 11.8648, "step": 22385 }, { "epoch": 1.2190071675073002, "grad_norm": 0.5859657659305496, "learning_rate": 6.986751854456124e-05, "loss": 12.0417, "step": 22386 }, { "epoch": 1.2190616215038832, "grad_norm": 0.5791757679621246, "learning_rate": 6.98591102624913e-05, "loss": 11.8141, "step": 22387 }, { "epoch": 1.2191160755004662, "grad_norm": 0.580928826714245, "learning_rate": 6.985070221480195e-05, "loss": 11.9734, "step": 22388 }, { "epoch": 1.2191705294970492, "grad_norm": 0.523511068552816, "learning_rate": 6.98422944015586e-05, "loss": 11.9273, "step": 22389 }, { "epoch": 1.2192249834936324, "grad_norm": 0.5280995417782323, "learning_rate": 6.98338868228266e-05, "loss": 11.9703, "step": 22390 }, { "epoch": 1.2192794374902154, "grad_norm": 0.554149458968344, "learning_rate": 6.982547947867141e-05, "loss": 11.9068, "step": 22391 }, { "epoch": 1.2193338914867984, "grad_norm": 0.5710116531873979, "learning_rate": 6.981707236915833e-05, "loss": 11.856, "step": 22392 }, { "epoch": 1.2193883454833814, "grad_norm": 0.5646006857839734, "learning_rate": 6.980866549435275e-05, "loss": 11.9466, "step": 22393 }, { "epoch": 1.2194427994799644, "grad_norm": 0.5457992022543512, "learning_rate": 6.980025885432006e-05, "loss": 12.0449, "step": 22394 }, { "epoch": 1.2194972534765474, "grad_norm": 0.6146281492599752, "learning_rate": 6.979185244912563e-05, "loss": 11.9747, "step": 22395 }, { "epoch": 1.2195517074731304, "grad_norm": 0.589197910788417, "learning_rate": 6.978344627883482e-05, "loss": 12.0342, "step": 22396 }, { "epoch": 1.2196061614697133, "grad_norm": 0.5438373320570249, "learning_rate": 6.977504034351297e-05, "loss": 12.0001, "step": 22397 }, { "epoch": 1.2196606154662963, "grad_norm": 0.5923276252131006, "learning_rate": 6.97666346432255e-05, "loss": 12.0653, "step": 22398 }, { "epoch": 1.2197150694628793, "grad_norm": 0.6069501760450577, "learning_rate": 6.975822917803773e-05, "loss": 12.0088, "step": 22399 }, { "epoch": 1.2197695234594623, "grad_norm": 0.5136480609945389, "learning_rate": 6.974982394801505e-05, "loss": 12.0383, "step": 22400 }, { "epoch": 1.2198239774560453, "grad_norm": 0.5076437212057705, "learning_rate": 6.974141895322282e-05, "loss": 11.9397, "step": 22401 }, { "epoch": 1.2198784314526283, "grad_norm": 0.5724996968219022, "learning_rate": 6.973301419372637e-05, "loss": 11.965, "step": 22402 }, { "epoch": 1.2199328854492115, "grad_norm": 0.4995089364803861, "learning_rate": 6.97246096695911e-05, "loss": 11.975, "step": 22403 }, { "epoch": 1.2199873394457945, "grad_norm": 0.580369141175968, "learning_rate": 6.971620538088233e-05, "loss": 11.9444, "step": 22404 }, { "epoch": 1.2200417934423775, "grad_norm": 0.49372070185979805, "learning_rate": 6.970780132766545e-05, "loss": 11.7919, "step": 22405 }, { "epoch": 1.2200962474389605, "grad_norm": 0.6917361868930043, "learning_rate": 6.969939751000577e-05, "loss": 12.0882, "step": 22406 }, { "epoch": 1.2201507014355435, "grad_norm": 0.6322136547605418, "learning_rate": 6.969099392796865e-05, "loss": 12.0858, "step": 22407 }, { "epoch": 1.2202051554321265, "grad_norm": 0.5223013259575535, "learning_rate": 6.968259058161946e-05, "loss": 11.8596, "step": 22408 }, { "epoch": 1.2202596094287095, "grad_norm": 0.5512881492846489, "learning_rate": 6.967418747102351e-05, "loss": 12.0115, "step": 22409 }, { "epoch": 1.2203140634252925, "grad_norm": 0.5173212222099689, "learning_rate": 6.966578459624617e-05, "loss": 11.8679, "step": 22410 }, { "epoch": 1.2203685174218755, "grad_norm": 0.5708016938914064, "learning_rate": 6.965738195735279e-05, "loss": 12.0618, "step": 22411 }, { "epoch": 1.2204229714184585, "grad_norm": 0.5448068878427165, "learning_rate": 6.964897955440868e-05, "loss": 11.8162, "step": 22412 }, { "epoch": 1.2204774254150417, "grad_norm": 0.5529709578796422, "learning_rate": 6.964057738747918e-05, "loss": 12.0061, "step": 22413 }, { "epoch": 1.2205318794116247, "grad_norm": 0.5358876828902283, "learning_rate": 6.963217545662965e-05, "loss": 11.9137, "step": 22414 }, { "epoch": 1.2205863334082077, "grad_norm": 0.5647036952014872, "learning_rate": 6.962377376192548e-05, "loss": 12.0191, "step": 22415 }, { "epoch": 1.2206407874047907, "grad_norm": 0.5940657194824316, "learning_rate": 6.961537230343188e-05, "loss": 12.184, "step": 22416 }, { "epoch": 1.2206952414013736, "grad_norm": 0.5790464934404309, "learning_rate": 6.960697108121423e-05, "loss": 12.0073, "step": 22417 }, { "epoch": 1.2207496953979566, "grad_norm": 0.5045579535013146, "learning_rate": 6.959857009533787e-05, "loss": 11.9809, "step": 22418 }, { "epoch": 1.2208041493945396, "grad_norm": 0.5530522189101298, "learning_rate": 6.959016934586816e-05, "loss": 12.039, "step": 22419 }, { "epoch": 1.2208586033911226, "grad_norm": 0.5335794066957925, "learning_rate": 6.958176883287037e-05, "loss": 11.9908, "step": 22420 }, { "epoch": 1.2209130573877056, "grad_norm": 0.5344672194234419, "learning_rate": 6.957336855640986e-05, "loss": 11.9482, "step": 22421 }, { "epoch": 1.2209675113842886, "grad_norm": 0.5409941895580455, "learning_rate": 6.956496851655194e-05, "loss": 12.055, "step": 22422 }, { "epoch": 1.2210219653808716, "grad_norm": 0.5162961278211933, "learning_rate": 6.955656871336191e-05, "loss": 11.9066, "step": 22423 }, { "epoch": 1.2210764193774546, "grad_norm": 0.5731886725647957, "learning_rate": 6.954816914690509e-05, "loss": 11.9437, "step": 22424 }, { "epoch": 1.2211308733740376, "grad_norm": 0.5375378154163946, "learning_rate": 6.95397698172469e-05, "loss": 11.897, "step": 22425 }, { "epoch": 1.2211853273706208, "grad_norm": 0.5308922702002112, "learning_rate": 6.95313707244525e-05, "loss": 11.8796, "step": 22426 }, { "epoch": 1.2212397813672038, "grad_norm": 0.5496555211264113, "learning_rate": 6.952297186858728e-05, "loss": 11.9544, "step": 22427 }, { "epoch": 1.2212942353637868, "grad_norm": 0.5110784431805737, "learning_rate": 6.951457324971653e-05, "loss": 11.971, "step": 22428 }, { "epoch": 1.2213486893603698, "grad_norm": 0.549558745358558, "learning_rate": 6.950617486790558e-05, "loss": 11.9023, "step": 22429 }, { "epoch": 1.2214031433569528, "grad_norm": 0.554236279059842, "learning_rate": 6.949777672321973e-05, "loss": 12.076, "step": 22430 }, { "epoch": 1.2214575973535358, "grad_norm": 0.5544054192583099, "learning_rate": 6.948937881572428e-05, "loss": 11.9806, "step": 22431 }, { "epoch": 1.2215120513501188, "grad_norm": 0.5659384279633011, "learning_rate": 6.948098114548454e-05, "loss": 11.8784, "step": 22432 }, { "epoch": 1.2215665053467017, "grad_norm": 0.5623758176488516, "learning_rate": 6.947258371256582e-05, "loss": 11.9009, "step": 22433 }, { "epoch": 1.2216209593432847, "grad_norm": 0.517444801061649, "learning_rate": 6.94641865170334e-05, "loss": 12.0179, "step": 22434 }, { "epoch": 1.2216754133398677, "grad_norm": 0.5516104639008468, "learning_rate": 6.945578955895259e-05, "loss": 11.8954, "step": 22435 }, { "epoch": 1.221729867336451, "grad_norm": 0.5662922139479862, "learning_rate": 6.944739283838868e-05, "loss": 11.9934, "step": 22436 }, { "epoch": 1.221784321333034, "grad_norm": 0.5264966002719521, "learning_rate": 6.943899635540697e-05, "loss": 11.8817, "step": 22437 }, { "epoch": 1.221838775329617, "grad_norm": 0.5127025445740278, "learning_rate": 6.943060011007274e-05, "loss": 11.8937, "step": 22438 }, { "epoch": 1.2218932293262, "grad_norm": 0.5271368637409032, "learning_rate": 6.942220410245128e-05, "loss": 11.9038, "step": 22439 }, { "epoch": 1.221947683322783, "grad_norm": 0.5469080513964029, "learning_rate": 6.94138083326079e-05, "loss": 12.1692, "step": 22440 }, { "epoch": 1.222002137319366, "grad_norm": 0.4922272766274975, "learning_rate": 6.940541280060788e-05, "loss": 11.9012, "step": 22441 }, { "epoch": 1.222056591315949, "grad_norm": 0.5071699516397944, "learning_rate": 6.93970175065165e-05, "loss": 11.9217, "step": 22442 }, { "epoch": 1.222111045312532, "grad_norm": 0.5443291129826455, "learning_rate": 6.938862245039904e-05, "loss": 12.0067, "step": 22443 }, { "epoch": 1.2221654993091149, "grad_norm": 0.5463068505259102, "learning_rate": 6.938022763232079e-05, "loss": 11.8455, "step": 22444 }, { "epoch": 1.2222199533056979, "grad_norm": 0.5480977159865835, "learning_rate": 6.937183305234705e-05, "loss": 11.9707, "step": 22445 }, { "epoch": 1.2222744073022809, "grad_norm": 0.5634515913441669, "learning_rate": 6.936343871054304e-05, "loss": 11.9748, "step": 22446 }, { "epoch": 1.2223288612988639, "grad_norm": 0.571882199790371, "learning_rate": 6.935504460697407e-05, "loss": 12.0599, "step": 22447 }, { "epoch": 1.2223833152954469, "grad_norm": 0.528124344999855, "learning_rate": 6.934665074170542e-05, "loss": 11.8866, "step": 22448 }, { "epoch": 1.2224377692920299, "grad_norm": 0.5836550455259616, "learning_rate": 6.933825711480236e-05, "loss": 11.9151, "step": 22449 }, { "epoch": 1.222492223288613, "grad_norm": 0.5056020976244936, "learning_rate": 6.932986372633012e-05, "loss": 11.9975, "step": 22450 }, { "epoch": 1.222546677285196, "grad_norm": 0.48986824900253506, "learning_rate": 6.932147057635403e-05, "loss": 12.004, "step": 22451 }, { "epoch": 1.222601131281779, "grad_norm": 0.5660171207746736, "learning_rate": 6.931307766493933e-05, "loss": 12.0485, "step": 22452 }, { "epoch": 1.222655585278362, "grad_norm": 0.6416172517087058, "learning_rate": 6.930468499215128e-05, "loss": 11.9987, "step": 22453 }, { "epoch": 1.222710039274945, "grad_norm": 0.5803503295819011, "learning_rate": 6.929629255805514e-05, "loss": 11.8998, "step": 22454 }, { "epoch": 1.222764493271528, "grad_norm": 0.524470433421093, "learning_rate": 6.928790036271622e-05, "loss": 12.0086, "step": 22455 }, { "epoch": 1.222818947268111, "grad_norm": 0.5916510426763025, "learning_rate": 6.92795084061997e-05, "loss": 11.8913, "step": 22456 }, { "epoch": 1.222873401264694, "grad_norm": 0.6499721457360714, "learning_rate": 6.927111668857088e-05, "loss": 12.0046, "step": 22457 }, { "epoch": 1.222927855261277, "grad_norm": 0.5795488762817579, "learning_rate": 6.926272520989501e-05, "loss": 12.0773, "step": 22458 }, { "epoch": 1.22298230925786, "grad_norm": 0.4982720873340564, "learning_rate": 6.925433397023734e-05, "loss": 11.9238, "step": 22459 }, { "epoch": 1.2230367632544432, "grad_norm": 0.7991892105498443, "learning_rate": 6.924594296966313e-05, "loss": 11.9147, "step": 22460 }, { "epoch": 1.2230912172510262, "grad_norm": 0.5443048221822511, "learning_rate": 6.923755220823759e-05, "loss": 11.9642, "step": 22461 }, { "epoch": 1.2231456712476092, "grad_norm": 0.5778090281931294, "learning_rate": 6.922916168602604e-05, "loss": 12.0044, "step": 22462 }, { "epoch": 1.2232001252441922, "grad_norm": 0.644367087982054, "learning_rate": 6.922077140309368e-05, "loss": 11.9334, "step": 22463 }, { "epoch": 1.2232545792407752, "grad_norm": 0.529193243856524, "learning_rate": 6.921238135950578e-05, "loss": 11.8468, "step": 22464 }, { "epoch": 1.2233090332373582, "grad_norm": 0.5449996993842378, "learning_rate": 6.92039915553276e-05, "loss": 11.8906, "step": 22465 }, { "epoch": 1.2233634872339412, "grad_norm": 0.5335279736405514, "learning_rate": 6.919560199062426e-05, "loss": 12.024, "step": 22466 }, { "epoch": 1.2234179412305242, "grad_norm": 0.7067743752537503, "learning_rate": 6.918721266546113e-05, "loss": 12.007, "step": 22467 }, { "epoch": 1.2234723952271072, "grad_norm": 0.6387622983406572, "learning_rate": 6.91788235799034e-05, "loss": 12.0687, "step": 22468 }, { "epoch": 1.2235268492236901, "grad_norm": 0.6232709815148468, "learning_rate": 6.917043473401631e-05, "loss": 11.9692, "step": 22469 }, { "epoch": 1.2235813032202731, "grad_norm": 0.5561707891930123, "learning_rate": 6.916204612786508e-05, "loss": 11.9515, "step": 22470 }, { "epoch": 1.2236357572168561, "grad_norm": 0.4914024925290726, "learning_rate": 6.915365776151495e-05, "loss": 11.924, "step": 22471 }, { "epoch": 1.2236902112134391, "grad_norm": 0.6152761621250927, "learning_rate": 6.914526963503116e-05, "loss": 12.0788, "step": 22472 }, { "epoch": 1.2237446652100223, "grad_norm": 0.5532885451751817, "learning_rate": 6.91368817484789e-05, "loss": 11.9162, "step": 22473 }, { "epoch": 1.2237991192066053, "grad_norm": 0.5519537101500993, "learning_rate": 6.912849410192343e-05, "loss": 11.8547, "step": 22474 }, { "epoch": 1.2238535732031883, "grad_norm": 0.5261638613155791, "learning_rate": 6.912010669543003e-05, "loss": 11.9965, "step": 22475 }, { "epoch": 1.2239080271997713, "grad_norm": 0.5554353324154613, "learning_rate": 6.911171952906381e-05, "loss": 11.9629, "step": 22476 }, { "epoch": 1.2239624811963543, "grad_norm": 0.5097970921595514, "learning_rate": 6.910333260289002e-05, "loss": 12.0675, "step": 22477 }, { "epoch": 1.2240169351929373, "grad_norm": 0.570485128699268, "learning_rate": 6.909494591697391e-05, "loss": 11.9794, "step": 22478 }, { "epoch": 1.2240713891895203, "grad_norm": 0.5508712214127788, "learning_rate": 6.908655947138069e-05, "loss": 11.9146, "step": 22479 }, { "epoch": 1.2241258431861033, "grad_norm": 0.583189624217757, "learning_rate": 6.907817326617558e-05, "loss": 12.0606, "step": 22480 }, { "epoch": 1.2241802971826863, "grad_norm": 0.5304849872975674, "learning_rate": 6.906978730142377e-05, "loss": 11.942, "step": 22481 }, { "epoch": 1.2242347511792693, "grad_norm": 0.5421844094714305, "learning_rate": 6.906140157719048e-05, "loss": 11.887, "step": 22482 }, { "epoch": 1.2242892051758525, "grad_norm": 0.5693422574327641, "learning_rate": 6.90530160935409e-05, "loss": 11.9978, "step": 22483 }, { "epoch": 1.2243436591724355, "grad_norm": 0.5567753428968648, "learning_rate": 6.904463085054028e-05, "loss": 11.9522, "step": 22484 }, { "epoch": 1.2243981131690185, "grad_norm": 0.5357986277554075, "learning_rate": 6.903624584825382e-05, "loss": 12.0713, "step": 22485 }, { "epoch": 1.2244525671656015, "grad_norm": 0.5721509995507145, "learning_rate": 6.902786108674668e-05, "loss": 12.022, "step": 22486 }, { "epoch": 1.2245070211621845, "grad_norm": 0.5412860233253918, "learning_rate": 6.901947656608409e-05, "loss": 11.8419, "step": 22487 }, { "epoch": 1.2245614751587675, "grad_norm": 0.5522774571518956, "learning_rate": 6.901109228633123e-05, "loss": 11.9114, "step": 22488 }, { "epoch": 1.2246159291553504, "grad_norm": 0.5264038505000138, "learning_rate": 6.900270824755333e-05, "loss": 11.9053, "step": 22489 }, { "epoch": 1.2246703831519334, "grad_norm": 0.5416035535578183, "learning_rate": 6.899432444981555e-05, "loss": 12.0428, "step": 22490 }, { "epoch": 1.2247248371485164, "grad_norm": 0.5632192382301364, "learning_rate": 6.898594089318313e-05, "loss": 11.7867, "step": 22491 }, { "epoch": 1.2247792911450994, "grad_norm": 0.5267971404154178, "learning_rate": 6.897755757772122e-05, "loss": 11.8979, "step": 22492 }, { "epoch": 1.2248337451416824, "grad_norm": 0.5483848947228833, "learning_rate": 6.896917450349502e-05, "loss": 12.0011, "step": 22493 }, { "epoch": 1.2248881991382654, "grad_norm": 0.5449668670560219, "learning_rate": 6.896079167056973e-05, "loss": 11.9352, "step": 22494 }, { "epoch": 1.2249426531348484, "grad_norm": 0.6690299927337976, "learning_rate": 6.895240907901056e-05, "loss": 12.0275, "step": 22495 }, { "epoch": 1.2249971071314316, "grad_norm": 0.5341100907560594, "learning_rate": 6.894402672888263e-05, "loss": 11.8851, "step": 22496 }, { "epoch": 1.2250515611280146, "grad_norm": 0.6020366719678959, "learning_rate": 6.893564462025116e-05, "loss": 11.8098, "step": 22497 }, { "epoch": 1.2251060151245976, "grad_norm": 0.6167733876103798, "learning_rate": 6.892726275318133e-05, "loss": 12.0455, "step": 22498 }, { "epoch": 1.2251604691211806, "grad_norm": 0.5388056884271457, "learning_rate": 6.891888112773828e-05, "loss": 11.9592, "step": 22499 }, { "epoch": 1.2252149231177636, "grad_norm": 0.552119657574981, "learning_rate": 6.891049974398727e-05, "loss": 11.9421, "step": 22500 }, { "epoch": 1.2252693771143466, "grad_norm": 0.5919754129219378, "learning_rate": 6.89021186019934e-05, "loss": 11.9928, "step": 22501 }, { "epoch": 1.2253238311109296, "grad_norm": 0.6203640101726013, "learning_rate": 6.889373770182189e-05, "loss": 12.0415, "step": 22502 }, { "epoch": 1.2253782851075126, "grad_norm": 0.6101859476466426, "learning_rate": 6.888535704353789e-05, "loss": 12.0055, "step": 22503 }, { "epoch": 1.2254327391040956, "grad_norm": 0.5680737713519539, "learning_rate": 6.887697662720655e-05, "loss": 11.971, "step": 22504 }, { "epoch": 1.2254871931006786, "grad_norm": 0.5394158768390048, "learning_rate": 6.886859645289312e-05, "loss": 11.9425, "step": 22505 }, { "epoch": 1.2255416470972618, "grad_norm": 0.5569811013637291, "learning_rate": 6.886021652066266e-05, "loss": 12.0418, "step": 22506 }, { "epoch": 1.2255961010938448, "grad_norm": 0.4913095262555458, "learning_rate": 6.885183683058037e-05, "loss": 11.9588, "step": 22507 }, { "epoch": 1.2256505550904278, "grad_norm": 0.5106557013586046, "learning_rate": 6.884345738271144e-05, "loss": 11.8817, "step": 22508 }, { "epoch": 1.2257050090870107, "grad_norm": 0.5293005486103188, "learning_rate": 6.883507817712099e-05, "loss": 11.9469, "step": 22509 }, { "epoch": 1.2257594630835937, "grad_norm": 0.5521016993068111, "learning_rate": 6.882669921387419e-05, "loss": 12.1155, "step": 22510 }, { "epoch": 1.2258139170801767, "grad_norm": 0.5996763306083485, "learning_rate": 6.881832049303622e-05, "loss": 11.9832, "step": 22511 }, { "epoch": 1.2258683710767597, "grad_norm": 0.49781124861363907, "learning_rate": 6.880994201467224e-05, "loss": 12.0111, "step": 22512 }, { "epoch": 1.2259228250733427, "grad_norm": 0.568234994183213, "learning_rate": 6.880156377884736e-05, "loss": 12.0161, "step": 22513 }, { "epoch": 1.2259772790699257, "grad_norm": 0.5972351646468848, "learning_rate": 6.879318578562674e-05, "loss": 11.9595, "step": 22514 }, { "epoch": 1.2260317330665087, "grad_norm": 0.6252271107783179, "learning_rate": 6.878480803507559e-05, "loss": 12.1492, "step": 22515 }, { "epoch": 1.2260861870630917, "grad_norm": 0.6310811557791951, "learning_rate": 6.877643052725898e-05, "loss": 11.9716, "step": 22516 }, { "epoch": 1.2261406410596747, "grad_norm": 0.5824550513346468, "learning_rate": 6.876805326224207e-05, "loss": 12.0157, "step": 22517 }, { "epoch": 1.2261950950562577, "grad_norm": 0.5659324624616336, "learning_rate": 6.875967624009002e-05, "loss": 11.9627, "step": 22518 }, { "epoch": 1.226249549052841, "grad_norm": 0.5534296008852613, "learning_rate": 6.875129946086797e-05, "loss": 11.8946, "step": 22519 }, { "epoch": 1.2263040030494239, "grad_norm": 0.5444844881677916, "learning_rate": 6.874292292464105e-05, "loss": 11.9176, "step": 22520 }, { "epoch": 1.2263584570460069, "grad_norm": 0.5676441010144988, "learning_rate": 6.87345466314744e-05, "loss": 11.958, "step": 22521 }, { "epoch": 1.2264129110425899, "grad_norm": 0.5618143800630517, "learning_rate": 6.872617058143315e-05, "loss": 11.8106, "step": 22522 }, { "epoch": 1.2264673650391729, "grad_norm": 0.5839632123162697, "learning_rate": 6.871779477458246e-05, "loss": 11.8015, "step": 22523 }, { "epoch": 1.2265218190357559, "grad_norm": 0.5790251698032695, "learning_rate": 6.870941921098745e-05, "loss": 11.9906, "step": 22524 }, { "epoch": 1.2265762730323388, "grad_norm": 0.5921096649894817, "learning_rate": 6.870104389071328e-05, "loss": 12.1944, "step": 22525 }, { "epoch": 1.2266307270289218, "grad_norm": 0.549904217895622, "learning_rate": 6.869266881382497e-05, "loss": 12.0126, "step": 22526 }, { "epoch": 1.2266851810255048, "grad_norm": 0.5672576721021367, "learning_rate": 6.868429398038774e-05, "loss": 11.9439, "step": 22527 }, { "epoch": 1.2267396350220878, "grad_norm": 0.5552647166083431, "learning_rate": 6.86759193904667e-05, "loss": 11.9606, "step": 22528 }, { "epoch": 1.2267940890186708, "grad_norm": 0.5234237128835707, "learning_rate": 6.866754504412696e-05, "loss": 11.9319, "step": 22529 }, { "epoch": 1.226848543015254, "grad_norm": 0.5150598652908518, "learning_rate": 6.865917094143365e-05, "loss": 11.8037, "step": 22530 }, { "epoch": 1.226902997011837, "grad_norm": 0.5196617226235651, "learning_rate": 6.865079708245188e-05, "loss": 12.0576, "step": 22531 }, { "epoch": 1.22695745100842, "grad_norm": 0.5557178074468829, "learning_rate": 6.864242346724677e-05, "loss": 11.9688, "step": 22532 }, { "epoch": 1.227011905005003, "grad_norm": 0.5535336575135944, "learning_rate": 6.86340500958834e-05, "loss": 12.0447, "step": 22533 }, { "epoch": 1.227066359001586, "grad_norm": 0.5763363214579711, "learning_rate": 6.862567696842694e-05, "loss": 12.0194, "step": 22534 }, { "epoch": 1.227120812998169, "grad_norm": 0.569721398763426, "learning_rate": 6.861730408494254e-05, "loss": 12.0445, "step": 22535 }, { "epoch": 1.227175266994752, "grad_norm": 0.5694185737185509, "learning_rate": 6.860893144549519e-05, "loss": 11.8702, "step": 22536 }, { "epoch": 1.227229720991335, "grad_norm": 0.524479808326079, "learning_rate": 6.860055905015003e-05, "loss": 11.8846, "step": 22537 }, { "epoch": 1.227284174987918, "grad_norm": 0.5434479798074577, "learning_rate": 6.85921868989722e-05, "loss": 11.9942, "step": 22538 }, { "epoch": 1.227338628984501, "grad_norm": 0.5475471972796995, "learning_rate": 6.858381499202682e-05, "loss": 11.9149, "step": 22539 }, { "epoch": 1.227393082981084, "grad_norm": 0.578268790355292, "learning_rate": 6.857544332937894e-05, "loss": 11.9532, "step": 22540 }, { "epoch": 1.227447536977667, "grad_norm": 0.5972913130155101, "learning_rate": 6.85670719110937e-05, "loss": 12.0852, "step": 22541 }, { "epoch": 1.22750199097425, "grad_norm": 0.5799477358588745, "learning_rate": 6.855870073723616e-05, "loss": 12.0768, "step": 22542 }, { "epoch": 1.2275564449708332, "grad_norm": 0.6316007411233439, "learning_rate": 6.855032980787146e-05, "loss": 12.0178, "step": 22543 }, { "epoch": 1.2276108989674162, "grad_norm": 0.5529893064266413, "learning_rate": 6.854195912306463e-05, "loss": 11.7676, "step": 22544 }, { "epoch": 1.2276653529639991, "grad_norm": 0.5453360472387511, "learning_rate": 6.853358868288082e-05, "loss": 11.8291, "step": 22545 }, { "epoch": 1.2277198069605821, "grad_norm": 0.5922061215016341, "learning_rate": 6.852521848738515e-05, "loss": 11.9044, "step": 22546 }, { "epoch": 1.2277742609571651, "grad_norm": 0.5260442546307095, "learning_rate": 6.851684853664262e-05, "loss": 11.9625, "step": 22547 }, { "epoch": 1.2278287149537481, "grad_norm": 0.6544268062678831, "learning_rate": 6.850847883071832e-05, "loss": 11.9651, "step": 22548 }, { "epoch": 1.2278831689503311, "grad_norm": 0.5514728833294227, "learning_rate": 6.850010936967742e-05, "loss": 11.8761, "step": 22549 }, { "epoch": 1.227937622946914, "grad_norm": 0.534652900630826, "learning_rate": 6.849174015358493e-05, "loss": 11.9961, "step": 22550 }, { "epoch": 1.227992076943497, "grad_norm": 0.5620353073739606, "learning_rate": 6.848337118250596e-05, "loss": 12.0729, "step": 22551 }, { "epoch": 1.22804653094008, "grad_norm": 0.5092306169240086, "learning_rate": 6.847500245650558e-05, "loss": 11.8961, "step": 22552 }, { "epoch": 1.2281009849366633, "grad_norm": 0.5395441372993375, "learning_rate": 6.846663397564885e-05, "loss": 11.9984, "step": 22553 }, { "epoch": 1.2281554389332463, "grad_norm": 0.555298253458449, "learning_rate": 6.845826574000089e-05, "loss": 11.9878, "step": 22554 }, { "epoch": 1.2282098929298293, "grad_norm": 0.5385689252278301, "learning_rate": 6.844989774962671e-05, "loss": 11.8666, "step": 22555 }, { "epoch": 1.2282643469264123, "grad_norm": 0.6204148714636181, "learning_rate": 6.844153000459147e-05, "loss": 11.9007, "step": 22556 }, { "epoch": 1.2283188009229953, "grad_norm": 0.5367444221664323, "learning_rate": 6.843316250496017e-05, "loss": 11.9166, "step": 22557 }, { "epoch": 1.2283732549195783, "grad_norm": 0.5213410503064586, "learning_rate": 6.842479525079787e-05, "loss": 12.0957, "step": 22558 }, { "epoch": 1.2284277089161613, "grad_norm": 0.5080702699495391, "learning_rate": 6.841642824216965e-05, "loss": 11.8988, "step": 22559 }, { "epoch": 1.2284821629127443, "grad_norm": 0.48398772506905424, "learning_rate": 6.840806147914058e-05, "loss": 11.9082, "step": 22560 }, { "epoch": 1.2285366169093273, "grad_norm": 0.5325699704306123, "learning_rate": 6.839969496177574e-05, "loss": 11.9781, "step": 22561 }, { "epoch": 1.2285910709059102, "grad_norm": 0.5933961282585728, "learning_rate": 6.839132869014017e-05, "loss": 12.106, "step": 22562 }, { "epoch": 1.2286455249024932, "grad_norm": 0.5650542432538881, "learning_rate": 6.838296266429893e-05, "loss": 11.9166, "step": 22563 }, { "epoch": 1.2286999788990762, "grad_norm": 0.5643171401506263, "learning_rate": 6.837459688431707e-05, "loss": 11.9447, "step": 22564 }, { "epoch": 1.2287544328956592, "grad_norm": 0.5603946865686427, "learning_rate": 6.836623135025964e-05, "loss": 11.9263, "step": 22565 }, { "epoch": 1.2288088868922424, "grad_norm": 0.5456345787971714, "learning_rate": 6.835786606219174e-05, "loss": 12.0358, "step": 22566 }, { "epoch": 1.2288633408888254, "grad_norm": 0.535565212057959, "learning_rate": 6.834950102017834e-05, "loss": 11.9318, "step": 22567 }, { "epoch": 1.2289177948854084, "grad_norm": 0.610236385075074, "learning_rate": 6.834113622428455e-05, "loss": 11.9999, "step": 22568 }, { "epoch": 1.2289722488819914, "grad_norm": 0.579675211951449, "learning_rate": 6.833277167457536e-05, "loss": 12.0322, "step": 22569 }, { "epoch": 1.2290267028785744, "grad_norm": 0.5672237234485954, "learning_rate": 6.832440737111589e-05, "loss": 11.8957, "step": 22570 }, { "epoch": 1.2290811568751574, "grad_norm": 0.6164650484087583, "learning_rate": 6.831604331397109e-05, "loss": 11.9038, "step": 22571 }, { "epoch": 1.2291356108717404, "grad_norm": 0.5123542532218338, "learning_rate": 6.830767950320608e-05, "loss": 11.9202, "step": 22572 }, { "epoch": 1.2291900648683234, "grad_norm": 0.6149653410400626, "learning_rate": 6.829931593888588e-05, "loss": 11.9309, "step": 22573 }, { "epoch": 1.2292445188649064, "grad_norm": 0.49101752482147454, "learning_rate": 6.82909526210755e-05, "loss": 11.8955, "step": 22574 }, { "epoch": 1.2292989728614894, "grad_norm": 0.5521005653498695, "learning_rate": 6.828258954984e-05, "loss": 11.9297, "step": 22575 }, { "epoch": 1.2293534268580726, "grad_norm": 0.5256936737272673, "learning_rate": 6.82742267252444e-05, "loss": 11.9761, "step": 22576 }, { "epoch": 1.2294078808546556, "grad_norm": 0.5728446845923955, "learning_rate": 6.826586414735374e-05, "loss": 11.9677, "step": 22577 }, { "epoch": 1.2294623348512386, "grad_norm": 0.5371594106111425, "learning_rate": 6.825750181623302e-05, "loss": 12.0253, "step": 22578 }, { "epoch": 1.2295167888478216, "grad_norm": 0.5341225782071433, "learning_rate": 6.82491397319473e-05, "loss": 11.9273, "step": 22579 }, { "epoch": 1.2295712428444046, "grad_norm": 0.5874889172319521, "learning_rate": 6.82407778945616e-05, "loss": 11.8723, "step": 22580 }, { "epoch": 1.2296256968409875, "grad_norm": 0.5269104334742466, "learning_rate": 6.823241630414095e-05, "loss": 11.9875, "step": 22581 }, { "epoch": 1.2296801508375705, "grad_norm": 0.5422815228069731, "learning_rate": 6.822405496075031e-05, "loss": 12.0433, "step": 22582 }, { "epoch": 1.2297346048341535, "grad_norm": 0.568900395477977, "learning_rate": 6.821569386445478e-05, "loss": 11.9877, "step": 22583 }, { "epoch": 1.2297890588307365, "grad_norm": 0.48023974666047436, "learning_rate": 6.820733301531935e-05, "loss": 11.8652, "step": 22584 }, { "epoch": 1.2298435128273195, "grad_norm": 0.5149114235042755, "learning_rate": 6.819897241340902e-05, "loss": 11.9277, "step": 22585 }, { "epoch": 1.2298979668239025, "grad_norm": 0.5066632032578524, "learning_rate": 6.819061205878883e-05, "loss": 11.9283, "step": 22586 }, { "epoch": 1.2299524208204855, "grad_norm": 0.5299972732180512, "learning_rate": 6.818225195152376e-05, "loss": 11.9157, "step": 22587 }, { "epoch": 1.2300068748170685, "grad_norm": 0.4802202635590141, "learning_rate": 6.817389209167883e-05, "loss": 11.8945, "step": 22588 }, { "epoch": 1.2300613288136517, "grad_norm": 0.5332295352909948, "learning_rate": 6.816553247931907e-05, "loss": 11.9258, "step": 22589 }, { "epoch": 1.2301157828102347, "grad_norm": 0.5719248548778227, "learning_rate": 6.815717311450947e-05, "loss": 11.8852, "step": 22590 }, { "epoch": 1.2301702368068177, "grad_norm": 0.6258691862808952, "learning_rate": 6.814881399731501e-05, "loss": 12.0193, "step": 22591 }, { "epoch": 1.2302246908034007, "grad_norm": 0.5201591560747867, "learning_rate": 6.814045512780072e-05, "loss": 11.9122, "step": 22592 }, { "epoch": 1.2302791447999837, "grad_norm": 0.5890835281633671, "learning_rate": 6.813209650603158e-05, "loss": 11.9035, "step": 22593 }, { "epoch": 1.2303335987965667, "grad_norm": 0.6592927364606244, "learning_rate": 6.812373813207262e-05, "loss": 11.9232, "step": 22594 }, { "epoch": 1.2303880527931497, "grad_norm": 0.5377431673788448, "learning_rate": 6.81153800059888e-05, "loss": 11.8767, "step": 22595 }, { "epoch": 1.2304425067897327, "grad_norm": 0.4924899511594738, "learning_rate": 6.810702212784521e-05, "loss": 11.8827, "step": 22596 }, { "epoch": 1.2304969607863157, "grad_norm": 0.5719182262177606, "learning_rate": 6.809866449770668e-05, "loss": 11.8717, "step": 22597 }, { "epoch": 1.2305514147828986, "grad_norm": 0.5529520024374325, "learning_rate": 6.809030711563831e-05, "loss": 11.9094, "step": 22598 }, { "epoch": 1.2306058687794816, "grad_norm": 0.5762758018285343, "learning_rate": 6.808194998170505e-05, "loss": 11.9475, "step": 22599 }, { "epoch": 1.2306603227760649, "grad_norm": 0.4990905240149167, "learning_rate": 6.807359309597192e-05, "loss": 11.9709, "step": 22600 }, { "epoch": 1.2307147767726478, "grad_norm": 0.5124147148330038, "learning_rate": 6.806523645850387e-05, "loss": 11.9831, "step": 22601 }, { "epoch": 1.2307692307692308, "grad_norm": 0.5695992152209984, "learning_rate": 6.805688006936591e-05, "loss": 11.8572, "step": 22602 }, { "epoch": 1.2308236847658138, "grad_norm": 0.6755224310305814, "learning_rate": 6.804852392862299e-05, "loss": 11.8961, "step": 22603 }, { "epoch": 1.2308781387623968, "grad_norm": 0.546045519852647, "learning_rate": 6.80401680363401e-05, "loss": 12.0278, "step": 22604 }, { "epoch": 1.2309325927589798, "grad_norm": 0.5436839481613359, "learning_rate": 6.803181239258223e-05, "loss": 11.9962, "step": 22605 }, { "epoch": 1.2309870467555628, "grad_norm": 0.5958397610471744, "learning_rate": 6.80234569974144e-05, "loss": 12.0153, "step": 22606 }, { "epoch": 1.2310415007521458, "grad_norm": 0.5994537104580747, "learning_rate": 6.801510185090148e-05, "loss": 12.0225, "step": 22607 }, { "epoch": 1.2310959547487288, "grad_norm": 0.5257403281060432, "learning_rate": 6.800674695310848e-05, "loss": 11.9313, "step": 22608 }, { "epoch": 1.2311504087453118, "grad_norm": 0.5503936262250163, "learning_rate": 6.79983923041004e-05, "loss": 12.1216, "step": 22609 }, { "epoch": 1.2312048627418948, "grad_norm": 0.515757488444049, "learning_rate": 6.799003790394218e-05, "loss": 11.9001, "step": 22610 }, { "epoch": 1.2312593167384778, "grad_norm": 0.6445319175777956, "learning_rate": 6.798168375269881e-05, "loss": 11.8449, "step": 22611 }, { "epoch": 1.2313137707350608, "grad_norm": 0.6110454760683547, "learning_rate": 6.797332985043524e-05, "loss": 12.0362, "step": 22612 }, { "epoch": 1.231368224731644, "grad_norm": 0.5364761671587409, "learning_rate": 6.796497619721641e-05, "loss": 11.9972, "step": 22613 }, { "epoch": 1.231422678728227, "grad_norm": 0.6156344582695163, "learning_rate": 6.79566227931073e-05, "loss": 11.9898, "step": 22614 }, { "epoch": 1.23147713272481, "grad_norm": 0.5757783441910294, "learning_rate": 6.794826963817284e-05, "loss": 12.0149, "step": 22615 }, { "epoch": 1.231531586721393, "grad_norm": 0.5549759110624761, "learning_rate": 6.793991673247808e-05, "loss": 11.9959, "step": 22616 }, { "epoch": 1.231586040717976, "grad_norm": 0.4844406136239094, "learning_rate": 6.793156407608788e-05, "loss": 11.8264, "step": 22617 }, { "epoch": 1.231640494714559, "grad_norm": 0.5388303063570763, "learning_rate": 6.792321166906718e-05, "loss": 11.9343, "step": 22618 }, { "epoch": 1.231694948711142, "grad_norm": 0.5448056529738672, "learning_rate": 6.791485951148098e-05, "loss": 11.9791, "step": 22619 }, { "epoch": 1.231749402707725, "grad_norm": 0.5034495799225918, "learning_rate": 6.79065076033942e-05, "loss": 11.9938, "step": 22620 }, { "epoch": 1.231803856704308, "grad_norm": 0.5739551857827964, "learning_rate": 6.789815594487181e-05, "loss": 11.9997, "step": 22621 }, { "epoch": 1.231858310700891, "grad_norm": 0.5570374439432412, "learning_rate": 6.788980453597874e-05, "loss": 11.9231, "step": 22622 }, { "epoch": 1.2319127646974741, "grad_norm": 0.5414628401592906, "learning_rate": 6.788145337677996e-05, "loss": 12.04, "step": 22623 }, { "epoch": 1.2319672186940571, "grad_norm": 0.592957815956549, "learning_rate": 6.787310246734036e-05, "loss": 12.0072, "step": 22624 }, { "epoch": 1.2320216726906401, "grad_norm": 0.5401153412064662, "learning_rate": 6.786475180772492e-05, "loss": 12.0457, "step": 22625 }, { "epoch": 1.232076126687223, "grad_norm": 0.5082280335715815, "learning_rate": 6.785640139799856e-05, "loss": 11.9912, "step": 22626 }, { "epoch": 1.232130580683806, "grad_norm": 0.6033043900846138, "learning_rate": 6.78480512382262e-05, "loss": 12.0248, "step": 22627 }, { "epoch": 1.232185034680389, "grad_norm": 0.5695370069701157, "learning_rate": 6.78397013284728e-05, "loss": 11.861, "step": 22628 }, { "epoch": 1.232239488676972, "grad_norm": 0.5128287328816599, "learning_rate": 6.783135166880326e-05, "loss": 11.9754, "step": 22629 }, { "epoch": 1.232293942673555, "grad_norm": 0.5195913340414557, "learning_rate": 6.782300225928253e-05, "loss": 11.8701, "step": 22630 }, { "epoch": 1.232348396670138, "grad_norm": 0.5027984525122968, "learning_rate": 6.78146530999755e-05, "loss": 11.8952, "step": 22631 }, { "epoch": 1.232402850666721, "grad_norm": 0.5218699466640082, "learning_rate": 6.780630419094715e-05, "loss": 11.858, "step": 22632 }, { "epoch": 1.232457304663304, "grad_norm": 0.5555496894314489, "learning_rate": 6.779795553226239e-05, "loss": 11.9742, "step": 22633 }, { "epoch": 1.232511758659887, "grad_norm": 0.5483631381595321, "learning_rate": 6.778960712398612e-05, "loss": 11.8262, "step": 22634 }, { "epoch": 1.23256621265647, "grad_norm": 0.6204895092534077, "learning_rate": 6.778125896618326e-05, "loss": 12.127, "step": 22635 }, { "epoch": 1.2326206666530533, "grad_norm": 0.5007419609052064, "learning_rate": 6.777291105891876e-05, "loss": 11.8728, "step": 22636 }, { "epoch": 1.2326751206496362, "grad_norm": 0.5545802228894495, "learning_rate": 6.77645634022575e-05, "loss": 11.7296, "step": 22637 }, { "epoch": 1.2327295746462192, "grad_norm": 0.5471509780195176, "learning_rate": 6.775621599626438e-05, "loss": 12.0449, "step": 22638 }, { "epoch": 1.2327840286428022, "grad_norm": 0.5134534463126355, "learning_rate": 6.774786884100435e-05, "loss": 11.8806, "step": 22639 }, { "epoch": 1.2328384826393852, "grad_norm": 0.5137325719406078, "learning_rate": 6.773952193654228e-05, "loss": 11.828, "step": 22640 }, { "epoch": 1.2328929366359682, "grad_norm": 0.5495636242292773, "learning_rate": 6.77311752829431e-05, "loss": 12.1211, "step": 22641 }, { "epoch": 1.2329473906325512, "grad_norm": 0.4992174580434379, "learning_rate": 6.772282888027171e-05, "loss": 11.7942, "step": 22642 }, { "epoch": 1.2330018446291342, "grad_norm": 0.5063635264465519, "learning_rate": 6.771448272859302e-05, "loss": 11.9813, "step": 22643 }, { "epoch": 1.2330562986257172, "grad_norm": 0.5161442399417954, "learning_rate": 6.770613682797193e-05, "loss": 11.9526, "step": 22644 }, { "epoch": 1.2331107526223002, "grad_norm": 0.5498698779672171, "learning_rate": 6.769779117847335e-05, "loss": 11.9349, "step": 22645 }, { "epoch": 1.2331652066188834, "grad_norm": 0.5908461002668517, "learning_rate": 6.768944578016217e-05, "loss": 12.0561, "step": 22646 }, { "epoch": 1.2332196606154664, "grad_norm": 0.5286359439660577, "learning_rate": 6.768110063310325e-05, "loss": 11.79, "step": 22647 }, { "epoch": 1.2332741146120494, "grad_norm": 0.5591648237871142, "learning_rate": 6.767275573736152e-05, "loss": 11.9224, "step": 22648 }, { "epoch": 1.2333285686086324, "grad_norm": 0.5916860885641544, "learning_rate": 6.766441109300187e-05, "loss": 12.0007, "step": 22649 }, { "epoch": 1.2333830226052154, "grad_norm": 0.5530588051323196, "learning_rate": 6.765606670008917e-05, "loss": 11.9313, "step": 22650 }, { "epoch": 1.2334374766017984, "grad_norm": 0.5291355069703004, "learning_rate": 6.764772255868834e-05, "loss": 12.0127, "step": 22651 }, { "epoch": 1.2334919305983814, "grad_norm": 0.5586532283395358, "learning_rate": 6.763937866886422e-05, "loss": 11.9683, "step": 22652 }, { "epoch": 1.2335463845949644, "grad_norm": 0.631528253563068, "learning_rate": 6.763103503068171e-05, "loss": 11.8984, "step": 22653 }, { "epoch": 1.2336008385915473, "grad_norm": 0.5216279564163208, "learning_rate": 6.762269164420572e-05, "loss": 11.8951, "step": 22654 }, { "epoch": 1.2336552925881303, "grad_norm": 0.5698621996284772, "learning_rate": 6.76143485095011e-05, "loss": 11.7775, "step": 22655 }, { "epoch": 1.2337097465847133, "grad_norm": 0.5700367434440197, "learning_rate": 6.760600562663279e-05, "loss": 11.982, "step": 22656 }, { "epoch": 1.2337642005812963, "grad_norm": 0.5284410018572115, "learning_rate": 6.759766299566554e-05, "loss": 11.8915, "step": 22657 }, { "epoch": 1.2338186545778793, "grad_norm": 0.587121349461119, "learning_rate": 6.758932061666431e-05, "loss": 12.0293, "step": 22658 }, { "epoch": 1.2338731085744625, "grad_norm": 0.5086158498825752, "learning_rate": 6.758097848969397e-05, "loss": 11.8235, "step": 22659 }, { "epoch": 1.2339275625710455, "grad_norm": 0.5458134683974727, "learning_rate": 6.75726366148194e-05, "loss": 11.7725, "step": 22660 }, { "epoch": 1.2339820165676285, "grad_norm": 0.516343793279754, "learning_rate": 6.756429499210541e-05, "loss": 11.9901, "step": 22661 }, { "epoch": 1.2340364705642115, "grad_norm": 0.5301177127299678, "learning_rate": 6.755595362161693e-05, "loss": 11.984, "step": 22662 }, { "epoch": 1.2340909245607945, "grad_norm": 0.5508190832135074, "learning_rate": 6.754761250341879e-05, "loss": 11.9713, "step": 22663 }, { "epoch": 1.2341453785573775, "grad_norm": 0.5372543443815975, "learning_rate": 6.753927163757581e-05, "loss": 11.9204, "step": 22664 }, { "epoch": 1.2341998325539605, "grad_norm": 0.5476867763403248, "learning_rate": 6.753093102415295e-05, "loss": 11.9879, "step": 22665 }, { "epoch": 1.2342542865505435, "grad_norm": 0.5621986188289442, "learning_rate": 6.752259066321504e-05, "loss": 12.0198, "step": 22666 }, { "epoch": 1.2343087405471265, "grad_norm": 0.5251636266109724, "learning_rate": 6.751425055482688e-05, "loss": 12.0163, "step": 22667 }, { "epoch": 1.2343631945437095, "grad_norm": 0.5277914289651588, "learning_rate": 6.750591069905334e-05, "loss": 11.8987, "step": 22668 }, { "epoch": 1.2344176485402927, "grad_norm": 0.5784950361547624, "learning_rate": 6.74975710959593e-05, "loss": 11.8538, "step": 22669 }, { "epoch": 1.2344721025368757, "grad_norm": 0.5752232794584748, "learning_rate": 6.748923174560958e-05, "loss": 11.9443, "step": 22670 }, { "epoch": 1.2345265565334587, "grad_norm": 0.4871636448689952, "learning_rate": 6.748089264806907e-05, "loss": 11.984, "step": 22671 }, { "epoch": 1.2345810105300417, "grad_norm": 0.5971699616649676, "learning_rate": 6.747255380340257e-05, "loss": 11.897, "step": 22672 }, { "epoch": 1.2346354645266246, "grad_norm": 0.5349004302448748, "learning_rate": 6.746421521167497e-05, "loss": 12.0665, "step": 22673 }, { "epoch": 1.2346899185232076, "grad_norm": 0.5500605269688773, "learning_rate": 6.745587687295108e-05, "loss": 11.8755, "step": 22674 }, { "epoch": 1.2347443725197906, "grad_norm": 0.590098038149082, "learning_rate": 6.744753878729574e-05, "loss": 11.855, "step": 22675 }, { "epoch": 1.2347988265163736, "grad_norm": 0.524694545292558, "learning_rate": 6.743920095477383e-05, "loss": 11.8882, "step": 22676 }, { "epoch": 1.2348532805129566, "grad_norm": 0.5093109039751735, "learning_rate": 6.743086337545012e-05, "loss": 11.7996, "step": 22677 }, { "epoch": 1.2349077345095396, "grad_norm": 0.5393763795827721, "learning_rate": 6.742252604938949e-05, "loss": 11.9541, "step": 22678 }, { "epoch": 1.2349621885061226, "grad_norm": 0.5886730619525234, "learning_rate": 6.741418897665675e-05, "loss": 11.9881, "step": 22679 }, { "epoch": 1.2350166425027056, "grad_norm": 0.5932752272406046, "learning_rate": 6.740585215731674e-05, "loss": 12.0575, "step": 22680 }, { "epoch": 1.2350710964992886, "grad_norm": 0.5251342499524592, "learning_rate": 6.73975155914343e-05, "loss": 12.0279, "step": 22681 }, { "epoch": 1.2351255504958716, "grad_norm": 0.5798823011980077, "learning_rate": 6.738917927907424e-05, "loss": 11.9904, "step": 22682 }, { "epoch": 1.2351800044924548, "grad_norm": 0.5514062392490326, "learning_rate": 6.73808432203014e-05, "loss": 11.9444, "step": 22683 }, { "epoch": 1.2352344584890378, "grad_norm": 0.5154261983976824, "learning_rate": 6.737250741518058e-05, "loss": 12.0411, "step": 22684 }, { "epoch": 1.2352889124856208, "grad_norm": 0.5192587201413797, "learning_rate": 6.736417186377663e-05, "loss": 11.9051, "step": 22685 }, { "epoch": 1.2353433664822038, "grad_norm": 0.5458591462569395, "learning_rate": 6.735583656615434e-05, "loss": 11.8986, "step": 22686 }, { "epoch": 1.2353978204787868, "grad_norm": 0.5108083682756132, "learning_rate": 6.734750152237856e-05, "loss": 11.988, "step": 22687 }, { "epoch": 1.2354522744753698, "grad_norm": 0.5506524263833593, "learning_rate": 6.733916673251408e-05, "loss": 12.0789, "step": 22688 }, { "epoch": 1.2355067284719528, "grad_norm": 0.5131917728864864, "learning_rate": 6.73308321966257e-05, "loss": 11.963, "step": 22689 }, { "epoch": 1.2355611824685357, "grad_norm": 0.5439987044445266, "learning_rate": 6.732249791477825e-05, "loss": 11.9902, "step": 22690 }, { "epoch": 1.2356156364651187, "grad_norm": 0.5099899001935355, "learning_rate": 6.731416388703652e-05, "loss": 12.0014, "step": 22691 }, { "epoch": 1.2356700904617017, "grad_norm": 0.5273773552932229, "learning_rate": 6.730583011346536e-05, "loss": 11.9103, "step": 22692 }, { "epoch": 1.235724544458285, "grad_norm": 0.530539161167646, "learning_rate": 6.729749659412954e-05, "loss": 11.9131, "step": 22693 }, { "epoch": 1.235778998454868, "grad_norm": 0.500679073640153, "learning_rate": 6.728916332909387e-05, "loss": 11.9508, "step": 22694 }, { "epoch": 1.235833452451451, "grad_norm": 0.5657946855188445, "learning_rate": 6.728083031842315e-05, "loss": 11.8095, "step": 22695 }, { "epoch": 1.235887906448034, "grad_norm": 0.5454305197094377, "learning_rate": 6.727249756218219e-05, "loss": 11.8639, "step": 22696 }, { "epoch": 1.235942360444617, "grad_norm": 0.5170051698920389, "learning_rate": 6.726416506043578e-05, "loss": 11.9299, "step": 22697 }, { "epoch": 1.2359968144412, "grad_norm": 0.5403322015639688, "learning_rate": 6.725583281324871e-05, "loss": 11.993, "step": 22698 }, { "epoch": 1.236051268437783, "grad_norm": 0.5114468184722819, "learning_rate": 6.724750082068576e-05, "loss": 11.9085, "step": 22699 }, { "epoch": 1.236105722434366, "grad_norm": 0.5425623936775001, "learning_rate": 6.723916908281174e-05, "loss": 11.9386, "step": 22700 }, { "epoch": 1.2361601764309489, "grad_norm": 0.5399263853903555, "learning_rate": 6.723083759969145e-05, "loss": 11.9577, "step": 22701 }, { "epoch": 1.2362146304275319, "grad_norm": 0.5799004326323731, "learning_rate": 6.722250637138963e-05, "loss": 11.8526, "step": 22702 }, { "epoch": 1.2362690844241149, "grad_norm": 0.5418963677157963, "learning_rate": 6.721417539797113e-05, "loss": 11.9665, "step": 22703 }, { "epoch": 1.2363235384206979, "grad_norm": 0.5320204572101112, "learning_rate": 6.720584467950068e-05, "loss": 11.8885, "step": 22704 }, { "epoch": 1.2363779924172809, "grad_norm": 0.5013538756593339, "learning_rate": 6.719751421604309e-05, "loss": 12.0037, "step": 22705 }, { "epoch": 1.236432446413864, "grad_norm": 0.6402306975556139, "learning_rate": 6.718918400766312e-05, "loss": 11.9172, "step": 22706 }, { "epoch": 1.236486900410447, "grad_norm": 0.5376631435166885, "learning_rate": 6.71808540544256e-05, "loss": 11.9102, "step": 22707 }, { "epoch": 1.23654135440703, "grad_norm": 0.5384022363175526, "learning_rate": 6.717252435639523e-05, "loss": 11.887, "step": 22708 }, { "epoch": 1.236595808403613, "grad_norm": 0.5905386507610483, "learning_rate": 6.716419491363681e-05, "loss": 12.0708, "step": 22709 }, { "epoch": 1.236650262400196, "grad_norm": 0.5622731857639514, "learning_rate": 6.715586572621512e-05, "loss": 11.9543, "step": 22710 }, { "epoch": 1.236704716396779, "grad_norm": 0.5143368789981935, "learning_rate": 6.714753679419495e-05, "loss": 11.9082, "step": 22711 }, { "epoch": 1.236759170393362, "grad_norm": 0.5170015509220544, "learning_rate": 6.713920811764101e-05, "loss": 11.7931, "step": 22712 }, { "epoch": 1.236813624389945, "grad_norm": 0.5738164528129192, "learning_rate": 6.713087969661808e-05, "loss": 11.9441, "step": 22713 }, { "epoch": 1.236868078386528, "grad_norm": 0.5163665854909195, "learning_rate": 6.712255153119098e-05, "loss": 12.0418, "step": 22714 }, { "epoch": 1.236922532383111, "grad_norm": 0.5418832478842345, "learning_rate": 6.711422362142443e-05, "loss": 11.9186, "step": 22715 }, { "epoch": 1.2369769863796942, "grad_norm": 0.5630243743266894, "learning_rate": 6.710589596738319e-05, "loss": 11.8954, "step": 22716 }, { "epoch": 1.2370314403762772, "grad_norm": 0.5540732424339679, "learning_rate": 6.709756856913203e-05, "loss": 12.0347, "step": 22717 }, { "epoch": 1.2370858943728602, "grad_norm": 0.5835943891047622, "learning_rate": 6.70892414267357e-05, "loss": 11.9687, "step": 22718 }, { "epoch": 1.2371403483694432, "grad_norm": 0.638789111867418, "learning_rate": 6.708091454025891e-05, "loss": 12.0823, "step": 22719 }, { "epoch": 1.2371948023660262, "grad_norm": 0.5060120714308841, "learning_rate": 6.707258790976647e-05, "loss": 11.8976, "step": 22720 }, { "epoch": 1.2372492563626092, "grad_norm": 0.5404010664963349, "learning_rate": 6.706426153532311e-05, "loss": 11.9189, "step": 22721 }, { "epoch": 1.2373037103591922, "grad_norm": 0.5313752114208501, "learning_rate": 6.705593541699358e-05, "loss": 11.8907, "step": 22722 }, { "epoch": 1.2373581643557752, "grad_norm": 0.5405744362055556, "learning_rate": 6.704760955484262e-05, "loss": 11.9977, "step": 22723 }, { "epoch": 1.2374126183523582, "grad_norm": 0.5053750280482343, "learning_rate": 6.703928394893496e-05, "loss": 11.916, "step": 22724 }, { "epoch": 1.2374670723489412, "grad_norm": 0.6332462231421013, "learning_rate": 6.703095859933534e-05, "loss": 12.0191, "step": 22725 }, { "epoch": 1.2375215263455241, "grad_norm": 0.5620715208301164, "learning_rate": 6.702263350610853e-05, "loss": 11.9745, "step": 22726 }, { "epoch": 1.2375759803421071, "grad_norm": 0.523419185547234, "learning_rate": 6.70143086693193e-05, "loss": 12.0299, "step": 22727 }, { "epoch": 1.2376304343386901, "grad_norm": 0.536929624789274, "learning_rate": 6.700598408903231e-05, "loss": 12.017, "step": 22728 }, { "epoch": 1.2376848883352733, "grad_norm": 0.5240904329169529, "learning_rate": 6.69976597653123e-05, "loss": 12.0377, "step": 22729 }, { "epoch": 1.2377393423318563, "grad_norm": 0.5417193831271322, "learning_rate": 6.698933569822401e-05, "loss": 11.9908, "step": 22730 }, { "epoch": 1.2377937963284393, "grad_norm": 0.5075968817592331, "learning_rate": 6.698101188783222e-05, "loss": 12.0386, "step": 22731 }, { "epoch": 1.2378482503250223, "grad_norm": 0.5439818920805917, "learning_rate": 6.697268833420159e-05, "loss": 11.9398, "step": 22732 }, { "epoch": 1.2379027043216053, "grad_norm": 0.540981612610113, "learning_rate": 6.69643650373969e-05, "loss": 11.966, "step": 22733 }, { "epoch": 1.2379571583181883, "grad_norm": 0.5410089949216403, "learning_rate": 6.695604199748282e-05, "loss": 11.9297, "step": 22734 }, { "epoch": 1.2380116123147713, "grad_norm": 0.5080515709221153, "learning_rate": 6.694771921452411e-05, "loss": 11.9432, "step": 22735 }, { "epoch": 1.2380660663113543, "grad_norm": 0.5701795735195642, "learning_rate": 6.693939668858547e-05, "loss": 11.9448, "step": 22736 }, { "epoch": 1.2381205203079373, "grad_norm": 0.5625459367180433, "learning_rate": 6.693107441973166e-05, "loss": 12.0541, "step": 22737 }, { "epoch": 1.2381749743045203, "grad_norm": 0.5421907134881676, "learning_rate": 6.692275240802734e-05, "loss": 11.8146, "step": 22738 }, { "epoch": 1.2382294283011035, "grad_norm": 0.5157314531188918, "learning_rate": 6.691443065353724e-05, "loss": 11.9038, "step": 22739 }, { "epoch": 1.2382838822976865, "grad_norm": 0.5451684293546573, "learning_rate": 6.690610915632605e-05, "loss": 11.9046, "step": 22740 }, { "epoch": 1.2383383362942695, "grad_norm": 0.5315778944147992, "learning_rate": 6.689778791645854e-05, "loss": 11.9785, "step": 22741 }, { "epoch": 1.2383927902908525, "grad_norm": 0.6178763847409804, "learning_rate": 6.688946693399938e-05, "loss": 11.9117, "step": 22742 }, { "epoch": 1.2384472442874355, "grad_norm": 0.6453867300968816, "learning_rate": 6.688114620901327e-05, "loss": 11.9758, "step": 22743 }, { "epoch": 1.2385016982840185, "grad_norm": 0.5725942618275108, "learning_rate": 6.687282574156492e-05, "loss": 11.9988, "step": 22744 }, { "epoch": 1.2385561522806015, "grad_norm": 0.5849737479201819, "learning_rate": 6.686450553171904e-05, "loss": 11.944, "step": 22745 }, { "epoch": 1.2386106062771844, "grad_norm": 0.5283609630593522, "learning_rate": 6.685618557954031e-05, "loss": 11.8822, "step": 22746 }, { "epoch": 1.2386650602737674, "grad_norm": 0.5740219832501973, "learning_rate": 6.684786588509346e-05, "loss": 11.9405, "step": 22747 }, { "epoch": 1.2387195142703504, "grad_norm": 0.4982715936990555, "learning_rate": 6.683954644844316e-05, "loss": 11.8322, "step": 22748 }, { "epoch": 1.2387739682669334, "grad_norm": 0.6233930846331157, "learning_rate": 6.683122726965409e-05, "loss": 11.988, "step": 22749 }, { "epoch": 1.2388284222635164, "grad_norm": 0.5632371456358553, "learning_rate": 6.682290834879096e-05, "loss": 11.8357, "step": 22750 }, { "epoch": 1.2388828762600994, "grad_norm": 0.5964121805149151, "learning_rate": 6.681458968591846e-05, "loss": 11.9755, "step": 22751 }, { "epoch": 1.2389373302566826, "grad_norm": 0.5756759926454542, "learning_rate": 6.680627128110129e-05, "loss": 11.9904, "step": 22752 }, { "epoch": 1.2389917842532656, "grad_norm": 0.5681878427286643, "learning_rate": 6.679795313440412e-05, "loss": 11.8841, "step": 22753 }, { "epoch": 1.2390462382498486, "grad_norm": 0.5422053691231631, "learning_rate": 6.678963524589162e-05, "loss": 11.959, "step": 22754 }, { "epoch": 1.2391006922464316, "grad_norm": 0.6214782310646312, "learning_rate": 6.67813176156285e-05, "loss": 12.027, "step": 22755 }, { "epoch": 1.2391551462430146, "grad_norm": 0.5894579910660742, "learning_rate": 6.67730002436794e-05, "loss": 11.9687, "step": 22756 }, { "epoch": 1.2392096002395976, "grad_norm": 0.48326707282042, "learning_rate": 6.676468313010907e-05, "loss": 11.9699, "step": 22757 }, { "epoch": 1.2392640542361806, "grad_norm": 0.5369041904412026, "learning_rate": 6.67563662749821e-05, "loss": 12.0289, "step": 22758 }, { "epoch": 1.2393185082327636, "grad_norm": 0.5745372613026676, "learning_rate": 6.674804967836321e-05, "loss": 11.8012, "step": 22759 }, { "epoch": 1.2393729622293466, "grad_norm": 0.5116956700294779, "learning_rate": 6.673973334031707e-05, "loss": 11.855, "step": 22760 }, { "epoch": 1.2394274162259296, "grad_norm": 0.5551668780314658, "learning_rate": 6.673141726090833e-05, "loss": 12.1515, "step": 22761 }, { "epoch": 1.2394818702225125, "grad_norm": 0.5104546891451653, "learning_rate": 6.672310144020163e-05, "loss": 11.8827, "step": 22762 }, { "epoch": 1.2395363242190958, "grad_norm": 0.5674099124673071, "learning_rate": 6.671478587826173e-05, "loss": 11.9037, "step": 22763 }, { "epoch": 1.2395907782156788, "grad_norm": 0.5461280995964353, "learning_rate": 6.670647057515322e-05, "loss": 11.9623, "step": 22764 }, { "epoch": 1.2396452322122617, "grad_norm": 0.5709435991393212, "learning_rate": 6.669815553094079e-05, "loss": 11.8487, "step": 22765 }, { "epoch": 1.2396996862088447, "grad_norm": 0.48453856223587394, "learning_rate": 6.668984074568908e-05, "loss": 11.881, "step": 22766 }, { "epoch": 1.2397541402054277, "grad_norm": 0.6409868095410967, "learning_rate": 6.668152621946276e-05, "loss": 11.9496, "step": 22767 }, { "epoch": 1.2398085942020107, "grad_norm": 0.5477782554657079, "learning_rate": 6.667321195232648e-05, "loss": 12.0103, "step": 22768 }, { "epoch": 1.2398630481985937, "grad_norm": 0.5040437193973005, "learning_rate": 6.666489794434487e-05, "loss": 11.8298, "step": 22769 }, { "epoch": 1.2399175021951767, "grad_norm": 0.5680760203946319, "learning_rate": 6.665658419558262e-05, "loss": 11.9437, "step": 22770 }, { "epoch": 1.2399719561917597, "grad_norm": 0.528085507850227, "learning_rate": 6.664827070610436e-05, "loss": 11.9805, "step": 22771 }, { "epoch": 1.2400264101883427, "grad_norm": 0.5444655791242297, "learning_rate": 6.663995747597475e-05, "loss": 11.8826, "step": 22772 }, { "epoch": 1.2400808641849257, "grad_norm": 0.503196129850154, "learning_rate": 6.66316445052584e-05, "loss": 11.9569, "step": 22773 }, { "epoch": 1.2401353181815087, "grad_norm": 0.4988060771487552, "learning_rate": 6.662333179401998e-05, "loss": 11.804, "step": 22774 }, { "epoch": 1.2401897721780917, "grad_norm": 0.5450478647484439, "learning_rate": 6.661501934232414e-05, "loss": 12.018, "step": 22775 }, { "epoch": 1.240244226174675, "grad_norm": 0.5498181916634691, "learning_rate": 6.660670715023551e-05, "loss": 11.8347, "step": 22776 }, { "epoch": 1.2402986801712579, "grad_norm": 0.5582544321055958, "learning_rate": 6.659839521781879e-05, "loss": 12.0097, "step": 22777 }, { "epoch": 1.2403531341678409, "grad_norm": 0.5152176707698568, "learning_rate": 6.659008354513844e-05, "loss": 11.7026, "step": 22778 }, { "epoch": 1.2404075881644239, "grad_norm": 0.5728070721846744, "learning_rate": 6.658177213225927e-05, "loss": 11.87, "step": 22779 }, { "epoch": 1.2404620421610069, "grad_norm": 0.5975612798629394, "learning_rate": 6.657346097924581e-05, "loss": 11.8497, "step": 22780 }, { "epoch": 1.2405164961575899, "grad_norm": 0.5670380370392075, "learning_rate": 6.656515008616275e-05, "loss": 12.0412, "step": 22781 }, { "epoch": 1.2405709501541728, "grad_norm": 0.5194776893845515, "learning_rate": 6.655683945307467e-05, "loss": 11.9909, "step": 22782 }, { "epoch": 1.2406254041507558, "grad_norm": 0.546511889568762, "learning_rate": 6.654852908004623e-05, "loss": 11.9428, "step": 22783 }, { "epoch": 1.2406798581473388, "grad_norm": 0.5292786096158895, "learning_rate": 6.654021896714204e-05, "loss": 11.9456, "step": 22784 }, { "epoch": 1.2407343121439218, "grad_norm": 0.5195446486772057, "learning_rate": 6.653190911442669e-05, "loss": 12.0358, "step": 22785 }, { "epoch": 1.240788766140505, "grad_norm": 0.5578130700424351, "learning_rate": 6.652359952196483e-05, "loss": 12.1051, "step": 22786 }, { "epoch": 1.240843220137088, "grad_norm": 0.5627174905235722, "learning_rate": 6.651529018982113e-05, "loss": 11.8365, "step": 22787 }, { "epoch": 1.240897674133671, "grad_norm": 0.5522797222027749, "learning_rate": 6.65069811180601e-05, "loss": 12.0269, "step": 22788 }, { "epoch": 1.240952128130254, "grad_norm": 0.5400125516162491, "learning_rate": 6.64986723067464e-05, "loss": 12.0219, "step": 22789 }, { "epoch": 1.241006582126837, "grad_norm": 0.5535811517532464, "learning_rate": 6.649036375594466e-05, "loss": 11.9918, "step": 22790 }, { "epoch": 1.24106103612342, "grad_norm": 0.5981970822688325, "learning_rate": 6.648205546571947e-05, "loss": 11.8923, "step": 22791 }, { "epoch": 1.241115490120003, "grad_norm": 0.49991588153224736, "learning_rate": 6.647374743613542e-05, "loss": 11.9389, "step": 22792 }, { "epoch": 1.241169944116586, "grad_norm": 0.5064208515861376, "learning_rate": 6.646543966725715e-05, "loss": 12.0333, "step": 22793 }, { "epoch": 1.241224398113169, "grad_norm": 0.5021847292174838, "learning_rate": 6.645713215914924e-05, "loss": 11.8361, "step": 22794 }, { "epoch": 1.241278852109752, "grad_norm": 0.5778181717137493, "learning_rate": 6.64488249118763e-05, "loss": 11.8868, "step": 22795 }, { "epoch": 1.241333306106335, "grad_norm": 0.47529978052258676, "learning_rate": 6.644051792550288e-05, "loss": 11.9687, "step": 22796 }, { "epoch": 1.241387760102918, "grad_norm": 0.5428177435372803, "learning_rate": 6.643221120009371e-05, "loss": 11.9814, "step": 22797 }, { "epoch": 1.241442214099501, "grad_norm": 0.5526512328224183, "learning_rate": 6.642390473571324e-05, "loss": 11.892, "step": 22798 }, { "epoch": 1.2414966680960842, "grad_norm": 0.5800933044027282, "learning_rate": 6.641559853242612e-05, "loss": 12.0973, "step": 22799 }, { "epoch": 1.2415511220926672, "grad_norm": 0.6170253827013614, "learning_rate": 6.640729259029692e-05, "loss": 11.9002, "step": 22800 }, { "epoch": 1.2416055760892502, "grad_norm": 0.5343101617382512, "learning_rate": 6.639898690939025e-05, "loss": 11.9431, "step": 22801 }, { "epoch": 1.2416600300858331, "grad_norm": 0.516671138329763, "learning_rate": 6.639068148977072e-05, "loss": 11.9978, "step": 22802 }, { "epoch": 1.2417144840824161, "grad_norm": 0.5553064455594036, "learning_rate": 6.638237633150288e-05, "loss": 12.0286, "step": 22803 }, { "epoch": 1.2417689380789991, "grad_norm": 0.5912290720511462, "learning_rate": 6.637407143465131e-05, "loss": 11.8816, "step": 22804 }, { "epoch": 1.2418233920755821, "grad_norm": 0.5343535316534611, "learning_rate": 6.63657667992806e-05, "loss": 11.7998, "step": 22805 }, { "epoch": 1.2418778460721651, "grad_norm": 0.5206141984023502, "learning_rate": 6.635746242545532e-05, "loss": 11.9438, "step": 22806 }, { "epoch": 1.241932300068748, "grad_norm": 0.6804279007851276, "learning_rate": 6.63491583132401e-05, "loss": 11.9349, "step": 22807 }, { "epoch": 1.241986754065331, "grad_norm": 0.5287261857381383, "learning_rate": 6.634085446269944e-05, "loss": 11.8697, "step": 22808 }, { "epoch": 1.2420412080619143, "grad_norm": 0.5142994957197141, "learning_rate": 6.633255087389793e-05, "loss": 11.9309, "step": 22809 }, { "epoch": 1.2420956620584973, "grad_norm": 0.6111271483717513, "learning_rate": 6.632424754690017e-05, "loss": 11.9755, "step": 22810 }, { "epoch": 1.2421501160550803, "grad_norm": 0.5223875794535703, "learning_rate": 6.631594448177066e-05, "loss": 11.9082, "step": 22811 }, { "epoch": 1.2422045700516633, "grad_norm": 0.5995704030578055, "learning_rate": 6.630764167857405e-05, "loss": 11.9782, "step": 22812 }, { "epoch": 1.2422590240482463, "grad_norm": 0.505948768567176, "learning_rate": 6.629933913737486e-05, "loss": 11.9421, "step": 22813 }, { "epoch": 1.2423134780448293, "grad_norm": 0.54299871443734, "learning_rate": 6.629103685823767e-05, "loss": 11.9526, "step": 22814 }, { "epoch": 1.2423679320414123, "grad_norm": 0.493709313312357, "learning_rate": 6.628273484122703e-05, "loss": 11.8863, "step": 22815 }, { "epoch": 1.2424223860379953, "grad_norm": 0.5821463873061075, "learning_rate": 6.62744330864075e-05, "loss": 11.8817, "step": 22816 }, { "epoch": 1.2424768400345783, "grad_norm": 0.5618459142456248, "learning_rate": 6.626613159384366e-05, "loss": 11.9891, "step": 22817 }, { "epoch": 1.2425312940311612, "grad_norm": 0.5702148586567803, "learning_rate": 6.62578303636e-05, "loss": 11.9193, "step": 22818 }, { "epoch": 1.2425857480277442, "grad_norm": 0.5470756108060282, "learning_rate": 6.624952939574111e-05, "loss": 12.0095, "step": 22819 }, { "epoch": 1.2426402020243272, "grad_norm": 0.5709138327143086, "learning_rate": 6.624122869033154e-05, "loss": 11.872, "step": 22820 }, { "epoch": 1.2426946560209102, "grad_norm": 0.5554455275669883, "learning_rate": 6.623292824743585e-05, "loss": 11.9505, "step": 22821 }, { "epoch": 1.2427491100174934, "grad_norm": 0.5779973836898594, "learning_rate": 6.622462806711857e-05, "loss": 12.0125, "step": 22822 }, { "epoch": 1.2428035640140764, "grad_norm": 0.5528729451071411, "learning_rate": 6.621632814944421e-05, "loss": 11.9637, "step": 22823 }, { "epoch": 1.2428580180106594, "grad_norm": 0.6124236927248429, "learning_rate": 6.620802849447738e-05, "loss": 12.025, "step": 22824 }, { "epoch": 1.2429124720072424, "grad_norm": 0.5262678844303744, "learning_rate": 6.61997291022826e-05, "loss": 11.8004, "step": 22825 }, { "epoch": 1.2429669260038254, "grad_norm": 0.5208419089742101, "learning_rate": 6.619142997292437e-05, "loss": 11.86, "step": 22826 }, { "epoch": 1.2430213800004084, "grad_norm": 0.5465260354290734, "learning_rate": 6.618313110646728e-05, "loss": 11.9544, "step": 22827 }, { "epoch": 1.2430758339969914, "grad_norm": 0.5774287225303286, "learning_rate": 6.617483250297582e-05, "loss": 12.0348, "step": 22828 }, { "epoch": 1.2431302879935744, "grad_norm": 0.6067544016343792, "learning_rate": 6.616653416251454e-05, "loss": 11.9491, "step": 22829 }, { "epoch": 1.2431847419901574, "grad_norm": 0.49681862261512644, "learning_rate": 6.615823608514794e-05, "loss": 12.0043, "step": 22830 }, { "epoch": 1.2432391959867404, "grad_norm": 0.5031150136579183, "learning_rate": 6.61499382709406e-05, "loss": 11.9967, "step": 22831 }, { "epoch": 1.2432936499833234, "grad_norm": 0.5081850415140976, "learning_rate": 6.6141640719957e-05, "loss": 11.9372, "step": 22832 }, { "epoch": 1.2433481039799066, "grad_norm": 0.6067099171071557, "learning_rate": 6.61333434322617e-05, "loss": 12.0013, "step": 22833 }, { "epoch": 1.2434025579764896, "grad_norm": 0.4989131758117803, "learning_rate": 6.612504640791914e-05, "loss": 11.8733, "step": 22834 }, { "epoch": 1.2434570119730726, "grad_norm": 0.5458558129514194, "learning_rate": 6.611674964699396e-05, "loss": 12.0634, "step": 22835 }, { "epoch": 1.2435114659696556, "grad_norm": 0.5371725161108272, "learning_rate": 6.61084531495506e-05, "loss": 11.7361, "step": 22836 }, { "epoch": 1.2435659199662386, "grad_norm": 0.5092776703288111, "learning_rate": 6.610015691565359e-05, "loss": 12.0028, "step": 22837 }, { "epoch": 1.2436203739628215, "grad_norm": 0.555539266294269, "learning_rate": 6.609186094536746e-05, "loss": 12.0336, "step": 22838 }, { "epoch": 1.2436748279594045, "grad_norm": 0.5857264935503956, "learning_rate": 6.60835652387567e-05, "loss": 12.0261, "step": 22839 }, { "epoch": 1.2437292819559875, "grad_norm": 0.5883447922003333, "learning_rate": 6.607526979588583e-05, "loss": 12.0262, "step": 22840 }, { "epoch": 1.2437837359525705, "grad_norm": 0.5495328691077903, "learning_rate": 6.606697461681934e-05, "loss": 11.9726, "step": 22841 }, { "epoch": 1.2438381899491535, "grad_norm": 0.6093131901575297, "learning_rate": 6.605867970162174e-05, "loss": 11.9135, "step": 22842 }, { "epoch": 1.2438926439457365, "grad_norm": 0.542856367570383, "learning_rate": 6.605038505035754e-05, "loss": 12.0268, "step": 22843 }, { "epoch": 1.2439470979423195, "grad_norm": 0.5403828018426662, "learning_rate": 6.604209066309124e-05, "loss": 11.9622, "step": 22844 }, { "epoch": 1.2440015519389025, "grad_norm": 0.5417773340777237, "learning_rate": 6.603379653988732e-05, "loss": 12.0037, "step": 22845 }, { "epoch": 1.2440560059354857, "grad_norm": 0.6233297521403925, "learning_rate": 6.602550268081031e-05, "loss": 12.0848, "step": 22846 }, { "epoch": 1.2441104599320687, "grad_norm": 0.5563424335303409, "learning_rate": 6.601720908592471e-05, "loss": 11.9075, "step": 22847 }, { "epoch": 1.2441649139286517, "grad_norm": 0.5498742965493167, "learning_rate": 6.600891575529501e-05, "loss": 11.9492, "step": 22848 }, { "epoch": 1.2442193679252347, "grad_norm": 0.5495562028184575, "learning_rate": 6.600062268898563e-05, "loss": 11.9896, "step": 22849 }, { "epoch": 1.2442738219218177, "grad_norm": 0.5960210216426902, "learning_rate": 6.599232988706112e-05, "loss": 12.032, "step": 22850 }, { "epoch": 1.2443282759184007, "grad_norm": 0.5405426947554883, "learning_rate": 6.598403734958596e-05, "loss": 11.881, "step": 22851 }, { "epoch": 1.2443827299149837, "grad_norm": 0.6180780284103252, "learning_rate": 6.597574507662463e-05, "loss": 12.0612, "step": 22852 }, { "epoch": 1.2444371839115667, "grad_norm": 0.5053292031113816, "learning_rate": 6.596745306824162e-05, "loss": 11.999, "step": 22853 }, { "epoch": 1.2444916379081497, "grad_norm": 0.6042403860038301, "learning_rate": 6.595916132450139e-05, "loss": 11.9975, "step": 22854 }, { "epoch": 1.2445460919047326, "grad_norm": 0.6292996176273324, "learning_rate": 6.595086984546844e-05, "loss": 11.965, "step": 22855 }, { "epoch": 1.2446005459013159, "grad_norm": 0.5859200357053135, "learning_rate": 6.594257863120721e-05, "loss": 12.0571, "step": 22856 }, { "epoch": 1.2446549998978989, "grad_norm": 0.5907369634070254, "learning_rate": 6.593428768178223e-05, "loss": 12.0495, "step": 22857 }, { "epoch": 1.2447094538944818, "grad_norm": 0.9430568370294133, "learning_rate": 6.592599699725796e-05, "loss": 11.966, "step": 22858 }, { "epoch": 1.2447639078910648, "grad_norm": 0.5372757406434971, "learning_rate": 6.591770657769884e-05, "loss": 12.0155, "step": 22859 }, { "epoch": 1.2448183618876478, "grad_norm": 0.5540848802763377, "learning_rate": 6.590941642316931e-05, "loss": 12.0618, "step": 22860 }, { "epoch": 1.2448728158842308, "grad_norm": 0.5110306054951284, "learning_rate": 6.590112653373391e-05, "loss": 11.9085, "step": 22861 }, { "epoch": 1.2449272698808138, "grad_norm": 0.5478302796397301, "learning_rate": 6.589283690945704e-05, "loss": 12.1195, "step": 22862 }, { "epoch": 1.2449817238773968, "grad_norm": 0.5953012209816627, "learning_rate": 6.588454755040322e-05, "loss": 11.8825, "step": 22863 }, { "epoch": 1.2450361778739798, "grad_norm": 0.6720573249958113, "learning_rate": 6.587625845663687e-05, "loss": 11.8381, "step": 22864 }, { "epoch": 1.2450906318705628, "grad_norm": 0.5689001755365769, "learning_rate": 6.586796962822247e-05, "loss": 11.9777, "step": 22865 }, { "epoch": 1.2451450858671458, "grad_norm": 0.5345723488025051, "learning_rate": 6.585968106522443e-05, "loss": 11.9733, "step": 22866 }, { "epoch": 1.2451995398637288, "grad_norm": 0.5711652832127669, "learning_rate": 6.585139276770724e-05, "loss": 11.9361, "step": 22867 }, { "epoch": 1.2452539938603118, "grad_norm": 0.6009085892003865, "learning_rate": 6.58431047357354e-05, "loss": 11.9393, "step": 22868 }, { "epoch": 1.245308447856895, "grad_norm": 0.5404850062028494, "learning_rate": 6.583481696937326e-05, "loss": 11.9827, "step": 22869 }, { "epoch": 1.245362901853478, "grad_norm": 0.5653755541851823, "learning_rate": 6.582652946868532e-05, "loss": 11.9453, "step": 22870 }, { "epoch": 1.245417355850061, "grad_norm": 0.5379684103837694, "learning_rate": 6.5818242233736e-05, "loss": 12.0339, "step": 22871 }, { "epoch": 1.245471809846644, "grad_norm": 0.5248870771654717, "learning_rate": 6.580995526458977e-05, "loss": 12.0152, "step": 22872 }, { "epoch": 1.245526263843227, "grad_norm": 0.5737092984332973, "learning_rate": 6.580166856131106e-05, "loss": 11.9311, "step": 22873 }, { "epoch": 1.24558071783981, "grad_norm": 0.48237775427526935, "learning_rate": 6.579338212396432e-05, "loss": 11.9401, "step": 22874 }, { "epoch": 1.245635171836393, "grad_norm": 0.5559850193376921, "learning_rate": 6.578509595261397e-05, "loss": 11.8752, "step": 22875 }, { "epoch": 1.245689625832976, "grad_norm": 0.4930299259343834, "learning_rate": 6.577681004732445e-05, "loss": 11.7788, "step": 22876 }, { "epoch": 1.245744079829559, "grad_norm": 0.5179040222610088, "learning_rate": 6.576852440816022e-05, "loss": 11.9134, "step": 22877 }, { "epoch": 1.245798533826142, "grad_norm": 0.510126945991866, "learning_rate": 6.576023903518568e-05, "loss": 11.8957, "step": 22878 }, { "epoch": 1.2458529878227251, "grad_norm": 0.5671174775740991, "learning_rate": 6.575195392846525e-05, "loss": 11.969, "step": 22879 }, { "epoch": 1.2459074418193081, "grad_norm": 0.5147618181834822, "learning_rate": 6.574366908806337e-05, "loss": 11.9845, "step": 22880 }, { "epoch": 1.2459618958158911, "grad_norm": 0.507930474647851, "learning_rate": 6.573538451404446e-05, "loss": 11.9498, "step": 22881 }, { "epoch": 1.2460163498124741, "grad_norm": 0.5310853360787483, "learning_rate": 6.572710020647295e-05, "loss": 11.9354, "step": 22882 }, { "epoch": 1.246070803809057, "grad_norm": 0.5177735012089201, "learning_rate": 6.571881616541323e-05, "loss": 11.8733, "step": 22883 }, { "epoch": 1.24612525780564, "grad_norm": 0.5360896782995199, "learning_rate": 6.571053239092977e-05, "loss": 12.0134, "step": 22884 }, { "epoch": 1.246179711802223, "grad_norm": 0.5017990803176361, "learning_rate": 6.570224888308695e-05, "loss": 11.938, "step": 22885 }, { "epoch": 1.246234165798806, "grad_norm": 0.5250885074961562, "learning_rate": 6.569396564194921e-05, "loss": 11.9224, "step": 22886 }, { "epoch": 1.246288619795389, "grad_norm": 0.5691015680339959, "learning_rate": 6.568568266758094e-05, "loss": 11.8836, "step": 22887 }, { "epoch": 1.246343073791972, "grad_norm": 0.5500882464683317, "learning_rate": 6.567739996004658e-05, "loss": 12.0345, "step": 22888 }, { "epoch": 1.246397527788555, "grad_norm": 0.5518441169966526, "learning_rate": 6.56691175194105e-05, "loss": 11.9901, "step": 22889 }, { "epoch": 1.246451981785138, "grad_norm": 0.5058913121367379, "learning_rate": 6.56608353457371e-05, "loss": 11.9948, "step": 22890 }, { "epoch": 1.246506435781721, "grad_norm": 0.5543399473024002, "learning_rate": 6.565255343909081e-05, "loss": 11.9444, "step": 22891 }, { "epoch": 1.2465608897783043, "grad_norm": 0.4886695931854353, "learning_rate": 6.564427179953603e-05, "loss": 11.8646, "step": 22892 }, { "epoch": 1.2466153437748873, "grad_norm": 0.5083409658185977, "learning_rate": 6.563599042713715e-05, "loss": 11.9316, "step": 22893 }, { "epoch": 1.2466697977714702, "grad_norm": 0.509851838688398, "learning_rate": 6.562770932195856e-05, "loss": 12.0863, "step": 22894 }, { "epoch": 1.2467242517680532, "grad_norm": 0.5570191696101516, "learning_rate": 6.561942848406469e-05, "loss": 12.0591, "step": 22895 }, { "epoch": 1.2467787057646362, "grad_norm": 0.5679042787084743, "learning_rate": 6.561114791351987e-05, "loss": 12.0477, "step": 22896 }, { "epoch": 1.2468331597612192, "grad_norm": 0.5306661509851105, "learning_rate": 6.560286761038858e-05, "loss": 11.938, "step": 22897 }, { "epoch": 1.2468876137578022, "grad_norm": 0.5204155469272607, "learning_rate": 6.559458757473516e-05, "loss": 11.8639, "step": 22898 }, { "epoch": 1.2469420677543852, "grad_norm": 0.4769915772493378, "learning_rate": 6.558630780662397e-05, "loss": 11.9881, "step": 22899 }, { "epoch": 1.2469965217509682, "grad_norm": 0.552021418415857, "learning_rate": 6.557802830611943e-05, "loss": 11.9603, "step": 22900 }, { "epoch": 1.2470509757475512, "grad_norm": 0.6544919271195204, "learning_rate": 6.556974907328591e-05, "loss": 12.048, "step": 22901 }, { "epoch": 1.2471054297441344, "grad_norm": 0.5398390438960115, "learning_rate": 6.55614701081878e-05, "loss": 11.8334, "step": 22902 }, { "epoch": 1.2471598837407174, "grad_norm": 0.5215423436765481, "learning_rate": 6.555319141088947e-05, "loss": 11.908, "step": 22903 }, { "epoch": 1.2472143377373004, "grad_norm": 0.5968230781626701, "learning_rate": 6.554491298145531e-05, "loss": 11.9538, "step": 22904 }, { "epoch": 1.2472687917338834, "grad_norm": 0.5614518888979998, "learning_rate": 6.553663481994965e-05, "loss": 11.9059, "step": 22905 }, { "epoch": 1.2473232457304664, "grad_norm": 0.5533441516707198, "learning_rate": 6.552835692643693e-05, "loss": 11.9552, "step": 22906 }, { "epoch": 1.2473776997270494, "grad_norm": 0.49192685322037805, "learning_rate": 6.552007930098147e-05, "loss": 11.905, "step": 22907 }, { "epoch": 1.2474321537236324, "grad_norm": 0.5075322530471847, "learning_rate": 6.551180194364772e-05, "loss": 11.9061, "step": 22908 }, { "epoch": 1.2474866077202154, "grad_norm": 0.5567091267623645, "learning_rate": 6.550352485449991e-05, "loss": 11.9821, "step": 22909 }, { "epoch": 1.2475410617167983, "grad_norm": 0.543098851357432, "learning_rate": 6.549524803360248e-05, "loss": 11.9562, "step": 22910 }, { "epoch": 1.2475955157133813, "grad_norm": 0.6312837386891761, "learning_rate": 6.54869714810198e-05, "loss": 11.8395, "step": 22911 }, { "epoch": 1.2476499697099643, "grad_norm": 0.736972893975821, "learning_rate": 6.547869519681622e-05, "loss": 12.0243, "step": 22912 }, { "epoch": 1.2477044237065473, "grad_norm": 0.5898809579991051, "learning_rate": 6.54704191810561e-05, "loss": 12.0607, "step": 22913 }, { "epoch": 1.2477588777031303, "grad_norm": 0.5352698872447551, "learning_rate": 6.546214343380379e-05, "loss": 11.928, "step": 22914 }, { "epoch": 1.2478133316997133, "grad_norm": 0.5886613853761997, "learning_rate": 6.545386795512364e-05, "loss": 11.8943, "step": 22915 }, { "epoch": 1.2478677856962965, "grad_norm": 0.5940386684191961, "learning_rate": 6.544559274507998e-05, "loss": 11.8677, "step": 22916 }, { "epoch": 1.2479222396928795, "grad_norm": 0.6104365727146253, "learning_rate": 6.543731780373721e-05, "loss": 11.9247, "step": 22917 }, { "epoch": 1.2479766936894625, "grad_norm": 0.5376032921813544, "learning_rate": 6.542904313115971e-05, "loss": 12.0488, "step": 22918 }, { "epoch": 1.2480311476860455, "grad_norm": 0.5762837312993669, "learning_rate": 6.542076872741172e-05, "loss": 12.0017, "step": 22919 }, { "epoch": 1.2480856016826285, "grad_norm": 0.5363121824057173, "learning_rate": 6.54124945925576e-05, "loss": 11.9132, "step": 22920 }, { "epoch": 1.2481400556792115, "grad_norm": 0.5791488075391481, "learning_rate": 6.540422072666175e-05, "loss": 11.7995, "step": 22921 }, { "epoch": 1.2481945096757945, "grad_norm": 0.5395242927333442, "learning_rate": 6.539594712978848e-05, "loss": 11.955, "step": 22922 }, { "epoch": 1.2482489636723775, "grad_norm": 0.5487681436831426, "learning_rate": 6.538767380200212e-05, "loss": 11.9998, "step": 22923 }, { "epoch": 1.2483034176689605, "grad_norm": 0.5203020553522513, "learning_rate": 6.537940074336701e-05, "loss": 11.9544, "step": 22924 }, { "epoch": 1.2483578716655435, "grad_norm": 0.5889912883526086, "learning_rate": 6.537112795394751e-05, "loss": 11.9886, "step": 22925 }, { "epoch": 1.2484123256621267, "grad_norm": 0.5290888907053558, "learning_rate": 6.536285543380791e-05, "loss": 11.8611, "step": 22926 }, { "epoch": 1.2484667796587097, "grad_norm": 0.528132850681092, "learning_rate": 6.535458318301255e-05, "loss": 11.9693, "step": 22927 }, { "epoch": 1.2485212336552927, "grad_norm": 0.555364238149745, "learning_rate": 6.53463112016258e-05, "loss": 11.9385, "step": 22928 }, { "epoch": 1.2485756876518757, "grad_norm": 0.49825336231402734, "learning_rate": 6.53380394897119e-05, "loss": 11.8099, "step": 22929 }, { "epoch": 1.2486301416484586, "grad_norm": 0.5050365700015226, "learning_rate": 6.532976804733524e-05, "loss": 11.9691, "step": 22930 }, { "epoch": 1.2486845956450416, "grad_norm": 0.5237797915712181, "learning_rate": 6.532149687456011e-05, "loss": 11.8852, "step": 22931 }, { "epoch": 1.2487390496416246, "grad_norm": 0.5184891802825093, "learning_rate": 6.531322597145081e-05, "loss": 12.0183, "step": 22932 }, { "epoch": 1.2487935036382076, "grad_norm": 0.5059313801881709, "learning_rate": 6.530495533807171e-05, "loss": 11.9459, "step": 22933 }, { "epoch": 1.2488479576347906, "grad_norm": 0.5441516367093713, "learning_rate": 6.529668497448709e-05, "loss": 12.0306, "step": 22934 }, { "epoch": 1.2489024116313736, "grad_norm": 0.5734294262571994, "learning_rate": 6.528841488076127e-05, "loss": 11.9553, "step": 22935 }, { "epoch": 1.2489568656279566, "grad_norm": 0.5639771296590832, "learning_rate": 6.528014505695856e-05, "loss": 11.9432, "step": 22936 }, { "epoch": 1.2490113196245396, "grad_norm": 0.549302656599705, "learning_rate": 6.527187550314327e-05, "loss": 12.0203, "step": 22937 }, { "epoch": 1.2490657736211226, "grad_norm": 0.5319682895932197, "learning_rate": 6.52636062193797e-05, "loss": 11.8934, "step": 22938 }, { "epoch": 1.2491202276177058, "grad_norm": 0.5681112030552473, "learning_rate": 6.525533720573214e-05, "loss": 11.8764, "step": 22939 }, { "epoch": 1.2491746816142888, "grad_norm": 0.5331700864501003, "learning_rate": 6.524706846226492e-05, "loss": 11.8594, "step": 22940 }, { "epoch": 1.2492291356108718, "grad_norm": 0.5368385453299201, "learning_rate": 6.52387999890423e-05, "loss": 12.0113, "step": 22941 }, { "epoch": 1.2492835896074548, "grad_norm": 0.561377539879999, "learning_rate": 6.523053178612861e-05, "loss": 11.8856, "step": 22942 }, { "epoch": 1.2493380436040378, "grad_norm": 0.5632375007956382, "learning_rate": 6.522226385358813e-05, "loss": 12.0217, "step": 22943 }, { "epoch": 1.2493924976006208, "grad_norm": 0.5442323860301268, "learning_rate": 6.521399619148517e-05, "loss": 11.972, "step": 22944 }, { "epoch": 1.2494469515972038, "grad_norm": 0.524902231818379, "learning_rate": 6.5205728799884e-05, "loss": 11.9334, "step": 22945 }, { "epoch": 1.2495014055937868, "grad_norm": 0.7055924414692931, "learning_rate": 6.519746167884892e-05, "loss": 12.0036, "step": 22946 }, { "epoch": 1.2495558595903697, "grad_norm": 0.590997549458098, "learning_rate": 6.518919482844423e-05, "loss": 11.7153, "step": 22947 }, { "epoch": 1.2496103135869527, "grad_norm": 0.5579959947996137, "learning_rate": 6.518092824873421e-05, "loss": 11.8822, "step": 22948 }, { "epoch": 1.249664767583536, "grad_norm": 0.5963325373914566, "learning_rate": 6.51726619397831e-05, "loss": 11.8778, "step": 22949 }, { "epoch": 1.249719221580119, "grad_norm": 0.585942340167395, "learning_rate": 6.516439590165522e-05, "loss": 11.9765, "step": 22950 }, { "epoch": 1.249773675576702, "grad_norm": 0.5401228626095188, "learning_rate": 6.515613013441485e-05, "loss": 11.9633, "step": 22951 }, { "epoch": 1.249828129573285, "grad_norm": 0.5686170809381587, "learning_rate": 6.514786463812624e-05, "loss": 11.8138, "step": 22952 }, { "epoch": 1.249882583569868, "grad_norm": 0.5419393697444189, "learning_rate": 6.513959941285368e-05, "loss": 11.8725, "step": 22953 }, { "epoch": 1.249937037566451, "grad_norm": 0.5487208114712231, "learning_rate": 6.513133445866143e-05, "loss": 11.8953, "step": 22954 }, { "epoch": 1.249991491563034, "grad_norm": 0.5181968881859137, "learning_rate": 6.512306977561379e-05, "loss": 11.8141, "step": 22955 }, { "epoch": 1.250045945559617, "grad_norm": 0.5156325427064138, "learning_rate": 6.5114805363775e-05, "loss": 11.8736, "step": 22956 }, { "epoch": 1.2501003995562, "grad_norm": 0.5216489790880355, "learning_rate": 6.510654122320934e-05, "loss": 11.9813, "step": 22957 }, { "epoch": 1.2501548535527829, "grad_norm": 0.5144461249999991, "learning_rate": 6.509827735398108e-05, "loss": 11.9433, "step": 22958 }, { "epoch": 1.2502093075493659, "grad_norm": 0.5668710856366023, "learning_rate": 6.509001375615446e-05, "loss": 12.0624, "step": 22959 }, { "epoch": 1.2502637615459489, "grad_norm": 0.5463624945385009, "learning_rate": 6.508175042979374e-05, "loss": 11.9766, "step": 22960 }, { "epoch": 1.2503182155425319, "grad_norm": 0.489156747425386, "learning_rate": 6.507348737496319e-05, "loss": 11.9146, "step": 22961 }, { "epoch": 1.2503726695391149, "grad_norm": 0.5718373374121186, "learning_rate": 6.506522459172705e-05, "loss": 12.0009, "step": 22962 }, { "epoch": 1.250427123535698, "grad_norm": 0.52519505403957, "learning_rate": 6.505696208014959e-05, "loss": 11.9269, "step": 22963 }, { "epoch": 1.250481577532281, "grad_norm": 0.6130330578240278, "learning_rate": 6.504869984029504e-05, "loss": 11.9778, "step": 22964 }, { "epoch": 1.250536031528864, "grad_norm": 0.5223733110557156, "learning_rate": 6.504043787222767e-05, "loss": 11.8907, "step": 22965 }, { "epoch": 1.250590485525447, "grad_norm": 0.5227047815433213, "learning_rate": 6.503217617601171e-05, "loss": 12.0034, "step": 22966 }, { "epoch": 1.25064493952203, "grad_norm": 0.5840137043222837, "learning_rate": 6.502391475171142e-05, "loss": 11.9016, "step": 22967 }, { "epoch": 1.250699393518613, "grad_norm": 0.6283189772930491, "learning_rate": 6.501565359939108e-05, "loss": 12.0354, "step": 22968 }, { "epoch": 1.250753847515196, "grad_norm": 0.5581576060407885, "learning_rate": 6.500739271911482e-05, "loss": 11.9421, "step": 22969 }, { "epoch": 1.250808301511779, "grad_norm": 0.5156956675751961, "learning_rate": 6.499913211094697e-05, "loss": 11.9535, "step": 22970 }, { "epoch": 1.250862755508362, "grad_norm": 0.5931791196620244, "learning_rate": 6.499087177495173e-05, "loss": 11.9979, "step": 22971 }, { "epoch": 1.2509172095049452, "grad_norm": 0.4876625214204427, "learning_rate": 6.498261171119333e-05, "loss": 11.7944, "step": 22972 }, { "epoch": 1.2509716635015282, "grad_norm": 0.5471450507710717, "learning_rate": 6.497435191973605e-05, "loss": 11.9792, "step": 22973 }, { "epoch": 1.2510261174981112, "grad_norm": 0.5485752705540134, "learning_rate": 6.496609240064404e-05, "loss": 12.1258, "step": 22974 }, { "epoch": 1.2510805714946942, "grad_norm": 0.5385059712319659, "learning_rate": 6.495783315398159e-05, "loss": 11.9627, "step": 22975 }, { "epoch": 1.2511350254912772, "grad_norm": 0.5762942386677303, "learning_rate": 6.494957417981291e-05, "loss": 12.0554, "step": 22976 }, { "epoch": 1.2511894794878602, "grad_norm": 0.530072852836688, "learning_rate": 6.49413154782022e-05, "loss": 11.8623, "step": 22977 }, { "epoch": 1.2512439334844432, "grad_norm": 0.5243166244014196, "learning_rate": 6.493305704921371e-05, "loss": 11.972, "step": 22978 }, { "epoch": 1.2512983874810262, "grad_norm": 0.5025990193059422, "learning_rate": 6.49247988929117e-05, "loss": 11.9838, "step": 22979 }, { "epoch": 1.2513528414776092, "grad_norm": 0.5799394634592401, "learning_rate": 6.491654100936029e-05, "loss": 12.1679, "step": 22980 }, { "epoch": 1.2514072954741922, "grad_norm": 0.5624798675759112, "learning_rate": 6.490828339862372e-05, "loss": 11.9008, "step": 22981 }, { "epoch": 1.2514617494707752, "grad_norm": 0.5181483739429259, "learning_rate": 6.490002606076625e-05, "loss": 11.9532, "step": 22982 }, { "epoch": 1.2515162034673581, "grad_norm": 0.6048615842854305, "learning_rate": 6.489176899585205e-05, "loss": 12.0913, "step": 22983 }, { "epoch": 1.2515706574639411, "grad_norm": 0.5168002705232881, "learning_rate": 6.488351220394536e-05, "loss": 11.9505, "step": 22984 }, { "epoch": 1.2516251114605241, "grad_norm": 0.5534263194047239, "learning_rate": 6.487525568511036e-05, "loss": 11.9204, "step": 22985 }, { "epoch": 1.2516795654571073, "grad_norm": 0.5006750619220163, "learning_rate": 6.486699943941126e-05, "loss": 11.9048, "step": 22986 }, { "epoch": 1.2517340194536903, "grad_norm": 0.5385279523589465, "learning_rate": 6.485874346691227e-05, "loss": 11.9173, "step": 22987 }, { "epoch": 1.2517884734502733, "grad_norm": 0.5664077290372833, "learning_rate": 6.485048776767754e-05, "loss": 11.9994, "step": 22988 }, { "epoch": 1.2518429274468563, "grad_norm": 0.6190079342912472, "learning_rate": 6.48422323417714e-05, "loss": 11.8936, "step": 22989 }, { "epoch": 1.2518973814434393, "grad_norm": 0.5514339130229768, "learning_rate": 6.483397718925792e-05, "loss": 11.9804, "step": 22990 }, { "epoch": 1.2519518354400223, "grad_norm": 0.5127695023833323, "learning_rate": 6.482572231020132e-05, "loss": 11.8427, "step": 22991 }, { "epoch": 1.2520062894366053, "grad_norm": 0.5620487969975653, "learning_rate": 6.481746770466577e-05, "loss": 11.9001, "step": 22992 }, { "epoch": 1.2520607434331883, "grad_norm": 0.49963670589846776, "learning_rate": 6.480921337271553e-05, "loss": 11.8697, "step": 22993 }, { "epoch": 1.2521151974297713, "grad_norm": 0.5557818379722875, "learning_rate": 6.480095931441475e-05, "loss": 11.8505, "step": 22994 }, { "epoch": 1.2521696514263545, "grad_norm": 0.5568201837159433, "learning_rate": 6.47927055298276e-05, "loss": 12.027, "step": 22995 }, { "epoch": 1.2522241054229375, "grad_norm": 0.5674997406215048, "learning_rate": 6.478445201901827e-05, "loss": 12.0055, "step": 22996 }, { "epoch": 1.2522785594195205, "grad_norm": 0.5519263533010362, "learning_rate": 6.477619878205095e-05, "loss": 11.9743, "step": 22997 }, { "epoch": 1.2523330134161035, "grad_norm": 0.49660196651081107, "learning_rate": 6.476794581898983e-05, "loss": 11.9608, "step": 22998 }, { "epoch": 1.2523874674126865, "grad_norm": 0.5599330299498402, "learning_rate": 6.475969312989909e-05, "loss": 11.9485, "step": 22999 }, { "epoch": 1.2524419214092695, "grad_norm": 0.5344015870015618, "learning_rate": 6.475144071484285e-05, "loss": 11.8989, "step": 23000 }, { "epoch": 1.2524963754058525, "grad_norm": 0.5197967280637122, "learning_rate": 6.474318857388532e-05, "loss": 11.8861, "step": 23001 }, { "epoch": 1.2525508294024355, "grad_norm": 0.5200498758350924, "learning_rate": 6.473493670709068e-05, "loss": 11.8961, "step": 23002 }, { "epoch": 1.2526052833990184, "grad_norm": 0.5206830469370257, "learning_rate": 6.472668511452307e-05, "loss": 11.9804, "step": 23003 }, { "epoch": 1.2526597373956014, "grad_norm": 0.5131759205830891, "learning_rate": 6.471843379624669e-05, "loss": 11.9574, "step": 23004 }, { "epoch": 1.2527141913921844, "grad_norm": 0.513270765710931, "learning_rate": 6.471018275232568e-05, "loss": 11.9623, "step": 23005 }, { "epoch": 1.2527686453887674, "grad_norm": 0.5124940921030762, "learning_rate": 6.47019319828242e-05, "loss": 11.8931, "step": 23006 }, { "epoch": 1.2528230993853504, "grad_norm": 0.5672971005079075, "learning_rate": 6.469368148780641e-05, "loss": 11.9584, "step": 23007 }, { "epoch": 1.2528775533819334, "grad_norm": 0.6092767991292602, "learning_rate": 6.468543126733651e-05, "loss": 12.1172, "step": 23008 }, { "epoch": 1.2529320073785164, "grad_norm": 0.5113985421572147, "learning_rate": 6.46771813214786e-05, "loss": 11.9211, "step": 23009 }, { "epoch": 1.2529864613750996, "grad_norm": 0.5015182630609952, "learning_rate": 6.466893165029685e-05, "loss": 11.9121, "step": 23010 }, { "epoch": 1.2530409153716826, "grad_norm": 0.5314409037250363, "learning_rate": 6.466068225385542e-05, "loss": 11.8639, "step": 23011 }, { "epoch": 1.2530953693682656, "grad_norm": 0.545839570387426, "learning_rate": 6.465243313221842e-05, "loss": 11.9892, "step": 23012 }, { "epoch": 1.2531498233648486, "grad_norm": 0.4882250111572915, "learning_rate": 6.464418428545006e-05, "loss": 11.9823, "step": 23013 }, { "epoch": 1.2532042773614316, "grad_norm": 0.5030383229180341, "learning_rate": 6.463593571361441e-05, "loss": 11.9142, "step": 23014 }, { "epoch": 1.2532587313580146, "grad_norm": 0.5949180837371855, "learning_rate": 6.46276874167757e-05, "loss": 11.9972, "step": 23015 }, { "epoch": 1.2533131853545976, "grad_norm": 0.5167607919930672, "learning_rate": 6.4619439394998e-05, "loss": 11.7229, "step": 23016 }, { "epoch": 1.2533676393511806, "grad_norm": 0.5033872352171551, "learning_rate": 6.461119164834548e-05, "loss": 11.8811, "step": 23017 }, { "epoch": 1.2534220933477638, "grad_norm": 0.5670707557320989, "learning_rate": 6.460294417688227e-05, "loss": 12.1498, "step": 23018 }, { "epoch": 1.2534765473443468, "grad_norm": 0.5405924893289286, "learning_rate": 6.459469698067253e-05, "loss": 11.9884, "step": 23019 }, { "epoch": 1.2535310013409298, "grad_norm": 0.604217689453398, "learning_rate": 6.458645005978033e-05, "loss": 12.097, "step": 23020 }, { "epoch": 1.2535854553375128, "grad_norm": 0.49637233258769686, "learning_rate": 6.457820341426985e-05, "loss": 11.8859, "step": 23021 }, { "epoch": 1.2536399093340957, "grad_norm": 0.5607773866657347, "learning_rate": 6.456995704420518e-05, "loss": 12.0768, "step": 23022 }, { "epoch": 1.2536943633306787, "grad_norm": 0.5030846259925406, "learning_rate": 6.456171094965049e-05, "loss": 11.9584, "step": 23023 }, { "epoch": 1.2537488173272617, "grad_norm": 0.571124339836725, "learning_rate": 6.455346513066985e-05, "loss": 11.9904, "step": 23024 }, { "epoch": 1.2538032713238447, "grad_norm": 0.515989313493274, "learning_rate": 6.454521958732743e-05, "loss": 12.0152, "step": 23025 }, { "epoch": 1.2538577253204277, "grad_norm": 0.5371035254206465, "learning_rate": 6.45369743196873e-05, "loss": 11.8703, "step": 23026 }, { "epoch": 1.2539121793170107, "grad_norm": 0.5229543350872914, "learning_rate": 6.452872932781363e-05, "loss": 11.9173, "step": 23027 }, { "epoch": 1.2539666333135937, "grad_norm": 0.5932582677642733, "learning_rate": 6.45204846117705e-05, "loss": 11.8756, "step": 23028 }, { "epoch": 1.2540210873101767, "grad_norm": 0.5891383987686949, "learning_rate": 6.451224017162209e-05, "loss": 11.965, "step": 23029 }, { "epoch": 1.2540755413067597, "grad_norm": 0.4903832737201092, "learning_rate": 6.450399600743238e-05, "loss": 11.9513, "step": 23030 }, { "epoch": 1.2541299953033427, "grad_norm": 0.5674561792441308, "learning_rate": 6.449575211926556e-05, "loss": 11.8607, "step": 23031 }, { "epoch": 1.2541844492999257, "grad_norm": 0.5579231146428775, "learning_rate": 6.448750850718575e-05, "loss": 12.0236, "step": 23032 }, { "epoch": 1.254238903296509, "grad_norm": 0.5181541147940405, "learning_rate": 6.4479265171257e-05, "loss": 11.8999, "step": 23033 }, { "epoch": 1.2542933572930919, "grad_norm": 0.5489469574332925, "learning_rate": 6.447102211154346e-05, "loss": 11.9517, "step": 23034 }, { "epoch": 1.2543478112896749, "grad_norm": 0.5369419671946504, "learning_rate": 6.44627793281092e-05, "loss": 12.0059, "step": 23035 }, { "epoch": 1.2544022652862579, "grad_norm": 0.5083090277377812, "learning_rate": 6.445453682101835e-05, "loss": 12.0114, "step": 23036 }, { "epoch": 1.2544567192828409, "grad_norm": 0.506253196690573, "learning_rate": 6.444629459033493e-05, "loss": 12.0002, "step": 23037 }, { "epoch": 1.2545111732794239, "grad_norm": 0.5636252841187461, "learning_rate": 6.443805263612313e-05, "loss": 12.0145, "step": 23038 }, { "epoch": 1.2545656272760068, "grad_norm": 0.5842933617248807, "learning_rate": 6.442981095844702e-05, "loss": 11.812, "step": 23039 }, { "epoch": 1.2546200812725898, "grad_norm": 0.5633163761859511, "learning_rate": 6.442156955737064e-05, "loss": 11.9461, "step": 23040 }, { "epoch": 1.2546745352691728, "grad_norm": 0.5499040542405221, "learning_rate": 6.441332843295807e-05, "loss": 12.1107, "step": 23041 }, { "epoch": 1.254728989265756, "grad_norm": 0.5445402995069029, "learning_rate": 6.440508758527344e-05, "loss": 11.9543, "step": 23042 }, { "epoch": 1.254783443262339, "grad_norm": 0.5453971556779926, "learning_rate": 6.439684701438085e-05, "loss": 12.0705, "step": 23043 }, { "epoch": 1.254837897258922, "grad_norm": 0.5205463045709341, "learning_rate": 6.438860672034433e-05, "loss": 11.8273, "step": 23044 }, { "epoch": 1.254892351255505, "grad_norm": 0.5060498204368865, "learning_rate": 6.438036670322795e-05, "loss": 11.8163, "step": 23045 }, { "epoch": 1.254946805252088, "grad_norm": 0.5179264361560095, "learning_rate": 6.437212696309585e-05, "loss": 11.8962, "step": 23046 }, { "epoch": 1.255001259248671, "grad_norm": 0.5623083269078556, "learning_rate": 6.436388750001205e-05, "loss": 12.0951, "step": 23047 }, { "epoch": 1.255055713245254, "grad_norm": 0.523609816078576, "learning_rate": 6.435564831404061e-05, "loss": 11.7966, "step": 23048 }, { "epoch": 1.255110167241837, "grad_norm": 0.5472175230259042, "learning_rate": 6.434740940524569e-05, "loss": 11.8966, "step": 23049 }, { "epoch": 1.25516462123842, "grad_norm": 0.5651524739962391, "learning_rate": 6.433917077369127e-05, "loss": 11.9129, "step": 23050 }, { "epoch": 1.255219075235003, "grad_norm": 0.6087141538496955, "learning_rate": 6.433093241944141e-05, "loss": 11.8583, "step": 23051 }, { "epoch": 1.255273529231586, "grad_norm": 0.5504794378725869, "learning_rate": 6.432269434256021e-05, "loss": 11.9824, "step": 23052 }, { "epoch": 1.255327983228169, "grad_norm": 0.5703693768570628, "learning_rate": 6.431445654311173e-05, "loss": 11.7509, "step": 23053 }, { "epoch": 1.255382437224752, "grad_norm": 0.5131923723032333, "learning_rate": 6.430621902116e-05, "loss": 12.0155, "step": 23054 }, { "epoch": 1.255436891221335, "grad_norm": 0.5824598217731434, "learning_rate": 6.429798177676913e-05, "loss": 12.092, "step": 23055 }, { "epoch": 1.2554913452179182, "grad_norm": 0.5023407872327654, "learning_rate": 6.428974481000312e-05, "loss": 11.9702, "step": 23056 }, { "epoch": 1.2555457992145012, "grad_norm": 0.5230683133346614, "learning_rate": 6.428150812092606e-05, "loss": 12.0129, "step": 23057 }, { "epoch": 1.2556002532110841, "grad_norm": 0.5839902902678337, "learning_rate": 6.427327170960197e-05, "loss": 11.9333, "step": 23058 }, { "epoch": 1.2556547072076671, "grad_norm": 0.5607862559971485, "learning_rate": 6.426503557609494e-05, "loss": 11.8152, "step": 23059 }, { "epoch": 1.2557091612042501, "grad_norm": 0.5186679038061481, "learning_rate": 6.425679972046895e-05, "loss": 11.9656, "step": 23060 }, { "epoch": 1.2557636152008331, "grad_norm": 0.5383825335095569, "learning_rate": 6.424856414278809e-05, "loss": 11.9613, "step": 23061 }, { "epoch": 1.2558180691974161, "grad_norm": 0.5256951533910921, "learning_rate": 6.424032884311639e-05, "loss": 11.9378, "step": 23062 }, { "epoch": 1.2558725231939991, "grad_norm": 0.5566260473565764, "learning_rate": 6.423209382151787e-05, "loss": 11.9813, "step": 23063 }, { "epoch": 1.255926977190582, "grad_norm": 0.5047489211992726, "learning_rate": 6.422385907805661e-05, "loss": 11.7673, "step": 23064 }, { "epoch": 1.2559814311871653, "grad_norm": 0.5319156804588298, "learning_rate": 6.421562461279662e-05, "loss": 11.9302, "step": 23065 }, { "epoch": 1.2560358851837483, "grad_norm": 0.5469902777045331, "learning_rate": 6.420739042580192e-05, "loss": 11.8669, "step": 23066 }, { "epoch": 1.2560903391803313, "grad_norm": 0.5895496587336508, "learning_rate": 6.419915651713657e-05, "loss": 11.8581, "step": 23067 }, { "epoch": 1.2561447931769143, "grad_norm": 0.4797947992699836, "learning_rate": 6.419092288686458e-05, "loss": 11.9102, "step": 23068 }, { "epoch": 1.2561992471734973, "grad_norm": 0.5577967038474159, "learning_rate": 6.418268953505e-05, "loss": 11.9772, "step": 23069 }, { "epoch": 1.2562537011700803, "grad_norm": 0.5434598292455445, "learning_rate": 6.417445646175682e-05, "loss": 11.9336, "step": 23070 }, { "epoch": 1.2563081551666633, "grad_norm": 0.5360937510040483, "learning_rate": 6.416622366704906e-05, "loss": 11.8435, "step": 23071 }, { "epoch": 1.2563626091632463, "grad_norm": 0.5290102951434025, "learning_rate": 6.415799115099075e-05, "loss": 12.0287, "step": 23072 }, { "epoch": 1.2564170631598293, "grad_norm": 0.4780939803977824, "learning_rate": 6.414975891364591e-05, "loss": 11.8835, "step": 23073 }, { "epoch": 1.2564715171564123, "grad_norm": 0.5697258236466961, "learning_rate": 6.414152695507855e-05, "loss": 11.9116, "step": 23074 }, { "epoch": 1.2565259711529952, "grad_norm": 0.546484044969463, "learning_rate": 6.413329527535272e-05, "loss": 11.888, "step": 23075 }, { "epoch": 1.2565804251495782, "grad_norm": 0.6539474260009979, "learning_rate": 6.412506387453239e-05, "loss": 12.0349, "step": 23076 }, { "epoch": 1.2566348791461612, "grad_norm": 0.5893862166396721, "learning_rate": 6.411683275268157e-05, "loss": 11.8892, "step": 23077 }, { "epoch": 1.2566893331427442, "grad_norm": 0.5086572468603361, "learning_rate": 6.410860190986428e-05, "loss": 11.8784, "step": 23078 }, { "epoch": 1.2567437871393274, "grad_norm": 0.5472500138255146, "learning_rate": 6.410037134614455e-05, "loss": 11.8455, "step": 23079 }, { "epoch": 1.2567982411359104, "grad_norm": 0.544630527948792, "learning_rate": 6.409214106158633e-05, "loss": 12.0407, "step": 23080 }, { "epoch": 1.2568526951324934, "grad_norm": 0.5492984380849422, "learning_rate": 6.408391105625365e-05, "loss": 11.9997, "step": 23081 }, { "epoch": 1.2569071491290764, "grad_norm": 0.5511564523785917, "learning_rate": 6.407568133021048e-05, "loss": 12.0376, "step": 23082 }, { "epoch": 1.2569616031256594, "grad_norm": 0.6279874531789499, "learning_rate": 6.406745188352085e-05, "loss": 11.9121, "step": 23083 }, { "epoch": 1.2570160571222424, "grad_norm": 0.6220941261245012, "learning_rate": 6.405922271624874e-05, "loss": 11.9591, "step": 23084 }, { "epoch": 1.2570705111188254, "grad_norm": 0.5370646846868051, "learning_rate": 6.405099382845814e-05, "loss": 11.8795, "step": 23085 }, { "epoch": 1.2571249651154084, "grad_norm": 0.5544988273277008, "learning_rate": 6.404276522021301e-05, "loss": 12.0876, "step": 23086 }, { "epoch": 1.2571794191119914, "grad_norm": 0.6192562302073945, "learning_rate": 6.40345368915774e-05, "loss": 11.9662, "step": 23087 }, { "epoch": 1.2572338731085746, "grad_norm": 0.575416522526096, "learning_rate": 6.402630884261526e-05, "loss": 11.9524, "step": 23088 }, { "epoch": 1.2572883271051576, "grad_norm": 0.5161518379421237, "learning_rate": 6.401808107339062e-05, "loss": 11.9907, "step": 23089 }, { "epoch": 1.2573427811017406, "grad_norm": 0.5524182548409718, "learning_rate": 6.400985358396733e-05, "loss": 11.9012, "step": 23090 }, { "epoch": 1.2573972350983236, "grad_norm": 0.5543905883301882, "learning_rate": 6.40016263744095e-05, "loss": 11.9165, "step": 23091 }, { "epoch": 1.2574516890949066, "grad_norm": 0.5261536283933002, "learning_rate": 6.399339944478107e-05, "loss": 11.7749, "step": 23092 }, { "epoch": 1.2575061430914896, "grad_norm": 0.5374076723199579, "learning_rate": 6.398517279514598e-05, "loss": 11.9491, "step": 23093 }, { "epoch": 1.2575605970880726, "grad_norm": 0.5399215177823272, "learning_rate": 6.397694642556824e-05, "loss": 11.9369, "step": 23094 }, { "epoch": 1.2576150510846555, "grad_norm": 0.5332425503379244, "learning_rate": 6.396872033611179e-05, "loss": 11.9261, "step": 23095 }, { "epoch": 1.2576695050812385, "grad_norm": 0.5709318959269863, "learning_rate": 6.396049452684062e-05, "loss": 11.9361, "step": 23096 }, { "epoch": 1.2577239590778215, "grad_norm": 0.6037910756924191, "learning_rate": 6.395226899781868e-05, "loss": 11.9538, "step": 23097 }, { "epoch": 1.2577784130744045, "grad_norm": 0.5358524572601534, "learning_rate": 6.394404374910996e-05, "loss": 11.9424, "step": 23098 }, { "epoch": 1.2578328670709875, "grad_norm": 0.6004955658989394, "learning_rate": 6.393581878077844e-05, "loss": 11.9896, "step": 23099 }, { "epoch": 1.2578873210675705, "grad_norm": 0.5485408023650694, "learning_rate": 6.392759409288799e-05, "loss": 11.9138, "step": 23100 }, { "epoch": 1.2579417750641535, "grad_norm": 0.5666292944739462, "learning_rate": 6.391936968550261e-05, "loss": 11.931, "step": 23101 }, { "epoch": 1.2579962290607365, "grad_norm": 0.5597404353871231, "learning_rate": 6.391114555868627e-05, "loss": 11.9859, "step": 23102 }, { "epoch": 1.2580506830573197, "grad_norm": 0.5670522544599553, "learning_rate": 6.390292171250291e-05, "loss": 11.8759, "step": 23103 }, { "epoch": 1.2581051370539027, "grad_norm": 0.5995999828066928, "learning_rate": 6.389469814701651e-05, "loss": 11.9792, "step": 23104 }, { "epoch": 1.2581595910504857, "grad_norm": 0.5782929205834054, "learning_rate": 6.388647486229097e-05, "loss": 11.9888, "step": 23105 }, { "epoch": 1.2582140450470687, "grad_norm": 0.5484559404524344, "learning_rate": 6.387825185839026e-05, "loss": 11.8832, "step": 23106 }, { "epoch": 1.2582684990436517, "grad_norm": 0.598937853919029, "learning_rate": 6.387002913537834e-05, "loss": 11.9739, "step": 23107 }, { "epoch": 1.2583229530402347, "grad_norm": 0.4995317441061127, "learning_rate": 6.38618066933191e-05, "loss": 11.9355, "step": 23108 }, { "epoch": 1.2583774070368177, "grad_norm": 0.5618095843570149, "learning_rate": 6.385358453227657e-05, "loss": 12.1109, "step": 23109 }, { "epoch": 1.2584318610334007, "grad_norm": 0.5526007604359351, "learning_rate": 6.384536265231457e-05, "loss": 11.8914, "step": 23110 }, { "epoch": 1.2584863150299836, "grad_norm": 0.5531099195759396, "learning_rate": 6.383714105349712e-05, "loss": 12.0769, "step": 23111 }, { "epoch": 1.2585407690265669, "grad_norm": 0.4985342167243614, "learning_rate": 6.382891973588809e-05, "loss": 11.9721, "step": 23112 }, { "epoch": 1.2585952230231499, "grad_norm": 0.556810224128595, "learning_rate": 6.382069869955149e-05, "loss": 11.8717, "step": 23113 }, { "epoch": 1.2586496770197328, "grad_norm": 0.6270485080778043, "learning_rate": 6.381247794455118e-05, "loss": 11.9444, "step": 23114 }, { "epoch": 1.2587041310163158, "grad_norm": 0.5523639002205809, "learning_rate": 6.380425747095111e-05, "loss": 11.9966, "step": 23115 }, { "epoch": 1.2587585850128988, "grad_norm": 0.5353121475579924, "learning_rate": 6.379603727881522e-05, "loss": 11.8434, "step": 23116 }, { "epoch": 1.2588130390094818, "grad_norm": 0.5446101326768436, "learning_rate": 6.37878173682074e-05, "loss": 11.8856, "step": 23117 }, { "epoch": 1.2588674930060648, "grad_norm": 0.6029760601144726, "learning_rate": 6.37795977391916e-05, "loss": 12.0094, "step": 23118 }, { "epoch": 1.2589219470026478, "grad_norm": 0.5865087141597802, "learning_rate": 6.37713783918317e-05, "loss": 11.9017, "step": 23119 }, { "epoch": 1.2589764009992308, "grad_norm": 0.5477665226348177, "learning_rate": 6.376315932619169e-05, "loss": 11.9262, "step": 23120 }, { "epoch": 1.2590308549958138, "grad_norm": 0.5282841499264371, "learning_rate": 6.37549405423354e-05, "loss": 11.9528, "step": 23121 }, { "epoch": 1.2590853089923968, "grad_norm": 0.5412124065498328, "learning_rate": 6.374672204032675e-05, "loss": 12.0454, "step": 23122 }, { "epoch": 1.2591397629889798, "grad_norm": 0.5553462550966848, "learning_rate": 6.373850382022965e-05, "loss": 11.959, "step": 23123 }, { "epoch": 1.2591942169855628, "grad_norm": 0.6113007116293697, "learning_rate": 6.373028588210808e-05, "loss": 12.0259, "step": 23124 }, { "epoch": 1.2592486709821458, "grad_norm": 0.5830883374819321, "learning_rate": 6.372206822602586e-05, "loss": 11.8463, "step": 23125 }, { "epoch": 1.259303124978729, "grad_norm": 0.5504992518560115, "learning_rate": 6.371385085204693e-05, "loss": 11.9425, "step": 23126 }, { "epoch": 1.259357578975312, "grad_norm": 0.576981204386988, "learning_rate": 6.370563376023517e-05, "loss": 11.8694, "step": 23127 }, { "epoch": 1.259412032971895, "grad_norm": 0.5528066597285911, "learning_rate": 6.36974169506545e-05, "loss": 11.9209, "step": 23128 }, { "epoch": 1.259466486968478, "grad_norm": 0.5578907904421315, "learning_rate": 6.36892004233688e-05, "loss": 12.0281, "step": 23129 }, { "epoch": 1.259520940965061, "grad_norm": 0.49908705921764723, "learning_rate": 6.368098417844199e-05, "loss": 11.9853, "step": 23130 }, { "epoch": 1.259575394961644, "grad_norm": 0.6456895948428514, "learning_rate": 6.367276821593791e-05, "loss": 11.9007, "step": 23131 }, { "epoch": 1.259629848958227, "grad_norm": 0.5393688064228819, "learning_rate": 6.366455253592048e-05, "loss": 11.8418, "step": 23132 }, { "epoch": 1.25968430295481, "grad_norm": 0.5367421126036882, "learning_rate": 6.365633713845358e-05, "loss": 11.9011, "step": 23133 }, { "epoch": 1.259738756951393, "grad_norm": 0.5310563904804384, "learning_rate": 6.364812202360111e-05, "loss": 11.8744, "step": 23134 }, { "epoch": 1.2597932109479761, "grad_norm": 0.545635062163532, "learning_rate": 6.363990719142691e-05, "loss": 12.103, "step": 23135 }, { "epoch": 1.2598476649445591, "grad_norm": 0.529964804335486, "learning_rate": 6.363169264199491e-05, "loss": 12.0641, "step": 23136 }, { "epoch": 1.2599021189411421, "grad_norm": 0.5454530713208232, "learning_rate": 6.362347837536898e-05, "loss": 11.8868, "step": 23137 }, { "epoch": 1.2599565729377251, "grad_norm": 0.5138547367416849, "learning_rate": 6.361526439161297e-05, "loss": 11.9207, "step": 23138 }, { "epoch": 1.260011026934308, "grad_norm": 0.5379980430562593, "learning_rate": 6.360705069079076e-05, "loss": 11.9224, "step": 23139 }, { "epoch": 1.260065480930891, "grad_norm": 0.5570539254887212, "learning_rate": 6.359883727296625e-05, "loss": 12.0006, "step": 23140 }, { "epoch": 1.260119934927474, "grad_norm": 0.5660768674787063, "learning_rate": 6.359062413820327e-05, "loss": 12.0522, "step": 23141 }, { "epoch": 1.260174388924057, "grad_norm": 0.49642938404356757, "learning_rate": 6.35824112865657e-05, "loss": 11.9169, "step": 23142 }, { "epoch": 1.26022884292064, "grad_norm": 0.5354280412353992, "learning_rate": 6.357419871811741e-05, "loss": 11.9543, "step": 23143 }, { "epoch": 1.260283296917223, "grad_norm": 0.5997684021188936, "learning_rate": 6.356598643292225e-05, "loss": 11.9438, "step": 23144 }, { "epoch": 1.260337750913806, "grad_norm": 0.5336866194677516, "learning_rate": 6.355777443104409e-05, "loss": 11.9164, "step": 23145 }, { "epoch": 1.260392204910389, "grad_norm": 0.5803367431008287, "learning_rate": 6.354956271254678e-05, "loss": 12.1357, "step": 23146 }, { "epoch": 1.260446658906972, "grad_norm": 0.5217981905140658, "learning_rate": 6.35413512774942e-05, "loss": 11.9656, "step": 23147 }, { "epoch": 1.260501112903555, "grad_norm": 0.5283137024171627, "learning_rate": 6.353314012595018e-05, "loss": 11.9016, "step": 23148 }, { "epoch": 1.2605555669001383, "grad_norm": 0.47655219752417416, "learning_rate": 6.352492925797859e-05, "loss": 11.9532, "step": 23149 }, { "epoch": 1.2606100208967213, "grad_norm": 0.517452982016707, "learning_rate": 6.351671867364327e-05, "loss": 11.9848, "step": 23150 }, { "epoch": 1.2606644748933042, "grad_norm": 0.518384238667633, "learning_rate": 6.350850837300805e-05, "loss": 11.9969, "step": 23151 }, { "epoch": 1.2607189288898872, "grad_norm": 0.5072064806068445, "learning_rate": 6.35002983561368e-05, "loss": 12.031, "step": 23152 }, { "epoch": 1.2607733828864702, "grad_norm": 0.49020800588185137, "learning_rate": 6.349208862309334e-05, "loss": 11.8534, "step": 23153 }, { "epoch": 1.2608278368830532, "grad_norm": 0.46912186730601757, "learning_rate": 6.348387917394152e-05, "loss": 11.8691, "step": 23154 }, { "epoch": 1.2608822908796362, "grad_norm": 0.5524909289000592, "learning_rate": 6.347567000874519e-05, "loss": 12.024, "step": 23155 }, { "epoch": 1.2609367448762192, "grad_norm": 0.5420559822279098, "learning_rate": 6.346746112756816e-05, "loss": 11.7939, "step": 23156 }, { "epoch": 1.2609911988728022, "grad_norm": 0.6005430952423993, "learning_rate": 6.345925253047426e-05, "loss": 11.8845, "step": 23157 }, { "epoch": 1.2610456528693854, "grad_norm": 0.584496432963144, "learning_rate": 6.345104421752737e-05, "loss": 12.0256, "step": 23158 }, { "epoch": 1.2611001068659684, "grad_norm": 0.5289213329822015, "learning_rate": 6.344283618879128e-05, "loss": 11.8286, "step": 23159 }, { "epoch": 1.2611545608625514, "grad_norm": 0.5215239341627839, "learning_rate": 6.343462844432988e-05, "loss": 11.9474, "step": 23160 }, { "epoch": 1.2612090148591344, "grad_norm": 0.5349334059457926, "learning_rate": 6.342642098420688e-05, "loss": 12.0584, "step": 23161 }, { "epoch": 1.2612634688557174, "grad_norm": 0.5446407112239008, "learning_rate": 6.341821380848618e-05, "loss": 12.0434, "step": 23162 }, { "epoch": 1.2613179228523004, "grad_norm": 0.564088965609529, "learning_rate": 6.341000691723158e-05, "loss": 11.9337, "step": 23163 }, { "epoch": 1.2613723768488834, "grad_norm": 0.5340390154873355, "learning_rate": 6.340180031050691e-05, "loss": 11.8474, "step": 23164 }, { "epoch": 1.2614268308454664, "grad_norm": 0.47720631400330055, "learning_rate": 6.339359398837596e-05, "loss": 11.8776, "step": 23165 }, { "epoch": 1.2614812848420494, "grad_norm": 0.5736225944813493, "learning_rate": 6.338538795090258e-05, "loss": 12.0683, "step": 23166 }, { "epoch": 1.2615357388386323, "grad_norm": 0.5591859961628993, "learning_rate": 6.337718219815057e-05, "loss": 12.0328, "step": 23167 }, { "epoch": 1.2615901928352153, "grad_norm": 0.539813660015235, "learning_rate": 6.336897673018369e-05, "loss": 12.006, "step": 23168 }, { "epoch": 1.2616446468317983, "grad_norm": 0.47370333657589964, "learning_rate": 6.336077154706581e-05, "loss": 11.9422, "step": 23169 }, { "epoch": 1.2616991008283813, "grad_norm": 0.5175435505840981, "learning_rate": 6.335256664886078e-05, "loss": 11.8468, "step": 23170 }, { "epoch": 1.2617535548249643, "grad_norm": 0.5364831212928655, "learning_rate": 6.334436203563228e-05, "loss": 11.9393, "step": 23171 }, { "epoch": 1.2618080088215473, "grad_norm": 0.5821503420255064, "learning_rate": 6.333615770744414e-05, "loss": 11.9126, "step": 23172 }, { "epoch": 1.2618624628181305, "grad_norm": 0.5733777960952744, "learning_rate": 6.332795366436024e-05, "loss": 12.055, "step": 23173 }, { "epoch": 1.2619169168147135, "grad_norm": 0.5389332093998159, "learning_rate": 6.33197499064443e-05, "loss": 11.9563, "step": 23174 }, { "epoch": 1.2619713708112965, "grad_norm": 0.5275144965712444, "learning_rate": 6.331154643376012e-05, "loss": 11.7789, "step": 23175 }, { "epoch": 1.2620258248078795, "grad_norm": 0.5537466378450545, "learning_rate": 6.330334324637153e-05, "loss": 11.935, "step": 23176 }, { "epoch": 1.2620802788044625, "grad_norm": 0.5470867083695272, "learning_rate": 6.329514034434229e-05, "loss": 11.9791, "step": 23177 }, { "epoch": 1.2621347328010455, "grad_norm": 0.5287713218484725, "learning_rate": 6.328693772773619e-05, "loss": 12.1183, "step": 23178 }, { "epoch": 1.2621891867976285, "grad_norm": 0.5675339930776699, "learning_rate": 6.327873539661701e-05, "loss": 11.777, "step": 23179 }, { "epoch": 1.2622436407942115, "grad_norm": 0.5937655479464573, "learning_rate": 6.327053335104858e-05, "loss": 11.9364, "step": 23180 }, { "epoch": 1.2622980947907947, "grad_norm": 0.5768170937460548, "learning_rate": 6.326233159109462e-05, "loss": 11.8478, "step": 23181 }, { "epoch": 1.2623525487873777, "grad_norm": 0.5116243289974295, "learning_rate": 6.325413011681893e-05, "loss": 11.8969, "step": 23182 }, { "epoch": 1.2624070027839607, "grad_norm": 0.5474243620780744, "learning_rate": 6.32459289282853e-05, "loss": 11.9277, "step": 23183 }, { "epoch": 1.2624614567805437, "grad_norm": 0.5677173261618479, "learning_rate": 6.323772802555745e-05, "loss": 11.9387, "step": 23184 }, { "epoch": 1.2625159107771267, "grad_norm": 0.5112451267947925, "learning_rate": 6.32295274086992e-05, "loss": 11.8689, "step": 23185 }, { "epoch": 1.2625703647737097, "grad_norm": 0.512827670829193, "learning_rate": 6.322132707777433e-05, "loss": 11.971, "step": 23186 }, { "epoch": 1.2626248187702926, "grad_norm": 0.5207663536707516, "learning_rate": 6.321312703284658e-05, "loss": 11.8929, "step": 23187 }, { "epoch": 1.2626792727668756, "grad_norm": 0.5155169795566987, "learning_rate": 6.320492727397974e-05, "loss": 11.9738, "step": 23188 }, { "epoch": 1.2627337267634586, "grad_norm": 0.5314580142077816, "learning_rate": 6.319672780123755e-05, "loss": 11.9145, "step": 23189 }, { "epoch": 1.2627881807600416, "grad_norm": 0.5421368749543253, "learning_rate": 6.318852861468378e-05, "loss": 12.0622, "step": 23190 }, { "epoch": 1.2628426347566246, "grad_norm": 0.5179067735758253, "learning_rate": 6.31803297143822e-05, "loss": 11.9269, "step": 23191 }, { "epoch": 1.2628970887532076, "grad_norm": 0.5736250693930064, "learning_rate": 6.317213110039651e-05, "loss": 11.7545, "step": 23192 }, { "epoch": 1.2629515427497906, "grad_norm": 0.5697848875943401, "learning_rate": 6.316393277279053e-05, "loss": 12.0713, "step": 23193 }, { "epoch": 1.2630059967463736, "grad_norm": 0.4869568018442, "learning_rate": 6.315573473162797e-05, "loss": 11.9277, "step": 23194 }, { "epoch": 1.2630604507429566, "grad_norm": 0.5616724562357337, "learning_rate": 6.314753697697258e-05, "loss": 12.0319, "step": 23195 }, { "epoch": 1.2631149047395398, "grad_norm": 0.5267218874973129, "learning_rate": 6.313933950888815e-05, "loss": 12.0089, "step": 23196 }, { "epoch": 1.2631693587361228, "grad_norm": 0.5520135585587579, "learning_rate": 6.31311423274384e-05, "loss": 12.0212, "step": 23197 }, { "epoch": 1.2632238127327058, "grad_norm": 0.5351385629875579, "learning_rate": 6.312294543268706e-05, "loss": 11.7936, "step": 23198 }, { "epoch": 1.2632782667292888, "grad_norm": 0.5610677188971568, "learning_rate": 6.31147488246979e-05, "loss": 11.9184, "step": 23199 }, { "epoch": 1.2633327207258718, "grad_norm": 0.5747318834051146, "learning_rate": 6.310655250353464e-05, "loss": 11.9075, "step": 23200 }, { "epoch": 1.2633871747224548, "grad_norm": 0.6582293748444622, "learning_rate": 6.3098356469261e-05, "loss": 12.006, "step": 23201 }, { "epoch": 1.2634416287190378, "grad_norm": 0.5305480900275891, "learning_rate": 6.309016072194071e-05, "loss": 11.9112, "step": 23202 }, { "epoch": 1.2634960827156207, "grad_norm": 0.5357069806269253, "learning_rate": 6.308196526163755e-05, "loss": 11.9746, "step": 23203 }, { "epoch": 1.2635505367122037, "grad_norm": 0.5747193317278483, "learning_rate": 6.30737700884152e-05, "loss": 12.0809, "step": 23204 }, { "epoch": 1.263604990708787, "grad_norm": 0.5651868305719783, "learning_rate": 6.306557520233741e-05, "loss": 11.8314, "step": 23205 }, { "epoch": 1.26365944470537, "grad_norm": 0.5116818528225225, "learning_rate": 6.305738060346788e-05, "loss": 11.9867, "step": 23206 }, { "epoch": 1.263713898701953, "grad_norm": 0.6191881391424359, "learning_rate": 6.304918629187037e-05, "loss": 12.0998, "step": 23207 }, { "epoch": 1.263768352698536, "grad_norm": 0.5084801937641094, "learning_rate": 6.30409922676086e-05, "loss": 11.8947, "step": 23208 }, { "epoch": 1.263822806695119, "grad_norm": 0.5234002530945163, "learning_rate": 6.303279853074626e-05, "loss": 11.805, "step": 23209 }, { "epoch": 1.263877260691702, "grad_norm": 0.5706221736923324, "learning_rate": 6.302460508134711e-05, "loss": 11.9956, "step": 23210 }, { "epoch": 1.263931714688285, "grad_norm": 0.5396563639855002, "learning_rate": 6.30164119194748e-05, "loss": 11.9397, "step": 23211 }, { "epoch": 1.263986168684868, "grad_norm": 0.6354073308334479, "learning_rate": 6.300821904519308e-05, "loss": 12.1244, "step": 23212 }, { "epoch": 1.264040622681451, "grad_norm": 0.5845475176699747, "learning_rate": 6.300002645856566e-05, "loss": 11.9064, "step": 23213 }, { "epoch": 1.264095076678034, "grad_norm": 0.512881774459477, "learning_rate": 6.299183415965622e-05, "loss": 11.9788, "step": 23214 }, { "epoch": 1.2641495306746169, "grad_norm": 0.5672581737919049, "learning_rate": 6.298364214852849e-05, "loss": 11.9306, "step": 23215 }, { "epoch": 1.2642039846711999, "grad_norm": 0.6185248005041978, "learning_rate": 6.297545042524617e-05, "loss": 11.9911, "step": 23216 }, { "epoch": 1.2642584386677829, "grad_norm": 0.579008583626923, "learning_rate": 6.296725898987292e-05, "loss": 11.9292, "step": 23217 }, { "epoch": 1.2643128926643659, "grad_norm": 0.5554296521119939, "learning_rate": 6.295906784247252e-05, "loss": 12.0232, "step": 23218 }, { "epoch": 1.264367346660949, "grad_norm": 0.6114492731324929, "learning_rate": 6.295087698310861e-05, "loss": 11.9058, "step": 23219 }, { "epoch": 1.264421800657532, "grad_norm": 0.5274101672933743, "learning_rate": 6.294268641184493e-05, "loss": 11.9656, "step": 23220 }, { "epoch": 1.264476254654115, "grad_norm": 0.5238667204154219, "learning_rate": 6.293449612874508e-05, "loss": 11.8131, "step": 23221 }, { "epoch": 1.264530708650698, "grad_norm": 0.5434777512564144, "learning_rate": 6.292630613387282e-05, "loss": 11.9128, "step": 23222 }, { "epoch": 1.264585162647281, "grad_norm": 0.5432222398151255, "learning_rate": 6.291811642729182e-05, "loss": 11.8331, "step": 23223 }, { "epoch": 1.264639616643864, "grad_norm": 0.5524449538973034, "learning_rate": 6.290992700906577e-05, "loss": 11.972, "step": 23224 }, { "epoch": 1.264694070640447, "grad_norm": 0.5445788443924797, "learning_rate": 6.290173787925835e-05, "loss": 11.9811, "step": 23225 }, { "epoch": 1.26474852463703, "grad_norm": 0.5215192081338217, "learning_rate": 6.289354903793324e-05, "loss": 11.9049, "step": 23226 }, { "epoch": 1.264802978633613, "grad_norm": 0.5496417411242858, "learning_rate": 6.28853604851541e-05, "loss": 11.9841, "step": 23227 }, { "epoch": 1.2648574326301962, "grad_norm": 0.5736958780394876, "learning_rate": 6.287717222098464e-05, "loss": 12.0088, "step": 23228 }, { "epoch": 1.2649118866267792, "grad_norm": 0.5553454283394794, "learning_rate": 6.286898424548848e-05, "loss": 11.9605, "step": 23229 }, { "epoch": 1.2649663406233622, "grad_norm": 0.49554090269372214, "learning_rate": 6.286079655872938e-05, "loss": 11.9359, "step": 23230 }, { "epoch": 1.2650207946199452, "grad_norm": 0.5995344148429932, "learning_rate": 6.285260916077093e-05, "loss": 11.8937, "step": 23231 }, { "epoch": 1.2650752486165282, "grad_norm": 0.5470788013975341, "learning_rate": 6.284442205167681e-05, "loss": 12.0121, "step": 23232 }, { "epoch": 1.2651297026131112, "grad_norm": 0.5481015507073892, "learning_rate": 6.283623523151068e-05, "loss": 11.9989, "step": 23233 }, { "epoch": 1.2651841566096942, "grad_norm": 0.6058346904021367, "learning_rate": 6.282804870033623e-05, "loss": 11.7442, "step": 23234 }, { "epoch": 1.2652386106062772, "grad_norm": 0.5694201142122483, "learning_rate": 6.281986245821712e-05, "loss": 12.051, "step": 23235 }, { "epoch": 1.2652930646028602, "grad_norm": 0.5331388845783178, "learning_rate": 6.281167650521699e-05, "loss": 11.9646, "step": 23236 }, { "epoch": 1.2653475185994432, "grad_norm": 0.5898842206056572, "learning_rate": 6.28034908413995e-05, "loss": 12.0569, "step": 23237 }, { "epoch": 1.2654019725960262, "grad_norm": 0.5033871143616334, "learning_rate": 6.27953054668283e-05, "loss": 11.8935, "step": 23238 }, { "epoch": 1.2654564265926092, "grad_norm": 0.5025364947010356, "learning_rate": 6.278712038156704e-05, "loss": 11.8969, "step": 23239 }, { "epoch": 1.2655108805891921, "grad_norm": 0.5339284807788707, "learning_rate": 6.27789355856794e-05, "loss": 11.727, "step": 23240 }, { "epoch": 1.2655653345857751, "grad_norm": 0.5831331266534203, "learning_rate": 6.277075107922899e-05, "loss": 11.9637, "step": 23241 }, { "epoch": 1.2656197885823581, "grad_norm": 0.5602101162288189, "learning_rate": 6.276256686227944e-05, "loss": 11.963, "step": 23242 }, { "epoch": 1.2656742425789413, "grad_norm": 0.6348367389306971, "learning_rate": 6.275438293489442e-05, "loss": 12.0841, "step": 23243 }, { "epoch": 1.2657286965755243, "grad_norm": 0.5662549810181848, "learning_rate": 6.274619929713755e-05, "loss": 11.9062, "step": 23244 }, { "epoch": 1.2657831505721073, "grad_norm": 0.5880688167587791, "learning_rate": 6.273801594907249e-05, "loss": 12.0878, "step": 23245 }, { "epoch": 1.2658376045686903, "grad_norm": 0.5658460549330208, "learning_rate": 6.272983289076288e-05, "loss": 12.0015, "step": 23246 }, { "epoch": 1.2658920585652733, "grad_norm": 0.5288370428761704, "learning_rate": 6.272165012227235e-05, "loss": 11.8035, "step": 23247 }, { "epoch": 1.2659465125618563, "grad_norm": 0.5091781328094663, "learning_rate": 6.271346764366451e-05, "loss": 12.032, "step": 23248 }, { "epoch": 1.2660009665584393, "grad_norm": 0.6019770426441612, "learning_rate": 6.270528545500298e-05, "loss": 11.9276, "step": 23249 }, { "epoch": 1.2660554205550223, "grad_norm": 0.5592074457787859, "learning_rate": 6.269710355635145e-05, "loss": 11.8501, "step": 23250 }, { "epoch": 1.2661098745516055, "grad_norm": 0.5581442322272826, "learning_rate": 6.268892194777348e-05, "loss": 11.9014, "step": 23251 }, { "epoch": 1.2661643285481885, "grad_norm": 0.5455366191873362, "learning_rate": 6.268074062933269e-05, "loss": 11.9663, "step": 23252 }, { "epoch": 1.2662187825447715, "grad_norm": 0.6696918915359905, "learning_rate": 6.267255960109273e-05, "loss": 11.9712, "step": 23253 }, { "epoch": 1.2662732365413545, "grad_norm": 0.5379251392148051, "learning_rate": 6.26643788631172e-05, "loss": 12.0345, "step": 23254 }, { "epoch": 1.2663276905379375, "grad_norm": 0.5931605972582454, "learning_rate": 6.26561984154697e-05, "loss": 12.0742, "step": 23255 }, { "epoch": 1.2663821445345205, "grad_norm": 0.5017334888256585, "learning_rate": 6.26480182582139e-05, "loss": 11.9426, "step": 23256 }, { "epoch": 1.2664365985311035, "grad_norm": 0.5068977268219558, "learning_rate": 6.263983839141335e-05, "loss": 11.9662, "step": 23257 }, { "epoch": 1.2664910525276865, "grad_norm": 0.5356184965527556, "learning_rate": 6.26316588151317e-05, "loss": 11.7663, "step": 23258 }, { "epoch": 1.2665455065242694, "grad_norm": 0.6047985115412942, "learning_rate": 6.262347952943253e-05, "loss": 11.9585, "step": 23259 }, { "epoch": 1.2665999605208524, "grad_norm": 0.5935450485177922, "learning_rate": 6.261530053437946e-05, "loss": 12.0218, "step": 23260 }, { "epoch": 1.2666544145174354, "grad_norm": 0.5792616729946286, "learning_rate": 6.26071218300361e-05, "loss": 11.942, "step": 23261 }, { "epoch": 1.2667088685140184, "grad_norm": 0.4911537276124451, "learning_rate": 6.2598943416466e-05, "loss": 11.8469, "step": 23262 }, { "epoch": 1.2667633225106014, "grad_norm": 0.5888120572545132, "learning_rate": 6.25907652937328e-05, "loss": 12.0768, "step": 23263 }, { "epoch": 1.2668177765071844, "grad_norm": 0.5842572999363864, "learning_rate": 6.258258746190008e-05, "loss": 11.8555, "step": 23264 }, { "epoch": 1.2668722305037674, "grad_norm": 0.5770417131764191, "learning_rate": 6.257440992103143e-05, "loss": 11.9088, "step": 23265 }, { "epoch": 1.2669266845003506, "grad_norm": 0.5123296797440976, "learning_rate": 6.256623267119043e-05, "loss": 11.9103, "step": 23266 }, { "epoch": 1.2669811384969336, "grad_norm": 0.5641928636700514, "learning_rate": 6.25580557124407e-05, "loss": 11.9741, "step": 23267 }, { "epoch": 1.2670355924935166, "grad_norm": 0.5655096911419298, "learning_rate": 6.25498790448458e-05, "loss": 12.0109, "step": 23268 }, { "epoch": 1.2670900464900996, "grad_norm": 0.5428229007565369, "learning_rate": 6.254170266846933e-05, "loss": 11.9479, "step": 23269 }, { "epoch": 1.2671445004866826, "grad_norm": 0.5706404677280447, "learning_rate": 6.253352658337487e-05, "loss": 12.0651, "step": 23270 }, { "epoch": 1.2671989544832656, "grad_norm": 0.5262681530829951, "learning_rate": 6.2525350789626e-05, "loss": 11.8401, "step": 23271 }, { "epoch": 1.2672534084798486, "grad_norm": 0.6176674300786359, "learning_rate": 6.251717528728627e-05, "loss": 11.9482, "step": 23272 }, { "epoch": 1.2673078624764316, "grad_norm": 0.5983821018764575, "learning_rate": 6.250900007641927e-05, "loss": 11.9959, "step": 23273 }, { "epoch": 1.2673623164730146, "grad_norm": 0.5273196446127435, "learning_rate": 6.250082515708857e-05, "loss": 11.8718, "step": 23274 }, { "epoch": 1.2674167704695978, "grad_norm": 0.5228535750345598, "learning_rate": 6.249265052935774e-05, "loss": 11.754, "step": 23275 }, { "epoch": 1.2674712244661808, "grad_norm": 0.5031082126493903, "learning_rate": 6.248447619329036e-05, "loss": 11.9304, "step": 23276 }, { "epoch": 1.2675256784627638, "grad_norm": 0.5775344306743875, "learning_rate": 6.247630214894995e-05, "loss": 11.9764, "step": 23277 }, { "epoch": 1.2675801324593468, "grad_norm": 0.5511462710066333, "learning_rate": 6.246812839640013e-05, "loss": 12.0393, "step": 23278 }, { "epoch": 1.2676345864559297, "grad_norm": 0.5546271233692556, "learning_rate": 6.245995493570445e-05, "loss": 11.8554, "step": 23279 }, { "epoch": 1.2676890404525127, "grad_norm": 0.5542476229734516, "learning_rate": 6.245178176692645e-05, "loss": 11.9874, "step": 23280 }, { "epoch": 1.2677434944490957, "grad_norm": 0.5601960937182989, "learning_rate": 6.244360889012973e-05, "loss": 11.974, "step": 23281 }, { "epoch": 1.2677979484456787, "grad_norm": 0.536459073035721, "learning_rate": 6.243543630537775e-05, "loss": 12.0649, "step": 23282 }, { "epoch": 1.2678524024422617, "grad_norm": 0.5326910175065769, "learning_rate": 6.242726401273414e-05, "loss": 11.9427, "step": 23283 }, { "epoch": 1.2679068564388447, "grad_norm": 0.6847770682281887, "learning_rate": 6.241909201226242e-05, "loss": 12.0452, "step": 23284 }, { "epoch": 1.2679613104354277, "grad_norm": 0.4851762226565081, "learning_rate": 6.241092030402614e-05, "loss": 11.9293, "step": 23285 }, { "epoch": 1.2680157644320107, "grad_norm": 0.5076819760948074, "learning_rate": 6.240274888808883e-05, "loss": 12.0396, "step": 23286 }, { "epoch": 1.2680702184285937, "grad_norm": 0.5597168326451334, "learning_rate": 6.239457776451409e-05, "loss": 12.0031, "step": 23287 }, { "epoch": 1.2681246724251767, "grad_norm": 0.6254650905558556, "learning_rate": 6.238640693336539e-05, "loss": 11.8565, "step": 23288 }, { "epoch": 1.26817912642176, "grad_norm": 0.5411474727450071, "learning_rate": 6.237823639470628e-05, "loss": 11.9987, "step": 23289 }, { "epoch": 1.2682335804183429, "grad_norm": 0.5686613732412673, "learning_rate": 6.237006614860035e-05, "loss": 11.9573, "step": 23290 }, { "epoch": 1.2682880344149259, "grad_norm": 0.5273454910767795, "learning_rate": 6.236189619511113e-05, "loss": 12.0214, "step": 23291 }, { "epoch": 1.2683424884115089, "grad_norm": 0.5251569727347625, "learning_rate": 6.235372653430207e-05, "loss": 11.8177, "step": 23292 }, { "epoch": 1.2683969424080919, "grad_norm": 0.5271534590038707, "learning_rate": 6.234555716623672e-05, "loss": 11.8267, "step": 23293 }, { "epoch": 1.2684513964046749, "grad_norm": 0.6089298643908899, "learning_rate": 6.233738809097866e-05, "loss": 12.0401, "step": 23294 }, { "epoch": 1.2685058504012579, "grad_norm": 0.5535058687639198, "learning_rate": 6.23292193085914e-05, "loss": 12.0141, "step": 23295 }, { "epoch": 1.2685603043978408, "grad_norm": 0.57154260724941, "learning_rate": 6.232105081913841e-05, "loss": 11.9038, "step": 23296 }, { "epoch": 1.2686147583944238, "grad_norm": 0.5788602074094982, "learning_rate": 6.231288262268328e-05, "loss": 12.0593, "step": 23297 }, { "epoch": 1.268669212391007, "grad_norm": 0.5300962947667739, "learning_rate": 6.23047147192895e-05, "loss": 11.9953, "step": 23298 }, { "epoch": 1.26872366638759, "grad_norm": 0.6186935749669186, "learning_rate": 6.229654710902055e-05, "loss": 11.9587, "step": 23299 }, { "epoch": 1.268778120384173, "grad_norm": 0.5494811757785866, "learning_rate": 6.228837979193997e-05, "loss": 12.0647, "step": 23300 }, { "epoch": 1.268832574380756, "grad_norm": 0.5447030181196109, "learning_rate": 6.228021276811134e-05, "loss": 12.0203, "step": 23301 }, { "epoch": 1.268887028377339, "grad_norm": 0.5637450092619816, "learning_rate": 6.227204603759805e-05, "loss": 11.9965, "step": 23302 }, { "epoch": 1.268941482373922, "grad_norm": 0.5638522258794966, "learning_rate": 6.226387960046367e-05, "loss": 11.7628, "step": 23303 }, { "epoch": 1.268995936370505, "grad_norm": 0.5467926557255012, "learning_rate": 6.225571345677165e-05, "loss": 11.9061, "step": 23304 }, { "epoch": 1.269050390367088, "grad_norm": 0.5593070649001599, "learning_rate": 6.224754760658558e-05, "loss": 12.0445, "step": 23305 }, { "epoch": 1.269104844363671, "grad_norm": 0.5343540127281311, "learning_rate": 6.223938204996889e-05, "loss": 11.9997, "step": 23306 }, { "epoch": 1.269159298360254, "grad_norm": 0.5801851478240748, "learning_rate": 6.223121678698509e-05, "loss": 12.0018, "step": 23307 }, { "epoch": 1.269213752356837, "grad_norm": 0.5533776677930815, "learning_rate": 6.222305181769769e-05, "loss": 11.923, "step": 23308 }, { "epoch": 1.26926820635342, "grad_norm": 0.5789373933804217, "learning_rate": 6.221488714217019e-05, "loss": 12.0229, "step": 23309 }, { "epoch": 1.269322660350003, "grad_norm": 0.5536744500147756, "learning_rate": 6.220672276046604e-05, "loss": 11.883, "step": 23310 }, { "epoch": 1.269377114346586, "grad_norm": 0.5783446621668239, "learning_rate": 6.219855867264878e-05, "loss": 11.9313, "step": 23311 }, { "epoch": 1.269431568343169, "grad_norm": 0.5404254904073282, "learning_rate": 6.219039487878187e-05, "loss": 12.0112, "step": 23312 }, { "epoch": 1.2694860223397522, "grad_norm": 0.5262018222211831, "learning_rate": 6.218223137892876e-05, "loss": 11.8799, "step": 23313 }, { "epoch": 1.2695404763363352, "grad_norm": 0.540015923190593, "learning_rate": 6.217406817315297e-05, "loss": 11.9449, "step": 23314 }, { "epoch": 1.2695949303329181, "grad_norm": 0.5976045275009173, "learning_rate": 6.216590526151795e-05, "loss": 12.1262, "step": 23315 }, { "epoch": 1.2696493843295011, "grad_norm": 0.5368482700177268, "learning_rate": 6.215774264408723e-05, "loss": 11.9444, "step": 23316 }, { "epoch": 1.2697038383260841, "grad_norm": 0.5558777228267541, "learning_rate": 6.214958032092423e-05, "loss": 12.0168, "step": 23317 }, { "epoch": 1.2697582923226671, "grad_norm": 0.5077506313370604, "learning_rate": 6.214141829209245e-05, "loss": 11.9611, "step": 23318 }, { "epoch": 1.2698127463192501, "grad_norm": 0.5487927148954824, "learning_rate": 6.213325655765534e-05, "loss": 11.9458, "step": 23319 }, { "epoch": 1.2698672003158331, "grad_norm": 0.595899878814489, "learning_rate": 6.21250951176764e-05, "loss": 11.9533, "step": 23320 }, { "epoch": 1.2699216543124163, "grad_norm": 0.5506273459385893, "learning_rate": 6.211693397221908e-05, "loss": 11.7498, "step": 23321 }, { "epoch": 1.2699761083089993, "grad_norm": 0.538278177967009, "learning_rate": 6.210877312134679e-05, "loss": 11.9185, "step": 23322 }, { "epoch": 1.2700305623055823, "grad_norm": 0.5227067430337492, "learning_rate": 6.210061256512306e-05, "loss": 11.9251, "step": 23323 }, { "epoch": 1.2700850163021653, "grad_norm": 0.5872058249802069, "learning_rate": 6.209245230361131e-05, "loss": 11.9939, "step": 23324 }, { "epoch": 1.2701394702987483, "grad_norm": 0.524340618375271, "learning_rate": 6.208429233687503e-05, "loss": 12.0238, "step": 23325 }, { "epoch": 1.2701939242953313, "grad_norm": 0.5671513504042198, "learning_rate": 6.20761326649776e-05, "loss": 11.8688, "step": 23326 }, { "epoch": 1.2702483782919143, "grad_norm": 0.5589026914595644, "learning_rate": 6.206797328798257e-05, "loss": 12.0015, "step": 23327 }, { "epoch": 1.2703028322884973, "grad_norm": 0.5103027645540704, "learning_rate": 6.205981420595332e-05, "loss": 11.9565, "step": 23328 }, { "epoch": 1.2703572862850803, "grad_norm": 0.5709819143252606, "learning_rate": 6.205165541895334e-05, "loss": 11.9735, "step": 23329 }, { "epoch": 1.2704117402816633, "grad_norm": 0.6095574741860947, "learning_rate": 6.204349692704604e-05, "loss": 12.035, "step": 23330 }, { "epoch": 1.2704661942782463, "grad_norm": 0.5646974626640744, "learning_rate": 6.20353387302949e-05, "loss": 11.8901, "step": 23331 }, { "epoch": 1.2705206482748292, "grad_norm": 0.5210950415666447, "learning_rate": 6.20271808287633e-05, "loss": 12.0456, "step": 23332 }, { "epoch": 1.2705751022714122, "grad_norm": 0.5811755441916826, "learning_rate": 6.201902322251471e-05, "loss": 11.9576, "step": 23333 }, { "epoch": 1.2706295562679952, "grad_norm": 0.5897526476423185, "learning_rate": 6.201086591161255e-05, "loss": 11.8529, "step": 23334 }, { "epoch": 1.2706840102645782, "grad_norm": 0.6421405117499367, "learning_rate": 6.200270889612029e-05, "loss": 12.0423, "step": 23335 }, { "epoch": 1.2707384642611614, "grad_norm": 0.5818935058785432, "learning_rate": 6.199455217610135e-05, "loss": 11.9691, "step": 23336 }, { "epoch": 1.2707929182577444, "grad_norm": 0.6004240477887385, "learning_rate": 6.198639575161914e-05, "loss": 11.946, "step": 23337 }, { "epoch": 1.2708473722543274, "grad_norm": 0.539171698349238, "learning_rate": 6.197823962273705e-05, "loss": 12.0062, "step": 23338 }, { "epoch": 1.2709018262509104, "grad_norm": 0.6258640458409044, "learning_rate": 6.197008378951858e-05, "loss": 11.7681, "step": 23339 }, { "epoch": 1.2709562802474934, "grad_norm": 0.5848841771074273, "learning_rate": 6.196192825202711e-05, "loss": 12.032, "step": 23340 }, { "epoch": 1.2710107342440764, "grad_norm": 0.5291610932441859, "learning_rate": 6.195377301032611e-05, "loss": 11.7731, "step": 23341 }, { "epoch": 1.2710651882406594, "grad_norm": 0.4886445273656309, "learning_rate": 6.19456180644789e-05, "loss": 11.9044, "step": 23342 }, { "epoch": 1.2711196422372424, "grad_norm": 0.5475517712648182, "learning_rate": 6.193746341454894e-05, "loss": 12.0123, "step": 23343 }, { "epoch": 1.2711740962338254, "grad_norm": 0.5115837751985072, "learning_rate": 6.192930906059966e-05, "loss": 11.8949, "step": 23344 }, { "epoch": 1.2712285502304086, "grad_norm": 0.5539929387810759, "learning_rate": 6.192115500269447e-05, "loss": 11.9995, "step": 23345 }, { "epoch": 1.2712830042269916, "grad_norm": 0.4909185350024668, "learning_rate": 6.191300124089675e-05, "loss": 11.916, "step": 23346 }, { "epoch": 1.2713374582235746, "grad_norm": 0.49260331961665, "learning_rate": 6.190484777526993e-05, "loss": 11.8449, "step": 23347 }, { "epoch": 1.2713919122201576, "grad_norm": 0.631466451668857, "learning_rate": 6.189669460587739e-05, "loss": 11.8119, "step": 23348 }, { "epoch": 1.2714463662167406, "grad_norm": 0.6089354795289456, "learning_rate": 6.188854173278254e-05, "loss": 12.0137, "step": 23349 }, { "epoch": 1.2715008202133236, "grad_norm": 0.6371413460409984, "learning_rate": 6.188038915604877e-05, "loss": 11.9834, "step": 23350 }, { "epoch": 1.2715552742099065, "grad_norm": 0.5769881919737412, "learning_rate": 6.187223687573956e-05, "loss": 11.9342, "step": 23351 }, { "epoch": 1.2716097282064895, "grad_norm": 0.6152241490133966, "learning_rate": 6.186408489191818e-05, "loss": 11.9962, "step": 23352 }, { "epoch": 1.2716641822030725, "grad_norm": 0.6116610121964597, "learning_rate": 6.185593320464805e-05, "loss": 11.973, "step": 23353 }, { "epoch": 1.2717186361996555, "grad_norm": 0.5256129458254312, "learning_rate": 6.184778181399258e-05, "loss": 11.8692, "step": 23354 }, { "epoch": 1.2717730901962385, "grad_norm": 0.4920780804838193, "learning_rate": 6.183963072001517e-05, "loss": 11.9996, "step": 23355 }, { "epoch": 1.2718275441928215, "grad_norm": 0.5127263926711845, "learning_rate": 6.18314799227792e-05, "loss": 11.9083, "step": 23356 }, { "epoch": 1.2718819981894045, "grad_norm": 0.6217008392542402, "learning_rate": 6.182332942234804e-05, "loss": 12.036, "step": 23357 }, { "epoch": 1.2719364521859875, "grad_norm": 0.5130976735521793, "learning_rate": 6.181517921878508e-05, "loss": 11.8585, "step": 23358 }, { "epoch": 1.2719909061825707, "grad_norm": 0.5202591226152474, "learning_rate": 6.180702931215367e-05, "loss": 11.9987, "step": 23359 }, { "epoch": 1.2720453601791537, "grad_norm": 0.6111920306799264, "learning_rate": 6.17988797025172e-05, "loss": 12.0882, "step": 23360 }, { "epoch": 1.2720998141757367, "grad_norm": 0.6293271982113089, "learning_rate": 6.17907303899391e-05, "loss": 11.9575, "step": 23361 }, { "epoch": 1.2721542681723197, "grad_norm": 0.5099706333577905, "learning_rate": 6.178258137448265e-05, "loss": 12.0043, "step": 23362 }, { "epoch": 1.2722087221689027, "grad_norm": 0.6005071181171349, "learning_rate": 6.177443265621127e-05, "loss": 11.9809, "step": 23363 }, { "epoch": 1.2722631761654857, "grad_norm": 0.48955023102144846, "learning_rate": 6.176628423518827e-05, "loss": 11.8855, "step": 23364 }, { "epoch": 1.2723176301620687, "grad_norm": 0.5513402720154954, "learning_rate": 6.17581361114771e-05, "loss": 11.9205, "step": 23365 }, { "epoch": 1.2723720841586517, "grad_norm": 0.5271349303137202, "learning_rate": 6.174998828514106e-05, "loss": 11.9024, "step": 23366 }, { "epoch": 1.2724265381552347, "grad_norm": 0.5440387096455964, "learning_rate": 6.174184075624352e-05, "loss": 11.8205, "step": 23367 }, { "epoch": 1.2724809921518179, "grad_norm": 0.5316353551943467, "learning_rate": 6.173369352484786e-05, "loss": 11.9881, "step": 23368 }, { "epoch": 1.2725354461484009, "grad_norm": 0.5195114242919787, "learning_rate": 6.17255465910174e-05, "loss": 11.8855, "step": 23369 }, { "epoch": 1.2725899001449839, "grad_norm": 0.6067601046513386, "learning_rate": 6.171739995481551e-05, "loss": 12.0578, "step": 23370 }, { "epoch": 1.2726443541415668, "grad_norm": 0.5922350874741872, "learning_rate": 6.170925361630557e-05, "loss": 11.955, "step": 23371 }, { "epoch": 1.2726988081381498, "grad_norm": 0.523548089866187, "learning_rate": 6.170110757555088e-05, "loss": 11.9278, "step": 23372 }, { "epoch": 1.2727532621347328, "grad_norm": 0.5592176843160478, "learning_rate": 6.169296183261477e-05, "loss": 11.8571, "step": 23373 }, { "epoch": 1.2728077161313158, "grad_norm": 0.6185745649402246, "learning_rate": 6.168481638756064e-05, "loss": 12.0033, "step": 23374 }, { "epoch": 1.2728621701278988, "grad_norm": 0.5580615287086415, "learning_rate": 6.167667124045178e-05, "loss": 11.9415, "step": 23375 }, { "epoch": 1.2729166241244818, "grad_norm": 0.5315801320232145, "learning_rate": 6.166852639135156e-05, "loss": 11.8717, "step": 23376 }, { "epoch": 1.2729710781210648, "grad_norm": 0.5221864854326923, "learning_rate": 6.16603818403233e-05, "loss": 11.9166, "step": 23377 }, { "epoch": 1.2730255321176478, "grad_norm": 0.49489873083951413, "learning_rate": 6.165223758743037e-05, "loss": 11.9756, "step": 23378 }, { "epoch": 1.2730799861142308, "grad_norm": 0.5284182535982023, "learning_rate": 6.164409363273604e-05, "loss": 11.8718, "step": 23379 }, { "epoch": 1.2731344401108138, "grad_norm": 0.5700556655297687, "learning_rate": 6.163594997630369e-05, "loss": 11.7334, "step": 23380 }, { "epoch": 1.2731888941073968, "grad_norm": 0.5452737708328596, "learning_rate": 6.162780661819665e-05, "loss": 12.0551, "step": 23381 }, { "epoch": 1.27324334810398, "grad_norm": 0.5649806032124066, "learning_rate": 6.161966355847819e-05, "loss": 12.0111, "step": 23382 }, { "epoch": 1.273297802100563, "grad_norm": 0.5035865394764981, "learning_rate": 6.161152079721166e-05, "loss": 12.0424, "step": 23383 }, { "epoch": 1.273352256097146, "grad_norm": 0.5401260476633886, "learning_rate": 6.16033783344604e-05, "loss": 11.9739, "step": 23384 }, { "epoch": 1.273406710093729, "grad_norm": 0.5715111184080219, "learning_rate": 6.159523617028768e-05, "loss": 12.0213, "step": 23385 }, { "epoch": 1.273461164090312, "grad_norm": 0.656240407456829, "learning_rate": 6.158709430475687e-05, "loss": 11.9437, "step": 23386 }, { "epoch": 1.273515618086895, "grad_norm": 0.5331603550570894, "learning_rate": 6.157895273793123e-05, "loss": 11.8895, "step": 23387 }, { "epoch": 1.273570072083478, "grad_norm": 0.5571948358317733, "learning_rate": 6.15708114698741e-05, "loss": 11.9225, "step": 23388 }, { "epoch": 1.273624526080061, "grad_norm": 0.576153893707623, "learning_rate": 6.156267050064881e-05, "loss": 11.9145, "step": 23389 }, { "epoch": 1.273678980076644, "grad_norm": 0.580075660793452, "learning_rate": 6.155452983031862e-05, "loss": 11.9521, "step": 23390 }, { "epoch": 1.2737334340732271, "grad_norm": 0.5433488848620138, "learning_rate": 6.154638945894689e-05, "loss": 11.9118, "step": 23391 }, { "epoch": 1.2737878880698101, "grad_norm": 0.5541799106192558, "learning_rate": 6.153824938659684e-05, "loss": 12.0682, "step": 23392 }, { "epoch": 1.2738423420663931, "grad_norm": 0.8937971582716784, "learning_rate": 6.153010961333184e-05, "loss": 12.0163, "step": 23393 }, { "epoch": 1.2738967960629761, "grad_norm": 0.5796018149701783, "learning_rate": 6.152197013921515e-05, "loss": 11.8705, "step": 23394 }, { "epoch": 1.2739512500595591, "grad_norm": 0.5156819839981832, "learning_rate": 6.151383096431008e-05, "loss": 11.792, "step": 23395 }, { "epoch": 1.274005704056142, "grad_norm": 0.5247084374996511, "learning_rate": 6.150569208867989e-05, "loss": 11.9977, "step": 23396 }, { "epoch": 1.274060158052725, "grad_norm": 0.5045128890404776, "learning_rate": 6.149755351238791e-05, "loss": 11.734, "step": 23397 }, { "epoch": 1.274114612049308, "grad_norm": 0.5428727538934164, "learning_rate": 6.148941523549739e-05, "loss": 11.7835, "step": 23398 }, { "epoch": 1.274169066045891, "grad_norm": 0.5639829723044368, "learning_rate": 6.148127725807166e-05, "loss": 11.9327, "step": 23399 }, { "epoch": 1.274223520042474, "grad_norm": 0.5416806279693785, "learning_rate": 6.147313958017398e-05, "loss": 11.9288, "step": 23400 }, { "epoch": 1.274277974039057, "grad_norm": 0.5457388335316171, "learning_rate": 6.146500220186766e-05, "loss": 12.0199, "step": 23401 }, { "epoch": 1.27433242803564, "grad_norm": 0.5179846459378248, "learning_rate": 6.145686512321588e-05, "loss": 11.9786, "step": 23402 }, { "epoch": 1.274386882032223, "grad_norm": 0.5599616162200108, "learning_rate": 6.1448728344282e-05, "loss": 11.7671, "step": 23403 }, { "epoch": 1.274441336028806, "grad_norm": 0.5396860211024469, "learning_rate": 6.144059186512928e-05, "loss": 11.9588, "step": 23404 }, { "epoch": 1.274495790025389, "grad_norm": 0.5213496629497232, "learning_rate": 6.143245568582099e-05, "loss": 11.9472, "step": 23405 }, { "epoch": 1.2745502440219723, "grad_norm": 0.5279588003393272, "learning_rate": 6.142431980642039e-05, "loss": 11.8882, "step": 23406 }, { "epoch": 1.2746046980185552, "grad_norm": 0.555959952393999, "learning_rate": 6.141618422699074e-05, "loss": 11.8884, "step": 23407 }, { "epoch": 1.2746591520151382, "grad_norm": 0.566584123832805, "learning_rate": 6.14080489475953e-05, "loss": 11.8963, "step": 23408 }, { "epoch": 1.2747136060117212, "grad_norm": 0.5402824653181659, "learning_rate": 6.139991396829735e-05, "loss": 12.0018, "step": 23409 }, { "epoch": 1.2747680600083042, "grad_norm": 0.557910135170613, "learning_rate": 6.139177928916016e-05, "loss": 11.9609, "step": 23410 }, { "epoch": 1.2748225140048872, "grad_norm": 0.5543819070305888, "learning_rate": 6.138364491024696e-05, "loss": 11.8824, "step": 23411 }, { "epoch": 1.2748769680014702, "grad_norm": 0.5733588974557553, "learning_rate": 6.137551083162105e-05, "loss": 12.017, "step": 23412 }, { "epoch": 1.2749314219980532, "grad_norm": 0.48920246956431995, "learning_rate": 6.136737705334559e-05, "loss": 11.9517, "step": 23413 }, { "epoch": 1.2749858759946364, "grad_norm": 0.552769775479179, "learning_rate": 6.13592435754839e-05, "loss": 11.9836, "step": 23414 }, { "epoch": 1.2750403299912194, "grad_norm": 0.5824338554355197, "learning_rate": 6.135111039809922e-05, "loss": 11.9281, "step": 23415 }, { "epoch": 1.2750947839878024, "grad_norm": 0.5714441455621442, "learning_rate": 6.134297752125477e-05, "loss": 11.9724, "step": 23416 }, { "epoch": 1.2751492379843854, "grad_norm": 0.4878833198075495, "learning_rate": 6.133484494501382e-05, "loss": 11.9296, "step": 23417 }, { "epoch": 1.2752036919809684, "grad_norm": 0.5395389807250964, "learning_rate": 6.132671266943962e-05, "loss": 11.9294, "step": 23418 }, { "epoch": 1.2752581459775514, "grad_norm": 0.6499568272746115, "learning_rate": 6.131858069459537e-05, "loss": 12.0162, "step": 23419 }, { "epoch": 1.2753125999741344, "grad_norm": 0.5111275170025651, "learning_rate": 6.13104490205443e-05, "loss": 11.9132, "step": 23420 }, { "epoch": 1.2753670539707174, "grad_norm": 0.5512976507197156, "learning_rate": 6.130231764734968e-05, "loss": 11.8927, "step": 23421 }, { "epoch": 1.2754215079673004, "grad_norm": 0.5757738157915058, "learning_rate": 6.129418657507478e-05, "loss": 11.9277, "step": 23422 }, { "epoch": 1.2754759619638834, "grad_norm": 0.5713838120879103, "learning_rate": 6.128605580378273e-05, "loss": 11.8534, "step": 23423 }, { "epoch": 1.2755304159604663, "grad_norm": 0.5810350740042042, "learning_rate": 6.127792533353681e-05, "loss": 11.9139, "step": 23424 }, { "epoch": 1.2755848699570493, "grad_norm": 0.5520984846109018, "learning_rate": 6.126979516440021e-05, "loss": 11.8544, "step": 23425 }, { "epoch": 1.2756393239536323, "grad_norm": 0.6195542466138565, "learning_rate": 6.12616652964362e-05, "loss": 12.0248, "step": 23426 }, { "epoch": 1.2756937779502153, "grad_norm": 0.6065463181157293, "learning_rate": 6.125353572970798e-05, "loss": 12.0263, "step": 23427 }, { "epoch": 1.2757482319467983, "grad_norm": 0.6022890840270189, "learning_rate": 6.124540646427877e-05, "loss": 11.9128, "step": 23428 }, { "epoch": 1.2758026859433815, "grad_norm": 0.5505979578393485, "learning_rate": 6.123727750021177e-05, "loss": 11.9789, "step": 23429 }, { "epoch": 1.2758571399399645, "grad_norm": 0.5313454778922818, "learning_rate": 6.122914883757019e-05, "loss": 11.8778, "step": 23430 }, { "epoch": 1.2759115939365475, "grad_norm": 0.5165311898873544, "learning_rate": 6.122102047641725e-05, "loss": 11.7385, "step": 23431 }, { "epoch": 1.2759660479331305, "grad_norm": 0.5619170786324981, "learning_rate": 6.121289241681621e-05, "loss": 12.0393, "step": 23432 }, { "epoch": 1.2760205019297135, "grad_norm": 0.5371501925842642, "learning_rate": 6.120476465883018e-05, "loss": 11.9275, "step": 23433 }, { "epoch": 1.2760749559262965, "grad_norm": 0.5539806629730009, "learning_rate": 6.11966372025224e-05, "loss": 11.9057, "step": 23434 }, { "epoch": 1.2761294099228795, "grad_norm": 0.5254273132834162, "learning_rate": 6.11885100479561e-05, "loss": 11.9598, "step": 23435 }, { "epoch": 1.2761838639194625, "grad_norm": 0.5256322934788941, "learning_rate": 6.118038319519441e-05, "loss": 11.8457, "step": 23436 }, { "epoch": 1.2762383179160455, "grad_norm": 0.5742182452294878, "learning_rate": 6.11722566443006e-05, "loss": 12.0103, "step": 23437 }, { "epoch": 1.2762927719126287, "grad_norm": 0.5619782065172815, "learning_rate": 6.116413039533784e-05, "loss": 11.9429, "step": 23438 }, { "epoch": 1.2763472259092117, "grad_norm": 0.5912072790665202, "learning_rate": 6.11560044483693e-05, "loss": 11.9045, "step": 23439 }, { "epoch": 1.2764016799057947, "grad_norm": 0.640642372825217, "learning_rate": 6.114787880345818e-05, "loss": 11.9004, "step": 23440 }, { "epoch": 1.2764561339023777, "grad_norm": 0.529558915606228, "learning_rate": 6.113975346066769e-05, "loss": 11.945, "step": 23441 }, { "epoch": 1.2765105878989607, "grad_norm": 0.5491331705510023, "learning_rate": 6.1131628420061e-05, "loss": 11.865, "step": 23442 }, { "epoch": 1.2765650418955437, "grad_norm": 0.5521629736406414, "learning_rate": 6.112350368170128e-05, "loss": 11.9299, "step": 23443 }, { "epoch": 1.2766194958921266, "grad_norm": 0.5413581163713285, "learning_rate": 6.11153792456517e-05, "loss": 11.999, "step": 23444 }, { "epoch": 1.2766739498887096, "grad_norm": 0.5127209632085405, "learning_rate": 6.110725511197546e-05, "loss": 11.9061, "step": 23445 }, { "epoch": 1.2767284038852926, "grad_norm": 0.5368385954467395, "learning_rate": 6.109913128073572e-05, "loss": 11.9514, "step": 23446 }, { "epoch": 1.2767828578818756, "grad_norm": 0.5673990511629734, "learning_rate": 6.109100775199565e-05, "loss": 12.0544, "step": 23447 }, { "epoch": 1.2768373118784586, "grad_norm": 0.5432429153901958, "learning_rate": 6.108288452581844e-05, "loss": 11.9729, "step": 23448 }, { "epoch": 1.2768917658750416, "grad_norm": 0.531720712228885, "learning_rate": 6.107476160226725e-05, "loss": 12.0446, "step": 23449 }, { "epoch": 1.2769462198716246, "grad_norm": 0.5586792284313403, "learning_rate": 6.106663898140524e-05, "loss": 12.2062, "step": 23450 }, { "epoch": 1.2770006738682076, "grad_norm": 0.5147979941718789, "learning_rate": 6.105851666329557e-05, "loss": 11.9341, "step": 23451 }, { "epoch": 1.2770551278647908, "grad_norm": 0.5359541791723327, "learning_rate": 6.105039464800143e-05, "loss": 11.8957, "step": 23452 }, { "epoch": 1.2771095818613738, "grad_norm": 0.5456329212624745, "learning_rate": 6.104227293558593e-05, "loss": 11.9419, "step": 23453 }, { "epoch": 1.2771640358579568, "grad_norm": 0.692260373025588, "learning_rate": 6.103415152611225e-05, "loss": 12.0249, "step": 23454 }, { "epoch": 1.2772184898545398, "grad_norm": 0.5121736932820726, "learning_rate": 6.1026030419643544e-05, "loss": 11.8515, "step": 23455 }, { "epoch": 1.2772729438511228, "grad_norm": 0.534931595755875, "learning_rate": 6.1017909616242966e-05, "loss": 11.9126, "step": 23456 }, { "epoch": 1.2773273978477058, "grad_norm": 0.6172589535524825, "learning_rate": 6.100978911597366e-05, "loss": 11.936, "step": 23457 }, { "epoch": 1.2773818518442888, "grad_norm": 0.5175136620150501, "learning_rate": 6.1001668918898734e-05, "loss": 11.8191, "step": 23458 }, { "epoch": 1.2774363058408718, "grad_norm": 0.5067862787806349, "learning_rate": 6.099354902508141e-05, "loss": 11.9255, "step": 23459 }, { "epoch": 1.2774907598374547, "grad_norm": 0.5599109908899864, "learning_rate": 6.098542943458478e-05, "loss": 12.0136, "step": 23460 }, { "epoch": 1.277545213834038, "grad_norm": 0.5299940958146621, "learning_rate": 6.0977310147472e-05, "loss": 11.8644, "step": 23461 }, { "epoch": 1.277599667830621, "grad_norm": 0.5700600588235133, "learning_rate": 6.096919116380622e-05, "loss": 11.9795, "step": 23462 }, { "epoch": 1.277654121827204, "grad_norm": 0.5638360644329294, "learning_rate": 6.0961072483650526e-05, "loss": 11.9326, "step": 23463 }, { "epoch": 1.277708575823787, "grad_norm": 0.6578693654611429, "learning_rate": 6.095295410706809e-05, "loss": 12.0909, "step": 23464 }, { "epoch": 1.27776302982037, "grad_norm": 0.533684078825207, "learning_rate": 6.094483603412203e-05, "loss": 11.9293, "step": 23465 }, { "epoch": 1.277817483816953, "grad_norm": 0.47515698786075217, "learning_rate": 6.093671826487547e-05, "loss": 11.9598, "step": 23466 }, { "epoch": 1.277871937813536, "grad_norm": 0.5637803768239379, "learning_rate": 6.092860079939154e-05, "loss": 12.0752, "step": 23467 }, { "epoch": 1.277926391810119, "grad_norm": 0.574273764631386, "learning_rate": 6.092048363773337e-05, "loss": 11.9568, "step": 23468 }, { "epoch": 1.277980845806702, "grad_norm": 0.522232082491125, "learning_rate": 6.0912366779964035e-05, "loss": 11.9929, "step": 23469 }, { "epoch": 1.278035299803285, "grad_norm": 0.5076968821147885, "learning_rate": 6.0904250226146734e-05, "loss": 11.9391, "step": 23470 }, { "epoch": 1.278089753799868, "grad_norm": 0.5842175089414539, "learning_rate": 6.0896133976344526e-05, "loss": 11.8999, "step": 23471 }, { "epoch": 1.2781442077964509, "grad_norm": 0.5444197913072716, "learning_rate": 6.0888018030620586e-05, "loss": 11.8363, "step": 23472 }, { "epoch": 1.2781986617930339, "grad_norm": 0.6741627689583077, "learning_rate": 6.0879902389037915e-05, "loss": 12.0643, "step": 23473 }, { "epoch": 1.2782531157896169, "grad_norm": 0.5012801147988735, "learning_rate": 6.087178705165969e-05, "loss": 11.8394, "step": 23474 }, { "epoch": 1.2783075697861999, "grad_norm": 0.5118881877419978, "learning_rate": 6.086367201854902e-05, "loss": 11.9598, "step": 23475 }, { "epoch": 1.278362023782783, "grad_norm": 0.564274403607142, "learning_rate": 6.085555728976899e-05, "loss": 11.8439, "step": 23476 }, { "epoch": 1.278416477779366, "grad_norm": 0.5169678361466552, "learning_rate": 6.084744286538273e-05, "loss": 11.8106, "step": 23477 }, { "epoch": 1.278470931775949, "grad_norm": 0.5286873537986246, "learning_rate": 6.08393287454533e-05, "loss": 11.986, "step": 23478 }, { "epoch": 1.278525385772532, "grad_norm": 0.5170143293378469, "learning_rate": 6.083121493004384e-05, "loss": 11.962, "step": 23479 }, { "epoch": 1.278579839769115, "grad_norm": 0.5290861828173461, "learning_rate": 6.082310141921739e-05, "loss": 11.9349, "step": 23480 }, { "epoch": 1.278634293765698, "grad_norm": 0.5574460050941807, "learning_rate": 6.0814988213037094e-05, "loss": 11.9356, "step": 23481 }, { "epoch": 1.278688747762281, "grad_norm": 0.7228777916774161, "learning_rate": 6.080687531156606e-05, "loss": 11.964, "step": 23482 }, { "epoch": 1.278743201758864, "grad_norm": 0.5450852058212383, "learning_rate": 6.079876271486729e-05, "loss": 11.9233, "step": 23483 }, { "epoch": 1.2787976557554472, "grad_norm": 0.5047807597759899, "learning_rate": 6.079065042300393e-05, "loss": 11.7822, "step": 23484 }, { "epoch": 1.2788521097520302, "grad_norm": 0.5826234685577726, "learning_rate": 6.0782538436039e-05, "loss": 11.8873, "step": 23485 }, { "epoch": 1.2789065637486132, "grad_norm": 0.4916929922109629, "learning_rate": 6.0774426754035685e-05, "loss": 11.9222, "step": 23486 }, { "epoch": 1.2789610177451962, "grad_norm": 0.5386901618287323, "learning_rate": 6.076631537705698e-05, "loss": 11.9255, "step": 23487 }, { "epoch": 1.2790154717417792, "grad_norm": 0.5870239611376995, "learning_rate": 6.075820430516599e-05, "loss": 12.0858, "step": 23488 }, { "epoch": 1.2790699257383622, "grad_norm": 0.4760090413465526, "learning_rate": 6.0750093538425804e-05, "loss": 11.8747, "step": 23489 }, { "epoch": 1.2791243797349452, "grad_norm": 0.5780090479800983, "learning_rate": 6.074198307689945e-05, "loss": 11.9545, "step": 23490 }, { "epoch": 1.2791788337315282, "grad_norm": 0.5252310941109722, "learning_rate": 6.0733872920650026e-05, "loss": 11.9644, "step": 23491 }, { "epoch": 1.2792332877281112, "grad_norm": 0.5351756410748505, "learning_rate": 6.0725763069740606e-05, "loss": 11.8312, "step": 23492 }, { "epoch": 1.2792877417246942, "grad_norm": 0.661062772247031, "learning_rate": 6.071765352423422e-05, "loss": 11.9972, "step": 23493 }, { "epoch": 1.2793421957212772, "grad_norm": 0.49745495292634456, "learning_rate": 6.070954428419395e-05, "loss": 11.8081, "step": 23494 }, { "epoch": 1.2793966497178602, "grad_norm": 0.5352121245957874, "learning_rate": 6.070143534968286e-05, "loss": 12.01, "step": 23495 }, { "epoch": 1.2794511037144431, "grad_norm": 0.5423569846107363, "learning_rate": 6.069332672076398e-05, "loss": 12.0703, "step": 23496 }, { "epoch": 1.2795055577110261, "grad_norm": 0.5854606957132008, "learning_rate": 6.068521839750039e-05, "loss": 11.911, "step": 23497 }, { "epoch": 1.2795600117076091, "grad_norm": 0.5446942177758455, "learning_rate": 6.067711037995514e-05, "loss": 11.9375, "step": 23498 }, { "epoch": 1.2796144657041923, "grad_norm": 0.6168174619334406, "learning_rate": 6.066900266819127e-05, "loss": 12.051, "step": 23499 }, { "epoch": 1.2796689197007753, "grad_norm": 0.6167618173501145, "learning_rate": 6.066089526227183e-05, "loss": 12.0398, "step": 23500 }, { "epoch": 1.2797233736973583, "grad_norm": 0.5456757129491338, "learning_rate": 6.065278816225988e-05, "loss": 11.9841, "step": 23501 }, { "epoch": 1.2797778276939413, "grad_norm": 0.5606503968145746, "learning_rate": 6.0644681368218457e-05, "loss": 11.976, "step": 23502 }, { "epoch": 1.2798322816905243, "grad_norm": 0.6417888688006007, "learning_rate": 6.0636574880210574e-05, "loss": 11.9576, "step": 23503 }, { "epoch": 1.2798867356871073, "grad_norm": 0.536462312518082, "learning_rate": 6.062846869829929e-05, "loss": 11.868, "step": 23504 }, { "epoch": 1.2799411896836903, "grad_norm": 0.4967739744330492, "learning_rate": 6.062036282254764e-05, "loss": 11.857, "step": 23505 }, { "epoch": 1.2799956436802733, "grad_norm": 0.5456281349254622, "learning_rate": 6.0612257253018646e-05, "loss": 12.0076, "step": 23506 }, { "epoch": 1.2800500976768563, "grad_norm": 0.5479946047060017, "learning_rate": 6.0604151989775316e-05, "loss": 11.9893, "step": 23507 }, { "epoch": 1.2801045516734395, "grad_norm": 0.5749185320388244, "learning_rate": 6.059604703288073e-05, "loss": 11.8828, "step": 23508 }, { "epoch": 1.2801590056700225, "grad_norm": 0.5171287018473631, "learning_rate": 6.05879423823979e-05, "loss": 11.8257, "step": 23509 }, { "epoch": 1.2802134596666055, "grad_norm": 0.6543870163652833, "learning_rate": 6.0579838038389826e-05, "loss": 11.9345, "step": 23510 }, { "epoch": 1.2802679136631885, "grad_norm": 0.49841729897115733, "learning_rate": 6.0571734000919554e-05, "loss": 11.9877, "step": 23511 }, { "epoch": 1.2803223676597715, "grad_norm": 0.5220794510578373, "learning_rate": 6.056363027005011e-05, "loss": 11.9932, "step": 23512 }, { "epoch": 1.2803768216563545, "grad_norm": 0.6025757275996647, "learning_rate": 6.055552684584447e-05, "loss": 11.9358, "step": 23513 }, { "epoch": 1.2804312756529375, "grad_norm": 0.5372976783892274, "learning_rate": 6.054742372836566e-05, "loss": 11.9075, "step": 23514 }, { "epoch": 1.2804857296495205, "grad_norm": 0.5521924338851903, "learning_rate": 6.0539320917676714e-05, "loss": 11.9101, "step": 23515 }, { "epoch": 1.2805401836461034, "grad_norm": 0.5861845568906207, "learning_rate": 6.0531218413840616e-05, "loss": 11.8921, "step": 23516 }, { "epoch": 1.2805946376426864, "grad_norm": 0.558562405022393, "learning_rate": 6.0523116216920374e-05, "loss": 11.8053, "step": 23517 }, { "epoch": 1.2806490916392694, "grad_norm": 0.517342550968619, "learning_rate": 6.0515014326978994e-05, "loss": 11.9603, "step": 23518 }, { "epoch": 1.2807035456358524, "grad_norm": 0.5778837259537967, "learning_rate": 6.05069127440795e-05, "loss": 11.9863, "step": 23519 }, { "epoch": 1.2807579996324354, "grad_norm": 0.5400045352346342, "learning_rate": 6.0498811468284876e-05, "loss": 11.9889, "step": 23520 }, { "epoch": 1.2808124536290184, "grad_norm": 0.5218498590662102, "learning_rate": 6.049071049965811e-05, "loss": 11.7868, "step": 23521 }, { "epoch": 1.2808669076256016, "grad_norm": 0.5186413862549286, "learning_rate": 6.048260983826224e-05, "loss": 12.0402, "step": 23522 }, { "epoch": 1.2809213616221846, "grad_norm": 0.5169947844750505, "learning_rate": 6.04745094841602e-05, "loss": 11.9166, "step": 23523 }, { "epoch": 1.2809758156187676, "grad_norm": 0.5430486455984835, "learning_rate": 6.0466409437414996e-05, "loss": 11.91, "step": 23524 }, { "epoch": 1.2810302696153506, "grad_norm": 0.5519218148989543, "learning_rate": 6.045830969808963e-05, "loss": 11.9521, "step": 23525 }, { "epoch": 1.2810847236119336, "grad_norm": 0.6091077701215787, "learning_rate": 6.045021026624707e-05, "loss": 11.9696, "step": 23526 }, { "epoch": 1.2811391776085166, "grad_norm": 0.5080581539476212, "learning_rate": 6.0442111141950306e-05, "loss": 11.9114, "step": 23527 }, { "epoch": 1.2811936316050996, "grad_norm": 0.5620295368624172, "learning_rate": 6.0434012325262336e-05, "loss": 11.9139, "step": 23528 }, { "epoch": 1.2812480856016826, "grad_norm": 0.5600735699797295, "learning_rate": 6.042591381624608e-05, "loss": 11.8936, "step": 23529 }, { "epoch": 1.2813025395982656, "grad_norm": 0.5644602481955032, "learning_rate": 6.041781561496458e-05, "loss": 11.8653, "step": 23530 }, { "epoch": 1.2813569935948488, "grad_norm": 0.5071749127778168, "learning_rate": 6.0409717721480796e-05, "loss": 11.93, "step": 23531 }, { "epoch": 1.2814114475914318, "grad_norm": 0.5515682328891715, "learning_rate": 6.040162013585772e-05, "loss": 11.8772, "step": 23532 }, { "epoch": 1.2814659015880148, "grad_norm": 0.5675425006480583, "learning_rate": 6.039352285815823e-05, "loss": 11.881, "step": 23533 }, { "epoch": 1.2815203555845978, "grad_norm": 0.5471934437434559, "learning_rate": 6.038542588844536e-05, "loss": 12.0435, "step": 23534 }, { "epoch": 1.2815748095811808, "grad_norm": 0.5565368557901031, "learning_rate": 6.037732922678206e-05, "loss": 11.98, "step": 23535 }, { "epoch": 1.2816292635777637, "grad_norm": 0.5070680059614715, "learning_rate": 6.036923287323131e-05, "loss": 11.7727, "step": 23536 }, { "epoch": 1.2816837175743467, "grad_norm": 0.553493808862405, "learning_rate": 6.0361136827856025e-05, "loss": 12.0505, "step": 23537 }, { "epoch": 1.2817381715709297, "grad_norm": 0.5608070685866844, "learning_rate": 6.0353041090719196e-05, "loss": 12.0352, "step": 23538 }, { "epoch": 1.2817926255675127, "grad_norm": 0.5491546897844471, "learning_rate": 6.034494566188378e-05, "loss": 11.9842, "step": 23539 }, { "epoch": 1.2818470795640957, "grad_norm": 0.5405475520046354, "learning_rate": 6.033685054141272e-05, "loss": 11.9077, "step": 23540 }, { "epoch": 1.2819015335606787, "grad_norm": 0.5240052472356858, "learning_rate": 6.0328755729368925e-05, "loss": 11.8298, "step": 23541 }, { "epoch": 1.2819559875572617, "grad_norm": 0.4917994796912182, "learning_rate": 6.032066122581545e-05, "loss": 11.8881, "step": 23542 }, { "epoch": 1.2820104415538447, "grad_norm": 0.5027360717309111, "learning_rate": 6.031256703081511e-05, "loss": 11.9357, "step": 23543 }, { "epoch": 1.2820648955504277, "grad_norm": 0.5170751853146255, "learning_rate": 6.0304473144430926e-05, "loss": 11.8487, "step": 23544 }, { "epoch": 1.2821193495470107, "grad_norm": 0.5896824757083037, "learning_rate": 6.0296379566725794e-05, "loss": 11.8997, "step": 23545 }, { "epoch": 1.282173803543594, "grad_norm": 0.5906842332292388, "learning_rate": 6.028828629776269e-05, "loss": 11.8785, "step": 23546 }, { "epoch": 1.2822282575401769, "grad_norm": 0.5480852917853959, "learning_rate": 6.028019333760452e-05, "loss": 11.8501, "step": 23547 }, { "epoch": 1.2822827115367599, "grad_norm": 0.5572639996001882, "learning_rate": 6.0272100686314234e-05, "loss": 11.9747, "step": 23548 }, { "epoch": 1.2823371655333429, "grad_norm": 0.5369151321080103, "learning_rate": 6.0264008343954757e-05, "loss": 11.894, "step": 23549 }, { "epoch": 1.2823916195299259, "grad_norm": 0.5091076563718924, "learning_rate": 6.025591631058901e-05, "loss": 11.925, "step": 23550 }, { "epoch": 1.2824460735265089, "grad_norm": 0.6046272320357335, "learning_rate": 6.0247824586279934e-05, "loss": 11.9582, "step": 23551 }, { "epoch": 1.2825005275230918, "grad_norm": 0.5397308357040135, "learning_rate": 6.023973317109042e-05, "loss": 11.9748, "step": 23552 }, { "epoch": 1.2825549815196748, "grad_norm": 0.5633565540476361, "learning_rate": 6.0231642065083446e-05, "loss": 11.9608, "step": 23553 }, { "epoch": 1.282609435516258, "grad_norm": 0.5766835774345143, "learning_rate": 6.0223551268321874e-05, "loss": 12.1016, "step": 23554 }, { "epoch": 1.282663889512841, "grad_norm": 0.5828712749179817, "learning_rate": 6.021546078086864e-05, "loss": 11.944, "step": 23555 }, { "epoch": 1.282718343509424, "grad_norm": 0.5719930660253305, "learning_rate": 6.0207370602786626e-05, "loss": 11.9348, "step": 23556 }, { "epoch": 1.282772797506007, "grad_norm": 0.4884218997155032, "learning_rate": 6.019928073413879e-05, "loss": 11.8262, "step": 23557 }, { "epoch": 1.28282725150259, "grad_norm": 0.5204375143460239, "learning_rate": 6.019119117498802e-05, "loss": 11.9146, "step": 23558 }, { "epoch": 1.282881705499173, "grad_norm": 0.5678449367458687, "learning_rate": 6.018310192539722e-05, "loss": 11.979, "step": 23559 }, { "epoch": 1.282936159495756, "grad_norm": 0.5339692036640276, "learning_rate": 6.0175012985429313e-05, "loss": 11.9012, "step": 23560 }, { "epoch": 1.282990613492339, "grad_norm": 0.5594405997241506, "learning_rate": 6.0166924355147166e-05, "loss": 11.9855, "step": 23561 }, { "epoch": 1.283045067488922, "grad_norm": 0.5583463392382488, "learning_rate": 6.015883603461372e-05, "loss": 11.9826, "step": 23562 }, { "epoch": 1.283099521485505, "grad_norm": 0.529926138611232, "learning_rate": 6.015074802389184e-05, "loss": 11.8863, "step": 23563 }, { "epoch": 1.283153975482088, "grad_norm": 0.5203733800981577, "learning_rate": 6.0142660323044434e-05, "loss": 11.8016, "step": 23564 }, { "epoch": 1.283208429478671, "grad_norm": 0.5748523056640842, "learning_rate": 6.0134572932134356e-05, "loss": 11.8061, "step": 23565 }, { "epoch": 1.283262883475254, "grad_norm": 0.5779499015108761, "learning_rate": 6.0126485851224534e-05, "loss": 11.9713, "step": 23566 }, { "epoch": 1.283317337471837, "grad_norm": 0.5401640476135271, "learning_rate": 6.011839908037783e-05, "loss": 11.9529, "step": 23567 }, { "epoch": 1.28337179146842, "grad_norm": 0.538671360492073, "learning_rate": 6.011031261965716e-05, "loss": 11.8232, "step": 23568 }, { "epoch": 1.2834262454650032, "grad_norm": 0.5492423700879108, "learning_rate": 6.010222646912539e-05, "loss": 11.7709, "step": 23569 }, { "epoch": 1.2834806994615862, "grad_norm": 0.5787543441007559, "learning_rate": 6.00941406288454e-05, "loss": 11.8725, "step": 23570 }, { "epoch": 1.2835351534581692, "grad_norm": 0.6230576628149644, "learning_rate": 6.0086055098880055e-05, "loss": 11.9372, "step": 23571 }, { "epoch": 1.2835896074547521, "grad_norm": 0.5521267877334615, "learning_rate": 6.007796987929225e-05, "loss": 11.9578, "step": 23572 }, { "epoch": 1.2836440614513351, "grad_norm": 0.5963668936277664, "learning_rate": 6.0069884970144865e-05, "loss": 11.9397, "step": 23573 }, { "epoch": 1.2836985154479181, "grad_norm": 0.5510056906815245, "learning_rate": 6.006180037150073e-05, "loss": 11.6585, "step": 23574 }, { "epoch": 1.2837529694445011, "grad_norm": 0.5483257883956336, "learning_rate": 6.005371608342272e-05, "loss": 11.8992, "step": 23575 }, { "epoch": 1.2838074234410841, "grad_norm": 0.5198488024735248, "learning_rate": 6.004563210597372e-05, "loss": 11.9707, "step": 23576 }, { "epoch": 1.283861877437667, "grad_norm": 0.5488059841016077, "learning_rate": 6.0037548439216596e-05, "loss": 11.9824, "step": 23577 }, { "epoch": 1.2839163314342503, "grad_norm": 0.5917836802938782, "learning_rate": 6.0029465083214166e-05, "loss": 11.8574, "step": 23578 }, { "epoch": 1.2839707854308333, "grad_norm": 0.5013777123048397, "learning_rate": 6.002138203802934e-05, "loss": 11.8769, "step": 23579 }, { "epoch": 1.2840252394274163, "grad_norm": 0.5305015348335042, "learning_rate": 6.0013299303724955e-05, "loss": 11.9521, "step": 23580 }, { "epoch": 1.2840796934239993, "grad_norm": 0.5501595583170282, "learning_rate": 6.0005216880363866e-05, "loss": 11.9786, "step": 23581 }, { "epoch": 1.2841341474205823, "grad_norm": 0.4893357817304148, "learning_rate": 5.99971347680089e-05, "loss": 11.8602, "step": 23582 }, { "epoch": 1.2841886014171653, "grad_norm": 0.5931966388465176, "learning_rate": 5.998905296672296e-05, "loss": 12.0242, "step": 23583 }, { "epoch": 1.2842430554137483, "grad_norm": 0.502447089109366, "learning_rate": 5.9980971476568825e-05, "loss": 11.9041, "step": 23584 }, { "epoch": 1.2842975094103313, "grad_norm": 0.5075812170370226, "learning_rate": 5.997289029760937e-05, "loss": 11.8359, "step": 23585 }, { "epoch": 1.2843519634069143, "grad_norm": 0.5448849983072174, "learning_rate": 5.996480942990743e-05, "loss": 11.8758, "step": 23586 }, { "epoch": 1.2844064174034973, "grad_norm": 0.553796465616994, "learning_rate": 5.995672887352586e-05, "loss": 11.9681, "step": 23587 }, { "epoch": 1.2844608714000803, "grad_norm": 0.5487255265970418, "learning_rate": 5.994864862852746e-05, "loss": 11.8359, "step": 23588 }, { "epoch": 1.2845153253966632, "grad_norm": 0.573686709985358, "learning_rate": 5.9940568694975096e-05, "loss": 11.9695, "step": 23589 }, { "epoch": 1.2845697793932462, "grad_norm": 0.5637612380220137, "learning_rate": 5.993248907293156e-05, "loss": 11.7809, "step": 23590 }, { "epoch": 1.2846242333898292, "grad_norm": 0.4951625100799867, "learning_rate": 5.9924409762459746e-05, "loss": 11.815, "step": 23591 }, { "epoch": 1.2846786873864124, "grad_norm": 0.5436142416323838, "learning_rate": 5.991633076362242e-05, "loss": 12.0503, "step": 23592 }, { "epoch": 1.2847331413829954, "grad_norm": 0.5323256001549563, "learning_rate": 5.990825207648247e-05, "loss": 12.0098, "step": 23593 }, { "epoch": 1.2847875953795784, "grad_norm": 0.5898551135137177, "learning_rate": 5.9900173701102634e-05, "loss": 11.9454, "step": 23594 }, { "epoch": 1.2848420493761614, "grad_norm": 0.4723549336831795, "learning_rate": 5.9892095637545784e-05, "loss": 11.9479, "step": 23595 }, { "epoch": 1.2848965033727444, "grad_norm": 0.4998330178812302, "learning_rate": 5.988401788587472e-05, "loss": 11.88, "step": 23596 }, { "epoch": 1.2849509573693274, "grad_norm": 0.5051779881653149, "learning_rate": 5.987594044615225e-05, "loss": 11.9079, "step": 23597 }, { "epoch": 1.2850054113659104, "grad_norm": 0.5245486970219934, "learning_rate": 5.986786331844122e-05, "loss": 11.889, "step": 23598 }, { "epoch": 1.2850598653624934, "grad_norm": 0.5615451894349467, "learning_rate": 5.98597865028044e-05, "loss": 11.7919, "step": 23599 }, { "epoch": 1.2851143193590764, "grad_norm": 0.4951813094482244, "learning_rate": 5.9851709999304615e-05, "loss": 12.0349, "step": 23600 }, { "epoch": 1.2851687733556596, "grad_norm": 0.5609478608796767, "learning_rate": 5.984363380800465e-05, "loss": 12.0273, "step": 23601 }, { "epoch": 1.2852232273522426, "grad_norm": 0.5654504929409612, "learning_rate": 5.9835557928967335e-05, "loss": 11.9728, "step": 23602 }, { "epoch": 1.2852776813488256, "grad_norm": 0.5180004123328328, "learning_rate": 5.982748236225551e-05, "loss": 11.9134, "step": 23603 }, { "epoch": 1.2853321353454086, "grad_norm": 0.5284728078042654, "learning_rate": 5.9819407107931856e-05, "loss": 11.8346, "step": 23604 }, { "epoch": 1.2853865893419916, "grad_norm": 0.5193660021771949, "learning_rate": 5.981133216605923e-05, "loss": 11.9533, "step": 23605 }, { "epoch": 1.2854410433385746, "grad_norm": 0.5128027865249251, "learning_rate": 5.9803257536700444e-05, "loss": 11.9465, "step": 23606 }, { "epoch": 1.2854954973351576, "grad_norm": 0.5292023147523693, "learning_rate": 5.979518321991826e-05, "loss": 11.9244, "step": 23607 }, { "epoch": 1.2855499513317405, "grad_norm": 0.5840133205707407, "learning_rate": 5.9787109215775484e-05, "loss": 11.9686, "step": 23608 }, { "epoch": 1.2856044053283235, "grad_norm": 0.5453332728022575, "learning_rate": 5.977903552433488e-05, "loss": 11.8874, "step": 23609 }, { "epoch": 1.2856588593249065, "grad_norm": 0.5694304844855177, "learning_rate": 5.9770962145659245e-05, "loss": 12.0776, "step": 23610 }, { "epoch": 1.2857133133214895, "grad_norm": 0.5776448964429606, "learning_rate": 5.9762889079811354e-05, "loss": 11.9809, "step": 23611 }, { "epoch": 1.2857677673180725, "grad_norm": 0.6017741977456825, "learning_rate": 5.975481632685396e-05, "loss": 11.8373, "step": 23612 }, { "epoch": 1.2858222213146555, "grad_norm": 0.5445144755282323, "learning_rate": 5.974674388684993e-05, "loss": 11.8613, "step": 23613 }, { "epoch": 1.2858766753112385, "grad_norm": 0.5497804284953607, "learning_rate": 5.973867175986193e-05, "loss": 11.9679, "step": 23614 }, { "epoch": 1.2859311293078217, "grad_norm": 0.5528742445411363, "learning_rate": 5.973059994595277e-05, "loss": 11.9977, "step": 23615 }, { "epoch": 1.2859855833044047, "grad_norm": 0.6365557765121772, "learning_rate": 5.97225284451852e-05, "loss": 11.8794, "step": 23616 }, { "epoch": 1.2860400373009877, "grad_norm": 0.5608041996493477, "learning_rate": 5.9714457257622016e-05, "loss": 12.0025, "step": 23617 }, { "epoch": 1.2860944912975707, "grad_norm": 0.4649658802703738, "learning_rate": 5.9706386383325976e-05, "loss": 11.8543, "step": 23618 }, { "epoch": 1.2861489452941537, "grad_norm": 0.5246046723573995, "learning_rate": 5.969831582235983e-05, "loss": 11.7959, "step": 23619 }, { "epoch": 1.2862033992907367, "grad_norm": 0.481864440579084, "learning_rate": 5.969024557478633e-05, "loss": 11.9564, "step": 23620 }, { "epoch": 1.2862578532873197, "grad_norm": 0.5037851902097632, "learning_rate": 5.9682175640668244e-05, "loss": 11.7368, "step": 23621 }, { "epoch": 1.2863123072839027, "grad_norm": 0.5100480487418915, "learning_rate": 5.967410602006833e-05, "loss": 11.8962, "step": 23622 }, { "epoch": 1.2863667612804857, "grad_norm": 0.5635853340470395, "learning_rate": 5.9666036713049356e-05, "loss": 11.7806, "step": 23623 }, { "epoch": 1.2864212152770689, "grad_norm": 0.5291879304710763, "learning_rate": 5.9657967719674015e-05, "loss": 11.9973, "step": 23624 }, { "epoch": 1.2864756692736519, "grad_norm": 0.5879224653664377, "learning_rate": 5.9649899040005085e-05, "loss": 11.9562, "step": 23625 }, { "epoch": 1.2865301232702349, "grad_norm": 0.4965507218558734, "learning_rate": 5.9641830674105294e-05, "loss": 11.877, "step": 23626 }, { "epoch": 1.2865845772668179, "grad_norm": 0.6443914294268037, "learning_rate": 5.9633762622037396e-05, "loss": 12.0468, "step": 23627 }, { "epoch": 1.2866390312634008, "grad_norm": 0.5358332881599275, "learning_rate": 5.962569488386413e-05, "loss": 11.9733, "step": 23628 }, { "epoch": 1.2866934852599838, "grad_norm": 0.4888460672702682, "learning_rate": 5.9617627459648253e-05, "loss": 11.755, "step": 23629 }, { "epoch": 1.2867479392565668, "grad_norm": 0.5055719708645405, "learning_rate": 5.960956034945245e-05, "loss": 11.8508, "step": 23630 }, { "epoch": 1.2868023932531498, "grad_norm": 0.5385345014820144, "learning_rate": 5.96014935533395e-05, "loss": 11.8001, "step": 23631 }, { "epoch": 1.2868568472497328, "grad_norm": 0.5515374747980404, "learning_rate": 5.95934270713721e-05, "loss": 11.989, "step": 23632 }, { "epoch": 1.2869113012463158, "grad_norm": 0.5364470059610076, "learning_rate": 5.958536090361302e-05, "loss": 11.8877, "step": 23633 }, { "epoch": 1.2869657552428988, "grad_norm": 0.6145460880505714, "learning_rate": 5.9577295050124926e-05, "loss": 12.0312, "step": 23634 }, { "epoch": 1.2870202092394818, "grad_norm": 0.6416609088364478, "learning_rate": 5.956922951097055e-05, "loss": 11.8751, "step": 23635 }, { "epoch": 1.2870746632360648, "grad_norm": 0.5836040120019552, "learning_rate": 5.956116428621263e-05, "loss": 11.9816, "step": 23636 }, { "epoch": 1.2871291172326478, "grad_norm": 0.5161969708347284, "learning_rate": 5.955309937591389e-05, "loss": 11.936, "step": 23637 }, { "epoch": 1.2871835712292308, "grad_norm": 0.5620302967688622, "learning_rate": 5.954503478013703e-05, "loss": 11.9001, "step": 23638 }, { "epoch": 1.287238025225814, "grad_norm": 0.6406360918722982, "learning_rate": 5.9536970498944745e-05, "loss": 11.8731, "step": 23639 }, { "epoch": 1.287292479222397, "grad_norm": 0.5256318177011917, "learning_rate": 5.9528906532399776e-05, "loss": 11.9375, "step": 23640 }, { "epoch": 1.28734693321898, "grad_norm": 0.5221750789978684, "learning_rate": 5.952084288056482e-05, "loss": 11.951, "step": 23641 }, { "epoch": 1.287401387215563, "grad_norm": 0.5805291395444275, "learning_rate": 5.951277954350259e-05, "loss": 11.9437, "step": 23642 }, { "epoch": 1.287455841212146, "grad_norm": 0.5745203857240743, "learning_rate": 5.950471652127578e-05, "loss": 11.9151, "step": 23643 }, { "epoch": 1.287510295208729, "grad_norm": 0.5976754697185809, "learning_rate": 5.949665381394707e-05, "loss": 12.0609, "step": 23644 }, { "epoch": 1.287564749205312, "grad_norm": 0.5771545026760481, "learning_rate": 5.948859142157917e-05, "loss": 11.8742, "step": 23645 }, { "epoch": 1.287619203201895, "grad_norm": 0.5279876863213004, "learning_rate": 5.948052934423478e-05, "loss": 11.8626, "step": 23646 }, { "epoch": 1.287673657198478, "grad_norm": 0.48604035686339353, "learning_rate": 5.947246758197658e-05, "loss": 11.9474, "step": 23647 }, { "epoch": 1.2877281111950611, "grad_norm": 0.6153703921357986, "learning_rate": 5.946440613486728e-05, "loss": 11.9791, "step": 23648 }, { "epoch": 1.2877825651916441, "grad_norm": 0.49441011159873605, "learning_rate": 5.945634500296955e-05, "loss": 11.8251, "step": 23649 }, { "epoch": 1.2878370191882271, "grad_norm": 0.5557404908758066, "learning_rate": 5.944828418634607e-05, "loss": 11.8788, "step": 23650 }, { "epoch": 1.2878914731848101, "grad_norm": 0.5616918914548961, "learning_rate": 5.9440223685059536e-05, "loss": 11.9618, "step": 23651 }, { "epoch": 1.2879459271813931, "grad_norm": 0.5308384410030225, "learning_rate": 5.9432163499172645e-05, "loss": 11.8682, "step": 23652 }, { "epoch": 1.288000381177976, "grad_norm": 0.5100486469580383, "learning_rate": 5.942410362874807e-05, "loss": 11.93, "step": 23653 }, { "epoch": 1.288054835174559, "grad_norm": 0.5627076991753851, "learning_rate": 5.941604407384842e-05, "loss": 11.8506, "step": 23654 }, { "epoch": 1.288109289171142, "grad_norm": 0.5809962828595369, "learning_rate": 5.940798483453645e-05, "loss": 12.1106, "step": 23655 }, { "epoch": 1.288163743167725, "grad_norm": 0.5761211825251635, "learning_rate": 5.939992591087478e-05, "loss": 11.9726, "step": 23656 }, { "epoch": 1.288218197164308, "grad_norm": 0.5176080305630002, "learning_rate": 5.939186730292611e-05, "loss": 11.832, "step": 23657 }, { "epoch": 1.288272651160891, "grad_norm": 0.5168934673239702, "learning_rate": 5.938380901075308e-05, "loss": 11.9764, "step": 23658 }, { "epoch": 1.288327105157474, "grad_norm": 0.5240142885989021, "learning_rate": 5.937575103441836e-05, "loss": 11.9576, "step": 23659 }, { "epoch": 1.288381559154057, "grad_norm": 0.5473455566039885, "learning_rate": 5.936769337398462e-05, "loss": 12.0432, "step": 23660 }, { "epoch": 1.28843601315064, "grad_norm": 0.533416332353287, "learning_rate": 5.935963602951449e-05, "loss": 11.8169, "step": 23661 }, { "epoch": 1.2884904671472233, "grad_norm": 0.5651284474850365, "learning_rate": 5.9351579001070655e-05, "loss": 12.0078, "step": 23662 }, { "epoch": 1.2885449211438063, "grad_norm": 0.6008388714869841, "learning_rate": 5.93435222887158e-05, "loss": 11.9991, "step": 23663 }, { "epoch": 1.2885993751403892, "grad_norm": 0.5592068078442981, "learning_rate": 5.933546589251251e-05, "loss": 11.9556, "step": 23664 }, { "epoch": 1.2886538291369722, "grad_norm": 0.6156076208952148, "learning_rate": 5.9327409812523424e-05, "loss": 11.9975, "step": 23665 }, { "epoch": 1.2887082831335552, "grad_norm": 0.5952681026970403, "learning_rate": 5.9319354048811237e-05, "loss": 11.9256, "step": 23666 }, { "epoch": 1.2887627371301382, "grad_norm": 0.5675388295157449, "learning_rate": 5.931129860143858e-05, "loss": 11.8337, "step": 23667 }, { "epoch": 1.2888171911267212, "grad_norm": 0.5549848730510045, "learning_rate": 5.9303243470468095e-05, "loss": 11.9471, "step": 23668 }, { "epoch": 1.2888716451233042, "grad_norm": 0.5394915762639385, "learning_rate": 5.929518865596241e-05, "loss": 11.9956, "step": 23669 }, { "epoch": 1.2889260991198872, "grad_norm": 0.5430149223289498, "learning_rate": 5.928713415798416e-05, "loss": 11.9668, "step": 23670 }, { "epoch": 1.2889805531164704, "grad_norm": 0.5864954645272095, "learning_rate": 5.927907997659598e-05, "loss": 11.9191, "step": 23671 }, { "epoch": 1.2890350071130534, "grad_norm": 0.5578452271628115, "learning_rate": 5.9271026111860486e-05, "loss": 11.8341, "step": 23672 }, { "epoch": 1.2890894611096364, "grad_norm": 0.5312293951369268, "learning_rate": 5.926297256384039e-05, "loss": 11.844, "step": 23673 }, { "epoch": 1.2891439151062194, "grad_norm": 0.5332665994177112, "learning_rate": 5.92549193325982e-05, "loss": 11.982, "step": 23674 }, { "epoch": 1.2891983691028024, "grad_norm": 0.5135015601352471, "learning_rate": 5.92468664181966e-05, "loss": 12.0161, "step": 23675 }, { "epoch": 1.2892528230993854, "grad_norm": 0.5983764378190438, "learning_rate": 5.923881382069818e-05, "loss": 11.7917, "step": 23676 }, { "epoch": 1.2893072770959684, "grad_norm": 0.5557263800671538, "learning_rate": 5.923076154016559e-05, "loss": 11.9336, "step": 23677 }, { "epoch": 1.2893617310925514, "grad_norm": 0.5779450898848552, "learning_rate": 5.922270957666145e-05, "loss": 12.007, "step": 23678 }, { "epoch": 1.2894161850891344, "grad_norm": 0.5638874342151753, "learning_rate": 5.921465793024834e-05, "loss": 11.9898, "step": 23679 }, { "epoch": 1.2894706390857174, "grad_norm": 0.5401848903416356, "learning_rate": 5.9206606600988904e-05, "loss": 12.068, "step": 23680 }, { "epoch": 1.2895250930823003, "grad_norm": 0.5063761932526026, "learning_rate": 5.919855558894574e-05, "loss": 11.9837, "step": 23681 }, { "epoch": 1.2895795470788833, "grad_norm": 0.5055177441847434, "learning_rate": 5.919050489418143e-05, "loss": 11.9499, "step": 23682 }, { "epoch": 1.2896340010754663, "grad_norm": 0.572799586197494, "learning_rate": 5.918245451675863e-05, "loss": 12.0118, "step": 23683 }, { "epoch": 1.2896884550720493, "grad_norm": 0.5243849526515628, "learning_rate": 5.9174404456739896e-05, "loss": 11.9704, "step": 23684 }, { "epoch": 1.2897429090686325, "grad_norm": 0.5067721466066325, "learning_rate": 5.9166354714187834e-05, "loss": 11.9171, "step": 23685 }, { "epoch": 1.2897973630652155, "grad_norm": 0.5837874081527563, "learning_rate": 5.915830528916504e-05, "loss": 11.9104, "step": 23686 }, { "epoch": 1.2898518170617985, "grad_norm": 0.6207793209349238, "learning_rate": 5.9150256181734085e-05, "loss": 11.9239, "step": 23687 }, { "epoch": 1.2899062710583815, "grad_norm": 0.5254442650792589, "learning_rate": 5.914220739195763e-05, "loss": 11.9123, "step": 23688 }, { "epoch": 1.2899607250549645, "grad_norm": 0.49713421448790956, "learning_rate": 5.913415891989821e-05, "loss": 11.9195, "step": 23689 }, { "epoch": 1.2900151790515475, "grad_norm": 0.534137301964781, "learning_rate": 5.912611076561843e-05, "loss": 11.7454, "step": 23690 }, { "epoch": 1.2900696330481305, "grad_norm": 0.4969835572591292, "learning_rate": 5.9118062929180854e-05, "loss": 11.985, "step": 23691 }, { "epoch": 1.2901240870447135, "grad_norm": 0.5544418764078004, "learning_rate": 5.911001541064809e-05, "loss": 11.9663, "step": 23692 }, { "epoch": 1.2901785410412965, "grad_norm": 0.601756892019909, "learning_rate": 5.9101968210082714e-05, "loss": 11.9447, "step": 23693 }, { "epoch": 1.2902329950378797, "grad_norm": 0.5287257994580373, "learning_rate": 5.909392132754727e-05, "loss": 11.8107, "step": 23694 }, { "epoch": 1.2902874490344627, "grad_norm": 0.5607373362066157, "learning_rate": 5.908587476310437e-05, "loss": 11.9615, "step": 23695 }, { "epoch": 1.2903419030310457, "grad_norm": 0.5465967799122933, "learning_rate": 5.907782851681656e-05, "loss": 11.9678, "step": 23696 }, { "epoch": 1.2903963570276287, "grad_norm": 0.572141024917695, "learning_rate": 5.9069782588746424e-05, "loss": 11.9509, "step": 23697 }, { "epoch": 1.2904508110242117, "grad_norm": 0.5471142728851983, "learning_rate": 5.9061736978956515e-05, "loss": 11.8992, "step": 23698 }, { "epoch": 1.2905052650207947, "grad_norm": 0.5580420481713758, "learning_rate": 5.9053691687509385e-05, "loss": 11.8991, "step": 23699 }, { "epoch": 1.2905597190173776, "grad_norm": 0.6032433260272864, "learning_rate": 5.904564671446763e-05, "loss": 11.9893, "step": 23700 }, { "epoch": 1.2906141730139606, "grad_norm": 0.5273241292063136, "learning_rate": 5.903760205989379e-05, "loss": 11.8925, "step": 23701 }, { "epoch": 1.2906686270105436, "grad_norm": 0.5173306857448301, "learning_rate": 5.902955772385045e-05, "loss": 12.0058, "step": 23702 }, { "epoch": 1.2907230810071266, "grad_norm": 0.6058203988463428, "learning_rate": 5.902151370640012e-05, "loss": 11.9569, "step": 23703 }, { "epoch": 1.2907775350037096, "grad_norm": 0.5844213336884051, "learning_rate": 5.90134700076054e-05, "loss": 12.0116, "step": 23704 }, { "epoch": 1.2908319890002926, "grad_norm": 0.5443874397600219, "learning_rate": 5.900542662752879e-05, "loss": 11.9553, "step": 23705 }, { "epoch": 1.2908864429968756, "grad_norm": 0.5647633404967047, "learning_rate": 5.899738356623286e-05, "loss": 11.9905, "step": 23706 }, { "epoch": 1.2909408969934586, "grad_norm": 0.501919395815344, "learning_rate": 5.8989340823780135e-05, "loss": 11.9122, "step": 23707 }, { "epoch": 1.2909953509900416, "grad_norm": 0.48842413756295855, "learning_rate": 5.898129840023319e-05, "loss": 11.8905, "step": 23708 }, { "epoch": 1.2910498049866248, "grad_norm": 0.6030129318770379, "learning_rate": 5.8973256295654546e-05, "loss": 11.9723, "step": 23709 }, { "epoch": 1.2911042589832078, "grad_norm": 0.5126006741755619, "learning_rate": 5.8965214510106715e-05, "loss": 11.9802, "step": 23710 }, { "epoch": 1.2911587129797908, "grad_norm": 0.6057647348014951, "learning_rate": 5.895717304365228e-05, "loss": 11.811, "step": 23711 }, { "epoch": 1.2912131669763738, "grad_norm": 0.5512818310963955, "learning_rate": 5.894913189635375e-05, "loss": 11.9218, "step": 23712 }, { "epoch": 1.2912676209729568, "grad_norm": 0.5559498739751749, "learning_rate": 5.894109106827366e-05, "loss": 11.8989, "step": 23713 }, { "epoch": 1.2913220749695398, "grad_norm": 0.5551262936257649, "learning_rate": 5.8933050559474535e-05, "loss": 12.003, "step": 23714 }, { "epoch": 1.2913765289661228, "grad_norm": 0.5136157492964187, "learning_rate": 5.89250103700189e-05, "loss": 12.0017, "step": 23715 }, { "epoch": 1.2914309829627058, "grad_norm": 0.531313460374404, "learning_rate": 5.891697049996925e-05, "loss": 11.9446, "step": 23716 }, { "epoch": 1.291485436959289, "grad_norm": 0.48825477336853895, "learning_rate": 5.890893094938814e-05, "loss": 11.9323, "step": 23717 }, { "epoch": 1.291539890955872, "grad_norm": 0.5101215598549473, "learning_rate": 5.890089171833808e-05, "loss": 11.972, "step": 23718 }, { "epoch": 1.291594344952455, "grad_norm": 0.5541828303361535, "learning_rate": 5.889285280688156e-05, "loss": 11.953, "step": 23719 }, { "epoch": 1.291648798949038, "grad_norm": 0.5143235778991876, "learning_rate": 5.888481421508112e-05, "loss": 11.9354, "step": 23720 }, { "epoch": 1.291703252945621, "grad_norm": 0.5063198474722517, "learning_rate": 5.887677594299924e-05, "loss": 11.9112, "step": 23721 }, { "epoch": 1.291757706942204, "grad_norm": 0.5603941868171852, "learning_rate": 5.8868737990698465e-05, "loss": 11.8923, "step": 23722 }, { "epoch": 1.291812160938787, "grad_norm": 0.5081302596193433, "learning_rate": 5.8860700358241286e-05, "loss": 11.8388, "step": 23723 }, { "epoch": 1.29186661493537, "grad_norm": 0.5332331312804888, "learning_rate": 5.8852663045690236e-05, "loss": 11.9585, "step": 23724 }, { "epoch": 1.291921068931953, "grad_norm": 0.6407102059965988, "learning_rate": 5.884462605310772e-05, "loss": 12.1426, "step": 23725 }, { "epoch": 1.291975522928536, "grad_norm": 0.5282749642282629, "learning_rate": 5.8836589380556306e-05, "loss": 11.9303, "step": 23726 }, { "epoch": 1.292029976925119, "grad_norm": 0.5010477551716277, "learning_rate": 5.882855302809849e-05, "loss": 11.8868, "step": 23727 }, { "epoch": 1.2920844309217019, "grad_norm": 0.5263521332847538, "learning_rate": 5.8820516995796735e-05, "loss": 11.686, "step": 23728 }, { "epoch": 1.2921388849182849, "grad_norm": 0.5315598524409277, "learning_rate": 5.881248128371355e-05, "loss": 11.8769, "step": 23729 }, { "epoch": 1.2921933389148679, "grad_norm": 0.5113617335711895, "learning_rate": 5.880444589191143e-05, "loss": 11.9031, "step": 23730 }, { "epoch": 1.2922477929114509, "grad_norm": 0.5513426784972582, "learning_rate": 5.879641082045284e-05, "loss": 11.9405, "step": 23731 }, { "epoch": 1.292302246908034, "grad_norm": 0.5547387914133479, "learning_rate": 5.8788376069400244e-05, "loss": 11.8762, "step": 23732 }, { "epoch": 1.292356700904617, "grad_norm": 0.5883632763509766, "learning_rate": 5.878034163881616e-05, "loss": 11.9423, "step": 23733 }, { "epoch": 1.2924111549012, "grad_norm": 0.5147257702675143, "learning_rate": 5.87723075287631e-05, "loss": 11.9103, "step": 23734 }, { "epoch": 1.292465608897783, "grad_norm": 0.5376391013574864, "learning_rate": 5.8764273739303464e-05, "loss": 11.7991, "step": 23735 }, { "epoch": 1.292520062894366, "grad_norm": 0.547459391107265, "learning_rate": 5.8756240270499715e-05, "loss": 11.882, "step": 23736 }, { "epoch": 1.292574516890949, "grad_norm": 0.5494695018731429, "learning_rate": 5.874820712241438e-05, "loss": 11.9814, "step": 23737 }, { "epoch": 1.292628970887532, "grad_norm": 0.5471057445038715, "learning_rate": 5.874017429510992e-05, "loss": 11.9549, "step": 23738 }, { "epoch": 1.292683424884115, "grad_norm": 0.6807750576173539, "learning_rate": 5.873214178864877e-05, "loss": 11.8596, "step": 23739 }, { "epoch": 1.292737878880698, "grad_norm": 0.5740114418723041, "learning_rate": 5.87241096030934e-05, "loss": 11.9223, "step": 23740 }, { "epoch": 1.2927923328772812, "grad_norm": 0.5288712231651108, "learning_rate": 5.8716077738506284e-05, "loss": 11.8839, "step": 23741 }, { "epoch": 1.2928467868738642, "grad_norm": 0.5266905118122426, "learning_rate": 5.870804619494987e-05, "loss": 11.9494, "step": 23742 }, { "epoch": 1.2929012408704472, "grad_norm": 0.5449159386048019, "learning_rate": 5.8700014972486606e-05, "loss": 11.8494, "step": 23743 }, { "epoch": 1.2929556948670302, "grad_norm": 0.5882369463915113, "learning_rate": 5.869198407117897e-05, "loss": 11.9339, "step": 23744 }, { "epoch": 1.2930101488636132, "grad_norm": 0.5461902077565499, "learning_rate": 5.8683953491089395e-05, "loss": 11.988, "step": 23745 }, { "epoch": 1.2930646028601962, "grad_norm": 0.5408887115112508, "learning_rate": 5.867592323228031e-05, "loss": 11.9151, "step": 23746 }, { "epoch": 1.2931190568567792, "grad_norm": 0.5891116599950841, "learning_rate": 5.866789329481418e-05, "loss": 11.9295, "step": 23747 }, { "epoch": 1.2931735108533622, "grad_norm": 0.5673280691471253, "learning_rate": 5.865986367875342e-05, "loss": 11.9891, "step": 23748 }, { "epoch": 1.2932279648499452, "grad_norm": 0.5540971147823334, "learning_rate": 5.865183438416051e-05, "loss": 11.9701, "step": 23749 }, { "epoch": 1.2932824188465282, "grad_norm": 0.5066621558352484, "learning_rate": 5.864380541109788e-05, "loss": 11.9131, "step": 23750 }, { "epoch": 1.2933368728431112, "grad_norm": 0.5779754619780152, "learning_rate": 5.863577675962795e-05, "loss": 11.9878, "step": 23751 }, { "epoch": 1.2933913268396942, "grad_norm": 0.5818797433262762, "learning_rate": 5.862774842981315e-05, "loss": 11.9, "step": 23752 }, { "epoch": 1.2934457808362771, "grad_norm": 0.5821332362538245, "learning_rate": 5.861972042171593e-05, "loss": 11.967, "step": 23753 }, { "epoch": 1.2935002348328601, "grad_norm": 0.5601375137464693, "learning_rate": 5.861169273539871e-05, "loss": 11.81, "step": 23754 }, { "epoch": 1.2935546888294434, "grad_norm": 0.5738173837500221, "learning_rate": 5.86036653709239e-05, "loss": 11.8963, "step": 23755 }, { "epoch": 1.2936091428260263, "grad_norm": 0.6331948374525299, "learning_rate": 5.859563832835393e-05, "loss": 11.9184, "step": 23756 }, { "epoch": 1.2936635968226093, "grad_norm": 0.5269610859734193, "learning_rate": 5.858761160775121e-05, "loss": 11.9363, "step": 23757 }, { "epoch": 1.2937180508191923, "grad_norm": 0.5604407469730615, "learning_rate": 5.8579585209178164e-05, "loss": 12.0051, "step": 23758 }, { "epoch": 1.2937725048157753, "grad_norm": 0.529801134839837, "learning_rate": 5.8571559132697206e-05, "loss": 11.9784, "step": 23759 }, { "epoch": 1.2938269588123583, "grad_norm": 0.5561562143215054, "learning_rate": 5.856353337837076e-05, "loss": 11.9694, "step": 23760 }, { "epoch": 1.2938814128089413, "grad_norm": 0.4916798146514856, "learning_rate": 5.8555507946261235e-05, "loss": 11.8015, "step": 23761 }, { "epoch": 1.2939358668055243, "grad_norm": 0.5454294032558833, "learning_rate": 5.8547482836431014e-05, "loss": 11.9613, "step": 23762 }, { "epoch": 1.2939903208021073, "grad_norm": 0.566705039358307, "learning_rate": 5.853945804894254e-05, "loss": 11.8301, "step": 23763 }, { "epoch": 1.2940447747986905, "grad_norm": 0.5164305283152058, "learning_rate": 5.8531433583858195e-05, "loss": 11.8817, "step": 23764 }, { "epoch": 1.2940992287952735, "grad_norm": 0.5587180282690472, "learning_rate": 5.8523409441240375e-05, "loss": 11.9838, "step": 23765 }, { "epoch": 1.2941536827918565, "grad_norm": 0.5042120080470417, "learning_rate": 5.8515385621151464e-05, "loss": 12.0209, "step": 23766 }, { "epoch": 1.2942081367884395, "grad_norm": 0.5293438762147187, "learning_rate": 5.8507362123653865e-05, "loss": 11.926, "step": 23767 }, { "epoch": 1.2942625907850225, "grad_norm": 0.5307943430357466, "learning_rate": 5.849933894880999e-05, "loss": 11.8703, "step": 23768 }, { "epoch": 1.2943170447816055, "grad_norm": 0.5962872100297739, "learning_rate": 5.8491316096682215e-05, "loss": 11.9902, "step": 23769 }, { "epoch": 1.2943714987781885, "grad_norm": 0.4928160541912051, "learning_rate": 5.848329356733291e-05, "loss": 11.823, "step": 23770 }, { "epoch": 1.2944259527747715, "grad_norm": 0.47542862553071874, "learning_rate": 5.847527136082449e-05, "loss": 11.8341, "step": 23771 }, { "epoch": 1.2944804067713545, "grad_norm": 0.5182900495334072, "learning_rate": 5.8467249477219313e-05, "loss": 11.8392, "step": 23772 }, { "epoch": 1.2945348607679374, "grad_norm": 0.5317771082088423, "learning_rate": 5.8459227916579785e-05, "loss": 11.8587, "step": 23773 }, { "epoch": 1.2945893147645204, "grad_norm": 0.5421895251981557, "learning_rate": 5.8451206678968285e-05, "loss": 11.8498, "step": 23774 }, { "epoch": 1.2946437687611034, "grad_norm": 0.48099212544232717, "learning_rate": 5.844318576444714e-05, "loss": 11.7839, "step": 23775 }, { "epoch": 1.2946982227576864, "grad_norm": 0.6457133927383553, "learning_rate": 5.8435165173078765e-05, "loss": 11.9891, "step": 23776 }, { "epoch": 1.2947526767542694, "grad_norm": 0.581678108408296, "learning_rate": 5.842714490492551e-05, "loss": 12.0298, "step": 23777 }, { "epoch": 1.2948071307508524, "grad_norm": 0.5619604634688051, "learning_rate": 5.8419124960049745e-05, "loss": 12.0587, "step": 23778 }, { "epoch": 1.2948615847474356, "grad_norm": 0.5902723998282311, "learning_rate": 5.841110533851384e-05, "loss": 11.9813, "step": 23779 }, { "epoch": 1.2949160387440186, "grad_norm": 0.5290632394977075, "learning_rate": 5.840308604038015e-05, "loss": 11.9145, "step": 23780 }, { "epoch": 1.2949704927406016, "grad_norm": 0.5630728018540572, "learning_rate": 5.839506706571103e-05, "loss": 11.872, "step": 23781 }, { "epoch": 1.2950249467371846, "grad_norm": 0.49309045151418357, "learning_rate": 5.838704841456886e-05, "loss": 11.8864, "step": 23782 }, { "epoch": 1.2950794007337676, "grad_norm": 0.5205485808217177, "learning_rate": 5.837903008701599e-05, "loss": 11.8321, "step": 23783 }, { "epoch": 1.2951338547303506, "grad_norm": 0.5734610177021967, "learning_rate": 5.8371012083114776e-05, "loss": 11.9272, "step": 23784 }, { "epoch": 1.2951883087269336, "grad_norm": 0.61766529693796, "learning_rate": 5.836299440292751e-05, "loss": 12.018, "step": 23785 }, { "epoch": 1.2952427627235166, "grad_norm": 0.5309879938683016, "learning_rate": 5.835497704651662e-05, "loss": 11.8067, "step": 23786 }, { "epoch": 1.2952972167200998, "grad_norm": 0.5115278956316054, "learning_rate": 5.834696001394437e-05, "loss": 11.9181, "step": 23787 }, { "epoch": 1.2953516707166828, "grad_norm": 0.5730103687587895, "learning_rate": 5.8338943305273184e-05, "loss": 12.0184, "step": 23788 }, { "epoch": 1.2954061247132658, "grad_norm": 0.5128052120920888, "learning_rate": 5.833092692056532e-05, "loss": 11.7312, "step": 23789 }, { "epoch": 1.2954605787098488, "grad_norm": 0.5364698494166938, "learning_rate": 5.832291085988316e-05, "loss": 11.9624, "step": 23790 }, { "epoch": 1.2955150327064318, "grad_norm": 0.5467671699510431, "learning_rate": 5.8314895123289074e-05, "loss": 12.015, "step": 23791 }, { "epoch": 1.2955694867030147, "grad_norm": 0.5578199902118518, "learning_rate": 5.830687971084532e-05, "loss": 11.8048, "step": 23792 }, { "epoch": 1.2956239406995977, "grad_norm": 0.5275092571653156, "learning_rate": 5.829886462261429e-05, "loss": 11.8615, "step": 23793 }, { "epoch": 1.2956783946961807, "grad_norm": 0.545961655763849, "learning_rate": 5.829084985865829e-05, "loss": 11.9292, "step": 23794 }, { "epoch": 1.2957328486927637, "grad_norm": 0.5126942762884821, "learning_rate": 5.828283541903959e-05, "loss": 11.8929, "step": 23795 }, { "epoch": 1.2957873026893467, "grad_norm": 0.6318212840121846, "learning_rate": 5.8274821303820605e-05, "loss": 12.0266, "step": 23796 }, { "epoch": 1.2958417566859297, "grad_norm": 0.5517809070225351, "learning_rate": 5.826680751306357e-05, "loss": 11.8974, "step": 23797 }, { "epoch": 1.2958962106825127, "grad_norm": 0.5450332140338545, "learning_rate": 5.825879404683087e-05, "loss": 12.0884, "step": 23798 }, { "epoch": 1.2959506646790957, "grad_norm": 0.5179500438583599, "learning_rate": 5.825078090518474e-05, "loss": 11.829, "step": 23799 }, { "epoch": 1.2960051186756787, "grad_norm": 0.4967420919153505, "learning_rate": 5.824276808818756e-05, "loss": 11.9634, "step": 23800 }, { "epoch": 1.2960595726722617, "grad_norm": 0.5318850930840494, "learning_rate": 5.8234755595901635e-05, "loss": 11.9274, "step": 23801 }, { "epoch": 1.296114026668845, "grad_norm": 0.5272746189101266, "learning_rate": 5.822674342838923e-05, "loss": 11.8998, "step": 23802 }, { "epoch": 1.296168480665428, "grad_norm": 0.504146223444168, "learning_rate": 5.8218731585712716e-05, "loss": 11.9453, "step": 23803 }, { "epoch": 1.2962229346620109, "grad_norm": 0.5549635152298085, "learning_rate": 5.821072006793434e-05, "loss": 12.0533, "step": 23804 }, { "epoch": 1.2962773886585939, "grad_norm": 0.5818619237050583, "learning_rate": 5.8202708875116385e-05, "loss": 11.8858, "step": 23805 }, { "epoch": 1.2963318426551769, "grad_norm": 0.6054106256274053, "learning_rate": 5.8194698007321205e-05, "loss": 11.9465, "step": 23806 }, { "epoch": 1.2963862966517599, "grad_norm": 0.5968692808356221, "learning_rate": 5.818668746461102e-05, "loss": 12.0636, "step": 23807 }, { "epoch": 1.2964407506483429, "grad_norm": 0.5005143996104131, "learning_rate": 5.817867724704822e-05, "loss": 11.8417, "step": 23808 }, { "epoch": 1.2964952046449258, "grad_norm": 0.5110394529088859, "learning_rate": 5.8170667354694984e-05, "loss": 11.887, "step": 23809 }, { "epoch": 1.2965496586415088, "grad_norm": 0.5864714130318646, "learning_rate": 5.8162657787613715e-05, "loss": 11.8843, "step": 23810 }, { "epoch": 1.296604112638092, "grad_norm": 0.5275926335063034, "learning_rate": 5.815464854586657e-05, "loss": 11.9233, "step": 23811 }, { "epoch": 1.296658566634675, "grad_norm": 0.5586000244429157, "learning_rate": 5.814663962951592e-05, "loss": 11.8799, "step": 23812 }, { "epoch": 1.296713020631258, "grad_norm": 0.530092297639588, "learning_rate": 5.8138631038624036e-05, "loss": 11.8257, "step": 23813 }, { "epoch": 1.296767474627841, "grad_norm": 0.5328622060712425, "learning_rate": 5.813062277325319e-05, "loss": 11.7601, "step": 23814 }, { "epoch": 1.296821928624424, "grad_norm": 0.5578334577050502, "learning_rate": 5.812261483346564e-05, "loss": 11.9954, "step": 23815 }, { "epoch": 1.296876382621007, "grad_norm": 0.5591524005561552, "learning_rate": 5.8114607219323623e-05, "loss": 11.8942, "step": 23816 }, { "epoch": 1.29693083661759, "grad_norm": 0.5014435898895022, "learning_rate": 5.810659993088944e-05, "loss": 11.9737, "step": 23817 }, { "epoch": 1.296985290614173, "grad_norm": 0.5155678054343346, "learning_rate": 5.8098592968225396e-05, "loss": 11.8323, "step": 23818 }, { "epoch": 1.297039744610756, "grad_norm": 0.5822572248388822, "learning_rate": 5.809058633139369e-05, "loss": 12.0131, "step": 23819 }, { "epoch": 1.297094198607339, "grad_norm": 0.5669873017065186, "learning_rate": 5.8082580020456656e-05, "loss": 11.9765, "step": 23820 }, { "epoch": 1.297148652603922, "grad_norm": 0.565166966704871, "learning_rate": 5.807457403547646e-05, "loss": 11.9164, "step": 23821 }, { "epoch": 1.297203106600505, "grad_norm": 0.6252835077462645, "learning_rate": 5.806656837651545e-05, "loss": 11.9195, "step": 23822 }, { "epoch": 1.297257560597088, "grad_norm": 0.5209635888907312, "learning_rate": 5.8058563043635796e-05, "loss": 11.868, "step": 23823 }, { "epoch": 1.297312014593671, "grad_norm": 0.5397714261569933, "learning_rate": 5.8050558036899826e-05, "loss": 11.922, "step": 23824 }, { "epoch": 1.2973664685902542, "grad_norm": 0.5615187512027605, "learning_rate": 5.804255335636976e-05, "loss": 12.0561, "step": 23825 }, { "epoch": 1.2974209225868372, "grad_norm": 0.5356667127732464, "learning_rate": 5.8034549002107786e-05, "loss": 11.9817, "step": 23826 }, { "epoch": 1.2974753765834202, "grad_norm": 0.5363407231181678, "learning_rate": 5.802654497417621e-05, "loss": 11.8955, "step": 23827 }, { "epoch": 1.2975298305800032, "grad_norm": 0.5562681698490525, "learning_rate": 5.8018541272637286e-05, "loss": 11.9367, "step": 23828 }, { "epoch": 1.2975842845765861, "grad_norm": 0.5507851988821135, "learning_rate": 5.8010537897553195e-05, "loss": 11.8209, "step": 23829 }, { "epoch": 1.2976387385731691, "grad_norm": 0.5723448092638935, "learning_rate": 5.800253484898623e-05, "loss": 11.934, "step": 23830 }, { "epoch": 1.2976931925697521, "grad_norm": 0.5014592242239699, "learning_rate": 5.7994532126998566e-05, "loss": 12.0612, "step": 23831 }, { "epoch": 1.2977476465663351, "grad_norm": 0.4921722432223852, "learning_rate": 5.798652973165251e-05, "loss": 11.8723, "step": 23832 }, { "epoch": 1.2978021005629181, "grad_norm": 0.5835628798465374, "learning_rate": 5.7978527663010196e-05, "loss": 12.0561, "step": 23833 }, { "epoch": 1.2978565545595013, "grad_norm": 0.5568174068152698, "learning_rate": 5.797052592113394e-05, "loss": 11.8702, "step": 23834 }, { "epoch": 1.2979110085560843, "grad_norm": 0.5443639620474775, "learning_rate": 5.796252450608592e-05, "loss": 11.9998, "step": 23835 }, { "epoch": 1.2979654625526673, "grad_norm": 0.5243651156283624, "learning_rate": 5.795452341792833e-05, "loss": 11.8549, "step": 23836 }, { "epoch": 1.2980199165492503, "grad_norm": 0.5552627226889993, "learning_rate": 5.7946522656723447e-05, "loss": 11.9421, "step": 23837 }, { "epoch": 1.2980743705458333, "grad_norm": 0.5577937602476003, "learning_rate": 5.793852222253342e-05, "loss": 11.9975, "step": 23838 }, { "epoch": 1.2981288245424163, "grad_norm": 0.5406367919214983, "learning_rate": 5.7930522115420495e-05, "loss": 11.9579, "step": 23839 }, { "epoch": 1.2981832785389993, "grad_norm": 0.5159964460721704, "learning_rate": 5.792252233544693e-05, "loss": 11.9685, "step": 23840 }, { "epoch": 1.2982377325355823, "grad_norm": 0.5752622332443564, "learning_rate": 5.791452288267483e-05, "loss": 12.0587, "step": 23841 }, { "epoch": 1.2982921865321653, "grad_norm": 0.5143233796983403, "learning_rate": 5.790652375716652e-05, "loss": 11.828, "step": 23842 }, { "epoch": 1.2983466405287483, "grad_norm": 0.5817519099039418, "learning_rate": 5.789852495898409e-05, "loss": 11.948, "step": 23843 }, { "epoch": 1.2984010945253313, "grad_norm": 0.5981241894383442, "learning_rate": 5.789052648818982e-05, "loss": 12.0364, "step": 23844 }, { "epoch": 1.2984555485219142, "grad_norm": 0.55008481899298, "learning_rate": 5.78825283448459e-05, "loss": 11.9335, "step": 23845 }, { "epoch": 1.2985100025184972, "grad_norm": 0.5705241028715168, "learning_rate": 5.787453052901446e-05, "loss": 11.9181, "step": 23846 }, { "epoch": 1.2985644565150802, "grad_norm": 0.5941401412513208, "learning_rate": 5.786653304075775e-05, "loss": 12.0423, "step": 23847 }, { "epoch": 1.2986189105116632, "grad_norm": 0.5608612963629043, "learning_rate": 5.785853588013793e-05, "loss": 11.9988, "step": 23848 }, { "epoch": 1.2986733645082464, "grad_norm": 0.5342991395931518, "learning_rate": 5.78505390472172e-05, "loss": 11.8275, "step": 23849 }, { "epoch": 1.2987278185048294, "grad_norm": 0.592120336158684, "learning_rate": 5.784254254205779e-05, "loss": 11.9297, "step": 23850 }, { "epoch": 1.2987822725014124, "grad_norm": 0.5739279250310847, "learning_rate": 5.7834546364721785e-05, "loss": 12.0212, "step": 23851 }, { "epoch": 1.2988367264979954, "grad_norm": 0.5394185905729064, "learning_rate": 5.782655051527146e-05, "loss": 11.9065, "step": 23852 }, { "epoch": 1.2988911804945784, "grad_norm": 0.5220110030213705, "learning_rate": 5.781855499376891e-05, "loss": 11.8859, "step": 23853 }, { "epoch": 1.2989456344911614, "grad_norm": 0.7403391556608028, "learning_rate": 5.7810559800276396e-05, "loss": 11.9985, "step": 23854 }, { "epoch": 1.2990000884877444, "grad_norm": 0.5237367908716927, "learning_rate": 5.780256493485605e-05, "loss": 11.9219, "step": 23855 }, { "epoch": 1.2990545424843274, "grad_norm": 0.5028134170372256, "learning_rate": 5.779457039756999e-05, "loss": 11.9866, "step": 23856 }, { "epoch": 1.2991089964809106, "grad_norm": 0.5407719520119866, "learning_rate": 5.7786576188480465e-05, "loss": 11.8646, "step": 23857 }, { "epoch": 1.2991634504774936, "grad_norm": 0.5432444500769634, "learning_rate": 5.777858230764956e-05, "loss": 11.799, "step": 23858 }, { "epoch": 1.2992179044740766, "grad_norm": 0.556584499302641, "learning_rate": 5.777058875513952e-05, "loss": 11.9791, "step": 23859 }, { "epoch": 1.2992723584706596, "grad_norm": 0.5299547991900019, "learning_rate": 5.7762595531012444e-05, "loss": 11.9898, "step": 23860 }, { "epoch": 1.2993268124672426, "grad_norm": 0.5567155647967426, "learning_rate": 5.775460263533048e-05, "loss": 11.829, "step": 23861 }, { "epoch": 1.2993812664638256, "grad_norm": 0.5485088855415148, "learning_rate": 5.774661006815587e-05, "loss": 12.0006, "step": 23862 }, { "epoch": 1.2994357204604086, "grad_norm": 0.6038910920864603, "learning_rate": 5.773861782955066e-05, "loss": 11.9628, "step": 23863 }, { "epoch": 1.2994901744569916, "grad_norm": 0.5907919696422618, "learning_rate": 5.773062591957708e-05, "loss": 12.1342, "step": 23864 }, { "epoch": 1.2995446284535745, "grad_norm": 0.56804388009057, "learning_rate": 5.7722634338297256e-05, "loss": 11.9145, "step": 23865 }, { "epoch": 1.2995990824501575, "grad_norm": 0.5934006202192055, "learning_rate": 5.7714643085773256e-05, "loss": 11.9192, "step": 23866 }, { "epoch": 1.2996535364467405, "grad_norm": 0.521596306869903, "learning_rate": 5.770665216206734e-05, "loss": 11.9393, "step": 23867 }, { "epoch": 1.2997079904433235, "grad_norm": 0.5476409909677536, "learning_rate": 5.769866156724154e-05, "loss": 11.8477, "step": 23868 }, { "epoch": 1.2997624444399065, "grad_norm": 0.5489940155779739, "learning_rate": 5.769067130135808e-05, "loss": 11.8936, "step": 23869 }, { "epoch": 1.2998168984364895, "grad_norm": 0.5687623535481318, "learning_rate": 5.7682681364479027e-05, "loss": 11.9452, "step": 23870 }, { "epoch": 1.2998713524330725, "grad_norm": 0.49442109312059335, "learning_rate": 5.7674691756666574e-05, "loss": 11.8604, "step": 23871 }, { "epoch": 1.2999258064296557, "grad_norm": 0.5268642892099454, "learning_rate": 5.766670247798277e-05, "loss": 11.9767, "step": 23872 }, { "epoch": 1.2999802604262387, "grad_norm": 0.5900355269341435, "learning_rate": 5.7658713528489795e-05, "loss": 11.9823, "step": 23873 }, { "epoch": 1.3000347144228217, "grad_norm": 0.5524735410150464, "learning_rate": 5.76507249082498e-05, "loss": 12.0053, "step": 23874 }, { "epoch": 1.3000891684194047, "grad_norm": 0.5957451169779749, "learning_rate": 5.7642736617324864e-05, "loss": 11.9088, "step": 23875 }, { "epoch": 1.3001436224159877, "grad_norm": 0.5603659799437561, "learning_rate": 5.763474865577708e-05, "loss": 11.9087, "step": 23876 }, { "epoch": 1.3001980764125707, "grad_norm": 0.5687007741173038, "learning_rate": 5.7626761023668644e-05, "loss": 11.9719, "step": 23877 }, { "epoch": 1.3002525304091537, "grad_norm": 0.523834443595407, "learning_rate": 5.761877372106157e-05, "loss": 11.9838, "step": 23878 }, { "epoch": 1.3003069844057367, "grad_norm": 0.5162018241912517, "learning_rate": 5.761078674801805e-05, "loss": 11.8398, "step": 23879 }, { "epoch": 1.3003614384023197, "grad_norm": 0.5216713171190042, "learning_rate": 5.760280010460013e-05, "loss": 12.0186, "step": 23880 }, { "epoch": 1.3004158923989029, "grad_norm": 0.5385634257076888, "learning_rate": 5.759481379086998e-05, "loss": 11.9314, "step": 23881 }, { "epoch": 1.3004703463954859, "grad_norm": 0.5015222462913387, "learning_rate": 5.758682780688964e-05, "loss": 11.8739, "step": 23882 }, { "epoch": 1.3005248003920689, "grad_norm": 0.5829663844393825, "learning_rate": 5.757884215272123e-05, "loss": 11.9699, "step": 23883 }, { "epoch": 1.3005792543886519, "grad_norm": 0.5987181288521144, "learning_rate": 5.7570856828426914e-05, "loss": 11.7919, "step": 23884 }, { "epoch": 1.3006337083852348, "grad_norm": 0.5477114358282926, "learning_rate": 5.756287183406872e-05, "loss": 11.9544, "step": 23885 }, { "epoch": 1.3006881623818178, "grad_norm": 0.5637247972403945, "learning_rate": 5.755488716970874e-05, "loss": 11.9266, "step": 23886 }, { "epoch": 1.3007426163784008, "grad_norm": 0.5424718020020158, "learning_rate": 5.754690283540904e-05, "loss": 11.864, "step": 23887 }, { "epoch": 1.3007970703749838, "grad_norm": 0.5247746464249025, "learning_rate": 5.753891883123176e-05, "loss": 11.9365, "step": 23888 }, { "epoch": 1.3008515243715668, "grad_norm": 0.5233613628141781, "learning_rate": 5.7530935157239e-05, "loss": 12.0639, "step": 23889 }, { "epoch": 1.3009059783681498, "grad_norm": 0.5323720067503869, "learning_rate": 5.7522951813492764e-05, "loss": 11.9468, "step": 23890 }, { "epoch": 1.3009604323647328, "grad_norm": 0.481733645939565, "learning_rate": 5.7514968800055226e-05, "loss": 11.8236, "step": 23891 }, { "epoch": 1.3010148863613158, "grad_norm": 0.5137549636562518, "learning_rate": 5.750698611698837e-05, "loss": 11.92, "step": 23892 }, { "epoch": 1.3010693403578988, "grad_norm": 0.5909328738816363, "learning_rate": 5.7499003764354354e-05, "loss": 11.988, "step": 23893 }, { "epoch": 1.3011237943544818, "grad_norm": 0.576355762077326, "learning_rate": 5.749102174221517e-05, "loss": 11.999, "step": 23894 }, { "epoch": 1.301178248351065, "grad_norm": 0.5370711057197125, "learning_rate": 5.7483040050632974e-05, "loss": 11.8079, "step": 23895 }, { "epoch": 1.301232702347648, "grad_norm": 0.5246186221614892, "learning_rate": 5.747505868966979e-05, "loss": 11.9394, "step": 23896 }, { "epoch": 1.301287156344231, "grad_norm": 0.5614223842972061, "learning_rate": 5.746707765938764e-05, "loss": 11.8673, "step": 23897 }, { "epoch": 1.301341610340814, "grad_norm": 0.5842289670745541, "learning_rate": 5.745909695984864e-05, "loss": 11.9609, "step": 23898 }, { "epoch": 1.301396064337397, "grad_norm": 0.5484661025083538, "learning_rate": 5.745111659111485e-05, "loss": 11.8219, "step": 23899 }, { "epoch": 1.30145051833398, "grad_norm": 0.485387211382524, "learning_rate": 5.744313655324829e-05, "loss": 11.9354, "step": 23900 }, { "epoch": 1.301504972330563, "grad_norm": 0.5524606598815619, "learning_rate": 5.743515684631107e-05, "loss": 11.8696, "step": 23901 }, { "epoch": 1.301559426327146, "grad_norm": 0.563018059080123, "learning_rate": 5.742717747036517e-05, "loss": 12.0242, "step": 23902 }, { "epoch": 1.301613880323729, "grad_norm": 0.5186934091779591, "learning_rate": 5.741919842547272e-05, "loss": 11.9867, "step": 23903 }, { "epoch": 1.3016683343203121, "grad_norm": 0.5546046245961251, "learning_rate": 5.741121971169567e-05, "loss": 11.9718, "step": 23904 }, { "epoch": 1.3017227883168951, "grad_norm": 0.5722461246795527, "learning_rate": 5.740324132909618e-05, "loss": 11.9853, "step": 23905 }, { "epoch": 1.3017772423134781, "grad_norm": 0.56459150025772, "learning_rate": 5.739526327773622e-05, "loss": 12.0055, "step": 23906 }, { "epoch": 1.3018316963100611, "grad_norm": 0.5408282463553108, "learning_rate": 5.73872855576778e-05, "loss": 11.9205, "step": 23907 }, { "epoch": 1.3018861503066441, "grad_norm": 0.5474122335268758, "learning_rate": 5.737930816898303e-05, "loss": 12.0301, "step": 23908 }, { "epoch": 1.3019406043032271, "grad_norm": 0.5187639001840174, "learning_rate": 5.737133111171386e-05, "loss": 11.9641, "step": 23909 }, { "epoch": 1.30199505829981, "grad_norm": 0.6272055705797348, "learning_rate": 5.736335438593239e-05, "loss": 12.128, "step": 23910 }, { "epoch": 1.302049512296393, "grad_norm": 0.5272254647433033, "learning_rate": 5.7355377991700645e-05, "loss": 11.9563, "step": 23911 }, { "epoch": 1.302103966292976, "grad_norm": 0.5549107036377859, "learning_rate": 5.734740192908059e-05, "loss": 11.8946, "step": 23912 }, { "epoch": 1.302158420289559, "grad_norm": 0.5343595563170288, "learning_rate": 5.733942619813435e-05, "loss": 11.9768, "step": 23913 }, { "epoch": 1.302212874286142, "grad_norm": 0.49984534310555945, "learning_rate": 5.7331450798923835e-05, "loss": 11.8715, "step": 23914 }, { "epoch": 1.302267328282725, "grad_norm": 0.5395153645029583, "learning_rate": 5.732347573151116e-05, "loss": 11.958, "step": 23915 }, { "epoch": 1.302321782279308, "grad_norm": 0.48031310327344234, "learning_rate": 5.7315500995958284e-05, "loss": 11.91, "step": 23916 }, { "epoch": 1.302376236275891, "grad_norm": 0.4942641907324273, "learning_rate": 5.73075265923272e-05, "loss": 11.8832, "step": 23917 }, { "epoch": 1.3024306902724743, "grad_norm": 0.5386913802218604, "learning_rate": 5.729955252067998e-05, "loss": 11.9891, "step": 23918 }, { "epoch": 1.3024851442690573, "grad_norm": 0.5376796334453726, "learning_rate": 5.7291578781078556e-05, "loss": 11.9751, "step": 23919 }, { "epoch": 1.3025395982656403, "grad_norm": 0.5565333856111213, "learning_rate": 5.7283605373585035e-05, "loss": 11.8746, "step": 23920 }, { "epoch": 1.3025940522622232, "grad_norm": 0.5851019026703895, "learning_rate": 5.727563229826132e-05, "loss": 11.941, "step": 23921 }, { "epoch": 1.3026485062588062, "grad_norm": 0.4855587166916286, "learning_rate": 5.726765955516944e-05, "loss": 11.9242, "step": 23922 }, { "epoch": 1.3027029602553892, "grad_norm": 0.5296173452222003, "learning_rate": 5.7259687144371454e-05, "loss": 11.8905, "step": 23923 }, { "epoch": 1.3027574142519722, "grad_norm": 0.581536295566947, "learning_rate": 5.725171506592927e-05, "loss": 11.7452, "step": 23924 }, { "epoch": 1.3028118682485552, "grad_norm": 0.5704231691825496, "learning_rate": 5.724374331990495e-05, "loss": 11.8212, "step": 23925 }, { "epoch": 1.3028663222451382, "grad_norm": 0.6077926864510211, "learning_rate": 5.723577190636047e-05, "loss": 11.9739, "step": 23926 }, { "epoch": 1.3029207762417214, "grad_norm": 0.539981475425696, "learning_rate": 5.722780082535775e-05, "loss": 11.9746, "step": 23927 }, { "epoch": 1.3029752302383044, "grad_norm": 0.5626468766958902, "learning_rate": 5.721983007695886e-05, "loss": 11.9676, "step": 23928 }, { "epoch": 1.3030296842348874, "grad_norm": 0.5107973326796912, "learning_rate": 5.72118596612257e-05, "loss": 11.9145, "step": 23929 }, { "epoch": 1.3030841382314704, "grad_norm": 0.5337323240886189, "learning_rate": 5.720388957822034e-05, "loss": 11.822, "step": 23930 }, { "epoch": 1.3031385922280534, "grad_norm": 0.6045486064607628, "learning_rate": 5.7195919828004675e-05, "loss": 11.9822, "step": 23931 }, { "epoch": 1.3031930462246364, "grad_norm": 0.5117673129782551, "learning_rate": 5.718795041064071e-05, "loss": 11.9986, "step": 23932 }, { "epoch": 1.3032475002212194, "grad_norm": 0.5175239048840924, "learning_rate": 5.717998132619047e-05, "loss": 11.9173, "step": 23933 }, { "epoch": 1.3033019542178024, "grad_norm": 0.5182460916005857, "learning_rate": 5.7172012574715826e-05, "loss": 11.8938, "step": 23934 }, { "epoch": 1.3033564082143854, "grad_norm": 0.5171902548628802, "learning_rate": 5.716404415627887e-05, "loss": 11.8766, "step": 23935 }, { "epoch": 1.3034108622109684, "grad_norm": 0.6326446928776895, "learning_rate": 5.715607607094142e-05, "loss": 11.9336, "step": 23936 }, { "epoch": 1.3034653162075513, "grad_norm": 0.5356317795730359, "learning_rate": 5.714810831876551e-05, "loss": 11.9468, "step": 23937 }, { "epoch": 1.3035197702041343, "grad_norm": 0.5355944319998637, "learning_rate": 5.7140140899813144e-05, "loss": 11.8743, "step": 23938 }, { "epoch": 1.3035742242007173, "grad_norm": 0.576354179545349, "learning_rate": 5.7132173814146176e-05, "loss": 11.9397, "step": 23939 }, { "epoch": 1.3036286781973003, "grad_norm": 0.7450549003939333, "learning_rate": 5.712420706182666e-05, "loss": 12.009, "step": 23940 }, { "epoch": 1.3036831321938833, "grad_norm": 0.8013479299433881, "learning_rate": 5.7116240642916455e-05, "loss": 12.0488, "step": 23941 }, { "epoch": 1.3037375861904665, "grad_norm": 0.5770703440886297, "learning_rate": 5.7108274557477606e-05, "loss": 11.8351, "step": 23942 }, { "epoch": 1.3037920401870495, "grad_norm": 0.5448431251972765, "learning_rate": 5.7100308805571946e-05, "loss": 11.942, "step": 23943 }, { "epoch": 1.3038464941836325, "grad_norm": 0.56651186123102, "learning_rate": 5.709234338726149e-05, "loss": 11.9969, "step": 23944 }, { "epoch": 1.3039009481802155, "grad_norm": 0.5477211928099325, "learning_rate": 5.7084378302608245e-05, "loss": 11.869, "step": 23945 }, { "epoch": 1.3039554021767985, "grad_norm": 0.5792967807923594, "learning_rate": 5.707641355167399e-05, "loss": 11.993, "step": 23946 }, { "epoch": 1.3040098561733815, "grad_norm": 0.6211499004231313, "learning_rate": 5.706844913452075e-05, "loss": 11.8483, "step": 23947 }, { "epoch": 1.3040643101699645, "grad_norm": 0.5835205538992685, "learning_rate": 5.7060485051210486e-05, "loss": 11.8844, "step": 23948 }, { "epoch": 1.3041187641665475, "grad_norm": 0.5406301439325446, "learning_rate": 5.705252130180505e-05, "loss": 11.8945, "step": 23949 }, { "epoch": 1.3041732181631307, "grad_norm": 0.510877086515905, "learning_rate": 5.7044557886366455e-05, "loss": 11.9438, "step": 23950 }, { "epoch": 1.3042276721597137, "grad_norm": 0.6818922851058039, "learning_rate": 5.703659480495653e-05, "loss": 12.0, "step": 23951 }, { "epoch": 1.3042821261562967, "grad_norm": 0.5291035509334728, "learning_rate": 5.702863205763731e-05, "loss": 11.9178, "step": 23952 }, { "epoch": 1.3043365801528797, "grad_norm": 0.6095401079017516, "learning_rate": 5.702066964447059e-05, "loss": 11.9598, "step": 23953 }, { "epoch": 1.3043910341494627, "grad_norm": 0.5731255467056443, "learning_rate": 5.7012707565518365e-05, "loss": 11.964, "step": 23954 }, { "epoch": 1.3044454881460457, "grad_norm": 0.5698033834226252, "learning_rate": 5.700474582084261e-05, "loss": 11.9513, "step": 23955 }, { "epoch": 1.3044999421426287, "grad_norm": 0.5418096243886533, "learning_rate": 5.699678441050508e-05, "loss": 11.9278, "step": 23956 }, { "epoch": 1.3045543961392116, "grad_norm": 0.5285863962434048, "learning_rate": 5.698882333456781e-05, "loss": 11.8176, "step": 23957 }, { "epoch": 1.3046088501357946, "grad_norm": 0.5437655748493991, "learning_rate": 5.698086259309261e-05, "loss": 11.8714, "step": 23958 }, { "epoch": 1.3046633041323776, "grad_norm": 0.5249223565079, "learning_rate": 5.697290218614145e-05, "loss": 11.9625, "step": 23959 }, { "epoch": 1.3047177581289606, "grad_norm": 0.5897176494325972, "learning_rate": 5.696494211377625e-05, "loss": 11.9968, "step": 23960 }, { "epoch": 1.3047722121255436, "grad_norm": 0.5744789564895469, "learning_rate": 5.695698237605885e-05, "loss": 11.9869, "step": 23961 }, { "epoch": 1.3048266661221266, "grad_norm": 0.5416320966784032, "learning_rate": 5.69490229730512e-05, "loss": 11.9474, "step": 23962 }, { "epoch": 1.3048811201187096, "grad_norm": 0.5158342676255641, "learning_rate": 5.694106390481514e-05, "loss": 11.7993, "step": 23963 }, { "epoch": 1.3049355741152926, "grad_norm": 0.5253323981802934, "learning_rate": 5.693310517141261e-05, "loss": 12.033, "step": 23964 }, { "epoch": 1.3049900281118758, "grad_norm": 0.5401077524287262, "learning_rate": 5.69251467729055e-05, "loss": 12.0019, "step": 23965 }, { "epoch": 1.3050444821084588, "grad_norm": 0.575819176008194, "learning_rate": 5.691718870935562e-05, "loss": 11.9024, "step": 23966 }, { "epoch": 1.3050989361050418, "grad_norm": 0.5009735122171262, "learning_rate": 5.690923098082495e-05, "loss": 12.007, "step": 23967 }, { "epoch": 1.3051533901016248, "grad_norm": 0.5469346786144591, "learning_rate": 5.690127358737527e-05, "loss": 12.0547, "step": 23968 }, { "epoch": 1.3052078440982078, "grad_norm": 0.6110871644336969, "learning_rate": 5.689331652906857e-05, "loss": 11.9572, "step": 23969 }, { "epoch": 1.3052622980947908, "grad_norm": 0.4965294515940343, "learning_rate": 5.688535980596663e-05, "loss": 11.8601, "step": 23970 }, { "epoch": 1.3053167520913738, "grad_norm": 0.5301775928037475, "learning_rate": 5.687740341813135e-05, "loss": 11.8822, "step": 23971 }, { "epoch": 1.3053712060879568, "grad_norm": 0.9010601067024929, "learning_rate": 5.6869447365624675e-05, "loss": 11.9948, "step": 23972 }, { "epoch": 1.3054256600845398, "grad_norm": 0.5239675187616208, "learning_rate": 5.6861491648508346e-05, "loss": 11.7696, "step": 23973 }, { "epoch": 1.305480114081123, "grad_norm": 0.6069009527012706, "learning_rate": 5.6853536266844346e-05, "loss": 12.0514, "step": 23974 }, { "epoch": 1.305534568077706, "grad_norm": 0.5187344931404261, "learning_rate": 5.684558122069448e-05, "loss": 11.8903, "step": 23975 }, { "epoch": 1.305589022074289, "grad_norm": 0.545560513923747, "learning_rate": 5.683762651012057e-05, "loss": 11.8777, "step": 23976 }, { "epoch": 1.305643476070872, "grad_norm": 0.5995544898189384, "learning_rate": 5.682967213518455e-05, "loss": 11.9604, "step": 23977 }, { "epoch": 1.305697930067455, "grad_norm": 0.5321000437618817, "learning_rate": 5.682171809594821e-05, "loss": 11.8635, "step": 23978 }, { "epoch": 1.305752384064038, "grad_norm": 0.5049934564198645, "learning_rate": 5.6813764392473464e-05, "loss": 12.0295, "step": 23979 }, { "epoch": 1.305806838060621, "grad_norm": 0.4867020493912554, "learning_rate": 5.68058110248221e-05, "loss": 11.889, "step": 23980 }, { "epoch": 1.305861292057204, "grad_norm": 0.5488352103955727, "learning_rate": 5.679785799305597e-05, "loss": 11.9764, "step": 23981 }, { "epoch": 1.305915746053787, "grad_norm": 0.5984157365757806, "learning_rate": 5.6789905297237e-05, "loss": 12.0128, "step": 23982 }, { "epoch": 1.30597020005037, "grad_norm": 0.5676370800152911, "learning_rate": 5.678195293742693e-05, "loss": 11.9651, "step": 23983 }, { "epoch": 1.306024654046953, "grad_norm": 0.5449524338822279, "learning_rate": 5.677400091368767e-05, "loss": 11.8785, "step": 23984 }, { "epoch": 1.3060791080435359, "grad_norm": 0.5984049980477618, "learning_rate": 5.6766049226081e-05, "loss": 11.8982, "step": 23985 }, { "epoch": 1.3061335620401189, "grad_norm": 0.5300111201265589, "learning_rate": 5.675809787466883e-05, "loss": 11.7741, "step": 23986 }, { "epoch": 1.3061880160367019, "grad_norm": 0.4962737380666011, "learning_rate": 5.675014685951292e-05, "loss": 11.8057, "step": 23987 }, { "epoch": 1.306242470033285, "grad_norm": 0.6006448553922471, "learning_rate": 5.67421961806751e-05, "loss": 12.1342, "step": 23988 }, { "epoch": 1.306296924029868, "grad_norm": 0.5084978759646408, "learning_rate": 5.673424583821725e-05, "loss": 11.9022, "step": 23989 }, { "epoch": 1.306351378026451, "grad_norm": 0.5722836251395635, "learning_rate": 5.6726295832201115e-05, "loss": 11.922, "step": 23990 }, { "epoch": 1.306405832023034, "grad_norm": 0.5830201612688393, "learning_rate": 5.671834616268861e-05, "loss": 11.972, "step": 23991 }, { "epoch": 1.306460286019617, "grad_norm": 0.5242589673921513, "learning_rate": 5.6710396829741455e-05, "loss": 11.9899, "step": 23992 }, { "epoch": 1.3065147400162, "grad_norm": 0.5522634693503423, "learning_rate": 5.670244783342151e-05, "loss": 11.9227, "step": 23993 }, { "epoch": 1.306569194012783, "grad_norm": 0.5975844373618137, "learning_rate": 5.669449917379064e-05, "loss": 11.9588, "step": 23994 }, { "epoch": 1.306623648009366, "grad_norm": 0.5658906806418614, "learning_rate": 5.6686550850910566e-05, "loss": 11.9769, "step": 23995 }, { "epoch": 1.306678102005949, "grad_norm": 0.576029591485249, "learning_rate": 5.6678602864843186e-05, "loss": 11.9018, "step": 23996 }, { "epoch": 1.3067325560025322, "grad_norm": 0.5397198944961555, "learning_rate": 5.667065521565025e-05, "loss": 11.9782, "step": 23997 }, { "epoch": 1.3067870099991152, "grad_norm": 0.5318343941425525, "learning_rate": 5.666270790339352e-05, "loss": 11.8726, "step": 23998 }, { "epoch": 1.3068414639956982, "grad_norm": 0.4897478737466349, "learning_rate": 5.665476092813488e-05, "loss": 11.8749, "step": 23999 }, { "epoch": 1.3068959179922812, "grad_norm": 0.5184081515081808, "learning_rate": 5.664681428993604e-05, "loss": 11.8335, "step": 24000 }, { "epoch": 1.3069503719888642, "grad_norm": 0.5645208913521276, "learning_rate": 5.663886798885889e-05, "loss": 11.8736, "step": 24001 }, { "epoch": 1.3070048259854472, "grad_norm": 0.5654316704582217, "learning_rate": 5.663092202496514e-05, "loss": 11.983, "step": 24002 }, { "epoch": 1.3070592799820302, "grad_norm": 0.5317254218554905, "learning_rate": 5.662297639831662e-05, "loss": 11.9351, "step": 24003 }, { "epoch": 1.3071137339786132, "grad_norm": 0.5035033916689757, "learning_rate": 5.661503110897512e-05, "loss": 11.8796, "step": 24004 }, { "epoch": 1.3071681879751962, "grad_norm": 0.5551355566986512, "learning_rate": 5.660708615700241e-05, "loss": 11.9708, "step": 24005 }, { "epoch": 1.3072226419717792, "grad_norm": 0.5316190494277161, "learning_rate": 5.6599141542460334e-05, "loss": 11.9273, "step": 24006 }, { "epoch": 1.3072770959683622, "grad_norm": 0.5489959422403061, "learning_rate": 5.659119726541054e-05, "loss": 11.9331, "step": 24007 }, { "epoch": 1.3073315499649452, "grad_norm": 0.5314437492337467, "learning_rate": 5.658325332591486e-05, "loss": 11.9833, "step": 24008 }, { "epoch": 1.3073860039615282, "grad_norm": 0.5465914577428189, "learning_rate": 5.657530972403514e-05, "loss": 11.7488, "step": 24009 }, { "epoch": 1.3074404579581111, "grad_norm": 0.5306735013351034, "learning_rate": 5.656736645983305e-05, "loss": 11.9382, "step": 24010 }, { "epoch": 1.3074949119546941, "grad_norm": 0.5726449187442675, "learning_rate": 5.6559423533370435e-05, "loss": 11.819, "step": 24011 }, { "epoch": 1.3075493659512774, "grad_norm": 0.5486158402846787, "learning_rate": 5.655148094470899e-05, "loss": 11.8961, "step": 24012 }, { "epoch": 1.3076038199478603, "grad_norm": 0.5426016652965927, "learning_rate": 5.654353869391057e-05, "loss": 11.9445, "step": 24013 }, { "epoch": 1.3076582739444433, "grad_norm": 0.5245954095774398, "learning_rate": 5.6535596781036826e-05, "loss": 11.8985, "step": 24014 }, { "epoch": 1.3077127279410263, "grad_norm": 0.5068428886140711, "learning_rate": 5.652765520614958e-05, "loss": 12.0234, "step": 24015 }, { "epoch": 1.3077671819376093, "grad_norm": 0.5968747370915668, "learning_rate": 5.651971396931064e-05, "loss": 11.9745, "step": 24016 }, { "epoch": 1.3078216359341923, "grad_norm": 0.5904562558238576, "learning_rate": 5.651177307058163e-05, "loss": 11.829, "step": 24017 }, { "epoch": 1.3078760899307753, "grad_norm": 0.6144528212683565, "learning_rate": 5.650383251002437e-05, "loss": 12.0584, "step": 24018 }, { "epoch": 1.3079305439273583, "grad_norm": 0.5637354895837392, "learning_rate": 5.649589228770062e-05, "loss": 12.0088, "step": 24019 }, { "epoch": 1.3079849979239415, "grad_norm": 0.5174112427094429, "learning_rate": 5.64879524036721e-05, "loss": 11.859, "step": 24020 }, { "epoch": 1.3080394519205245, "grad_norm": 0.5722180502695624, "learning_rate": 5.6480012858000575e-05, "loss": 11.8917, "step": 24021 }, { "epoch": 1.3080939059171075, "grad_norm": 0.5278915893715153, "learning_rate": 5.6472073650747736e-05, "loss": 11.841, "step": 24022 }, { "epoch": 1.3081483599136905, "grad_norm": 0.5434381151015685, "learning_rate": 5.646413478197539e-05, "loss": 12.0007, "step": 24023 }, { "epoch": 1.3082028139102735, "grad_norm": 0.57750000746015, "learning_rate": 5.64561962517452e-05, "loss": 11.8534, "step": 24024 }, { "epoch": 1.3082572679068565, "grad_norm": 0.5459021214604409, "learning_rate": 5.6448258060118954e-05, "loss": 11.9986, "step": 24025 }, { "epoch": 1.3083117219034395, "grad_norm": 0.5416026288585469, "learning_rate": 5.644032020715837e-05, "loss": 12.0973, "step": 24026 }, { "epoch": 1.3083661759000225, "grad_norm": 0.5158199941024781, "learning_rate": 5.6432382692925124e-05, "loss": 11.9428, "step": 24027 }, { "epoch": 1.3084206298966055, "grad_norm": 0.4821028621826143, "learning_rate": 5.642444551748101e-05, "loss": 11.8582, "step": 24028 }, { "epoch": 1.3084750838931885, "grad_norm": 0.5817373794363259, "learning_rate": 5.641650868088768e-05, "loss": 11.84, "step": 24029 }, { "epoch": 1.3085295378897714, "grad_norm": 0.5238270562827404, "learning_rate": 5.6408572183206874e-05, "loss": 11.8774, "step": 24030 }, { "epoch": 1.3085839918863544, "grad_norm": 0.5662106271145493, "learning_rate": 5.6400636024500376e-05, "loss": 11.8612, "step": 24031 }, { "epoch": 1.3086384458829374, "grad_norm": 0.4658411699306566, "learning_rate": 5.63927002048298e-05, "loss": 11.8203, "step": 24032 }, { "epoch": 1.3086928998795204, "grad_norm": 0.5369833044130452, "learning_rate": 5.6384764724256935e-05, "loss": 11.7989, "step": 24033 }, { "epoch": 1.3087473538761034, "grad_norm": 0.6215086146378541, "learning_rate": 5.6376829582843416e-05, "loss": 11.9815, "step": 24034 }, { "epoch": 1.3088018078726866, "grad_norm": 0.575460996600585, "learning_rate": 5.636889478065103e-05, "loss": 11.8371, "step": 24035 }, { "epoch": 1.3088562618692696, "grad_norm": 0.5267694407632315, "learning_rate": 5.636096031774143e-05, "loss": 11.8948, "step": 24036 }, { "epoch": 1.3089107158658526, "grad_norm": 0.9051984971751704, "learning_rate": 5.6353026194176284e-05, "loss": 11.8355, "step": 24037 }, { "epoch": 1.3089651698624356, "grad_norm": 0.5451482067920245, "learning_rate": 5.6345092410017366e-05, "loss": 11.8367, "step": 24038 }, { "epoch": 1.3090196238590186, "grad_norm": 0.556870168449486, "learning_rate": 5.633715896532629e-05, "loss": 12.0332, "step": 24039 }, { "epoch": 1.3090740778556016, "grad_norm": 0.5546790452992627, "learning_rate": 5.632922586016482e-05, "loss": 11.9491, "step": 24040 }, { "epoch": 1.3091285318521846, "grad_norm": 0.548344602851646, "learning_rate": 5.6321293094594586e-05, "loss": 11.9903, "step": 24041 }, { "epoch": 1.3091829858487676, "grad_norm": 0.5352260763084332, "learning_rate": 5.63133606686773e-05, "loss": 11.7799, "step": 24042 }, { "epoch": 1.3092374398453506, "grad_norm": 0.5511868697841315, "learning_rate": 5.6305428582474676e-05, "loss": 12.0075, "step": 24043 }, { "epoch": 1.3092918938419338, "grad_norm": 0.5807556727173098, "learning_rate": 5.6297496836048336e-05, "loss": 11.9363, "step": 24044 }, { "epoch": 1.3093463478385168, "grad_norm": 0.5416019456363944, "learning_rate": 5.6289565429460024e-05, "loss": 11.9437, "step": 24045 }, { "epoch": 1.3094008018350998, "grad_norm": 0.5568288851944784, "learning_rate": 5.6281634362771383e-05, "loss": 11.9437, "step": 24046 }, { "epoch": 1.3094552558316828, "grad_norm": 0.5165909000398059, "learning_rate": 5.627370363604405e-05, "loss": 11.9533, "step": 24047 }, { "epoch": 1.3095097098282658, "grad_norm": 0.4648065800283373, "learning_rate": 5.626577324933977e-05, "loss": 11.9777, "step": 24048 }, { "epoch": 1.3095641638248487, "grad_norm": 0.5402151427444775, "learning_rate": 5.625784320272014e-05, "loss": 11.9627, "step": 24049 }, { "epoch": 1.3096186178214317, "grad_norm": 0.5151015954640977, "learning_rate": 5.6249913496246884e-05, "loss": 11.8636, "step": 24050 }, { "epoch": 1.3096730718180147, "grad_norm": 0.7071352397373822, "learning_rate": 5.6241984129981594e-05, "loss": 11.9924, "step": 24051 }, { "epoch": 1.3097275258145977, "grad_norm": 0.5532174252587672, "learning_rate": 5.623405510398598e-05, "loss": 11.8778, "step": 24052 }, { "epoch": 1.3097819798111807, "grad_norm": 0.5289146410485642, "learning_rate": 5.6226126418321734e-05, "loss": 11.926, "step": 24053 }, { "epoch": 1.3098364338077637, "grad_norm": 0.5334503954840796, "learning_rate": 5.621819807305043e-05, "loss": 11.9385, "step": 24054 }, { "epoch": 1.3098908878043467, "grad_norm": 0.5675098184871193, "learning_rate": 5.621027006823381e-05, "loss": 11.91, "step": 24055 }, { "epoch": 1.3099453418009297, "grad_norm": 0.5520081124621085, "learning_rate": 5.620234240393345e-05, "loss": 11.8502, "step": 24056 }, { "epoch": 1.3099997957975127, "grad_norm": 0.5103938780111638, "learning_rate": 5.619441508021099e-05, "loss": 11.9141, "step": 24057 }, { "epoch": 1.310054249794096, "grad_norm": 0.6273218532787944, "learning_rate": 5.618648809712815e-05, "loss": 12.0362, "step": 24058 }, { "epoch": 1.310108703790679, "grad_norm": 0.6317001937888972, "learning_rate": 5.6178561454746495e-05, "loss": 11.962, "step": 24059 }, { "epoch": 1.310163157787262, "grad_norm": 0.5852638614045123, "learning_rate": 5.6170635153127703e-05, "loss": 11.9447, "step": 24060 }, { "epoch": 1.3102176117838449, "grad_norm": 0.5165607266868081, "learning_rate": 5.6162709192333394e-05, "loss": 11.9877, "step": 24061 }, { "epoch": 1.3102720657804279, "grad_norm": 0.541180216936752, "learning_rate": 5.615478357242523e-05, "loss": 11.7278, "step": 24062 }, { "epoch": 1.3103265197770109, "grad_norm": 0.6027574576790281, "learning_rate": 5.61468582934648e-05, "loss": 11.7352, "step": 24063 }, { "epoch": 1.3103809737735939, "grad_norm": 0.561972172242614, "learning_rate": 5.6138933355513744e-05, "loss": 11.9482, "step": 24064 }, { "epoch": 1.3104354277701769, "grad_norm": 0.540328313228767, "learning_rate": 5.613100875863373e-05, "loss": 11.8044, "step": 24065 }, { "epoch": 1.3104898817667598, "grad_norm": 0.5062809733534139, "learning_rate": 5.6123084502886355e-05, "loss": 11.4273, "step": 24066 }, { "epoch": 1.310544335763343, "grad_norm": 0.5369482911576979, "learning_rate": 5.611516058833319e-05, "loss": 11.9504, "step": 24067 }, { "epoch": 1.310598789759926, "grad_norm": 0.5616688717766474, "learning_rate": 5.610723701503595e-05, "loss": 11.8698, "step": 24068 }, { "epoch": 1.310653243756509, "grad_norm": 0.5599362798095469, "learning_rate": 5.609931378305615e-05, "loss": 11.7793, "step": 24069 }, { "epoch": 1.310707697753092, "grad_norm": 0.5818904319739762, "learning_rate": 5.609139089245548e-05, "loss": 12.0357, "step": 24070 }, { "epoch": 1.310762151749675, "grad_norm": 0.5087136123233187, "learning_rate": 5.608346834329547e-05, "loss": 11.8868, "step": 24071 }, { "epoch": 1.310816605746258, "grad_norm": 0.5162082398924985, "learning_rate": 5.607554613563783e-05, "loss": 11.928, "step": 24072 }, { "epoch": 1.310871059742841, "grad_norm": 0.5629835155002205, "learning_rate": 5.606762426954408e-05, "loss": 11.8736, "step": 24073 }, { "epoch": 1.310925513739424, "grad_norm": 0.5890715222779773, "learning_rate": 5.605970274507588e-05, "loss": 11.9634, "step": 24074 }, { "epoch": 1.310979967736007, "grad_norm": 0.5558526509435125, "learning_rate": 5.605178156229476e-05, "loss": 11.8771, "step": 24075 }, { "epoch": 1.31103442173259, "grad_norm": 0.5009418577530341, "learning_rate": 5.604386072126239e-05, "loss": 11.9972, "step": 24076 }, { "epoch": 1.311088875729173, "grad_norm": 0.5493961153888932, "learning_rate": 5.603594022204033e-05, "loss": 11.7846, "step": 24077 }, { "epoch": 1.311143329725756, "grad_norm": 0.5293464153718507, "learning_rate": 5.602802006469014e-05, "loss": 11.8235, "step": 24078 }, { "epoch": 1.311197783722339, "grad_norm": 0.47494824965195975, "learning_rate": 5.602010024927343e-05, "loss": 11.9343, "step": 24079 }, { "epoch": 1.311252237718922, "grad_norm": 0.5400570253824287, "learning_rate": 5.601218077585184e-05, "loss": 11.9424, "step": 24080 }, { "epoch": 1.311306691715505, "grad_norm": 0.6292446865449202, "learning_rate": 5.600426164448688e-05, "loss": 11.9883, "step": 24081 }, { "epoch": 1.3113611457120882, "grad_norm": 0.5384956763460513, "learning_rate": 5.5996342855240194e-05, "loss": 12.0204, "step": 24082 }, { "epoch": 1.3114155997086712, "grad_norm": 0.4949942337977581, "learning_rate": 5.598842440817328e-05, "loss": 11.8607, "step": 24083 }, { "epoch": 1.3114700537052542, "grad_norm": 0.5495955408059275, "learning_rate": 5.59805063033478e-05, "loss": 11.8031, "step": 24084 }, { "epoch": 1.3115245077018372, "grad_norm": 0.5227917745529473, "learning_rate": 5.5972588540825245e-05, "loss": 11.9676, "step": 24085 }, { "epoch": 1.3115789616984201, "grad_norm": 0.5493167528344649, "learning_rate": 5.596467112066727e-05, "loss": 12.0129, "step": 24086 }, { "epoch": 1.3116334156950031, "grad_norm": 0.5369586038681113, "learning_rate": 5.595675404293539e-05, "loss": 11.934, "step": 24087 }, { "epoch": 1.3116878696915861, "grad_norm": 0.5692549213598999, "learning_rate": 5.594883730769114e-05, "loss": 12.0284, "step": 24088 }, { "epoch": 1.3117423236881691, "grad_norm": 0.5246548461312474, "learning_rate": 5.594092091499617e-05, "loss": 12.104, "step": 24089 }, { "epoch": 1.3117967776847523, "grad_norm": 0.5422912381640886, "learning_rate": 5.593300486491193e-05, "loss": 11.8926, "step": 24090 }, { "epoch": 1.3118512316813353, "grad_norm": 0.5507037008770228, "learning_rate": 5.592508915750003e-05, "loss": 11.8453, "step": 24091 }, { "epoch": 1.3119056856779183, "grad_norm": 0.6109536311951776, "learning_rate": 5.5917173792822086e-05, "loss": 11.8903, "step": 24092 }, { "epoch": 1.3119601396745013, "grad_norm": 0.5667929815034383, "learning_rate": 5.590925877093954e-05, "loss": 11.925, "step": 24093 }, { "epoch": 1.3120145936710843, "grad_norm": 0.5151352515239541, "learning_rate": 5.590134409191403e-05, "loss": 11.7802, "step": 24094 }, { "epoch": 1.3120690476676673, "grad_norm": 0.5312198409149739, "learning_rate": 5.589342975580703e-05, "loss": 11.8378, "step": 24095 }, { "epoch": 1.3121235016642503, "grad_norm": 0.51001384620566, "learning_rate": 5.588551576268016e-05, "loss": 11.7902, "step": 24096 }, { "epoch": 1.3121779556608333, "grad_norm": 0.5474836460471255, "learning_rate": 5.587760211259492e-05, "loss": 12.0006, "step": 24097 }, { "epoch": 1.3122324096574163, "grad_norm": 0.5813697004632432, "learning_rate": 5.5869688805612784e-05, "loss": 11.9648, "step": 24098 }, { "epoch": 1.3122868636539993, "grad_norm": 0.5495130797855269, "learning_rate": 5.5861775841795417e-05, "loss": 11.8442, "step": 24099 }, { "epoch": 1.3123413176505823, "grad_norm": 0.5563958357476311, "learning_rate": 5.585386322120423e-05, "loss": 11.9653, "step": 24100 }, { "epoch": 1.3123957716471653, "grad_norm": 0.5601210651888593, "learning_rate": 5.584595094390079e-05, "loss": 11.9562, "step": 24101 }, { "epoch": 1.3124502256437482, "grad_norm": 0.5085020324736595, "learning_rate": 5.583803900994671e-05, "loss": 11.974, "step": 24102 }, { "epoch": 1.3125046796403312, "grad_norm": 0.5350322377035194, "learning_rate": 5.583012741940339e-05, "loss": 12.0051, "step": 24103 }, { "epoch": 1.3125591336369142, "grad_norm": 0.5307092152711169, "learning_rate": 5.5822216172332444e-05, "loss": 11.9561, "step": 24104 }, { "epoch": 1.3126135876334974, "grad_norm": 0.5560314482779806, "learning_rate": 5.581430526879532e-05, "loss": 11.8514, "step": 24105 }, { "epoch": 1.3126680416300804, "grad_norm": 0.5246609139397009, "learning_rate": 5.58063947088536e-05, "loss": 11.9932, "step": 24106 }, { "epoch": 1.3127224956266634, "grad_norm": 0.5446374354321504, "learning_rate": 5.579848449256877e-05, "loss": 11.8102, "step": 24107 }, { "epoch": 1.3127769496232464, "grad_norm": 0.557710650956272, "learning_rate": 5.57905746200023e-05, "loss": 11.9275, "step": 24108 }, { "epoch": 1.3128314036198294, "grad_norm": 0.569629619957673, "learning_rate": 5.5782665091215766e-05, "loss": 11.9469, "step": 24109 }, { "epoch": 1.3128858576164124, "grad_norm": 0.5452258413061813, "learning_rate": 5.5774755906270613e-05, "loss": 12.0616, "step": 24110 }, { "epoch": 1.3129403116129954, "grad_norm": 0.549681639306198, "learning_rate": 5.576684706522841e-05, "loss": 11.814, "step": 24111 }, { "epoch": 1.3129947656095784, "grad_norm": 0.8968357936170647, "learning_rate": 5.5758938568150585e-05, "loss": 12.0231, "step": 24112 }, { "epoch": 1.3130492196061614, "grad_norm": 0.6114355622916864, "learning_rate": 5.575103041509867e-05, "loss": 11.9277, "step": 24113 }, { "epoch": 1.3131036736027446, "grad_norm": 0.5632441548114183, "learning_rate": 5.5743122606134214e-05, "loss": 11.8432, "step": 24114 }, { "epoch": 1.3131581275993276, "grad_norm": 0.6229341802493858, "learning_rate": 5.5735215141318607e-05, "loss": 12.0, "step": 24115 }, { "epoch": 1.3132125815959106, "grad_norm": 0.5325063396721057, "learning_rate": 5.572730802071343e-05, "loss": 11.888, "step": 24116 }, { "epoch": 1.3132670355924936, "grad_norm": 0.570325299049395, "learning_rate": 5.571940124438013e-05, "loss": 11.9914, "step": 24117 }, { "epoch": 1.3133214895890766, "grad_norm": 0.5685258266474903, "learning_rate": 5.571149481238016e-05, "loss": 11.9793, "step": 24118 }, { "epoch": 1.3133759435856596, "grad_norm": 0.5074342320921148, "learning_rate": 5.570358872477507e-05, "loss": 11.8027, "step": 24119 }, { "epoch": 1.3134303975822426, "grad_norm": 0.5158155993323239, "learning_rate": 5.5695682981626266e-05, "loss": 11.8288, "step": 24120 }, { "epoch": 1.3134848515788256, "grad_norm": 0.5378069571240769, "learning_rate": 5.5687777582995284e-05, "loss": 11.7794, "step": 24121 }, { "epoch": 1.3135393055754085, "grad_norm": 0.5895099475242283, "learning_rate": 5.567987252894355e-05, "loss": 12.0278, "step": 24122 }, { "epoch": 1.3135937595719915, "grad_norm": 0.5474999267889047, "learning_rate": 5.56719678195326e-05, "loss": 11.9065, "step": 24123 }, { "epoch": 1.3136482135685745, "grad_norm": 0.6210183064545841, "learning_rate": 5.566406345482382e-05, "loss": 12.0531, "step": 24124 }, { "epoch": 1.3137026675651575, "grad_norm": 0.5584809577507116, "learning_rate": 5.565615943487872e-05, "loss": 11.88, "step": 24125 }, { "epoch": 1.3137571215617405, "grad_norm": 0.5620921521487785, "learning_rate": 5.564825575975883e-05, "loss": 11.8425, "step": 24126 }, { "epoch": 1.3138115755583235, "grad_norm": 0.6000775354646204, "learning_rate": 5.5640352429525466e-05, "loss": 11.9881, "step": 24127 }, { "epoch": 1.3138660295549067, "grad_norm": 0.5725438406253455, "learning_rate": 5.5632449444240165e-05, "loss": 11.8342, "step": 24128 }, { "epoch": 1.3139204835514897, "grad_norm": 0.5747252928413819, "learning_rate": 5.5624546803964414e-05, "loss": 11.9359, "step": 24129 }, { "epoch": 1.3139749375480727, "grad_norm": 0.5580723502020485, "learning_rate": 5.5616644508759585e-05, "loss": 11.8348, "step": 24130 }, { "epoch": 1.3140293915446557, "grad_norm": 0.5206431814318879, "learning_rate": 5.560874255868722e-05, "loss": 11.994, "step": 24131 }, { "epoch": 1.3140838455412387, "grad_norm": 0.5139602020182866, "learning_rate": 5.5600840953808675e-05, "loss": 11.8856, "step": 24132 }, { "epoch": 1.3141382995378217, "grad_norm": 0.5273997701362135, "learning_rate": 5.5592939694185473e-05, "loss": 11.9026, "step": 24133 }, { "epoch": 1.3141927535344047, "grad_norm": 0.5255371617969886, "learning_rate": 5.5585038779878984e-05, "loss": 11.869, "step": 24134 }, { "epoch": 1.3142472075309877, "grad_norm": 0.5605092161408242, "learning_rate": 5.557713821095067e-05, "loss": 11.9114, "step": 24135 }, { "epoch": 1.3143016615275707, "grad_norm": 0.5323861289247597, "learning_rate": 5.556923798746204e-05, "loss": 11.8343, "step": 24136 }, { "epoch": 1.3143561155241539, "grad_norm": 0.5361199374183426, "learning_rate": 5.556133810947446e-05, "loss": 11.9019, "step": 24137 }, { "epoch": 1.3144105695207369, "grad_norm": 0.5515330296315245, "learning_rate": 5.5553438577049354e-05, "loss": 11.9666, "step": 24138 }, { "epoch": 1.3144650235173199, "grad_norm": 0.532408575148832, "learning_rate": 5.5545539390248135e-05, "loss": 12.028, "step": 24139 }, { "epoch": 1.3145194775139029, "grad_norm": 0.5354823885175315, "learning_rate": 5.553764054913225e-05, "loss": 11.9947, "step": 24140 }, { "epoch": 1.3145739315104858, "grad_norm": 0.5659012633566877, "learning_rate": 5.552974205376318e-05, "loss": 11.8438, "step": 24141 }, { "epoch": 1.3146283855070688, "grad_norm": 0.5160737526667496, "learning_rate": 5.552184390420223e-05, "loss": 11.8927, "step": 24142 }, { "epoch": 1.3146828395036518, "grad_norm": 0.523950389511685, "learning_rate": 5.551394610051095e-05, "loss": 11.8478, "step": 24143 }, { "epoch": 1.3147372935002348, "grad_norm": 0.5525784865107607, "learning_rate": 5.550604864275062e-05, "loss": 11.9292, "step": 24144 }, { "epoch": 1.3147917474968178, "grad_norm": 0.5705633641822888, "learning_rate": 5.5498151530982765e-05, "loss": 11.9413, "step": 24145 }, { "epoch": 1.3148462014934008, "grad_norm": 0.5416032511796919, "learning_rate": 5.549025476526872e-05, "loss": 11.989, "step": 24146 }, { "epoch": 1.3149006554899838, "grad_norm": 0.5547615693651402, "learning_rate": 5.548235834566995e-05, "loss": 11.8103, "step": 24147 }, { "epoch": 1.3149551094865668, "grad_norm": 0.529193120232034, "learning_rate": 5.5474462272247816e-05, "loss": 11.8657, "step": 24148 }, { "epoch": 1.3150095634831498, "grad_norm": 0.5883229261148182, "learning_rate": 5.5466566545063705e-05, "loss": 11.8602, "step": 24149 }, { "epoch": 1.3150640174797328, "grad_norm": 0.5435475930209137, "learning_rate": 5.545867116417903e-05, "loss": 11.9026, "step": 24150 }, { "epoch": 1.315118471476316, "grad_norm": 0.5343514848972449, "learning_rate": 5.545077612965525e-05, "loss": 11.9724, "step": 24151 }, { "epoch": 1.315172925472899, "grad_norm": 0.5512682603081, "learning_rate": 5.544288144155365e-05, "loss": 11.9269, "step": 24152 }, { "epoch": 1.315227379469482, "grad_norm": 0.5353583863282051, "learning_rate": 5.543498709993573e-05, "loss": 11.957, "step": 24153 }, { "epoch": 1.315281833466065, "grad_norm": 0.5511459412465701, "learning_rate": 5.5427093104862785e-05, "loss": 11.9519, "step": 24154 }, { "epoch": 1.315336287462648, "grad_norm": 0.5174468989461876, "learning_rate": 5.541919945639626e-05, "loss": 11.8569, "step": 24155 }, { "epoch": 1.315390741459231, "grad_norm": 0.4994135320521054, "learning_rate": 5.54113061545975e-05, "loss": 11.927, "step": 24156 }, { "epoch": 1.315445195455814, "grad_norm": 0.509100283757051, "learning_rate": 5.540341319952793e-05, "loss": 11.8701, "step": 24157 }, { "epoch": 1.315499649452397, "grad_norm": 0.5681110353943868, "learning_rate": 5.5395520591248896e-05, "loss": 11.9094, "step": 24158 }, { "epoch": 1.31555410344898, "grad_norm": 0.5072899762004406, "learning_rate": 5.5387628329821736e-05, "loss": 11.9164, "step": 24159 }, { "epoch": 1.3156085574455632, "grad_norm": 0.5677614542523862, "learning_rate": 5.537973641530789e-05, "loss": 12.0422, "step": 24160 }, { "epoch": 1.3156630114421461, "grad_norm": 0.5821230787364737, "learning_rate": 5.537184484776867e-05, "loss": 11.8967, "step": 24161 }, { "epoch": 1.3157174654387291, "grad_norm": 0.5055793912300793, "learning_rate": 5.536395362726548e-05, "loss": 11.9621, "step": 24162 }, { "epoch": 1.3157719194353121, "grad_norm": 0.5134850537248695, "learning_rate": 5.535606275385968e-05, "loss": 11.7587, "step": 24163 }, { "epoch": 1.3158263734318951, "grad_norm": 0.5419693543723557, "learning_rate": 5.53481722276126e-05, "loss": 11.9077, "step": 24164 }, { "epoch": 1.3158808274284781, "grad_norm": 0.5173375709901956, "learning_rate": 5.5340282048585654e-05, "loss": 12.0389, "step": 24165 }, { "epoch": 1.315935281425061, "grad_norm": 0.5504550053957293, "learning_rate": 5.533239221684015e-05, "loss": 11.9779, "step": 24166 }, { "epoch": 1.315989735421644, "grad_norm": 0.5451182986736287, "learning_rate": 5.5324502732437454e-05, "loss": 11.9775, "step": 24167 }, { "epoch": 1.316044189418227, "grad_norm": 0.503295735563272, "learning_rate": 5.531661359543895e-05, "loss": 11.9263, "step": 24168 }, { "epoch": 1.31609864341481, "grad_norm": 0.5789356449436505, "learning_rate": 5.53087248059059e-05, "loss": 11.9394, "step": 24169 }, { "epoch": 1.316153097411393, "grad_norm": 0.5325233102245421, "learning_rate": 5.5300836363899736e-05, "loss": 12.0002, "step": 24170 }, { "epoch": 1.316207551407976, "grad_norm": 0.5495782431769719, "learning_rate": 5.529294826948173e-05, "loss": 11.6923, "step": 24171 }, { "epoch": 1.316262005404559, "grad_norm": 0.512139927840057, "learning_rate": 5.528506052271325e-05, "loss": 11.8159, "step": 24172 }, { "epoch": 1.316316459401142, "grad_norm": 0.48019392667061545, "learning_rate": 5.527717312365569e-05, "loss": 11.8032, "step": 24173 }, { "epoch": 1.316370913397725, "grad_norm": 0.539613227120157, "learning_rate": 5.526928607237028e-05, "loss": 11.8105, "step": 24174 }, { "epoch": 1.3164253673943083, "grad_norm": 0.5114423431699913, "learning_rate": 5.5261399368918454e-05, "loss": 11.7604, "step": 24175 }, { "epoch": 1.3164798213908913, "grad_norm": 0.5524076486521812, "learning_rate": 5.5253513013361434e-05, "loss": 11.8275, "step": 24176 }, { "epoch": 1.3165342753874743, "grad_norm": 0.5331113835805055, "learning_rate": 5.5245627005760656e-05, "loss": 11.7874, "step": 24177 }, { "epoch": 1.3165887293840572, "grad_norm": 0.5454035226612589, "learning_rate": 5.5237741346177385e-05, "loss": 11.9775, "step": 24178 }, { "epoch": 1.3166431833806402, "grad_norm": 0.5283015966927866, "learning_rate": 5.52298560346729e-05, "loss": 11.8976, "step": 24179 }, { "epoch": 1.3166976373772232, "grad_norm": 0.5871850154321271, "learning_rate": 5.5221971071308595e-05, "loss": 11.9353, "step": 24180 }, { "epoch": 1.3167520913738062, "grad_norm": 0.6136044105531907, "learning_rate": 5.521408645614572e-05, "loss": 11.8291, "step": 24181 }, { "epoch": 1.3168065453703892, "grad_norm": 0.5487668700359158, "learning_rate": 5.520620218924566e-05, "loss": 12.0251, "step": 24182 }, { "epoch": 1.3168609993669722, "grad_norm": 0.5327122282694071, "learning_rate": 5.519831827066965e-05, "loss": 11.9015, "step": 24183 }, { "epoch": 1.3169154533635554, "grad_norm": 0.5624584878840289, "learning_rate": 5.519043470047902e-05, "loss": 11.9414, "step": 24184 }, { "epoch": 1.3169699073601384, "grad_norm": 0.5313704514202723, "learning_rate": 5.518255147873512e-05, "loss": 11.8792, "step": 24185 }, { "epoch": 1.3170243613567214, "grad_norm": 0.6087527962046092, "learning_rate": 5.517466860549919e-05, "loss": 11.7326, "step": 24186 }, { "epoch": 1.3170788153533044, "grad_norm": 0.5545308368584826, "learning_rate": 5.5166786080832635e-05, "loss": 11.9461, "step": 24187 }, { "epoch": 1.3171332693498874, "grad_norm": 0.5577385464793357, "learning_rate": 5.5158903904796576e-05, "loss": 11.8884, "step": 24188 }, { "epoch": 1.3171877233464704, "grad_norm": 0.5656530028652127, "learning_rate": 5.515102207745241e-05, "loss": 11.9084, "step": 24189 }, { "epoch": 1.3172421773430534, "grad_norm": 0.5910234974929589, "learning_rate": 5.5143140598861455e-05, "loss": 11.8402, "step": 24190 }, { "epoch": 1.3172966313396364, "grad_norm": 0.5865373586103833, "learning_rate": 5.513525946908491e-05, "loss": 11.9717, "step": 24191 }, { "epoch": 1.3173510853362194, "grad_norm": 0.5158602067307513, "learning_rate": 5.512737868818416e-05, "loss": 12.0789, "step": 24192 }, { "epoch": 1.3174055393328024, "grad_norm": 0.5260099951371222, "learning_rate": 5.51194982562204e-05, "loss": 11.8863, "step": 24193 }, { "epoch": 1.3174599933293853, "grad_norm": 0.5960360865287289, "learning_rate": 5.5111618173254984e-05, "loss": 11.8579, "step": 24194 }, { "epoch": 1.3175144473259683, "grad_norm": 0.5974298738793148, "learning_rate": 5.510373843934913e-05, "loss": 11.9543, "step": 24195 }, { "epoch": 1.3175689013225513, "grad_norm": 0.5251629446111129, "learning_rate": 5.509585905456411e-05, "loss": 11.8436, "step": 24196 }, { "epoch": 1.3176233553191343, "grad_norm": 0.5550405248967265, "learning_rate": 5.508798001896132e-05, "loss": 11.9545, "step": 24197 }, { "epoch": 1.3176778093157175, "grad_norm": 0.48351700828475436, "learning_rate": 5.508010133260184e-05, "loss": 11.8354, "step": 24198 }, { "epoch": 1.3177322633123005, "grad_norm": 0.5464638364304195, "learning_rate": 5.5072222995547026e-05, "loss": 11.8961, "step": 24199 }, { "epoch": 1.3177867173088835, "grad_norm": 0.5637109969518448, "learning_rate": 5.506434500785817e-05, "loss": 12.0174, "step": 24200 }, { "epoch": 1.3178411713054665, "grad_norm": 0.5533901495275418, "learning_rate": 5.5056467369596484e-05, "loss": 11.8048, "step": 24201 }, { "epoch": 1.3178956253020495, "grad_norm": 0.5677288863780411, "learning_rate": 5.504859008082328e-05, "loss": 11.8226, "step": 24202 }, { "epoch": 1.3179500792986325, "grad_norm": 0.6358236306367222, "learning_rate": 5.504071314159973e-05, "loss": 11.9235, "step": 24203 }, { "epoch": 1.3180045332952155, "grad_norm": 0.5985184796954829, "learning_rate": 5.503283655198718e-05, "loss": 11.8566, "step": 24204 }, { "epoch": 1.3180589872917985, "grad_norm": 0.5934497309754219, "learning_rate": 5.502496031204678e-05, "loss": 11.9266, "step": 24205 }, { "epoch": 1.3181134412883815, "grad_norm": 0.5245202950791581, "learning_rate": 5.501708442183985e-05, "loss": 11.8096, "step": 24206 }, { "epoch": 1.3181678952849647, "grad_norm": 0.5235175834459708, "learning_rate": 5.5009208881427686e-05, "loss": 11.9094, "step": 24207 }, { "epoch": 1.3182223492815477, "grad_norm": 0.5299367809455927, "learning_rate": 5.500133369087137e-05, "loss": 11.9188, "step": 24208 }, { "epoch": 1.3182768032781307, "grad_norm": 0.5295516934968243, "learning_rate": 5.4993458850232285e-05, "loss": 11.8589, "step": 24209 }, { "epoch": 1.3183312572747137, "grad_norm": 0.5885540068868155, "learning_rate": 5.4985584359571565e-05, "loss": 12.0495, "step": 24210 }, { "epoch": 1.3183857112712967, "grad_norm": 0.6380428176961217, "learning_rate": 5.497771021895048e-05, "loss": 11.8852, "step": 24211 }, { "epoch": 1.3184401652678797, "grad_norm": 0.5672198964296776, "learning_rate": 5.496983642843033e-05, "loss": 12.014, "step": 24212 }, { "epoch": 1.3184946192644627, "grad_norm": 0.8291922617570414, "learning_rate": 5.4961962988072237e-05, "loss": 11.9553, "step": 24213 }, { "epoch": 1.3185490732610456, "grad_norm": 0.5886373816221026, "learning_rate": 5.495408989793751e-05, "loss": 11.9391, "step": 24214 }, { "epoch": 1.3186035272576286, "grad_norm": 0.5320349527412451, "learning_rate": 5.49462171580873e-05, "loss": 11.9512, "step": 24215 }, { "epoch": 1.3186579812542116, "grad_norm": 0.4912981698863273, "learning_rate": 5.4938344768582886e-05, "loss": 11.8482, "step": 24216 }, { "epoch": 1.3187124352507946, "grad_norm": 0.5738559075083566, "learning_rate": 5.493047272948547e-05, "loss": 11.9387, "step": 24217 }, { "epoch": 1.3187668892473776, "grad_norm": 0.6180322146989785, "learning_rate": 5.492260104085621e-05, "loss": 11.9237, "step": 24218 }, { "epoch": 1.3188213432439606, "grad_norm": 0.5092424770447668, "learning_rate": 5.491472970275642e-05, "loss": 11.8272, "step": 24219 }, { "epoch": 1.3188757972405436, "grad_norm": 0.6367962032492092, "learning_rate": 5.490685871524719e-05, "loss": 12.0051, "step": 24220 }, { "epoch": 1.3189302512371268, "grad_norm": 0.537058284768866, "learning_rate": 5.48989880783898e-05, "loss": 11.8841, "step": 24221 }, { "epoch": 1.3189847052337098, "grad_norm": 0.5582860930165967, "learning_rate": 5.4891117792245494e-05, "loss": 11.9276, "step": 24222 }, { "epoch": 1.3190391592302928, "grad_norm": 0.6171494646409815, "learning_rate": 5.488324785687537e-05, "loss": 11.9321, "step": 24223 }, { "epoch": 1.3190936132268758, "grad_norm": 0.5364331402214078, "learning_rate": 5.48753782723407e-05, "loss": 11.8419, "step": 24224 }, { "epoch": 1.3191480672234588, "grad_norm": 0.5715325938987341, "learning_rate": 5.4867509038702644e-05, "loss": 12.0285, "step": 24225 }, { "epoch": 1.3192025212200418, "grad_norm": 0.5696662353364319, "learning_rate": 5.485964015602243e-05, "loss": 11.8765, "step": 24226 }, { "epoch": 1.3192569752166248, "grad_norm": 0.553330927101105, "learning_rate": 5.4851771624361236e-05, "loss": 11.9422, "step": 24227 }, { "epoch": 1.3193114292132078, "grad_norm": 0.6435706481748136, "learning_rate": 5.4843903443780185e-05, "loss": 12.1121, "step": 24228 }, { "epoch": 1.3193658832097908, "grad_norm": 0.5227215737264612, "learning_rate": 5.4836035614340566e-05, "loss": 11.9436, "step": 24229 }, { "epoch": 1.319420337206374, "grad_norm": 0.7070280218748599, "learning_rate": 5.482816813610345e-05, "loss": 11.7973, "step": 24230 }, { "epoch": 1.319474791202957, "grad_norm": 0.5108586704723379, "learning_rate": 5.482030100913014e-05, "loss": 11.7991, "step": 24231 }, { "epoch": 1.31952924519954, "grad_norm": 0.6067175139272265, "learning_rate": 5.481243423348168e-05, "loss": 11.8569, "step": 24232 }, { "epoch": 1.319583699196123, "grad_norm": 0.5096432546957752, "learning_rate": 5.480456780921932e-05, "loss": 11.9371, "step": 24233 }, { "epoch": 1.319638153192706, "grad_norm": 0.568550815802961, "learning_rate": 5.479670173640426e-05, "loss": 11.9072, "step": 24234 }, { "epoch": 1.319692607189289, "grad_norm": 0.5512807238669505, "learning_rate": 5.478883601509759e-05, "loss": 11.9034, "step": 24235 }, { "epoch": 1.319747061185872, "grad_norm": 0.5151034126730991, "learning_rate": 5.478097064536054e-05, "loss": 11.9168, "step": 24236 }, { "epoch": 1.319801515182455, "grad_norm": 0.5536047071293094, "learning_rate": 5.477310562725426e-05, "loss": 11.9195, "step": 24237 }, { "epoch": 1.319855969179038, "grad_norm": 0.5049946864835259, "learning_rate": 5.476524096083986e-05, "loss": 11.7928, "step": 24238 }, { "epoch": 1.319910423175621, "grad_norm": 0.5610898251218176, "learning_rate": 5.475737664617856e-05, "loss": 11.7958, "step": 24239 }, { "epoch": 1.319964877172204, "grad_norm": 0.5647761995622971, "learning_rate": 5.474951268333145e-05, "loss": 11.8977, "step": 24240 }, { "epoch": 1.320019331168787, "grad_norm": 0.5679213557007524, "learning_rate": 5.474164907235976e-05, "loss": 11.83, "step": 24241 }, { "epoch": 1.3200737851653699, "grad_norm": 0.5218434301393386, "learning_rate": 5.473378581332456e-05, "loss": 11.928, "step": 24242 }, { "epoch": 1.3201282391619529, "grad_norm": 0.5110787526488226, "learning_rate": 5.472592290628707e-05, "loss": 11.8448, "step": 24243 }, { "epoch": 1.3201826931585359, "grad_norm": 0.5731643963526519, "learning_rate": 5.471806035130834e-05, "loss": 11.8394, "step": 24244 }, { "epoch": 1.320237147155119, "grad_norm": 0.5569903728328777, "learning_rate": 5.4710198148449576e-05, "loss": 12.0095, "step": 24245 }, { "epoch": 1.320291601151702, "grad_norm": 0.5234051767866967, "learning_rate": 5.470233629777195e-05, "loss": 11.9206, "step": 24246 }, { "epoch": 1.320346055148285, "grad_norm": 0.5198547765554428, "learning_rate": 5.469447479933656e-05, "loss": 11.9631, "step": 24247 }, { "epoch": 1.320400509144868, "grad_norm": 0.5586794934699644, "learning_rate": 5.468661365320449e-05, "loss": 12.0107, "step": 24248 }, { "epoch": 1.320454963141451, "grad_norm": 0.5489873444126732, "learning_rate": 5.467875285943693e-05, "loss": 11.9004, "step": 24249 }, { "epoch": 1.320509417138034, "grad_norm": 0.5008034757135255, "learning_rate": 5.467089241809498e-05, "loss": 11.891, "step": 24250 }, { "epoch": 1.320563871134617, "grad_norm": 0.532421244376691, "learning_rate": 5.466303232923979e-05, "loss": 11.8025, "step": 24251 }, { "epoch": 1.3206183251312, "grad_norm": 0.5069687974078494, "learning_rate": 5.465517259293242e-05, "loss": 11.7175, "step": 24252 }, { "epoch": 1.3206727791277832, "grad_norm": 0.5436007210662951, "learning_rate": 5.464731320923409e-05, "loss": 11.9452, "step": 24253 }, { "epoch": 1.3207272331243662, "grad_norm": 0.537046876651317, "learning_rate": 5.463945417820582e-05, "loss": 11.7874, "step": 24254 }, { "epoch": 1.3207816871209492, "grad_norm": 0.5993244268052655, "learning_rate": 5.463159549990875e-05, "loss": 11.9102, "step": 24255 }, { "epoch": 1.3208361411175322, "grad_norm": 0.617724157337032, "learning_rate": 5.4623737174404034e-05, "loss": 11.8869, "step": 24256 }, { "epoch": 1.3208905951141152, "grad_norm": 0.549806880415778, "learning_rate": 5.461587920175276e-05, "loss": 11.9112, "step": 24257 }, { "epoch": 1.3209450491106982, "grad_norm": 0.5788189893060056, "learning_rate": 5.460802158201601e-05, "loss": 11.9908, "step": 24258 }, { "epoch": 1.3209995031072812, "grad_norm": 0.57018458798926, "learning_rate": 5.4600164315254876e-05, "loss": 11.8401, "step": 24259 }, { "epoch": 1.3210539571038642, "grad_norm": 0.5149251024593897, "learning_rate": 5.459230740153046e-05, "loss": 11.9464, "step": 24260 }, { "epoch": 1.3211084111004472, "grad_norm": 0.6838468707258352, "learning_rate": 5.458445084090392e-05, "loss": 11.927, "step": 24261 }, { "epoch": 1.3211628650970302, "grad_norm": 0.5518764799362267, "learning_rate": 5.4576594633436275e-05, "loss": 11.803, "step": 24262 }, { "epoch": 1.3212173190936132, "grad_norm": 0.5660078351865124, "learning_rate": 5.456873877918868e-05, "loss": 11.8962, "step": 24263 }, { "epoch": 1.3212717730901962, "grad_norm": 0.5327815934793932, "learning_rate": 5.4560883278222156e-05, "loss": 11.8649, "step": 24264 }, { "epoch": 1.3213262270867792, "grad_norm": 0.575616894847649, "learning_rate": 5.455302813059786e-05, "loss": 12.0352, "step": 24265 }, { "epoch": 1.3213806810833622, "grad_norm": 0.5414418027524431, "learning_rate": 5.454517333637679e-05, "loss": 11.9338, "step": 24266 }, { "epoch": 1.3214351350799451, "grad_norm": 0.5228913502066164, "learning_rate": 5.4537318895620126e-05, "loss": 11.9759, "step": 24267 }, { "epoch": 1.3214895890765284, "grad_norm": 0.5037217239524571, "learning_rate": 5.4529464808388894e-05, "loss": 11.9221, "step": 24268 }, { "epoch": 1.3215440430731114, "grad_norm": 0.6567086658448069, "learning_rate": 5.4521611074744125e-05, "loss": 11.8346, "step": 24269 }, { "epoch": 1.3215984970696943, "grad_norm": 0.5419176707417476, "learning_rate": 5.4513757694746935e-05, "loss": 11.9521, "step": 24270 }, { "epoch": 1.3216529510662773, "grad_norm": 0.47978489373554256, "learning_rate": 5.450590466845844e-05, "loss": 11.9456, "step": 24271 }, { "epoch": 1.3217074050628603, "grad_norm": 0.5356943236231767, "learning_rate": 5.449805199593962e-05, "loss": 11.9803, "step": 24272 }, { "epoch": 1.3217618590594433, "grad_norm": 0.5273473014510761, "learning_rate": 5.449019967725161e-05, "loss": 11.8619, "step": 24273 }, { "epoch": 1.3218163130560263, "grad_norm": 0.5501787690953788, "learning_rate": 5.44823477124554e-05, "loss": 12.0075, "step": 24274 }, { "epoch": 1.3218707670526093, "grad_norm": 0.5408211223098347, "learning_rate": 5.447449610161213e-05, "loss": 11.9897, "step": 24275 }, { "epoch": 1.3219252210491923, "grad_norm": 0.5398280967216086, "learning_rate": 5.4466644844782765e-05, "loss": 11.9916, "step": 24276 }, { "epoch": 1.3219796750457755, "grad_norm": 0.5126606641286382, "learning_rate": 5.445879394202846e-05, "loss": 11.9482, "step": 24277 }, { "epoch": 1.3220341290423585, "grad_norm": 0.5223280025625218, "learning_rate": 5.44509433934102e-05, "loss": 11.8561, "step": 24278 }, { "epoch": 1.3220885830389415, "grad_norm": 0.5009270176350861, "learning_rate": 5.444309319898901e-05, "loss": 11.7228, "step": 24279 }, { "epoch": 1.3221430370355245, "grad_norm": 0.5563132453475934, "learning_rate": 5.443524335882601e-05, "loss": 11.9657, "step": 24280 }, { "epoch": 1.3221974910321075, "grad_norm": 0.5256210233397686, "learning_rate": 5.442739387298215e-05, "loss": 11.9307, "step": 24281 }, { "epoch": 1.3222519450286905, "grad_norm": 0.5373238434806185, "learning_rate": 5.441954474151851e-05, "loss": 11.9033, "step": 24282 }, { "epoch": 1.3223063990252735, "grad_norm": 0.5461130735811333, "learning_rate": 5.44116959644962e-05, "loss": 11.9797, "step": 24283 }, { "epoch": 1.3223608530218565, "grad_norm": 0.5347686060063062, "learning_rate": 5.440384754197613e-05, "loss": 11.8984, "step": 24284 }, { "epoch": 1.3224153070184395, "grad_norm": 0.5468334467073818, "learning_rate": 5.4395999474019434e-05, "loss": 11.8378, "step": 24285 }, { "epoch": 1.3224697610150224, "grad_norm": 0.5826262109080343, "learning_rate": 5.4388151760687033e-05, "loss": 12.0424, "step": 24286 }, { "epoch": 1.3225242150116054, "grad_norm": 0.5224605350270002, "learning_rate": 5.438030440204007e-05, "loss": 11.9409, "step": 24287 }, { "epoch": 1.3225786690081884, "grad_norm": 0.4771661678720687, "learning_rate": 5.437245739813952e-05, "loss": 11.8821, "step": 24288 }, { "epoch": 1.3226331230047714, "grad_norm": 0.5457161085290173, "learning_rate": 5.436461074904634e-05, "loss": 11.8993, "step": 24289 }, { "epoch": 1.3226875770013544, "grad_norm": 0.5260466621327258, "learning_rate": 5.435676445482165e-05, "loss": 12.0321, "step": 24290 }, { "epoch": 1.3227420309979376, "grad_norm": 0.5487822149381587, "learning_rate": 5.4348918515526356e-05, "loss": 11.9641, "step": 24291 }, { "epoch": 1.3227964849945206, "grad_norm": 0.5397681819089222, "learning_rate": 5.434107293122158e-05, "loss": 11.8594, "step": 24292 }, { "epoch": 1.3228509389911036, "grad_norm": 0.5507672832446914, "learning_rate": 5.4333227701968246e-05, "loss": 11.9825, "step": 24293 }, { "epoch": 1.3229053929876866, "grad_norm": 0.7380789884234731, "learning_rate": 5.4325382827827374e-05, "loss": 12.0395, "step": 24294 }, { "epoch": 1.3229598469842696, "grad_norm": 0.5656365995695363, "learning_rate": 5.431753830886004e-05, "loss": 11.9359, "step": 24295 }, { "epoch": 1.3230143009808526, "grad_norm": 0.5887100710632452, "learning_rate": 5.4309694145127145e-05, "loss": 11.9868, "step": 24296 }, { "epoch": 1.3230687549774356, "grad_norm": 0.5049209463935754, "learning_rate": 5.4301850336689755e-05, "loss": 11.8749, "step": 24297 }, { "epoch": 1.3231232089740186, "grad_norm": 0.544689104187778, "learning_rate": 5.429400688360886e-05, "loss": 11.9015, "step": 24298 }, { "epoch": 1.3231776629706016, "grad_norm": 0.5438795568555299, "learning_rate": 5.428616378594538e-05, "loss": 11.82, "step": 24299 }, { "epoch": 1.3232321169671848, "grad_norm": 0.5328578054970738, "learning_rate": 5.427832104376041e-05, "loss": 11.7133, "step": 24300 }, { "epoch": 1.3232865709637678, "grad_norm": 0.5500810269527809, "learning_rate": 5.427047865711483e-05, "loss": 11.9908, "step": 24301 }, { "epoch": 1.3233410249603508, "grad_norm": 0.6258240133814889, "learning_rate": 5.4262636626069715e-05, "loss": 11.9089, "step": 24302 }, { "epoch": 1.3233954789569338, "grad_norm": 0.5670219671761658, "learning_rate": 5.425479495068597e-05, "loss": 11.8998, "step": 24303 }, { "epoch": 1.3234499329535168, "grad_norm": 0.5993249992658796, "learning_rate": 5.4246953631024633e-05, "loss": 12.1534, "step": 24304 }, { "epoch": 1.3235043869500998, "grad_norm": 0.5772207235736112, "learning_rate": 5.4239112667146676e-05, "loss": 11.945, "step": 24305 }, { "epoch": 1.3235588409466827, "grad_norm": 0.5207581196920541, "learning_rate": 5.4231272059113014e-05, "loss": 11.925, "step": 24306 }, { "epoch": 1.3236132949432657, "grad_norm": 0.6237959400309706, "learning_rate": 5.4223431806984706e-05, "loss": 11.9534, "step": 24307 }, { "epoch": 1.3236677489398487, "grad_norm": 0.6336839598475581, "learning_rate": 5.421559191082266e-05, "loss": 12.0141, "step": 24308 }, { "epoch": 1.3237222029364317, "grad_norm": 0.5943396579206478, "learning_rate": 5.42077523706878e-05, "loss": 12.0129, "step": 24309 }, { "epoch": 1.3237766569330147, "grad_norm": 0.5404487076541026, "learning_rate": 5.4199913186641194e-05, "loss": 11.9076, "step": 24310 }, { "epoch": 1.3238311109295977, "grad_norm": 0.5890314787145994, "learning_rate": 5.4192074358743696e-05, "loss": 11.961, "step": 24311 }, { "epoch": 1.3238855649261807, "grad_norm": 0.5241183180791975, "learning_rate": 5.418423588705634e-05, "loss": 11.8418, "step": 24312 }, { "epoch": 1.3239400189227637, "grad_norm": 0.5657800702247222, "learning_rate": 5.417639777164003e-05, "loss": 11.9448, "step": 24313 }, { "epoch": 1.3239944729193467, "grad_norm": 0.5704038597285788, "learning_rate": 5.416856001255577e-05, "loss": 11.9572, "step": 24314 }, { "epoch": 1.32404892691593, "grad_norm": 0.5161054332427804, "learning_rate": 5.4160722609864425e-05, "loss": 11.8315, "step": 24315 }, { "epoch": 1.324103380912513, "grad_norm": 0.5530415399281876, "learning_rate": 5.4152885563626985e-05, "loss": 11.7823, "step": 24316 }, { "epoch": 1.3241578349090959, "grad_norm": 0.6341196816585852, "learning_rate": 5.414504887390446e-05, "loss": 12.083, "step": 24317 }, { "epoch": 1.3242122889056789, "grad_norm": 0.5866654115954333, "learning_rate": 5.41372125407577e-05, "loss": 12.0122, "step": 24318 }, { "epoch": 1.3242667429022619, "grad_norm": 0.5358296736449896, "learning_rate": 5.4129376564247635e-05, "loss": 11.8467, "step": 24319 }, { "epoch": 1.3243211968988449, "grad_norm": 0.5614961187626117, "learning_rate": 5.412154094443527e-05, "loss": 12.1306, "step": 24320 }, { "epoch": 1.3243756508954279, "grad_norm": 0.5591502289964349, "learning_rate": 5.4113705681381455e-05, "loss": 11.8171, "step": 24321 }, { "epoch": 1.3244301048920109, "grad_norm": 0.5498758234731576, "learning_rate": 5.41058707751472e-05, "loss": 11.8002, "step": 24322 }, { "epoch": 1.324484558888594, "grad_norm": 0.5196810290051165, "learning_rate": 5.409803622579335e-05, "loss": 11.9014, "step": 24323 }, { "epoch": 1.324539012885177, "grad_norm": 0.4875341647826073, "learning_rate": 5.409020203338091e-05, "loss": 11.9946, "step": 24324 }, { "epoch": 1.32459346688176, "grad_norm": 0.5962898438983585, "learning_rate": 5.4082368197970724e-05, "loss": 11.8129, "step": 24325 }, { "epoch": 1.324647920878343, "grad_norm": 0.5242946635270267, "learning_rate": 5.407453471962377e-05, "loss": 12.0071, "step": 24326 }, { "epoch": 1.324702374874926, "grad_norm": 0.5151795242568236, "learning_rate": 5.4066701598400904e-05, "loss": 11.8575, "step": 24327 }, { "epoch": 1.324756828871509, "grad_norm": 0.508533860739261, "learning_rate": 5.405886883436311e-05, "loss": 11.9134, "step": 24328 }, { "epoch": 1.324811282868092, "grad_norm": 0.5317603989694534, "learning_rate": 5.4051036427571254e-05, "loss": 11.8262, "step": 24329 }, { "epoch": 1.324865736864675, "grad_norm": 0.5590911167033477, "learning_rate": 5.404320437808621e-05, "loss": 11.9181, "step": 24330 }, { "epoch": 1.324920190861258, "grad_norm": 0.5598679141628443, "learning_rate": 5.40353726859689e-05, "loss": 11.9615, "step": 24331 }, { "epoch": 1.324974644857841, "grad_norm": 0.5136271129954619, "learning_rate": 5.40275413512803e-05, "loss": 11.8343, "step": 24332 }, { "epoch": 1.325029098854424, "grad_norm": 0.5734043586137014, "learning_rate": 5.401971037408119e-05, "loss": 11.9757, "step": 24333 }, { "epoch": 1.325083552851007, "grad_norm": 0.5598120208934344, "learning_rate": 5.401187975443257e-05, "loss": 11.9283, "step": 24334 }, { "epoch": 1.32513800684759, "grad_norm": 0.5448110291910014, "learning_rate": 5.400404949239525e-05, "loss": 11.8949, "step": 24335 }, { "epoch": 1.325192460844173, "grad_norm": 0.5642133229271152, "learning_rate": 5.399621958803018e-05, "loss": 11.9411, "step": 24336 }, { "epoch": 1.325246914840756, "grad_norm": 0.5177282489550481, "learning_rate": 5.39883900413982e-05, "loss": 11.9441, "step": 24337 }, { "epoch": 1.3253013688373392, "grad_norm": 0.5626101259603494, "learning_rate": 5.3980560852560244e-05, "loss": 11.9748, "step": 24338 }, { "epoch": 1.3253558228339222, "grad_norm": 0.524196397718115, "learning_rate": 5.3972732021577175e-05, "loss": 11.857, "step": 24339 }, { "epoch": 1.3254102768305052, "grad_norm": 0.5694045290272125, "learning_rate": 5.3964903548509816e-05, "loss": 11.9094, "step": 24340 }, { "epoch": 1.3254647308270882, "grad_norm": 0.5012759947368628, "learning_rate": 5.3957075433419126e-05, "loss": 11.9684, "step": 24341 }, { "epoch": 1.3255191848236711, "grad_norm": 0.5424842903304663, "learning_rate": 5.394924767636589e-05, "loss": 11.8677, "step": 24342 }, { "epoch": 1.3255736388202541, "grad_norm": 0.5094963885154813, "learning_rate": 5.3941420277411035e-05, "loss": 11.9124, "step": 24343 }, { "epoch": 1.3256280928168371, "grad_norm": 0.5695293612183323, "learning_rate": 5.3933593236615465e-05, "loss": 11.7505, "step": 24344 }, { "epoch": 1.3256825468134201, "grad_norm": 0.5244664486741112, "learning_rate": 5.392576655403996e-05, "loss": 11.8237, "step": 24345 }, { "epoch": 1.3257370008100031, "grad_norm": 0.5433440505139582, "learning_rate": 5.3917940229745446e-05, "loss": 11.9967, "step": 24346 }, { "epoch": 1.3257914548065863, "grad_norm": 0.5418608453655283, "learning_rate": 5.3910114263792734e-05, "loss": 11.9847, "step": 24347 }, { "epoch": 1.3258459088031693, "grad_norm": 0.5146203601815554, "learning_rate": 5.3902288656242714e-05, "loss": 12.0123, "step": 24348 }, { "epoch": 1.3259003627997523, "grad_norm": 0.6105695683271954, "learning_rate": 5.389446340715626e-05, "loss": 11.8639, "step": 24349 }, { "epoch": 1.3259548167963353, "grad_norm": 0.5043270843253015, "learning_rate": 5.3886638516594136e-05, "loss": 11.9484, "step": 24350 }, { "epoch": 1.3260092707929183, "grad_norm": 0.6502463641638829, "learning_rate": 5.3878813984617294e-05, "loss": 12.0225, "step": 24351 }, { "epoch": 1.3260637247895013, "grad_norm": 0.5707764957435898, "learning_rate": 5.387098981128648e-05, "loss": 11.9455, "step": 24352 }, { "epoch": 1.3261181787860843, "grad_norm": 0.5853274073453641, "learning_rate": 5.386316599666259e-05, "loss": 11.9512, "step": 24353 }, { "epoch": 1.3261726327826673, "grad_norm": 0.5331348033249079, "learning_rate": 5.385534254080649e-05, "loss": 11.8601, "step": 24354 }, { "epoch": 1.3262270867792503, "grad_norm": 0.535285651695918, "learning_rate": 5.3847519443778946e-05, "loss": 11.7711, "step": 24355 }, { "epoch": 1.3262815407758333, "grad_norm": 0.5094922793687584, "learning_rate": 5.3839696705640876e-05, "loss": 11.8656, "step": 24356 }, { "epoch": 1.3263359947724163, "grad_norm": 0.549080849299552, "learning_rate": 5.383187432645303e-05, "loss": 11.8211, "step": 24357 }, { "epoch": 1.3263904487689993, "grad_norm": 0.5365808510333939, "learning_rate": 5.382405230627629e-05, "loss": 11.9175, "step": 24358 }, { "epoch": 1.3264449027655822, "grad_norm": 0.6795797989768289, "learning_rate": 5.3816230645171494e-05, "loss": 12.0065, "step": 24359 }, { "epoch": 1.3264993567621652, "grad_norm": 0.6255954293734473, "learning_rate": 5.3808409343199376e-05, "loss": 11.967, "step": 24360 }, { "epoch": 1.3265538107587485, "grad_norm": 0.6366357279444079, "learning_rate": 5.380058840042085e-05, "loss": 11.8996, "step": 24361 }, { "epoch": 1.3266082647553314, "grad_norm": 0.6546017019244861, "learning_rate": 5.379276781689666e-05, "loss": 12.0854, "step": 24362 }, { "epoch": 1.3266627187519144, "grad_norm": 0.5896448603649662, "learning_rate": 5.3784947592687706e-05, "loss": 11.9614, "step": 24363 }, { "epoch": 1.3267171727484974, "grad_norm": 0.5935185979095843, "learning_rate": 5.3777127727854704e-05, "loss": 11.9514, "step": 24364 }, { "epoch": 1.3267716267450804, "grad_norm": 0.5664400696177851, "learning_rate": 5.3769308222458495e-05, "loss": 11.9728, "step": 24365 }, { "epoch": 1.3268260807416634, "grad_norm": 0.6030275831908156, "learning_rate": 5.3761489076559954e-05, "loss": 11.9886, "step": 24366 }, { "epoch": 1.3268805347382464, "grad_norm": 0.5607611995842933, "learning_rate": 5.3753670290219784e-05, "loss": 11.8956, "step": 24367 }, { "epoch": 1.3269349887348294, "grad_norm": 0.6031829565266845, "learning_rate": 5.374585186349888e-05, "loss": 12.0342, "step": 24368 }, { "epoch": 1.3269894427314124, "grad_norm": 0.5511490707745242, "learning_rate": 5.373803379645797e-05, "loss": 11.9302, "step": 24369 }, { "epoch": 1.3270438967279956, "grad_norm": 0.5238988219315068, "learning_rate": 5.373021608915783e-05, "loss": 11.92, "step": 24370 }, { "epoch": 1.3270983507245786, "grad_norm": 0.6318560091882891, "learning_rate": 5.372239874165934e-05, "loss": 11.8764, "step": 24371 }, { "epoch": 1.3271528047211616, "grad_norm": 0.5305198440793832, "learning_rate": 5.3714581754023184e-05, "loss": 11.9477, "step": 24372 }, { "epoch": 1.3272072587177446, "grad_norm": 0.5510571124139215, "learning_rate": 5.3706765126310254e-05, "loss": 12.007, "step": 24373 }, { "epoch": 1.3272617127143276, "grad_norm": 0.5449207675209723, "learning_rate": 5.3698948858581245e-05, "loss": 11.8913, "step": 24374 }, { "epoch": 1.3273161667109106, "grad_norm": 0.5610527251170293, "learning_rate": 5.369113295089696e-05, "loss": 11.868, "step": 24375 }, { "epoch": 1.3273706207074936, "grad_norm": 0.4940865860536345, "learning_rate": 5.368331740331824e-05, "loss": 11.8369, "step": 24376 }, { "epoch": 1.3274250747040766, "grad_norm": 0.5277221793351025, "learning_rate": 5.367550221590576e-05, "loss": 11.8139, "step": 24377 }, { "epoch": 1.3274795287006596, "grad_norm": 0.5952348535984634, "learning_rate": 5.3667687388720434e-05, "loss": 11.9857, "step": 24378 }, { "epoch": 1.3275339826972425, "grad_norm": 0.5445620574401022, "learning_rate": 5.365987292182286e-05, "loss": 11.8433, "step": 24379 }, { "epoch": 1.3275884366938255, "grad_norm": 0.5296148059847346, "learning_rate": 5.365205881527389e-05, "loss": 11.8902, "step": 24380 }, { "epoch": 1.3276428906904085, "grad_norm": 0.5251909452213419, "learning_rate": 5.364424506913433e-05, "loss": 11.9295, "step": 24381 }, { "epoch": 1.3276973446869915, "grad_norm": 0.530573351604225, "learning_rate": 5.363643168346484e-05, "loss": 11.9302, "step": 24382 }, { "epoch": 1.3277517986835745, "grad_norm": 0.54045335631268, "learning_rate": 5.362861865832627e-05, "loss": 11.9627, "step": 24383 }, { "epoch": 1.3278062526801575, "grad_norm": 0.612712364336341, "learning_rate": 5.36208059937793e-05, "loss": 12.139, "step": 24384 }, { "epoch": 1.3278607066767407, "grad_norm": 0.5417724530265201, "learning_rate": 5.361299368988477e-05, "loss": 12.0772, "step": 24385 }, { "epoch": 1.3279151606733237, "grad_norm": 0.6751105267403107, "learning_rate": 5.3605181746703325e-05, "loss": 11.9898, "step": 24386 }, { "epoch": 1.3279696146699067, "grad_norm": 0.615979215688675, "learning_rate": 5.3597370164295776e-05, "loss": 11.9471, "step": 24387 }, { "epoch": 1.3280240686664897, "grad_norm": 0.5665303733879382, "learning_rate": 5.358955894272295e-05, "loss": 11.9218, "step": 24388 }, { "epoch": 1.3280785226630727, "grad_norm": 0.5351548006752035, "learning_rate": 5.3581748082045394e-05, "loss": 11.9376, "step": 24389 }, { "epoch": 1.3281329766596557, "grad_norm": 0.5269036829983818, "learning_rate": 5.3573937582324004e-05, "loss": 11.9498, "step": 24390 }, { "epoch": 1.3281874306562387, "grad_norm": 0.5086667444009628, "learning_rate": 5.356612744361942e-05, "loss": 12.0167, "step": 24391 }, { "epoch": 1.3282418846528217, "grad_norm": 0.5647739110462783, "learning_rate": 5.355831766599242e-05, "loss": 11.8642, "step": 24392 }, { "epoch": 1.3282963386494049, "grad_norm": 0.5607725796111824, "learning_rate": 5.355050824950376e-05, "loss": 11.9575, "step": 24393 }, { "epoch": 1.3283507926459879, "grad_norm": 0.5592590286363692, "learning_rate": 5.35426991942141e-05, "loss": 12.0412, "step": 24394 }, { "epoch": 1.3284052466425709, "grad_norm": 0.5414799946184244, "learning_rate": 5.353489050018423e-05, "loss": 11.8695, "step": 24395 }, { "epoch": 1.3284597006391539, "grad_norm": 0.5355594770259369, "learning_rate": 5.352708216747482e-05, "loss": 11.9433, "step": 24396 }, { "epoch": 1.3285141546357369, "grad_norm": 0.5112299339077214, "learning_rate": 5.351927419614665e-05, "loss": 11.8644, "step": 24397 }, { "epoch": 1.3285686086323198, "grad_norm": 0.6137036904474334, "learning_rate": 5.351146658626041e-05, "loss": 11.9186, "step": 24398 }, { "epoch": 1.3286230626289028, "grad_norm": 0.6907624157770516, "learning_rate": 5.3503659337876735e-05, "loss": 11.9553, "step": 24399 }, { "epoch": 1.3286775166254858, "grad_norm": 0.5539291518076767, "learning_rate": 5.349585245105646e-05, "loss": 11.9558, "step": 24400 }, { "epoch": 1.3287319706220688, "grad_norm": 0.51189330897039, "learning_rate": 5.348804592586019e-05, "loss": 11.8123, "step": 24401 }, { "epoch": 1.3287864246186518, "grad_norm": 0.5444129357563464, "learning_rate": 5.3480239762348684e-05, "loss": 11.8758, "step": 24402 }, { "epoch": 1.3288408786152348, "grad_norm": 0.6037881277554985, "learning_rate": 5.347243396058266e-05, "loss": 12.0076, "step": 24403 }, { "epoch": 1.3288953326118178, "grad_norm": 0.546067934881867, "learning_rate": 5.346462852062277e-05, "loss": 12.0116, "step": 24404 }, { "epoch": 1.3289497866084008, "grad_norm": 0.48186776905170187, "learning_rate": 5.345682344252976e-05, "loss": 11.8478, "step": 24405 }, { "epoch": 1.3290042406049838, "grad_norm": 0.5434736762770173, "learning_rate": 5.344901872636425e-05, "loss": 11.8911, "step": 24406 }, { "epoch": 1.3290586946015668, "grad_norm": 0.6060622428524457, "learning_rate": 5.3441214372187033e-05, "loss": 11.9495, "step": 24407 }, { "epoch": 1.32911314859815, "grad_norm": 0.5304292446815432, "learning_rate": 5.343341038005873e-05, "loss": 12.0005, "step": 24408 }, { "epoch": 1.329167602594733, "grad_norm": 0.677423001758076, "learning_rate": 5.342560675004e-05, "loss": 12.0136, "step": 24409 }, { "epoch": 1.329222056591316, "grad_norm": 0.5518555001602644, "learning_rate": 5.341780348219161e-05, "loss": 12.0136, "step": 24410 }, { "epoch": 1.329276510587899, "grad_norm": 0.500025501783482, "learning_rate": 5.341000057657415e-05, "loss": 11.8849, "step": 24411 }, { "epoch": 1.329330964584482, "grad_norm": 0.5974749784099989, "learning_rate": 5.340219803324838e-05, "loss": 11.8318, "step": 24412 }, { "epoch": 1.329385418581065, "grad_norm": 0.5510432659653153, "learning_rate": 5.339439585227488e-05, "loss": 11.807, "step": 24413 }, { "epoch": 1.329439872577648, "grad_norm": 0.5955238957795417, "learning_rate": 5.3386594033714376e-05, "loss": 11.9354, "step": 24414 }, { "epoch": 1.329494326574231, "grad_norm": 0.5705872812703834, "learning_rate": 5.337879257762758e-05, "loss": 11.9356, "step": 24415 }, { "epoch": 1.329548780570814, "grad_norm": 0.5612735588298833, "learning_rate": 5.337099148407507e-05, "loss": 11.856, "step": 24416 }, { "epoch": 1.3296032345673972, "grad_norm": 0.5406940615655401, "learning_rate": 5.336319075311759e-05, "loss": 11.9474, "step": 24417 }, { "epoch": 1.3296576885639801, "grad_norm": 0.6178418823145054, "learning_rate": 5.335539038481575e-05, "loss": 12.0239, "step": 24418 }, { "epoch": 1.3297121425605631, "grad_norm": 0.5790966834105574, "learning_rate": 5.3347590379230186e-05, "loss": 11.9535, "step": 24419 }, { "epoch": 1.3297665965571461, "grad_norm": 0.5336972842510217, "learning_rate": 5.3339790736421625e-05, "loss": 11.9026, "step": 24420 }, { "epoch": 1.3298210505537291, "grad_norm": 0.5112016277825095, "learning_rate": 5.333199145645064e-05, "loss": 11.9398, "step": 24421 }, { "epoch": 1.3298755045503121, "grad_norm": 0.5677668095965251, "learning_rate": 5.332419253937795e-05, "loss": 11.8973, "step": 24422 }, { "epoch": 1.329929958546895, "grad_norm": 0.5846641795364168, "learning_rate": 5.3316393985264114e-05, "loss": 11.867, "step": 24423 }, { "epoch": 1.329984412543478, "grad_norm": 0.5615385904841783, "learning_rate": 5.3308595794169844e-05, "loss": 11.9397, "step": 24424 }, { "epoch": 1.330038866540061, "grad_norm": 0.5154131337766664, "learning_rate": 5.330079796615579e-05, "loss": 11.8674, "step": 24425 }, { "epoch": 1.330093320536644, "grad_norm": 0.5112974569783938, "learning_rate": 5.329300050128254e-05, "loss": 11.8693, "step": 24426 }, { "epoch": 1.330147774533227, "grad_norm": 0.547558573878345, "learning_rate": 5.328520339961078e-05, "loss": 11.8623, "step": 24427 }, { "epoch": 1.33020222852981, "grad_norm": 0.562825292170743, "learning_rate": 5.327740666120107e-05, "loss": 11.8996, "step": 24428 }, { "epoch": 1.330256682526393, "grad_norm": 0.5582074920459502, "learning_rate": 5.3269610286114126e-05, "loss": 11.7927, "step": 24429 }, { "epoch": 1.330311136522976, "grad_norm": 0.5180893200089595, "learning_rate": 5.3261814274410523e-05, "loss": 11.8159, "step": 24430 }, { "epoch": 1.3303655905195593, "grad_norm": 0.6306627129274233, "learning_rate": 5.325401862615087e-05, "loss": 11.8643, "step": 24431 }, { "epoch": 1.3304200445161423, "grad_norm": 0.5500720777771656, "learning_rate": 5.324622334139583e-05, "loss": 11.8704, "step": 24432 }, { "epoch": 1.3304744985127253, "grad_norm": 0.53183988508379, "learning_rate": 5.323842842020597e-05, "loss": 11.8969, "step": 24433 }, { "epoch": 1.3305289525093082, "grad_norm": 0.5738645551365829, "learning_rate": 5.323063386264196e-05, "loss": 11.8925, "step": 24434 }, { "epoch": 1.3305834065058912, "grad_norm": 0.5545923361344481, "learning_rate": 5.3222839668764335e-05, "loss": 11.8049, "step": 24435 }, { "epoch": 1.3306378605024742, "grad_norm": 0.534822476559011, "learning_rate": 5.321504583863377e-05, "loss": 11.8954, "step": 24436 }, { "epoch": 1.3306923144990572, "grad_norm": 0.5072876371324885, "learning_rate": 5.320725237231089e-05, "loss": 11.9209, "step": 24437 }, { "epoch": 1.3307467684956402, "grad_norm": 0.5403614508507687, "learning_rate": 5.319945926985622e-05, "loss": 11.897, "step": 24438 }, { "epoch": 1.3308012224922232, "grad_norm": 0.5495669396723336, "learning_rate": 5.319166653133049e-05, "loss": 11.9898, "step": 24439 }, { "epoch": 1.3308556764888064, "grad_norm": 0.49418161704933866, "learning_rate": 5.318387415679413e-05, "loss": 11.7318, "step": 24440 }, { "epoch": 1.3309101304853894, "grad_norm": 0.5258276724310946, "learning_rate": 5.31760821463078e-05, "loss": 11.7775, "step": 24441 }, { "epoch": 1.3309645844819724, "grad_norm": 0.5363831551349146, "learning_rate": 5.316829049993217e-05, "loss": 11.9428, "step": 24442 }, { "epoch": 1.3310190384785554, "grad_norm": 0.5348364804923785, "learning_rate": 5.316049921772772e-05, "loss": 11.85, "step": 24443 }, { "epoch": 1.3310734924751384, "grad_norm": 0.5216952354541312, "learning_rate": 5.315270829975512e-05, "loss": 11.9201, "step": 24444 }, { "epoch": 1.3311279464717214, "grad_norm": 0.6610065133816551, "learning_rate": 5.314491774607487e-05, "loss": 11.9976, "step": 24445 }, { "epoch": 1.3311824004683044, "grad_norm": 0.5636125963597513, "learning_rate": 5.3137127556747645e-05, "loss": 12.0046, "step": 24446 }, { "epoch": 1.3312368544648874, "grad_norm": 0.5023090693827555, "learning_rate": 5.3129337731833926e-05, "loss": 11.9616, "step": 24447 }, { "epoch": 1.3312913084614704, "grad_norm": 0.6430240866603419, "learning_rate": 5.312154827139435e-05, "loss": 11.9319, "step": 24448 }, { "epoch": 1.3313457624580534, "grad_norm": 0.5633246444295514, "learning_rate": 5.3113759175489554e-05, "loss": 11.9111, "step": 24449 }, { "epoch": 1.3314002164546364, "grad_norm": 0.5242721961133227, "learning_rate": 5.310597044417995e-05, "loss": 11.9042, "step": 24450 }, { "epoch": 1.3314546704512193, "grad_norm": 0.5296018505627887, "learning_rate": 5.309818207752617e-05, "loss": 11.9196, "step": 24451 }, { "epoch": 1.3315091244478023, "grad_norm": 0.6046831151338544, "learning_rate": 5.309039407558885e-05, "loss": 11.9763, "step": 24452 }, { "epoch": 1.3315635784443853, "grad_norm": 0.5651034819341412, "learning_rate": 5.3082606438428437e-05, "loss": 11.8477, "step": 24453 }, { "epoch": 1.3316180324409685, "grad_norm": 0.5141442487505451, "learning_rate": 5.3074819166105594e-05, "loss": 11.6833, "step": 24454 }, { "epoch": 1.3316724864375515, "grad_norm": 0.49895548784066357, "learning_rate": 5.306703225868079e-05, "loss": 11.7769, "step": 24455 }, { "epoch": 1.3317269404341345, "grad_norm": 0.5204968497412266, "learning_rate": 5.305924571621464e-05, "loss": 11.8639, "step": 24456 }, { "epoch": 1.3317813944307175, "grad_norm": 0.5857602081241274, "learning_rate": 5.3051459538767645e-05, "loss": 11.9054, "step": 24457 }, { "epoch": 1.3318358484273005, "grad_norm": 0.5659551784629318, "learning_rate": 5.304367372640035e-05, "loss": 12.0636, "step": 24458 }, { "epoch": 1.3318903024238835, "grad_norm": 0.5773041733355975, "learning_rate": 5.303588827917343e-05, "loss": 12.0934, "step": 24459 }, { "epoch": 1.3319447564204665, "grad_norm": 0.5280070943189968, "learning_rate": 5.3028103197147226e-05, "loss": 11.9716, "step": 24460 }, { "epoch": 1.3319992104170495, "grad_norm": 0.5735275222348536, "learning_rate": 5.3020318480382404e-05, "loss": 11.8713, "step": 24461 }, { "epoch": 1.3320536644136325, "grad_norm": 0.48462536419252333, "learning_rate": 5.301253412893943e-05, "loss": 11.8588, "step": 24462 }, { "epoch": 1.3321081184102157, "grad_norm": 0.5338053619619285, "learning_rate": 5.300475014287887e-05, "loss": 11.8625, "step": 24463 }, { "epoch": 1.3321625724067987, "grad_norm": 0.5613163297630709, "learning_rate": 5.299696652226129e-05, "loss": 11.9371, "step": 24464 }, { "epoch": 1.3322170264033817, "grad_norm": 0.5626807947280761, "learning_rate": 5.298918326714715e-05, "loss": 11.8772, "step": 24465 }, { "epoch": 1.3322714803999647, "grad_norm": 0.601793737070652, "learning_rate": 5.2981400377597036e-05, "loss": 11.9692, "step": 24466 }, { "epoch": 1.3323259343965477, "grad_norm": 0.5741701224189307, "learning_rate": 5.29736178536714e-05, "loss": 11.901, "step": 24467 }, { "epoch": 1.3323803883931307, "grad_norm": 0.5736657088276385, "learning_rate": 5.296583569543083e-05, "loss": 11.9024, "step": 24468 }, { "epoch": 1.3324348423897137, "grad_norm": 0.5978380892139845, "learning_rate": 5.295805390293582e-05, "loss": 11.9477, "step": 24469 }, { "epoch": 1.3324892963862967, "grad_norm": 0.5819745812226959, "learning_rate": 5.295027247624683e-05, "loss": 12.0241, "step": 24470 }, { "epoch": 1.3325437503828796, "grad_norm": 0.5016597715091647, "learning_rate": 5.294249141542444e-05, "loss": 11.8833, "step": 24471 }, { "epoch": 1.3325982043794626, "grad_norm": 0.5297508849514344, "learning_rate": 5.293471072052911e-05, "loss": 11.8493, "step": 24472 }, { "epoch": 1.3326526583760456, "grad_norm": 0.5317063762016192, "learning_rate": 5.292693039162135e-05, "loss": 11.8721, "step": 24473 }, { "epoch": 1.3327071123726286, "grad_norm": 0.5432951166550877, "learning_rate": 5.291915042876172e-05, "loss": 11.8806, "step": 24474 }, { "epoch": 1.3327615663692116, "grad_norm": 0.5898378762569593, "learning_rate": 5.291137083201062e-05, "loss": 11.969, "step": 24475 }, { "epoch": 1.3328160203657946, "grad_norm": 0.571938426440639, "learning_rate": 5.290359160142864e-05, "loss": 11.8397, "step": 24476 }, { "epoch": 1.3328704743623776, "grad_norm": 0.5815525163283235, "learning_rate": 5.28958127370762e-05, "loss": 11.9098, "step": 24477 }, { "epoch": 1.3329249283589608, "grad_norm": 0.5968767462739033, "learning_rate": 5.288803423901385e-05, "loss": 11.9672, "step": 24478 }, { "epoch": 1.3329793823555438, "grad_norm": 0.5754187193247214, "learning_rate": 5.288025610730205e-05, "loss": 11.9174, "step": 24479 }, { "epoch": 1.3330338363521268, "grad_norm": 0.5898023123021086, "learning_rate": 5.287247834200124e-05, "loss": 11.9645, "step": 24480 }, { "epoch": 1.3330882903487098, "grad_norm": 0.5287901927630678, "learning_rate": 5.2864700943171975e-05, "loss": 11.9177, "step": 24481 }, { "epoch": 1.3331427443452928, "grad_norm": 0.6187442586926514, "learning_rate": 5.285692391087467e-05, "loss": 11.9769, "step": 24482 }, { "epoch": 1.3331971983418758, "grad_norm": 0.5660317088857969, "learning_rate": 5.2849147245169875e-05, "loss": 12.0203, "step": 24483 }, { "epoch": 1.3332516523384588, "grad_norm": 0.5753054490269349, "learning_rate": 5.2841370946117963e-05, "loss": 11.9575, "step": 24484 }, { "epoch": 1.3333061063350418, "grad_norm": 0.4944736485581836, "learning_rate": 5.2833595013779466e-05, "loss": 11.9116, "step": 24485 }, { "epoch": 1.333360560331625, "grad_norm": 0.4940920220088117, "learning_rate": 5.282581944821489e-05, "loss": 11.8072, "step": 24486 }, { "epoch": 1.333415014328208, "grad_norm": 0.5380199073446132, "learning_rate": 5.28180442494846e-05, "loss": 11.9904, "step": 24487 }, { "epoch": 1.333469468324791, "grad_norm": 0.6578582201421894, "learning_rate": 5.281026941764916e-05, "loss": 12.122, "step": 24488 }, { "epoch": 1.333523922321374, "grad_norm": 0.5837883037327447, "learning_rate": 5.280249495276896e-05, "loss": 11.8773, "step": 24489 }, { "epoch": 1.333578376317957, "grad_norm": 0.5433843728532075, "learning_rate": 5.2794720854904464e-05, "loss": 11.9688, "step": 24490 }, { "epoch": 1.33363283031454, "grad_norm": 0.560986084298655, "learning_rate": 5.278694712411616e-05, "loss": 12.0565, "step": 24491 }, { "epoch": 1.333687284311123, "grad_norm": 0.515790404395713, "learning_rate": 5.277917376046444e-05, "loss": 11.9032, "step": 24492 }, { "epoch": 1.333741738307706, "grad_norm": 0.55056638242684, "learning_rate": 5.2771400764009814e-05, "loss": 11.9398, "step": 24493 }, { "epoch": 1.333796192304289, "grad_norm": 0.5499523177854556, "learning_rate": 5.276362813481266e-05, "loss": 11.8566, "step": 24494 }, { "epoch": 1.333850646300872, "grad_norm": 0.5444431428316789, "learning_rate": 5.2755855872933505e-05, "loss": 11.9437, "step": 24495 }, { "epoch": 1.333905100297455, "grad_norm": 0.5484030889653387, "learning_rate": 5.274808397843267e-05, "loss": 11.78, "step": 24496 }, { "epoch": 1.333959554294038, "grad_norm": 0.5904625367617514, "learning_rate": 5.274031245137068e-05, "loss": 11.8786, "step": 24497 }, { "epoch": 1.334014008290621, "grad_norm": 0.5597318931763078, "learning_rate": 5.273254129180798e-05, "loss": 11.9085, "step": 24498 }, { "epoch": 1.3340684622872039, "grad_norm": 0.5368922913995148, "learning_rate": 5.2724770499804975e-05, "loss": 11.8652, "step": 24499 }, { "epoch": 1.3341229162837869, "grad_norm": 0.5684830044239768, "learning_rate": 5.2717000075422026e-05, "loss": 11.9427, "step": 24500 }, { "epoch": 1.33417737028037, "grad_norm": 0.5291246322300477, "learning_rate": 5.270923001871967e-05, "loss": 11.8035, "step": 24501 }, { "epoch": 1.334231824276953, "grad_norm": 0.7121779788468687, "learning_rate": 5.270146032975822e-05, "loss": 12.0363, "step": 24502 }, { "epoch": 1.334286278273536, "grad_norm": 0.6311172161278445, "learning_rate": 5.26936910085982e-05, "loss": 11.7624, "step": 24503 }, { "epoch": 1.334340732270119, "grad_norm": 0.5395628372007741, "learning_rate": 5.268592205529992e-05, "loss": 11.9868, "step": 24504 }, { "epoch": 1.334395186266702, "grad_norm": 0.5465804213449359, "learning_rate": 5.267815346992388e-05, "loss": 11.9089, "step": 24505 }, { "epoch": 1.334449640263285, "grad_norm": 0.5342797119889956, "learning_rate": 5.267038525253043e-05, "loss": 11.8063, "step": 24506 }, { "epoch": 1.334504094259868, "grad_norm": 0.5615334252366635, "learning_rate": 5.266261740317999e-05, "loss": 11.9759, "step": 24507 }, { "epoch": 1.334558548256451, "grad_norm": 0.5544772304893303, "learning_rate": 5.265484992193301e-05, "loss": 11.7509, "step": 24508 }, { "epoch": 1.334613002253034, "grad_norm": 0.6759601781059053, "learning_rate": 5.264708280884988e-05, "loss": 12.0361, "step": 24509 }, { "epoch": 1.3346674562496172, "grad_norm": 0.5188243734877167, "learning_rate": 5.2639316063990954e-05, "loss": 11.8241, "step": 24510 }, { "epoch": 1.3347219102462002, "grad_norm": 0.6528429173010141, "learning_rate": 5.2631549687416615e-05, "loss": 11.9826, "step": 24511 }, { "epoch": 1.3347763642427832, "grad_norm": 0.55452836762981, "learning_rate": 5.26237836791873e-05, "loss": 11.8725, "step": 24512 }, { "epoch": 1.3348308182393662, "grad_norm": 0.5459149363313589, "learning_rate": 5.261601803936341e-05, "loss": 11.8963, "step": 24513 }, { "epoch": 1.3348852722359492, "grad_norm": 0.5294156458948727, "learning_rate": 5.2608252768005286e-05, "loss": 11.869, "step": 24514 }, { "epoch": 1.3349397262325322, "grad_norm": 0.602535210597049, "learning_rate": 5.260048786517337e-05, "loss": 11.9743, "step": 24515 }, { "epoch": 1.3349941802291152, "grad_norm": 0.6139798693690568, "learning_rate": 5.2592723330927965e-05, "loss": 12.0177, "step": 24516 }, { "epoch": 1.3350486342256982, "grad_norm": 0.5571415481736457, "learning_rate": 5.258495916532953e-05, "loss": 11.8239, "step": 24517 }, { "epoch": 1.3351030882222812, "grad_norm": 0.6025306949219089, "learning_rate": 5.257719536843837e-05, "loss": 12.0201, "step": 24518 }, { "epoch": 1.3351575422188642, "grad_norm": 0.5926428550507084, "learning_rate": 5.256943194031494e-05, "loss": 11.9947, "step": 24519 }, { "epoch": 1.3352119962154472, "grad_norm": 0.547831804283872, "learning_rate": 5.256166888101954e-05, "loss": 11.8195, "step": 24520 }, { "epoch": 1.3352664502120302, "grad_norm": 0.5741966033625567, "learning_rate": 5.2553906190612535e-05, "loss": 11.9355, "step": 24521 }, { "epoch": 1.3353209042086132, "grad_norm": 0.5688103106950454, "learning_rate": 5.254614386915431e-05, "loss": 11.8386, "step": 24522 }, { "epoch": 1.3353753582051962, "grad_norm": 0.5438147681305743, "learning_rate": 5.253838191670527e-05, "loss": 11.9718, "step": 24523 }, { "epoch": 1.3354298122017794, "grad_norm": 0.5413384242111259, "learning_rate": 5.2530620333325696e-05, "loss": 11.7993, "step": 24524 }, { "epoch": 1.3354842661983624, "grad_norm": 0.5437024314337768, "learning_rate": 5.2522859119076006e-05, "loss": 11.9408, "step": 24525 }, { "epoch": 1.3355387201949454, "grad_norm": 0.5641061542872094, "learning_rate": 5.251509827401648e-05, "loss": 11.9104, "step": 24526 }, { "epoch": 1.3355931741915283, "grad_norm": 0.5531141663828754, "learning_rate": 5.250733779820758e-05, "loss": 11.9219, "step": 24527 }, { "epoch": 1.3356476281881113, "grad_norm": 0.584910086915897, "learning_rate": 5.2499577691709526e-05, "loss": 11.9262, "step": 24528 }, { "epoch": 1.3357020821846943, "grad_norm": 0.4837542733785465, "learning_rate": 5.249181795458276e-05, "loss": 11.7622, "step": 24529 }, { "epoch": 1.3357565361812773, "grad_norm": 0.5236927571332141, "learning_rate": 5.248405858688759e-05, "loss": 12.0457, "step": 24530 }, { "epoch": 1.3358109901778603, "grad_norm": 0.5643534582442321, "learning_rate": 5.247629958868432e-05, "loss": 11.8375, "step": 24531 }, { "epoch": 1.3358654441744433, "grad_norm": 0.6102868601131803, "learning_rate": 5.246854096003333e-05, "loss": 11.8655, "step": 24532 }, { "epoch": 1.3359198981710265, "grad_norm": 0.5241006028585222, "learning_rate": 5.2460782700994914e-05, "loss": 11.9341, "step": 24533 }, { "epoch": 1.3359743521676095, "grad_norm": 0.5393948666480911, "learning_rate": 5.2453024811629416e-05, "loss": 11.897, "step": 24534 }, { "epoch": 1.3360288061641925, "grad_norm": 0.5685925549442461, "learning_rate": 5.2445267291997214e-05, "loss": 11.9586, "step": 24535 }, { "epoch": 1.3360832601607755, "grad_norm": 0.5566302805644933, "learning_rate": 5.243751014215855e-05, "loss": 11.9093, "step": 24536 }, { "epoch": 1.3361377141573585, "grad_norm": 0.5696849007858308, "learning_rate": 5.2429753362173814e-05, "loss": 11.7861, "step": 24537 }, { "epoch": 1.3361921681539415, "grad_norm": 0.5224432631826207, "learning_rate": 5.242199695210328e-05, "loss": 11.8364, "step": 24538 }, { "epoch": 1.3362466221505245, "grad_norm": 0.5013398649851833, "learning_rate": 5.2414240912007286e-05, "loss": 11.9152, "step": 24539 }, { "epoch": 1.3363010761471075, "grad_norm": 0.6143408768650449, "learning_rate": 5.240648524194616e-05, "loss": 12.0068, "step": 24540 }, { "epoch": 1.3363555301436905, "grad_norm": 0.5643609576473816, "learning_rate": 5.239872994198012e-05, "loss": 11.8676, "step": 24541 }, { "epoch": 1.3364099841402735, "grad_norm": 0.4948973319808279, "learning_rate": 5.23909750121696e-05, "loss": 11.9777, "step": 24542 }, { "epoch": 1.3364644381368564, "grad_norm": 0.5487869439216826, "learning_rate": 5.238322045257479e-05, "loss": 12.0445, "step": 24543 }, { "epoch": 1.3365188921334394, "grad_norm": 0.5121733353384509, "learning_rate": 5.237546626325609e-05, "loss": 11.8944, "step": 24544 }, { "epoch": 1.3365733461300224, "grad_norm": 0.5654297358283139, "learning_rate": 5.2367712444273697e-05, "loss": 11.9342, "step": 24545 }, { "epoch": 1.3366278001266054, "grad_norm": 0.6073104836902603, "learning_rate": 5.2359958995687973e-05, "loss": 11.9483, "step": 24546 }, { "epoch": 1.3366822541231884, "grad_norm": 0.5533297583586704, "learning_rate": 5.235220591755925e-05, "loss": 11.8096, "step": 24547 }, { "epoch": 1.3367367081197716, "grad_norm": 0.5467902640031804, "learning_rate": 5.23444532099477e-05, "loss": 11.8904, "step": 24548 }, { "epoch": 1.3367911621163546, "grad_norm": 0.523885299781789, "learning_rate": 5.2336700872913724e-05, "loss": 11.8243, "step": 24549 }, { "epoch": 1.3368456161129376, "grad_norm": 0.5399534978432189, "learning_rate": 5.232894890651756e-05, "loss": 11.8559, "step": 24550 }, { "epoch": 1.3369000701095206, "grad_norm": 0.5301881372557181, "learning_rate": 5.232119731081944e-05, "loss": 11.9029, "step": 24551 }, { "epoch": 1.3369545241061036, "grad_norm": 0.5376157825698882, "learning_rate": 5.2313446085879714e-05, "loss": 11.9368, "step": 24552 }, { "epoch": 1.3370089781026866, "grad_norm": 0.602887367693488, "learning_rate": 5.23056952317586e-05, "loss": 11.889, "step": 24553 }, { "epoch": 1.3370634320992696, "grad_norm": 0.5694006198900216, "learning_rate": 5.2297944748516436e-05, "loss": 11.9248, "step": 24554 }, { "epoch": 1.3371178860958526, "grad_norm": 0.5864370331877468, "learning_rate": 5.2290194636213406e-05, "loss": 11.8373, "step": 24555 }, { "epoch": 1.3371723400924358, "grad_norm": 0.656334369441371, "learning_rate": 5.228244489490983e-05, "loss": 12.0397, "step": 24556 }, { "epoch": 1.3372267940890188, "grad_norm": 0.5181307474081975, "learning_rate": 5.227469552466602e-05, "loss": 11.9691, "step": 24557 }, { "epoch": 1.3372812480856018, "grad_norm": 0.6116110610278155, "learning_rate": 5.226694652554211e-05, "loss": 11.8194, "step": 24558 }, { "epoch": 1.3373357020821848, "grad_norm": 0.5458583824834689, "learning_rate": 5.225919789759853e-05, "loss": 11.8655, "step": 24559 }, { "epoch": 1.3373901560787678, "grad_norm": 0.5168599012507781, "learning_rate": 5.225144964089536e-05, "loss": 11.7887, "step": 24560 }, { "epoch": 1.3374446100753508, "grad_norm": 0.5947057044254064, "learning_rate": 5.2243701755492916e-05, "loss": 12.0628, "step": 24561 }, { "epoch": 1.3374990640719338, "grad_norm": 0.5383409990700289, "learning_rate": 5.223595424145149e-05, "loss": 11.8655, "step": 24562 }, { "epoch": 1.3375535180685167, "grad_norm": 0.5026192462405705, "learning_rate": 5.222820709883125e-05, "loss": 11.9324, "step": 24563 }, { "epoch": 1.3376079720650997, "grad_norm": 0.5398532142020857, "learning_rate": 5.2220460327692534e-05, "loss": 11.8372, "step": 24564 }, { "epoch": 1.3376624260616827, "grad_norm": 0.5478062871579805, "learning_rate": 5.221271392809548e-05, "loss": 11.8825, "step": 24565 }, { "epoch": 1.3377168800582657, "grad_norm": 0.5492161498882498, "learning_rate": 5.220496790010043e-05, "loss": 11.8732, "step": 24566 }, { "epoch": 1.3377713340548487, "grad_norm": 0.5711176807493772, "learning_rate": 5.219722224376752e-05, "loss": 12.0181, "step": 24567 }, { "epoch": 1.3378257880514317, "grad_norm": 0.5198765195672812, "learning_rate": 5.218947695915702e-05, "loss": 12.0444, "step": 24568 }, { "epoch": 1.3378802420480147, "grad_norm": 0.56607808304362, "learning_rate": 5.2181732046329216e-05, "loss": 11.9568, "step": 24569 }, { "epoch": 1.3379346960445977, "grad_norm": 0.5950939842943866, "learning_rate": 5.217398750534428e-05, "loss": 11.8325, "step": 24570 }, { "epoch": 1.337989150041181, "grad_norm": 0.5148668023660602, "learning_rate": 5.216624333626239e-05, "loss": 11.9365, "step": 24571 }, { "epoch": 1.338043604037764, "grad_norm": 0.5832434549780464, "learning_rate": 5.215849953914386e-05, "loss": 11.9594, "step": 24572 }, { "epoch": 1.338098058034347, "grad_norm": 0.5323926007781992, "learning_rate": 5.2150756114048814e-05, "loss": 11.8446, "step": 24573 }, { "epoch": 1.3381525120309299, "grad_norm": 0.6715059857263452, "learning_rate": 5.214301306103756e-05, "loss": 11.7965, "step": 24574 }, { "epoch": 1.3382069660275129, "grad_norm": 0.5972905943475129, "learning_rate": 5.213527038017022e-05, "loss": 11.9489, "step": 24575 }, { "epoch": 1.3382614200240959, "grad_norm": 0.6272140179190515, "learning_rate": 5.212752807150708e-05, "loss": 11.8413, "step": 24576 }, { "epoch": 1.3383158740206789, "grad_norm": 0.5213794721947207, "learning_rate": 5.2119786135108284e-05, "loss": 11.8303, "step": 24577 }, { "epoch": 1.3383703280172619, "grad_norm": 0.6199835343926031, "learning_rate": 5.211204457103404e-05, "loss": 11.9136, "step": 24578 }, { "epoch": 1.3384247820138448, "grad_norm": 0.6456791515026141, "learning_rate": 5.2104303379344624e-05, "loss": 11.9072, "step": 24579 }, { "epoch": 1.338479236010428, "grad_norm": 0.49702494056767493, "learning_rate": 5.209656256010016e-05, "loss": 11.7111, "step": 24580 }, { "epoch": 1.338533690007011, "grad_norm": 0.5736092594306474, "learning_rate": 5.208882211336088e-05, "loss": 11.918, "step": 24581 }, { "epoch": 1.338588144003594, "grad_norm": 0.5551138604712236, "learning_rate": 5.2081082039186904e-05, "loss": 11.782, "step": 24582 }, { "epoch": 1.338642598000177, "grad_norm": 0.532812278004759, "learning_rate": 5.2073342337638476e-05, "loss": 11.829, "step": 24583 }, { "epoch": 1.33869705199676, "grad_norm": 0.6295566813729274, "learning_rate": 5.206560300877581e-05, "loss": 11.8954, "step": 24584 }, { "epoch": 1.338751505993343, "grad_norm": 0.5912823516384009, "learning_rate": 5.2057864052659e-05, "loss": 11.9551, "step": 24585 }, { "epoch": 1.338805959989926, "grad_norm": 0.5743786285938248, "learning_rate": 5.205012546934833e-05, "loss": 11.971, "step": 24586 }, { "epoch": 1.338860413986509, "grad_norm": 0.5508310161518909, "learning_rate": 5.2042387258903894e-05, "loss": 12.0477, "step": 24587 }, { "epoch": 1.338914867983092, "grad_norm": 0.5812276817649266, "learning_rate": 5.203464942138594e-05, "loss": 11.8536, "step": 24588 }, { "epoch": 1.338969321979675, "grad_norm": 0.5663361545925268, "learning_rate": 5.202691195685454e-05, "loss": 11.8048, "step": 24589 }, { "epoch": 1.339023775976258, "grad_norm": 0.5225567662720166, "learning_rate": 5.201917486536996e-05, "loss": 11.7475, "step": 24590 }, { "epoch": 1.339078229972841, "grad_norm": 0.5407205251730083, "learning_rate": 5.201143814699233e-05, "loss": 11.9106, "step": 24591 }, { "epoch": 1.339132683969424, "grad_norm": 0.53190134231552, "learning_rate": 5.200370180178176e-05, "loss": 11.9709, "step": 24592 }, { "epoch": 1.339187137966007, "grad_norm": 0.5828162426608439, "learning_rate": 5.1995965829798496e-05, "loss": 11.9461, "step": 24593 }, { "epoch": 1.3392415919625902, "grad_norm": 0.5606408360367208, "learning_rate": 5.198823023110262e-05, "loss": 11.9083, "step": 24594 }, { "epoch": 1.3392960459591732, "grad_norm": 0.5499153178025608, "learning_rate": 5.19804950057543e-05, "loss": 12.0599, "step": 24595 }, { "epoch": 1.3393504999557562, "grad_norm": 0.6272253214515026, "learning_rate": 5.197276015381376e-05, "loss": 12.0533, "step": 24596 }, { "epoch": 1.3394049539523392, "grad_norm": 0.5874620400508778, "learning_rate": 5.196502567534105e-05, "loss": 11.934, "step": 24597 }, { "epoch": 1.3394594079489222, "grad_norm": 0.5537663911405211, "learning_rate": 5.1957291570396385e-05, "loss": 11.8273, "step": 24598 }, { "epoch": 1.3395138619455051, "grad_norm": 0.5634268633033013, "learning_rate": 5.194955783903984e-05, "loss": 11.9272, "step": 24599 }, { "epoch": 1.3395683159420881, "grad_norm": 0.5515821492661436, "learning_rate": 5.1941824481331626e-05, "loss": 11.9599, "step": 24600 }, { "epoch": 1.3396227699386711, "grad_norm": 0.5012151590987814, "learning_rate": 5.193409149733186e-05, "loss": 11.928, "step": 24601 }, { "epoch": 1.3396772239352541, "grad_norm": 0.5688783355733719, "learning_rate": 5.1926358887100604e-05, "loss": 11.8969, "step": 24602 }, { "epoch": 1.3397316779318373, "grad_norm": 0.4964984174350957, "learning_rate": 5.191862665069809e-05, "loss": 11.7818, "step": 24603 }, { "epoch": 1.3397861319284203, "grad_norm": 0.5021339874537967, "learning_rate": 5.191089478818436e-05, "loss": 11.96, "step": 24604 }, { "epoch": 1.3398405859250033, "grad_norm": 0.5401406439005793, "learning_rate": 5.1903163299619575e-05, "loss": 11.8579, "step": 24605 }, { "epoch": 1.3398950399215863, "grad_norm": 0.5401028643326528, "learning_rate": 5.18954321850639e-05, "loss": 11.9232, "step": 24606 }, { "epoch": 1.3399494939181693, "grad_norm": 0.5118698735519258, "learning_rate": 5.1887701444577376e-05, "loss": 11.9735, "step": 24607 }, { "epoch": 1.3400039479147523, "grad_norm": 0.5270790401729122, "learning_rate": 5.187997107822018e-05, "loss": 11.9389, "step": 24608 }, { "epoch": 1.3400584019113353, "grad_norm": 0.5570540128271331, "learning_rate": 5.1872241086052374e-05, "loss": 11.8259, "step": 24609 }, { "epoch": 1.3401128559079183, "grad_norm": 0.5356794580221736, "learning_rate": 5.186451146813411e-05, "loss": 12.009, "step": 24610 }, { "epoch": 1.3401673099045013, "grad_norm": 0.6214559074683523, "learning_rate": 5.18567822245255e-05, "loss": 11.8368, "step": 24611 }, { "epoch": 1.3402217639010843, "grad_norm": 0.5544359717823504, "learning_rate": 5.1849053355286583e-05, "loss": 11.8034, "step": 24612 }, { "epoch": 1.3402762178976673, "grad_norm": 0.5700299153115338, "learning_rate": 5.184132486047752e-05, "loss": 11.9758, "step": 24613 }, { "epoch": 1.3403306718942503, "grad_norm": 0.5951283432976049, "learning_rate": 5.1833596740158376e-05, "loss": 11.8935, "step": 24614 }, { "epoch": 1.3403851258908333, "grad_norm": 0.5615266933171653, "learning_rate": 5.1825868994389304e-05, "loss": 11.8921, "step": 24615 }, { "epoch": 1.3404395798874162, "grad_norm": 0.4976954408349177, "learning_rate": 5.1818141623230285e-05, "loss": 11.7621, "step": 24616 }, { "epoch": 1.3404940338839992, "grad_norm": 0.5422445813314399, "learning_rate": 5.18104146267415e-05, "loss": 11.9329, "step": 24617 }, { "epoch": 1.3405484878805825, "grad_norm": 0.580614160854788, "learning_rate": 5.1802688004983046e-05, "loss": 12.011, "step": 24618 }, { "epoch": 1.3406029418771654, "grad_norm": 0.5532611306209777, "learning_rate": 5.1794961758014936e-05, "loss": 11.9552, "step": 24619 }, { "epoch": 1.3406573958737484, "grad_norm": 0.5203892391062399, "learning_rate": 5.178723588589732e-05, "loss": 11.9495, "step": 24620 }, { "epoch": 1.3407118498703314, "grad_norm": 0.6274415942007482, "learning_rate": 5.177951038869024e-05, "loss": 11.9656, "step": 24621 }, { "epoch": 1.3407663038669144, "grad_norm": 0.5545298279893404, "learning_rate": 5.177178526645375e-05, "loss": 11.8779, "step": 24622 }, { "epoch": 1.3408207578634974, "grad_norm": 0.5475076096938358, "learning_rate": 5.176406051924798e-05, "loss": 11.8929, "step": 24623 }, { "epoch": 1.3408752118600804, "grad_norm": 0.5193635150414153, "learning_rate": 5.175633614713292e-05, "loss": 11.8949, "step": 24624 }, { "epoch": 1.3409296658566634, "grad_norm": 0.5310161217107139, "learning_rate": 5.174861215016873e-05, "loss": 11.8812, "step": 24625 }, { "epoch": 1.3409841198532466, "grad_norm": 0.5776940149513465, "learning_rate": 5.174088852841536e-05, "loss": 12.0292, "step": 24626 }, { "epoch": 1.3410385738498296, "grad_norm": 0.5795239369137751, "learning_rate": 5.173316528193295e-05, "loss": 11.8541, "step": 24627 }, { "epoch": 1.3410930278464126, "grad_norm": 0.6115089736970947, "learning_rate": 5.172544241078159e-05, "loss": 11.9109, "step": 24628 }, { "epoch": 1.3411474818429956, "grad_norm": 0.5783162807753064, "learning_rate": 5.171771991502123e-05, "loss": 11.7701, "step": 24629 }, { "epoch": 1.3412019358395786, "grad_norm": 0.546458977271874, "learning_rate": 5.1709997794712064e-05, "loss": 11.7898, "step": 24630 }, { "epoch": 1.3412563898361616, "grad_norm": 0.5056918842436856, "learning_rate": 5.170227604991398e-05, "loss": 11.8176, "step": 24631 }, { "epoch": 1.3413108438327446, "grad_norm": 0.47401625281647586, "learning_rate": 5.169455468068709e-05, "loss": 11.878, "step": 24632 }, { "epoch": 1.3413652978293276, "grad_norm": 0.5009397159064994, "learning_rate": 5.1686833687091475e-05, "loss": 11.758, "step": 24633 }, { "epoch": 1.3414197518259106, "grad_norm": 0.5628233913388982, "learning_rate": 5.1679113069187115e-05, "loss": 12.0128, "step": 24634 }, { "epoch": 1.3414742058224935, "grad_norm": 0.498647738732315, "learning_rate": 5.1671392827034106e-05, "loss": 11.9058, "step": 24635 }, { "epoch": 1.3415286598190765, "grad_norm": 0.49257969311586597, "learning_rate": 5.166367296069241e-05, "loss": 11.9904, "step": 24636 }, { "epoch": 1.3415831138156595, "grad_norm": 0.5506269605856507, "learning_rate": 5.1655953470222154e-05, "loss": 11.857, "step": 24637 }, { "epoch": 1.3416375678122425, "grad_norm": 0.559868816327295, "learning_rate": 5.164823435568327e-05, "loss": 12.0076, "step": 24638 }, { "epoch": 1.3416920218088255, "grad_norm": 0.5731561395593245, "learning_rate": 5.164051561713581e-05, "loss": 11.9248, "step": 24639 }, { "epoch": 1.3417464758054085, "grad_norm": 0.5540462796840516, "learning_rate": 5.16327972546399e-05, "loss": 11.8274, "step": 24640 }, { "epoch": 1.3418009298019917, "grad_norm": 0.5320036454013334, "learning_rate": 5.1625079268255386e-05, "loss": 11.9924, "step": 24641 }, { "epoch": 1.3418553837985747, "grad_norm": 0.5241473322774366, "learning_rate": 5.1617361658042405e-05, "loss": 11.8711, "step": 24642 }, { "epoch": 1.3419098377951577, "grad_norm": 0.5055339413936103, "learning_rate": 5.16096444240609e-05, "loss": 11.9069, "step": 24643 }, { "epoch": 1.3419642917917407, "grad_norm": 0.5048298227752752, "learning_rate": 5.160192756637091e-05, "loss": 11.9758, "step": 24644 }, { "epoch": 1.3420187457883237, "grad_norm": 0.5124961733519724, "learning_rate": 5.1594211085032484e-05, "loss": 11.7453, "step": 24645 }, { "epoch": 1.3420731997849067, "grad_norm": 0.5360412781099567, "learning_rate": 5.158649498010557e-05, "loss": 11.8183, "step": 24646 }, { "epoch": 1.3421276537814897, "grad_norm": 0.5217600581621155, "learning_rate": 5.157877925165021e-05, "loss": 11.8677, "step": 24647 }, { "epoch": 1.3421821077780727, "grad_norm": 0.5520672183749687, "learning_rate": 5.157106389972635e-05, "loss": 11.852, "step": 24648 }, { "epoch": 1.3422365617746557, "grad_norm": 0.5344003240530755, "learning_rate": 5.156334892439405e-05, "loss": 11.978, "step": 24649 }, { "epoch": 1.3422910157712389, "grad_norm": 0.5756585787262845, "learning_rate": 5.1555634325713284e-05, "loss": 11.8156, "step": 24650 }, { "epoch": 1.3423454697678219, "grad_norm": 0.5104893397762217, "learning_rate": 5.154792010374399e-05, "loss": 11.8454, "step": 24651 }, { "epoch": 1.3423999237644049, "grad_norm": 0.5863480953662468, "learning_rate": 5.154020625854623e-05, "loss": 11.9262, "step": 24652 }, { "epoch": 1.3424543777609879, "grad_norm": 0.492592261822517, "learning_rate": 5.153249279017991e-05, "loss": 11.9287, "step": 24653 }, { "epoch": 1.3425088317575709, "grad_norm": 0.5872998914003171, "learning_rate": 5.152477969870506e-05, "loss": 11.908, "step": 24654 }, { "epoch": 1.3425632857541538, "grad_norm": 0.5605485116233468, "learning_rate": 5.1517066984181685e-05, "loss": 11.8801, "step": 24655 }, { "epoch": 1.3426177397507368, "grad_norm": 0.5990806711376991, "learning_rate": 5.1509354646669695e-05, "loss": 11.9292, "step": 24656 }, { "epoch": 1.3426721937473198, "grad_norm": 0.7279063077741952, "learning_rate": 5.1501642686229126e-05, "loss": 12.01, "step": 24657 }, { "epoch": 1.3427266477439028, "grad_norm": 0.5294510462086448, "learning_rate": 5.1493931102919893e-05, "loss": 11.9327, "step": 24658 }, { "epoch": 1.3427811017404858, "grad_norm": 0.5012113477080508, "learning_rate": 5.1486219896802e-05, "loss": 11.979, "step": 24659 }, { "epoch": 1.3428355557370688, "grad_norm": 0.5208283316466333, "learning_rate": 5.147850906793542e-05, "loss": 11.8707, "step": 24660 }, { "epoch": 1.3428900097336518, "grad_norm": 0.582335091795844, "learning_rate": 5.1470798616380045e-05, "loss": 12.0025, "step": 24661 }, { "epoch": 1.3429444637302348, "grad_norm": 0.6343594526751137, "learning_rate": 5.146308854219591e-05, "loss": 12.1038, "step": 24662 }, { "epoch": 1.3429989177268178, "grad_norm": 0.5244064185971522, "learning_rate": 5.145537884544291e-05, "loss": 11.9433, "step": 24663 }, { "epoch": 1.343053371723401, "grad_norm": 0.7054565994001688, "learning_rate": 5.1447669526181055e-05, "loss": 12.0494, "step": 24664 }, { "epoch": 1.343107825719984, "grad_norm": 0.570649830549098, "learning_rate": 5.143996058447023e-05, "loss": 11.9568, "step": 24665 }, { "epoch": 1.343162279716567, "grad_norm": 0.6754946316357943, "learning_rate": 5.143225202037042e-05, "loss": 12.0417, "step": 24666 }, { "epoch": 1.34321673371315, "grad_norm": 0.5069186991077531, "learning_rate": 5.14245438339416e-05, "loss": 11.9761, "step": 24667 }, { "epoch": 1.343271187709733, "grad_norm": 0.5415640747473932, "learning_rate": 5.141683602524363e-05, "loss": 11.8968, "step": 24668 }, { "epoch": 1.343325641706316, "grad_norm": 0.5194004817624808, "learning_rate": 5.140912859433653e-05, "loss": 11.9379, "step": 24669 }, { "epoch": 1.343380095702899, "grad_norm": 0.5227581353768905, "learning_rate": 5.140142154128018e-05, "loss": 11.8972, "step": 24670 }, { "epoch": 1.343434549699482, "grad_norm": 0.6213626815070582, "learning_rate": 5.139371486613451e-05, "loss": 11.9509, "step": 24671 }, { "epoch": 1.343489003696065, "grad_norm": 0.5427038923186286, "learning_rate": 5.13860085689595e-05, "loss": 11.887, "step": 24672 }, { "epoch": 1.3435434576926482, "grad_norm": 0.536390506451965, "learning_rate": 5.1378302649814994e-05, "loss": 11.7878, "step": 24673 }, { "epoch": 1.3435979116892312, "grad_norm": 0.5814214936671521, "learning_rate": 5.1370597108761e-05, "loss": 12.0254, "step": 24674 }, { "epoch": 1.3436523656858141, "grad_norm": 0.6034499203431405, "learning_rate": 5.136289194585736e-05, "loss": 12.001, "step": 24675 }, { "epoch": 1.3437068196823971, "grad_norm": 0.5879422515014885, "learning_rate": 5.1355187161164034e-05, "loss": 11.9203, "step": 24676 }, { "epoch": 1.3437612736789801, "grad_norm": 0.5096279975774792, "learning_rate": 5.134748275474096e-05, "loss": 11.9085, "step": 24677 }, { "epoch": 1.3438157276755631, "grad_norm": 0.5210584951397081, "learning_rate": 5.133977872664799e-05, "loss": 11.9083, "step": 24678 }, { "epoch": 1.3438701816721461, "grad_norm": 0.5695970056479077, "learning_rate": 5.1332075076945084e-05, "loss": 11.7609, "step": 24679 }, { "epoch": 1.343924635668729, "grad_norm": 0.5433632179172042, "learning_rate": 5.132437180569214e-05, "loss": 11.8395, "step": 24680 }, { "epoch": 1.343979089665312, "grad_norm": 0.5408495754745256, "learning_rate": 5.131666891294899e-05, "loss": 11.8961, "step": 24681 }, { "epoch": 1.344033543661895, "grad_norm": 0.5367184083183594, "learning_rate": 5.130896639877565e-05, "loss": 11.92, "step": 24682 }, { "epoch": 1.344087997658478, "grad_norm": 0.6222809867914364, "learning_rate": 5.13012642632319e-05, "loss": 11.8826, "step": 24683 }, { "epoch": 1.344142451655061, "grad_norm": 0.5244961821012735, "learning_rate": 5.129356250637771e-05, "loss": 11.989, "step": 24684 }, { "epoch": 1.344196905651644, "grad_norm": 0.548225762241452, "learning_rate": 5.1285861128272924e-05, "loss": 11.8568, "step": 24685 }, { "epoch": 1.344251359648227, "grad_norm": 0.5935253842941441, "learning_rate": 5.1278160128977494e-05, "loss": 11.9319, "step": 24686 }, { "epoch": 1.3443058136448103, "grad_norm": 0.5671982583908727, "learning_rate": 5.1270459508551207e-05, "loss": 11.9201, "step": 24687 }, { "epoch": 1.3443602676413933, "grad_norm": 0.5610219671353242, "learning_rate": 5.126275926705402e-05, "loss": 11.9223, "step": 24688 }, { "epoch": 1.3444147216379763, "grad_norm": 0.5069111973494803, "learning_rate": 5.12550594045458e-05, "loss": 11.8724, "step": 24689 }, { "epoch": 1.3444691756345593, "grad_norm": 0.5620002209813412, "learning_rate": 5.1247359921086444e-05, "loss": 12.0272, "step": 24690 }, { "epoch": 1.3445236296311422, "grad_norm": 0.6137102404689122, "learning_rate": 5.1239660816735765e-05, "loss": 12.0232, "step": 24691 }, { "epoch": 1.3445780836277252, "grad_norm": 0.5901212641170603, "learning_rate": 5.123196209155364e-05, "loss": 11.9088, "step": 24692 }, { "epoch": 1.3446325376243082, "grad_norm": 0.5769018309674903, "learning_rate": 5.1224263745599955e-05, "loss": 11.8818, "step": 24693 }, { "epoch": 1.3446869916208912, "grad_norm": 0.5005582799986129, "learning_rate": 5.121656577893461e-05, "loss": 11.7601, "step": 24694 }, { "epoch": 1.3447414456174742, "grad_norm": 0.5932355413713493, "learning_rate": 5.120886819161739e-05, "loss": 12.0082, "step": 24695 }, { "epoch": 1.3447958996140574, "grad_norm": 0.5305838490879435, "learning_rate": 5.120117098370824e-05, "loss": 12.0842, "step": 24696 }, { "epoch": 1.3448503536106404, "grad_norm": 0.5598975906879371, "learning_rate": 5.119347415526692e-05, "loss": 11.8764, "step": 24697 }, { "epoch": 1.3449048076072234, "grad_norm": 0.5366234881540021, "learning_rate": 5.118577770635337e-05, "loss": 11.9488, "step": 24698 }, { "epoch": 1.3449592616038064, "grad_norm": 0.5326388231089194, "learning_rate": 5.1178081637027356e-05, "loss": 11.8126, "step": 24699 }, { "epoch": 1.3450137156003894, "grad_norm": 0.5264614507788572, "learning_rate": 5.117038594734882e-05, "loss": 11.9151, "step": 24700 }, { "epoch": 1.3450681695969724, "grad_norm": 0.5568658150801595, "learning_rate": 5.116269063737753e-05, "loss": 12.0364, "step": 24701 }, { "epoch": 1.3451226235935554, "grad_norm": 0.5426277076062659, "learning_rate": 5.115499570717333e-05, "loss": 11.7796, "step": 24702 }, { "epoch": 1.3451770775901384, "grad_norm": 0.5405251899144996, "learning_rate": 5.1147301156796065e-05, "loss": 12.0124, "step": 24703 }, { "epoch": 1.3452315315867214, "grad_norm": 0.5650397200967617, "learning_rate": 5.113960698630562e-05, "loss": 11.94, "step": 24704 }, { "epoch": 1.3452859855833044, "grad_norm": 0.6118572302047037, "learning_rate": 5.113191319576174e-05, "loss": 11.9214, "step": 24705 }, { "epoch": 1.3453404395798874, "grad_norm": 0.5286389829934046, "learning_rate": 5.112421978522435e-05, "loss": 11.743, "step": 24706 }, { "epoch": 1.3453948935764704, "grad_norm": 0.5937446151677802, "learning_rate": 5.111652675475317e-05, "loss": 11.9297, "step": 24707 }, { "epoch": 1.3454493475730533, "grad_norm": 0.540039344359391, "learning_rate": 5.110883410440812e-05, "loss": 11.9706, "step": 24708 }, { "epoch": 1.3455038015696363, "grad_norm": 0.5318002732872339, "learning_rate": 5.110114183424893e-05, "loss": 11.8151, "step": 24709 }, { "epoch": 1.3455582555662193, "grad_norm": 0.5701680828712842, "learning_rate": 5.109344994433547e-05, "loss": 11.9632, "step": 24710 }, { "epoch": 1.3456127095628025, "grad_norm": 0.5322747869200056, "learning_rate": 5.108575843472763e-05, "loss": 11.953, "step": 24711 }, { "epoch": 1.3456671635593855, "grad_norm": 0.6386986840153218, "learning_rate": 5.107806730548503e-05, "loss": 11.9134, "step": 24712 }, { "epoch": 1.3457216175559685, "grad_norm": 0.5787294346188318, "learning_rate": 5.107037655666765e-05, "loss": 11.8885, "step": 24713 }, { "epoch": 1.3457760715525515, "grad_norm": 0.48484950700769913, "learning_rate": 5.1062686188335184e-05, "loss": 11.911, "step": 24714 }, { "epoch": 1.3458305255491345, "grad_norm": 0.560911470456335, "learning_rate": 5.1054996200547465e-05, "loss": 11.9997, "step": 24715 }, { "epoch": 1.3458849795457175, "grad_norm": 0.5227067169613693, "learning_rate": 5.104730659336435e-05, "loss": 11.8588, "step": 24716 }, { "epoch": 1.3459394335423005, "grad_norm": 0.5328178749192024, "learning_rate": 5.103961736684555e-05, "loss": 11.8337, "step": 24717 }, { "epoch": 1.3459938875388835, "grad_norm": 0.574303286229096, "learning_rate": 5.1031928521050933e-05, "loss": 11.9587, "step": 24718 }, { "epoch": 1.3460483415354665, "grad_norm": 0.5469256303813594, "learning_rate": 5.102424005604021e-05, "loss": 11.8126, "step": 24719 }, { "epoch": 1.3461027955320497, "grad_norm": 0.5502648367970351, "learning_rate": 5.101655197187325e-05, "loss": 11.8713, "step": 24720 }, { "epoch": 1.3461572495286327, "grad_norm": 0.5825838781402476, "learning_rate": 5.10088642686098e-05, "loss": 11.9476, "step": 24721 }, { "epoch": 1.3462117035252157, "grad_norm": 0.5312492793536634, "learning_rate": 5.100117694630959e-05, "loss": 11.9215, "step": 24722 }, { "epoch": 1.3462661575217987, "grad_norm": 0.5925804114597782, "learning_rate": 5.0993490005032506e-05, "loss": 11.9656, "step": 24723 }, { "epoch": 1.3463206115183817, "grad_norm": 0.597512351118672, "learning_rate": 5.098580344483821e-05, "loss": 11.857, "step": 24724 }, { "epoch": 1.3463750655149647, "grad_norm": 0.5527618332523438, "learning_rate": 5.097811726578652e-05, "loss": 12.0091, "step": 24725 }, { "epoch": 1.3464295195115477, "grad_norm": 0.5815921715331781, "learning_rate": 5.0970431467937254e-05, "loss": 11.9153, "step": 24726 }, { "epoch": 1.3464839735081306, "grad_norm": 0.49469894164678824, "learning_rate": 5.09627460513501e-05, "loss": 11.874, "step": 24727 }, { "epoch": 1.3465384275047136, "grad_norm": 0.5202688863510311, "learning_rate": 5.095506101608488e-05, "loss": 11.915, "step": 24728 }, { "epoch": 1.3465928815012966, "grad_norm": 0.5437772142738702, "learning_rate": 5.0947376362201304e-05, "loss": 11.7232, "step": 24729 }, { "epoch": 1.3466473354978796, "grad_norm": 0.538512507411147, "learning_rate": 5.0939692089759195e-05, "loss": 11.9361, "step": 24730 }, { "epoch": 1.3467017894944626, "grad_norm": 0.5852451636107295, "learning_rate": 5.093200819881827e-05, "loss": 11.8578, "step": 24731 }, { "epoch": 1.3467562434910456, "grad_norm": 0.5744333870393465, "learning_rate": 5.092432468943823e-05, "loss": 11.9063, "step": 24732 }, { "epoch": 1.3468106974876286, "grad_norm": 0.5660255027197925, "learning_rate": 5.091664156167892e-05, "loss": 11.9843, "step": 24733 }, { "epoch": 1.3468651514842118, "grad_norm": 0.8385179784822178, "learning_rate": 5.090895881559998e-05, "loss": 11.9528, "step": 24734 }, { "epoch": 1.3469196054807948, "grad_norm": 0.5644388973902699, "learning_rate": 5.090127645126127e-05, "loss": 11.9393, "step": 24735 }, { "epoch": 1.3469740594773778, "grad_norm": 0.5061500219352385, "learning_rate": 5.089359446872242e-05, "loss": 11.8778, "step": 24736 }, { "epoch": 1.3470285134739608, "grad_norm": 0.6064606475805742, "learning_rate": 5.088591286804322e-05, "loss": 12.0032, "step": 24737 }, { "epoch": 1.3470829674705438, "grad_norm": 0.6085706595767844, "learning_rate": 5.0878231649283424e-05, "loss": 12.0709, "step": 24738 }, { "epoch": 1.3471374214671268, "grad_norm": 0.5112846789243923, "learning_rate": 5.0870550812502704e-05, "loss": 11.8724, "step": 24739 }, { "epoch": 1.3471918754637098, "grad_norm": 0.5642057804212833, "learning_rate": 5.086287035776086e-05, "loss": 11.9766, "step": 24740 }, { "epoch": 1.3472463294602928, "grad_norm": 0.538498722557541, "learning_rate": 5.0855190285117585e-05, "loss": 11.9649, "step": 24741 }, { "epoch": 1.3473007834568758, "grad_norm": 0.5167847240960812, "learning_rate": 5.084751059463253e-05, "loss": 11.85, "step": 24742 }, { "epoch": 1.347355237453459, "grad_norm": 0.5688071269020584, "learning_rate": 5.0839831286365535e-05, "loss": 11.9503, "step": 24743 }, { "epoch": 1.347409691450042, "grad_norm": 0.6078205857455574, "learning_rate": 5.083215236037622e-05, "loss": 11.8844, "step": 24744 }, { "epoch": 1.347464145446625, "grad_norm": 0.5504748636150579, "learning_rate": 5.082447381672435e-05, "loss": 11.9087, "step": 24745 }, { "epoch": 1.347518599443208, "grad_norm": 0.556827012989121, "learning_rate": 5.081679565546959e-05, "loss": 11.8018, "step": 24746 }, { "epoch": 1.347573053439791, "grad_norm": 0.5326846761653032, "learning_rate": 5.080911787667173e-05, "loss": 11.8898, "step": 24747 }, { "epoch": 1.347627507436374, "grad_norm": 0.4771338407625155, "learning_rate": 5.080144048039036e-05, "loss": 11.9173, "step": 24748 }, { "epoch": 1.347681961432957, "grad_norm": 0.5514342083191643, "learning_rate": 5.079376346668525e-05, "loss": 11.9127, "step": 24749 }, { "epoch": 1.34773641542954, "grad_norm": 0.49419029608704157, "learning_rate": 5.078608683561612e-05, "loss": 11.8876, "step": 24750 }, { "epoch": 1.347790869426123, "grad_norm": 0.5668823733875155, "learning_rate": 5.077841058724263e-05, "loss": 11.8694, "step": 24751 }, { "epoch": 1.347845323422706, "grad_norm": 0.532912246970959, "learning_rate": 5.077073472162444e-05, "loss": 12.0189, "step": 24752 }, { "epoch": 1.347899777419289, "grad_norm": 0.5671877456159174, "learning_rate": 5.076305923882131e-05, "loss": 11.974, "step": 24753 }, { "epoch": 1.347954231415872, "grad_norm": 0.5616592638243587, "learning_rate": 5.0755384138892846e-05, "loss": 11.8129, "step": 24754 }, { "epoch": 1.3480086854124549, "grad_norm": 0.570931293347498, "learning_rate": 5.074770942189882e-05, "loss": 11.9228, "step": 24755 }, { "epoch": 1.3480631394090379, "grad_norm": 0.5067208853009946, "learning_rate": 5.074003508789882e-05, "loss": 11.9231, "step": 24756 }, { "epoch": 1.348117593405621, "grad_norm": 0.48252121944302095, "learning_rate": 5.073236113695261e-05, "loss": 11.7249, "step": 24757 }, { "epoch": 1.348172047402204, "grad_norm": 0.517549680877548, "learning_rate": 5.072468756911979e-05, "loss": 11.9628, "step": 24758 }, { "epoch": 1.348226501398787, "grad_norm": 0.6033185712884095, "learning_rate": 5.0717014384460036e-05, "loss": 11.9381, "step": 24759 }, { "epoch": 1.34828095539537, "grad_norm": 0.5521904788131673, "learning_rate": 5.070934158303311e-05, "loss": 11.8499, "step": 24760 }, { "epoch": 1.348335409391953, "grad_norm": 0.5511601673095677, "learning_rate": 5.0701669164898603e-05, "loss": 11.8565, "step": 24761 }, { "epoch": 1.348389863388536, "grad_norm": 0.5607754924702814, "learning_rate": 5.069399713011618e-05, "loss": 11.8313, "step": 24762 }, { "epoch": 1.348444317385119, "grad_norm": 0.5987504656166703, "learning_rate": 5.0686325478745466e-05, "loss": 11.9435, "step": 24763 }, { "epoch": 1.348498771381702, "grad_norm": 0.5803073742557056, "learning_rate": 5.067865421084615e-05, "loss": 12.02, "step": 24764 }, { "epoch": 1.348553225378285, "grad_norm": 0.5411527019282852, "learning_rate": 5.0670983326477936e-05, "loss": 12.0168, "step": 24765 }, { "epoch": 1.3486076793748683, "grad_norm": 0.5220430512587575, "learning_rate": 5.0663312825700395e-05, "loss": 11.9947, "step": 24766 }, { "epoch": 1.3486621333714512, "grad_norm": 0.5189351798939571, "learning_rate": 5.065564270857323e-05, "loss": 11.868, "step": 24767 }, { "epoch": 1.3487165873680342, "grad_norm": 0.6134265449723086, "learning_rate": 5.064797297515604e-05, "loss": 12.0574, "step": 24768 }, { "epoch": 1.3487710413646172, "grad_norm": 0.5799543714779022, "learning_rate": 5.0640303625508514e-05, "loss": 11.7574, "step": 24769 }, { "epoch": 1.3488254953612002, "grad_norm": 0.47761785916124516, "learning_rate": 5.063263465969024e-05, "loss": 11.93, "step": 24770 }, { "epoch": 1.3488799493577832, "grad_norm": 0.532400116807748, "learning_rate": 5.06249660777609e-05, "loss": 11.9961, "step": 24771 }, { "epoch": 1.3489344033543662, "grad_norm": 0.5422298339313437, "learning_rate": 5.0617297879780114e-05, "loss": 11.8649, "step": 24772 }, { "epoch": 1.3489888573509492, "grad_norm": 0.5578831490030823, "learning_rate": 5.060963006580746e-05, "loss": 11.8773, "step": 24773 }, { "epoch": 1.3490433113475322, "grad_norm": 0.5969063133631078, "learning_rate": 5.06019626359026e-05, "loss": 12.0546, "step": 24774 }, { "epoch": 1.3490977653441152, "grad_norm": 0.5065166455132252, "learning_rate": 5.059429559012521e-05, "loss": 11.8178, "step": 24775 }, { "epoch": 1.3491522193406982, "grad_norm": 0.531827767695221, "learning_rate": 5.058662892853483e-05, "loss": 12.0171, "step": 24776 }, { "epoch": 1.3492066733372812, "grad_norm": 0.502000655939235, "learning_rate": 5.057896265119113e-05, "loss": 11.8882, "step": 24777 }, { "epoch": 1.3492611273338642, "grad_norm": 0.5363887226520877, "learning_rate": 5.057129675815368e-05, "loss": 11.7719, "step": 24778 }, { "epoch": 1.3493155813304472, "grad_norm": 0.5046527835178513, "learning_rate": 5.056363124948216e-05, "loss": 11.9025, "step": 24779 }, { "epoch": 1.3493700353270301, "grad_norm": 0.5511538817038009, "learning_rate": 5.055596612523607e-05, "loss": 11.9255, "step": 24780 }, { "epoch": 1.3494244893236134, "grad_norm": 0.508313966078211, "learning_rate": 5.054830138547514e-05, "loss": 11.8887, "step": 24781 }, { "epoch": 1.3494789433201964, "grad_norm": 0.5247460253492393, "learning_rate": 5.054063703025891e-05, "loss": 11.8504, "step": 24782 }, { "epoch": 1.3495333973167793, "grad_norm": 0.5344680484674968, "learning_rate": 5.053297305964693e-05, "loss": 11.851, "step": 24783 }, { "epoch": 1.3495878513133623, "grad_norm": 0.5471176781384924, "learning_rate": 5.0525309473698887e-05, "loss": 11.8643, "step": 24784 }, { "epoch": 1.3496423053099453, "grad_norm": 0.5463936967650441, "learning_rate": 5.051764627247431e-05, "loss": 12.0349, "step": 24785 }, { "epoch": 1.3496967593065283, "grad_norm": 0.526339112385976, "learning_rate": 5.05099834560328e-05, "loss": 11.8445, "step": 24786 }, { "epoch": 1.3497512133031113, "grad_norm": 0.5630772598984861, "learning_rate": 5.0502321024434e-05, "loss": 11.9418, "step": 24787 }, { "epoch": 1.3498056672996943, "grad_norm": 0.551988855827672, "learning_rate": 5.049465897773742e-05, "loss": 11.9497, "step": 24788 }, { "epoch": 1.3498601212962775, "grad_norm": 0.5761189634907042, "learning_rate": 5.04869973160027e-05, "loss": 11.9864, "step": 24789 }, { "epoch": 1.3499145752928605, "grad_norm": 0.5447156986558133, "learning_rate": 5.047933603928936e-05, "loss": 11.9061, "step": 24790 }, { "epoch": 1.3499690292894435, "grad_norm": 0.5088697949684696, "learning_rate": 5.047167514765705e-05, "loss": 11.9659, "step": 24791 }, { "epoch": 1.3500234832860265, "grad_norm": 0.5166478617367919, "learning_rate": 5.0464014641165304e-05, "loss": 11.9017, "step": 24792 }, { "epoch": 1.3500779372826095, "grad_norm": 0.49868874735038105, "learning_rate": 5.045635451987365e-05, "loss": 11.9322, "step": 24793 }, { "epoch": 1.3501323912791925, "grad_norm": 0.5614526172360104, "learning_rate": 5.044869478384171e-05, "loss": 12.0084, "step": 24794 }, { "epoch": 1.3501868452757755, "grad_norm": 0.5314844398684915, "learning_rate": 5.044103543312901e-05, "loss": 11.7961, "step": 24795 }, { "epoch": 1.3502412992723585, "grad_norm": 0.5552772230831415, "learning_rate": 5.043337646779516e-05, "loss": 11.9476, "step": 24796 }, { "epoch": 1.3502957532689415, "grad_norm": 0.5023316306806409, "learning_rate": 5.042571788789965e-05, "loss": 11.8229, "step": 24797 }, { "epoch": 1.3503502072655245, "grad_norm": 0.5011525564571289, "learning_rate": 5.0418059693502065e-05, "loss": 11.8788, "step": 24798 }, { "epoch": 1.3504046612621075, "grad_norm": 0.5200250815765821, "learning_rate": 5.041040188466202e-05, "loss": 11.9296, "step": 24799 }, { "epoch": 1.3504591152586904, "grad_norm": 0.6615102104803817, "learning_rate": 5.040274446143896e-05, "loss": 12.0247, "step": 24800 }, { "epoch": 1.3505135692552734, "grad_norm": 0.595575885434631, "learning_rate": 5.0395087423892494e-05, "loss": 11.8824, "step": 24801 }, { "epoch": 1.3505680232518564, "grad_norm": 0.5113084742992144, "learning_rate": 5.0387430772082166e-05, "loss": 11.819, "step": 24802 }, { "epoch": 1.3506224772484394, "grad_norm": 0.5640317571968053, "learning_rate": 5.0379774506067455e-05, "loss": 11.7749, "step": 24803 }, { "epoch": 1.3506769312450226, "grad_norm": 0.661235023433339, "learning_rate": 5.0372118625907984e-05, "loss": 12.0037, "step": 24804 }, { "epoch": 1.3507313852416056, "grad_norm": 0.5357440654739187, "learning_rate": 5.036446313166319e-05, "loss": 11.9756, "step": 24805 }, { "epoch": 1.3507858392381886, "grad_norm": 0.5308581111306975, "learning_rate": 5.03568080233927e-05, "loss": 11.8394, "step": 24806 }, { "epoch": 1.3508402932347716, "grad_norm": 0.4946856917540988, "learning_rate": 5.034915330115595e-05, "loss": 11.8253, "step": 24807 }, { "epoch": 1.3508947472313546, "grad_norm": 0.5021633837792272, "learning_rate": 5.034149896501252e-05, "loss": 11.9361, "step": 24808 }, { "epoch": 1.3509492012279376, "grad_norm": 0.5608247897623255, "learning_rate": 5.0333845015021966e-05, "loss": 11.9191, "step": 24809 }, { "epoch": 1.3510036552245206, "grad_norm": 0.5198315437772559, "learning_rate": 5.032619145124371e-05, "loss": 11.9982, "step": 24810 }, { "epoch": 1.3510581092211036, "grad_norm": 0.6440230274453169, "learning_rate": 5.0318538273737406e-05, "loss": 11.9214, "step": 24811 }, { "epoch": 1.3511125632176866, "grad_norm": 0.5506894777910234, "learning_rate": 5.031088548256239e-05, "loss": 11.9466, "step": 24812 }, { "epoch": 1.3511670172142698, "grad_norm": 0.5293822800229081, "learning_rate": 5.0303233077778265e-05, "loss": 11.9223, "step": 24813 }, { "epoch": 1.3512214712108528, "grad_norm": 0.5591197450370005, "learning_rate": 5.0295581059444584e-05, "loss": 11.8849, "step": 24814 }, { "epoch": 1.3512759252074358, "grad_norm": 0.5050360193004697, "learning_rate": 5.028792942762075e-05, "loss": 11.8091, "step": 24815 }, { "epoch": 1.3513303792040188, "grad_norm": 0.49635674747590736, "learning_rate": 5.0280278182366356e-05, "loss": 11.9033, "step": 24816 }, { "epoch": 1.3513848332006018, "grad_norm": 0.5362261060313038, "learning_rate": 5.0272627323740815e-05, "loss": 11.9254, "step": 24817 }, { "epoch": 1.3514392871971848, "grad_norm": 0.5232223570308987, "learning_rate": 5.0264976851803716e-05, "loss": 11.8598, "step": 24818 }, { "epoch": 1.3514937411937678, "grad_norm": 0.5556135801606398, "learning_rate": 5.025732676661444e-05, "loss": 11.9191, "step": 24819 }, { "epoch": 1.3515481951903507, "grad_norm": 0.5240704904942667, "learning_rate": 5.0249677068232534e-05, "loss": 11.9336, "step": 24820 }, { "epoch": 1.3516026491869337, "grad_norm": 0.5573539217851793, "learning_rate": 5.024202775671758e-05, "loss": 11.9374, "step": 24821 }, { "epoch": 1.3516571031835167, "grad_norm": 0.5501697310148302, "learning_rate": 5.023437883212887e-05, "loss": 11.9706, "step": 24822 }, { "epoch": 1.3517115571800997, "grad_norm": 0.5063270064138293, "learning_rate": 5.022673029452598e-05, "loss": 11.9344, "step": 24823 }, { "epoch": 1.3517660111766827, "grad_norm": 0.49663763397517213, "learning_rate": 5.021908214396842e-05, "loss": 11.9286, "step": 24824 }, { "epoch": 1.3518204651732657, "grad_norm": 0.670876281443096, "learning_rate": 5.021143438051559e-05, "loss": 11.8892, "step": 24825 }, { "epoch": 1.3518749191698487, "grad_norm": 0.5606586921665635, "learning_rate": 5.020378700422702e-05, "loss": 12.0507, "step": 24826 }, { "epoch": 1.351929373166432, "grad_norm": 0.5107979228761818, "learning_rate": 5.019614001516213e-05, "loss": 11.947, "step": 24827 }, { "epoch": 1.351983827163015, "grad_norm": 0.5142782902402404, "learning_rate": 5.018849341338043e-05, "loss": 11.9033, "step": 24828 }, { "epoch": 1.352038281159598, "grad_norm": 0.5959443943528853, "learning_rate": 5.018084719894134e-05, "loss": 11.8944, "step": 24829 }, { "epoch": 1.352092735156181, "grad_norm": 0.5487928972675663, "learning_rate": 5.0173201371904336e-05, "loss": 11.912, "step": 24830 }, { "epoch": 1.3521471891527639, "grad_norm": 0.5716065146438394, "learning_rate": 5.016555593232893e-05, "loss": 11.9627, "step": 24831 }, { "epoch": 1.3522016431493469, "grad_norm": 0.5838625403119779, "learning_rate": 5.015791088027446e-05, "loss": 11.8686, "step": 24832 }, { "epoch": 1.3522560971459299, "grad_norm": 0.5147912815954611, "learning_rate": 5.0150266215800454e-05, "loss": 11.9449, "step": 24833 }, { "epoch": 1.3523105511425129, "grad_norm": 0.5475568976877669, "learning_rate": 5.014262193896632e-05, "loss": 11.887, "step": 24834 }, { "epoch": 1.3523650051390959, "grad_norm": 0.5511623497369028, "learning_rate": 5.013497804983152e-05, "loss": 11.9286, "step": 24835 }, { "epoch": 1.352419459135679, "grad_norm": 0.5407169117373076, "learning_rate": 5.0127334548455504e-05, "loss": 11.8254, "step": 24836 }, { "epoch": 1.352473913132262, "grad_norm": 0.5617974496305375, "learning_rate": 5.0119691434897685e-05, "loss": 11.8755, "step": 24837 }, { "epoch": 1.352528367128845, "grad_norm": 0.5490719035581467, "learning_rate": 5.011204870921754e-05, "loss": 11.8921, "step": 24838 }, { "epoch": 1.352582821125428, "grad_norm": 0.5316405625088215, "learning_rate": 5.0104406371474435e-05, "loss": 11.9682, "step": 24839 }, { "epoch": 1.352637275122011, "grad_norm": 0.5493005504319712, "learning_rate": 5.009676442172787e-05, "loss": 11.8559, "step": 24840 }, { "epoch": 1.352691729118594, "grad_norm": 0.5042634568948384, "learning_rate": 5.008912286003723e-05, "loss": 11.8346, "step": 24841 }, { "epoch": 1.352746183115177, "grad_norm": 0.6517204678807134, "learning_rate": 5.008148168646191e-05, "loss": 11.9003, "step": 24842 }, { "epoch": 1.35280063711176, "grad_norm": 0.5750013340571146, "learning_rate": 5.0073840901061395e-05, "loss": 11.9398, "step": 24843 }, { "epoch": 1.352855091108343, "grad_norm": 0.5844993004803447, "learning_rate": 5.006620050389502e-05, "loss": 11.9265, "step": 24844 }, { "epoch": 1.352909545104926, "grad_norm": 0.5277049449750013, "learning_rate": 5.005856049502229e-05, "loss": 11.9073, "step": 24845 }, { "epoch": 1.352963999101509, "grad_norm": 0.4876604087834955, "learning_rate": 5.005092087450254e-05, "loss": 11.8304, "step": 24846 }, { "epoch": 1.353018453098092, "grad_norm": 0.5619475806379481, "learning_rate": 5.004328164239518e-05, "loss": 11.9532, "step": 24847 }, { "epoch": 1.353072907094675, "grad_norm": 0.53215760304349, "learning_rate": 5.003564279875969e-05, "loss": 11.8062, "step": 24848 }, { "epoch": 1.353127361091258, "grad_norm": 0.5662691808049553, "learning_rate": 5.00280043436554e-05, "loss": 11.9444, "step": 24849 }, { "epoch": 1.353181815087841, "grad_norm": 0.5046826535678688, "learning_rate": 5.0020366277141754e-05, "loss": 11.8574, "step": 24850 }, { "epoch": 1.3532362690844242, "grad_norm": 0.5465399341865931, "learning_rate": 5.0012728599278114e-05, "loss": 11.9395, "step": 24851 }, { "epoch": 1.3532907230810072, "grad_norm": 0.5409542996884615, "learning_rate": 5.000509131012384e-05, "loss": 11.8603, "step": 24852 }, { "epoch": 1.3533451770775902, "grad_norm": 0.587445380148168, "learning_rate": 4.999745440973841e-05, "loss": 11.8569, "step": 24853 }, { "epoch": 1.3533996310741732, "grad_norm": 0.5277849863970896, "learning_rate": 4.998981789818111e-05, "loss": 11.8868, "step": 24854 }, { "epoch": 1.3534540850707562, "grad_norm": 0.5301040362510373, "learning_rate": 4.998218177551143e-05, "loss": 11.8316, "step": 24855 }, { "epoch": 1.3535085390673391, "grad_norm": 0.5613774863843418, "learning_rate": 4.997454604178864e-05, "loss": 11.9655, "step": 24856 }, { "epoch": 1.3535629930639221, "grad_norm": 0.544793573707874, "learning_rate": 4.9966910697072166e-05, "loss": 11.9052, "step": 24857 }, { "epoch": 1.3536174470605051, "grad_norm": 0.5252444204166443, "learning_rate": 4.995927574142143e-05, "loss": 11.898, "step": 24858 }, { "epoch": 1.3536719010570883, "grad_norm": 0.5068669077648342, "learning_rate": 4.995164117489571e-05, "loss": 11.7729, "step": 24859 }, { "epoch": 1.3537263550536713, "grad_norm": 0.502792964628515, "learning_rate": 4.9944006997554474e-05, "loss": 11.9333, "step": 24860 }, { "epoch": 1.3537808090502543, "grad_norm": 0.5630809332803862, "learning_rate": 4.993637320945699e-05, "loss": 11.9314, "step": 24861 }, { "epoch": 1.3538352630468373, "grad_norm": 0.5063112968886009, "learning_rate": 4.9928739810662696e-05, "loss": 11.8642, "step": 24862 }, { "epoch": 1.3538897170434203, "grad_norm": 0.6370058690335387, "learning_rate": 4.992110680123092e-05, "loss": 11.953, "step": 24863 }, { "epoch": 1.3539441710400033, "grad_norm": 0.611175174799429, "learning_rate": 4.991347418122099e-05, "loss": 11.8584, "step": 24864 }, { "epoch": 1.3539986250365863, "grad_norm": 0.562780777127195, "learning_rate": 4.990584195069231e-05, "loss": 11.8797, "step": 24865 }, { "epoch": 1.3540530790331693, "grad_norm": 0.5177942871062793, "learning_rate": 4.989821010970417e-05, "loss": 11.8644, "step": 24866 }, { "epoch": 1.3541075330297523, "grad_norm": 0.5616015574802201, "learning_rate": 4.989057865831599e-05, "loss": 11.9686, "step": 24867 }, { "epoch": 1.3541619870263353, "grad_norm": 0.5276231084759514, "learning_rate": 4.988294759658703e-05, "loss": 11.8057, "step": 24868 }, { "epoch": 1.3542164410229183, "grad_norm": 0.550768685905666, "learning_rate": 4.9875316924576676e-05, "loss": 11.9847, "step": 24869 }, { "epoch": 1.3542708950195013, "grad_norm": 0.5744872109531516, "learning_rate": 4.9867686642344304e-05, "loss": 11.9256, "step": 24870 }, { "epoch": 1.3543253490160843, "grad_norm": 0.5274101657090788, "learning_rate": 4.986005674994917e-05, "loss": 11.9319, "step": 24871 }, { "epoch": 1.3543798030126672, "grad_norm": 0.5618023876300211, "learning_rate": 4.985242724745067e-05, "loss": 11.9709, "step": 24872 }, { "epoch": 1.3544342570092502, "grad_norm": 0.5078380684729866, "learning_rate": 4.984479813490812e-05, "loss": 11.7467, "step": 24873 }, { "epoch": 1.3544887110058335, "grad_norm": 0.5867966059024414, "learning_rate": 4.983716941238079e-05, "loss": 11.7997, "step": 24874 }, { "epoch": 1.3545431650024164, "grad_norm": 0.5338787637271132, "learning_rate": 4.9829541079928075e-05, "loss": 11.7774, "step": 24875 }, { "epoch": 1.3545976189989994, "grad_norm": 0.5902886660661296, "learning_rate": 4.9821913137609224e-05, "loss": 11.956, "step": 24876 }, { "epoch": 1.3546520729955824, "grad_norm": 0.5348857228702834, "learning_rate": 4.9814285585483634e-05, "loss": 11.9083, "step": 24877 }, { "epoch": 1.3547065269921654, "grad_norm": 0.5916120896647244, "learning_rate": 4.980665842361054e-05, "loss": 11.9277, "step": 24878 }, { "epoch": 1.3547609809887484, "grad_norm": 0.5844802679823214, "learning_rate": 4.9799031652049277e-05, "loss": 11.8835, "step": 24879 }, { "epoch": 1.3548154349853314, "grad_norm": 0.5590944803749194, "learning_rate": 4.9791405270859215e-05, "loss": 11.9702, "step": 24880 }, { "epoch": 1.3548698889819144, "grad_norm": 0.6653605419238408, "learning_rate": 4.9783779280099564e-05, "loss": 12.0233, "step": 24881 }, { "epoch": 1.3549243429784974, "grad_norm": 0.5345790397368297, "learning_rate": 4.977615367982973e-05, "loss": 11.8585, "step": 24882 }, { "epoch": 1.3549787969750806, "grad_norm": 0.4899892796727021, "learning_rate": 4.976852847010889e-05, "loss": 11.6916, "step": 24883 }, { "epoch": 1.3550332509716636, "grad_norm": 0.5972743588836793, "learning_rate": 4.976090365099638e-05, "loss": 11.9908, "step": 24884 }, { "epoch": 1.3550877049682466, "grad_norm": 0.6000639830695981, "learning_rate": 4.975327922255155e-05, "loss": 11.8087, "step": 24885 }, { "epoch": 1.3551421589648296, "grad_norm": 0.5487570979590168, "learning_rate": 4.974565518483362e-05, "loss": 11.9484, "step": 24886 }, { "epoch": 1.3551966129614126, "grad_norm": 0.5623411375132161, "learning_rate": 4.973803153790193e-05, "loss": 11.8157, "step": 24887 }, { "epoch": 1.3552510669579956, "grad_norm": 0.606979887834055, "learning_rate": 4.97304082818157e-05, "loss": 11.8969, "step": 24888 }, { "epoch": 1.3553055209545786, "grad_norm": 0.5377885831843022, "learning_rate": 4.972278541663429e-05, "loss": 11.8924, "step": 24889 }, { "epoch": 1.3553599749511616, "grad_norm": 0.5205384980636952, "learning_rate": 4.9715162942416884e-05, "loss": 11.928, "step": 24890 }, { "epoch": 1.3554144289477446, "grad_norm": 0.614298542184706, "learning_rate": 4.97075408592228e-05, "loss": 12.0008, "step": 24891 }, { "epoch": 1.3554688829443275, "grad_norm": 0.5766063853548465, "learning_rate": 4.9699919167111395e-05, "loss": 11.9444, "step": 24892 }, { "epoch": 1.3555233369409105, "grad_norm": 0.5616804705502019, "learning_rate": 4.969229786614179e-05, "loss": 11.9267, "step": 24893 }, { "epoch": 1.3555777909374935, "grad_norm": 0.5489564411038799, "learning_rate": 4.968467695637333e-05, "loss": 11.9348, "step": 24894 }, { "epoch": 1.3556322449340765, "grad_norm": 0.6474272376209784, "learning_rate": 4.9677056437865244e-05, "loss": 11.5415, "step": 24895 }, { "epoch": 1.3556866989306595, "grad_norm": 0.5156865616846723, "learning_rate": 4.9669436310676785e-05, "loss": 11.8739, "step": 24896 }, { "epoch": 1.3557411529272427, "grad_norm": 0.5824334064447036, "learning_rate": 4.966181657486728e-05, "loss": 12.044, "step": 24897 }, { "epoch": 1.3557956069238257, "grad_norm": 0.5951441337550208, "learning_rate": 4.9654197230495895e-05, "loss": 11.8968, "step": 24898 }, { "epoch": 1.3558500609204087, "grad_norm": 0.5427233998533754, "learning_rate": 4.9646578277621947e-05, "loss": 11.7835, "step": 24899 }, { "epoch": 1.3559045149169917, "grad_norm": 0.5475569697171353, "learning_rate": 4.963895971630461e-05, "loss": 11.8824, "step": 24900 }, { "epoch": 1.3559589689135747, "grad_norm": 0.5679944901373409, "learning_rate": 4.9631341546603215e-05, "loss": 11.8764, "step": 24901 }, { "epoch": 1.3560134229101577, "grad_norm": 0.5977108933058346, "learning_rate": 4.962372376857695e-05, "loss": 12.0388, "step": 24902 }, { "epoch": 1.3560678769067407, "grad_norm": 0.5859425763958728, "learning_rate": 4.961610638228502e-05, "loss": 11.8136, "step": 24903 }, { "epoch": 1.3561223309033237, "grad_norm": 0.5281229218351752, "learning_rate": 4.960848938778673e-05, "loss": 11.867, "step": 24904 }, { "epoch": 1.3561767848999067, "grad_norm": 0.5318152722254618, "learning_rate": 4.960087278514124e-05, "loss": 11.8967, "step": 24905 }, { "epoch": 1.3562312388964899, "grad_norm": 0.5261775820020697, "learning_rate": 4.959325657440782e-05, "loss": 11.8881, "step": 24906 }, { "epoch": 1.3562856928930729, "grad_norm": 0.6243009110779695, "learning_rate": 4.958564075564573e-05, "loss": 11.922, "step": 24907 }, { "epoch": 1.3563401468896559, "grad_norm": 0.5097660088059349, "learning_rate": 4.95780253289141e-05, "loss": 11.9009, "step": 24908 }, { "epoch": 1.3563946008862389, "grad_norm": 0.5416975746395439, "learning_rate": 4.957041029427225e-05, "loss": 11.917, "step": 24909 }, { "epoch": 1.3564490548828219, "grad_norm": 0.5849221474800707, "learning_rate": 4.9562795651779316e-05, "loss": 11.9707, "step": 24910 }, { "epoch": 1.3565035088794049, "grad_norm": 0.5897685853806195, "learning_rate": 4.955518140149456e-05, "loss": 11.9131, "step": 24911 }, { "epoch": 1.3565579628759878, "grad_norm": 0.5436011638671914, "learning_rate": 4.954756754347719e-05, "loss": 11.7322, "step": 24912 }, { "epoch": 1.3566124168725708, "grad_norm": 0.6011848819011538, "learning_rate": 4.9539954077786353e-05, "loss": 11.8981, "step": 24913 }, { "epoch": 1.3566668708691538, "grad_norm": 0.5648134955862548, "learning_rate": 4.953234100448133e-05, "loss": 12.0249, "step": 24914 }, { "epoch": 1.3567213248657368, "grad_norm": 0.5168500769792862, "learning_rate": 4.952472832362125e-05, "loss": 11.8804, "step": 24915 }, { "epoch": 1.3567757788623198, "grad_norm": 0.6142474939863247, "learning_rate": 4.951711603526539e-05, "loss": 11.8979, "step": 24916 }, { "epoch": 1.3568302328589028, "grad_norm": 0.5684425158241186, "learning_rate": 4.9509504139472854e-05, "loss": 11.9123, "step": 24917 }, { "epoch": 1.3568846868554858, "grad_norm": 0.5667423344177543, "learning_rate": 4.950189263630288e-05, "loss": 11.9643, "step": 24918 }, { "epoch": 1.3569391408520688, "grad_norm": 0.5771960361685287, "learning_rate": 4.949428152581471e-05, "loss": 11.859, "step": 24919 }, { "epoch": 1.3569935948486518, "grad_norm": 0.5392846440373192, "learning_rate": 4.9486670808067426e-05, "loss": 11.9325, "step": 24920 }, { "epoch": 1.357048048845235, "grad_norm": 0.5666340357531485, "learning_rate": 4.947906048312031e-05, "loss": 12.0368, "step": 24921 }, { "epoch": 1.357102502841818, "grad_norm": 0.5274722567663747, "learning_rate": 4.9471450551032485e-05, "loss": 11.851, "step": 24922 }, { "epoch": 1.357156956838401, "grad_norm": 0.5371877229027102, "learning_rate": 4.94638410118631e-05, "loss": 11.8442, "step": 24923 }, { "epoch": 1.357211410834984, "grad_norm": 0.5163432852791142, "learning_rate": 4.9456231865671386e-05, "loss": 12.0214, "step": 24924 }, { "epoch": 1.357265864831567, "grad_norm": 0.5429702860581987, "learning_rate": 4.944862311251647e-05, "loss": 11.8162, "step": 24925 }, { "epoch": 1.35732031882815, "grad_norm": 0.49390150469090427, "learning_rate": 4.944101475245756e-05, "loss": 11.7471, "step": 24926 }, { "epoch": 1.357374772824733, "grad_norm": 0.5073739712088102, "learning_rate": 4.943340678555377e-05, "loss": 11.9097, "step": 24927 }, { "epoch": 1.357429226821316, "grad_norm": 0.5357613198551402, "learning_rate": 4.9425799211864286e-05, "loss": 11.8786, "step": 24928 }, { "epoch": 1.3574836808178992, "grad_norm": 0.5262640462521961, "learning_rate": 4.9418192031448306e-05, "loss": 11.9509, "step": 24929 }, { "epoch": 1.3575381348144822, "grad_norm": 0.595498353326288, "learning_rate": 4.9410585244364916e-05, "loss": 11.9361, "step": 24930 }, { "epoch": 1.3575925888110651, "grad_norm": 0.6056796594259529, "learning_rate": 4.940297885067333e-05, "loss": 11.8909, "step": 24931 }, { "epoch": 1.3576470428076481, "grad_norm": 0.5782904163323127, "learning_rate": 4.939537285043266e-05, "loss": 12.0208, "step": 24932 }, { "epoch": 1.3577014968042311, "grad_norm": 0.49495929222631285, "learning_rate": 4.938776724370203e-05, "loss": 11.7721, "step": 24933 }, { "epoch": 1.3577559508008141, "grad_norm": 0.5133983141652042, "learning_rate": 4.938016203054064e-05, "loss": 11.861, "step": 24934 }, { "epoch": 1.3578104047973971, "grad_norm": 0.4951558290143576, "learning_rate": 4.9372557211007574e-05, "loss": 11.9239, "step": 24935 }, { "epoch": 1.3578648587939801, "grad_norm": 0.5733830050411346, "learning_rate": 4.936495278516202e-05, "loss": 11.8048, "step": 24936 }, { "epoch": 1.357919312790563, "grad_norm": 0.5362533987137719, "learning_rate": 4.935734875306305e-05, "loss": 11.9157, "step": 24937 }, { "epoch": 1.357973766787146, "grad_norm": 0.6084958145610728, "learning_rate": 4.9349745114769863e-05, "loss": 11.7731, "step": 24938 }, { "epoch": 1.358028220783729, "grad_norm": 0.5651082642116758, "learning_rate": 4.934214187034152e-05, "loss": 11.86, "step": 24939 }, { "epoch": 1.358082674780312, "grad_norm": 0.5388066316508631, "learning_rate": 4.933453901983718e-05, "loss": 11.8609, "step": 24940 }, { "epoch": 1.358137128776895, "grad_norm": 0.550791723540091, "learning_rate": 4.9326936563315984e-05, "loss": 11.9139, "step": 24941 }, { "epoch": 1.358191582773478, "grad_norm": 0.5582596246482534, "learning_rate": 4.931933450083704e-05, "loss": 11.908, "step": 24942 }, { "epoch": 1.358246036770061, "grad_norm": 0.5569203634323466, "learning_rate": 4.9311732832459446e-05, "loss": 11.9891, "step": 24943 }, { "epoch": 1.3583004907666443, "grad_norm": 0.5156216176427671, "learning_rate": 4.930413155824228e-05, "loss": 11.7841, "step": 24944 }, { "epoch": 1.3583549447632273, "grad_norm": 0.5196163341381659, "learning_rate": 4.929653067824469e-05, "loss": 11.9316, "step": 24945 }, { "epoch": 1.3584093987598103, "grad_norm": 0.5008788675494064, "learning_rate": 4.9288930192525815e-05, "loss": 11.9397, "step": 24946 }, { "epoch": 1.3584638527563933, "grad_norm": 0.5955207579212103, "learning_rate": 4.928133010114469e-05, "loss": 11.9151, "step": 24947 }, { "epoch": 1.3585183067529762, "grad_norm": 0.6049074577657653, "learning_rate": 4.927373040416049e-05, "loss": 11.7953, "step": 24948 }, { "epoch": 1.3585727607495592, "grad_norm": 0.5651218456736897, "learning_rate": 4.926613110163222e-05, "loss": 11.9555, "step": 24949 }, { "epoch": 1.3586272147461422, "grad_norm": 0.5609675285351408, "learning_rate": 4.9258532193619064e-05, "loss": 11.8516, "step": 24950 }, { "epoch": 1.3586816687427252, "grad_norm": 0.5203702149403598, "learning_rate": 4.925093368018004e-05, "loss": 11.8554, "step": 24951 }, { "epoch": 1.3587361227393082, "grad_norm": 0.5292728328043207, "learning_rate": 4.924333556137429e-05, "loss": 11.9143, "step": 24952 }, { "epoch": 1.3587905767358914, "grad_norm": 0.5660082640547254, "learning_rate": 4.9235737837260885e-05, "loss": 12.0576, "step": 24953 }, { "epoch": 1.3588450307324744, "grad_norm": 0.5166251485491004, "learning_rate": 4.922814050789886e-05, "loss": 11.8077, "step": 24954 }, { "epoch": 1.3588994847290574, "grad_norm": 0.5195651561494627, "learning_rate": 4.9220543573347324e-05, "loss": 11.809, "step": 24955 }, { "epoch": 1.3589539387256404, "grad_norm": 0.56371074239359, "learning_rate": 4.921294703366539e-05, "loss": 11.8172, "step": 24956 }, { "epoch": 1.3590083927222234, "grad_norm": 0.5142443462087007, "learning_rate": 4.920535088891207e-05, "loss": 11.727, "step": 24957 }, { "epoch": 1.3590628467188064, "grad_norm": 0.5345036061525329, "learning_rate": 4.9197755139146495e-05, "loss": 11.8688, "step": 24958 }, { "epoch": 1.3591173007153894, "grad_norm": 0.613562091302831, "learning_rate": 4.919015978442765e-05, "loss": 11.9773, "step": 24959 }, { "epoch": 1.3591717547119724, "grad_norm": 0.5397915387932382, "learning_rate": 4.9182564824814684e-05, "loss": 11.7113, "step": 24960 }, { "epoch": 1.3592262087085554, "grad_norm": 0.549886462065191, "learning_rate": 4.9174970260366585e-05, "loss": 11.946, "step": 24961 }, { "epoch": 1.3592806627051384, "grad_norm": 0.550303995844349, "learning_rate": 4.916737609114247e-05, "loss": 11.8847, "step": 24962 }, { "epoch": 1.3593351167017214, "grad_norm": 0.5478981988353996, "learning_rate": 4.9159782317201364e-05, "loss": 11.9462, "step": 24963 }, { "epoch": 1.3593895706983044, "grad_norm": 0.5773689655458489, "learning_rate": 4.9152188938602285e-05, "loss": 11.759, "step": 24964 }, { "epoch": 1.3594440246948873, "grad_norm": 0.5539399400396439, "learning_rate": 4.914459595540435e-05, "loss": 11.8204, "step": 24965 }, { "epoch": 1.3594984786914703, "grad_norm": 0.5791466590168848, "learning_rate": 4.9137003367666525e-05, "loss": 12.0109, "step": 24966 }, { "epoch": 1.3595529326880536, "grad_norm": 0.5757685910387154, "learning_rate": 4.912941117544788e-05, "loss": 11.9751, "step": 24967 }, { "epoch": 1.3596073866846365, "grad_norm": 0.545043819119108, "learning_rate": 4.912181937880752e-05, "loss": 11.8309, "step": 24968 }, { "epoch": 1.3596618406812195, "grad_norm": 0.527299133856043, "learning_rate": 4.9114227977804384e-05, "loss": 11.8493, "step": 24969 }, { "epoch": 1.3597162946778025, "grad_norm": 0.5551793271739268, "learning_rate": 4.9106636972497575e-05, "loss": 12.0106, "step": 24970 }, { "epoch": 1.3597707486743855, "grad_norm": 0.5471731565250786, "learning_rate": 4.9099046362946053e-05, "loss": 11.8008, "step": 24971 }, { "epoch": 1.3598252026709685, "grad_norm": 0.5184502680272319, "learning_rate": 4.909145614920893e-05, "loss": 11.9077, "step": 24972 }, { "epoch": 1.3598796566675515, "grad_norm": 0.5701095237909425, "learning_rate": 4.9083866331345174e-05, "loss": 11.8327, "step": 24973 }, { "epoch": 1.3599341106641345, "grad_norm": 0.6190660343408305, "learning_rate": 4.907627690941378e-05, "loss": 11.9276, "step": 24974 }, { "epoch": 1.3599885646607175, "grad_norm": 0.5447161710312252, "learning_rate": 4.906868788347383e-05, "loss": 11.9611, "step": 24975 }, { "epoch": 1.3600430186573007, "grad_norm": 0.58208473872352, "learning_rate": 4.9061099253584264e-05, "loss": 11.8794, "step": 24976 }, { "epoch": 1.3600974726538837, "grad_norm": 0.5814764443183652, "learning_rate": 4.9053511019804136e-05, "loss": 11.9842, "step": 24977 }, { "epoch": 1.3601519266504667, "grad_norm": 0.5274231067978153, "learning_rate": 4.904592318219249e-05, "loss": 11.9158, "step": 24978 }, { "epoch": 1.3602063806470497, "grad_norm": 0.5660687616682143, "learning_rate": 4.903833574080825e-05, "loss": 11.879, "step": 24979 }, { "epoch": 1.3602608346436327, "grad_norm": 0.5969691407107678, "learning_rate": 4.9030748695710494e-05, "loss": 11.8296, "step": 24980 }, { "epoch": 1.3603152886402157, "grad_norm": 0.532669920349359, "learning_rate": 4.902316204695815e-05, "loss": 11.8981, "step": 24981 }, { "epoch": 1.3603697426367987, "grad_norm": 0.5229960340859549, "learning_rate": 4.901557579461028e-05, "loss": 11.9678, "step": 24982 }, { "epoch": 1.3604241966333817, "grad_norm": 0.5684134918572549, "learning_rate": 4.900798993872584e-05, "loss": 11.9296, "step": 24983 }, { "epoch": 1.3604786506299646, "grad_norm": 0.5987188117018615, "learning_rate": 4.900040447936379e-05, "loss": 11.9985, "step": 24984 }, { "epoch": 1.3605331046265476, "grad_norm": 0.7762637705718533, "learning_rate": 4.899281941658319e-05, "loss": 11.8949, "step": 24985 }, { "epoch": 1.3605875586231306, "grad_norm": 0.5754549163370596, "learning_rate": 4.8985234750442934e-05, "loss": 11.9371, "step": 24986 }, { "epoch": 1.3606420126197136, "grad_norm": 0.5003681898766952, "learning_rate": 4.897765048100209e-05, "loss": 11.7886, "step": 24987 }, { "epoch": 1.3606964666162966, "grad_norm": 0.4882107238229703, "learning_rate": 4.897006660831954e-05, "loss": 11.8665, "step": 24988 }, { "epoch": 1.3607509206128796, "grad_norm": 0.716362334980051, "learning_rate": 4.896248313245433e-05, "loss": 11.7667, "step": 24989 }, { "epoch": 1.3608053746094628, "grad_norm": 0.5407693854731415, "learning_rate": 4.895490005346543e-05, "loss": 11.914, "step": 24990 }, { "epoch": 1.3608598286060458, "grad_norm": 0.5903030712307776, "learning_rate": 4.894731737141177e-05, "loss": 11.8986, "step": 24991 }, { "epoch": 1.3609142826026288, "grad_norm": 0.5072839435812865, "learning_rate": 4.89397350863524e-05, "loss": 11.892, "step": 24992 }, { "epoch": 1.3609687365992118, "grad_norm": 0.501903336905804, "learning_rate": 4.893215319834613e-05, "loss": 11.8784, "step": 24993 }, { "epoch": 1.3610231905957948, "grad_norm": 0.5636339992920806, "learning_rate": 4.8924571707452004e-05, "loss": 12.0286, "step": 24994 }, { "epoch": 1.3610776445923778, "grad_norm": 0.5393615922258653, "learning_rate": 4.891699061372901e-05, "loss": 11.8979, "step": 24995 }, { "epoch": 1.3611320985889608, "grad_norm": 0.6009854715854602, "learning_rate": 4.890940991723603e-05, "loss": 12.016, "step": 24996 }, { "epoch": 1.3611865525855438, "grad_norm": 0.5298412273781102, "learning_rate": 4.890182961803208e-05, "loss": 11.9466, "step": 24997 }, { "epoch": 1.3612410065821268, "grad_norm": 0.568971366230619, "learning_rate": 4.8894249716176044e-05, "loss": 11.9406, "step": 24998 }, { "epoch": 1.36129546057871, "grad_norm": 0.4855817437367864, "learning_rate": 4.8886670211726916e-05, "loss": 11.8565, "step": 24999 }, { "epoch": 1.361349914575293, "grad_norm": 0.5258545939766616, "learning_rate": 4.887909110474358e-05, "loss": 11.778, "step": 25000 }, { "epoch": 1.361404368571876, "grad_norm": 0.5644727999056377, "learning_rate": 4.8871512395285015e-05, "loss": 11.906, "step": 25001 }, { "epoch": 1.361458822568459, "grad_norm": 0.5631330583033027, "learning_rate": 4.8863934083410165e-05, "loss": 11.9083, "step": 25002 }, { "epoch": 1.361513276565042, "grad_norm": 0.5928746067366175, "learning_rate": 4.885635616917795e-05, "loss": 11.8918, "step": 25003 }, { "epoch": 1.361567730561625, "grad_norm": 0.5687408818579682, "learning_rate": 4.884877865264724e-05, "loss": 11.764, "step": 25004 }, { "epoch": 1.361622184558208, "grad_norm": 0.5547182565620317, "learning_rate": 4.884120153387706e-05, "loss": 11.7977, "step": 25005 }, { "epoch": 1.361676638554791, "grad_norm": 0.5406958966746962, "learning_rate": 4.8833624812926215e-05, "loss": 11.9221, "step": 25006 }, { "epoch": 1.361731092551374, "grad_norm": 0.6347223954139799, "learning_rate": 4.882604848985374e-05, "loss": 11.9636, "step": 25007 }, { "epoch": 1.361785546547957, "grad_norm": 0.5340752072264334, "learning_rate": 4.881847256471844e-05, "loss": 11.8655, "step": 25008 }, { "epoch": 1.36184000054454, "grad_norm": 0.5216907951302637, "learning_rate": 4.881089703757934e-05, "loss": 11.9513, "step": 25009 }, { "epoch": 1.361894454541123, "grad_norm": 0.5954455984659678, "learning_rate": 4.8803321908495235e-05, "loss": 11.9156, "step": 25010 }, { "epoch": 1.361948908537706, "grad_norm": 0.5497393167262156, "learning_rate": 4.879574717752509e-05, "loss": 11.8963, "step": 25011 }, { "epoch": 1.3620033625342889, "grad_norm": 0.6359159377486772, "learning_rate": 4.878817284472783e-05, "loss": 11.9282, "step": 25012 }, { "epoch": 1.3620578165308719, "grad_norm": 0.5206324241828418, "learning_rate": 4.8780598910162346e-05, "loss": 11.8689, "step": 25013 }, { "epoch": 1.362112270527455, "grad_norm": 0.5272454399438127, "learning_rate": 4.8773025373887496e-05, "loss": 11.8483, "step": 25014 }, { "epoch": 1.362166724524038, "grad_norm": 0.5282543903076978, "learning_rate": 4.8765452235962164e-05, "loss": 11.9176, "step": 25015 }, { "epoch": 1.362221178520621, "grad_norm": 0.559768472267573, "learning_rate": 4.875787949644526e-05, "loss": 11.8522, "step": 25016 }, { "epoch": 1.362275632517204, "grad_norm": 0.4938271457560536, "learning_rate": 4.875030715539572e-05, "loss": 11.9746, "step": 25017 }, { "epoch": 1.362330086513787, "grad_norm": 0.5901155786033425, "learning_rate": 4.874273521287235e-05, "loss": 12.0275, "step": 25018 }, { "epoch": 1.36238454051037, "grad_norm": 0.5128795830427856, "learning_rate": 4.8735163668934105e-05, "loss": 11.8649, "step": 25019 }, { "epoch": 1.362438994506953, "grad_norm": 0.5243891263339125, "learning_rate": 4.872759252363979e-05, "loss": 12.0431, "step": 25020 }, { "epoch": 1.362493448503536, "grad_norm": 0.5440153012281209, "learning_rate": 4.872002177704834e-05, "loss": 11.8967, "step": 25021 }, { "epoch": 1.3625479025001193, "grad_norm": 0.5309573831848331, "learning_rate": 4.871245142921856e-05, "loss": 11.8478, "step": 25022 }, { "epoch": 1.3626023564967022, "grad_norm": 0.6028129225695973, "learning_rate": 4.870488148020941e-05, "loss": 12.0187, "step": 25023 }, { "epoch": 1.3626568104932852, "grad_norm": 0.5359101509285282, "learning_rate": 4.8697311930079705e-05, "loss": 11.8899, "step": 25024 }, { "epoch": 1.3627112644898682, "grad_norm": 0.5397611360516368, "learning_rate": 4.8689742778888256e-05, "loss": 11.7515, "step": 25025 }, { "epoch": 1.3627657184864512, "grad_norm": 0.524039195851762, "learning_rate": 4.868217402669398e-05, "loss": 11.8701, "step": 25026 }, { "epoch": 1.3628201724830342, "grad_norm": 0.5795503373074804, "learning_rate": 4.8674605673555765e-05, "loss": 11.9561, "step": 25027 }, { "epoch": 1.3628746264796172, "grad_norm": 0.5932548255088791, "learning_rate": 4.866703771953238e-05, "loss": 11.9275, "step": 25028 }, { "epoch": 1.3629290804762002, "grad_norm": 0.4853918644768155, "learning_rate": 4.865947016468275e-05, "loss": 11.9024, "step": 25029 }, { "epoch": 1.3629835344727832, "grad_norm": 0.5411828980209702, "learning_rate": 4.865190300906567e-05, "loss": 11.8984, "step": 25030 }, { "epoch": 1.3630379884693662, "grad_norm": 0.5102095294681734, "learning_rate": 4.864433625274003e-05, "loss": 11.8875, "step": 25031 }, { "epoch": 1.3630924424659492, "grad_norm": 0.5811649139961952, "learning_rate": 4.86367698957646e-05, "loss": 11.9619, "step": 25032 }, { "epoch": 1.3631468964625322, "grad_norm": 0.5633498008262142, "learning_rate": 4.862920393819831e-05, "loss": 11.889, "step": 25033 }, { "epoch": 1.3632013504591152, "grad_norm": 0.5285322694474356, "learning_rate": 4.862163838009993e-05, "loss": 11.8194, "step": 25034 }, { "epoch": 1.3632558044556982, "grad_norm": 0.5347508240447599, "learning_rate": 4.8614073221528286e-05, "loss": 11.9274, "step": 25035 }, { "epoch": 1.3633102584522812, "grad_norm": 0.5788175449431141, "learning_rate": 4.8606508462542254e-05, "loss": 11.8674, "step": 25036 }, { "epoch": 1.3633647124488644, "grad_norm": 0.5147804669991923, "learning_rate": 4.85989441032006e-05, "loss": 11.824, "step": 25037 }, { "epoch": 1.3634191664454474, "grad_norm": 0.5149866176731968, "learning_rate": 4.859138014356217e-05, "loss": 11.915, "step": 25038 }, { "epoch": 1.3634736204420304, "grad_norm": 0.5697637018498767, "learning_rate": 4.858381658368583e-05, "loss": 11.8232, "step": 25039 }, { "epoch": 1.3635280744386133, "grad_norm": 0.5000018076758913, "learning_rate": 4.857625342363031e-05, "loss": 11.793, "step": 25040 }, { "epoch": 1.3635825284351963, "grad_norm": 0.5272847581622794, "learning_rate": 4.856869066345452e-05, "loss": 11.8191, "step": 25041 }, { "epoch": 1.3636369824317793, "grad_norm": 0.5357826017991316, "learning_rate": 4.856112830321716e-05, "loss": 11.9674, "step": 25042 }, { "epoch": 1.3636914364283623, "grad_norm": 0.5568912482017461, "learning_rate": 4.855356634297714e-05, "loss": 11.8877, "step": 25043 }, { "epoch": 1.3637458904249453, "grad_norm": 0.5041668188356421, "learning_rate": 4.854600478279322e-05, "loss": 11.9672, "step": 25044 }, { "epoch": 1.3638003444215283, "grad_norm": 0.5459840938632199, "learning_rate": 4.853844362272415e-05, "loss": 11.941, "step": 25045 }, { "epoch": 1.3638547984181115, "grad_norm": 0.7894134375331131, "learning_rate": 4.853088286282882e-05, "loss": 12.0943, "step": 25046 }, { "epoch": 1.3639092524146945, "grad_norm": 0.572334369362874, "learning_rate": 4.8523322503165926e-05, "loss": 11.9471, "step": 25047 }, { "epoch": 1.3639637064112775, "grad_norm": 0.5214567336702625, "learning_rate": 4.851576254379435e-05, "loss": 11.8591, "step": 25048 }, { "epoch": 1.3640181604078605, "grad_norm": 0.49154675703009915, "learning_rate": 4.850820298477281e-05, "loss": 11.8707, "step": 25049 }, { "epoch": 1.3640726144044435, "grad_norm": 0.5325034004871464, "learning_rate": 4.850064382616011e-05, "loss": 11.8024, "step": 25050 }, { "epoch": 1.3641270684010265, "grad_norm": 0.5189007373633713, "learning_rate": 4.849308506801509e-05, "loss": 11.8567, "step": 25051 }, { "epoch": 1.3641815223976095, "grad_norm": 0.5972202800016274, "learning_rate": 4.848552671039642e-05, "loss": 11.76, "step": 25052 }, { "epoch": 1.3642359763941925, "grad_norm": 0.5665482075263434, "learning_rate": 4.847796875336298e-05, "loss": 11.8383, "step": 25053 }, { "epoch": 1.3642904303907755, "grad_norm": 0.5531479756601952, "learning_rate": 4.84704111969735e-05, "loss": 11.774, "step": 25054 }, { "epoch": 1.3643448843873585, "grad_norm": 0.5419518727756614, "learning_rate": 4.84628540412867e-05, "loss": 11.8875, "step": 25055 }, { "epoch": 1.3643993383839415, "grad_norm": 0.5558341957921653, "learning_rate": 4.845529728636143e-05, "loss": 11.8634, "step": 25056 }, { "epoch": 1.3644537923805244, "grad_norm": 0.614957087424945, "learning_rate": 4.844774093225638e-05, "loss": 11.7651, "step": 25057 }, { "epoch": 1.3645082463771074, "grad_norm": 0.580647493413801, "learning_rate": 4.844018497903038e-05, "loss": 11.8813, "step": 25058 }, { "epoch": 1.3645627003736904, "grad_norm": 0.49358035332314476, "learning_rate": 4.8432629426742104e-05, "loss": 11.9072, "step": 25059 }, { "epoch": 1.3646171543702736, "grad_norm": 0.5582621331307084, "learning_rate": 4.8425074275450344e-05, "loss": 11.9912, "step": 25060 }, { "epoch": 1.3646716083668566, "grad_norm": 0.5544974137118196, "learning_rate": 4.8417519525213906e-05, "loss": 11.925, "step": 25061 }, { "epoch": 1.3647260623634396, "grad_norm": 0.5267663796662333, "learning_rate": 4.8409965176091445e-05, "loss": 11.8615, "step": 25062 }, { "epoch": 1.3647805163600226, "grad_norm": 0.52441552411104, "learning_rate": 4.840241122814181e-05, "loss": 11.7422, "step": 25063 }, { "epoch": 1.3648349703566056, "grad_norm": 0.5639695274470757, "learning_rate": 4.839485768142361e-05, "loss": 11.8825, "step": 25064 }, { "epoch": 1.3648894243531886, "grad_norm": 0.5510800558494531, "learning_rate": 4.838730453599566e-05, "loss": 11.8495, "step": 25065 }, { "epoch": 1.3649438783497716, "grad_norm": 0.5248793378922084, "learning_rate": 4.8379751791916716e-05, "loss": 11.8267, "step": 25066 }, { "epoch": 1.3649983323463546, "grad_norm": 0.5712846925018582, "learning_rate": 4.837219944924544e-05, "loss": 11.7966, "step": 25067 }, { "epoch": 1.3650527863429376, "grad_norm": 0.49709859810052864, "learning_rate": 4.836464750804064e-05, "loss": 11.929, "step": 25068 }, { "epoch": 1.3651072403395208, "grad_norm": 0.587437006672066, "learning_rate": 4.8357095968360955e-05, "loss": 11.9191, "step": 25069 }, { "epoch": 1.3651616943361038, "grad_norm": 0.522421371357169, "learning_rate": 4.83495448302652e-05, "loss": 11.9529, "step": 25070 }, { "epoch": 1.3652161483326868, "grad_norm": 0.5547914272280554, "learning_rate": 4.8341994093812005e-05, "loss": 11.8742, "step": 25071 }, { "epoch": 1.3652706023292698, "grad_norm": 0.5628350573401014, "learning_rate": 4.833444375906012e-05, "loss": 11.8248, "step": 25072 }, { "epoch": 1.3653250563258528, "grad_norm": 0.5454799580843143, "learning_rate": 4.8326893826068354e-05, "loss": 11.8551, "step": 25073 }, { "epoch": 1.3653795103224358, "grad_norm": 0.5680693707193344, "learning_rate": 4.831934429489524e-05, "loss": 11.8932, "step": 25074 }, { "epoch": 1.3654339643190188, "grad_norm": 0.5389331220279573, "learning_rate": 4.831179516559958e-05, "loss": 11.9218, "step": 25075 }, { "epoch": 1.3654884183156017, "grad_norm": 0.6550773298623871, "learning_rate": 4.83042464382401e-05, "loss": 11.8776, "step": 25076 }, { "epoch": 1.3655428723121847, "grad_norm": 0.5385274700697291, "learning_rate": 4.829669811287544e-05, "loss": 11.9966, "step": 25077 }, { "epoch": 1.3655973263087677, "grad_norm": 0.5645934639414184, "learning_rate": 4.828915018956435e-05, "loss": 12.0673, "step": 25078 }, { "epoch": 1.3656517803053507, "grad_norm": 0.526032775194877, "learning_rate": 4.828160266836547e-05, "loss": 11.8598, "step": 25079 }, { "epoch": 1.3657062343019337, "grad_norm": 0.5167559172748148, "learning_rate": 4.827405554933756e-05, "loss": 11.9208, "step": 25080 }, { "epoch": 1.3657606882985167, "grad_norm": 0.5437637163621964, "learning_rate": 4.826650883253923e-05, "loss": 11.8471, "step": 25081 }, { "epoch": 1.3658151422950997, "grad_norm": 0.588792792656736, "learning_rate": 4.8258962518029205e-05, "loss": 11.7768, "step": 25082 }, { "epoch": 1.3658695962916827, "grad_norm": 0.5382521194921016, "learning_rate": 4.825141660586623e-05, "loss": 11.8772, "step": 25083 }, { "epoch": 1.365924050288266, "grad_norm": 0.5069262275811791, "learning_rate": 4.824387109610885e-05, "loss": 11.9157, "step": 25084 }, { "epoch": 1.365978504284849, "grad_norm": 0.6846066968569539, "learning_rate": 4.823632598881585e-05, "loss": 11.6901, "step": 25085 }, { "epoch": 1.366032958281432, "grad_norm": 0.5400538581503838, "learning_rate": 4.822878128404581e-05, "loss": 11.9139, "step": 25086 }, { "epoch": 1.366087412278015, "grad_norm": 0.548696516380634, "learning_rate": 4.8221236981857455e-05, "loss": 11.8388, "step": 25087 }, { "epoch": 1.3661418662745979, "grad_norm": 0.6096702113572322, "learning_rate": 4.821369308230949e-05, "loss": 12.0497, "step": 25088 }, { "epoch": 1.3661963202711809, "grad_norm": 0.5265305841633061, "learning_rate": 4.820614958546048e-05, "loss": 11.8539, "step": 25089 }, { "epoch": 1.3662507742677639, "grad_norm": 0.5527700762439866, "learning_rate": 4.819860649136919e-05, "loss": 11.9079, "step": 25090 }, { "epoch": 1.3663052282643469, "grad_norm": 0.5337874650232489, "learning_rate": 4.8191063800094175e-05, "loss": 11.9632, "step": 25091 }, { "epoch": 1.36635968226093, "grad_norm": 0.5312908667458723, "learning_rate": 4.818352151169418e-05, "loss": 11.9241, "step": 25092 }, { "epoch": 1.366414136257513, "grad_norm": 0.5200527242232768, "learning_rate": 4.8175979626227806e-05, "loss": 11.8154, "step": 25093 }, { "epoch": 1.366468590254096, "grad_norm": 0.6659346802966777, "learning_rate": 4.816843814375367e-05, "loss": 12.0484, "step": 25094 }, { "epoch": 1.366523044250679, "grad_norm": 0.5748616208321881, "learning_rate": 4.816089706433048e-05, "loss": 11.8186, "step": 25095 }, { "epoch": 1.366577498247262, "grad_norm": 0.59562321821412, "learning_rate": 4.8153356388016827e-05, "loss": 11.965, "step": 25096 }, { "epoch": 1.366631952243845, "grad_norm": 0.5705926117797742, "learning_rate": 4.814581611487139e-05, "loss": 11.8715, "step": 25097 }, { "epoch": 1.366686406240428, "grad_norm": 0.5407234790532826, "learning_rate": 4.8138276244952754e-05, "loss": 11.7471, "step": 25098 }, { "epoch": 1.366740860237011, "grad_norm": 0.6501824942662773, "learning_rate": 4.813073677831958e-05, "loss": 11.9337, "step": 25099 }, { "epoch": 1.366795314233594, "grad_norm": 0.5784681159521441, "learning_rate": 4.812319771503053e-05, "loss": 11.9465, "step": 25100 }, { "epoch": 1.366849768230177, "grad_norm": 0.6056400943832342, "learning_rate": 4.811565905514416e-05, "loss": 12.0192, "step": 25101 }, { "epoch": 1.36690422222676, "grad_norm": 0.5396109459761824, "learning_rate": 4.810812079871916e-05, "loss": 11.9325, "step": 25102 }, { "epoch": 1.366958676223343, "grad_norm": 0.5546552113436038, "learning_rate": 4.810058294581412e-05, "loss": 11.9535, "step": 25103 }, { "epoch": 1.367013130219926, "grad_norm": 0.5599864368610015, "learning_rate": 4.809304549648761e-05, "loss": 11.9501, "step": 25104 }, { "epoch": 1.367067584216509, "grad_norm": 0.49451505886383473, "learning_rate": 4.808550845079832e-05, "loss": 11.9211, "step": 25105 }, { "epoch": 1.367122038213092, "grad_norm": 0.55109309273116, "learning_rate": 4.807797180880479e-05, "loss": 11.9217, "step": 25106 }, { "epoch": 1.3671764922096752, "grad_norm": 0.537961764596016, "learning_rate": 4.8070435570565685e-05, "loss": 11.798, "step": 25107 }, { "epoch": 1.3672309462062582, "grad_norm": 0.541574789119973, "learning_rate": 4.806289973613956e-05, "loss": 11.7621, "step": 25108 }, { "epoch": 1.3672854002028412, "grad_norm": 0.5324905199750999, "learning_rate": 4.8055364305585014e-05, "loss": 11.9363, "step": 25109 }, { "epoch": 1.3673398541994242, "grad_norm": 0.5219113301890548, "learning_rate": 4.804782927896072e-05, "loss": 11.8879, "step": 25110 }, { "epoch": 1.3673943081960072, "grad_norm": 0.5669892893288252, "learning_rate": 4.804029465632518e-05, "loss": 11.9929, "step": 25111 }, { "epoch": 1.3674487621925902, "grad_norm": 0.5728257842324002, "learning_rate": 4.8032760437737055e-05, "loss": 11.8895, "step": 25112 }, { "epoch": 1.3675032161891731, "grad_norm": 0.5028461713954278, "learning_rate": 4.8025226623254906e-05, "loss": 11.8547, "step": 25113 }, { "epoch": 1.3675576701857561, "grad_norm": 0.51435943829678, "learning_rate": 4.8017693212937266e-05, "loss": 11.8178, "step": 25114 }, { "epoch": 1.3676121241823391, "grad_norm": 0.5929600916261433, "learning_rate": 4.8010160206842803e-05, "loss": 11.7993, "step": 25115 }, { "epoch": 1.3676665781789223, "grad_norm": 0.5382834120972866, "learning_rate": 4.800262760503002e-05, "loss": 11.9301, "step": 25116 }, { "epoch": 1.3677210321755053, "grad_norm": 0.49008064164726056, "learning_rate": 4.799509540755754e-05, "loss": 11.935, "step": 25117 }, { "epoch": 1.3677754861720883, "grad_norm": 0.5619016132843284, "learning_rate": 4.798756361448391e-05, "loss": 11.8332, "step": 25118 }, { "epoch": 1.3678299401686713, "grad_norm": 0.4842402516284035, "learning_rate": 4.798003222586773e-05, "loss": 11.9025, "step": 25119 }, { "epoch": 1.3678843941652543, "grad_norm": 0.5270994832602205, "learning_rate": 4.797250124176751e-05, "loss": 11.7359, "step": 25120 }, { "epoch": 1.3679388481618373, "grad_norm": 0.5546253579220396, "learning_rate": 4.796497066224184e-05, "loss": 11.8169, "step": 25121 }, { "epoch": 1.3679933021584203, "grad_norm": 0.5895691163162081, "learning_rate": 4.795744048734932e-05, "loss": 11.8789, "step": 25122 }, { "epoch": 1.3680477561550033, "grad_norm": 0.5342975863541174, "learning_rate": 4.794991071714848e-05, "loss": 11.9294, "step": 25123 }, { "epoch": 1.3681022101515863, "grad_norm": 0.5366704933948835, "learning_rate": 4.794238135169783e-05, "loss": 11.8725, "step": 25124 }, { "epoch": 1.3681566641481693, "grad_norm": 0.5003059131680908, "learning_rate": 4.7934852391055985e-05, "loss": 11.9016, "step": 25125 }, { "epoch": 1.3682111181447523, "grad_norm": 0.5766781270961125, "learning_rate": 4.792732383528141e-05, "loss": 12.0251, "step": 25126 }, { "epoch": 1.3682655721413353, "grad_norm": 0.5609779498089513, "learning_rate": 4.791979568443274e-05, "loss": 11.8133, "step": 25127 }, { "epoch": 1.3683200261379183, "grad_norm": 0.5411245033745566, "learning_rate": 4.7912267938568445e-05, "loss": 12.0362, "step": 25128 }, { "epoch": 1.3683744801345012, "grad_norm": 0.5237389445196476, "learning_rate": 4.7904740597747124e-05, "loss": 11.9776, "step": 25129 }, { "epoch": 1.3684289341310845, "grad_norm": 0.7018697693221225, "learning_rate": 4.789721366202724e-05, "loss": 12.1431, "step": 25130 }, { "epoch": 1.3684833881276675, "grad_norm": 0.5315774511077166, "learning_rate": 4.7889687131467355e-05, "loss": 11.8995, "step": 25131 }, { "epoch": 1.3685378421242504, "grad_norm": 0.5699725123374426, "learning_rate": 4.7882161006126034e-05, "loss": 12.0593, "step": 25132 }, { "epoch": 1.3685922961208334, "grad_norm": 0.5555549938187669, "learning_rate": 4.7874635286061776e-05, "loss": 11.9752, "step": 25133 }, { "epoch": 1.3686467501174164, "grad_norm": 0.5380393157728871, "learning_rate": 4.78671099713331e-05, "loss": 11.9835, "step": 25134 }, { "epoch": 1.3687012041139994, "grad_norm": 0.5505625833372509, "learning_rate": 4.785958506199848e-05, "loss": 11.8858, "step": 25135 }, { "epoch": 1.3687556581105824, "grad_norm": 0.5378844181894424, "learning_rate": 4.7852060558116466e-05, "loss": 11.8883, "step": 25136 }, { "epoch": 1.3688101121071654, "grad_norm": 0.5239550100816478, "learning_rate": 4.7844536459745615e-05, "loss": 11.9211, "step": 25137 }, { "epoch": 1.3688645661037484, "grad_norm": 0.5158765324469426, "learning_rate": 4.783701276694436e-05, "loss": 11.774, "step": 25138 }, { "epoch": 1.3689190201003316, "grad_norm": 0.544013191229186, "learning_rate": 4.782948947977127e-05, "loss": 11.8005, "step": 25139 }, { "epoch": 1.3689734740969146, "grad_norm": 0.6405344375573496, "learning_rate": 4.7821966598284786e-05, "loss": 11.8133, "step": 25140 }, { "epoch": 1.3690279280934976, "grad_norm": 0.5154339333499073, "learning_rate": 4.7814444122543476e-05, "loss": 11.8936, "step": 25141 }, { "epoch": 1.3690823820900806, "grad_norm": 0.5362485119681301, "learning_rate": 4.7806922052605765e-05, "loss": 11.8353, "step": 25142 }, { "epoch": 1.3691368360866636, "grad_norm": 0.5109631313283256, "learning_rate": 4.779940038853018e-05, "loss": 12.0095, "step": 25143 }, { "epoch": 1.3691912900832466, "grad_norm": 0.5810936133366243, "learning_rate": 4.7791879130375286e-05, "loss": 11.9121, "step": 25144 }, { "epoch": 1.3692457440798296, "grad_norm": 0.5453383983930444, "learning_rate": 4.778435827819943e-05, "loss": 11.8733, "step": 25145 }, { "epoch": 1.3693001980764126, "grad_norm": 0.6126937632389491, "learning_rate": 4.777683783206118e-05, "loss": 11.8779, "step": 25146 }, { "epoch": 1.3693546520729956, "grad_norm": 0.5910559879078056, "learning_rate": 4.7769317792018974e-05, "loss": 11.9472, "step": 25147 }, { "epoch": 1.3694091060695786, "grad_norm": 0.5774818513328772, "learning_rate": 4.77617981581313e-05, "loss": 11.918, "step": 25148 }, { "epoch": 1.3694635600661615, "grad_norm": 0.5975863467060338, "learning_rate": 4.7754278930456685e-05, "loss": 12.0273, "step": 25149 }, { "epoch": 1.3695180140627445, "grad_norm": 0.5685265374387557, "learning_rate": 4.774676010905353e-05, "loss": 11.905, "step": 25150 }, { "epoch": 1.3695724680593275, "grad_norm": 0.5739833439041315, "learning_rate": 4.7739241693980366e-05, "loss": 11.9733, "step": 25151 }, { "epoch": 1.3696269220559105, "grad_norm": 0.5565029887800292, "learning_rate": 4.773172368529558e-05, "loss": 11.8743, "step": 25152 }, { "epoch": 1.3696813760524935, "grad_norm": 0.5897144964944023, "learning_rate": 4.772420608305771e-05, "loss": 12.0013, "step": 25153 }, { "epoch": 1.3697358300490767, "grad_norm": 0.5215098716775922, "learning_rate": 4.771668888732519e-05, "loss": 11.794, "step": 25154 }, { "epoch": 1.3697902840456597, "grad_norm": 0.5405015588673393, "learning_rate": 4.770917209815642e-05, "loss": 11.9028, "step": 25155 }, { "epoch": 1.3698447380422427, "grad_norm": 0.5862248229033459, "learning_rate": 4.770165571560994e-05, "loss": 11.9305, "step": 25156 }, { "epoch": 1.3698991920388257, "grad_norm": 0.5012201539882242, "learning_rate": 4.769413973974412e-05, "loss": 11.8819, "step": 25157 }, { "epoch": 1.3699536460354087, "grad_norm": 0.6094271616706446, "learning_rate": 4.768662417061743e-05, "loss": 12.0085, "step": 25158 }, { "epoch": 1.3700081000319917, "grad_norm": 0.5649822763892307, "learning_rate": 4.767910900828837e-05, "loss": 11.9952, "step": 25159 }, { "epoch": 1.3700625540285747, "grad_norm": 0.5411521046562873, "learning_rate": 4.76715942528153e-05, "loss": 11.9949, "step": 25160 }, { "epoch": 1.3701170080251577, "grad_norm": 0.5488463848978068, "learning_rate": 4.766407990425671e-05, "loss": 11.9352, "step": 25161 }, { "epoch": 1.370171462021741, "grad_norm": 0.5654767948971301, "learning_rate": 4.765656596267099e-05, "loss": 11.8449, "step": 25162 }, { "epoch": 1.3702259160183239, "grad_norm": 0.5506834463189413, "learning_rate": 4.7649052428116614e-05, "loss": 11.8977, "step": 25163 }, { "epoch": 1.3702803700149069, "grad_norm": 0.5271581165985557, "learning_rate": 4.7641539300651994e-05, "loss": 11.9711, "step": 25164 }, { "epoch": 1.3703348240114899, "grad_norm": 0.568354222805055, "learning_rate": 4.763402658033551e-05, "loss": 11.7845, "step": 25165 }, { "epoch": 1.3703892780080729, "grad_norm": 0.5373291371622491, "learning_rate": 4.7626514267225654e-05, "loss": 11.9826, "step": 25166 }, { "epoch": 1.3704437320046559, "grad_norm": 0.562625605928785, "learning_rate": 4.761900236138076e-05, "loss": 11.9212, "step": 25167 }, { "epoch": 1.3704981860012388, "grad_norm": 0.4849888466500203, "learning_rate": 4.7611490862859333e-05, "loss": 11.84, "step": 25168 }, { "epoch": 1.3705526399978218, "grad_norm": 0.634616885300659, "learning_rate": 4.76039797717197e-05, "loss": 11.8696, "step": 25169 }, { "epoch": 1.3706070939944048, "grad_norm": 0.654571175472763, "learning_rate": 4.75964690880203e-05, "loss": 11.9599, "step": 25170 }, { "epoch": 1.3706615479909878, "grad_norm": 0.5840094561144437, "learning_rate": 4.758895881181959e-05, "loss": 11.9393, "step": 25171 }, { "epoch": 1.3707160019875708, "grad_norm": 0.5412491234691605, "learning_rate": 4.7581448943175886e-05, "loss": 12.0059, "step": 25172 }, { "epoch": 1.3707704559841538, "grad_norm": 0.5806772752957869, "learning_rate": 4.757393948214767e-05, "loss": 11.7347, "step": 25173 }, { "epoch": 1.3708249099807368, "grad_norm": 0.5187746714760103, "learning_rate": 4.756643042879329e-05, "loss": 11.8312, "step": 25174 }, { "epoch": 1.3708793639773198, "grad_norm": 0.526855635336909, "learning_rate": 4.7558921783171095e-05, "loss": 11.8599, "step": 25175 }, { "epoch": 1.3709338179739028, "grad_norm": 0.5579671947534771, "learning_rate": 4.755141354533956e-05, "loss": 11.8686, "step": 25176 }, { "epoch": 1.370988271970486, "grad_norm": 0.5205755938172167, "learning_rate": 4.7543905715356984e-05, "loss": 11.8199, "step": 25177 }, { "epoch": 1.371042725967069, "grad_norm": 0.5795151769743531, "learning_rate": 4.753639829328185e-05, "loss": 11.8959, "step": 25178 }, { "epoch": 1.371097179963652, "grad_norm": 0.5063909628788978, "learning_rate": 4.7528891279172424e-05, "loss": 11.8671, "step": 25179 }, { "epoch": 1.371151633960235, "grad_norm": 0.5889294962063523, "learning_rate": 4.752138467308714e-05, "loss": 11.9673, "step": 25180 }, { "epoch": 1.371206087956818, "grad_norm": 0.49894799623537534, "learning_rate": 4.7513878475084414e-05, "loss": 11.681, "step": 25181 }, { "epoch": 1.371260541953401, "grad_norm": 0.5264034405288752, "learning_rate": 4.750637268522254e-05, "loss": 11.7979, "step": 25182 }, { "epoch": 1.371314995949984, "grad_norm": 0.5741823667048461, "learning_rate": 4.749886730355995e-05, "loss": 11.9187, "step": 25183 }, { "epoch": 1.371369449946567, "grad_norm": 0.5478759188318844, "learning_rate": 4.7491362330154964e-05, "loss": 11.9207, "step": 25184 }, { "epoch": 1.37142390394315, "grad_norm": 0.5566758328287349, "learning_rate": 4.748385776506592e-05, "loss": 11.8836, "step": 25185 }, { "epoch": 1.3714783579397332, "grad_norm": 0.5954818195244415, "learning_rate": 4.747635360835124e-05, "loss": 11.9352, "step": 25186 }, { "epoch": 1.3715328119363162, "grad_norm": 0.5044565346058839, "learning_rate": 4.74688498600692e-05, "loss": 11.8805, "step": 25187 }, { "epoch": 1.3715872659328991, "grad_norm": 0.5042383277607629, "learning_rate": 4.7461346520278236e-05, "loss": 11.8401, "step": 25188 }, { "epoch": 1.3716417199294821, "grad_norm": 0.4840766664645052, "learning_rate": 4.745384358903662e-05, "loss": 11.9215, "step": 25189 }, { "epoch": 1.3716961739260651, "grad_norm": 0.5489749929948173, "learning_rate": 4.7446341066402754e-05, "loss": 11.908, "step": 25190 }, { "epoch": 1.3717506279226481, "grad_norm": 0.526247010486432, "learning_rate": 4.7438838952434916e-05, "loss": 11.8905, "step": 25191 }, { "epoch": 1.3718050819192311, "grad_norm": 0.6014050591841199, "learning_rate": 4.743133724719149e-05, "loss": 12.0415, "step": 25192 }, { "epoch": 1.371859535915814, "grad_norm": 0.5231203855374652, "learning_rate": 4.7423835950730834e-05, "loss": 11.8379, "step": 25193 }, { "epoch": 1.371913989912397, "grad_norm": 0.5238528836274371, "learning_rate": 4.741633506311125e-05, "loss": 12.0402, "step": 25194 }, { "epoch": 1.37196844390898, "grad_norm": 0.4942266262421094, "learning_rate": 4.740883458439106e-05, "loss": 11.8109, "step": 25195 }, { "epoch": 1.372022897905563, "grad_norm": 0.5534674871706498, "learning_rate": 4.740133451462855e-05, "loss": 11.9346, "step": 25196 }, { "epoch": 1.372077351902146, "grad_norm": 0.5036089615406288, "learning_rate": 4.739383485388209e-05, "loss": 11.8729, "step": 25197 }, { "epoch": 1.372131805898729, "grad_norm": 0.5676058382743236, "learning_rate": 4.738633560221003e-05, "loss": 11.9334, "step": 25198 }, { "epoch": 1.372186259895312, "grad_norm": 0.5308956539066537, "learning_rate": 4.7378836759670606e-05, "loss": 11.9188, "step": 25199 }, { "epoch": 1.3722407138918953, "grad_norm": 0.5227422629580815, "learning_rate": 4.737133832632221e-05, "loss": 11.892, "step": 25200 }, { "epoch": 1.3722951678884783, "grad_norm": 0.5778140275861255, "learning_rate": 4.736384030222308e-05, "loss": 11.9294, "step": 25201 }, { "epoch": 1.3723496218850613, "grad_norm": 0.600801053051945, "learning_rate": 4.7356342687431585e-05, "loss": 11.8202, "step": 25202 }, { "epoch": 1.3724040758816443, "grad_norm": 0.6657469160372627, "learning_rate": 4.734884548200597e-05, "loss": 12.0397, "step": 25203 }, { "epoch": 1.3724585298782273, "grad_norm": 0.5863296049708091, "learning_rate": 4.7341348686004596e-05, "loss": 11.8541, "step": 25204 }, { "epoch": 1.3725129838748102, "grad_norm": 0.571748109594584, "learning_rate": 4.733385229948572e-05, "loss": 11.8864, "step": 25205 }, { "epoch": 1.3725674378713932, "grad_norm": 0.5720556581573786, "learning_rate": 4.7326356322507606e-05, "loss": 11.9563, "step": 25206 }, { "epoch": 1.3726218918679762, "grad_norm": 0.6269365995903576, "learning_rate": 4.731886075512858e-05, "loss": 11.8724, "step": 25207 }, { "epoch": 1.3726763458645592, "grad_norm": 0.5166449411134307, "learning_rate": 4.7311365597406964e-05, "loss": 11.7918, "step": 25208 }, { "epoch": 1.3727307998611424, "grad_norm": 0.5363266455847857, "learning_rate": 4.7303870849400964e-05, "loss": 11.8909, "step": 25209 }, { "epoch": 1.3727852538577254, "grad_norm": 0.5368339221215567, "learning_rate": 4.729637651116895e-05, "loss": 11.9264, "step": 25210 }, { "epoch": 1.3728397078543084, "grad_norm": 0.56968156954211, "learning_rate": 4.72888825827691e-05, "loss": 11.9953, "step": 25211 }, { "epoch": 1.3728941618508914, "grad_norm": 0.5571619729912212, "learning_rate": 4.728138906425978e-05, "loss": 11.9766, "step": 25212 }, { "epoch": 1.3729486158474744, "grad_norm": 0.5685060197223936, "learning_rate": 4.7273895955699185e-05, "loss": 11.6695, "step": 25213 }, { "epoch": 1.3730030698440574, "grad_norm": 0.5598016315734076, "learning_rate": 4.726640325714565e-05, "loss": 11.7932, "step": 25214 }, { "epoch": 1.3730575238406404, "grad_norm": 0.5451234135253817, "learning_rate": 4.725891096865742e-05, "loss": 11.8479, "step": 25215 }, { "epoch": 1.3731119778372234, "grad_norm": 0.5907122942209788, "learning_rate": 4.7251419090292694e-05, "loss": 11.9886, "step": 25216 }, { "epoch": 1.3731664318338064, "grad_norm": 0.59937953453529, "learning_rate": 4.724392762210982e-05, "loss": 11.925, "step": 25217 }, { "epoch": 1.3732208858303894, "grad_norm": 0.6053097495226107, "learning_rate": 4.723643656416698e-05, "loss": 11.8532, "step": 25218 }, { "epoch": 1.3732753398269724, "grad_norm": 0.5266506477173108, "learning_rate": 4.722894591652244e-05, "loss": 11.8468, "step": 25219 }, { "epoch": 1.3733297938235554, "grad_norm": 0.5443021348448976, "learning_rate": 4.722145567923452e-05, "loss": 11.8475, "step": 25220 }, { "epoch": 1.3733842478201383, "grad_norm": 0.5543951736863234, "learning_rate": 4.7213965852361364e-05, "loss": 11.8765, "step": 25221 }, { "epoch": 1.3734387018167213, "grad_norm": 0.6131721022895514, "learning_rate": 4.72064764359613e-05, "loss": 11.8663, "step": 25222 }, { "epoch": 1.3734931558133046, "grad_norm": 0.5172387160024928, "learning_rate": 4.71989874300925e-05, "loss": 11.9415, "step": 25223 }, { "epoch": 1.3735476098098875, "grad_norm": 0.5210427710252998, "learning_rate": 4.719149883481326e-05, "loss": 11.9812, "step": 25224 }, { "epoch": 1.3736020638064705, "grad_norm": 0.5658738284479276, "learning_rate": 4.718401065018179e-05, "loss": 11.9707, "step": 25225 }, { "epoch": 1.3736565178030535, "grad_norm": 0.6153394309268462, "learning_rate": 4.717652287625626e-05, "loss": 11.9621, "step": 25226 }, { "epoch": 1.3737109717996365, "grad_norm": 0.50846970213708, "learning_rate": 4.716903551309498e-05, "loss": 11.8379, "step": 25227 }, { "epoch": 1.3737654257962195, "grad_norm": 0.555380332133304, "learning_rate": 4.7161548560756116e-05, "loss": 11.9199, "step": 25228 }, { "epoch": 1.3738198797928025, "grad_norm": 0.5489522499044746, "learning_rate": 4.71540620192979e-05, "loss": 11.8861, "step": 25229 }, { "epoch": 1.3738743337893855, "grad_norm": 0.552731690999242, "learning_rate": 4.714657588877861e-05, "loss": 11.9407, "step": 25230 }, { "epoch": 1.3739287877859685, "grad_norm": 0.6860486499702981, "learning_rate": 4.713909016925637e-05, "loss": 12.0791, "step": 25231 }, { "epoch": 1.3739832417825517, "grad_norm": 0.5805692979565028, "learning_rate": 4.713160486078946e-05, "loss": 11.894, "step": 25232 }, { "epoch": 1.3740376957791347, "grad_norm": 0.5302705322568467, "learning_rate": 4.7124119963436034e-05, "loss": 11.8835, "step": 25233 }, { "epoch": 1.3740921497757177, "grad_norm": 0.5372320335109962, "learning_rate": 4.7116635477254336e-05, "loss": 11.9224, "step": 25234 }, { "epoch": 1.3741466037723007, "grad_norm": 0.6281229387489039, "learning_rate": 4.710915140230255e-05, "loss": 11.8337, "step": 25235 }, { "epoch": 1.3742010577688837, "grad_norm": 0.6253712785976908, "learning_rate": 4.710166773863885e-05, "loss": 11.819, "step": 25236 }, { "epoch": 1.3742555117654667, "grad_norm": 0.5015754396671326, "learning_rate": 4.7094184486321476e-05, "loss": 11.97, "step": 25237 }, { "epoch": 1.3743099657620497, "grad_norm": 0.5246357859823589, "learning_rate": 4.708670164540857e-05, "loss": 11.921, "step": 25238 }, { "epoch": 1.3743644197586327, "grad_norm": 0.5362420472950458, "learning_rate": 4.707921921595838e-05, "loss": 11.9285, "step": 25239 }, { "epoch": 1.3744188737552157, "grad_norm": 0.5114109675130338, "learning_rate": 4.707173719802902e-05, "loss": 11.9729, "step": 25240 }, { "epoch": 1.3744733277517986, "grad_norm": 0.5685769139154091, "learning_rate": 4.70642555916787e-05, "loss": 12.0068, "step": 25241 }, { "epoch": 1.3745277817483816, "grad_norm": 0.5272460377539192, "learning_rate": 4.705677439696565e-05, "loss": 11.8695, "step": 25242 }, { "epoch": 1.3745822357449646, "grad_norm": 0.5174468059830823, "learning_rate": 4.704929361394795e-05, "loss": 11.8628, "step": 25243 }, { "epoch": 1.3746366897415476, "grad_norm": 0.5454333648435192, "learning_rate": 4.7041813242683874e-05, "loss": 11.8649, "step": 25244 }, { "epoch": 1.3746911437381306, "grad_norm": 0.5325871754894, "learning_rate": 4.703433328323155e-05, "loss": 11.9209, "step": 25245 }, { "epoch": 1.3747455977347136, "grad_norm": 0.5632304771927739, "learning_rate": 4.702685373564907e-05, "loss": 11.95, "step": 25246 }, { "epoch": 1.3748000517312968, "grad_norm": 0.5144207844755943, "learning_rate": 4.701937459999471e-05, "loss": 11.9093, "step": 25247 }, { "epoch": 1.3748545057278798, "grad_norm": 0.5334950450649695, "learning_rate": 4.701189587632654e-05, "loss": 11.9731, "step": 25248 }, { "epoch": 1.3749089597244628, "grad_norm": 0.5165965965441954, "learning_rate": 4.7004417564702785e-05, "loss": 11.7983, "step": 25249 }, { "epoch": 1.3749634137210458, "grad_norm": 0.5635104825476014, "learning_rate": 4.699693966518154e-05, "loss": 11.9499, "step": 25250 }, { "epoch": 1.3750178677176288, "grad_norm": 0.5991284988665184, "learning_rate": 4.698946217782101e-05, "loss": 11.8144, "step": 25251 }, { "epoch": 1.3750723217142118, "grad_norm": 0.5224801381198569, "learning_rate": 4.698198510267928e-05, "loss": 11.6778, "step": 25252 }, { "epoch": 1.3751267757107948, "grad_norm": 0.5569192678137207, "learning_rate": 4.6974508439814523e-05, "loss": 11.8938, "step": 25253 }, { "epoch": 1.3751812297073778, "grad_norm": 0.6074234566892095, "learning_rate": 4.6967032189284955e-05, "loss": 11.9493, "step": 25254 }, { "epoch": 1.3752356837039608, "grad_norm": 0.5195477621291372, "learning_rate": 4.695955635114856e-05, "loss": 11.9062, "step": 25255 }, { "epoch": 1.375290137700544, "grad_norm": 0.5788947863090088, "learning_rate": 4.695208092546355e-05, "loss": 11.9226, "step": 25256 }, { "epoch": 1.375344591697127, "grad_norm": 0.5659844363225056, "learning_rate": 4.69446059122881e-05, "loss": 11.6769, "step": 25257 }, { "epoch": 1.37539904569371, "grad_norm": 0.5824028162796828, "learning_rate": 4.693713131168024e-05, "loss": 11.9505, "step": 25258 }, { "epoch": 1.375453499690293, "grad_norm": 0.581703679716766, "learning_rate": 4.69296571236982e-05, "loss": 11.8626, "step": 25259 }, { "epoch": 1.375507953686876, "grad_norm": 0.5528773091074491, "learning_rate": 4.6922183348399996e-05, "loss": 11.8795, "step": 25260 }, { "epoch": 1.375562407683459, "grad_norm": 0.5539793709418636, "learning_rate": 4.6914709985843844e-05, "loss": 12.0209, "step": 25261 }, { "epoch": 1.375616861680042, "grad_norm": 0.557919650522969, "learning_rate": 4.6907237036087756e-05, "loss": 11.9375, "step": 25262 }, { "epoch": 1.375671315676625, "grad_norm": 0.5279663707755494, "learning_rate": 4.689976449918991e-05, "loss": 11.8639, "step": 25263 }, { "epoch": 1.375725769673208, "grad_norm": 0.524774750805883, "learning_rate": 4.6892292375208467e-05, "loss": 11.8669, "step": 25264 }, { "epoch": 1.375780223669791, "grad_norm": 0.5125114864252607, "learning_rate": 4.6884820664201404e-05, "loss": 11.7979, "step": 25265 }, { "epoch": 1.375834677666374, "grad_norm": 0.5581420293347898, "learning_rate": 4.6877349366226906e-05, "loss": 11.7847, "step": 25266 }, { "epoch": 1.375889131662957, "grad_norm": 0.5485033093475268, "learning_rate": 4.686987848134301e-05, "loss": 11.9629, "step": 25267 }, { "epoch": 1.37594358565954, "grad_norm": 0.5842899444584857, "learning_rate": 4.686240800960786e-05, "loss": 11.8865, "step": 25268 }, { "epoch": 1.3759980396561229, "grad_norm": 0.5167122307149978, "learning_rate": 4.6854937951079566e-05, "loss": 11.9847, "step": 25269 }, { "epoch": 1.376052493652706, "grad_norm": 0.5384273414088226, "learning_rate": 4.6847468305816144e-05, "loss": 11.9743, "step": 25270 }, { "epoch": 1.376106947649289, "grad_norm": 0.537810582793952, "learning_rate": 4.683999907387577e-05, "loss": 11.9524, "step": 25271 }, { "epoch": 1.376161401645872, "grad_norm": 0.5046327608039041, "learning_rate": 4.683253025531644e-05, "loss": 11.8534, "step": 25272 }, { "epoch": 1.376215855642455, "grad_norm": 0.529295704030328, "learning_rate": 4.6825061850196304e-05, "loss": 11.8482, "step": 25273 }, { "epoch": 1.376270309639038, "grad_norm": 0.6293232892194328, "learning_rate": 4.68175938585734e-05, "loss": 12.0959, "step": 25274 }, { "epoch": 1.376324763635621, "grad_norm": 0.5299306994355352, "learning_rate": 4.681012628050578e-05, "loss": 11.8061, "step": 25275 }, { "epoch": 1.376379217632204, "grad_norm": 0.5470687913712922, "learning_rate": 4.680265911605157e-05, "loss": 11.961, "step": 25276 }, { "epoch": 1.376433671628787, "grad_norm": 0.6534864760950344, "learning_rate": 4.679519236526877e-05, "loss": 11.8893, "step": 25277 }, { "epoch": 1.37648812562537, "grad_norm": 0.5708609657460811, "learning_rate": 4.678772602821547e-05, "loss": 11.8014, "step": 25278 }, { "epoch": 1.3765425796219533, "grad_norm": 0.515604461607012, "learning_rate": 4.678026010494977e-05, "loss": 11.9886, "step": 25279 }, { "epoch": 1.3765970336185362, "grad_norm": 0.5606195302778902, "learning_rate": 4.6772794595529665e-05, "loss": 11.8297, "step": 25280 }, { "epoch": 1.3766514876151192, "grad_norm": 0.6045793687423382, "learning_rate": 4.676532950001327e-05, "loss": 11.8623, "step": 25281 }, { "epoch": 1.3767059416117022, "grad_norm": 0.5499582869822852, "learning_rate": 4.6757864818458565e-05, "loss": 11.9649, "step": 25282 }, { "epoch": 1.3767603956082852, "grad_norm": 0.586820199174203, "learning_rate": 4.675040055092366e-05, "loss": 11.8915, "step": 25283 }, { "epoch": 1.3768148496048682, "grad_norm": 0.5161861001214637, "learning_rate": 4.6742936697466574e-05, "loss": 11.8473, "step": 25284 }, { "epoch": 1.3768693036014512, "grad_norm": 0.5552569018298428, "learning_rate": 4.673547325814531e-05, "loss": 11.8344, "step": 25285 }, { "epoch": 1.3769237575980342, "grad_norm": 0.5176177753705812, "learning_rate": 4.672801023301797e-05, "loss": 11.9716, "step": 25286 }, { "epoch": 1.3769782115946172, "grad_norm": 0.530667070775309, "learning_rate": 4.672054762214253e-05, "loss": 11.8369, "step": 25287 }, { "epoch": 1.3770326655912002, "grad_norm": 0.5371237937745807, "learning_rate": 4.671308542557707e-05, "loss": 11.8416, "step": 25288 }, { "epoch": 1.3770871195877832, "grad_norm": 0.5502273005808457, "learning_rate": 4.670562364337957e-05, "loss": 11.7173, "step": 25289 }, { "epoch": 1.3771415735843662, "grad_norm": 0.5168596563174641, "learning_rate": 4.669816227560807e-05, "loss": 11.8472, "step": 25290 }, { "epoch": 1.3771960275809492, "grad_norm": 0.5011662809415851, "learning_rate": 4.669070132232063e-05, "loss": 11.9003, "step": 25291 }, { "epoch": 1.3772504815775322, "grad_norm": 0.4688201579640735, "learning_rate": 4.668324078357521e-05, "loss": 11.8583, "step": 25292 }, { "epoch": 1.3773049355741154, "grad_norm": 0.5398657184161583, "learning_rate": 4.667578065942989e-05, "loss": 11.8276, "step": 25293 }, { "epoch": 1.3773593895706984, "grad_norm": 0.5140614422749251, "learning_rate": 4.666832094994259e-05, "loss": 11.7812, "step": 25294 }, { "epoch": 1.3774138435672814, "grad_norm": 0.5605245670159629, "learning_rate": 4.666086165517142e-05, "loss": 11.984, "step": 25295 }, { "epoch": 1.3774682975638644, "grad_norm": 0.5837910327272393, "learning_rate": 4.665340277517434e-05, "loss": 11.8923, "step": 25296 }, { "epoch": 1.3775227515604473, "grad_norm": 0.5181840213998935, "learning_rate": 4.6645944310009295e-05, "loss": 11.8295, "step": 25297 }, { "epoch": 1.3775772055570303, "grad_norm": 0.5549549871853204, "learning_rate": 4.663848625973438e-05, "loss": 11.8531, "step": 25298 }, { "epoch": 1.3776316595536133, "grad_norm": 0.5703559806526367, "learning_rate": 4.66310286244075e-05, "loss": 11.8954, "step": 25299 }, { "epoch": 1.3776861135501963, "grad_norm": 0.5370731923959247, "learning_rate": 4.662357140408673e-05, "loss": 11.915, "step": 25300 }, { "epoch": 1.3777405675467793, "grad_norm": 0.544693483049611, "learning_rate": 4.6616114598829994e-05, "loss": 11.8466, "step": 25301 }, { "epoch": 1.3777950215433625, "grad_norm": 0.5423966923917084, "learning_rate": 4.660865820869529e-05, "loss": 11.8965, "step": 25302 }, { "epoch": 1.3778494755399455, "grad_norm": 0.6431719711641443, "learning_rate": 4.660120223374066e-05, "loss": 11.924, "step": 25303 }, { "epoch": 1.3779039295365285, "grad_norm": 0.5159256627432793, "learning_rate": 4.6593746674023994e-05, "loss": 11.8755, "step": 25304 }, { "epoch": 1.3779583835331115, "grad_norm": 0.5051576987891798, "learning_rate": 4.658629152960335e-05, "loss": 11.8992, "step": 25305 }, { "epoch": 1.3780128375296945, "grad_norm": 0.5555260791708165, "learning_rate": 4.657883680053666e-05, "loss": 11.9193, "step": 25306 }, { "epoch": 1.3780672915262775, "grad_norm": 0.605734829380128, "learning_rate": 4.657138248688185e-05, "loss": 11.8852, "step": 25307 }, { "epoch": 1.3781217455228605, "grad_norm": 0.5981299656884339, "learning_rate": 4.656392858869698e-05, "loss": 11.737, "step": 25308 }, { "epoch": 1.3781761995194435, "grad_norm": 0.5605431575308834, "learning_rate": 4.655647510603991e-05, "loss": 11.8532, "step": 25309 }, { "epoch": 1.3782306535160265, "grad_norm": 0.6098572930186241, "learning_rate": 4.6549022038968704e-05, "loss": 11.8812, "step": 25310 }, { "epoch": 1.3782851075126095, "grad_norm": 0.5475993808226356, "learning_rate": 4.654156938754122e-05, "loss": 11.8025, "step": 25311 }, { "epoch": 1.3783395615091925, "grad_norm": 0.6035442108574312, "learning_rate": 4.653411715181546e-05, "loss": 11.8948, "step": 25312 }, { "epoch": 1.3783940155057754, "grad_norm": 0.6582446198534526, "learning_rate": 4.65266653318494e-05, "loss": 11.9297, "step": 25313 }, { "epoch": 1.3784484695023584, "grad_norm": 0.5647195182100125, "learning_rate": 4.651921392770093e-05, "loss": 11.7704, "step": 25314 }, { "epoch": 1.3785029234989414, "grad_norm": 0.5096170369971617, "learning_rate": 4.651176293942811e-05, "loss": 11.884, "step": 25315 }, { "epoch": 1.3785573774955244, "grad_norm": 0.5158635753513805, "learning_rate": 4.65043123670887e-05, "loss": 11.9498, "step": 25316 }, { "epoch": 1.3786118314921076, "grad_norm": 0.5335481980621415, "learning_rate": 4.649686221074072e-05, "loss": 11.9132, "step": 25317 }, { "epoch": 1.3786662854886906, "grad_norm": 0.6399638169629862, "learning_rate": 4.648941247044216e-05, "loss": 11.9258, "step": 25318 }, { "epoch": 1.3787207394852736, "grad_norm": 0.6079666643576195, "learning_rate": 4.648196314625086e-05, "loss": 11.9187, "step": 25319 }, { "epoch": 1.3787751934818566, "grad_norm": 0.5271068845853558, "learning_rate": 4.647451423822484e-05, "loss": 11.8874, "step": 25320 }, { "epoch": 1.3788296474784396, "grad_norm": 0.5557132555366495, "learning_rate": 4.6467065746421925e-05, "loss": 11.8037, "step": 25321 }, { "epoch": 1.3788841014750226, "grad_norm": 0.6228311442435652, "learning_rate": 4.645961767090012e-05, "loss": 12.0277, "step": 25322 }, { "epoch": 1.3789385554716056, "grad_norm": 0.5514131563069321, "learning_rate": 4.645217001171728e-05, "loss": 11.9728, "step": 25323 }, { "epoch": 1.3789930094681886, "grad_norm": 0.49910688224173033, "learning_rate": 4.644472276893134e-05, "loss": 11.8547, "step": 25324 }, { "epoch": 1.3790474634647718, "grad_norm": 0.5045112355743566, "learning_rate": 4.643727594260029e-05, "loss": 11.9359, "step": 25325 }, { "epoch": 1.3791019174613548, "grad_norm": 0.5641923857059223, "learning_rate": 4.642982953278189e-05, "loss": 12.0442, "step": 25326 }, { "epoch": 1.3791563714579378, "grad_norm": 0.564463834160809, "learning_rate": 4.642238353953412e-05, "loss": 11.8735, "step": 25327 }, { "epoch": 1.3792108254545208, "grad_norm": 0.570900474626188, "learning_rate": 4.641493796291492e-05, "loss": 11.9399, "step": 25328 }, { "epoch": 1.3792652794511038, "grad_norm": 0.5315766014289637, "learning_rate": 4.64074928029821e-05, "loss": 11.8108, "step": 25329 }, { "epoch": 1.3793197334476868, "grad_norm": 0.5351653281513066, "learning_rate": 4.6400048059793656e-05, "loss": 11.9468, "step": 25330 }, { "epoch": 1.3793741874442698, "grad_norm": 0.5941142858822028, "learning_rate": 4.639260373340738e-05, "loss": 11.8205, "step": 25331 }, { "epoch": 1.3794286414408528, "grad_norm": 0.5827751609890155, "learning_rate": 4.638515982388125e-05, "loss": 11.8947, "step": 25332 }, { "epoch": 1.3794830954374357, "grad_norm": 0.5521555797331066, "learning_rate": 4.6377716331273066e-05, "loss": 11.909, "step": 25333 }, { "epoch": 1.3795375494340187, "grad_norm": 0.5263839897161878, "learning_rate": 4.637027325564076e-05, "loss": 11.8809, "step": 25334 }, { "epoch": 1.3795920034306017, "grad_norm": 0.5406684908264028, "learning_rate": 4.636283059704227e-05, "loss": 11.9414, "step": 25335 }, { "epoch": 1.3796464574271847, "grad_norm": 0.5338849962676493, "learning_rate": 4.635538835553533e-05, "loss": 11.8371, "step": 25336 }, { "epoch": 1.3797009114237677, "grad_norm": 0.5699664733897476, "learning_rate": 4.6347946531177935e-05, "loss": 11.9682, "step": 25337 }, { "epoch": 1.3797553654203507, "grad_norm": 0.5638315708518855, "learning_rate": 4.634050512402786e-05, "loss": 11.8624, "step": 25338 }, { "epoch": 1.3798098194169337, "grad_norm": 0.502264483104613, "learning_rate": 4.633306413414301e-05, "loss": 11.8048, "step": 25339 }, { "epoch": 1.379864273413517, "grad_norm": 0.5765177970339285, "learning_rate": 4.63256235615813e-05, "loss": 11.8838, "step": 25340 }, { "epoch": 1.3799187274101, "grad_norm": 0.5291928412229079, "learning_rate": 4.631818340640049e-05, "loss": 11.7628, "step": 25341 }, { "epoch": 1.379973181406683, "grad_norm": 0.5935222709828194, "learning_rate": 4.631074366865855e-05, "loss": 11.9262, "step": 25342 }, { "epoch": 1.380027635403266, "grad_norm": 0.5485942271044094, "learning_rate": 4.630330434841321e-05, "loss": 11.8891, "step": 25343 }, { "epoch": 1.3800820893998489, "grad_norm": 0.6240681136806207, "learning_rate": 4.629586544572243e-05, "loss": 12.0713, "step": 25344 }, { "epoch": 1.3801365433964319, "grad_norm": 0.5251818277570874, "learning_rate": 4.6288426960644006e-05, "loss": 11.9282, "step": 25345 }, { "epoch": 1.3801909973930149, "grad_norm": 0.6202449369655912, "learning_rate": 4.628098889323574e-05, "loss": 12.031, "step": 25346 }, { "epoch": 1.3802454513895979, "grad_norm": 0.5865949498063342, "learning_rate": 4.627355124355556e-05, "loss": 11.914, "step": 25347 }, { "epoch": 1.3802999053861809, "grad_norm": 0.6780010259118175, "learning_rate": 4.626611401166121e-05, "loss": 11.8229, "step": 25348 }, { "epoch": 1.380354359382764, "grad_norm": 0.5327655092422529, "learning_rate": 4.62586771976106e-05, "loss": 11.9316, "step": 25349 }, { "epoch": 1.380408813379347, "grad_norm": 0.6142761650187462, "learning_rate": 4.62512408014615e-05, "loss": 11.9944, "step": 25350 }, { "epoch": 1.38046326737593, "grad_norm": 0.5161875552161428, "learning_rate": 4.6243804823271766e-05, "loss": 11.8983, "step": 25351 }, { "epoch": 1.380517721372513, "grad_norm": 0.5663489760836916, "learning_rate": 4.6236369263099254e-05, "loss": 11.8626, "step": 25352 }, { "epoch": 1.380572175369096, "grad_norm": 0.5621841637946295, "learning_rate": 4.622893412100171e-05, "loss": 12.046, "step": 25353 }, { "epoch": 1.380626629365679, "grad_norm": 0.5500645122406784, "learning_rate": 4.622149939703704e-05, "loss": 11.7661, "step": 25354 }, { "epoch": 1.380681083362262, "grad_norm": 0.5699335645189085, "learning_rate": 4.6214065091263e-05, "loss": 11.8789, "step": 25355 }, { "epoch": 1.380735537358845, "grad_norm": 0.5879388378839406, "learning_rate": 4.620663120373738e-05, "loss": 11.9825, "step": 25356 }, { "epoch": 1.380789991355428, "grad_norm": 0.6442632725935946, "learning_rate": 4.619919773451805e-05, "loss": 11.9544, "step": 25357 }, { "epoch": 1.380844445352011, "grad_norm": 0.5388417844431643, "learning_rate": 4.6191764683662744e-05, "loss": 11.9933, "step": 25358 }, { "epoch": 1.380898899348594, "grad_norm": 0.5419684114350882, "learning_rate": 4.618433205122933e-05, "loss": 11.987, "step": 25359 }, { "epoch": 1.380953353345177, "grad_norm": 0.5144860434373913, "learning_rate": 4.617689983727555e-05, "loss": 11.9195, "step": 25360 }, { "epoch": 1.38100780734176, "grad_norm": 0.5200277033184444, "learning_rate": 4.616946804185921e-05, "loss": 11.9442, "step": 25361 }, { "epoch": 1.381062261338343, "grad_norm": 0.5905218274944044, "learning_rate": 4.6162036665038155e-05, "loss": 11.8349, "step": 25362 }, { "epoch": 1.3811167153349262, "grad_norm": 0.5034152148165404, "learning_rate": 4.61546057068701e-05, "loss": 11.8981, "step": 25363 }, { "epoch": 1.3811711693315092, "grad_norm": 0.5545798446381608, "learning_rate": 4.614717516741289e-05, "loss": 11.9117, "step": 25364 }, { "epoch": 1.3812256233280922, "grad_norm": 0.5808052724832219, "learning_rate": 4.6139745046724294e-05, "loss": 11.8981, "step": 25365 }, { "epoch": 1.3812800773246752, "grad_norm": 0.5312349226885734, "learning_rate": 4.613231534486202e-05, "loss": 11.8773, "step": 25366 }, { "epoch": 1.3813345313212582, "grad_norm": 0.5482902407992518, "learning_rate": 4.6124886061883934e-05, "loss": 11.963, "step": 25367 }, { "epoch": 1.3813889853178412, "grad_norm": 0.5904775818451609, "learning_rate": 4.6117457197847736e-05, "loss": 11.9922, "step": 25368 }, { "epoch": 1.3814434393144241, "grad_norm": 0.5438179334951186, "learning_rate": 4.6110028752811266e-05, "loss": 11.934, "step": 25369 }, { "epoch": 1.3814978933110071, "grad_norm": 0.5615586565282236, "learning_rate": 4.6102600726832204e-05, "loss": 11.8404, "step": 25370 }, { "epoch": 1.3815523473075901, "grad_norm": 0.5757624633721171, "learning_rate": 4.609517311996839e-05, "loss": 11.9536, "step": 25371 }, { "epoch": 1.3816068013041733, "grad_norm": 0.5315687733262756, "learning_rate": 4.608774593227753e-05, "loss": 11.7931, "step": 25372 }, { "epoch": 1.3816612553007563, "grad_norm": 0.510926070633032, "learning_rate": 4.608031916381739e-05, "loss": 11.7279, "step": 25373 }, { "epoch": 1.3817157092973393, "grad_norm": 0.5413068769685286, "learning_rate": 4.607289281464578e-05, "loss": 11.9082, "step": 25374 }, { "epoch": 1.3817701632939223, "grad_norm": 0.5720049337632385, "learning_rate": 4.6065466884820376e-05, "loss": 11.8959, "step": 25375 }, { "epoch": 1.3818246172905053, "grad_norm": 0.5415679879386224, "learning_rate": 4.605804137439892e-05, "loss": 11.702, "step": 25376 }, { "epoch": 1.3818790712870883, "grad_norm": 0.519743435854354, "learning_rate": 4.605061628343922e-05, "loss": 11.9134, "step": 25377 }, { "epoch": 1.3819335252836713, "grad_norm": 0.5362190930944459, "learning_rate": 4.604319161199894e-05, "loss": 11.9347, "step": 25378 }, { "epoch": 1.3819879792802543, "grad_norm": 0.5978464665510768, "learning_rate": 4.603576736013587e-05, "loss": 11.8299, "step": 25379 }, { "epoch": 1.3820424332768373, "grad_norm": 0.601051854487191, "learning_rate": 4.60283435279077e-05, "loss": 11.8637, "step": 25380 }, { "epoch": 1.3820968872734203, "grad_norm": 0.5364435772542022, "learning_rate": 4.602092011537222e-05, "loss": 11.9527, "step": 25381 }, { "epoch": 1.3821513412700033, "grad_norm": 0.5385852669288471, "learning_rate": 4.601349712258708e-05, "loss": 11.8286, "step": 25382 }, { "epoch": 1.3822057952665863, "grad_norm": 0.5261441609401453, "learning_rate": 4.600607454961004e-05, "loss": 11.8859, "step": 25383 }, { "epoch": 1.3822602492631693, "grad_norm": 0.5543163456264253, "learning_rate": 4.599865239649885e-05, "loss": 11.8436, "step": 25384 }, { "epoch": 1.3823147032597523, "grad_norm": 0.5696395854086732, "learning_rate": 4.59912306633112e-05, "loss": 11.8876, "step": 25385 }, { "epoch": 1.3823691572563352, "grad_norm": 0.5372364461671898, "learning_rate": 4.59838093501048e-05, "loss": 11.7971, "step": 25386 }, { "epoch": 1.3824236112529185, "grad_norm": 0.5336524515688087, "learning_rate": 4.597638845693733e-05, "loss": 11.9275, "step": 25387 }, { "epoch": 1.3824780652495015, "grad_norm": 0.5475560823657954, "learning_rate": 4.5968967983866495e-05, "loss": 11.8402, "step": 25388 }, { "epoch": 1.3825325192460844, "grad_norm": 0.5369080152016062, "learning_rate": 4.5961547930950086e-05, "loss": 11.911, "step": 25389 }, { "epoch": 1.3825869732426674, "grad_norm": 0.5314198399045027, "learning_rate": 4.595412829824569e-05, "loss": 11.819, "step": 25390 }, { "epoch": 1.3826414272392504, "grad_norm": 0.5615156119415065, "learning_rate": 4.59467090858111e-05, "loss": 11.8952, "step": 25391 }, { "epoch": 1.3826958812358334, "grad_norm": 0.49312984592688724, "learning_rate": 4.5939290293703926e-05, "loss": 11.7849, "step": 25392 }, { "epoch": 1.3827503352324164, "grad_norm": 0.5322878051254277, "learning_rate": 4.593187192198195e-05, "loss": 11.9094, "step": 25393 }, { "epoch": 1.3828047892289994, "grad_norm": 0.5838399952260754, "learning_rate": 4.5924453970702755e-05, "loss": 11.9369, "step": 25394 }, { "epoch": 1.3828592432255826, "grad_norm": 0.5053029548424226, "learning_rate": 4.591703643992411e-05, "loss": 11.7684, "step": 25395 }, { "epoch": 1.3829136972221656, "grad_norm": 0.5615231519740667, "learning_rate": 4.5909619329703655e-05, "loss": 11.9489, "step": 25396 }, { "epoch": 1.3829681512187486, "grad_norm": 0.5421039509312398, "learning_rate": 4.590220264009903e-05, "loss": 11.9166, "step": 25397 }, { "epoch": 1.3830226052153316, "grad_norm": 0.5251245200652788, "learning_rate": 4.589478637116801e-05, "loss": 11.7721, "step": 25398 }, { "epoch": 1.3830770592119146, "grad_norm": 0.518286402281066, "learning_rate": 4.588737052296815e-05, "loss": 11.8004, "step": 25399 }, { "epoch": 1.3831315132084976, "grad_norm": 0.5281743691607864, "learning_rate": 4.587995509555717e-05, "loss": 11.8995, "step": 25400 }, { "epoch": 1.3831859672050806, "grad_norm": 0.5350429109521715, "learning_rate": 4.587254008899278e-05, "loss": 11.805, "step": 25401 }, { "epoch": 1.3832404212016636, "grad_norm": 0.5528001096401002, "learning_rate": 4.586512550333255e-05, "loss": 11.9221, "step": 25402 }, { "epoch": 1.3832948751982466, "grad_norm": 0.7356239817084284, "learning_rate": 4.5857711338634235e-05, "loss": 11.975, "step": 25403 }, { "epoch": 1.3833493291948296, "grad_norm": 0.5268355168398967, "learning_rate": 4.585029759495538e-05, "loss": 11.951, "step": 25404 }, { "epoch": 1.3834037831914126, "grad_norm": 0.5754927109925986, "learning_rate": 4.5842884272353745e-05, "loss": 12.0442, "step": 25405 }, { "epoch": 1.3834582371879955, "grad_norm": 0.5323192851534655, "learning_rate": 4.583547137088692e-05, "loss": 11.9776, "step": 25406 }, { "epoch": 1.3835126911845785, "grad_norm": 0.6187177938805167, "learning_rate": 4.5828058890612516e-05, "loss": 11.9483, "step": 25407 }, { "epoch": 1.3835671451811615, "grad_norm": 0.5396065121654304, "learning_rate": 4.582064683158823e-05, "loss": 11.8958, "step": 25408 }, { "epoch": 1.3836215991777445, "grad_norm": 0.5310752884032458, "learning_rate": 4.5813235193871665e-05, "loss": 11.8103, "step": 25409 }, { "epoch": 1.3836760531743277, "grad_norm": 0.5781549138458341, "learning_rate": 4.580582397752046e-05, "loss": 12.0116, "step": 25410 }, { "epoch": 1.3837305071709107, "grad_norm": 0.5857054738959128, "learning_rate": 4.5798413182592305e-05, "loss": 11.7495, "step": 25411 }, { "epoch": 1.3837849611674937, "grad_norm": 0.5771792869696822, "learning_rate": 4.5791002809144724e-05, "loss": 11.9045, "step": 25412 }, { "epoch": 1.3838394151640767, "grad_norm": 0.5173491338387444, "learning_rate": 4.5783592857235444e-05, "loss": 11.8329, "step": 25413 }, { "epoch": 1.3838938691606597, "grad_norm": 0.6159445884622708, "learning_rate": 4.577618332692199e-05, "loss": 12.0177, "step": 25414 }, { "epoch": 1.3839483231572427, "grad_norm": 0.6027146149136118, "learning_rate": 4.576877421826208e-05, "loss": 11.9161, "step": 25415 }, { "epoch": 1.3840027771538257, "grad_norm": 0.5776180683335476, "learning_rate": 4.576136553131327e-05, "loss": 11.8918, "step": 25416 }, { "epoch": 1.3840572311504087, "grad_norm": 0.5812378036134579, "learning_rate": 4.575395726613314e-05, "loss": 11.9817, "step": 25417 }, { "epoch": 1.3841116851469917, "grad_norm": 0.5221685414790718, "learning_rate": 4.574654942277937e-05, "loss": 11.901, "step": 25418 }, { "epoch": 1.384166139143575, "grad_norm": 0.5835118401881975, "learning_rate": 4.57391420013095e-05, "loss": 11.9185, "step": 25419 }, { "epoch": 1.3842205931401579, "grad_norm": 0.5535971253639945, "learning_rate": 4.573173500178119e-05, "loss": 11.8291, "step": 25420 }, { "epoch": 1.3842750471367409, "grad_norm": 0.5632179748324755, "learning_rate": 4.5724328424251985e-05, "loss": 11.932, "step": 25421 }, { "epoch": 1.3843295011333239, "grad_norm": 0.565666884742778, "learning_rate": 4.5716922268779495e-05, "loss": 11.8911, "step": 25422 }, { "epoch": 1.3843839551299069, "grad_norm": 0.567112815335102, "learning_rate": 4.570951653542136e-05, "loss": 12.0588, "step": 25423 }, { "epoch": 1.3844384091264899, "grad_norm": 0.5168100326162838, "learning_rate": 4.570211122423509e-05, "loss": 11.8124, "step": 25424 }, { "epoch": 1.3844928631230728, "grad_norm": 0.4863290188853976, "learning_rate": 4.5694706335278346e-05, "loss": 11.8577, "step": 25425 }, { "epoch": 1.3845473171196558, "grad_norm": 0.6330527557283268, "learning_rate": 4.568730186860867e-05, "loss": 12.1193, "step": 25426 }, { "epoch": 1.3846017711162388, "grad_norm": 0.5254447852363948, "learning_rate": 4.5679897824283615e-05, "loss": 11.9341, "step": 25427 }, { "epoch": 1.3846562251128218, "grad_norm": 0.6192901503995081, "learning_rate": 4.56724942023608e-05, "loss": 11.846, "step": 25428 }, { "epoch": 1.3847106791094048, "grad_norm": 0.5601019083569937, "learning_rate": 4.566509100289777e-05, "loss": 12.0017, "step": 25429 }, { "epoch": 1.3847651331059878, "grad_norm": 0.6557075919599537, "learning_rate": 4.565768822595213e-05, "loss": 11.9712, "step": 25430 }, { "epoch": 1.3848195871025708, "grad_norm": 0.5268907779907597, "learning_rate": 4.5650285871581376e-05, "loss": 11.873, "step": 25431 }, { "epoch": 1.3848740410991538, "grad_norm": 0.623219367007664, "learning_rate": 4.564288393984313e-05, "loss": 11.9674, "step": 25432 }, { "epoch": 1.384928495095737, "grad_norm": 0.5609954238970658, "learning_rate": 4.563548243079495e-05, "loss": 11.8812, "step": 25433 }, { "epoch": 1.38498294909232, "grad_norm": 0.5168749717472204, "learning_rate": 4.562808134449436e-05, "loss": 11.9109, "step": 25434 }, { "epoch": 1.385037403088903, "grad_norm": 0.545321160679276, "learning_rate": 4.562068068099895e-05, "loss": 11.8895, "step": 25435 }, { "epoch": 1.385091857085486, "grad_norm": 0.6123168173423389, "learning_rate": 4.561328044036625e-05, "loss": 11.9163, "step": 25436 }, { "epoch": 1.385146311082069, "grad_norm": 0.5544430228111364, "learning_rate": 4.5605880622653766e-05, "loss": 11.8053, "step": 25437 }, { "epoch": 1.385200765078652, "grad_norm": 0.5243571797546914, "learning_rate": 4.559848122791911e-05, "loss": 12.0372, "step": 25438 }, { "epoch": 1.385255219075235, "grad_norm": 0.5226354727460679, "learning_rate": 4.559108225621975e-05, "loss": 11.8838, "step": 25439 }, { "epoch": 1.385309673071818, "grad_norm": 0.5399147634701176, "learning_rate": 4.55836837076133e-05, "loss": 11.9785, "step": 25440 }, { "epoch": 1.385364127068401, "grad_norm": 0.5556679512831352, "learning_rate": 4.557628558215722e-05, "loss": 11.7897, "step": 25441 }, { "epoch": 1.3854185810649842, "grad_norm": 0.5826198731737683, "learning_rate": 4.5568887879909096e-05, "loss": 11.6895, "step": 25442 }, { "epoch": 1.3854730350615672, "grad_norm": 0.5721627565551967, "learning_rate": 4.556149060092639e-05, "loss": 11.9325, "step": 25443 }, { "epoch": 1.3855274890581502, "grad_norm": 0.627853917146858, "learning_rate": 4.555409374526668e-05, "loss": 11.8464, "step": 25444 }, { "epoch": 1.3855819430547331, "grad_norm": 0.5387585089526937, "learning_rate": 4.5546697312987484e-05, "loss": 11.9004, "step": 25445 }, { "epoch": 1.3856363970513161, "grad_norm": 0.5306238776071194, "learning_rate": 4.553930130414631e-05, "loss": 11.9283, "step": 25446 }, { "epoch": 1.3856908510478991, "grad_norm": 0.5280528107632034, "learning_rate": 4.5531905718800627e-05, "loss": 11.858, "step": 25447 }, { "epoch": 1.3857453050444821, "grad_norm": 0.5283147579328331, "learning_rate": 4.5524510557008014e-05, "loss": 11.7974, "step": 25448 }, { "epoch": 1.3857997590410651, "grad_norm": 0.5155778636904704, "learning_rate": 4.551711581882591e-05, "loss": 11.8523, "step": 25449 }, { "epoch": 1.385854213037648, "grad_norm": 0.5445239226906861, "learning_rate": 4.5509721504311877e-05, "loss": 11.909, "step": 25450 }, { "epoch": 1.385908667034231, "grad_norm": 0.5471540714237298, "learning_rate": 4.550232761352335e-05, "loss": 11.9284, "step": 25451 }, { "epoch": 1.385963121030814, "grad_norm": 0.7100869057455694, "learning_rate": 4.5494934146517906e-05, "loss": 12.0101, "step": 25452 }, { "epoch": 1.386017575027397, "grad_norm": 0.5891335356799535, "learning_rate": 4.548754110335297e-05, "loss": 11.8092, "step": 25453 }, { "epoch": 1.38607202902398, "grad_norm": 0.5924728538463313, "learning_rate": 4.548014848408607e-05, "loss": 11.883, "step": 25454 }, { "epoch": 1.386126483020563, "grad_norm": 0.5510637713760189, "learning_rate": 4.5472756288774656e-05, "loss": 11.8986, "step": 25455 }, { "epoch": 1.386180937017146, "grad_norm": 0.5799269518433228, "learning_rate": 4.5465364517476275e-05, "loss": 12.1175, "step": 25456 }, { "epoch": 1.3862353910137293, "grad_norm": 0.500171227649584, "learning_rate": 4.545797317024835e-05, "loss": 11.8863, "step": 25457 }, { "epoch": 1.3862898450103123, "grad_norm": 0.5859140594240135, "learning_rate": 4.545058224714834e-05, "loss": 11.8102, "step": 25458 }, { "epoch": 1.3863442990068953, "grad_norm": 0.5806878009529896, "learning_rate": 4.544319174823376e-05, "loss": 11.9901, "step": 25459 }, { "epoch": 1.3863987530034783, "grad_norm": 0.5696105103622016, "learning_rate": 4.5435801673562096e-05, "loss": 11.9582, "step": 25460 }, { "epoch": 1.3864532070000612, "grad_norm": 0.5470078970069159, "learning_rate": 4.542841202319076e-05, "loss": 11.8457, "step": 25461 }, { "epoch": 1.3865076609966442, "grad_norm": 0.6089801746010085, "learning_rate": 4.542102279717727e-05, "loss": 11.8616, "step": 25462 }, { "epoch": 1.3865621149932272, "grad_norm": 0.5430565653973743, "learning_rate": 4.541363399557903e-05, "loss": 11.8596, "step": 25463 }, { "epoch": 1.3866165689898102, "grad_norm": 0.5840183025092082, "learning_rate": 4.540624561845356e-05, "loss": 11.7728, "step": 25464 }, { "epoch": 1.3866710229863934, "grad_norm": 0.5835431124442011, "learning_rate": 4.5398857665858243e-05, "loss": 11.8373, "step": 25465 }, { "epoch": 1.3867254769829764, "grad_norm": 0.503293458535758, "learning_rate": 4.53914701378506e-05, "loss": 11.7757, "step": 25466 }, { "epoch": 1.3867799309795594, "grad_norm": 0.5566424047691468, "learning_rate": 4.538408303448804e-05, "loss": 11.7665, "step": 25467 }, { "epoch": 1.3868343849761424, "grad_norm": 0.6272897259346766, "learning_rate": 4.537669635582799e-05, "loss": 11.9857, "step": 25468 }, { "epoch": 1.3868888389727254, "grad_norm": 0.6602007550727755, "learning_rate": 4.5369310101927933e-05, "loss": 11.8863, "step": 25469 }, { "epoch": 1.3869432929693084, "grad_norm": 0.5598081238967012, "learning_rate": 4.5361924272845246e-05, "loss": 11.9909, "step": 25470 }, { "epoch": 1.3869977469658914, "grad_norm": 0.5666801433389889, "learning_rate": 4.5354538868637395e-05, "loss": 11.8528, "step": 25471 }, { "epoch": 1.3870522009624744, "grad_norm": 0.5043592237807832, "learning_rate": 4.534715388936186e-05, "loss": 11.9294, "step": 25472 }, { "epoch": 1.3871066549590574, "grad_norm": 0.5636143197585487, "learning_rate": 4.5339769335075986e-05, "loss": 11.9502, "step": 25473 }, { "epoch": 1.3871611089556404, "grad_norm": 0.5240660368865463, "learning_rate": 4.533238520583726e-05, "loss": 11.9469, "step": 25474 }, { "epoch": 1.3872155629522234, "grad_norm": 0.5128105974233724, "learning_rate": 4.532500150170305e-05, "loss": 11.9357, "step": 25475 }, { "epoch": 1.3872700169488064, "grad_norm": 0.5404955144886905, "learning_rate": 4.531761822273082e-05, "loss": 11.9008, "step": 25476 }, { "epoch": 1.3873244709453894, "grad_norm": 0.5251588220746668, "learning_rate": 4.531023536897797e-05, "loss": 11.8544, "step": 25477 }, { "epoch": 1.3873789249419723, "grad_norm": 0.5430300012882553, "learning_rate": 4.530285294050186e-05, "loss": 11.9175, "step": 25478 }, { "epoch": 1.3874333789385553, "grad_norm": 0.5686834287718434, "learning_rate": 4.5295470937359976e-05, "loss": 12.0042, "step": 25479 }, { "epoch": 1.3874878329351386, "grad_norm": 0.5858173784688535, "learning_rate": 4.528808935960964e-05, "loss": 11.9313, "step": 25480 }, { "epoch": 1.3875422869317215, "grad_norm": 0.5593095346896827, "learning_rate": 4.528070820730831e-05, "loss": 11.8404, "step": 25481 }, { "epoch": 1.3875967409283045, "grad_norm": 0.5063867083757851, "learning_rate": 4.5273327480513395e-05, "loss": 11.8159, "step": 25482 }, { "epoch": 1.3876511949248875, "grad_norm": 0.6199157314152283, "learning_rate": 4.526594717928223e-05, "loss": 11.986, "step": 25483 }, { "epoch": 1.3877056489214705, "grad_norm": 0.5319134109511767, "learning_rate": 4.5258567303672286e-05, "loss": 11.9429, "step": 25484 }, { "epoch": 1.3877601029180535, "grad_norm": 0.583256809519084, "learning_rate": 4.525118785374085e-05, "loss": 11.8272, "step": 25485 }, { "epoch": 1.3878145569146365, "grad_norm": 0.5252811924613185, "learning_rate": 4.52438088295454e-05, "loss": 11.9259, "step": 25486 }, { "epoch": 1.3878690109112195, "grad_norm": 0.6129171070906239, "learning_rate": 4.523643023114328e-05, "loss": 11.8242, "step": 25487 }, { "epoch": 1.3879234649078025, "grad_norm": 0.5827912163869879, "learning_rate": 4.522905205859182e-05, "loss": 12.0187, "step": 25488 }, { "epoch": 1.3879779189043857, "grad_norm": 0.48887177592055336, "learning_rate": 4.522167431194848e-05, "loss": 11.9158, "step": 25489 }, { "epoch": 1.3880323729009687, "grad_norm": 0.5907938633320445, "learning_rate": 4.521429699127054e-05, "loss": 11.9518, "step": 25490 }, { "epoch": 1.3880868268975517, "grad_norm": 0.540746337114204, "learning_rate": 4.5206920096615455e-05, "loss": 11.9923, "step": 25491 }, { "epoch": 1.3881412808941347, "grad_norm": 0.5476902998471366, "learning_rate": 4.519954362804052e-05, "loss": 11.8386, "step": 25492 }, { "epoch": 1.3881957348907177, "grad_norm": 0.5363041472656379, "learning_rate": 4.519216758560312e-05, "loss": 11.8305, "step": 25493 }, { "epoch": 1.3882501888873007, "grad_norm": 0.5791260720557126, "learning_rate": 4.518479196936064e-05, "loss": 11.9111, "step": 25494 }, { "epoch": 1.3883046428838837, "grad_norm": 0.6012232839727382, "learning_rate": 4.5177416779370386e-05, "loss": 11.9442, "step": 25495 }, { "epoch": 1.3883590968804667, "grad_norm": 0.559152603766884, "learning_rate": 4.5170042015689765e-05, "loss": 11.9554, "step": 25496 }, { "epoch": 1.3884135508770497, "grad_norm": 0.5294207091331442, "learning_rate": 4.5162667678376094e-05, "loss": 11.8791, "step": 25497 }, { "epoch": 1.3884680048736326, "grad_norm": 0.5144270454198645, "learning_rate": 4.5155293767486686e-05, "loss": 11.8537, "step": 25498 }, { "epoch": 1.3885224588702156, "grad_norm": 0.5470857206995124, "learning_rate": 4.5147920283078936e-05, "loss": 11.8058, "step": 25499 }, { "epoch": 1.3885769128667986, "grad_norm": 0.5702107730197208, "learning_rate": 4.514054722521013e-05, "loss": 11.9726, "step": 25500 }, { "epoch": 1.3886313668633816, "grad_norm": 0.5766661552214912, "learning_rate": 4.513317459393765e-05, "loss": 11.8671, "step": 25501 }, { "epoch": 1.3886858208599646, "grad_norm": 0.5540396004218782, "learning_rate": 4.5125802389318785e-05, "loss": 11.9179, "step": 25502 }, { "epoch": 1.3887402748565478, "grad_norm": 0.5613776022593168, "learning_rate": 4.511843061141091e-05, "loss": 11.9476, "step": 25503 }, { "epoch": 1.3887947288531308, "grad_norm": 0.524968666061809, "learning_rate": 4.5111059260271294e-05, "loss": 11.9534, "step": 25504 }, { "epoch": 1.3888491828497138, "grad_norm": 0.5134529876106015, "learning_rate": 4.5103688335957276e-05, "loss": 11.8218, "step": 25505 }, { "epoch": 1.3889036368462968, "grad_norm": 0.550685330143858, "learning_rate": 4.509631783852626e-05, "loss": 11.9059, "step": 25506 }, { "epoch": 1.3889580908428798, "grad_norm": 0.5379171415541388, "learning_rate": 4.508894776803542e-05, "loss": 11.9005, "step": 25507 }, { "epoch": 1.3890125448394628, "grad_norm": 0.5284340806057991, "learning_rate": 4.5081578124542126e-05, "loss": 11.952, "step": 25508 }, { "epoch": 1.3890669988360458, "grad_norm": 0.5385966255964703, "learning_rate": 4.507420890810372e-05, "loss": 11.7745, "step": 25509 }, { "epoch": 1.3891214528326288, "grad_norm": 0.513349547827117, "learning_rate": 4.506684011877744e-05, "loss": 11.8494, "step": 25510 }, { "epoch": 1.3891759068292118, "grad_norm": 0.5573192302187278, "learning_rate": 4.505947175662066e-05, "loss": 11.959, "step": 25511 }, { "epoch": 1.389230360825795, "grad_norm": 0.5856179498725235, "learning_rate": 4.505210382169062e-05, "loss": 12.0187, "step": 25512 }, { "epoch": 1.389284814822378, "grad_norm": 0.5847391135503383, "learning_rate": 4.504473631404465e-05, "loss": 11.9218, "step": 25513 }, { "epoch": 1.389339268818961, "grad_norm": 0.5553770178542953, "learning_rate": 4.503736923374e-05, "loss": 11.8982, "step": 25514 }, { "epoch": 1.389393722815544, "grad_norm": 0.5086326480257842, "learning_rate": 4.5030002580833985e-05, "loss": 11.6923, "step": 25515 }, { "epoch": 1.389448176812127, "grad_norm": 0.5409528637701126, "learning_rate": 4.5022636355383966e-05, "loss": 11.9011, "step": 25516 }, { "epoch": 1.38950263080871, "grad_norm": 0.5161787951550454, "learning_rate": 4.501527055744707e-05, "loss": 11.8725, "step": 25517 }, { "epoch": 1.389557084805293, "grad_norm": 0.5369030135149553, "learning_rate": 4.500790518708068e-05, "loss": 11.6291, "step": 25518 }, { "epoch": 1.389611538801876, "grad_norm": 0.5515006321898562, "learning_rate": 4.5000540244342015e-05, "loss": 11.858, "step": 25519 }, { "epoch": 1.389665992798459, "grad_norm": 0.6431150513188725, "learning_rate": 4.4993175729288374e-05, "loss": 11.751, "step": 25520 }, { "epoch": 1.389720446795042, "grad_norm": 0.6076053213737065, "learning_rate": 4.498581164197705e-05, "loss": 12.0461, "step": 25521 }, { "epoch": 1.389774900791625, "grad_norm": 0.46667096650716783, "learning_rate": 4.4978447982465247e-05, "loss": 11.7802, "step": 25522 }, { "epoch": 1.389829354788208, "grad_norm": 0.5114626162584534, "learning_rate": 4.49710847508103e-05, "loss": 12.0273, "step": 25523 }, { "epoch": 1.389883808784791, "grad_norm": 0.6418240926117168, "learning_rate": 4.4963721947069383e-05, "loss": 11.9599, "step": 25524 }, { "epoch": 1.389938262781374, "grad_norm": 0.4839788863961615, "learning_rate": 4.495635957129983e-05, "loss": 11.7584, "step": 25525 }, { "epoch": 1.389992716777957, "grad_norm": 0.5026526852372909, "learning_rate": 4.494899762355887e-05, "loss": 11.8142, "step": 25526 }, { "epoch": 1.39004717077454, "grad_norm": 0.5296149944903044, "learning_rate": 4.4941636103903684e-05, "loss": 11.9144, "step": 25527 }, { "epoch": 1.390101624771123, "grad_norm": 0.515495589090639, "learning_rate": 4.493427501239161e-05, "loss": 11.8679, "step": 25528 }, { "epoch": 1.390156078767706, "grad_norm": 0.5398518962699593, "learning_rate": 4.492691434907982e-05, "loss": 11.8615, "step": 25529 }, { "epoch": 1.390210532764289, "grad_norm": 0.581374360757756, "learning_rate": 4.491955411402557e-05, "loss": 11.9026, "step": 25530 }, { "epoch": 1.390264986760872, "grad_norm": 0.5838567757464395, "learning_rate": 4.491219430728615e-05, "loss": 11.9498, "step": 25531 }, { "epoch": 1.390319440757455, "grad_norm": 0.5434824934602308, "learning_rate": 4.4904834928918696e-05, "loss": 11.8908, "step": 25532 }, { "epoch": 1.390373894754038, "grad_norm": 0.5400376119511892, "learning_rate": 4.489747597898053e-05, "loss": 11.8023, "step": 25533 }, { "epoch": 1.390428348750621, "grad_norm": 0.5228692699215824, "learning_rate": 4.489011745752879e-05, "loss": 11.87, "step": 25534 }, { "epoch": 1.3904828027472043, "grad_norm": 0.5350715212943529, "learning_rate": 4.4882759364620787e-05, "loss": 11.944, "step": 25535 }, { "epoch": 1.3905372567437873, "grad_norm": 0.5297107854174508, "learning_rate": 4.487540170031368e-05, "loss": 11.9025, "step": 25536 }, { "epoch": 1.3905917107403702, "grad_norm": 0.5217943928297886, "learning_rate": 4.486804446466465e-05, "loss": 11.8636, "step": 25537 }, { "epoch": 1.3906461647369532, "grad_norm": 0.5646540885623449, "learning_rate": 4.4860687657731004e-05, "loss": 11.9166, "step": 25538 }, { "epoch": 1.3907006187335362, "grad_norm": 0.5172199674405743, "learning_rate": 4.4853331279569856e-05, "loss": 11.7803, "step": 25539 }, { "epoch": 1.3907550727301192, "grad_norm": 0.5196236659981314, "learning_rate": 4.484597533023849e-05, "loss": 11.8839, "step": 25540 }, { "epoch": 1.3908095267267022, "grad_norm": 0.5585562010892676, "learning_rate": 4.4838619809794025e-05, "loss": 11.8597, "step": 25541 }, { "epoch": 1.3908639807232852, "grad_norm": 0.511576594652112, "learning_rate": 4.483126471829371e-05, "loss": 11.8981, "step": 25542 }, { "epoch": 1.3909184347198682, "grad_norm": 0.6139840837053527, "learning_rate": 4.482391005579476e-05, "loss": 12.0498, "step": 25543 }, { "epoch": 1.3909728887164512, "grad_norm": 0.5784039918772768, "learning_rate": 4.4816555822354314e-05, "loss": 11.9092, "step": 25544 }, { "epoch": 1.3910273427130342, "grad_norm": 0.5074897671565245, "learning_rate": 4.480920201802961e-05, "loss": 11.6981, "step": 25545 }, { "epoch": 1.3910817967096172, "grad_norm": 0.4931749930730538, "learning_rate": 4.480184864287781e-05, "loss": 11.872, "step": 25546 }, { "epoch": 1.3911362507062002, "grad_norm": 0.5373058663523753, "learning_rate": 4.479449569695606e-05, "loss": 11.7687, "step": 25547 }, { "epoch": 1.3911907047027832, "grad_norm": 0.5148041142482348, "learning_rate": 4.47871431803216e-05, "loss": 11.8017, "step": 25548 }, { "epoch": 1.3912451586993662, "grad_norm": 0.5340044989149939, "learning_rate": 4.477979109303154e-05, "loss": 11.9403, "step": 25549 }, { "epoch": 1.3912996126959494, "grad_norm": 0.4967906977063719, "learning_rate": 4.4772439435143124e-05, "loss": 11.8576, "step": 25550 }, { "epoch": 1.3913540666925324, "grad_norm": 0.5141829250771999, "learning_rate": 4.4765088206713434e-05, "loss": 11.969, "step": 25551 }, { "epoch": 1.3914085206891154, "grad_norm": 0.5594723020273837, "learning_rate": 4.4757737407799724e-05, "loss": 11.8893, "step": 25552 }, { "epoch": 1.3914629746856984, "grad_norm": 0.5806813743109116, "learning_rate": 4.4750387038459086e-05, "loss": 11.8356, "step": 25553 }, { "epoch": 1.3915174286822813, "grad_norm": 0.568429998278889, "learning_rate": 4.4743037098748696e-05, "loss": 11.8732, "step": 25554 }, { "epoch": 1.3915718826788643, "grad_norm": 0.5419538497127687, "learning_rate": 4.473568758872575e-05, "loss": 11.7468, "step": 25555 }, { "epoch": 1.3916263366754473, "grad_norm": 0.5225619019455237, "learning_rate": 4.472833850844736e-05, "loss": 11.8665, "step": 25556 }, { "epoch": 1.3916807906720303, "grad_norm": 0.5396102597901121, "learning_rate": 4.4720989857970654e-05, "loss": 12.0524, "step": 25557 }, { "epoch": 1.3917352446686133, "grad_norm": 0.5108951221640262, "learning_rate": 4.471364163735283e-05, "loss": 11.9733, "step": 25558 }, { "epoch": 1.3917896986651965, "grad_norm": 0.5130039467516576, "learning_rate": 4.470629384665096e-05, "loss": 11.9014, "step": 25559 }, { "epoch": 1.3918441526617795, "grad_norm": 0.5247712073740278, "learning_rate": 4.469894648592227e-05, "loss": 11.9558, "step": 25560 }, { "epoch": 1.3918986066583625, "grad_norm": 0.5626381865461848, "learning_rate": 4.46915995552238e-05, "loss": 11.9597, "step": 25561 }, { "epoch": 1.3919530606549455, "grad_norm": 0.5176142223681709, "learning_rate": 4.468425305461277e-05, "loss": 11.8549, "step": 25562 }, { "epoch": 1.3920075146515285, "grad_norm": 0.5361655587293035, "learning_rate": 4.4676906984146225e-05, "loss": 11.8692, "step": 25563 }, { "epoch": 1.3920619686481115, "grad_norm": 0.5196121113509619, "learning_rate": 4.4669561343881326e-05, "loss": 11.7564, "step": 25564 }, { "epoch": 1.3921164226446945, "grad_norm": 0.5965802711646296, "learning_rate": 4.466221613387524e-05, "loss": 11.9678, "step": 25565 }, { "epoch": 1.3921708766412775, "grad_norm": 0.5278279479966292, "learning_rate": 4.465487135418504e-05, "loss": 11.8423, "step": 25566 }, { "epoch": 1.3922253306378605, "grad_norm": 0.4899969182386487, "learning_rate": 4.464752700486784e-05, "loss": 11.8754, "step": 25567 }, { "epoch": 1.3922797846344435, "grad_norm": 0.5544441426716407, "learning_rate": 4.4640183085980715e-05, "loss": 11.9249, "step": 25568 }, { "epoch": 1.3923342386310265, "grad_norm": 0.534695084533497, "learning_rate": 4.463283959758081e-05, "loss": 11.8792, "step": 25569 }, { "epoch": 1.3923886926276094, "grad_norm": 0.5690192789312117, "learning_rate": 4.4625496539725264e-05, "loss": 11.8757, "step": 25570 }, { "epoch": 1.3924431466241924, "grad_norm": 0.4945283277132608, "learning_rate": 4.461815391247112e-05, "loss": 11.809, "step": 25571 }, { "epoch": 1.3924976006207754, "grad_norm": 0.6311904744090591, "learning_rate": 4.461081171587552e-05, "loss": 11.9291, "step": 25572 }, { "epoch": 1.3925520546173586, "grad_norm": 0.5131269660543281, "learning_rate": 4.4603469949995504e-05, "loss": 11.861, "step": 25573 }, { "epoch": 1.3926065086139416, "grad_norm": 0.5299962043621316, "learning_rate": 4.4596128614888235e-05, "loss": 11.7927, "step": 25574 }, { "epoch": 1.3926609626105246, "grad_norm": 0.4829217822093533, "learning_rate": 4.458878771061072e-05, "loss": 11.8651, "step": 25575 }, { "epoch": 1.3927154166071076, "grad_norm": 0.5485101424678609, "learning_rate": 4.458144723722013e-05, "loss": 11.9357, "step": 25576 }, { "epoch": 1.3927698706036906, "grad_norm": 0.5766981103581194, "learning_rate": 4.4574107194773495e-05, "loss": 11.898, "step": 25577 }, { "epoch": 1.3928243246002736, "grad_norm": 0.5593629114152067, "learning_rate": 4.456676758332785e-05, "loss": 12.0337, "step": 25578 }, { "epoch": 1.3928787785968566, "grad_norm": 0.5440644568553376, "learning_rate": 4.455942840294033e-05, "loss": 11.9586, "step": 25579 }, { "epoch": 1.3929332325934396, "grad_norm": 0.5471786288865869, "learning_rate": 4.455208965366803e-05, "loss": 11.9155, "step": 25580 }, { "epoch": 1.3929876865900226, "grad_norm": 0.5732036331337723, "learning_rate": 4.454475133556794e-05, "loss": 11.8596, "step": 25581 }, { "epoch": 1.3930421405866058, "grad_norm": 0.510982090028107, "learning_rate": 4.45374134486972e-05, "loss": 11.9077, "step": 25582 }, { "epoch": 1.3930965945831888, "grad_norm": 0.5482976943136708, "learning_rate": 4.4530075993112785e-05, "loss": 11.9974, "step": 25583 }, { "epoch": 1.3931510485797718, "grad_norm": 0.5461209763693049, "learning_rate": 4.4522738968871844e-05, "loss": 11.9263, "step": 25584 }, { "epoch": 1.3932055025763548, "grad_norm": 0.5476284641533394, "learning_rate": 4.451540237603137e-05, "loss": 11.8504, "step": 25585 }, { "epoch": 1.3932599565729378, "grad_norm": 0.5555034223480412, "learning_rate": 4.450806621464842e-05, "loss": 11.8776, "step": 25586 }, { "epoch": 1.3933144105695208, "grad_norm": 0.5415518693860845, "learning_rate": 4.450073048478012e-05, "loss": 11.9524, "step": 25587 }, { "epoch": 1.3933688645661038, "grad_norm": 0.4836607640817616, "learning_rate": 4.449339518648338e-05, "loss": 11.8876, "step": 25588 }, { "epoch": 1.3934233185626868, "grad_norm": 0.5806205706248562, "learning_rate": 4.448606031981534e-05, "loss": 11.9442, "step": 25589 }, { "epoch": 1.3934777725592697, "grad_norm": 0.5245357843627159, "learning_rate": 4.447872588483296e-05, "loss": 11.884, "step": 25590 }, { "epoch": 1.3935322265558527, "grad_norm": 0.5933993341539028, "learning_rate": 4.447139188159334e-05, "loss": 11.8883, "step": 25591 }, { "epoch": 1.3935866805524357, "grad_norm": 0.5626036087049634, "learning_rate": 4.446405831015351e-05, "loss": 11.9668, "step": 25592 }, { "epoch": 1.3936411345490187, "grad_norm": 0.5854899098641337, "learning_rate": 4.4456725170570446e-05, "loss": 11.9953, "step": 25593 }, { "epoch": 1.3936955885456017, "grad_norm": 0.583069753177118, "learning_rate": 4.444939246290123e-05, "loss": 11.967, "step": 25594 }, { "epoch": 1.3937500425421847, "grad_norm": 0.5900050905800227, "learning_rate": 4.444206018720283e-05, "loss": 11.8586, "step": 25595 }, { "epoch": 1.393804496538768, "grad_norm": 0.4992737059483989, "learning_rate": 4.443472834353232e-05, "loss": 11.8711, "step": 25596 }, { "epoch": 1.393858950535351, "grad_norm": 0.5004623240886439, "learning_rate": 4.4427396931946675e-05, "loss": 11.9301, "step": 25597 }, { "epoch": 1.393913404531934, "grad_norm": 0.5464915720685761, "learning_rate": 4.442006595250288e-05, "loss": 11.7774, "step": 25598 }, { "epoch": 1.393967858528517, "grad_norm": 0.5200105712903359, "learning_rate": 4.4412735405258e-05, "loss": 11.9007, "step": 25599 }, { "epoch": 1.3940223125251, "grad_norm": 0.5065329383440792, "learning_rate": 4.4405405290268976e-05, "loss": 11.952, "step": 25600 }, { "epoch": 1.3940767665216829, "grad_norm": 0.5755515418314147, "learning_rate": 4.439807560759285e-05, "loss": 11.9608, "step": 25601 }, { "epoch": 1.3941312205182659, "grad_norm": 0.5944838060345434, "learning_rate": 4.439074635728665e-05, "loss": 11.9533, "step": 25602 }, { "epoch": 1.3941856745148489, "grad_norm": 0.5493298611399842, "learning_rate": 4.438341753940729e-05, "loss": 11.8226, "step": 25603 }, { "epoch": 1.3942401285114319, "grad_norm": 0.5235303374651344, "learning_rate": 4.437608915401185e-05, "loss": 11.9139, "step": 25604 }, { "epoch": 1.394294582508015, "grad_norm": 0.5791529929868455, "learning_rate": 4.436876120115723e-05, "loss": 11.7544, "step": 25605 }, { "epoch": 1.394349036504598, "grad_norm": 0.5406835856521723, "learning_rate": 4.4361433680900474e-05, "loss": 11.9002, "step": 25606 }, { "epoch": 1.394403490501181, "grad_norm": 0.5226489783085283, "learning_rate": 4.435410659329855e-05, "loss": 11.8162, "step": 25607 }, { "epoch": 1.394457944497764, "grad_norm": 0.5366347793195804, "learning_rate": 4.434677993840839e-05, "loss": 11.8039, "step": 25608 }, { "epoch": 1.394512398494347, "grad_norm": 0.5243448582888759, "learning_rate": 4.433945371628704e-05, "loss": 11.8087, "step": 25609 }, { "epoch": 1.39456685249093, "grad_norm": 0.5581672782464008, "learning_rate": 4.433212792699138e-05, "loss": 11.8108, "step": 25610 }, { "epoch": 1.394621306487513, "grad_norm": 0.5777969188109933, "learning_rate": 4.432480257057848e-05, "loss": 12.0268, "step": 25611 }, { "epoch": 1.394675760484096, "grad_norm": 0.5438379840429651, "learning_rate": 4.431747764710522e-05, "loss": 11.8112, "step": 25612 }, { "epoch": 1.394730214480679, "grad_norm": 0.5495210033207965, "learning_rate": 4.431015315662859e-05, "loss": 11.909, "step": 25613 }, { "epoch": 1.394784668477262, "grad_norm": 0.5553615982274764, "learning_rate": 4.430282909920558e-05, "loss": 11.9504, "step": 25614 }, { "epoch": 1.394839122473845, "grad_norm": 0.5296519106752849, "learning_rate": 4.429550547489307e-05, "loss": 11.948, "step": 25615 }, { "epoch": 1.394893576470428, "grad_norm": 0.5662093547186087, "learning_rate": 4.4288182283748094e-05, "loss": 11.9523, "step": 25616 }, { "epoch": 1.394948030467011, "grad_norm": 0.5814211401726735, "learning_rate": 4.4280859525827557e-05, "loss": 11.9414, "step": 25617 }, { "epoch": 1.395002484463594, "grad_norm": 0.5176920390380298, "learning_rate": 4.427353720118835e-05, "loss": 11.8059, "step": 25618 }, { "epoch": 1.395056938460177, "grad_norm": 0.5637400044293797, "learning_rate": 4.426621530988751e-05, "loss": 11.9364, "step": 25619 }, { "epoch": 1.3951113924567602, "grad_norm": 0.5891127385535817, "learning_rate": 4.4258893851981895e-05, "loss": 11.7332, "step": 25620 }, { "epoch": 1.3951658464533432, "grad_norm": 0.5876437351777504, "learning_rate": 4.42515728275285e-05, "loss": 11.9318, "step": 25621 }, { "epoch": 1.3952203004499262, "grad_norm": 0.49966714443086996, "learning_rate": 4.424425223658419e-05, "loss": 11.9052, "step": 25622 }, { "epoch": 1.3952747544465092, "grad_norm": 0.5713599956178447, "learning_rate": 4.423693207920596e-05, "loss": 11.9294, "step": 25623 }, { "epoch": 1.3953292084430922, "grad_norm": 0.5477477694312434, "learning_rate": 4.4229612355450655e-05, "loss": 11.756, "step": 25624 }, { "epoch": 1.3953836624396752, "grad_norm": 0.48363716283238173, "learning_rate": 4.422229306537524e-05, "loss": 11.7877, "step": 25625 }, { "epoch": 1.3954381164362581, "grad_norm": 0.5237849668974416, "learning_rate": 4.421497420903668e-05, "loss": 11.8695, "step": 25626 }, { "epoch": 1.3954925704328411, "grad_norm": 0.5289670155708505, "learning_rate": 4.420765578649182e-05, "loss": 11.6797, "step": 25627 }, { "epoch": 1.3955470244294244, "grad_norm": 0.5385830800928001, "learning_rate": 4.420033779779754e-05, "loss": 11.8105, "step": 25628 }, { "epoch": 1.3956014784260073, "grad_norm": 0.5452827851664649, "learning_rate": 4.419302024301084e-05, "loss": 11.9292, "step": 25629 }, { "epoch": 1.3956559324225903, "grad_norm": 0.5027393070915539, "learning_rate": 4.4185703122188536e-05, "loss": 11.8437, "step": 25630 }, { "epoch": 1.3957103864191733, "grad_norm": 0.5452691469643637, "learning_rate": 4.417838643538761e-05, "loss": 11.9122, "step": 25631 }, { "epoch": 1.3957648404157563, "grad_norm": 0.572446812699789, "learning_rate": 4.417107018266486e-05, "loss": 11.8799, "step": 25632 }, { "epoch": 1.3958192944123393, "grad_norm": 0.5232701785438433, "learning_rate": 4.416375436407727e-05, "loss": 11.6885, "step": 25633 }, { "epoch": 1.3958737484089223, "grad_norm": 0.5650317891925306, "learning_rate": 4.415643897968166e-05, "loss": 11.8378, "step": 25634 }, { "epoch": 1.3959282024055053, "grad_norm": 0.597295783221329, "learning_rate": 4.414912402953494e-05, "loss": 11.8465, "step": 25635 }, { "epoch": 1.3959826564020883, "grad_norm": 0.5439921230625333, "learning_rate": 4.4141809513694043e-05, "loss": 11.8836, "step": 25636 }, { "epoch": 1.3960371103986713, "grad_norm": 0.5133879306520974, "learning_rate": 4.4134495432215795e-05, "loss": 11.894, "step": 25637 }, { "epoch": 1.3960915643952543, "grad_norm": 0.5308147457633371, "learning_rate": 4.4127181785157077e-05, "loss": 11.8861, "step": 25638 }, { "epoch": 1.3961460183918373, "grad_norm": 0.580976142919681, "learning_rate": 4.411986857257473e-05, "loss": 11.8624, "step": 25639 }, { "epoch": 1.3962004723884203, "grad_norm": 0.5221320733890685, "learning_rate": 4.411255579452566e-05, "loss": 11.8193, "step": 25640 }, { "epoch": 1.3962549263850033, "grad_norm": 0.5638440618118854, "learning_rate": 4.410524345106676e-05, "loss": 11.9277, "step": 25641 }, { "epoch": 1.3963093803815863, "grad_norm": 0.5243922082073832, "learning_rate": 4.409793154225482e-05, "loss": 11.8344, "step": 25642 }, { "epoch": 1.3963638343781695, "grad_norm": 0.5442168463416602, "learning_rate": 4.4090620068146785e-05, "loss": 11.8964, "step": 25643 }, { "epoch": 1.3964182883747525, "grad_norm": 0.5371329213804302, "learning_rate": 4.408330902879942e-05, "loss": 11.7654, "step": 25644 }, { "epoch": 1.3964727423713355, "grad_norm": 0.640722081839925, "learning_rate": 4.407599842426967e-05, "loss": 12.1038, "step": 25645 }, { "epoch": 1.3965271963679184, "grad_norm": 0.5837988396414286, "learning_rate": 4.406868825461429e-05, "loss": 11.963, "step": 25646 }, { "epoch": 1.3965816503645014, "grad_norm": 0.515178023263635, "learning_rate": 4.4061378519890206e-05, "loss": 11.874, "step": 25647 }, { "epoch": 1.3966361043610844, "grad_norm": 0.5772878515993738, "learning_rate": 4.4054069220154215e-05, "loss": 11.7675, "step": 25648 }, { "epoch": 1.3966905583576674, "grad_norm": 0.5585453623940094, "learning_rate": 4.404676035546313e-05, "loss": 11.896, "step": 25649 }, { "epoch": 1.3967450123542504, "grad_norm": 0.5446295610336994, "learning_rate": 4.403945192587383e-05, "loss": 11.9455, "step": 25650 }, { "epoch": 1.3967994663508334, "grad_norm": 0.520477903605035, "learning_rate": 4.403214393144317e-05, "loss": 11.984, "step": 25651 }, { "epoch": 1.3968539203474166, "grad_norm": 0.7631798390604274, "learning_rate": 4.402483637222791e-05, "loss": 11.8636, "step": 25652 }, { "epoch": 1.3969083743439996, "grad_norm": 0.5357976761801222, "learning_rate": 4.4017529248284936e-05, "loss": 11.844, "step": 25653 }, { "epoch": 1.3969628283405826, "grad_norm": 0.5375128494981319, "learning_rate": 4.4010222559671024e-05, "loss": 11.8929, "step": 25654 }, { "epoch": 1.3970172823371656, "grad_norm": 0.5684473019634566, "learning_rate": 4.400291630644303e-05, "loss": 11.8925, "step": 25655 }, { "epoch": 1.3970717363337486, "grad_norm": 0.6145092032176454, "learning_rate": 4.399561048865772e-05, "loss": 11.8634, "step": 25656 }, { "epoch": 1.3971261903303316, "grad_norm": 0.5871747327630277, "learning_rate": 4.398830510637198e-05, "loss": 12.017, "step": 25657 }, { "epoch": 1.3971806443269146, "grad_norm": 0.5408892166409712, "learning_rate": 4.398100015964256e-05, "loss": 11.9226, "step": 25658 }, { "epoch": 1.3972350983234976, "grad_norm": 0.6011747196050518, "learning_rate": 4.397369564852624e-05, "loss": 11.846, "step": 25659 }, { "epoch": 1.3972895523200806, "grad_norm": 0.5365215992691857, "learning_rate": 4.39663915730799e-05, "loss": 11.9564, "step": 25660 }, { "epoch": 1.3973440063166636, "grad_norm": 0.5578866734708908, "learning_rate": 4.395908793336026e-05, "loss": 11.9316, "step": 25661 }, { "epoch": 1.3973984603132465, "grad_norm": 0.5684847679081799, "learning_rate": 4.395178472942415e-05, "loss": 11.9762, "step": 25662 }, { "epoch": 1.3974529143098295, "grad_norm": 0.5737354362562226, "learning_rate": 4.394448196132839e-05, "loss": 11.8986, "step": 25663 }, { "epoch": 1.3975073683064125, "grad_norm": 0.5643940556914142, "learning_rate": 4.393717962912971e-05, "loss": 11.8787, "step": 25664 }, { "epoch": 1.3975618223029955, "grad_norm": 0.5163855351759655, "learning_rate": 4.392987773288496e-05, "loss": 11.9327, "step": 25665 }, { "epoch": 1.3976162762995787, "grad_norm": 0.5165131419827805, "learning_rate": 4.392257627265084e-05, "loss": 11.8745, "step": 25666 }, { "epoch": 1.3976707302961617, "grad_norm": 0.5623763370391212, "learning_rate": 4.3915275248484224e-05, "loss": 11.8753, "step": 25667 }, { "epoch": 1.3977251842927447, "grad_norm": 0.5021095954282441, "learning_rate": 4.390797466044182e-05, "loss": 11.8867, "step": 25668 }, { "epoch": 1.3977796382893277, "grad_norm": 0.5343072695118061, "learning_rate": 4.3900674508580374e-05, "loss": 11.9957, "step": 25669 }, { "epoch": 1.3978340922859107, "grad_norm": 0.5840461650399251, "learning_rate": 4.389337479295673e-05, "loss": 11.9446, "step": 25670 }, { "epoch": 1.3978885462824937, "grad_norm": 0.5555837544430504, "learning_rate": 4.388607551362758e-05, "loss": 11.8412, "step": 25671 }, { "epoch": 1.3979430002790767, "grad_norm": 0.5567206681928537, "learning_rate": 4.3878776670649746e-05, "loss": 11.841, "step": 25672 }, { "epoch": 1.3979974542756597, "grad_norm": 0.5194923352163608, "learning_rate": 4.3871478264079924e-05, "loss": 11.7848, "step": 25673 }, { "epoch": 1.3980519082722427, "grad_norm": 0.49589168254954724, "learning_rate": 4.38641802939749e-05, "loss": 11.8045, "step": 25674 }, { "epoch": 1.398106362268826, "grad_norm": 0.5753414999346085, "learning_rate": 4.385688276039146e-05, "loss": 11.8016, "step": 25675 }, { "epoch": 1.398160816265409, "grad_norm": 0.5054072524763868, "learning_rate": 4.384958566338627e-05, "loss": 11.9053, "step": 25676 }, { "epoch": 1.3982152702619919, "grad_norm": 0.5207491949903695, "learning_rate": 4.384228900301617e-05, "loss": 11.877, "step": 25677 }, { "epoch": 1.3982697242585749, "grad_norm": 0.6050903200238807, "learning_rate": 4.3834992779337845e-05, "loss": 11.9729, "step": 25678 }, { "epoch": 1.3983241782551579, "grad_norm": 0.5035147841598213, "learning_rate": 4.3827696992407995e-05, "loss": 11.8971, "step": 25679 }, { "epoch": 1.3983786322517409, "grad_norm": 0.5849917897173539, "learning_rate": 4.382040164228343e-05, "loss": 11.9739, "step": 25680 }, { "epoch": 1.3984330862483239, "grad_norm": 0.5173595634324137, "learning_rate": 4.3813106729020806e-05, "loss": 11.7193, "step": 25681 }, { "epoch": 1.3984875402449068, "grad_norm": 0.5088712814321042, "learning_rate": 4.380581225267693e-05, "loss": 11.8223, "step": 25682 }, { "epoch": 1.3985419942414898, "grad_norm": 0.5513836839263556, "learning_rate": 4.379851821330843e-05, "loss": 11.8423, "step": 25683 }, { "epoch": 1.3985964482380728, "grad_norm": 0.5057797012944087, "learning_rate": 4.379122461097208e-05, "loss": 11.6728, "step": 25684 }, { "epoch": 1.3986509022346558, "grad_norm": 0.5718664348820757, "learning_rate": 4.3783931445724645e-05, "loss": 11.9907, "step": 25685 }, { "epoch": 1.3987053562312388, "grad_norm": 0.5910400993603159, "learning_rate": 4.377663871762274e-05, "loss": 12.0681, "step": 25686 }, { "epoch": 1.3987598102278218, "grad_norm": 0.8504708164787771, "learning_rate": 4.376934642672319e-05, "loss": 11.9959, "step": 25687 }, { "epoch": 1.3988142642244048, "grad_norm": 0.5088988067425663, "learning_rate": 4.376205457308257e-05, "loss": 11.8203, "step": 25688 }, { "epoch": 1.3988687182209878, "grad_norm": 0.5316068582897, "learning_rate": 4.375476315675764e-05, "loss": 11.8087, "step": 25689 }, { "epoch": 1.398923172217571, "grad_norm": 0.5454832715356545, "learning_rate": 4.3747472177805135e-05, "loss": 11.8985, "step": 25690 }, { "epoch": 1.398977626214154, "grad_norm": 0.5422557431517581, "learning_rate": 4.37401816362817e-05, "loss": 11.9926, "step": 25691 }, { "epoch": 1.399032080210737, "grad_norm": 0.5232367087176085, "learning_rate": 4.373289153224407e-05, "loss": 11.794, "step": 25692 }, { "epoch": 1.39908653420732, "grad_norm": 0.5764699835647442, "learning_rate": 4.372560186574887e-05, "loss": 11.8392, "step": 25693 }, { "epoch": 1.399140988203903, "grad_norm": 0.5653583270220415, "learning_rate": 4.371831263685288e-05, "loss": 11.8948, "step": 25694 }, { "epoch": 1.399195442200486, "grad_norm": 0.5332046554670394, "learning_rate": 4.3711023845612675e-05, "loss": 11.964, "step": 25695 }, { "epoch": 1.399249896197069, "grad_norm": 0.5613374131471114, "learning_rate": 4.3703735492085e-05, "loss": 11.922, "step": 25696 }, { "epoch": 1.399304350193652, "grad_norm": 0.5404182515945336, "learning_rate": 4.369644757632658e-05, "loss": 11.9681, "step": 25697 }, { "epoch": 1.3993588041902352, "grad_norm": 0.4801664996309418, "learning_rate": 4.368916009839397e-05, "loss": 11.8808, "step": 25698 }, { "epoch": 1.3994132581868182, "grad_norm": 0.5792402197427226, "learning_rate": 4.368187305834388e-05, "loss": 11.7865, "step": 25699 }, { "epoch": 1.3994677121834012, "grad_norm": 0.5203970169492241, "learning_rate": 4.367458645623304e-05, "loss": 11.7728, "step": 25700 }, { "epoch": 1.3995221661799842, "grad_norm": 0.581598290117175, "learning_rate": 4.366730029211801e-05, "loss": 11.8598, "step": 25701 }, { "epoch": 1.3995766201765671, "grad_norm": 0.6091364758528232, "learning_rate": 4.366001456605554e-05, "loss": 11.893, "step": 25702 }, { "epoch": 1.3996310741731501, "grad_norm": 0.5477307382500277, "learning_rate": 4.365272927810221e-05, "loss": 11.8992, "step": 25703 }, { "epoch": 1.3996855281697331, "grad_norm": 0.5498687874056694, "learning_rate": 4.3645444428314754e-05, "loss": 11.7799, "step": 25704 }, { "epoch": 1.3997399821663161, "grad_norm": 0.5837738167710438, "learning_rate": 4.363816001674972e-05, "loss": 12.0774, "step": 25705 }, { "epoch": 1.3997944361628991, "grad_norm": 0.5492987014768775, "learning_rate": 4.363087604346385e-05, "loss": 11.9124, "step": 25706 }, { "epoch": 1.399848890159482, "grad_norm": 0.51453150083243, "learning_rate": 4.362359250851374e-05, "loss": 11.8891, "step": 25707 }, { "epoch": 1.399903344156065, "grad_norm": 0.5271191344921754, "learning_rate": 4.3616309411955994e-05, "loss": 11.8725, "step": 25708 }, { "epoch": 1.399957798152648, "grad_norm": 0.5335090745817481, "learning_rate": 4.3609026753847315e-05, "loss": 11.9013, "step": 25709 }, { "epoch": 1.400012252149231, "grad_norm": 0.5408831310830207, "learning_rate": 4.360174453424427e-05, "loss": 11.8668, "step": 25710 }, { "epoch": 1.400066706145814, "grad_norm": 0.5873790115469334, "learning_rate": 4.3594462753203516e-05, "loss": 11.8959, "step": 25711 }, { "epoch": 1.400121160142397, "grad_norm": 0.5084114241267758, "learning_rate": 4.358718141078171e-05, "loss": 11.7836, "step": 25712 }, { "epoch": 1.4001756141389803, "grad_norm": 0.5610339364949798, "learning_rate": 4.357990050703541e-05, "loss": 11.8994, "step": 25713 }, { "epoch": 1.4002300681355633, "grad_norm": 0.5326563329470386, "learning_rate": 4.3572620042021296e-05, "loss": 11.9368, "step": 25714 }, { "epoch": 1.4002845221321463, "grad_norm": 0.48618825218399014, "learning_rate": 4.356534001579592e-05, "loss": 11.7222, "step": 25715 }, { "epoch": 1.4003389761287293, "grad_norm": 0.5149899727645597, "learning_rate": 4.355806042841596e-05, "loss": 11.9233, "step": 25716 }, { "epoch": 1.4003934301253123, "grad_norm": 0.5748589060599986, "learning_rate": 4.355078127993798e-05, "loss": 11.9294, "step": 25717 }, { "epoch": 1.4004478841218952, "grad_norm": 0.683915970408038, "learning_rate": 4.354350257041857e-05, "loss": 11.9369, "step": 25718 }, { "epoch": 1.4005023381184782, "grad_norm": 0.5652671862783292, "learning_rate": 4.3536224299914374e-05, "loss": 11.8888, "step": 25719 }, { "epoch": 1.4005567921150612, "grad_norm": 0.5390889528715983, "learning_rate": 4.352894646848194e-05, "loss": 11.9011, "step": 25720 }, { "epoch": 1.4006112461116442, "grad_norm": 0.5569542491738358, "learning_rate": 4.352166907617792e-05, "loss": 11.8275, "step": 25721 }, { "epoch": 1.4006657001082274, "grad_norm": 0.526211258247391, "learning_rate": 4.351439212305883e-05, "loss": 11.989, "step": 25722 }, { "epoch": 1.4007201541048104, "grad_norm": 0.5817649898617785, "learning_rate": 4.3507115609181315e-05, "loss": 11.7951, "step": 25723 }, { "epoch": 1.4007746081013934, "grad_norm": 0.5849301463570367, "learning_rate": 4.349983953460197e-05, "loss": 11.8035, "step": 25724 }, { "epoch": 1.4008290620979764, "grad_norm": 0.5743438742601729, "learning_rate": 4.3492563899377316e-05, "loss": 11.8936, "step": 25725 }, { "epoch": 1.4008835160945594, "grad_norm": 0.5725940876010246, "learning_rate": 4.3485288703563986e-05, "loss": 11.9187, "step": 25726 }, { "epoch": 1.4009379700911424, "grad_norm": 0.533858575078255, "learning_rate": 4.347801394721851e-05, "loss": 11.8609, "step": 25727 }, { "epoch": 1.4009924240877254, "grad_norm": 0.5972171361554862, "learning_rate": 4.347073963039749e-05, "loss": 11.8956, "step": 25728 }, { "epoch": 1.4010468780843084, "grad_norm": 0.5431114513833376, "learning_rate": 4.3463465753157505e-05, "loss": 11.8938, "step": 25729 }, { "epoch": 1.4011013320808914, "grad_norm": 0.5755390994591522, "learning_rate": 4.3456192315555034e-05, "loss": 11.9397, "step": 25730 }, { "epoch": 1.4011557860774744, "grad_norm": 0.5730787357320373, "learning_rate": 4.344891931764674e-05, "loss": 11.8986, "step": 25731 }, { "epoch": 1.4012102400740574, "grad_norm": 0.5007471459092729, "learning_rate": 4.34416467594891e-05, "loss": 11.8538, "step": 25732 }, { "epoch": 1.4012646940706404, "grad_norm": 0.5371171533501721, "learning_rate": 4.343437464113869e-05, "loss": 11.8889, "step": 25733 }, { "epoch": 1.4013191480672234, "grad_norm": 0.5478632832461535, "learning_rate": 4.342710296265211e-05, "loss": 11.8248, "step": 25734 }, { "epoch": 1.4013736020638063, "grad_norm": 0.5631985721648314, "learning_rate": 4.341983172408582e-05, "loss": 11.875, "step": 25735 }, { "epoch": 1.4014280560603896, "grad_norm": 0.5449914502765917, "learning_rate": 4.341256092549646e-05, "loss": 11.8051, "step": 25736 }, { "epoch": 1.4014825100569726, "grad_norm": 0.5375566491885477, "learning_rate": 4.340529056694047e-05, "loss": 11.9043, "step": 25737 }, { "epoch": 1.4015369640535555, "grad_norm": 0.5365139119787555, "learning_rate": 4.339802064847447e-05, "loss": 11.9476, "step": 25738 }, { "epoch": 1.4015914180501385, "grad_norm": 0.5766863127162225, "learning_rate": 4.339075117015495e-05, "loss": 11.8478, "step": 25739 }, { "epoch": 1.4016458720467215, "grad_norm": 0.6210547963280137, "learning_rate": 4.338348213203841e-05, "loss": 11.8514, "step": 25740 }, { "epoch": 1.4017003260433045, "grad_norm": 0.5459567670103419, "learning_rate": 4.3376213534181445e-05, "loss": 11.7452, "step": 25741 }, { "epoch": 1.4017547800398875, "grad_norm": 0.5594405762582951, "learning_rate": 4.33689453766405e-05, "loss": 11.9725, "step": 25742 }, { "epoch": 1.4018092340364705, "grad_norm": 0.5454613069963479, "learning_rate": 4.3361677659472164e-05, "loss": 11.9589, "step": 25743 }, { "epoch": 1.4018636880330535, "grad_norm": 0.5481028587273373, "learning_rate": 4.335441038273289e-05, "loss": 11.8762, "step": 25744 }, { "epoch": 1.4019181420296367, "grad_norm": 0.5606037842650509, "learning_rate": 4.334714354647923e-05, "loss": 11.8584, "step": 25745 }, { "epoch": 1.4019725960262197, "grad_norm": 0.5185782378086978, "learning_rate": 4.33398771507677e-05, "loss": 11.8792, "step": 25746 }, { "epoch": 1.4020270500228027, "grad_norm": 0.5281439600207601, "learning_rate": 4.333261119565476e-05, "loss": 12.0006, "step": 25747 }, { "epoch": 1.4020815040193857, "grad_norm": 0.6277291921330076, "learning_rate": 4.332534568119698e-05, "loss": 11.8947, "step": 25748 }, { "epoch": 1.4021359580159687, "grad_norm": 0.534869803590322, "learning_rate": 4.33180806074508e-05, "loss": 11.9372, "step": 25749 }, { "epoch": 1.4021904120125517, "grad_norm": 0.5720715166334139, "learning_rate": 4.331081597447272e-05, "loss": 12.0421, "step": 25750 }, { "epoch": 1.4022448660091347, "grad_norm": 0.6700032731573184, "learning_rate": 4.330355178231926e-05, "loss": 11.8688, "step": 25751 }, { "epoch": 1.4022993200057177, "grad_norm": 0.6422941064293342, "learning_rate": 4.329628803104685e-05, "loss": 11.9984, "step": 25752 }, { "epoch": 1.4023537740023007, "grad_norm": 0.5218657818654752, "learning_rate": 4.3289024720712055e-05, "loss": 11.8448, "step": 25753 }, { "epoch": 1.4024082279988836, "grad_norm": 0.5567331107952798, "learning_rate": 4.3281761851371284e-05, "loss": 11.8074, "step": 25754 }, { "epoch": 1.4024626819954666, "grad_norm": 0.553640112261936, "learning_rate": 4.327449942308107e-05, "loss": 11.6889, "step": 25755 }, { "epoch": 1.4025171359920496, "grad_norm": 0.5626446887770098, "learning_rate": 4.326723743589783e-05, "loss": 11.9616, "step": 25756 }, { "epoch": 1.4025715899886326, "grad_norm": 0.5404421380628863, "learning_rate": 4.325997588987808e-05, "loss": 11.9401, "step": 25757 }, { "epoch": 1.4026260439852156, "grad_norm": 0.5994161744225048, "learning_rate": 4.325271478507833e-05, "loss": 11.8843, "step": 25758 }, { "epoch": 1.4026804979817986, "grad_norm": 0.507601738856093, "learning_rate": 4.324545412155492e-05, "loss": 11.8573, "step": 25759 }, { "epoch": 1.4027349519783818, "grad_norm": 0.5399060464408709, "learning_rate": 4.323819389936437e-05, "loss": 11.8914, "step": 25760 }, { "epoch": 1.4027894059749648, "grad_norm": 0.4985717580031476, "learning_rate": 4.3230934118563185e-05, "loss": 11.9473, "step": 25761 }, { "epoch": 1.4028438599715478, "grad_norm": 0.5098343717376963, "learning_rate": 4.322367477920773e-05, "loss": 11.8673, "step": 25762 }, { "epoch": 1.4028983139681308, "grad_norm": 0.5031072887679476, "learning_rate": 4.321641588135454e-05, "loss": 11.9143, "step": 25763 }, { "epoch": 1.4029527679647138, "grad_norm": 0.5128637907285493, "learning_rate": 4.320915742506e-05, "loss": 11.806, "step": 25764 }, { "epoch": 1.4030072219612968, "grad_norm": 0.5643419053060134, "learning_rate": 4.32018994103806e-05, "loss": 11.9542, "step": 25765 }, { "epoch": 1.4030616759578798, "grad_norm": 0.5496149552258993, "learning_rate": 4.319464183737272e-05, "loss": 11.9774, "step": 25766 }, { "epoch": 1.4031161299544628, "grad_norm": 0.5385685349043441, "learning_rate": 4.318738470609284e-05, "loss": 11.7526, "step": 25767 }, { "epoch": 1.403170583951046, "grad_norm": 0.5051635585643108, "learning_rate": 4.318012801659744e-05, "loss": 11.8489, "step": 25768 }, { "epoch": 1.403225037947629, "grad_norm": 0.5528945036488232, "learning_rate": 4.3172871768942834e-05, "loss": 12.0119, "step": 25769 }, { "epoch": 1.403279491944212, "grad_norm": 0.5778477693792172, "learning_rate": 4.3165615963185545e-05, "loss": 11.9029, "step": 25770 }, { "epoch": 1.403333945940795, "grad_norm": 0.5397905335023614, "learning_rate": 4.315836059938191e-05, "loss": 11.828, "step": 25771 }, { "epoch": 1.403388399937378, "grad_norm": 0.5438297685566836, "learning_rate": 4.31511056775884e-05, "loss": 11.8823, "step": 25772 }, { "epoch": 1.403442853933961, "grad_norm": 0.5701260681333727, "learning_rate": 4.314385119786146e-05, "loss": 11.8675, "step": 25773 }, { "epoch": 1.403497307930544, "grad_norm": 0.5539240777978596, "learning_rate": 4.313659716025744e-05, "loss": 11.9092, "step": 25774 }, { "epoch": 1.403551761927127, "grad_norm": 0.5696074381941687, "learning_rate": 4.312934356483281e-05, "loss": 11.845, "step": 25775 }, { "epoch": 1.40360621592371, "grad_norm": 0.6578220433315457, "learning_rate": 4.31220904116439e-05, "loss": 12.0892, "step": 25776 }, { "epoch": 1.403660669920293, "grad_norm": 0.5337505459524566, "learning_rate": 4.3114837700747203e-05, "loss": 11.8102, "step": 25777 }, { "epoch": 1.403715123916876, "grad_norm": 0.5252660110524694, "learning_rate": 4.310758543219905e-05, "loss": 11.8788, "step": 25778 }, { "epoch": 1.403769577913459, "grad_norm": 0.5426806737980335, "learning_rate": 4.310033360605582e-05, "loss": 11.8705, "step": 25779 }, { "epoch": 1.403824031910042, "grad_norm": 0.5849980006069722, "learning_rate": 4.309308222237398e-05, "loss": 11.8774, "step": 25780 }, { "epoch": 1.403878485906625, "grad_norm": 0.5298430039427744, "learning_rate": 4.308583128120984e-05, "loss": 11.9699, "step": 25781 }, { "epoch": 1.4039329399032079, "grad_norm": 0.5711868513615855, "learning_rate": 4.307858078261983e-05, "loss": 11.8563, "step": 25782 }, { "epoch": 1.403987393899791, "grad_norm": 0.6034781661497142, "learning_rate": 4.307133072666034e-05, "loss": 11.9382, "step": 25783 }, { "epoch": 1.404041847896374, "grad_norm": 0.5275655011643641, "learning_rate": 4.30640811133877e-05, "loss": 11.883, "step": 25784 }, { "epoch": 1.404096301892957, "grad_norm": 0.6132439151156021, "learning_rate": 4.305683194285834e-05, "loss": 11.9922, "step": 25785 }, { "epoch": 1.40415075588954, "grad_norm": 0.4809888890409843, "learning_rate": 4.304958321512858e-05, "loss": 11.8524, "step": 25786 }, { "epoch": 1.404205209886123, "grad_norm": 0.5915868691239101, "learning_rate": 4.3042334930254855e-05, "loss": 11.9001, "step": 25787 }, { "epoch": 1.404259663882706, "grad_norm": 0.4780354993578049, "learning_rate": 4.303508708829347e-05, "loss": 11.8547, "step": 25788 }, { "epoch": 1.404314117879289, "grad_norm": 0.5796849881484587, "learning_rate": 4.302783968930078e-05, "loss": 11.8499, "step": 25789 }, { "epoch": 1.404368571875872, "grad_norm": 0.5537851104142394, "learning_rate": 4.3020592733333185e-05, "loss": 11.8688, "step": 25790 }, { "epoch": 1.404423025872455, "grad_norm": 0.5442721735305788, "learning_rate": 4.301334622044698e-05, "loss": 11.8563, "step": 25791 }, { "epoch": 1.4044774798690383, "grad_norm": 0.5955860010807744, "learning_rate": 4.3006100150698594e-05, "loss": 11.9449, "step": 25792 }, { "epoch": 1.4045319338656213, "grad_norm": 0.5395442100716481, "learning_rate": 4.299885452414429e-05, "loss": 11.7314, "step": 25793 }, { "epoch": 1.4045863878622042, "grad_norm": 0.5194883904364771, "learning_rate": 4.2991609340840455e-05, "loss": 11.916, "step": 25794 }, { "epoch": 1.4046408418587872, "grad_norm": 0.5643779043933354, "learning_rate": 4.298436460084346e-05, "loss": 11.814, "step": 25795 }, { "epoch": 1.4046952958553702, "grad_norm": 0.6175041382935421, "learning_rate": 4.2977120304209586e-05, "loss": 11.7237, "step": 25796 }, { "epoch": 1.4047497498519532, "grad_norm": 0.5257130784124201, "learning_rate": 4.29698764509952e-05, "loss": 11.9171, "step": 25797 }, { "epoch": 1.4048042038485362, "grad_norm": 0.5466752839396776, "learning_rate": 4.296263304125664e-05, "loss": 11.8934, "step": 25798 }, { "epoch": 1.4048586578451192, "grad_norm": 0.5403470529263875, "learning_rate": 4.295539007505016e-05, "loss": 11.9722, "step": 25799 }, { "epoch": 1.4049131118417022, "grad_norm": 0.5531022681001917, "learning_rate": 4.294814755243217e-05, "loss": 11.9278, "step": 25800 }, { "epoch": 1.4049675658382852, "grad_norm": 0.5067067110711966, "learning_rate": 4.2940905473458926e-05, "loss": 11.8181, "step": 25801 }, { "epoch": 1.4050220198348682, "grad_norm": 0.5813067824610553, "learning_rate": 4.29336638381868e-05, "loss": 11.9637, "step": 25802 }, { "epoch": 1.4050764738314512, "grad_norm": 0.5669991039890107, "learning_rate": 4.2926422646672036e-05, "loss": 11.8125, "step": 25803 }, { "epoch": 1.4051309278280342, "grad_norm": 0.49300588130870826, "learning_rate": 4.2919181898970984e-05, "loss": 11.8649, "step": 25804 }, { "epoch": 1.4051853818246172, "grad_norm": 0.5076407580270612, "learning_rate": 4.291194159513998e-05, "loss": 11.8819, "step": 25805 }, { "epoch": 1.4052398358212004, "grad_norm": 0.5236553023447134, "learning_rate": 4.2904701735235256e-05, "loss": 11.7725, "step": 25806 }, { "epoch": 1.4052942898177834, "grad_norm": 0.5537238593380521, "learning_rate": 4.289746231931319e-05, "loss": 11.8104, "step": 25807 }, { "epoch": 1.4053487438143664, "grad_norm": 0.6155129506674, "learning_rate": 4.289022334743001e-05, "loss": 11.8991, "step": 25808 }, { "epoch": 1.4054031978109494, "grad_norm": 0.5394076255226601, "learning_rate": 4.2882984819642015e-05, "loss": 11.9354, "step": 25809 }, { "epoch": 1.4054576518075323, "grad_norm": 0.5782024250137687, "learning_rate": 4.287574673600554e-05, "loss": 11.8426, "step": 25810 }, { "epoch": 1.4055121058041153, "grad_norm": 0.5313240044500805, "learning_rate": 4.28685090965768e-05, "loss": 11.8289, "step": 25811 }, { "epoch": 1.4055665598006983, "grad_norm": 0.5398556323000993, "learning_rate": 4.286127190141216e-05, "loss": 11.9861, "step": 25812 }, { "epoch": 1.4056210137972813, "grad_norm": 0.5323480588440391, "learning_rate": 4.285403515056781e-05, "loss": 11.9367, "step": 25813 }, { "epoch": 1.4056754677938643, "grad_norm": 0.5714705974394412, "learning_rate": 4.28467988441001e-05, "loss": 11.8651, "step": 25814 }, { "epoch": 1.4057299217904475, "grad_norm": 0.5258662331210137, "learning_rate": 4.283956298206523e-05, "loss": 12.0163, "step": 25815 }, { "epoch": 1.4057843757870305, "grad_norm": 0.5249059355857912, "learning_rate": 4.28323275645195e-05, "loss": 11.933, "step": 25816 }, { "epoch": 1.4058388297836135, "grad_norm": 0.5088191200406901, "learning_rate": 4.282509259151922e-05, "loss": 11.9727, "step": 25817 }, { "epoch": 1.4058932837801965, "grad_norm": 0.6366457523019411, "learning_rate": 4.281785806312061e-05, "loss": 11.6813, "step": 25818 }, { "epoch": 1.4059477377767795, "grad_norm": 0.5581525711871052, "learning_rate": 4.281062397937992e-05, "loss": 11.836, "step": 25819 }, { "epoch": 1.4060021917733625, "grad_norm": 0.6347471647572209, "learning_rate": 4.280339034035338e-05, "loss": 11.9899, "step": 25820 }, { "epoch": 1.4060566457699455, "grad_norm": 0.5325272664915921, "learning_rate": 4.279615714609726e-05, "loss": 11.829, "step": 25821 }, { "epoch": 1.4061110997665285, "grad_norm": 0.48389713357782344, "learning_rate": 4.278892439666785e-05, "loss": 11.8101, "step": 25822 }, { "epoch": 1.4061655537631115, "grad_norm": 0.5360783902279086, "learning_rate": 4.278169209212133e-05, "loss": 11.8839, "step": 25823 }, { "epoch": 1.4062200077596945, "grad_norm": 0.5481278820150643, "learning_rate": 4.2774460232514e-05, "loss": 11.8488, "step": 25824 }, { "epoch": 1.4062744617562775, "grad_norm": 0.5720749791879448, "learning_rate": 4.276722881790203e-05, "loss": 11.8759, "step": 25825 }, { "epoch": 1.4063289157528605, "grad_norm": 0.5443309638373263, "learning_rate": 4.275999784834172e-05, "loss": 11.7165, "step": 25826 }, { "epoch": 1.4063833697494434, "grad_norm": 0.5959614687964457, "learning_rate": 4.275276732388923e-05, "loss": 11.933, "step": 25827 }, { "epoch": 1.4064378237460264, "grad_norm": 0.5179583093528469, "learning_rate": 4.274553724460086e-05, "loss": 11.8332, "step": 25828 }, { "epoch": 1.4064922777426097, "grad_norm": 0.5274418774361285, "learning_rate": 4.273830761053279e-05, "loss": 11.8372, "step": 25829 }, { "epoch": 1.4065467317391926, "grad_norm": 0.5491854188103588, "learning_rate": 4.273107842174121e-05, "loss": 11.862, "step": 25830 }, { "epoch": 1.4066011857357756, "grad_norm": 0.560334495452463, "learning_rate": 4.272384967828237e-05, "loss": 11.8708, "step": 25831 }, { "epoch": 1.4066556397323586, "grad_norm": 0.4878984289510161, "learning_rate": 4.2716621380212505e-05, "loss": 11.8301, "step": 25832 }, { "epoch": 1.4067100937289416, "grad_norm": 0.5708076057091399, "learning_rate": 4.270939352758778e-05, "loss": 11.7839, "step": 25833 }, { "epoch": 1.4067645477255246, "grad_norm": 0.5787300159516624, "learning_rate": 4.270216612046445e-05, "loss": 11.909, "step": 25834 }, { "epoch": 1.4068190017221076, "grad_norm": 0.5941794725826334, "learning_rate": 4.269493915889864e-05, "loss": 12.0, "step": 25835 }, { "epoch": 1.4068734557186906, "grad_norm": 0.5648772748489747, "learning_rate": 4.2687712642946634e-05, "loss": 11.8915, "step": 25836 }, { "epoch": 1.4069279097152736, "grad_norm": 0.5066468622749574, "learning_rate": 4.268048657266455e-05, "loss": 11.8217, "step": 25837 }, { "epoch": 1.4069823637118568, "grad_norm": 0.613763671049065, "learning_rate": 4.267326094810865e-05, "loss": 12.0097, "step": 25838 }, { "epoch": 1.4070368177084398, "grad_norm": 0.5151996896175168, "learning_rate": 4.2666035769335086e-05, "loss": 11.8486, "step": 25839 }, { "epoch": 1.4070912717050228, "grad_norm": 0.5229846377104254, "learning_rate": 4.265881103640002e-05, "loss": 11.9894, "step": 25840 }, { "epoch": 1.4071457257016058, "grad_norm": 0.5552971388629591, "learning_rate": 4.265158674935967e-05, "loss": 11.8026, "step": 25841 }, { "epoch": 1.4072001796981888, "grad_norm": 0.5820783232430011, "learning_rate": 4.2644362908270174e-05, "loss": 11.8772, "step": 25842 }, { "epoch": 1.4072546336947718, "grad_norm": 0.5743819396700995, "learning_rate": 4.263713951318773e-05, "loss": 11.8406, "step": 25843 }, { "epoch": 1.4073090876913548, "grad_norm": 0.5078189515427319, "learning_rate": 4.262991656416854e-05, "loss": 11.6802, "step": 25844 }, { "epoch": 1.4073635416879378, "grad_norm": 0.5683440894347913, "learning_rate": 4.262269406126871e-05, "loss": 11.8316, "step": 25845 }, { "epoch": 1.4074179956845208, "grad_norm": 0.5744332619359597, "learning_rate": 4.261547200454446e-05, "loss": 11.9266, "step": 25846 }, { "epoch": 1.4074724496811037, "grad_norm": 0.5612854370753293, "learning_rate": 4.26082503940519e-05, "loss": 11.8786, "step": 25847 }, { "epoch": 1.4075269036776867, "grad_norm": 0.5525059653758136, "learning_rate": 4.2601029229847235e-05, "loss": 11.777, "step": 25848 }, { "epoch": 1.4075813576742697, "grad_norm": 0.6006601390818188, "learning_rate": 4.25938085119866e-05, "loss": 11.9093, "step": 25849 }, { "epoch": 1.4076358116708527, "grad_norm": 0.5156693231260574, "learning_rate": 4.2586588240526095e-05, "loss": 11.9375, "step": 25850 }, { "epoch": 1.4076902656674357, "grad_norm": 0.5669870649255772, "learning_rate": 4.2579368415521946e-05, "loss": 11.9091, "step": 25851 }, { "epoch": 1.4077447196640187, "grad_norm": 0.5197345446943886, "learning_rate": 4.2572149037030226e-05, "loss": 11.839, "step": 25852 }, { "epoch": 1.407799173660602, "grad_norm": 0.5070070801694782, "learning_rate": 4.2564930105107104e-05, "loss": 11.9568, "step": 25853 }, { "epoch": 1.407853627657185, "grad_norm": 0.5339323222051076, "learning_rate": 4.255771161980875e-05, "loss": 11.812, "step": 25854 }, { "epoch": 1.407908081653768, "grad_norm": 0.5444994776815661, "learning_rate": 4.255049358119123e-05, "loss": 11.913, "step": 25855 }, { "epoch": 1.407962535650351, "grad_norm": 0.5496574992041923, "learning_rate": 4.254327598931074e-05, "loss": 11.9443, "step": 25856 }, { "epoch": 1.408016989646934, "grad_norm": 0.5914788405976005, "learning_rate": 4.253605884422334e-05, "loss": 11.8381, "step": 25857 }, { "epoch": 1.4080714436435169, "grad_norm": 0.5455994167009618, "learning_rate": 4.2528842145985214e-05, "loss": 11.8733, "step": 25858 }, { "epoch": 1.4081258976400999, "grad_norm": 0.5345820288709208, "learning_rate": 4.252162589465245e-05, "loss": 12.0014, "step": 25859 }, { "epoch": 1.4081803516366829, "grad_norm": 0.5798557355366813, "learning_rate": 4.251441009028113e-05, "loss": 11.958, "step": 25860 }, { "epoch": 1.408234805633266, "grad_norm": 0.5207074577882463, "learning_rate": 4.2507194732927433e-05, "loss": 11.8906, "step": 25861 }, { "epoch": 1.408289259629849, "grad_norm": 0.5041419379473353, "learning_rate": 4.2499979822647396e-05, "loss": 11.8203, "step": 25862 }, { "epoch": 1.408343713626432, "grad_norm": 0.5565637241578298, "learning_rate": 4.2492765359497187e-05, "loss": 11.7416, "step": 25863 }, { "epoch": 1.408398167623015, "grad_norm": 0.5591302357801531, "learning_rate": 4.248555134353285e-05, "loss": 12.0768, "step": 25864 }, { "epoch": 1.408452621619598, "grad_norm": 0.5502567109883069, "learning_rate": 4.2478337774810504e-05, "loss": 11.9551, "step": 25865 }, { "epoch": 1.408507075616181, "grad_norm": 0.5295241005744646, "learning_rate": 4.2471124653386294e-05, "loss": 11.8909, "step": 25866 }, { "epoch": 1.408561529612764, "grad_norm": 0.5834154258213458, "learning_rate": 4.2463911979316226e-05, "loss": 11.6616, "step": 25867 }, { "epoch": 1.408615983609347, "grad_norm": 0.5363876180307716, "learning_rate": 4.245669975265646e-05, "loss": 11.9225, "step": 25868 }, { "epoch": 1.40867043760593, "grad_norm": 0.5021256883576487, "learning_rate": 4.2449487973463055e-05, "loss": 11.9048, "step": 25869 }, { "epoch": 1.408724891602513, "grad_norm": 0.5526527474962228, "learning_rate": 4.2442276641792046e-05, "loss": 11.9494, "step": 25870 }, { "epoch": 1.408779345599096, "grad_norm": 0.5518407773921247, "learning_rate": 4.2435065757699575e-05, "loss": 11.8001, "step": 25871 }, { "epoch": 1.408833799595679, "grad_norm": 0.535865054136345, "learning_rate": 4.242785532124165e-05, "loss": 11.7701, "step": 25872 }, { "epoch": 1.408888253592262, "grad_norm": 0.5547459065259486, "learning_rate": 4.242064533247442e-05, "loss": 11.9321, "step": 25873 }, { "epoch": 1.408942707588845, "grad_norm": 0.5226768792334109, "learning_rate": 4.241343579145387e-05, "loss": 11.8714, "step": 25874 }, { "epoch": 1.408997161585428, "grad_norm": 0.5393516411682427, "learning_rate": 4.240622669823614e-05, "loss": 11.9472, "step": 25875 }, { "epoch": 1.4090516155820112, "grad_norm": 0.49869323998245224, "learning_rate": 4.2399018052877205e-05, "loss": 11.8296, "step": 25876 }, { "epoch": 1.4091060695785942, "grad_norm": 0.5525337755186532, "learning_rate": 4.2391809855433164e-05, "loss": 11.8329, "step": 25877 }, { "epoch": 1.4091605235751772, "grad_norm": 0.5040937477148583, "learning_rate": 4.2384602105960115e-05, "loss": 11.9391, "step": 25878 }, { "epoch": 1.4092149775717602, "grad_norm": 0.5958430697037971, "learning_rate": 4.237739480451406e-05, "loss": 12.0315, "step": 25879 }, { "epoch": 1.4092694315683432, "grad_norm": 0.5141700211675545, "learning_rate": 4.237018795115101e-05, "loss": 11.8403, "step": 25880 }, { "epoch": 1.4093238855649262, "grad_norm": 0.5118231921700256, "learning_rate": 4.236298154592707e-05, "loss": 11.9423, "step": 25881 }, { "epoch": 1.4093783395615092, "grad_norm": 0.5595852370792733, "learning_rate": 4.235577558889823e-05, "loss": 11.9391, "step": 25882 }, { "epoch": 1.4094327935580921, "grad_norm": 0.5730760624709044, "learning_rate": 4.2348570080120576e-05, "loss": 11.8369, "step": 25883 }, { "epoch": 1.4094872475546751, "grad_norm": 0.5328544162840606, "learning_rate": 4.234136501965007e-05, "loss": 11.8821, "step": 25884 }, { "epoch": 1.4095417015512584, "grad_norm": 0.49246478701004204, "learning_rate": 4.2334160407542824e-05, "loss": 11.9199, "step": 25885 }, { "epoch": 1.4095961555478413, "grad_norm": 0.598958754766261, "learning_rate": 4.232695624385478e-05, "loss": 11.7857, "step": 25886 }, { "epoch": 1.4096506095444243, "grad_norm": 0.522251487365959, "learning_rate": 4.2319752528641986e-05, "loss": 11.8622, "step": 25887 }, { "epoch": 1.4097050635410073, "grad_norm": 0.5077513731704751, "learning_rate": 4.231254926196051e-05, "loss": 11.8541, "step": 25888 }, { "epoch": 1.4097595175375903, "grad_norm": 0.5783091568956663, "learning_rate": 4.230534644386632e-05, "loss": 11.8103, "step": 25889 }, { "epoch": 1.4098139715341733, "grad_norm": 0.5622060053448952, "learning_rate": 4.229814407441544e-05, "loss": 11.8969, "step": 25890 }, { "epoch": 1.4098684255307563, "grad_norm": 0.6072125634276111, "learning_rate": 4.2290942153663825e-05, "loss": 11.9061, "step": 25891 }, { "epoch": 1.4099228795273393, "grad_norm": 0.6973086392010021, "learning_rate": 4.228374068166753e-05, "loss": 11.8754, "step": 25892 }, { "epoch": 1.4099773335239223, "grad_norm": 0.513304898126486, "learning_rate": 4.2276539658482574e-05, "loss": 11.8946, "step": 25893 }, { "epoch": 1.4100317875205053, "grad_norm": 0.5527366824529903, "learning_rate": 4.226933908416489e-05, "loss": 11.7546, "step": 25894 }, { "epoch": 1.4100862415170883, "grad_norm": 0.5491514441669649, "learning_rate": 4.226213895877056e-05, "loss": 11.9177, "step": 25895 }, { "epoch": 1.4101406955136713, "grad_norm": 0.550730264638003, "learning_rate": 4.2254939282355466e-05, "loss": 11.8666, "step": 25896 }, { "epoch": 1.4101951495102543, "grad_norm": 0.5456382860175364, "learning_rate": 4.224774005497568e-05, "loss": 11.753, "step": 25897 }, { "epoch": 1.4102496035068373, "grad_norm": 0.5516554284156222, "learning_rate": 4.224054127668713e-05, "loss": 11.8482, "step": 25898 }, { "epoch": 1.4103040575034205, "grad_norm": 0.5625259871003233, "learning_rate": 4.223334294754585e-05, "loss": 11.904, "step": 25899 }, { "epoch": 1.4103585115000035, "grad_norm": 0.5618478905726333, "learning_rate": 4.222614506760778e-05, "loss": 11.9644, "step": 25900 }, { "epoch": 1.4104129654965865, "grad_norm": 0.6460636520761004, "learning_rate": 4.221894763692886e-05, "loss": 12.0559, "step": 25901 }, { "epoch": 1.4104674194931694, "grad_norm": 0.5273121768935696, "learning_rate": 4.22117506555651e-05, "loss": 11.9369, "step": 25902 }, { "epoch": 1.4105218734897524, "grad_norm": 0.5162378960358935, "learning_rate": 4.2204554123572485e-05, "loss": 11.9836, "step": 25903 }, { "epoch": 1.4105763274863354, "grad_norm": 0.5196327394712076, "learning_rate": 4.219735804100691e-05, "loss": 11.7405, "step": 25904 }, { "epoch": 1.4106307814829184, "grad_norm": 0.5254282810437462, "learning_rate": 4.219016240792442e-05, "loss": 11.9094, "step": 25905 }, { "epoch": 1.4106852354795014, "grad_norm": 0.5216154572046777, "learning_rate": 4.218296722438087e-05, "loss": 11.8347, "step": 25906 }, { "epoch": 1.4107396894760844, "grad_norm": 0.541167348260566, "learning_rate": 4.217577249043231e-05, "loss": 11.9124, "step": 25907 }, { "epoch": 1.4107941434726676, "grad_norm": 0.5565774346147542, "learning_rate": 4.21685782061346e-05, "loss": 11.8609, "step": 25908 }, { "epoch": 1.4108485974692506, "grad_norm": 0.51847801428641, "learning_rate": 4.216138437154377e-05, "loss": 11.862, "step": 25909 }, { "epoch": 1.4109030514658336, "grad_norm": 0.5120099100789547, "learning_rate": 4.2154190986715704e-05, "loss": 11.8856, "step": 25910 }, { "epoch": 1.4109575054624166, "grad_norm": 0.5283512903397058, "learning_rate": 4.214699805170632e-05, "loss": 11.864, "step": 25911 }, { "epoch": 1.4110119594589996, "grad_norm": 0.5636639989040957, "learning_rate": 4.213980556657161e-05, "loss": 11.9639, "step": 25912 }, { "epoch": 1.4110664134555826, "grad_norm": 0.6077813526368897, "learning_rate": 4.213261353136745e-05, "loss": 11.934, "step": 25913 }, { "epoch": 1.4111208674521656, "grad_norm": 0.5086606259692638, "learning_rate": 4.212542194614978e-05, "loss": 11.7002, "step": 25914 }, { "epoch": 1.4111753214487486, "grad_norm": 0.5502609041887185, "learning_rate": 4.211823081097458e-05, "loss": 11.8365, "step": 25915 }, { "epoch": 1.4112297754453316, "grad_norm": 0.5273037271101029, "learning_rate": 4.2111040125897685e-05, "loss": 11.8961, "step": 25916 }, { "epoch": 1.4112842294419146, "grad_norm": 0.5133770616820434, "learning_rate": 4.210384989097508e-05, "loss": 11.8726, "step": 25917 }, { "epoch": 1.4113386834384976, "grad_norm": 0.5029753219698895, "learning_rate": 4.209666010626262e-05, "loss": 11.79, "step": 25918 }, { "epoch": 1.4113931374350805, "grad_norm": 0.4950874803758819, "learning_rate": 4.208947077181626e-05, "loss": 11.8046, "step": 25919 }, { "epoch": 1.4114475914316635, "grad_norm": 0.6203688283204638, "learning_rate": 4.208228188769191e-05, "loss": 11.9855, "step": 25920 }, { "epoch": 1.4115020454282465, "grad_norm": 0.5162160457790488, "learning_rate": 4.20750934539454e-05, "loss": 11.8382, "step": 25921 }, { "epoch": 1.4115564994248295, "grad_norm": 0.5355800112870308, "learning_rate": 4.206790547063272e-05, "loss": 11.8719, "step": 25922 }, { "epoch": 1.4116109534214127, "grad_norm": 0.6020767351640445, "learning_rate": 4.206071793780968e-05, "loss": 11.9148, "step": 25923 }, { "epoch": 1.4116654074179957, "grad_norm": 0.5947597914235936, "learning_rate": 4.2053530855532254e-05, "loss": 11.9118, "step": 25924 }, { "epoch": 1.4117198614145787, "grad_norm": 0.5909367902531036, "learning_rate": 4.204634422385625e-05, "loss": 11.84, "step": 25925 }, { "epoch": 1.4117743154111617, "grad_norm": 0.5570109662277771, "learning_rate": 4.203915804283759e-05, "loss": 11.8911, "step": 25926 }, { "epoch": 1.4118287694077447, "grad_norm": 0.5541344179528458, "learning_rate": 4.2031972312532196e-05, "loss": 11.8311, "step": 25927 }, { "epoch": 1.4118832234043277, "grad_norm": 0.5637549961939454, "learning_rate": 4.202478703299587e-05, "loss": 11.8737, "step": 25928 }, { "epoch": 1.4119376774009107, "grad_norm": 0.5726764453654672, "learning_rate": 4.201760220428457e-05, "loss": 11.9924, "step": 25929 }, { "epoch": 1.4119921313974937, "grad_norm": 0.5894911664127516, "learning_rate": 4.2010417826454106e-05, "loss": 11.8918, "step": 25930 }, { "epoch": 1.412046585394077, "grad_norm": 0.6004226055445582, "learning_rate": 4.200323389956032e-05, "loss": 11.8862, "step": 25931 }, { "epoch": 1.41210103939066, "grad_norm": 0.5289710695479329, "learning_rate": 4.199605042365916e-05, "loss": 11.7657, "step": 25932 }, { "epoch": 1.4121554933872429, "grad_norm": 0.5841983179479884, "learning_rate": 4.19888673988064e-05, "loss": 11.9163, "step": 25933 }, { "epoch": 1.4122099473838259, "grad_norm": 0.5469696232190195, "learning_rate": 4.1981684825057966e-05, "loss": 11.9362, "step": 25934 }, { "epoch": 1.4122644013804089, "grad_norm": 0.5852481285258428, "learning_rate": 4.197450270246964e-05, "loss": 11.9623, "step": 25935 }, { "epoch": 1.4123188553769919, "grad_norm": 0.5803512784379835, "learning_rate": 4.196732103109733e-05, "loss": 11.9014, "step": 25936 }, { "epoch": 1.4123733093735749, "grad_norm": 0.5560973915181886, "learning_rate": 4.196013981099689e-05, "loss": 11.9202, "step": 25937 }, { "epoch": 1.4124277633701579, "grad_norm": 0.5695512447169855, "learning_rate": 4.1952959042224095e-05, "loss": 11.9226, "step": 25938 }, { "epoch": 1.4124822173667408, "grad_norm": 0.555477549408239, "learning_rate": 4.1945778724834906e-05, "loss": 11.9163, "step": 25939 }, { "epoch": 1.4125366713633238, "grad_norm": 0.5212130144156074, "learning_rate": 4.193859885888499e-05, "loss": 11.9148, "step": 25940 }, { "epoch": 1.4125911253599068, "grad_norm": 0.5884439021591624, "learning_rate": 4.1931419444430274e-05, "loss": 11.9955, "step": 25941 }, { "epoch": 1.4126455793564898, "grad_norm": 0.5246729502820188, "learning_rate": 4.192424048152662e-05, "loss": 11.9317, "step": 25942 }, { "epoch": 1.4127000333530728, "grad_norm": 0.5588712101795598, "learning_rate": 4.1917061970229775e-05, "loss": 11.8533, "step": 25943 }, { "epoch": 1.4127544873496558, "grad_norm": 0.5507920691027859, "learning_rate": 4.1909883910595625e-05, "loss": 11.9232, "step": 25944 }, { "epoch": 1.4128089413462388, "grad_norm": 0.6211357304721636, "learning_rate": 4.190270630267994e-05, "loss": 11.8887, "step": 25945 }, { "epoch": 1.412863395342822, "grad_norm": 0.5255692057499124, "learning_rate": 4.189552914653857e-05, "loss": 11.8578, "step": 25946 }, { "epoch": 1.412917849339405, "grad_norm": 0.5233315789125356, "learning_rate": 4.188835244222729e-05, "loss": 12.0373, "step": 25947 }, { "epoch": 1.412972303335988, "grad_norm": 0.6191500565628775, "learning_rate": 4.188117618980192e-05, "loss": 11.9274, "step": 25948 }, { "epoch": 1.413026757332571, "grad_norm": 0.5176264051522861, "learning_rate": 4.187400038931834e-05, "loss": 11.8133, "step": 25949 }, { "epoch": 1.413081211329154, "grad_norm": 0.5517724031078244, "learning_rate": 4.1866825040832205e-05, "loss": 11.953, "step": 25950 }, { "epoch": 1.413135665325737, "grad_norm": 0.5731983108006287, "learning_rate": 4.185965014439939e-05, "loss": 11.9859, "step": 25951 }, { "epoch": 1.41319011932232, "grad_norm": 0.5820963108441654, "learning_rate": 4.1852475700075724e-05, "loss": 11.7734, "step": 25952 }, { "epoch": 1.413244573318903, "grad_norm": 0.5010189682805626, "learning_rate": 4.184530170791692e-05, "loss": 11.8814, "step": 25953 }, { "epoch": 1.413299027315486, "grad_norm": 0.5401379374152437, "learning_rate": 4.183812816797884e-05, "loss": 11.9156, "step": 25954 }, { "epoch": 1.4133534813120692, "grad_norm": 0.5478793243519098, "learning_rate": 4.183095508031719e-05, "loss": 11.7476, "step": 25955 }, { "epoch": 1.4134079353086522, "grad_norm": 0.5464283418929661, "learning_rate": 4.182378244498784e-05, "loss": 11.9163, "step": 25956 }, { "epoch": 1.4134623893052352, "grad_norm": 0.4842141536250966, "learning_rate": 4.1816610262046464e-05, "loss": 11.7685, "step": 25957 }, { "epoch": 1.4135168433018181, "grad_norm": 0.5477737922724731, "learning_rate": 4.180943853154888e-05, "loss": 12.0035, "step": 25958 }, { "epoch": 1.4135712972984011, "grad_norm": 0.538179849185479, "learning_rate": 4.1802267253550945e-05, "loss": 11.8145, "step": 25959 }, { "epoch": 1.4136257512949841, "grad_norm": 0.5399542451935301, "learning_rate": 4.179509642810826e-05, "loss": 11.7071, "step": 25960 }, { "epoch": 1.4136802052915671, "grad_norm": 0.5188807013594945, "learning_rate": 4.178792605527669e-05, "loss": 11.9183, "step": 25961 }, { "epoch": 1.4137346592881501, "grad_norm": 0.5547543279310141, "learning_rate": 4.178075613511195e-05, "loss": 11.8354, "step": 25962 }, { "epoch": 1.4137891132847331, "grad_norm": 0.5450098727823062, "learning_rate": 4.1773586667669815e-05, "loss": 11.9785, "step": 25963 }, { "epoch": 1.413843567281316, "grad_norm": 0.5382920333937433, "learning_rate": 4.1766417653006065e-05, "loss": 11.7307, "step": 25964 }, { "epoch": 1.413898021277899, "grad_norm": 0.5608936529144182, "learning_rate": 4.175924909117638e-05, "loss": 11.9767, "step": 25965 }, { "epoch": 1.413952475274482, "grad_norm": 0.5389868238399339, "learning_rate": 4.175208098223657e-05, "loss": 11.7917, "step": 25966 }, { "epoch": 1.414006929271065, "grad_norm": 0.5091692630679351, "learning_rate": 4.174491332624233e-05, "loss": 11.8441, "step": 25967 }, { "epoch": 1.414061383267648, "grad_norm": 0.6467762865340976, "learning_rate": 4.173774612324943e-05, "loss": 11.9332, "step": 25968 }, { "epoch": 1.4141158372642313, "grad_norm": 0.5567841750221176, "learning_rate": 4.17305793733136e-05, "loss": 11.9145, "step": 25969 }, { "epoch": 1.4141702912608143, "grad_norm": 0.5222396009735883, "learning_rate": 4.1723413076490514e-05, "loss": 11.8906, "step": 25970 }, { "epoch": 1.4142247452573973, "grad_norm": 0.5485755108908761, "learning_rate": 4.171624723283598e-05, "loss": 11.913, "step": 25971 }, { "epoch": 1.4142791992539803, "grad_norm": 0.5922561876464869, "learning_rate": 4.170908184240565e-05, "loss": 11.9357, "step": 25972 }, { "epoch": 1.4143336532505633, "grad_norm": 0.5856668848009153, "learning_rate": 4.17019169052553e-05, "loss": 11.8688, "step": 25973 }, { "epoch": 1.4143881072471463, "grad_norm": 0.538227228542079, "learning_rate": 4.169475242144059e-05, "loss": 11.9368, "step": 25974 }, { "epoch": 1.4144425612437292, "grad_norm": 0.5515683291958238, "learning_rate": 4.168758839101726e-05, "loss": 11.8369, "step": 25975 }, { "epoch": 1.4144970152403122, "grad_norm": 0.5537332162780446, "learning_rate": 4.168042481404106e-05, "loss": 11.9316, "step": 25976 }, { "epoch": 1.4145514692368952, "grad_norm": 0.5196901118528175, "learning_rate": 4.1673261690567625e-05, "loss": 11.9138, "step": 25977 }, { "epoch": 1.4146059232334784, "grad_norm": 0.5744457087145551, "learning_rate": 4.1666099020652725e-05, "loss": 11.9619, "step": 25978 }, { "epoch": 1.4146603772300614, "grad_norm": 0.5410766570338509, "learning_rate": 4.1658936804352014e-05, "loss": 11.8747, "step": 25979 }, { "epoch": 1.4147148312266444, "grad_norm": 0.4945232811183512, "learning_rate": 4.165177504172116e-05, "loss": 11.8752, "step": 25980 }, { "epoch": 1.4147692852232274, "grad_norm": 0.508991236122, "learning_rate": 4.164461373281593e-05, "loss": 11.9007, "step": 25981 }, { "epoch": 1.4148237392198104, "grad_norm": 0.5330165501366706, "learning_rate": 4.1637452877691926e-05, "loss": 11.8423, "step": 25982 }, { "epoch": 1.4148781932163934, "grad_norm": 0.5652449436507545, "learning_rate": 4.163029247640492e-05, "loss": 11.6582, "step": 25983 }, { "epoch": 1.4149326472129764, "grad_norm": 0.519014294380056, "learning_rate": 4.16231325290105e-05, "loss": 11.8555, "step": 25984 }, { "epoch": 1.4149871012095594, "grad_norm": 0.5537374027393267, "learning_rate": 4.16159730355644e-05, "loss": 11.7105, "step": 25985 }, { "epoch": 1.4150415552061424, "grad_norm": 0.570714286819854, "learning_rate": 4.160881399612232e-05, "loss": 12.0244, "step": 25986 }, { "epoch": 1.4150960092027254, "grad_norm": 0.5083293404096292, "learning_rate": 4.160165541073986e-05, "loss": 11.8749, "step": 25987 }, { "epoch": 1.4151504631993084, "grad_norm": 0.5373861650093892, "learning_rate": 4.1594497279472756e-05, "loss": 11.9579, "step": 25988 }, { "epoch": 1.4152049171958914, "grad_norm": 0.5370248081639699, "learning_rate": 4.158733960237664e-05, "loss": 11.8867, "step": 25989 }, { "epoch": 1.4152593711924744, "grad_norm": 0.5386214998025568, "learning_rate": 4.1580182379507125e-05, "loss": 11.8309, "step": 25990 }, { "epoch": 1.4153138251890574, "grad_norm": 0.5192645532576364, "learning_rate": 4.157302561091995e-05, "loss": 11.8792, "step": 25991 }, { "epoch": 1.4153682791856403, "grad_norm": 0.5721741265348732, "learning_rate": 4.156586929667068e-05, "loss": 11.7963, "step": 25992 }, { "epoch": 1.4154227331822236, "grad_norm": 0.5601051246503809, "learning_rate": 4.1558713436815055e-05, "loss": 11.9062, "step": 25993 }, { "epoch": 1.4154771871788066, "grad_norm": 0.5503199851394696, "learning_rate": 4.1551558031408634e-05, "loss": 11.7497, "step": 25994 }, { "epoch": 1.4155316411753895, "grad_norm": 0.5728285074879068, "learning_rate": 4.154440308050713e-05, "loss": 11.8929, "step": 25995 }, { "epoch": 1.4155860951719725, "grad_norm": 0.5737624825764145, "learning_rate": 4.1537248584166136e-05, "loss": 11.8988, "step": 25996 }, { "epoch": 1.4156405491685555, "grad_norm": 0.5604210694310348, "learning_rate": 4.1530094542441276e-05, "loss": 11.6098, "step": 25997 }, { "epoch": 1.4156950031651385, "grad_norm": 0.5971442269481954, "learning_rate": 4.152294095538826e-05, "loss": 11.632, "step": 25998 }, { "epoch": 1.4157494571617215, "grad_norm": 0.587712392055818, "learning_rate": 4.151578782306266e-05, "loss": 11.8529, "step": 25999 }, { "epoch": 1.4158039111583045, "grad_norm": 0.5199201438969665, "learning_rate": 4.150863514552006e-05, "loss": 11.8342, "step": 26000 }, { "epoch": 1.4158583651548877, "grad_norm": 0.5491790085603228, "learning_rate": 4.150148292281615e-05, "loss": 11.8861, "step": 26001 }, { "epoch": 1.4159128191514707, "grad_norm": 0.5391689353760124, "learning_rate": 4.149433115500649e-05, "loss": 11.9343, "step": 26002 }, { "epoch": 1.4159672731480537, "grad_norm": 0.5539038199046, "learning_rate": 4.148717984214676e-05, "loss": 11.8852, "step": 26003 }, { "epoch": 1.4160217271446367, "grad_norm": 0.5887859534750126, "learning_rate": 4.1480028984292494e-05, "loss": 11.8545, "step": 26004 }, { "epoch": 1.4160761811412197, "grad_norm": 0.4961945883089661, "learning_rate": 4.147287858149939e-05, "loss": 11.819, "step": 26005 }, { "epoch": 1.4161306351378027, "grad_norm": 0.5518306064887253, "learning_rate": 4.1465728633822945e-05, "loss": 11.9726, "step": 26006 }, { "epoch": 1.4161850891343857, "grad_norm": 0.5842209688541331, "learning_rate": 4.1458579141318806e-05, "loss": 11.8569, "step": 26007 }, { "epoch": 1.4162395431309687, "grad_norm": 0.5350962990333932, "learning_rate": 4.145143010404261e-05, "loss": 11.9312, "step": 26008 }, { "epoch": 1.4162939971275517, "grad_norm": 0.5791220006522387, "learning_rate": 4.144428152204992e-05, "loss": 11.9225, "step": 26009 }, { "epoch": 1.4163484511241347, "grad_norm": 0.5465158073386612, "learning_rate": 4.143713339539631e-05, "loss": 11.9973, "step": 26010 }, { "epoch": 1.4164029051207176, "grad_norm": 0.54725218969395, "learning_rate": 4.1429985724137344e-05, "loss": 11.8318, "step": 26011 }, { "epoch": 1.4164573591173006, "grad_norm": 0.5809625208062601, "learning_rate": 4.142283850832862e-05, "loss": 11.957, "step": 26012 }, { "epoch": 1.4165118131138836, "grad_norm": 0.522191329054975, "learning_rate": 4.141569174802577e-05, "loss": 11.8639, "step": 26013 }, { "epoch": 1.4165662671104666, "grad_norm": 0.5063744605868702, "learning_rate": 4.1408545443284296e-05, "loss": 11.8261, "step": 26014 }, { "epoch": 1.4166207211070496, "grad_norm": 0.5846019839427633, "learning_rate": 4.140139959415983e-05, "loss": 11.8435, "step": 26015 }, { "epoch": 1.4166751751036328, "grad_norm": 0.5365235964460565, "learning_rate": 4.139425420070786e-05, "loss": 11.9106, "step": 26016 }, { "epoch": 1.4167296291002158, "grad_norm": 0.5778544788992478, "learning_rate": 4.1387109262984045e-05, "loss": 11.7469, "step": 26017 }, { "epoch": 1.4167840830967988, "grad_norm": 0.553398277724836, "learning_rate": 4.137996478104386e-05, "loss": 11.8099, "step": 26018 }, { "epoch": 1.4168385370933818, "grad_norm": 0.5741702152478732, "learning_rate": 4.13728207549429e-05, "loss": 11.8734, "step": 26019 }, { "epoch": 1.4168929910899648, "grad_norm": 0.550900619627361, "learning_rate": 4.136567718473678e-05, "loss": 11.8239, "step": 26020 }, { "epoch": 1.4169474450865478, "grad_norm": 0.544200301115898, "learning_rate": 4.135853407048093e-05, "loss": 11.8117, "step": 26021 }, { "epoch": 1.4170018990831308, "grad_norm": 0.5174507075260631, "learning_rate": 4.135139141223098e-05, "loss": 11.908, "step": 26022 }, { "epoch": 1.4170563530797138, "grad_norm": 0.6257017394501336, "learning_rate": 4.1344249210042416e-05, "loss": 12.0665, "step": 26023 }, { "epoch": 1.4171108070762968, "grad_norm": 0.533852899698103, "learning_rate": 4.13371074639708e-05, "loss": 11.7009, "step": 26024 }, { "epoch": 1.41716526107288, "grad_norm": 0.5477964478698966, "learning_rate": 4.132996617407171e-05, "loss": 11.8385, "step": 26025 }, { "epoch": 1.417219715069463, "grad_norm": 0.6206393240749204, "learning_rate": 4.132282534040062e-05, "loss": 11.9334, "step": 26026 }, { "epoch": 1.417274169066046, "grad_norm": 0.5392711500843311, "learning_rate": 4.13156849630131e-05, "loss": 11.5982, "step": 26027 }, { "epoch": 1.417328623062629, "grad_norm": 0.5773436095740883, "learning_rate": 4.130854504196463e-05, "loss": 11.9154, "step": 26028 }, { "epoch": 1.417383077059212, "grad_norm": 0.614142876669388, "learning_rate": 4.130140557731078e-05, "loss": 11.7, "step": 26029 }, { "epoch": 1.417437531055795, "grad_norm": 0.6222823179624032, "learning_rate": 4.1294266569107054e-05, "loss": 11.9589, "step": 26030 }, { "epoch": 1.417491985052378, "grad_norm": 0.4657596046349355, "learning_rate": 4.1287128017408915e-05, "loss": 11.8332, "step": 26031 }, { "epoch": 1.417546439048961, "grad_norm": 0.533174547148453, "learning_rate": 4.127998992227196e-05, "loss": 11.8108, "step": 26032 }, { "epoch": 1.417600893045544, "grad_norm": 0.5240046027988714, "learning_rate": 4.1272852283751603e-05, "loss": 11.9401, "step": 26033 }, { "epoch": 1.417655347042127, "grad_norm": 0.5544403069218031, "learning_rate": 4.1265715101903405e-05, "loss": 11.8395, "step": 26034 }, { "epoch": 1.41770980103871, "grad_norm": 0.4970618399510174, "learning_rate": 4.1258578376782886e-05, "loss": 11.7907, "step": 26035 }, { "epoch": 1.417764255035293, "grad_norm": 0.5300108850664907, "learning_rate": 4.125144210844547e-05, "loss": 11.8877, "step": 26036 }, { "epoch": 1.417818709031876, "grad_norm": 0.5711334236326334, "learning_rate": 4.1244306296946736e-05, "loss": 11.9022, "step": 26037 }, { "epoch": 1.417873163028459, "grad_norm": 0.6409867052380667, "learning_rate": 4.1237170942342094e-05, "loss": 11.9016, "step": 26038 }, { "epoch": 1.417927617025042, "grad_norm": 0.5461814100789183, "learning_rate": 4.12300360446871e-05, "loss": 11.8645, "step": 26039 }, { "epoch": 1.417982071021625, "grad_norm": 0.5403477338091139, "learning_rate": 4.12229016040372e-05, "loss": 11.8089, "step": 26040 }, { "epoch": 1.418036525018208, "grad_norm": 0.5330103235941679, "learning_rate": 4.121576762044784e-05, "loss": 11.8497, "step": 26041 }, { "epoch": 1.418090979014791, "grad_norm": 0.5678020052658446, "learning_rate": 4.1208634093974575e-05, "loss": 11.9499, "step": 26042 }, { "epoch": 1.418145433011374, "grad_norm": 0.5869124138748899, "learning_rate": 4.120150102467279e-05, "loss": 11.8834, "step": 26043 }, { "epoch": 1.418199887007957, "grad_norm": 0.5267235374511102, "learning_rate": 4.1194368412598015e-05, "loss": 11.9181, "step": 26044 }, { "epoch": 1.41825434100454, "grad_norm": 0.5408424887123142, "learning_rate": 4.118723625780566e-05, "loss": 11.907, "step": 26045 }, { "epoch": 1.418308795001123, "grad_norm": 0.5820581437565443, "learning_rate": 4.118010456035123e-05, "loss": 11.8812, "step": 26046 }, { "epoch": 1.418363248997706, "grad_norm": 0.5774985774803503, "learning_rate": 4.11729733202902e-05, "loss": 11.9813, "step": 26047 }, { "epoch": 1.4184177029942893, "grad_norm": 0.5343610725050975, "learning_rate": 4.1165842537677954e-05, "loss": 11.8822, "step": 26048 }, { "epoch": 1.4184721569908723, "grad_norm": 0.537262384378862, "learning_rate": 4.1158712212570036e-05, "loss": 11.9668, "step": 26049 }, { "epoch": 1.4185266109874552, "grad_norm": 0.5063228528453032, "learning_rate": 4.115158234502182e-05, "loss": 11.7703, "step": 26050 }, { "epoch": 1.4185810649840382, "grad_norm": 0.5652784886923571, "learning_rate": 4.114445293508875e-05, "loss": 11.8963, "step": 26051 }, { "epoch": 1.4186355189806212, "grad_norm": 0.565569947237846, "learning_rate": 4.1137323982826314e-05, "loss": 11.8755, "step": 26052 }, { "epoch": 1.4186899729772042, "grad_norm": 0.5304698072295083, "learning_rate": 4.113019548828988e-05, "loss": 11.7958, "step": 26053 }, { "epoch": 1.4187444269737872, "grad_norm": 0.6203083568848862, "learning_rate": 4.1123067451534944e-05, "loss": 11.9796, "step": 26054 }, { "epoch": 1.4187988809703702, "grad_norm": 0.5354760052231521, "learning_rate": 4.111593987261689e-05, "loss": 11.9106, "step": 26055 }, { "epoch": 1.4188533349669532, "grad_norm": 0.5340381212757845, "learning_rate": 4.110881275159115e-05, "loss": 11.9029, "step": 26056 }, { "epoch": 1.4189077889635362, "grad_norm": 0.6101841736073859, "learning_rate": 4.11016860885132e-05, "loss": 11.9062, "step": 26057 }, { "epoch": 1.4189622429601192, "grad_norm": 0.5232982904949554, "learning_rate": 4.109455988343839e-05, "loss": 11.8734, "step": 26058 }, { "epoch": 1.4190166969567022, "grad_norm": 0.5467383575943048, "learning_rate": 4.108743413642219e-05, "loss": 11.7749, "step": 26059 }, { "epoch": 1.4190711509532852, "grad_norm": 0.5555725119473592, "learning_rate": 4.1080308847519966e-05, "loss": 11.7675, "step": 26060 }, { "epoch": 1.4191256049498682, "grad_norm": 0.524627134341648, "learning_rate": 4.1073184016787124e-05, "loss": 11.8669, "step": 26061 }, { "epoch": 1.4191800589464514, "grad_norm": 0.5357802924944004, "learning_rate": 4.106605964427911e-05, "loss": 11.8418, "step": 26062 }, { "epoch": 1.4192345129430344, "grad_norm": 0.5722948634309796, "learning_rate": 4.105893573005128e-05, "loss": 11.8218, "step": 26063 }, { "epoch": 1.4192889669396174, "grad_norm": 0.5262608179823776, "learning_rate": 4.1051812274159064e-05, "loss": 11.9163, "step": 26064 }, { "epoch": 1.4193434209362004, "grad_norm": 0.5398488565451185, "learning_rate": 4.104468927665782e-05, "loss": 11.9241, "step": 26065 }, { "epoch": 1.4193978749327834, "grad_norm": 0.5687012735162117, "learning_rate": 4.1037566737602994e-05, "loss": 11.8976, "step": 26066 }, { "epoch": 1.4194523289293663, "grad_norm": 0.5131670006623822, "learning_rate": 4.1030444657049894e-05, "loss": 11.8278, "step": 26067 }, { "epoch": 1.4195067829259493, "grad_norm": 0.5906771309699338, "learning_rate": 4.102332303505395e-05, "loss": 11.9589, "step": 26068 }, { "epoch": 1.4195612369225323, "grad_norm": 0.5221242590418391, "learning_rate": 4.1016201871670566e-05, "loss": 11.7565, "step": 26069 }, { "epoch": 1.4196156909191153, "grad_norm": 0.5080847532759977, "learning_rate": 4.10090811669551e-05, "loss": 11.8155, "step": 26070 }, { "epoch": 1.4196701449156985, "grad_norm": 0.6013787933184735, "learning_rate": 4.10019609209629e-05, "loss": 12.0543, "step": 26071 }, { "epoch": 1.4197245989122815, "grad_norm": 0.5656051014291049, "learning_rate": 4.0994841133749316e-05, "loss": 11.9374, "step": 26072 }, { "epoch": 1.4197790529088645, "grad_norm": 0.5337381619939967, "learning_rate": 4.098772180536975e-05, "loss": 11.8808, "step": 26073 }, { "epoch": 1.4198335069054475, "grad_norm": 0.5726554967655715, "learning_rate": 4.098060293587957e-05, "loss": 11.8528, "step": 26074 }, { "epoch": 1.4198879609020305, "grad_norm": 0.6861360714871024, "learning_rate": 4.09734845253341e-05, "loss": 11.9746, "step": 26075 }, { "epoch": 1.4199424148986135, "grad_norm": 0.5730838285555493, "learning_rate": 4.096636657378875e-05, "loss": 11.8877, "step": 26076 }, { "epoch": 1.4199968688951965, "grad_norm": 0.5645379395460011, "learning_rate": 4.09592490812988e-05, "loss": 11.9075, "step": 26077 }, { "epoch": 1.4200513228917795, "grad_norm": 0.5888163244518616, "learning_rate": 4.0952132047919654e-05, "loss": 11.8095, "step": 26078 }, { "epoch": 1.4201057768883625, "grad_norm": 0.5114499624367852, "learning_rate": 4.094501547370661e-05, "loss": 11.8477, "step": 26079 }, { "epoch": 1.4201602308849455, "grad_norm": 0.5129660993821542, "learning_rate": 4.093789935871505e-05, "loss": 11.847, "step": 26080 }, { "epoch": 1.4202146848815285, "grad_norm": 0.5390772687294157, "learning_rate": 4.09307837030003e-05, "loss": 11.9161, "step": 26081 }, { "epoch": 1.4202691388781115, "grad_norm": 0.5380222504227737, "learning_rate": 4.092366850661763e-05, "loss": 11.8749, "step": 26082 }, { "epoch": 1.4203235928746945, "grad_norm": 0.5877424641458174, "learning_rate": 4.0916553769622435e-05, "loss": 11.9353, "step": 26083 }, { "epoch": 1.4203780468712774, "grad_norm": 0.5670860176310908, "learning_rate": 4.0909439492070054e-05, "loss": 11.8937, "step": 26084 }, { "epoch": 1.4204325008678604, "grad_norm": 0.6083928727948095, "learning_rate": 4.090232567401576e-05, "loss": 11.9563, "step": 26085 }, { "epoch": 1.4204869548644437, "grad_norm": 0.5287215448878694, "learning_rate": 4.089521231551492e-05, "loss": 11.8916, "step": 26086 }, { "epoch": 1.4205414088610266, "grad_norm": 0.509914956110607, "learning_rate": 4.088809941662278e-05, "loss": 11.7701, "step": 26087 }, { "epoch": 1.4205958628576096, "grad_norm": 0.6305841139139768, "learning_rate": 4.088098697739473e-05, "loss": 11.903, "step": 26088 }, { "epoch": 1.4206503168541926, "grad_norm": 0.5456601598513064, "learning_rate": 4.087387499788601e-05, "loss": 11.8674, "step": 26089 }, { "epoch": 1.4207047708507756, "grad_norm": 0.5477823649061792, "learning_rate": 4.086676347815198e-05, "loss": 11.8448, "step": 26090 }, { "epoch": 1.4207592248473586, "grad_norm": 0.56747306327434, "learning_rate": 4.0859652418247915e-05, "loss": 11.9006, "step": 26091 }, { "epoch": 1.4208136788439416, "grad_norm": 0.566420837252433, "learning_rate": 4.0852541818229085e-05, "loss": 11.8488, "step": 26092 }, { "epoch": 1.4208681328405246, "grad_norm": 0.5619605691291363, "learning_rate": 4.084543167815084e-05, "loss": 11.9379, "step": 26093 }, { "epoch": 1.4209225868371076, "grad_norm": 0.5967933155463008, "learning_rate": 4.08383219980684e-05, "loss": 11.9458, "step": 26094 }, { "epoch": 1.4209770408336908, "grad_norm": 0.5697095771538786, "learning_rate": 4.083121277803709e-05, "loss": 11.9208, "step": 26095 }, { "epoch": 1.4210314948302738, "grad_norm": 0.5351231640418428, "learning_rate": 4.082410401811222e-05, "loss": 11.8725, "step": 26096 }, { "epoch": 1.4210859488268568, "grad_norm": 0.6268998248893082, "learning_rate": 4.081699571834902e-05, "loss": 11.958, "step": 26097 }, { "epoch": 1.4211404028234398, "grad_norm": 0.5329125590064157, "learning_rate": 4.080988787880281e-05, "loss": 11.8002, "step": 26098 }, { "epoch": 1.4211948568200228, "grad_norm": 0.6251433177643696, "learning_rate": 4.0802780499528804e-05, "loss": 12.0663, "step": 26099 }, { "epoch": 1.4212493108166058, "grad_norm": 0.5002374462493898, "learning_rate": 4.079567358058235e-05, "loss": 11.7771, "step": 26100 }, { "epoch": 1.4213037648131888, "grad_norm": 0.5244976714655124, "learning_rate": 4.078856712201866e-05, "loss": 11.8338, "step": 26101 }, { "epoch": 1.4213582188097718, "grad_norm": 0.5511202951548435, "learning_rate": 4.0781461123892964e-05, "loss": 11.8922, "step": 26102 }, { "epoch": 1.4214126728063547, "grad_norm": 0.5225968468841047, "learning_rate": 4.077435558626059e-05, "loss": 11.8962, "step": 26103 }, { "epoch": 1.4214671268029377, "grad_norm": 0.5870296286405391, "learning_rate": 4.0767250509176734e-05, "loss": 11.9209, "step": 26104 }, { "epoch": 1.4215215807995207, "grad_norm": 0.5483696163688081, "learning_rate": 4.076014589269667e-05, "loss": 11.854, "step": 26105 }, { "epoch": 1.4215760347961037, "grad_norm": 0.5629190365951552, "learning_rate": 4.0753041736875675e-05, "loss": 11.9133, "step": 26106 }, { "epoch": 1.4216304887926867, "grad_norm": 0.5403040801153737, "learning_rate": 4.0745938041768926e-05, "loss": 11.9471, "step": 26107 }, { "epoch": 1.4216849427892697, "grad_norm": 0.6256786439182609, "learning_rate": 4.0738834807431734e-05, "loss": 12.0484, "step": 26108 }, { "epoch": 1.421739396785853, "grad_norm": 0.5477870347909671, "learning_rate": 4.073173203391928e-05, "loss": 11.8385, "step": 26109 }, { "epoch": 1.421793850782436, "grad_norm": 0.5396897595736808, "learning_rate": 4.0724629721286846e-05, "loss": 11.9866, "step": 26110 }, { "epoch": 1.421848304779019, "grad_norm": 0.548165056983725, "learning_rate": 4.071752786958962e-05, "loss": 11.9578, "step": 26111 }, { "epoch": 1.421902758775602, "grad_norm": 0.5067345279142136, "learning_rate": 4.071042647888281e-05, "loss": 11.9181, "step": 26112 }, { "epoch": 1.421957212772185, "grad_norm": 0.5302774756390238, "learning_rate": 4.0703325549221715e-05, "loss": 11.7496, "step": 26113 }, { "epoch": 1.422011666768768, "grad_norm": 0.5373970293476852, "learning_rate": 4.069622508066146e-05, "loss": 11.9389, "step": 26114 }, { "epoch": 1.4220661207653509, "grad_norm": 0.5824346892358903, "learning_rate": 4.068912507325733e-05, "loss": 11.7986, "step": 26115 }, { "epoch": 1.4221205747619339, "grad_norm": 0.5117474109183592, "learning_rate": 4.0682025527064486e-05, "loss": 11.8509, "step": 26116 }, { "epoch": 1.4221750287585169, "grad_norm": 0.5371351655839012, "learning_rate": 4.067492644213815e-05, "loss": 11.8704, "step": 26117 }, { "epoch": 1.4222294827551, "grad_norm": 0.548480832880369, "learning_rate": 4.066782781853358e-05, "loss": 11.9652, "step": 26118 }, { "epoch": 1.422283936751683, "grad_norm": 0.5413346781788357, "learning_rate": 4.066072965630588e-05, "loss": 11.8309, "step": 26119 }, { "epoch": 1.422338390748266, "grad_norm": 0.5582722841388086, "learning_rate": 4.065363195551039e-05, "loss": 11.9146, "step": 26120 }, { "epoch": 1.422392844744849, "grad_norm": 0.6060756787871406, "learning_rate": 4.064653471620213e-05, "loss": 12.0039, "step": 26121 }, { "epoch": 1.422447298741432, "grad_norm": 0.5455678881802932, "learning_rate": 4.0639437938436365e-05, "loss": 11.8754, "step": 26122 }, { "epoch": 1.422501752738015, "grad_norm": 0.5312607281569431, "learning_rate": 4.063234162226832e-05, "loss": 11.8576, "step": 26123 }, { "epoch": 1.422556206734598, "grad_norm": 0.5362934715754557, "learning_rate": 4.062524576775312e-05, "loss": 11.8552, "step": 26124 }, { "epoch": 1.422610660731181, "grad_norm": 0.5407476284695961, "learning_rate": 4.0618150374946e-05, "loss": 11.9082, "step": 26125 }, { "epoch": 1.422665114727764, "grad_norm": 0.5480359721976515, "learning_rate": 4.061105544390205e-05, "loss": 11.9509, "step": 26126 }, { "epoch": 1.422719568724347, "grad_norm": 0.5530294624497818, "learning_rate": 4.060396097467653e-05, "loss": 11.952, "step": 26127 }, { "epoch": 1.42277402272093, "grad_norm": 0.5398715923136687, "learning_rate": 4.0596866967324546e-05, "loss": 11.8422, "step": 26128 }, { "epoch": 1.422828476717513, "grad_norm": 0.4983930270551854, "learning_rate": 4.058977342190128e-05, "loss": 11.8372, "step": 26129 }, { "epoch": 1.422882930714096, "grad_norm": 0.6456253775489371, "learning_rate": 4.058268033846198e-05, "loss": 11.8939, "step": 26130 }, { "epoch": 1.422937384710679, "grad_norm": 0.5009297295174231, "learning_rate": 4.057558771706163e-05, "loss": 11.7764, "step": 26131 }, { "epoch": 1.4229918387072622, "grad_norm": 0.5131048532860858, "learning_rate": 4.056849555775549e-05, "loss": 11.8219, "step": 26132 }, { "epoch": 1.4230462927038452, "grad_norm": 0.5496043825935185, "learning_rate": 4.056140386059873e-05, "loss": 11.8435, "step": 26133 }, { "epoch": 1.4231007467004282, "grad_norm": 0.5249219292855265, "learning_rate": 4.055431262564643e-05, "loss": 11.9183, "step": 26134 }, { "epoch": 1.4231552006970112, "grad_norm": 0.541743106037878, "learning_rate": 4.054722185295379e-05, "loss": 11.886, "step": 26135 }, { "epoch": 1.4232096546935942, "grad_norm": 0.4990124456180477, "learning_rate": 4.0540131542575885e-05, "loss": 11.9758, "step": 26136 }, { "epoch": 1.4232641086901772, "grad_norm": 0.5689709929035197, "learning_rate": 4.0533041694567944e-05, "loss": 12.0183, "step": 26137 }, { "epoch": 1.4233185626867602, "grad_norm": 0.5319986108079289, "learning_rate": 4.0525952308984995e-05, "loss": 11.93, "step": 26138 }, { "epoch": 1.4233730166833432, "grad_norm": 0.5119422973923216, "learning_rate": 4.051886338588221e-05, "loss": 11.8446, "step": 26139 }, { "epoch": 1.4234274706799261, "grad_norm": 0.529031137841042, "learning_rate": 4.051177492531479e-05, "loss": 11.8661, "step": 26140 }, { "epoch": 1.4234819246765094, "grad_norm": 0.5263093224068561, "learning_rate": 4.050468692733772e-05, "loss": 11.9608, "step": 26141 }, { "epoch": 1.4235363786730924, "grad_norm": 0.5701329276668913, "learning_rate": 4.049759939200621e-05, "loss": 11.7892, "step": 26142 }, { "epoch": 1.4235908326696753, "grad_norm": 0.5224754995736903, "learning_rate": 4.049051231937532e-05, "loss": 11.8852, "step": 26143 }, { "epoch": 1.4236452866662583, "grad_norm": 0.6701563146020101, "learning_rate": 4.0483425709500175e-05, "loss": 11.981, "step": 26144 }, { "epoch": 1.4236997406628413, "grad_norm": 0.5642484403765652, "learning_rate": 4.047633956243594e-05, "loss": 11.8037, "step": 26145 }, { "epoch": 1.4237541946594243, "grad_norm": 0.519367438488522, "learning_rate": 4.046925387823762e-05, "loss": 11.8752, "step": 26146 }, { "epoch": 1.4238086486560073, "grad_norm": 0.6012658325997082, "learning_rate": 4.046216865696041e-05, "loss": 11.9951, "step": 26147 }, { "epoch": 1.4238631026525903, "grad_norm": 0.5626756090497997, "learning_rate": 4.045508389865933e-05, "loss": 11.9036, "step": 26148 }, { "epoch": 1.4239175566491733, "grad_norm": 0.5470869524108306, "learning_rate": 4.044799960338953e-05, "loss": 11.8585, "step": 26149 }, { "epoch": 1.4239720106457563, "grad_norm": 0.5231068677699756, "learning_rate": 4.044091577120608e-05, "loss": 11.9079, "step": 26150 }, { "epoch": 1.4240264646423393, "grad_norm": 0.5193856497954444, "learning_rate": 4.043383240216402e-05, "loss": 11.8574, "step": 26151 }, { "epoch": 1.4240809186389223, "grad_norm": 0.5775055306382821, "learning_rate": 4.042674949631849e-05, "loss": 11.8931, "step": 26152 }, { "epoch": 1.4241353726355053, "grad_norm": 0.5401020807876373, "learning_rate": 4.0419667053724533e-05, "loss": 11.8396, "step": 26153 }, { "epoch": 1.4241898266320883, "grad_norm": 0.5886578061616173, "learning_rate": 4.0412585074437214e-05, "loss": 11.8322, "step": 26154 }, { "epoch": 1.4242442806286713, "grad_norm": 0.630354039105725, "learning_rate": 4.040550355851168e-05, "loss": 11.8991, "step": 26155 }, { "epoch": 1.4242987346252545, "grad_norm": 0.5108081604678174, "learning_rate": 4.0398422506002896e-05, "loss": 11.8605, "step": 26156 }, { "epoch": 1.4243531886218375, "grad_norm": 0.5493105690597134, "learning_rate": 4.039134191696602e-05, "loss": 11.8864, "step": 26157 }, { "epoch": 1.4244076426184205, "grad_norm": 0.5939040435766363, "learning_rate": 4.0384261791456024e-05, "loss": 11.8893, "step": 26158 }, { "epoch": 1.4244620966150034, "grad_norm": 0.5848380633593319, "learning_rate": 4.037718212952805e-05, "loss": 11.8828, "step": 26159 }, { "epoch": 1.4245165506115864, "grad_norm": 0.603612017642678, "learning_rate": 4.0370102931237055e-05, "loss": 11.9141, "step": 26160 }, { "epoch": 1.4245710046081694, "grad_norm": 0.6001998674268149, "learning_rate": 4.036302419663819e-05, "loss": 11.8094, "step": 26161 }, { "epoch": 1.4246254586047524, "grad_norm": 0.5414205918706573, "learning_rate": 4.035594592578644e-05, "loss": 11.8223, "step": 26162 }, { "epoch": 1.4246799126013354, "grad_norm": 0.5063186603290926, "learning_rate": 4.034886811873683e-05, "loss": 11.7757, "step": 26163 }, { "epoch": 1.4247343665979186, "grad_norm": 0.5185572655618436, "learning_rate": 4.034179077554445e-05, "loss": 11.8154, "step": 26164 }, { "epoch": 1.4247888205945016, "grad_norm": 0.5204732265212506, "learning_rate": 4.0334713896264285e-05, "loss": 11.7669, "step": 26165 }, { "epoch": 1.4248432745910846, "grad_norm": 0.5523725407549351, "learning_rate": 4.0327637480951384e-05, "loss": 11.8258, "step": 26166 }, { "epoch": 1.4248977285876676, "grad_norm": 0.5470036729652743, "learning_rate": 4.0320561529660816e-05, "loss": 11.8053, "step": 26167 }, { "epoch": 1.4249521825842506, "grad_norm": 0.5282009126328882, "learning_rate": 4.031348604244753e-05, "loss": 11.8806, "step": 26168 }, { "epoch": 1.4250066365808336, "grad_norm": 0.6393707360575117, "learning_rate": 4.030641101936663e-05, "loss": 11.7239, "step": 26169 }, { "epoch": 1.4250610905774166, "grad_norm": 0.49075736873806214, "learning_rate": 4.029933646047306e-05, "loss": 11.8649, "step": 26170 }, { "epoch": 1.4251155445739996, "grad_norm": 0.587060484404055, "learning_rate": 4.0292262365821885e-05, "loss": 11.8673, "step": 26171 }, { "epoch": 1.4251699985705826, "grad_norm": 0.5549308115617355, "learning_rate": 4.0285188735468095e-05, "loss": 11.7866, "step": 26172 }, { "epoch": 1.4252244525671656, "grad_norm": 0.6082517650070302, "learning_rate": 4.0278115569466645e-05, "loss": 12.0176, "step": 26173 }, { "epoch": 1.4252789065637486, "grad_norm": 0.5425571203776478, "learning_rate": 4.027104286787263e-05, "loss": 11.8957, "step": 26174 }, { "epoch": 1.4253333605603316, "grad_norm": 0.5535651829595233, "learning_rate": 4.026397063074097e-05, "loss": 11.6618, "step": 26175 }, { "epoch": 1.4253878145569145, "grad_norm": 0.5535735271816487, "learning_rate": 4.0256898858126726e-05, "loss": 11.8162, "step": 26176 }, { "epoch": 1.4254422685534975, "grad_norm": 0.5490468139436682, "learning_rate": 4.0249827550084815e-05, "loss": 11.8025, "step": 26177 }, { "epoch": 1.4254967225500805, "grad_norm": 0.5263919424235999, "learning_rate": 4.024275670667027e-05, "loss": 11.784, "step": 26178 }, { "epoch": 1.4255511765466637, "grad_norm": 0.6019793120904708, "learning_rate": 4.0235686327938096e-05, "loss": 11.7365, "step": 26179 }, { "epoch": 1.4256056305432467, "grad_norm": 0.5459217845914554, "learning_rate": 4.022861641394322e-05, "loss": 11.8549, "step": 26180 }, { "epoch": 1.4256600845398297, "grad_norm": 0.5721077614789521, "learning_rate": 4.0221546964740684e-05, "loss": 11.8439, "step": 26181 }, { "epoch": 1.4257145385364127, "grad_norm": 0.5334336493246139, "learning_rate": 4.0214477980385427e-05, "loss": 11.9349, "step": 26182 }, { "epoch": 1.4257689925329957, "grad_norm": 0.5811059264341433, "learning_rate": 4.020740946093237e-05, "loss": 11.7375, "step": 26183 }, { "epoch": 1.4258234465295787, "grad_norm": 0.5033816827030118, "learning_rate": 4.0200341406436557e-05, "loss": 11.8887, "step": 26184 }, { "epoch": 1.4258779005261617, "grad_norm": 0.6284397320310644, "learning_rate": 4.019327381695289e-05, "loss": 11.6962, "step": 26185 }, { "epoch": 1.4259323545227447, "grad_norm": 0.529838968932736, "learning_rate": 4.018620669253639e-05, "loss": 11.7569, "step": 26186 }, { "epoch": 1.4259868085193277, "grad_norm": 0.5636641361792095, "learning_rate": 4.0179140033241936e-05, "loss": 11.7074, "step": 26187 }, { "epoch": 1.426041262515911, "grad_norm": 0.5397557261826808, "learning_rate": 4.0172073839124527e-05, "loss": 11.9393, "step": 26188 }, { "epoch": 1.426095716512494, "grad_norm": 0.7155158038755675, "learning_rate": 4.0165008110239144e-05, "loss": 12.053, "step": 26189 }, { "epoch": 1.4261501705090769, "grad_norm": 0.6202564568682207, "learning_rate": 4.015794284664065e-05, "loss": 11.8824, "step": 26190 }, { "epoch": 1.4262046245056599, "grad_norm": 0.5361782999426571, "learning_rate": 4.01508780483841e-05, "loss": 11.8593, "step": 26191 }, { "epoch": 1.4262590785022429, "grad_norm": 0.51339325970096, "learning_rate": 4.0143813715524295e-05, "loss": 11.7959, "step": 26192 }, { "epoch": 1.4263135324988259, "grad_norm": 0.5247329542683532, "learning_rate": 4.013674984811622e-05, "loss": 11.8474, "step": 26193 }, { "epoch": 1.4263679864954089, "grad_norm": 0.5310358060783806, "learning_rate": 4.012968644621487e-05, "loss": 11.8446, "step": 26194 }, { "epoch": 1.4264224404919918, "grad_norm": 0.5698145778999093, "learning_rate": 4.012262350987507e-05, "loss": 11.9861, "step": 26195 }, { "epoch": 1.4264768944885748, "grad_norm": 0.5218084305810632, "learning_rate": 4.011556103915183e-05, "loss": 11.7827, "step": 26196 }, { "epoch": 1.4265313484851578, "grad_norm": 0.5828391582600716, "learning_rate": 4.010849903409999e-05, "loss": 11.8251, "step": 26197 }, { "epoch": 1.4265858024817408, "grad_norm": 0.5927900006822193, "learning_rate": 4.010143749477454e-05, "loss": 11.8264, "step": 26198 }, { "epoch": 1.4266402564783238, "grad_norm": 0.5523033674032058, "learning_rate": 4.0094376421230326e-05, "loss": 11.8712, "step": 26199 }, { "epoch": 1.4266947104749068, "grad_norm": 0.48796831006239316, "learning_rate": 4.0087315813522283e-05, "loss": 11.7577, "step": 26200 }, { "epoch": 1.4267491644714898, "grad_norm": 0.6614840011013181, "learning_rate": 4.00802556717054e-05, "loss": 11.8877, "step": 26201 }, { "epoch": 1.426803618468073, "grad_norm": 0.509287481742404, "learning_rate": 4.0073195995834424e-05, "loss": 11.7933, "step": 26202 }, { "epoch": 1.426858072464656, "grad_norm": 0.5295338816705573, "learning_rate": 4.0066136785964316e-05, "loss": 11.8831, "step": 26203 }, { "epoch": 1.426912526461239, "grad_norm": 0.5209799435848458, "learning_rate": 4.0059078042150024e-05, "loss": 11.6572, "step": 26204 }, { "epoch": 1.426966980457822, "grad_norm": 0.5310987449037925, "learning_rate": 4.0052019764446355e-05, "loss": 11.8869, "step": 26205 }, { "epoch": 1.427021434454405, "grad_norm": 0.5326269055056083, "learning_rate": 4.0044961952908264e-05, "loss": 11.7507, "step": 26206 }, { "epoch": 1.427075888450988, "grad_norm": 0.5432080036948125, "learning_rate": 4.003790460759058e-05, "loss": 11.9245, "step": 26207 }, { "epoch": 1.427130342447571, "grad_norm": 0.5271225762138729, "learning_rate": 4.0030847728548235e-05, "loss": 11.9062, "step": 26208 }, { "epoch": 1.427184796444154, "grad_norm": 0.5309945912159396, "learning_rate": 4.002379131583603e-05, "loss": 11.9904, "step": 26209 }, { "epoch": 1.427239250440737, "grad_norm": 0.5184897846958649, "learning_rate": 4.0016735369508896e-05, "loss": 11.8469, "step": 26210 }, { "epoch": 1.4272937044373202, "grad_norm": 0.5304836564332092, "learning_rate": 4.000967988962176e-05, "loss": 11.8576, "step": 26211 }, { "epoch": 1.4273481584339032, "grad_norm": 0.5057999406033876, "learning_rate": 4.000262487622932e-05, "loss": 11.8753, "step": 26212 }, { "epoch": 1.4274026124304862, "grad_norm": 0.6491732972027163, "learning_rate": 3.999557032938659e-05, "loss": 11.9121, "step": 26213 }, { "epoch": 1.4274570664270692, "grad_norm": 0.5739909975227664, "learning_rate": 3.998851624914832e-05, "loss": 11.9992, "step": 26214 }, { "epoch": 1.4275115204236521, "grad_norm": 0.4940367374012289, "learning_rate": 3.998146263556941e-05, "loss": 11.7942, "step": 26215 }, { "epoch": 1.4275659744202351, "grad_norm": 0.544345263463131, "learning_rate": 3.997440948870476e-05, "loss": 11.7832, "step": 26216 }, { "epoch": 1.4276204284168181, "grad_norm": 0.5316257964238552, "learning_rate": 3.9967356808609126e-05, "loss": 11.9227, "step": 26217 }, { "epoch": 1.4276748824134011, "grad_norm": 0.49500755455490225, "learning_rate": 3.9960304595337414e-05, "loss": 11.8202, "step": 26218 }, { "epoch": 1.4277293364099841, "grad_norm": 0.5218596838066738, "learning_rate": 3.995325284894442e-05, "loss": 11.8303, "step": 26219 }, { "epoch": 1.427783790406567, "grad_norm": 0.529783229415229, "learning_rate": 3.994620156948504e-05, "loss": 11.8972, "step": 26220 }, { "epoch": 1.42783824440315, "grad_norm": 0.5265848539579019, "learning_rate": 3.993915075701407e-05, "loss": 11.8772, "step": 26221 }, { "epoch": 1.427892698399733, "grad_norm": 0.5954826891122208, "learning_rate": 3.9932100411586296e-05, "loss": 11.9406, "step": 26222 }, { "epoch": 1.427947152396316, "grad_norm": 0.5807564782298249, "learning_rate": 3.992505053325662e-05, "loss": 11.7988, "step": 26223 }, { "epoch": 1.428001606392899, "grad_norm": 0.5196498650807065, "learning_rate": 3.9918001122079785e-05, "loss": 11.9653, "step": 26224 }, { "epoch": 1.428056060389482, "grad_norm": 0.5158191391345669, "learning_rate": 3.9910952178110686e-05, "loss": 11.8003, "step": 26225 }, { "epoch": 1.4281105143860653, "grad_norm": 0.5334710572942586, "learning_rate": 3.9903903701404054e-05, "loss": 11.8635, "step": 26226 }, { "epoch": 1.4281649683826483, "grad_norm": 0.5423864235929465, "learning_rate": 3.9896855692014755e-05, "loss": 11.8995, "step": 26227 }, { "epoch": 1.4282194223792313, "grad_norm": 0.5497611115481867, "learning_rate": 3.988980814999763e-05, "loss": 11.9004, "step": 26228 }, { "epoch": 1.4282738763758143, "grad_norm": 0.611807334997668, "learning_rate": 3.9882761075407396e-05, "loss": 11.6557, "step": 26229 }, { "epoch": 1.4283283303723973, "grad_norm": 0.5336473333260907, "learning_rate": 3.9875714468298916e-05, "loss": 11.7722, "step": 26230 }, { "epoch": 1.4283827843689803, "grad_norm": 0.5725208809146682, "learning_rate": 3.9868668328726974e-05, "loss": 11.9451, "step": 26231 }, { "epoch": 1.4284372383655632, "grad_norm": 0.5715020903163366, "learning_rate": 3.986162265674632e-05, "loss": 11.9609, "step": 26232 }, { "epoch": 1.4284916923621462, "grad_norm": 0.6011111369036927, "learning_rate": 3.98545774524118e-05, "loss": 11.9879, "step": 26233 }, { "epoch": 1.4285461463587295, "grad_norm": 0.5837201780273968, "learning_rate": 3.9847532715778147e-05, "loss": 11.8455, "step": 26234 }, { "epoch": 1.4286006003553124, "grad_norm": 0.5816848722841986, "learning_rate": 3.984048844690019e-05, "loss": 11.9353, "step": 26235 }, { "epoch": 1.4286550543518954, "grad_norm": 0.5700366504916987, "learning_rate": 3.983344464583266e-05, "loss": 11.9536, "step": 26236 }, { "epoch": 1.4287095083484784, "grad_norm": 0.539345981134819, "learning_rate": 3.9826401312630345e-05, "loss": 11.7942, "step": 26237 }, { "epoch": 1.4287639623450614, "grad_norm": 0.5475082507150271, "learning_rate": 3.981935844734806e-05, "loss": 11.7893, "step": 26238 }, { "epoch": 1.4288184163416444, "grad_norm": 0.5207017972419579, "learning_rate": 3.981231605004051e-05, "loss": 11.8262, "step": 26239 }, { "epoch": 1.4288728703382274, "grad_norm": 0.5933324562536302, "learning_rate": 3.9805274120762516e-05, "loss": 11.9662, "step": 26240 }, { "epoch": 1.4289273243348104, "grad_norm": 0.5644267583206891, "learning_rate": 3.97982326595688e-05, "loss": 11.8991, "step": 26241 }, { "epoch": 1.4289817783313934, "grad_norm": 0.5959134431198703, "learning_rate": 3.9791191666514093e-05, "loss": 11.891, "step": 26242 }, { "epoch": 1.4290362323279764, "grad_norm": 0.5069766114074111, "learning_rate": 3.9784151141653206e-05, "loss": 11.83, "step": 26243 }, { "epoch": 1.4290906863245594, "grad_norm": 0.500081980322355, "learning_rate": 3.977711108504083e-05, "loss": 11.8952, "step": 26244 }, { "epoch": 1.4291451403211424, "grad_norm": 0.5509883593483207, "learning_rate": 3.977007149673177e-05, "loss": 11.9447, "step": 26245 }, { "epoch": 1.4291995943177254, "grad_norm": 0.5105273151697763, "learning_rate": 3.97630323767807e-05, "loss": 11.8086, "step": 26246 }, { "epoch": 1.4292540483143084, "grad_norm": 0.5205523131391283, "learning_rate": 3.9755993725242414e-05, "loss": 11.9408, "step": 26247 }, { "epoch": 1.4293085023108913, "grad_norm": 0.499158442176314, "learning_rate": 3.9748955542171605e-05, "loss": 11.8502, "step": 26248 }, { "epoch": 1.4293629563074746, "grad_norm": 0.4944646452886164, "learning_rate": 3.9741917827623024e-05, "loss": 11.9085, "step": 26249 }, { "epoch": 1.4294174103040576, "grad_norm": 0.5931700142739695, "learning_rate": 3.973488058165141e-05, "loss": 11.7838, "step": 26250 }, { "epoch": 1.4294718643006405, "grad_norm": 0.5208407011360756, "learning_rate": 3.972784380431149e-05, "loss": 11.7544, "step": 26251 }, { "epoch": 1.4295263182972235, "grad_norm": 0.5181320276905301, "learning_rate": 3.972080749565792e-05, "loss": 11.8213, "step": 26252 }, { "epoch": 1.4295807722938065, "grad_norm": 0.5528504837602145, "learning_rate": 3.9713771655745504e-05, "loss": 11.8798, "step": 26253 }, { "epoch": 1.4296352262903895, "grad_norm": 0.5721125616895366, "learning_rate": 3.970673628462886e-05, "loss": 11.8666, "step": 26254 }, { "epoch": 1.4296896802869725, "grad_norm": 0.5634194346236857, "learning_rate": 3.96997013823628e-05, "loss": 11.809, "step": 26255 }, { "epoch": 1.4297441342835555, "grad_norm": 0.5339333376747236, "learning_rate": 3.9692666949001925e-05, "loss": 11.8948, "step": 26256 }, { "epoch": 1.4297985882801385, "grad_norm": 0.6087385653395961, "learning_rate": 3.968563298460102e-05, "loss": 11.8149, "step": 26257 }, { "epoch": 1.4298530422767217, "grad_norm": 0.4913557122914008, "learning_rate": 3.967859948921472e-05, "loss": 11.7974, "step": 26258 }, { "epoch": 1.4299074962733047, "grad_norm": 0.5510039420406786, "learning_rate": 3.9671566462897734e-05, "loss": 11.9316, "step": 26259 }, { "epoch": 1.4299619502698877, "grad_norm": 0.6037638777716818, "learning_rate": 3.9664533905704815e-05, "loss": 11.9781, "step": 26260 }, { "epoch": 1.4300164042664707, "grad_norm": 0.5375655881978909, "learning_rate": 3.965750181769059e-05, "loss": 11.9076, "step": 26261 }, { "epoch": 1.4300708582630537, "grad_norm": 0.5605225482339553, "learning_rate": 3.965047019890975e-05, "loss": 11.898, "step": 26262 }, { "epoch": 1.4301253122596367, "grad_norm": 0.5463242407483168, "learning_rate": 3.964343904941694e-05, "loss": 11.8452, "step": 26263 }, { "epoch": 1.4301797662562197, "grad_norm": 0.5270700342324047, "learning_rate": 3.9636408369266874e-05, "loss": 11.8032, "step": 26264 }, { "epoch": 1.4302342202528027, "grad_norm": 0.5304016085082199, "learning_rate": 3.962937815851425e-05, "loss": 11.7853, "step": 26265 }, { "epoch": 1.4302886742493857, "grad_norm": 0.5134334150262553, "learning_rate": 3.9622348417213674e-05, "loss": 11.8255, "step": 26266 }, { "epoch": 1.4303431282459687, "grad_norm": 0.5402625393626279, "learning_rate": 3.961531914541987e-05, "loss": 11.863, "step": 26267 }, { "epoch": 1.4303975822425516, "grad_norm": 0.5673066880455563, "learning_rate": 3.960829034318745e-05, "loss": 11.7563, "step": 26268 }, { "epoch": 1.4304520362391346, "grad_norm": 0.5133852115553672, "learning_rate": 3.960126201057112e-05, "loss": 11.8093, "step": 26269 }, { "epoch": 1.4305064902357176, "grad_norm": 0.5717736604225998, "learning_rate": 3.959423414762546e-05, "loss": 11.9245, "step": 26270 }, { "epoch": 1.4305609442323006, "grad_norm": 0.5379993862072009, "learning_rate": 3.9587206754405215e-05, "loss": 11.7369, "step": 26271 }, { "epoch": 1.4306153982288838, "grad_norm": 0.5560623358336024, "learning_rate": 3.958017983096497e-05, "loss": 11.9136, "step": 26272 }, { "epoch": 1.4306698522254668, "grad_norm": 0.5375427123651487, "learning_rate": 3.957315337735935e-05, "loss": 11.7954, "step": 26273 }, { "epoch": 1.4307243062220498, "grad_norm": 0.5535774098794369, "learning_rate": 3.956612739364306e-05, "loss": 11.7769, "step": 26274 }, { "epoch": 1.4307787602186328, "grad_norm": 0.5391283028540276, "learning_rate": 3.955910187987066e-05, "loss": 11.9049, "step": 26275 }, { "epoch": 1.4308332142152158, "grad_norm": 0.5617764524621167, "learning_rate": 3.955207683609682e-05, "loss": 11.8965, "step": 26276 }, { "epoch": 1.4308876682117988, "grad_norm": 0.561202540887149, "learning_rate": 3.9545052262376205e-05, "loss": 11.9329, "step": 26277 }, { "epoch": 1.4309421222083818, "grad_norm": 0.5495423846866626, "learning_rate": 3.953802815876336e-05, "loss": 11.8629, "step": 26278 }, { "epoch": 1.4309965762049648, "grad_norm": 0.6215470131896877, "learning_rate": 3.9531004525312984e-05, "loss": 12.0592, "step": 26279 }, { "epoch": 1.4310510302015478, "grad_norm": 0.5042639495011494, "learning_rate": 3.9523981362079633e-05, "loss": 11.8585, "step": 26280 }, { "epoch": 1.431105484198131, "grad_norm": 0.5553416594210632, "learning_rate": 3.951695866911798e-05, "loss": 11.7984, "step": 26281 }, { "epoch": 1.431159938194714, "grad_norm": 0.5139239586395046, "learning_rate": 3.9509936446482584e-05, "loss": 11.9491, "step": 26282 }, { "epoch": 1.431214392191297, "grad_norm": 0.5871534059786202, "learning_rate": 3.9502914694228043e-05, "loss": 11.9945, "step": 26283 }, { "epoch": 1.43126884618788, "grad_norm": 0.5534052479716304, "learning_rate": 3.9495893412409015e-05, "loss": 11.8266, "step": 26284 }, { "epoch": 1.431323300184463, "grad_norm": 0.5597620717468003, "learning_rate": 3.948887260108003e-05, "loss": 11.9313, "step": 26285 }, { "epoch": 1.431377754181046, "grad_norm": 0.5563427268608134, "learning_rate": 3.948185226029571e-05, "loss": 11.8254, "step": 26286 }, { "epoch": 1.431432208177629, "grad_norm": 0.5759137649375125, "learning_rate": 3.9474832390110705e-05, "loss": 11.9367, "step": 26287 }, { "epoch": 1.431486662174212, "grad_norm": 0.49215373796281114, "learning_rate": 3.9467812990579514e-05, "loss": 11.8003, "step": 26288 }, { "epoch": 1.431541116170795, "grad_norm": 0.5960469170428271, "learning_rate": 3.94607940617568e-05, "loss": 11.9812, "step": 26289 }, { "epoch": 1.431595570167378, "grad_norm": 0.5400588528318553, "learning_rate": 3.945377560369706e-05, "loss": 11.8169, "step": 26290 }, { "epoch": 1.431650024163961, "grad_norm": 0.5116560040426905, "learning_rate": 3.944675761645495e-05, "loss": 11.8077, "step": 26291 }, { "epoch": 1.431704478160544, "grad_norm": 0.5938105752669841, "learning_rate": 3.9439740100085024e-05, "loss": 11.8301, "step": 26292 }, { "epoch": 1.431758932157127, "grad_norm": 0.5676136162209637, "learning_rate": 3.9432723054641786e-05, "loss": 11.7587, "step": 26293 }, { "epoch": 1.43181338615371, "grad_norm": 0.6049528042386776, "learning_rate": 3.942570648017988e-05, "loss": 11.9591, "step": 26294 }, { "epoch": 1.431867840150293, "grad_norm": 0.5602248595932456, "learning_rate": 3.9418690376753806e-05, "loss": 11.8574, "step": 26295 }, { "epoch": 1.431922294146876, "grad_norm": 0.5735933686420074, "learning_rate": 3.94116747444182e-05, "loss": 11.9627, "step": 26296 }, { "epoch": 1.431976748143459, "grad_norm": 0.5344121507458838, "learning_rate": 3.940465958322753e-05, "loss": 11.8407, "step": 26297 }, { "epoch": 1.432031202140042, "grad_norm": 0.5387036759812672, "learning_rate": 3.9397644893236396e-05, "loss": 11.941, "step": 26298 }, { "epoch": 1.432085656136625, "grad_norm": 0.5520957462342303, "learning_rate": 3.939063067449936e-05, "loss": 11.83, "step": 26299 }, { "epoch": 1.432140110133208, "grad_norm": 0.5798661274176112, "learning_rate": 3.9383616927070924e-05, "loss": 11.9121, "step": 26300 }, { "epoch": 1.432194564129791, "grad_norm": 0.5537868892540694, "learning_rate": 3.937660365100567e-05, "loss": 12.0007, "step": 26301 }, { "epoch": 1.432249018126374, "grad_norm": 0.6438595153869893, "learning_rate": 3.9369590846358115e-05, "loss": 11.8297, "step": 26302 }, { "epoch": 1.432303472122957, "grad_norm": 0.5848865296047472, "learning_rate": 3.9362578513182766e-05, "loss": 12.022, "step": 26303 }, { "epoch": 1.4323579261195403, "grad_norm": 0.5646558160250067, "learning_rate": 3.9355566651534206e-05, "loss": 11.7747, "step": 26304 }, { "epoch": 1.4324123801161233, "grad_norm": 0.5127284651778233, "learning_rate": 3.934855526146689e-05, "loss": 11.8638, "step": 26305 }, { "epoch": 1.4324668341127063, "grad_norm": 0.5319451536490147, "learning_rate": 3.934154434303541e-05, "loss": 11.9292, "step": 26306 }, { "epoch": 1.4325212881092892, "grad_norm": 0.546971837514057, "learning_rate": 3.9334533896294226e-05, "loss": 11.9256, "step": 26307 }, { "epoch": 1.4325757421058722, "grad_norm": 0.5490548675384516, "learning_rate": 3.9327523921297884e-05, "loss": 11.8369, "step": 26308 }, { "epoch": 1.4326301961024552, "grad_norm": 0.5982828939345142, "learning_rate": 3.932051441810092e-05, "loss": 11.8974, "step": 26309 }, { "epoch": 1.4326846500990382, "grad_norm": 0.5310811719517157, "learning_rate": 3.931350538675777e-05, "loss": 11.8493, "step": 26310 }, { "epoch": 1.4327391040956212, "grad_norm": 0.5493572506616808, "learning_rate": 3.930649682732302e-05, "loss": 11.7759, "step": 26311 }, { "epoch": 1.4327935580922042, "grad_norm": 0.5325032859555068, "learning_rate": 3.929948873985113e-05, "loss": 11.9618, "step": 26312 }, { "epoch": 1.4328480120887872, "grad_norm": 0.5283287961537859, "learning_rate": 3.9292481124396565e-05, "loss": 11.8385, "step": 26313 }, { "epoch": 1.4329024660853702, "grad_norm": 0.5244158558104707, "learning_rate": 3.9285473981013876e-05, "loss": 11.6144, "step": 26314 }, { "epoch": 1.4329569200819532, "grad_norm": 0.5108402266768652, "learning_rate": 3.9278467309757485e-05, "loss": 11.8209, "step": 26315 }, { "epoch": 1.4330113740785362, "grad_norm": 0.5963943287252947, "learning_rate": 3.927146111068196e-05, "loss": 11.8848, "step": 26316 }, { "epoch": 1.4330658280751192, "grad_norm": 0.5553531840371618, "learning_rate": 3.9264455383841694e-05, "loss": 11.8934, "step": 26317 }, { "epoch": 1.4331202820717022, "grad_norm": 0.5009744804337983, "learning_rate": 3.925745012929125e-05, "loss": 11.81, "step": 26318 }, { "epoch": 1.4331747360682854, "grad_norm": 0.5576770648042227, "learning_rate": 3.925044534708502e-05, "loss": 11.8416, "step": 26319 }, { "epoch": 1.4332291900648684, "grad_norm": 0.5287430877263224, "learning_rate": 3.924344103727752e-05, "loss": 11.9291, "step": 26320 }, { "epoch": 1.4332836440614514, "grad_norm": 0.5399687620022304, "learning_rate": 3.923643719992324e-05, "loss": 11.9051, "step": 26321 }, { "epoch": 1.4333380980580344, "grad_norm": 0.553287743124179, "learning_rate": 3.922943383507662e-05, "loss": 11.6551, "step": 26322 }, { "epoch": 1.4333925520546174, "grad_norm": 0.5524811046205452, "learning_rate": 3.922243094279211e-05, "loss": 11.6851, "step": 26323 }, { "epoch": 1.4334470060512003, "grad_norm": 0.5337478631145688, "learning_rate": 3.9215428523124134e-05, "loss": 11.9709, "step": 26324 }, { "epoch": 1.4335014600477833, "grad_norm": 0.5352497624881862, "learning_rate": 3.920842657612718e-05, "loss": 11.8142, "step": 26325 }, { "epoch": 1.4335559140443663, "grad_norm": 0.5726735856035453, "learning_rate": 3.9201425101855734e-05, "loss": 11.9576, "step": 26326 }, { "epoch": 1.4336103680409493, "grad_norm": 0.5719626561301574, "learning_rate": 3.9194424100364166e-05, "loss": 11.8859, "step": 26327 }, { "epoch": 1.4336648220375325, "grad_norm": 0.515771477977669, "learning_rate": 3.918742357170698e-05, "loss": 11.848, "step": 26328 }, { "epoch": 1.4337192760341155, "grad_norm": 0.5041864435515397, "learning_rate": 3.918042351593857e-05, "loss": 11.9247, "step": 26329 }, { "epoch": 1.4337737300306985, "grad_norm": 0.5749954366515224, "learning_rate": 3.9173423933113405e-05, "loss": 11.7697, "step": 26330 }, { "epoch": 1.4338281840272815, "grad_norm": 0.5694306716716669, "learning_rate": 3.916642482328586e-05, "loss": 11.9713, "step": 26331 }, { "epoch": 1.4338826380238645, "grad_norm": 0.579256836282674, "learning_rate": 3.915942618651045e-05, "loss": 11.896, "step": 26332 }, { "epoch": 1.4339370920204475, "grad_norm": 0.5563756159888272, "learning_rate": 3.915242802284152e-05, "loss": 11.9364, "step": 26333 }, { "epoch": 1.4339915460170305, "grad_norm": 0.6281289022665371, "learning_rate": 3.91454303323335e-05, "loss": 11.9363, "step": 26334 }, { "epoch": 1.4340460000136135, "grad_norm": 0.5904478348938093, "learning_rate": 3.9138433115040804e-05, "loss": 11.7967, "step": 26335 }, { "epoch": 1.4341004540101965, "grad_norm": 0.5799892327547969, "learning_rate": 3.91314363710179e-05, "loss": 11.8623, "step": 26336 }, { "epoch": 1.4341549080067795, "grad_norm": 0.5405389470553, "learning_rate": 3.9124440100319123e-05, "loss": 11.9452, "step": 26337 }, { "epoch": 1.4342093620033625, "grad_norm": 0.6783720474388546, "learning_rate": 3.911744430299895e-05, "loss": 11.9873, "step": 26338 }, { "epoch": 1.4342638159999455, "grad_norm": 0.6327982039133199, "learning_rate": 3.911044897911169e-05, "loss": 11.9228, "step": 26339 }, { "epoch": 1.4343182699965284, "grad_norm": 0.5590534285869779, "learning_rate": 3.910345412871184e-05, "loss": 11.891, "step": 26340 }, { "epoch": 1.4343727239931114, "grad_norm": 0.5534170392644077, "learning_rate": 3.90964597518537e-05, "loss": 11.9337, "step": 26341 }, { "epoch": 1.4344271779896947, "grad_norm": 0.5288421244190704, "learning_rate": 3.9089465848591735e-05, "loss": 11.8466, "step": 26342 }, { "epoch": 1.4344816319862776, "grad_norm": 0.5962069198937746, "learning_rate": 3.90824724189803e-05, "loss": 11.8624, "step": 26343 }, { "epoch": 1.4345360859828606, "grad_norm": 0.5750638101000574, "learning_rate": 3.907547946307374e-05, "loss": 11.9572, "step": 26344 }, { "epoch": 1.4345905399794436, "grad_norm": 0.5350056698965392, "learning_rate": 3.90684869809265e-05, "loss": 11.9767, "step": 26345 }, { "epoch": 1.4346449939760266, "grad_norm": 0.5163007456316633, "learning_rate": 3.906149497259289e-05, "loss": 11.7562, "step": 26346 }, { "epoch": 1.4346994479726096, "grad_norm": 0.5973698289624069, "learning_rate": 3.905450343812732e-05, "loss": 11.8655, "step": 26347 }, { "epoch": 1.4347539019691926, "grad_norm": 0.5816683959688256, "learning_rate": 3.904751237758418e-05, "loss": 12.0081, "step": 26348 }, { "epoch": 1.4348083559657756, "grad_norm": 0.5248681375402489, "learning_rate": 3.904052179101778e-05, "loss": 11.8766, "step": 26349 }, { "epoch": 1.4348628099623586, "grad_norm": 0.5227799303859438, "learning_rate": 3.9033531678482535e-05, "loss": 11.8141, "step": 26350 }, { "epoch": 1.4349172639589418, "grad_norm": 0.6169144903374221, "learning_rate": 3.9026542040032733e-05, "loss": 11.8988, "step": 26351 }, { "epoch": 1.4349717179555248, "grad_norm": 0.5674696223899558, "learning_rate": 3.901955287572281e-05, "loss": 11.9535, "step": 26352 }, { "epoch": 1.4350261719521078, "grad_norm": 0.5495261823381598, "learning_rate": 3.901256418560706e-05, "loss": 11.7811, "step": 26353 }, { "epoch": 1.4350806259486908, "grad_norm": 0.5634648186678949, "learning_rate": 3.900557596973981e-05, "loss": 11.9454, "step": 26354 }, { "epoch": 1.4351350799452738, "grad_norm": 0.5225187981987457, "learning_rate": 3.899858822817546e-05, "loss": 11.8261, "step": 26355 }, { "epoch": 1.4351895339418568, "grad_norm": 0.4900501069485667, "learning_rate": 3.8991600960968285e-05, "loss": 11.8859, "step": 26356 }, { "epoch": 1.4352439879384398, "grad_norm": 0.5506176697346621, "learning_rate": 3.898461416817265e-05, "loss": 11.9595, "step": 26357 }, { "epoch": 1.4352984419350228, "grad_norm": 0.5510950018951903, "learning_rate": 3.8977627849842926e-05, "loss": 11.9026, "step": 26358 }, { "epoch": 1.4353528959316058, "grad_norm": 0.5596867563777409, "learning_rate": 3.8970642006033366e-05, "loss": 11.9269, "step": 26359 }, { "epoch": 1.4354073499281887, "grad_norm": 0.5554123574041167, "learning_rate": 3.896365663679836e-05, "loss": 11.6873, "step": 26360 }, { "epoch": 1.4354618039247717, "grad_norm": 0.4755923426377247, "learning_rate": 3.895667174219216e-05, "loss": 11.8354, "step": 26361 }, { "epoch": 1.4355162579213547, "grad_norm": 0.5352883198171227, "learning_rate": 3.894968732226916e-05, "loss": 11.7626, "step": 26362 }, { "epoch": 1.4355707119179377, "grad_norm": 0.5174955441146005, "learning_rate": 3.8942703377083636e-05, "loss": 11.836, "step": 26363 }, { "epoch": 1.4356251659145207, "grad_norm": 0.5788548460555466, "learning_rate": 3.8935719906689836e-05, "loss": 11.9306, "step": 26364 }, { "epoch": 1.435679619911104, "grad_norm": 0.5036639279723619, "learning_rate": 3.892873691114216e-05, "loss": 11.9272, "step": 26365 }, { "epoch": 1.435734073907687, "grad_norm": 0.5320876843083453, "learning_rate": 3.892175439049484e-05, "loss": 11.7054, "step": 26366 }, { "epoch": 1.43578852790427, "grad_norm": 0.5157691623536944, "learning_rate": 3.891477234480223e-05, "loss": 11.9383, "step": 26367 }, { "epoch": 1.435842981900853, "grad_norm": 0.5954104878448915, "learning_rate": 3.8907790774118555e-05, "loss": 11.7849, "step": 26368 }, { "epoch": 1.435897435897436, "grad_norm": 0.6063313458480252, "learning_rate": 3.8900809678498155e-05, "loss": 11.6839, "step": 26369 }, { "epoch": 1.435951889894019, "grad_norm": 0.5522728385786745, "learning_rate": 3.8893829057995326e-05, "loss": 11.861, "step": 26370 }, { "epoch": 1.4360063438906019, "grad_norm": 0.5285990004560783, "learning_rate": 3.8886848912664306e-05, "loss": 11.8667, "step": 26371 }, { "epoch": 1.4360607978871849, "grad_norm": 0.5034565597361829, "learning_rate": 3.887986924255946e-05, "loss": 11.7953, "step": 26372 }, { "epoch": 1.4361152518837679, "grad_norm": 0.5746892136454145, "learning_rate": 3.887289004773493e-05, "loss": 11.9137, "step": 26373 }, { "epoch": 1.436169705880351, "grad_norm": 0.6797035662605884, "learning_rate": 3.886591132824506e-05, "loss": 11.9601, "step": 26374 }, { "epoch": 1.436224159876934, "grad_norm": 0.6305808313574126, "learning_rate": 3.885893308414417e-05, "loss": 11.9089, "step": 26375 }, { "epoch": 1.436278613873517, "grad_norm": 0.5573557777157498, "learning_rate": 3.885195531548641e-05, "loss": 11.8543, "step": 26376 }, { "epoch": 1.4363330678701, "grad_norm": 0.6008072377443651, "learning_rate": 3.884497802232614e-05, "loss": 11.9674, "step": 26377 }, { "epoch": 1.436387521866683, "grad_norm": 0.5162928348344781, "learning_rate": 3.883800120471754e-05, "loss": 11.8639, "step": 26378 }, { "epoch": 1.436441975863266, "grad_norm": 0.5573660627832177, "learning_rate": 3.883102486271495e-05, "loss": 11.9008, "step": 26379 }, { "epoch": 1.436496429859849, "grad_norm": 0.6070489743820368, "learning_rate": 3.882404899637252e-05, "loss": 11.9311, "step": 26380 }, { "epoch": 1.436550883856432, "grad_norm": 0.5135783551282166, "learning_rate": 3.8817073605744544e-05, "loss": 11.8667, "step": 26381 }, { "epoch": 1.436605337853015, "grad_norm": 0.5885763562446317, "learning_rate": 3.881009869088534e-05, "loss": 11.8776, "step": 26382 }, { "epoch": 1.436659791849598, "grad_norm": 0.5819358618645047, "learning_rate": 3.8803124251849e-05, "loss": 11.7464, "step": 26383 }, { "epoch": 1.436714245846181, "grad_norm": 0.525154708840969, "learning_rate": 3.8796150288689824e-05, "loss": 11.8576, "step": 26384 }, { "epoch": 1.436768699842764, "grad_norm": 0.5921139619493957, "learning_rate": 3.878917680146208e-05, "loss": 11.8638, "step": 26385 }, { "epoch": 1.436823153839347, "grad_norm": 0.718740564888115, "learning_rate": 3.878220379021993e-05, "loss": 11.9646, "step": 26386 }, { "epoch": 1.43687760783593, "grad_norm": 0.5153265648328553, "learning_rate": 3.877523125501767e-05, "loss": 11.9061, "step": 26387 }, { "epoch": 1.436932061832513, "grad_norm": 0.5316327421807723, "learning_rate": 3.876825919590944e-05, "loss": 11.9404, "step": 26388 }, { "epoch": 1.4369865158290962, "grad_norm": 0.5156205903919038, "learning_rate": 3.8761287612949526e-05, "loss": 11.9662, "step": 26389 }, { "epoch": 1.4370409698256792, "grad_norm": 0.5184465002568762, "learning_rate": 3.875431650619208e-05, "loss": 11.8669, "step": 26390 }, { "epoch": 1.4370954238222622, "grad_norm": 0.5099609041209606, "learning_rate": 3.874734587569134e-05, "loss": 11.8613, "step": 26391 }, { "epoch": 1.4371498778188452, "grad_norm": 0.5144846205085002, "learning_rate": 3.874037572150158e-05, "loss": 11.7146, "step": 26392 }, { "epoch": 1.4372043318154282, "grad_norm": 0.5811415516372281, "learning_rate": 3.8733406043676855e-05, "loss": 11.8797, "step": 26393 }, { "epoch": 1.4372587858120112, "grad_norm": 0.5806101047973045, "learning_rate": 3.8726436842271485e-05, "loss": 11.9977, "step": 26394 }, { "epoch": 1.4373132398085942, "grad_norm": 0.5283532018391751, "learning_rate": 3.8719468117339576e-05, "loss": 11.8021, "step": 26395 }, { "epoch": 1.4373676938051771, "grad_norm": 0.5078686479502453, "learning_rate": 3.871249986893536e-05, "loss": 11.9617, "step": 26396 }, { "epoch": 1.4374221478017604, "grad_norm": 0.6796389836641767, "learning_rate": 3.8705532097113064e-05, "loss": 11.91, "step": 26397 }, { "epoch": 1.4374766017983434, "grad_norm": 0.5635833071322215, "learning_rate": 3.869856480192679e-05, "loss": 11.8082, "step": 26398 }, { "epoch": 1.4375310557949263, "grad_norm": 0.6260701241950422, "learning_rate": 3.8691597983430784e-05, "loss": 12.012, "step": 26399 }, { "epoch": 1.4375855097915093, "grad_norm": 0.5743393146103634, "learning_rate": 3.868463164167916e-05, "loss": 11.9071, "step": 26400 }, { "epoch": 1.4376399637880923, "grad_norm": 0.5868376642187261, "learning_rate": 3.867766577672617e-05, "loss": 11.9936, "step": 26401 }, { "epoch": 1.4376944177846753, "grad_norm": 0.6182174557983814, "learning_rate": 3.867070038862592e-05, "loss": 11.759, "step": 26402 }, { "epoch": 1.4377488717812583, "grad_norm": 0.5351313167747331, "learning_rate": 3.866373547743256e-05, "loss": 11.7878, "step": 26403 }, { "epoch": 1.4378033257778413, "grad_norm": 0.5716894709738916, "learning_rate": 3.8656771043200327e-05, "loss": 11.9386, "step": 26404 }, { "epoch": 1.4378577797744243, "grad_norm": 0.5221652512282031, "learning_rate": 3.864980708598328e-05, "loss": 11.8306, "step": 26405 }, { "epoch": 1.4379122337710073, "grad_norm": 0.7000787175088273, "learning_rate": 3.864284360583562e-05, "loss": 11.7572, "step": 26406 }, { "epoch": 1.4379666877675903, "grad_norm": 0.5853533856279501, "learning_rate": 3.8635880602811535e-05, "loss": 11.7812, "step": 26407 }, { "epoch": 1.4380211417641733, "grad_norm": 0.5571842516686745, "learning_rate": 3.86289180769651e-05, "loss": 11.8221, "step": 26408 }, { "epoch": 1.4380755957607563, "grad_norm": 0.6136674336037315, "learning_rate": 3.862195602835053e-05, "loss": 11.9586, "step": 26409 }, { "epoch": 1.4381300497573393, "grad_norm": 0.492538472415326, "learning_rate": 3.861499445702188e-05, "loss": 11.771, "step": 26410 }, { "epoch": 1.4381845037539223, "grad_norm": 0.6849348467466794, "learning_rate": 3.860803336303337e-05, "loss": 11.8212, "step": 26411 }, { "epoch": 1.4382389577505055, "grad_norm": 0.567441722797438, "learning_rate": 3.860107274643908e-05, "loss": 11.9102, "step": 26412 }, { "epoch": 1.4382934117470885, "grad_norm": 0.5350889633818435, "learning_rate": 3.859411260729311e-05, "loss": 11.7683, "step": 26413 }, { "epoch": 1.4383478657436715, "grad_norm": 0.5794163374270717, "learning_rate": 3.8587152945649664e-05, "loss": 11.9231, "step": 26414 }, { "epoch": 1.4384023197402545, "grad_norm": 0.5222541612633167, "learning_rate": 3.8580193761562764e-05, "loss": 11.9064, "step": 26415 }, { "epoch": 1.4384567737368374, "grad_norm": 0.5449803229054676, "learning_rate": 3.857323505508663e-05, "loss": 11.8881, "step": 26416 }, { "epoch": 1.4385112277334204, "grad_norm": 0.5909731847978851, "learning_rate": 3.856627682627527e-05, "loss": 11.8117, "step": 26417 }, { "epoch": 1.4385656817300034, "grad_norm": 0.5943101138668914, "learning_rate": 3.8559319075182855e-05, "loss": 11.8565, "step": 26418 }, { "epoch": 1.4386201357265864, "grad_norm": 0.48160549429955773, "learning_rate": 3.855236180186351e-05, "loss": 11.8283, "step": 26419 }, { "epoch": 1.4386745897231694, "grad_norm": 0.5934331987570598, "learning_rate": 3.854540500637127e-05, "loss": 11.9252, "step": 26420 }, { "epoch": 1.4387290437197526, "grad_norm": 0.5368310818555759, "learning_rate": 3.853844868876031e-05, "loss": 11.8327, "step": 26421 }, { "epoch": 1.4387834977163356, "grad_norm": 0.5534315381139365, "learning_rate": 3.853149284908466e-05, "loss": 11.8371, "step": 26422 }, { "epoch": 1.4388379517129186, "grad_norm": 0.5518777788488775, "learning_rate": 3.852453748739841e-05, "loss": 11.9097, "step": 26423 }, { "epoch": 1.4388924057095016, "grad_norm": 0.5599390786515479, "learning_rate": 3.8517582603755696e-05, "loss": 12.0061, "step": 26424 }, { "epoch": 1.4389468597060846, "grad_norm": 0.5794946414300994, "learning_rate": 3.851062819821054e-05, "loss": 11.8319, "step": 26425 }, { "epoch": 1.4390013137026676, "grad_norm": 0.5266736447050263, "learning_rate": 3.850367427081708e-05, "loss": 11.7306, "step": 26426 }, { "epoch": 1.4390557676992506, "grad_norm": 0.5496319407583486, "learning_rate": 3.8496720821629326e-05, "loss": 11.915, "step": 26427 }, { "epoch": 1.4391102216958336, "grad_norm": 0.5414984675723962, "learning_rate": 3.848976785070143e-05, "loss": 11.8731, "step": 26428 }, { "epoch": 1.4391646756924166, "grad_norm": 0.5293628663317904, "learning_rate": 3.848281535808738e-05, "loss": 11.7896, "step": 26429 }, { "epoch": 1.4392191296889996, "grad_norm": 0.6275777157507865, "learning_rate": 3.8475863343841255e-05, "loss": 11.9586, "step": 26430 }, { "epoch": 1.4392735836855826, "grad_norm": 0.5526135047547414, "learning_rate": 3.8468911808017184e-05, "loss": 11.8154, "step": 26431 }, { "epoch": 1.4393280376821656, "grad_norm": 0.5011062321628569, "learning_rate": 3.846196075066917e-05, "loss": 11.9022, "step": 26432 }, { "epoch": 1.4393824916787485, "grad_norm": 0.5148144995516717, "learning_rate": 3.845501017185123e-05, "loss": 11.795, "step": 26433 }, { "epoch": 1.4394369456753315, "grad_norm": 0.5343934689759077, "learning_rate": 3.8448060071617496e-05, "loss": 11.7295, "step": 26434 }, { "epoch": 1.4394913996719148, "grad_norm": 0.5310400119548662, "learning_rate": 3.844111045002193e-05, "loss": 11.7665, "step": 26435 }, { "epoch": 1.4395458536684977, "grad_norm": 0.5262723138066365, "learning_rate": 3.8434161307118655e-05, "loss": 11.796, "step": 26436 }, { "epoch": 1.4396003076650807, "grad_norm": 0.5262234443805276, "learning_rate": 3.842721264296162e-05, "loss": 11.7942, "step": 26437 }, { "epoch": 1.4396547616616637, "grad_norm": 0.5552265090171387, "learning_rate": 3.8420264457604946e-05, "loss": 11.858, "step": 26438 }, { "epoch": 1.4397092156582467, "grad_norm": 0.5284576041256092, "learning_rate": 3.841331675110259e-05, "loss": 11.9226, "step": 26439 }, { "epoch": 1.4397636696548297, "grad_norm": 0.528154978750724, "learning_rate": 3.84063695235086e-05, "loss": 11.8791, "step": 26440 }, { "epoch": 1.4398181236514127, "grad_norm": 0.5375114336456656, "learning_rate": 3.839942277487706e-05, "loss": 11.9747, "step": 26441 }, { "epoch": 1.4398725776479957, "grad_norm": 0.6520639507404585, "learning_rate": 3.839247650526192e-05, "loss": 11.9064, "step": 26442 }, { "epoch": 1.4399270316445787, "grad_norm": 0.4965413455352172, "learning_rate": 3.8385530714717235e-05, "loss": 11.8684, "step": 26443 }, { "epoch": 1.439981485641162, "grad_norm": 0.5642819911632675, "learning_rate": 3.837858540329694e-05, "loss": 11.9912, "step": 26444 }, { "epoch": 1.440035939637745, "grad_norm": 0.5705476875390401, "learning_rate": 3.837164057105511e-05, "loss": 12.0032, "step": 26445 }, { "epoch": 1.440090393634328, "grad_norm": 0.516073449236866, "learning_rate": 3.836469621804578e-05, "loss": 11.6778, "step": 26446 }, { "epoch": 1.4401448476309109, "grad_norm": 0.5273706663052106, "learning_rate": 3.835775234432286e-05, "loss": 11.8991, "step": 26447 }, { "epoch": 1.4401993016274939, "grad_norm": 0.516940943447397, "learning_rate": 3.8350808949940444e-05, "loss": 11.9498, "step": 26448 }, { "epoch": 1.4402537556240769, "grad_norm": 0.6129870866541962, "learning_rate": 3.8343866034952426e-05, "loss": 11.9421, "step": 26449 }, { "epoch": 1.4403082096206599, "grad_norm": 0.5000039200400039, "learning_rate": 3.8336923599412886e-05, "loss": 11.9062, "step": 26450 }, { "epoch": 1.4403626636172429, "grad_norm": 0.5895570357586272, "learning_rate": 3.832998164337574e-05, "loss": 11.8764, "step": 26451 }, { "epoch": 1.4404171176138258, "grad_norm": 0.5096744817429743, "learning_rate": 3.8323040166894996e-05, "loss": 11.9575, "step": 26452 }, { "epoch": 1.4404715716104088, "grad_norm": 0.5210919444623549, "learning_rate": 3.83160991700247e-05, "loss": 11.8493, "step": 26453 }, { "epoch": 1.4405260256069918, "grad_norm": 0.566028201220997, "learning_rate": 3.83091586528187e-05, "loss": 11.9164, "step": 26454 }, { "epoch": 1.4405804796035748, "grad_norm": 0.5592746555967717, "learning_rate": 3.830221861533102e-05, "loss": 11.7384, "step": 26455 }, { "epoch": 1.4406349336001578, "grad_norm": 0.49807439105786255, "learning_rate": 3.829527905761567e-05, "loss": 11.8128, "step": 26456 }, { "epoch": 1.4406893875967408, "grad_norm": 0.4993852580956869, "learning_rate": 3.828833997972655e-05, "loss": 11.9153, "step": 26457 }, { "epoch": 1.4407438415933238, "grad_norm": 0.5091481273852589, "learning_rate": 3.828140138171767e-05, "loss": 11.7583, "step": 26458 }, { "epoch": 1.440798295589907, "grad_norm": 0.5316267220699452, "learning_rate": 3.827446326364295e-05, "loss": 11.8889, "step": 26459 }, { "epoch": 1.44085274958649, "grad_norm": 0.5049445355096555, "learning_rate": 3.8267525625556363e-05, "loss": 11.7802, "step": 26460 }, { "epoch": 1.440907203583073, "grad_norm": 0.4884318562691344, "learning_rate": 3.8260588467511824e-05, "loss": 11.8209, "step": 26461 }, { "epoch": 1.440961657579656, "grad_norm": 0.5591009650182845, "learning_rate": 3.8253651789563316e-05, "loss": 11.7407, "step": 26462 }, { "epoch": 1.441016111576239, "grad_norm": 0.5713114332985573, "learning_rate": 3.8246715591764825e-05, "loss": 11.8233, "step": 26463 }, { "epoch": 1.441070565572822, "grad_norm": 0.5529993521007889, "learning_rate": 3.823977987417016e-05, "loss": 11.793, "step": 26464 }, { "epoch": 1.441125019569405, "grad_norm": 0.5700467073177319, "learning_rate": 3.8232844636833364e-05, "loss": 11.8365, "step": 26465 }, { "epoch": 1.441179473565988, "grad_norm": 0.557722646863616, "learning_rate": 3.8225909879808285e-05, "loss": 11.7904, "step": 26466 }, { "epoch": 1.4412339275625712, "grad_norm": 0.5416506466773395, "learning_rate": 3.8218975603148885e-05, "loss": 11.9212, "step": 26467 }, { "epoch": 1.4412883815591542, "grad_norm": 0.4982781364953499, "learning_rate": 3.821204180690914e-05, "loss": 11.7878, "step": 26468 }, { "epoch": 1.4413428355557372, "grad_norm": 0.5143642027104275, "learning_rate": 3.820510849114288e-05, "loss": 11.8249, "step": 26469 }, { "epoch": 1.4413972895523202, "grad_norm": 0.5456973299290206, "learning_rate": 3.819817565590409e-05, "loss": 11.7678, "step": 26470 }, { "epoch": 1.4414517435489032, "grad_norm": 0.4926600743195348, "learning_rate": 3.8191243301246616e-05, "loss": 11.8152, "step": 26471 }, { "epoch": 1.4415061975454861, "grad_norm": 0.5032933729527044, "learning_rate": 3.818431142722444e-05, "loss": 11.8554, "step": 26472 }, { "epoch": 1.4415606515420691, "grad_norm": 0.5550752795060577, "learning_rate": 3.817738003389142e-05, "loss": 11.8841, "step": 26473 }, { "epoch": 1.4416151055386521, "grad_norm": 0.5282346410220012, "learning_rate": 3.817044912130143e-05, "loss": 11.8373, "step": 26474 }, { "epoch": 1.4416695595352351, "grad_norm": 0.6017067112650445, "learning_rate": 3.8163518689508425e-05, "loss": 11.9226, "step": 26475 }, { "epoch": 1.4417240135318181, "grad_norm": 0.4762906312380366, "learning_rate": 3.8156588738566245e-05, "loss": 11.8273, "step": 26476 }, { "epoch": 1.441778467528401, "grad_norm": 0.5889751542683911, "learning_rate": 3.8149659268528824e-05, "loss": 11.9261, "step": 26477 }, { "epoch": 1.441832921524984, "grad_norm": 0.5465175591594423, "learning_rate": 3.814273027945e-05, "loss": 11.9285, "step": 26478 }, { "epoch": 1.441887375521567, "grad_norm": 0.5498044555217565, "learning_rate": 3.8135801771383674e-05, "loss": 11.845, "step": 26479 }, { "epoch": 1.44194182951815, "grad_norm": 0.5569685373284927, "learning_rate": 3.812887374438376e-05, "loss": 11.8335, "step": 26480 }, { "epoch": 1.441996283514733, "grad_norm": 0.5465969630641462, "learning_rate": 3.8121946198504066e-05, "loss": 11.9757, "step": 26481 }, { "epoch": 1.4420507375113163, "grad_norm": 0.49155857069869185, "learning_rate": 3.811501913379853e-05, "loss": 11.8822, "step": 26482 }, { "epoch": 1.4421051915078993, "grad_norm": 0.5603115641034294, "learning_rate": 3.8108092550320985e-05, "loss": 11.9908, "step": 26483 }, { "epoch": 1.4421596455044823, "grad_norm": 0.5073353673802056, "learning_rate": 3.810116644812526e-05, "loss": 11.9003, "step": 26484 }, { "epoch": 1.4422140995010653, "grad_norm": 0.5392727103105533, "learning_rate": 3.809424082726528e-05, "loss": 11.9213, "step": 26485 }, { "epoch": 1.4422685534976483, "grad_norm": 0.5492288241804698, "learning_rate": 3.8087315687794824e-05, "loss": 11.8974, "step": 26486 }, { "epoch": 1.4423230074942313, "grad_norm": 0.5193820078847496, "learning_rate": 3.8080391029767825e-05, "loss": 11.9137, "step": 26487 }, { "epoch": 1.4423774614908142, "grad_norm": 0.5612989517853024, "learning_rate": 3.807346685323805e-05, "loss": 11.8743, "step": 26488 }, { "epoch": 1.4424319154873972, "grad_norm": 0.5516156638909567, "learning_rate": 3.806654315825938e-05, "loss": 11.9698, "step": 26489 }, { "epoch": 1.4424863694839802, "grad_norm": 0.533090466164547, "learning_rate": 3.805961994488569e-05, "loss": 11.8209, "step": 26490 }, { "epoch": 1.4425408234805634, "grad_norm": 0.5513457231363742, "learning_rate": 3.8052697213170763e-05, "loss": 11.8851, "step": 26491 }, { "epoch": 1.4425952774771464, "grad_norm": 0.5166375002092177, "learning_rate": 3.8045774963168465e-05, "loss": 11.9074, "step": 26492 }, { "epoch": 1.4426497314737294, "grad_norm": 0.5589315034614036, "learning_rate": 3.8038853194932636e-05, "loss": 11.8397, "step": 26493 }, { "epoch": 1.4427041854703124, "grad_norm": 0.6089769791437563, "learning_rate": 3.803193190851702e-05, "loss": 12.0076, "step": 26494 }, { "epoch": 1.4427586394668954, "grad_norm": 0.5698836597910661, "learning_rate": 3.802501110397553e-05, "loss": 11.8266, "step": 26495 }, { "epoch": 1.4428130934634784, "grad_norm": 0.5435638151684871, "learning_rate": 3.8018090781361914e-05, "loss": 11.887, "step": 26496 }, { "epoch": 1.4428675474600614, "grad_norm": 0.5469715169926146, "learning_rate": 3.8011170940730056e-05, "loss": 11.8753, "step": 26497 }, { "epoch": 1.4429220014566444, "grad_norm": 0.518215807742917, "learning_rate": 3.80042515821337e-05, "loss": 11.8149, "step": 26498 }, { "epoch": 1.4429764554532274, "grad_norm": 0.5426434446045338, "learning_rate": 3.799733270562671e-05, "loss": 11.9739, "step": 26499 }, { "epoch": 1.4430309094498104, "grad_norm": 0.5954430547312034, "learning_rate": 3.7990414311262815e-05, "loss": 11.8652, "step": 26500 }, { "epoch": 1.4430853634463934, "grad_norm": 0.5290342827964081, "learning_rate": 3.7983496399095865e-05, "loss": 11.8137, "step": 26501 }, { "epoch": 1.4431398174429764, "grad_norm": 0.5430989247657333, "learning_rate": 3.797657896917968e-05, "loss": 11.9709, "step": 26502 }, { "epoch": 1.4431942714395594, "grad_norm": 0.547745476687581, "learning_rate": 3.796966202156802e-05, "loss": 11.9644, "step": 26503 }, { "epoch": 1.4432487254361424, "grad_norm": 0.5779703008891022, "learning_rate": 3.7962745556314636e-05, "loss": 11.9251, "step": 26504 }, { "epoch": 1.4433031794327256, "grad_norm": 0.5508320217686239, "learning_rate": 3.795582957347338e-05, "loss": 11.8728, "step": 26505 }, { "epoch": 1.4433576334293086, "grad_norm": 0.5561825859949718, "learning_rate": 3.7948914073097964e-05, "loss": 11.9296, "step": 26506 }, { "epoch": 1.4434120874258916, "grad_norm": 0.5901528433694065, "learning_rate": 3.7941999055242236e-05, "loss": 11.757, "step": 26507 }, { "epoch": 1.4434665414224745, "grad_norm": 0.5434915409575033, "learning_rate": 3.793508451995989e-05, "loss": 11.8442, "step": 26508 }, { "epoch": 1.4435209954190575, "grad_norm": 0.5755886356664981, "learning_rate": 3.792817046730477e-05, "loss": 11.9183, "step": 26509 }, { "epoch": 1.4435754494156405, "grad_norm": 0.5414810281444997, "learning_rate": 3.792125689733057e-05, "loss": 11.7676, "step": 26510 }, { "epoch": 1.4436299034122235, "grad_norm": 0.5362016108857978, "learning_rate": 3.791434381009109e-05, "loss": 11.839, "step": 26511 }, { "epoch": 1.4436843574088065, "grad_norm": 0.5387616562075546, "learning_rate": 3.790743120564012e-05, "loss": 11.8144, "step": 26512 }, { "epoch": 1.4437388114053895, "grad_norm": 0.5298039663680805, "learning_rate": 3.790051908403138e-05, "loss": 11.8176, "step": 26513 }, { "epoch": 1.4437932654019727, "grad_norm": 0.6147624955791243, "learning_rate": 3.789360744531861e-05, "loss": 11.812, "step": 26514 }, { "epoch": 1.4438477193985557, "grad_norm": 0.5303930286967303, "learning_rate": 3.788669628955554e-05, "loss": 11.7855, "step": 26515 }, { "epoch": 1.4439021733951387, "grad_norm": 0.5383850553182316, "learning_rate": 3.787978561679593e-05, "loss": 11.8885, "step": 26516 }, { "epoch": 1.4439566273917217, "grad_norm": 0.5290483551962385, "learning_rate": 3.7872875427093554e-05, "loss": 11.9994, "step": 26517 }, { "epoch": 1.4440110813883047, "grad_norm": 0.5625168397314703, "learning_rate": 3.786596572050209e-05, "loss": 11.8345, "step": 26518 }, { "epoch": 1.4440655353848877, "grad_norm": 0.5108397956744112, "learning_rate": 3.7859056497075326e-05, "loss": 11.8848, "step": 26519 }, { "epoch": 1.4441199893814707, "grad_norm": 0.5683020133212698, "learning_rate": 3.785214775686693e-05, "loss": 11.7156, "step": 26520 }, { "epoch": 1.4441744433780537, "grad_norm": 0.548609603558456, "learning_rate": 3.78452394999307e-05, "loss": 11.7671, "step": 26521 }, { "epoch": 1.4442288973746367, "grad_norm": 0.5811885727393167, "learning_rate": 3.7838331726320254e-05, "loss": 11.8624, "step": 26522 }, { "epoch": 1.4442833513712197, "grad_norm": 0.5426540756261988, "learning_rate": 3.7831424436089415e-05, "loss": 11.8728, "step": 26523 }, { "epoch": 1.4443378053678027, "grad_norm": 0.5602572778176816, "learning_rate": 3.7824517629291835e-05, "loss": 11.8197, "step": 26524 }, { "epoch": 1.4443922593643856, "grad_norm": 0.5192908691092869, "learning_rate": 3.7817611305981205e-05, "loss": 11.8315, "step": 26525 }, { "epoch": 1.4444467133609686, "grad_norm": 0.534263257596746, "learning_rate": 3.781070546621129e-05, "loss": 11.889, "step": 26526 }, { "epoch": 1.4445011673575516, "grad_norm": 0.5953627627965478, "learning_rate": 3.7803800110035725e-05, "loss": 11.9301, "step": 26527 }, { "epoch": 1.4445556213541346, "grad_norm": 0.6921158756832816, "learning_rate": 3.7796895237508245e-05, "loss": 12.0371, "step": 26528 }, { "epoch": 1.4446100753507178, "grad_norm": 0.5317652565629187, "learning_rate": 3.778999084868257e-05, "loss": 11.9459, "step": 26529 }, { "epoch": 1.4446645293473008, "grad_norm": 0.5907446369727426, "learning_rate": 3.7783086943612324e-05, "loss": 11.8588, "step": 26530 }, { "epoch": 1.4447189833438838, "grad_norm": 0.5225637083776867, "learning_rate": 3.777618352235125e-05, "loss": 11.8373, "step": 26531 }, { "epoch": 1.4447734373404668, "grad_norm": 0.5645536273713637, "learning_rate": 3.7769280584952994e-05, "loss": 11.9279, "step": 26532 }, { "epoch": 1.4448278913370498, "grad_norm": 0.519573391850562, "learning_rate": 3.7762378131471266e-05, "loss": 11.6787, "step": 26533 }, { "epoch": 1.4448823453336328, "grad_norm": 0.5483649445711375, "learning_rate": 3.7755476161959736e-05, "loss": 11.9166, "step": 26534 }, { "epoch": 1.4449367993302158, "grad_norm": 0.5154928719869454, "learning_rate": 3.7748574676472016e-05, "loss": 11.8932, "step": 26535 }, { "epoch": 1.4449912533267988, "grad_norm": 0.581986857093285, "learning_rate": 3.7741673675061865e-05, "loss": 11.9404, "step": 26536 }, { "epoch": 1.445045707323382, "grad_norm": 0.5947792692617778, "learning_rate": 3.7734773157782854e-05, "loss": 11.8795, "step": 26537 }, { "epoch": 1.445100161319965, "grad_norm": 0.5449383458016724, "learning_rate": 3.772787312468869e-05, "loss": 12.0347, "step": 26538 }, { "epoch": 1.445154615316548, "grad_norm": 0.5352143213393263, "learning_rate": 3.7720973575833075e-05, "loss": 11.8536, "step": 26539 }, { "epoch": 1.445209069313131, "grad_norm": 0.545495224596101, "learning_rate": 3.771407451126957e-05, "loss": 11.8957, "step": 26540 }, { "epoch": 1.445263523309714, "grad_norm": 0.5775567875249777, "learning_rate": 3.7707175931051896e-05, "loss": 11.9274, "step": 26541 }, { "epoch": 1.445317977306297, "grad_norm": 0.574280040621081, "learning_rate": 3.770027783523364e-05, "loss": 11.9951, "step": 26542 }, { "epoch": 1.44537243130288, "grad_norm": 0.5640495140559315, "learning_rate": 3.769338022386851e-05, "loss": 11.8683, "step": 26543 }, { "epoch": 1.445426885299463, "grad_norm": 0.5200569374135725, "learning_rate": 3.768648309701009e-05, "loss": 11.8488, "step": 26544 }, { "epoch": 1.445481339296046, "grad_norm": 0.5270282307754426, "learning_rate": 3.767958645471201e-05, "loss": 11.9137, "step": 26545 }, { "epoch": 1.445535793292629, "grad_norm": 0.5133276504647482, "learning_rate": 3.767269029702795e-05, "loss": 11.8536, "step": 26546 }, { "epoch": 1.445590247289212, "grad_norm": 0.5383081779076734, "learning_rate": 3.766579462401146e-05, "loss": 11.96, "step": 26547 }, { "epoch": 1.445644701285795, "grad_norm": 0.5721004487225891, "learning_rate": 3.7658899435716245e-05, "loss": 11.8655, "step": 26548 }, { "epoch": 1.445699155282378, "grad_norm": 0.46913941220405814, "learning_rate": 3.7652004732195834e-05, "loss": 11.6833, "step": 26549 }, { "epoch": 1.445753609278961, "grad_norm": 0.5752380497709699, "learning_rate": 3.76451105135039e-05, "loss": 11.8896, "step": 26550 }, { "epoch": 1.445808063275544, "grad_norm": 0.5650594853578803, "learning_rate": 3.763821677969408e-05, "loss": 11.835, "step": 26551 }, { "epoch": 1.4458625172721271, "grad_norm": 0.5662700850432301, "learning_rate": 3.7631323530819905e-05, "loss": 11.8166, "step": 26552 }, { "epoch": 1.44591697126871, "grad_norm": 0.5807502724892819, "learning_rate": 3.762443076693506e-05, "loss": 11.746, "step": 26553 }, { "epoch": 1.445971425265293, "grad_norm": 0.5565076886180103, "learning_rate": 3.7617538488093094e-05, "loss": 11.881, "step": 26554 }, { "epoch": 1.446025879261876, "grad_norm": 0.5800596085095544, "learning_rate": 3.761064669434758e-05, "loss": 11.9344, "step": 26555 }, { "epoch": 1.446080333258459, "grad_norm": 0.5521810673268931, "learning_rate": 3.7603755385752185e-05, "loss": 11.94, "step": 26556 }, { "epoch": 1.446134787255042, "grad_norm": 0.523797652176005, "learning_rate": 3.75968645623604e-05, "loss": 11.9031, "step": 26557 }, { "epoch": 1.446189241251625, "grad_norm": 0.5597867020012961, "learning_rate": 3.75899742242259e-05, "loss": 11.8146, "step": 26558 }, { "epoch": 1.446243695248208, "grad_norm": 0.5392480061645936, "learning_rate": 3.758308437140219e-05, "loss": 11.7328, "step": 26559 }, { "epoch": 1.446298149244791, "grad_norm": 0.6091671909538555, "learning_rate": 3.757619500394289e-05, "loss": 12.0145, "step": 26560 }, { "epoch": 1.4463526032413743, "grad_norm": 0.5692700371527594, "learning_rate": 3.756930612190159e-05, "loss": 11.8579, "step": 26561 }, { "epoch": 1.4464070572379573, "grad_norm": 0.6063908307754797, "learning_rate": 3.7562417725331814e-05, "loss": 11.8455, "step": 26562 }, { "epoch": 1.4464615112345403, "grad_norm": 0.594514892602684, "learning_rate": 3.755552981428722e-05, "loss": 11.8091, "step": 26563 }, { "epoch": 1.4465159652311232, "grad_norm": 0.5557790256658615, "learning_rate": 3.754864238882121e-05, "loss": 11.8945, "step": 26564 }, { "epoch": 1.4465704192277062, "grad_norm": 0.521261547004267, "learning_rate": 3.754175544898745e-05, "loss": 11.8221, "step": 26565 }, { "epoch": 1.4466248732242892, "grad_norm": 0.5929063166523696, "learning_rate": 3.753486899483949e-05, "loss": 11.9031, "step": 26566 }, { "epoch": 1.4466793272208722, "grad_norm": 0.4929755068670029, "learning_rate": 3.7527983026430834e-05, "loss": 12.0085, "step": 26567 }, { "epoch": 1.4467337812174552, "grad_norm": 0.5439263161014277, "learning_rate": 3.752109754381511e-05, "loss": 11.9281, "step": 26568 }, { "epoch": 1.4467882352140382, "grad_norm": 0.5373762935560514, "learning_rate": 3.751421254704576e-05, "loss": 11.9186, "step": 26569 }, { "epoch": 1.4468426892106212, "grad_norm": 0.5807048933876086, "learning_rate": 3.75073280361764e-05, "loss": 11.9166, "step": 26570 }, { "epoch": 1.4468971432072042, "grad_norm": 0.5481809421062326, "learning_rate": 3.7500444011260515e-05, "loss": 11.8524, "step": 26571 }, { "epoch": 1.4469515972037872, "grad_norm": 0.587755210628643, "learning_rate": 3.749356047235165e-05, "loss": 12.0914, "step": 26572 }, { "epoch": 1.4470060512003702, "grad_norm": 0.5665313473285608, "learning_rate": 3.7486677419503427e-05, "loss": 11.8461, "step": 26573 }, { "epoch": 1.4470605051969532, "grad_norm": 0.5106968799287922, "learning_rate": 3.74797948527692e-05, "loss": 11.8505, "step": 26574 }, { "epoch": 1.4471149591935364, "grad_norm": 0.5304620584865829, "learning_rate": 3.7472912772202605e-05, "loss": 11.8942, "step": 26575 }, { "epoch": 1.4471694131901194, "grad_norm": 0.5264843733801304, "learning_rate": 3.74660311778571e-05, "loss": 11.8378, "step": 26576 }, { "epoch": 1.4472238671867024, "grad_norm": 0.529979963182147, "learning_rate": 3.7459150069786216e-05, "loss": 11.838, "step": 26577 }, { "epoch": 1.4472783211832854, "grad_norm": 0.5467720129741784, "learning_rate": 3.745226944804352e-05, "loss": 11.8257, "step": 26578 }, { "epoch": 1.4473327751798684, "grad_norm": 0.533493764804981, "learning_rate": 3.744538931268241e-05, "loss": 11.8858, "step": 26579 }, { "epoch": 1.4473872291764514, "grad_norm": 0.5378775567100834, "learning_rate": 3.7438509663756494e-05, "loss": 11.8686, "step": 26580 }, { "epoch": 1.4474416831730343, "grad_norm": 0.5566027107870536, "learning_rate": 3.743163050131917e-05, "loss": 11.8811, "step": 26581 }, { "epoch": 1.4474961371696173, "grad_norm": 0.5515944992876728, "learning_rate": 3.742475182542403e-05, "loss": 11.8411, "step": 26582 }, { "epoch": 1.4475505911662003, "grad_norm": 0.552049968058611, "learning_rate": 3.74178736361245e-05, "loss": 11.8082, "step": 26583 }, { "epoch": 1.4476050451627835, "grad_norm": 0.537822660831308, "learning_rate": 3.741099593347406e-05, "loss": 11.9451, "step": 26584 }, { "epoch": 1.4476594991593665, "grad_norm": 0.5853675462032129, "learning_rate": 3.740411871752622e-05, "loss": 11.9511, "step": 26585 }, { "epoch": 1.4477139531559495, "grad_norm": 0.5226096967266199, "learning_rate": 3.739724198833444e-05, "loss": 11.8439, "step": 26586 }, { "epoch": 1.4477684071525325, "grad_norm": 0.5312786117885538, "learning_rate": 3.739036574595221e-05, "loss": 11.9, "step": 26587 }, { "epoch": 1.4478228611491155, "grad_norm": 0.6276620068702384, "learning_rate": 3.7383489990433005e-05, "loss": 11.8458, "step": 26588 }, { "epoch": 1.4478773151456985, "grad_norm": 0.680144299531745, "learning_rate": 3.737661472183026e-05, "loss": 12.0234, "step": 26589 }, { "epoch": 1.4479317691422815, "grad_norm": 0.5355976618891843, "learning_rate": 3.73697399401975e-05, "loss": 11.8629, "step": 26590 }, { "epoch": 1.4479862231388645, "grad_norm": 0.5919818661633061, "learning_rate": 3.736286564558811e-05, "loss": 11.9697, "step": 26591 }, { "epoch": 1.4480406771354475, "grad_norm": 0.5310696870018928, "learning_rate": 3.7355991838055606e-05, "loss": 11.8087, "step": 26592 }, { "epoch": 1.4480951311320305, "grad_norm": 0.5297474909597147, "learning_rate": 3.734911851765339e-05, "loss": 11.8513, "step": 26593 }, { "epoch": 1.4481495851286135, "grad_norm": 0.5349907359447504, "learning_rate": 3.7342245684434964e-05, "loss": 11.8563, "step": 26594 }, { "epoch": 1.4482040391251965, "grad_norm": 0.6776675951913991, "learning_rate": 3.733537333845375e-05, "loss": 11.9932, "step": 26595 }, { "epoch": 1.4482584931217795, "grad_norm": 0.506046570760118, "learning_rate": 3.7328501479763144e-05, "loss": 11.8887, "step": 26596 }, { "epoch": 1.4483129471183624, "grad_norm": 0.5844919760727451, "learning_rate": 3.732163010841665e-05, "loss": 11.7822, "step": 26597 }, { "epoch": 1.4483674011149457, "grad_norm": 0.5726426211448638, "learning_rate": 3.7314759224467646e-05, "loss": 11.8709, "step": 26598 }, { "epoch": 1.4484218551115287, "grad_norm": 0.6322767415967914, "learning_rate": 3.730788882796957e-05, "loss": 11.7881, "step": 26599 }, { "epoch": 1.4484763091081116, "grad_norm": 0.5457326375300916, "learning_rate": 3.730101891897592e-05, "loss": 11.8803, "step": 26600 }, { "epoch": 1.4485307631046946, "grad_norm": 0.5716956568315725, "learning_rate": 3.729414949754001e-05, "loss": 11.7994, "step": 26601 }, { "epoch": 1.4485852171012776, "grad_norm": 0.5718924603303069, "learning_rate": 3.728728056371536e-05, "loss": 11.8763, "step": 26602 }, { "epoch": 1.4486396710978606, "grad_norm": 0.5960681913198139, "learning_rate": 3.728041211755529e-05, "loss": 11.85, "step": 26603 }, { "epoch": 1.4486941250944436, "grad_norm": 0.5632622976026171, "learning_rate": 3.7273544159113284e-05, "loss": 11.9303, "step": 26604 }, { "epoch": 1.4487485790910266, "grad_norm": 0.6080133805513952, "learning_rate": 3.726667668844271e-05, "loss": 11.8867, "step": 26605 }, { "epoch": 1.4488030330876096, "grad_norm": 0.5636445259452803, "learning_rate": 3.725980970559696e-05, "loss": 11.9338, "step": 26606 }, { "epoch": 1.4488574870841928, "grad_norm": 0.5272925428562419, "learning_rate": 3.7252943210629475e-05, "loss": 11.8475, "step": 26607 }, { "epoch": 1.4489119410807758, "grad_norm": 0.5407317561188552, "learning_rate": 3.72460772035936e-05, "loss": 11.8126, "step": 26608 }, { "epoch": 1.4489663950773588, "grad_norm": 0.5934215016958524, "learning_rate": 3.723921168454275e-05, "loss": 11.9487, "step": 26609 }, { "epoch": 1.4490208490739418, "grad_norm": 0.6207775714699713, "learning_rate": 3.723234665353035e-05, "loss": 11.9526, "step": 26610 }, { "epoch": 1.4490753030705248, "grad_norm": 0.5380609919260102, "learning_rate": 3.7225482110609725e-05, "loss": 11.932, "step": 26611 }, { "epoch": 1.4491297570671078, "grad_norm": 0.5668187977897106, "learning_rate": 3.72186180558343e-05, "loss": 11.7625, "step": 26612 }, { "epoch": 1.4491842110636908, "grad_norm": 0.6256438118898571, "learning_rate": 3.721175448925739e-05, "loss": 11.9884, "step": 26613 }, { "epoch": 1.4492386650602738, "grad_norm": 0.5845515425396075, "learning_rate": 3.720489141093245e-05, "loss": 11.7293, "step": 26614 }, { "epoch": 1.4492931190568568, "grad_norm": 0.5545123865862376, "learning_rate": 3.71980288209128e-05, "loss": 11.6521, "step": 26615 }, { "epoch": 1.4493475730534398, "grad_norm": 0.5884598664294155, "learning_rate": 3.719116671925178e-05, "loss": 11.8231, "step": 26616 }, { "epoch": 1.4494020270500227, "grad_norm": 0.5254250350294184, "learning_rate": 3.7184305106002815e-05, "loss": 11.8132, "step": 26617 }, { "epoch": 1.4494564810466057, "grad_norm": 0.5477490901207946, "learning_rate": 3.717744398121919e-05, "loss": 11.8797, "step": 26618 }, { "epoch": 1.4495109350431887, "grad_norm": 0.5908517690865315, "learning_rate": 3.717058334495432e-05, "loss": 11.8457, "step": 26619 }, { "epoch": 1.4495653890397717, "grad_norm": 0.51775420041226, "learning_rate": 3.716372319726151e-05, "loss": 11.6898, "step": 26620 }, { "epoch": 1.4496198430363547, "grad_norm": 0.4816928323939602, "learning_rate": 3.715686353819413e-05, "loss": 11.718, "step": 26621 }, { "epoch": 1.449674297032938, "grad_norm": 0.5499302883225158, "learning_rate": 3.7150004367805544e-05, "loss": 11.7975, "step": 26622 }, { "epoch": 1.449728751029521, "grad_norm": 0.5083234784909617, "learning_rate": 3.714314568614904e-05, "loss": 11.9855, "step": 26623 }, { "epoch": 1.449783205026104, "grad_norm": 0.5703637258080845, "learning_rate": 3.713628749327803e-05, "loss": 11.9224, "step": 26624 }, { "epoch": 1.449837659022687, "grad_norm": 0.521465439116705, "learning_rate": 3.712942978924573e-05, "loss": 11.911, "step": 26625 }, { "epoch": 1.44989211301927, "grad_norm": 0.5457540952526343, "learning_rate": 3.712257257410553e-05, "loss": 11.8902, "step": 26626 }, { "epoch": 1.449946567015853, "grad_norm": 0.6187793111457757, "learning_rate": 3.711571584791078e-05, "loss": 12.0318, "step": 26627 }, { "epoch": 1.4500010210124359, "grad_norm": 0.5475059480060864, "learning_rate": 3.710885961071473e-05, "loss": 11.7676, "step": 26628 }, { "epoch": 1.4500554750090189, "grad_norm": 0.5617799550263257, "learning_rate": 3.710200386257078e-05, "loss": 11.8396, "step": 26629 }, { "epoch": 1.4501099290056019, "grad_norm": 0.5459667987203591, "learning_rate": 3.7095148603532145e-05, "loss": 11.9124, "step": 26630 }, { "epoch": 1.450164383002185, "grad_norm": 0.4810094622553554, "learning_rate": 3.7088293833652235e-05, "loss": 11.9806, "step": 26631 }, { "epoch": 1.450218836998768, "grad_norm": 0.5419299697593373, "learning_rate": 3.708143955298427e-05, "loss": 11.9082, "step": 26632 }, { "epoch": 1.450273290995351, "grad_norm": 0.5519326139380485, "learning_rate": 3.707458576158157e-05, "loss": 11.843, "step": 26633 }, { "epoch": 1.450327744991934, "grad_norm": 0.5701770708516417, "learning_rate": 3.706773245949752e-05, "loss": 11.872, "step": 26634 }, { "epoch": 1.450382198988517, "grad_norm": 0.5318936987815376, "learning_rate": 3.7060879646785263e-05, "loss": 11.8942, "step": 26635 }, { "epoch": 1.4504366529851, "grad_norm": 0.5432286107373698, "learning_rate": 3.7054027323498154e-05, "loss": 11.9042, "step": 26636 }, { "epoch": 1.450491106981683, "grad_norm": 0.5471231654232496, "learning_rate": 3.704717548968953e-05, "loss": 11.8201, "step": 26637 }, { "epoch": 1.450545560978266, "grad_norm": 0.5292156627920321, "learning_rate": 3.704032414541258e-05, "loss": 11.9441, "step": 26638 }, { "epoch": 1.450600014974849, "grad_norm": 0.5307262638375472, "learning_rate": 3.703347329072068e-05, "loss": 11.7009, "step": 26639 }, { "epoch": 1.450654468971432, "grad_norm": 0.5031604460807896, "learning_rate": 3.702662292566701e-05, "loss": 11.8017, "step": 26640 }, { "epoch": 1.450708922968015, "grad_norm": 0.5135695826525555, "learning_rate": 3.701977305030492e-05, "loss": 11.9195, "step": 26641 }, { "epoch": 1.450763376964598, "grad_norm": 0.523123755323666, "learning_rate": 3.701292366468759e-05, "loss": 11.8516, "step": 26642 }, { "epoch": 1.450817830961181, "grad_norm": 0.5273212093046554, "learning_rate": 3.700607476886834e-05, "loss": 11.7828, "step": 26643 }, { "epoch": 1.450872284957764, "grad_norm": 0.5765967455236608, "learning_rate": 3.699922636290047e-05, "loss": 11.7969, "step": 26644 }, { "epoch": 1.4509267389543472, "grad_norm": 0.5088291676098585, "learning_rate": 3.699237844683713e-05, "loss": 11.7438, "step": 26645 }, { "epoch": 1.4509811929509302, "grad_norm": 0.619557520415979, "learning_rate": 3.6985531020731645e-05, "loss": 11.8212, "step": 26646 }, { "epoch": 1.4510356469475132, "grad_norm": 0.563658842852241, "learning_rate": 3.697868408463721e-05, "loss": 11.8958, "step": 26647 }, { "epoch": 1.4510901009440962, "grad_norm": 0.5518264658910694, "learning_rate": 3.6971837638607086e-05, "loss": 11.829, "step": 26648 }, { "epoch": 1.4511445549406792, "grad_norm": 0.5108141829811468, "learning_rate": 3.696499168269456e-05, "loss": 11.715, "step": 26649 }, { "epoch": 1.4511990089372622, "grad_norm": 0.5799726850046848, "learning_rate": 3.6958146216952806e-05, "loss": 11.8591, "step": 26650 }, { "epoch": 1.4512534629338452, "grad_norm": 0.6435227694286262, "learning_rate": 3.6951301241435096e-05, "loss": 11.8321, "step": 26651 }, { "epoch": 1.4513079169304282, "grad_norm": 0.547431749579825, "learning_rate": 3.6944456756194625e-05, "loss": 11.8445, "step": 26652 }, { "epoch": 1.4513623709270111, "grad_norm": 0.5623785425870065, "learning_rate": 3.6937612761284654e-05, "loss": 11.9257, "step": 26653 }, { "epoch": 1.4514168249235944, "grad_norm": 0.6250343284096292, "learning_rate": 3.693076925675839e-05, "loss": 12.0168, "step": 26654 }, { "epoch": 1.4514712789201774, "grad_norm": 0.5355238706229178, "learning_rate": 3.6923926242669e-05, "loss": 11.8364, "step": 26655 }, { "epoch": 1.4515257329167603, "grad_norm": 0.5228373184359539, "learning_rate": 3.6917083719069775e-05, "loss": 11.7161, "step": 26656 }, { "epoch": 1.4515801869133433, "grad_norm": 0.5314041809686707, "learning_rate": 3.691024168601386e-05, "loss": 11.9017, "step": 26657 }, { "epoch": 1.4516346409099263, "grad_norm": 0.504874262963168, "learning_rate": 3.6903400143554476e-05, "loss": 11.8323, "step": 26658 }, { "epoch": 1.4516890949065093, "grad_norm": 0.5325775240786298, "learning_rate": 3.6896559091744866e-05, "loss": 11.9752, "step": 26659 }, { "epoch": 1.4517435489030923, "grad_norm": 0.5333450132962121, "learning_rate": 3.6889718530638165e-05, "loss": 11.8763, "step": 26660 }, { "epoch": 1.4517980028996753, "grad_norm": 0.5229551905724732, "learning_rate": 3.688287846028763e-05, "loss": 11.8127, "step": 26661 }, { "epoch": 1.4518524568962583, "grad_norm": 0.5546493919859037, "learning_rate": 3.687603888074638e-05, "loss": 11.8888, "step": 26662 }, { "epoch": 1.4519069108928413, "grad_norm": 0.6208436977953069, "learning_rate": 3.686919979206768e-05, "loss": 11.8377, "step": 26663 }, { "epoch": 1.4519613648894243, "grad_norm": 0.6029887441206228, "learning_rate": 3.6862361194304663e-05, "loss": 11.8657, "step": 26664 }, { "epoch": 1.4520158188860073, "grad_norm": 0.5307189853439686, "learning_rate": 3.685552308751048e-05, "loss": 11.8823, "step": 26665 }, { "epoch": 1.4520702728825903, "grad_norm": 0.567352885839594, "learning_rate": 3.6848685471738375e-05, "loss": 11.8497, "step": 26666 }, { "epoch": 1.4521247268791733, "grad_norm": 0.5181823060831318, "learning_rate": 3.684184834704144e-05, "loss": 11.9856, "step": 26667 }, { "epoch": 1.4521791808757565, "grad_norm": 0.49518589676387914, "learning_rate": 3.683501171347292e-05, "loss": 11.9018, "step": 26668 }, { "epoch": 1.4522336348723395, "grad_norm": 0.5724251951825899, "learning_rate": 3.682817557108592e-05, "loss": 11.9484, "step": 26669 }, { "epoch": 1.4522880888689225, "grad_norm": 0.4889244917085866, "learning_rate": 3.68213399199336e-05, "loss": 11.8869, "step": 26670 }, { "epoch": 1.4523425428655055, "grad_norm": 0.5889861321989771, "learning_rate": 3.681450476006919e-05, "loss": 11.8546, "step": 26671 }, { "epoch": 1.4523969968620885, "grad_norm": 0.5525937298946881, "learning_rate": 3.680767009154574e-05, "loss": 11.8557, "step": 26672 }, { "epoch": 1.4524514508586714, "grad_norm": 0.5281489612725787, "learning_rate": 3.680083591441649e-05, "loss": 11.8243, "step": 26673 }, { "epoch": 1.4525059048552544, "grad_norm": 0.5640452792677921, "learning_rate": 3.679400222873454e-05, "loss": 12.0197, "step": 26674 }, { "epoch": 1.4525603588518374, "grad_norm": 0.5531250123575686, "learning_rate": 3.678716903455298e-05, "loss": 11.909, "step": 26675 }, { "epoch": 1.4526148128484204, "grad_norm": 0.516898248232284, "learning_rate": 3.6780336331925035e-05, "loss": 11.9252, "step": 26676 }, { "epoch": 1.4526692668450036, "grad_norm": 0.5915812540439752, "learning_rate": 3.677350412090377e-05, "loss": 11.8243, "step": 26677 }, { "epoch": 1.4527237208415866, "grad_norm": 0.5657633122548344, "learning_rate": 3.676667240154236e-05, "loss": 11.8663, "step": 26678 }, { "epoch": 1.4527781748381696, "grad_norm": 0.5015793374178141, "learning_rate": 3.6759841173893884e-05, "loss": 11.7767, "step": 26679 }, { "epoch": 1.4528326288347526, "grad_norm": 0.5368184892830379, "learning_rate": 3.6753010438011524e-05, "loss": 11.9265, "step": 26680 }, { "epoch": 1.4528870828313356, "grad_norm": 0.578257864527884, "learning_rate": 3.674618019394832e-05, "loss": 12.0579, "step": 26681 }, { "epoch": 1.4529415368279186, "grad_norm": 0.550452838930861, "learning_rate": 3.6739350441757436e-05, "loss": 11.8322, "step": 26682 }, { "epoch": 1.4529959908245016, "grad_norm": 0.5818041465059394, "learning_rate": 3.6732521181492e-05, "loss": 11.957, "step": 26683 }, { "epoch": 1.4530504448210846, "grad_norm": 0.49841726471481923, "learning_rate": 3.672569241320509e-05, "loss": 11.9075, "step": 26684 }, { "epoch": 1.4531048988176676, "grad_norm": 0.5062898364607856, "learning_rate": 3.671886413694977e-05, "loss": 11.7822, "step": 26685 }, { "epoch": 1.4531593528142506, "grad_norm": 0.5812687075027585, "learning_rate": 3.671203635277921e-05, "loss": 11.9392, "step": 26686 }, { "epoch": 1.4532138068108336, "grad_norm": 0.5438822429058972, "learning_rate": 3.670520906074644e-05, "loss": 12.0322, "step": 26687 }, { "epoch": 1.4532682608074166, "grad_norm": 0.6587171183868992, "learning_rate": 3.6698382260904605e-05, "loss": 11.8866, "step": 26688 }, { "epoch": 1.4533227148039995, "grad_norm": 0.6229946243171511, "learning_rate": 3.669155595330673e-05, "loss": 11.8018, "step": 26689 }, { "epoch": 1.4533771688005825, "grad_norm": 0.5237312266741596, "learning_rate": 3.668473013800599e-05, "loss": 11.7315, "step": 26690 }, { "epoch": 1.4534316227971655, "grad_norm": 0.5458746431351207, "learning_rate": 3.667790481505534e-05, "loss": 11.8983, "step": 26691 }, { "epoch": 1.4534860767937487, "grad_norm": 0.5293096620609764, "learning_rate": 3.667107998450794e-05, "loss": 11.8849, "step": 26692 }, { "epoch": 1.4535405307903317, "grad_norm": 0.5801932421878874, "learning_rate": 3.666425564641687e-05, "loss": 11.8451, "step": 26693 }, { "epoch": 1.4535949847869147, "grad_norm": 0.5618362862651421, "learning_rate": 3.665743180083517e-05, "loss": 11.9924, "step": 26694 }, { "epoch": 1.4536494387834977, "grad_norm": 0.5225101256088879, "learning_rate": 3.6650608447815904e-05, "loss": 11.7398, "step": 26695 }, { "epoch": 1.4537038927800807, "grad_norm": 0.560091871139057, "learning_rate": 3.6643785587412106e-05, "loss": 11.764, "step": 26696 }, { "epoch": 1.4537583467766637, "grad_norm": 0.49389084916806575, "learning_rate": 3.6636963219676843e-05, "loss": 11.7378, "step": 26697 }, { "epoch": 1.4538128007732467, "grad_norm": 0.5167474388162381, "learning_rate": 3.6630141344663214e-05, "loss": 11.8895, "step": 26698 }, { "epoch": 1.4538672547698297, "grad_norm": 0.5112341266163132, "learning_rate": 3.662331996242421e-05, "loss": 11.7585, "step": 26699 }, { "epoch": 1.453921708766413, "grad_norm": 0.6146450847183496, "learning_rate": 3.661649907301292e-05, "loss": 11.8389, "step": 26700 }, { "epoch": 1.453976162762996, "grad_norm": 0.5976576909184441, "learning_rate": 3.660967867648234e-05, "loss": 11.934, "step": 26701 }, { "epoch": 1.454030616759579, "grad_norm": 0.5418991571391522, "learning_rate": 3.660285877288555e-05, "loss": 12.0553, "step": 26702 }, { "epoch": 1.454085070756162, "grad_norm": 0.515063518740225, "learning_rate": 3.659603936227554e-05, "loss": 11.8825, "step": 26703 }, { "epoch": 1.4541395247527449, "grad_norm": 0.6161357065413106, "learning_rate": 3.658922044470538e-05, "loss": 11.9571, "step": 26704 }, { "epoch": 1.4541939787493279, "grad_norm": 0.5561470406601793, "learning_rate": 3.658240202022809e-05, "loss": 11.8942, "step": 26705 }, { "epoch": 1.4542484327459109, "grad_norm": 0.5795350217089544, "learning_rate": 3.657558408889664e-05, "loss": 11.9671, "step": 26706 }, { "epoch": 1.4543028867424939, "grad_norm": 0.6153799742189493, "learning_rate": 3.6568766650764085e-05, "loss": 11.8301, "step": 26707 }, { "epoch": 1.4543573407390769, "grad_norm": 0.5661994072207569, "learning_rate": 3.656194970588347e-05, "loss": 11.7437, "step": 26708 }, { "epoch": 1.4544117947356598, "grad_norm": 0.5755120456972027, "learning_rate": 3.655513325430774e-05, "loss": 11.8081, "step": 26709 }, { "epoch": 1.4544662487322428, "grad_norm": 0.5819433422622243, "learning_rate": 3.6548317296089966e-05, "loss": 11.7607, "step": 26710 }, { "epoch": 1.4545207027288258, "grad_norm": 0.5758021353347903, "learning_rate": 3.6541501831283086e-05, "loss": 11.8294, "step": 26711 }, { "epoch": 1.4545751567254088, "grad_norm": 0.5337121951230486, "learning_rate": 3.653468685994016e-05, "loss": 12.0003, "step": 26712 }, { "epoch": 1.4546296107219918, "grad_norm": 0.5368404474628399, "learning_rate": 3.6527872382114116e-05, "loss": 11.5876, "step": 26713 }, { "epoch": 1.4546840647185748, "grad_norm": 0.5330001295476007, "learning_rate": 3.652105839785802e-05, "loss": 11.7357, "step": 26714 }, { "epoch": 1.454738518715158, "grad_norm": 0.5481479255822557, "learning_rate": 3.651424490722481e-05, "loss": 11.8336, "step": 26715 }, { "epoch": 1.454792972711741, "grad_norm": 0.5863006525049337, "learning_rate": 3.6507431910267454e-05, "loss": 12.0167, "step": 26716 }, { "epoch": 1.454847426708324, "grad_norm": 0.5850871674472374, "learning_rate": 3.650061940703899e-05, "loss": 11.8807, "step": 26717 }, { "epoch": 1.454901880704907, "grad_norm": 0.5020447523837402, "learning_rate": 3.649380739759232e-05, "loss": 11.9299, "step": 26718 }, { "epoch": 1.45495633470149, "grad_norm": 0.5603533755896191, "learning_rate": 3.6486995881980454e-05, "loss": 11.7836, "step": 26719 }, { "epoch": 1.455010788698073, "grad_norm": 0.49994528514717607, "learning_rate": 3.648018486025639e-05, "loss": 11.8348, "step": 26720 }, { "epoch": 1.455065242694656, "grad_norm": 0.5114811811329905, "learning_rate": 3.647337433247304e-05, "loss": 11.7676, "step": 26721 }, { "epoch": 1.455119696691239, "grad_norm": 0.4767098339040379, "learning_rate": 3.64665642986834e-05, "loss": 11.8318, "step": 26722 }, { "epoch": 1.455174150687822, "grad_norm": 0.5873454360190589, "learning_rate": 3.645975475894039e-05, "loss": 11.8242, "step": 26723 }, { "epoch": 1.4552286046844052, "grad_norm": 0.7365266586157547, "learning_rate": 3.645294571329702e-05, "loss": 11.8758, "step": 26724 }, { "epoch": 1.4552830586809882, "grad_norm": 0.6174881721673229, "learning_rate": 3.6446137161806194e-05, "loss": 12.0069, "step": 26725 }, { "epoch": 1.4553375126775712, "grad_norm": 0.5699336506367176, "learning_rate": 3.6439329104520824e-05, "loss": 11.9938, "step": 26726 }, { "epoch": 1.4553919666741542, "grad_norm": 0.5293799174951075, "learning_rate": 3.6432521541493924e-05, "loss": 11.9106, "step": 26727 }, { "epoch": 1.4554464206707372, "grad_norm": 0.5537992367636208, "learning_rate": 3.642571447277837e-05, "loss": 11.7465, "step": 26728 }, { "epoch": 1.4555008746673201, "grad_norm": 0.4849929869678965, "learning_rate": 3.6418907898427156e-05, "loss": 11.8867, "step": 26729 }, { "epoch": 1.4555553286639031, "grad_norm": 0.585787101802056, "learning_rate": 3.641210181849314e-05, "loss": 11.8157, "step": 26730 }, { "epoch": 1.4556097826604861, "grad_norm": 0.6370423223181031, "learning_rate": 3.6405296233029285e-05, "loss": 11.9242, "step": 26731 }, { "epoch": 1.4556642366570691, "grad_norm": 0.5208407888828812, "learning_rate": 3.639849114208854e-05, "loss": 11.7121, "step": 26732 }, { "epoch": 1.4557186906536521, "grad_norm": 0.5123049212917185, "learning_rate": 3.6391686545723766e-05, "loss": 11.7233, "step": 26733 }, { "epoch": 1.455773144650235, "grad_norm": 0.5498272494814008, "learning_rate": 3.638488244398795e-05, "loss": 11.8731, "step": 26734 }, { "epoch": 1.455827598646818, "grad_norm": 0.5633624620278386, "learning_rate": 3.637807883693394e-05, "loss": 11.8209, "step": 26735 }, { "epoch": 1.455882052643401, "grad_norm": 0.5101358751773256, "learning_rate": 3.6371275724614616e-05, "loss": 11.6467, "step": 26736 }, { "epoch": 1.455936506639984, "grad_norm": 0.6661543826322869, "learning_rate": 3.6364473107082976e-05, "loss": 11.8434, "step": 26737 }, { "epoch": 1.4559909606365673, "grad_norm": 0.5377742590857267, "learning_rate": 3.6357670984391825e-05, "loss": 11.9791, "step": 26738 }, { "epoch": 1.4560454146331503, "grad_norm": 0.6069873937588747, "learning_rate": 3.635086935659412e-05, "loss": 11.985, "step": 26739 }, { "epoch": 1.4560998686297333, "grad_norm": 0.5805923894623883, "learning_rate": 3.634406822374271e-05, "loss": 11.7486, "step": 26740 }, { "epoch": 1.4561543226263163, "grad_norm": 0.5034230426158893, "learning_rate": 3.6337267585890486e-05, "loss": 11.9536, "step": 26741 }, { "epoch": 1.4562087766228993, "grad_norm": 0.5110394749315411, "learning_rate": 3.63304674430904e-05, "loss": 11.7933, "step": 26742 }, { "epoch": 1.4562632306194823, "grad_norm": 0.5787534043271866, "learning_rate": 3.632366779539522e-05, "loss": 12.0233, "step": 26743 }, { "epoch": 1.4563176846160653, "grad_norm": 0.542646975531092, "learning_rate": 3.631686864285793e-05, "loss": 11.8384, "step": 26744 }, { "epoch": 1.4563721386126482, "grad_norm": 0.5042075821464146, "learning_rate": 3.631006998553134e-05, "loss": 11.8898, "step": 26745 }, { "epoch": 1.4564265926092312, "grad_norm": 0.5677960509359683, "learning_rate": 3.63032718234683e-05, "loss": 11.8293, "step": 26746 }, { "epoch": 1.4564810466058145, "grad_norm": 0.49353872529268183, "learning_rate": 3.6296474156721725e-05, "loss": 11.8039, "step": 26747 }, { "epoch": 1.4565355006023974, "grad_norm": 0.5477240079817448, "learning_rate": 3.628967698534441e-05, "loss": 11.7792, "step": 26748 }, { "epoch": 1.4565899545989804, "grad_norm": 0.5096398289991971, "learning_rate": 3.62828803093893e-05, "loss": 11.8037, "step": 26749 }, { "epoch": 1.4566444085955634, "grad_norm": 0.5487537163745176, "learning_rate": 3.627608412890916e-05, "loss": 12.0059, "step": 26750 }, { "epoch": 1.4566988625921464, "grad_norm": 0.5507765976045028, "learning_rate": 3.6269288443956906e-05, "loss": 11.9403, "step": 26751 }, { "epoch": 1.4567533165887294, "grad_norm": 0.5494549162198724, "learning_rate": 3.626249325458533e-05, "loss": 11.8644, "step": 26752 }, { "epoch": 1.4568077705853124, "grad_norm": 0.4974389737999597, "learning_rate": 3.625569856084728e-05, "loss": 11.7748, "step": 26753 }, { "epoch": 1.4568622245818954, "grad_norm": 0.5486646637385234, "learning_rate": 3.624890436279565e-05, "loss": 11.8773, "step": 26754 }, { "epoch": 1.4569166785784784, "grad_norm": 0.5552403145496668, "learning_rate": 3.624211066048323e-05, "loss": 11.8685, "step": 26755 }, { "epoch": 1.4569711325750614, "grad_norm": 0.5307731225341632, "learning_rate": 3.623531745396282e-05, "loss": 11.8372, "step": 26756 }, { "epoch": 1.4570255865716444, "grad_norm": 0.5385312329365134, "learning_rate": 3.6228524743287294e-05, "loss": 11.742, "step": 26757 }, { "epoch": 1.4570800405682274, "grad_norm": 0.5731116936757621, "learning_rate": 3.622173252850943e-05, "loss": 11.869, "step": 26758 }, { "epoch": 1.4571344945648104, "grad_norm": 0.5654345221512659, "learning_rate": 3.621494080968211e-05, "loss": 11.9001, "step": 26759 }, { "epoch": 1.4571889485613934, "grad_norm": 0.5589460771174387, "learning_rate": 3.620814958685807e-05, "loss": 11.9373, "step": 26760 }, { "epoch": 1.4572434025579764, "grad_norm": 0.5212287814667456, "learning_rate": 3.6201358860090184e-05, "loss": 11.8175, "step": 26761 }, { "epoch": 1.4572978565545596, "grad_norm": 0.5560803817828982, "learning_rate": 3.61945686294312e-05, "loss": 12.0346, "step": 26762 }, { "epoch": 1.4573523105511426, "grad_norm": 0.4815342156419329, "learning_rate": 3.618777889493394e-05, "loss": 11.8207, "step": 26763 }, { "epoch": 1.4574067645477256, "grad_norm": 0.5340409393213523, "learning_rate": 3.618098965665126e-05, "loss": 11.8581, "step": 26764 }, { "epoch": 1.4574612185443085, "grad_norm": 0.5808746622743467, "learning_rate": 3.6174200914635904e-05, "loss": 11.7477, "step": 26765 }, { "epoch": 1.4575156725408915, "grad_norm": 0.5066700160259542, "learning_rate": 3.616741266894067e-05, "loss": 11.899, "step": 26766 }, { "epoch": 1.4575701265374745, "grad_norm": 0.5024104816794057, "learning_rate": 3.6160624919618304e-05, "loss": 11.8207, "step": 26767 }, { "epoch": 1.4576245805340575, "grad_norm": 0.6249859002216991, "learning_rate": 3.6153837666721616e-05, "loss": 11.919, "step": 26768 }, { "epoch": 1.4576790345306405, "grad_norm": 0.5393837181351506, "learning_rate": 3.614705091030344e-05, "loss": 11.675, "step": 26769 }, { "epoch": 1.4577334885272237, "grad_norm": 0.5552178948971118, "learning_rate": 3.614026465041645e-05, "loss": 11.8879, "step": 26770 }, { "epoch": 1.4577879425238067, "grad_norm": 0.5082658302359733, "learning_rate": 3.6133478887113525e-05, "loss": 11.8018, "step": 26771 }, { "epoch": 1.4578423965203897, "grad_norm": 0.5895837781272815, "learning_rate": 3.612669362044734e-05, "loss": 11.9849, "step": 26772 }, { "epoch": 1.4578968505169727, "grad_norm": 0.5581416663970848, "learning_rate": 3.611990885047073e-05, "loss": 11.9058, "step": 26773 }, { "epoch": 1.4579513045135557, "grad_norm": 0.5629753655164212, "learning_rate": 3.6113124577236376e-05, "loss": 11.7372, "step": 26774 }, { "epoch": 1.4580057585101387, "grad_norm": 0.6344005842001699, "learning_rate": 3.610634080079713e-05, "loss": 12.0204, "step": 26775 }, { "epoch": 1.4580602125067217, "grad_norm": 0.5377170799900914, "learning_rate": 3.609955752120568e-05, "loss": 11.9078, "step": 26776 }, { "epoch": 1.4581146665033047, "grad_norm": 0.518073274344814, "learning_rate": 3.609277473851477e-05, "loss": 11.7801, "step": 26777 }, { "epoch": 1.4581691204998877, "grad_norm": 0.5436874352859177, "learning_rate": 3.6085992452777184e-05, "loss": 11.8592, "step": 26778 }, { "epoch": 1.4582235744964707, "grad_norm": 0.5310667102839995, "learning_rate": 3.6079210664045607e-05, "loss": 11.8141, "step": 26779 }, { "epoch": 1.4582780284930537, "grad_norm": 0.586173361703436, "learning_rate": 3.60724293723728e-05, "loss": 11.759, "step": 26780 }, { "epoch": 1.4583324824896367, "grad_norm": 0.5070016668348178, "learning_rate": 3.606564857781154e-05, "loss": 11.8323, "step": 26781 }, { "epoch": 1.4583869364862196, "grad_norm": 0.5335407611867968, "learning_rate": 3.605886828041449e-05, "loss": 11.7087, "step": 26782 }, { "epoch": 1.4584413904828026, "grad_norm": 0.6227259365432857, "learning_rate": 3.605208848023444e-05, "loss": 11.8679, "step": 26783 }, { "epoch": 1.4584958444793856, "grad_norm": 0.4915145450358055, "learning_rate": 3.6045309177324027e-05, "loss": 11.7542, "step": 26784 }, { "epoch": 1.4585502984759688, "grad_norm": 0.5394648993560738, "learning_rate": 3.6038530371736055e-05, "loss": 11.8166, "step": 26785 }, { "epoch": 1.4586047524725518, "grad_norm": 0.5687555144585736, "learning_rate": 3.603175206352321e-05, "loss": 11.9645, "step": 26786 }, { "epoch": 1.4586592064691348, "grad_norm": 0.5606517627161273, "learning_rate": 3.602497425273813e-05, "loss": 11.8242, "step": 26787 }, { "epoch": 1.4587136604657178, "grad_norm": 0.5292711113321935, "learning_rate": 3.6018196939433626e-05, "loss": 11.8848, "step": 26788 }, { "epoch": 1.4587681144623008, "grad_norm": 0.5183681193963549, "learning_rate": 3.601142012366232e-05, "loss": 11.9129, "step": 26789 }, { "epoch": 1.4588225684588838, "grad_norm": 0.5621041526731029, "learning_rate": 3.6004643805476943e-05, "loss": 11.8582, "step": 26790 }, { "epoch": 1.4588770224554668, "grad_norm": 0.5960845339392585, "learning_rate": 3.599786798493021e-05, "loss": 11.9627, "step": 26791 }, { "epoch": 1.4589314764520498, "grad_norm": 0.5491270168168505, "learning_rate": 3.5991092662074765e-05, "loss": 11.7948, "step": 26792 }, { "epoch": 1.4589859304486328, "grad_norm": 0.5597381496196345, "learning_rate": 3.598431783696335e-05, "loss": 11.8733, "step": 26793 }, { "epoch": 1.459040384445216, "grad_norm": 0.5240365781155311, "learning_rate": 3.597754350964858e-05, "loss": 11.8348, "step": 26794 }, { "epoch": 1.459094838441799, "grad_norm": 0.6225060645311038, "learning_rate": 3.597076968018321e-05, "loss": 11.9037, "step": 26795 }, { "epoch": 1.459149292438382, "grad_norm": 0.5528228808901301, "learning_rate": 3.596399634861987e-05, "loss": 11.7331, "step": 26796 }, { "epoch": 1.459203746434965, "grad_norm": 0.5245412508166607, "learning_rate": 3.5957223515011195e-05, "loss": 11.8954, "step": 26797 }, { "epoch": 1.459258200431548, "grad_norm": 0.5355427766129638, "learning_rate": 3.595045117940991e-05, "loss": 11.8718, "step": 26798 }, { "epoch": 1.459312654428131, "grad_norm": 0.528865987818189, "learning_rate": 3.594367934186865e-05, "loss": 11.8158, "step": 26799 }, { "epoch": 1.459367108424714, "grad_norm": 0.5227031212702511, "learning_rate": 3.5936908002440105e-05, "loss": 11.8405, "step": 26800 }, { "epoch": 1.459421562421297, "grad_norm": 0.6284403378354587, "learning_rate": 3.593013716117687e-05, "loss": 11.9101, "step": 26801 }, { "epoch": 1.45947601641788, "grad_norm": 0.6028463324801152, "learning_rate": 3.592336681813163e-05, "loss": 11.8608, "step": 26802 }, { "epoch": 1.459530470414463, "grad_norm": 0.5399263416332223, "learning_rate": 3.591659697335707e-05, "loss": 11.7132, "step": 26803 }, { "epoch": 1.459584924411046, "grad_norm": 0.5004537076128194, "learning_rate": 3.590982762690578e-05, "loss": 11.8438, "step": 26804 }, { "epoch": 1.459639378407629, "grad_norm": 0.505222743084213, "learning_rate": 3.5903058778830434e-05, "loss": 11.9408, "step": 26805 }, { "epoch": 1.459693832404212, "grad_norm": 0.6122278623686817, "learning_rate": 3.5896290429183656e-05, "loss": 11.9357, "step": 26806 }, { "epoch": 1.459748286400795, "grad_norm": 0.5553954049374712, "learning_rate": 3.5889522578018044e-05, "loss": 11.8876, "step": 26807 }, { "epoch": 1.4598027403973781, "grad_norm": 0.5401387797359927, "learning_rate": 3.588275522538629e-05, "loss": 11.9887, "step": 26808 }, { "epoch": 1.459857194393961, "grad_norm": 0.5533280896905917, "learning_rate": 3.587598837134095e-05, "loss": 11.9955, "step": 26809 }, { "epoch": 1.459911648390544, "grad_norm": 0.6550990427751122, "learning_rate": 3.5869222015934715e-05, "loss": 11.8289, "step": 26810 }, { "epoch": 1.459966102387127, "grad_norm": 0.587633376029004, "learning_rate": 3.5862456159220114e-05, "loss": 11.8928, "step": 26811 }, { "epoch": 1.46002055638371, "grad_norm": 0.557811769610465, "learning_rate": 3.585569080124983e-05, "loss": 11.8358, "step": 26812 }, { "epoch": 1.460075010380293, "grad_norm": 0.5273554918722372, "learning_rate": 3.584892594207647e-05, "loss": 11.7675, "step": 26813 }, { "epoch": 1.460129464376876, "grad_norm": 0.5055439436397466, "learning_rate": 3.5842161581752596e-05, "loss": 11.8395, "step": 26814 }, { "epoch": 1.460183918373459, "grad_norm": 0.5862420016312873, "learning_rate": 3.58353977203309e-05, "loss": 11.9083, "step": 26815 }, { "epoch": 1.460238372370042, "grad_norm": 0.5139011935152126, "learning_rate": 3.5828634357863835e-05, "loss": 11.9216, "step": 26816 }, { "epoch": 1.4602928263666253, "grad_norm": 0.5439183726164122, "learning_rate": 3.582187149440408e-05, "loss": 11.8978, "step": 26817 }, { "epoch": 1.4603472803632083, "grad_norm": 0.5520476594550109, "learning_rate": 3.581510913000424e-05, "loss": 11.9112, "step": 26818 }, { "epoch": 1.4604017343597913, "grad_norm": 0.5260466044548181, "learning_rate": 3.580834726471686e-05, "loss": 11.7303, "step": 26819 }, { "epoch": 1.4604561883563743, "grad_norm": 0.4896045580720908, "learning_rate": 3.5801585898594545e-05, "loss": 11.8574, "step": 26820 }, { "epoch": 1.4605106423529572, "grad_norm": 0.5525963234266409, "learning_rate": 3.579482503168985e-05, "loss": 11.9571, "step": 26821 }, { "epoch": 1.4605650963495402, "grad_norm": 0.5674735658381522, "learning_rate": 3.5788064664055384e-05, "loss": 11.9662, "step": 26822 }, { "epoch": 1.4606195503461232, "grad_norm": 0.5309966560276173, "learning_rate": 3.578130479574367e-05, "loss": 11.8387, "step": 26823 }, { "epoch": 1.4606740043427062, "grad_norm": 0.6262015901906925, "learning_rate": 3.5774545426807296e-05, "loss": 11.9699, "step": 26824 }, { "epoch": 1.4607284583392892, "grad_norm": 0.5931571144835259, "learning_rate": 3.57677865572989e-05, "loss": 11.8291, "step": 26825 }, { "epoch": 1.4607829123358722, "grad_norm": 0.5761147142916926, "learning_rate": 3.5761028187270896e-05, "loss": 11.8606, "step": 26826 }, { "epoch": 1.4608373663324552, "grad_norm": 0.5143222559023625, "learning_rate": 3.575427031677594e-05, "loss": 11.8135, "step": 26827 }, { "epoch": 1.4608918203290382, "grad_norm": 0.5204746049835881, "learning_rate": 3.574751294586652e-05, "loss": 11.9793, "step": 26828 }, { "epoch": 1.4609462743256212, "grad_norm": 0.5909807716816972, "learning_rate": 3.574075607459522e-05, "loss": 11.751, "step": 26829 }, { "epoch": 1.4610007283222042, "grad_norm": 0.6470226609947656, "learning_rate": 3.5733999703014596e-05, "loss": 11.9792, "step": 26830 }, { "epoch": 1.4610551823187872, "grad_norm": 0.5570834055776428, "learning_rate": 3.572724383117715e-05, "loss": 11.8176, "step": 26831 }, { "epoch": 1.4611096363153704, "grad_norm": 0.5649935117040239, "learning_rate": 3.572048845913546e-05, "loss": 11.872, "step": 26832 }, { "epoch": 1.4611640903119534, "grad_norm": 0.5690616025433386, "learning_rate": 3.5713733586942e-05, "loss": 11.8906, "step": 26833 }, { "epoch": 1.4612185443085364, "grad_norm": 0.5559128719296456, "learning_rate": 3.570697921464936e-05, "loss": 11.8206, "step": 26834 }, { "epoch": 1.4612729983051194, "grad_norm": 0.568355227035462, "learning_rate": 3.5700225342310045e-05, "loss": 11.8729, "step": 26835 }, { "epoch": 1.4613274523017024, "grad_norm": 0.5144879399681113, "learning_rate": 3.5693471969976514e-05, "loss": 11.8475, "step": 26836 }, { "epoch": 1.4613819062982853, "grad_norm": 0.5461364187896164, "learning_rate": 3.568671909770136e-05, "loss": 11.8737, "step": 26837 }, { "epoch": 1.4614363602948683, "grad_norm": 0.5846842650329348, "learning_rate": 3.567996672553704e-05, "loss": 11.7958, "step": 26838 }, { "epoch": 1.4614908142914513, "grad_norm": 0.5830549694521869, "learning_rate": 3.5673214853536074e-05, "loss": 11.9108, "step": 26839 }, { "epoch": 1.4615452682880345, "grad_norm": 0.5879849804474622, "learning_rate": 3.566646348175102e-05, "loss": 11.8296, "step": 26840 }, { "epoch": 1.4615997222846175, "grad_norm": 0.5461265208976671, "learning_rate": 3.5659712610234295e-05, "loss": 11.9402, "step": 26841 }, { "epoch": 1.4616541762812005, "grad_norm": 0.609963121610458, "learning_rate": 3.565296223903848e-05, "loss": 11.9677, "step": 26842 }, { "epoch": 1.4617086302777835, "grad_norm": 0.5175958788029443, "learning_rate": 3.564621236821597e-05, "loss": 11.8477, "step": 26843 }, { "epoch": 1.4617630842743665, "grad_norm": 0.5896009516127384, "learning_rate": 3.563946299781935e-05, "loss": 11.8115, "step": 26844 }, { "epoch": 1.4618175382709495, "grad_norm": 0.5422785192947653, "learning_rate": 3.5632714127901054e-05, "loss": 11.9648, "step": 26845 }, { "epoch": 1.4618719922675325, "grad_norm": 0.4731157840095486, "learning_rate": 3.562596575851354e-05, "loss": 11.7739, "step": 26846 }, { "epoch": 1.4619264462641155, "grad_norm": 0.5296582183190118, "learning_rate": 3.5619217889709346e-05, "loss": 11.8756, "step": 26847 }, { "epoch": 1.4619809002606985, "grad_norm": 0.5278908416265546, "learning_rate": 3.561247052154086e-05, "loss": 11.7552, "step": 26848 }, { "epoch": 1.4620353542572815, "grad_norm": 0.5306894901091312, "learning_rate": 3.5605723654060654e-05, "loss": 11.8228, "step": 26849 }, { "epoch": 1.4620898082538645, "grad_norm": 0.5127144944073668, "learning_rate": 3.55989772873211e-05, "loss": 11.9017, "step": 26850 }, { "epoch": 1.4621442622504475, "grad_norm": 0.5350098128136271, "learning_rate": 3.55922314213747e-05, "loss": 11.879, "step": 26851 }, { "epoch": 1.4621987162470305, "grad_norm": 0.5825422245832014, "learning_rate": 3.5585486056273943e-05, "loss": 11.9091, "step": 26852 }, { "epoch": 1.4622531702436135, "grad_norm": 0.5155547722424547, "learning_rate": 3.557874119207121e-05, "loss": 11.8226, "step": 26853 }, { "epoch": 1.4623076242401964, "grad_norm": 0.5701044575689473, "learning_rate": 3.557199682881902e-05, "loss": 11.8285, "step": 26854 }, { "epoch": 1.4623620782367797, "grad_norm": 0.5585870580704279, "learning_rate": 3.556525296656979e-05, "loss": 11.8385, "step": 26855 }, { "epoch": 1.4624165322333627, "grad_norm": 0.5699723718271553, "learning_rate": 3.555850960537593e-05, "loss": 11.8206, "step": 26856 }, { "epoch": 1.4624709862299456, "grad_norm": 0.5263886877754508, "learning_rate": 3.555176674528994e-05, "loss": 11.8327, "step": 26857 }, { "epoch": 1.4625254402265286, "grad_norm": 0.5653618440115944, "learning_rate": 3.554502438636419e-05, "loss": 11.8992, "step": 26858 }, { "epoch": 1.4625798942231116, "grad_norm": 0.5660923805417012, "learning_rate": 3.553828252865117e-05, "loss": 11.8532, "step": 26859 }, { "epoch": 1.4626343482196946, "grad_norm": 0.5369221715785782, "learning_rate": 3.553154117220323e-05, "loss": 11.8881, "step": 26860 }, { "epoch": 1.4626888022162776, "grad_norm": 0.5552046678845857, "learning_rate": 3.552480031707285e-05, "loss": 11.8117, "step": 26861 }, { "epoch": 1.4627432562128606, "grad_norm": 0.5309263407310856, "learning_rate": 3.551805996331247e-05, "loss": 11.8196, "step": 26862 }, { "epoch": 1.4627977102094436, "grad_norm": 0.5603158701294209, "learning_rate": 3.551132011097442e-05, "loss": 11.928, "step": 26863 }, { "epoch": 1.4628521642060268, "grad_norm": 0.5828384033150691, "learning_rate": 3.55045807601112e-05, "loss": 11.7837, "step": 26864 }, { "epoch": 1.4629066182026098, "grad_norm": 0.5465292314854292, "learning_rate": 3.549784191077519e-05, "loss": 11.912, "step": 26865 }, { "epoch": 1.4629610721991928, "grad_norm": 0.5273812907053981, "learning_rate": 3.549110356301873e-05, "loss": 11.895, "step": 26866 }, { "epoch": 1.4630155261957758, "grad_norm": 0.5416311755046903, "learning_rate": 3.548436571689431e-05, "loss": 11.9237, "step": 26867 }, { "epoch": 1.4630699801923588, "grad_norm": 0.5237906236370793, "learning_rate": 3.547762837245424e-05, "loss": 11.7981, "step": 26868 }, { "epoch": 1.4631244341889418, "grad_norm": 0.5410911256517951, "learning_rate": 3.547089152975098e-05, "loss": 11.7256, "step": 26869 }, { "epoch": 1.4631788881855248, "grad_norm": 0.497770829428523, "learning_rate": 3.546415518883687e-05, "loss": 11.8333, "step": 26870 }, { "epoch": 1.4632333421821078, "grad_norm": 0.5575192326980732, "learning_rate": 3.545741934976434e-05, "loss": 11.8322, "step": 26871 }, { "epoch": 1.4632877961786908, "grad_norm": 0.5396793254369858, "learning_rate": 3.545068401258571e-05, "loss": 11.8685, "step": 26872 }, { "epoch": 1.4633422501752738, "grad_norm": 0.5514470270430754, "learning_rate": 3.544394917735337e-05, "loss": 11.8436, "step": 26873 }, { "epoch": 1.4633967041718567, "grad_norm": 0.5739962542643732, "learning_rate": 3.543721484411976e-05, "loss": 11.8729, "step": 26874 }, { "epoch": 1.4634511581684397, "grad_norm": 0.5346187295383611, "learning_rate": 3.543048101293719e-05, "loss": 11.9297, "step": 26875 }, { "epoch": 1.4635056121650227, "grad_norm": 0.5533855270475371, "learning_rate": 3.5423747683857986e-05, "loss": 11.8589, "step": 26876 }, { "epoch": 1.4635600661616057, "grad_norm": 0.5170854857149493, "learning_rate": 3.5417014856934595e-05, "loss": 11.9063, "step": 26877 }, { "epoch": 1.463614520158189, "grad_norm": 0.5254200164626402, "learning_rate": 3.541028253221929e-05, "loss": 11.8278, "step": 26878 }, { "epoch": 1.463668974154772, "grad_norm": 0.5644971590330566, "learning_rate": 3.5403550709764486e-05, "loss": 11.9211, "step": 26879 }, { "epoch": 1.463723428151355, "grad_norm": 0.554265191206821, "learning_rate": 3.539681938962248e-05, "loss": 11.8722, "step": 26880 }, { "epoch": 1.463777882147938, "grad_norm": 0.5473133514142409, "learning_rate": 3.539008857184567e-05, "loss": 11.8108, "step": 26881 }, { "epoch": 1.463832336144521, "grad_norm": 0.5564749672755559, "learning_rate": 3.538335825648633e-05, "loss": 11.9022, "step": 26882 }, { "epoch": 1.463886790141104, "grad_norm": 0.5924881033935094, "learning_rate": 3.537662844359687e-05, "loss": 11.88, "step": 26883 }, { "epoch": 1.463941244137687, "grad_norm": 0.5988835492623468, "learning_rate": 3.5369899133229554e-05, "loss": 11.982, "step": 26884 }, { "epoch": 1.4639956981342699, "grad_norm": 0.4902957174226832, "learning_rate": 3.536317032543673e-05, "loss": 11.7601, "step": 26885 }, { "epoch": 1.4640501521308529, "grad_norm": 0.5221769566812132, "learning_rate": 3.535644202027081e-05, "loss": 11.8759, "step": 26886 }, { "epoch": 1.464104606127436, "grad_norm": 0.5963895010595072, "learning_rate": 3.534971421778397e-05, "loss": 11.8333, "step": 26887 }, { "epoch": 1.464159060124019, "grad_norm": 0.5724937814045064, "learning_rate": 3.5342986918028584e-05, "loss": 11.8156, "step": 26888 }, { "epoch": 1.464213514120602, "grad_norm": 0.5734674796494019, "learning_rate": 3.533626012105702e-05, "loss": 11.9431, "step": 26889 }, { "epoch": 1.464267968117185, "grad_norm": 0.5768639096487074, "learning_rate": 3.532953382692151e-05, "loss": 11.8823, "step": 26890 }, { "epoch": 1.464322422113768, "grad_norm": 0.5828853823118294, "learning_rate": 3.5322808035674414e-05, "loss": 11.8061, "step": 26891 }, { "epoch": 1.464376876110351, "grad_norm": 0.6427780669610677, "learning_rate": 3.5316082747367984e-05, "loss": 11.8337, "step": 26892 }, { "epoch": 1.464431330106934, "grad_norm": 0.532718269284918, "learning_rate": 3.530935796205457e-05, "loss": 11.9549, "step": 26893 }, { "epoch": 1.464485784103517, "grad_norm": 0.5845266960964168, "learning_rate": 3.53026336797864e-05, "loss": 11.9907, "step": 26894 }, { "epoch": 1.4645402381001, "grad_norm": 0.5820803008122897, "learning_rate": 3.529590990061581e-05, "loss": 11.9355, "step": 26895 }, { "epoch": 1.464594692096683, "grad_norm": 0.553904992996588, "learning_rate": 3.5289186624595125e-05, "loss": 11.8495, "step": 26896 }, { "epoch": 1.464649146093266, "grad_norm": 0.502351866275459, "learning_rate": 3.528246385177653e-05, "loss": 11.8297, "step": 26897 }, { "epoch": 1.464703600089849, "grad_norm": 0.5226287251745798, "learning_rate": 3.5275741582212376e-05, "loss": 11.8943, "step": 26898 }, { "epoch": 1.464758054086432, "grad_norm": 0.5174506947448685, "learning_rate": 3.526901981595487e-05, "loss": 11.8376, "step": 26899 }, { "epoch": 1.464812508083015, "grad_norm": 0.5745438889851867, "learning_rate": 3.526229855305633e-05, "loss": 11.875, "step": 26900 }, { "epoch": 1.4648669620795982, "grad_norm": 0.5942905440575059, "learning_rate": 3.525557779356904e-05, "loss": 11.9326, "step": 26901 }, { "epoch": 1.4649214160761812, "grad_norm": 0.5543504431445094, "learning_rate": 3.52488575375452e-05, "loss": 11.8438, "step": 26902 }, { "epoch": 1.4649758700727642, "grad_norm": 0.6329014310092056, "learning_rate": 3.524213778503714e-05, "loss": 12.005, "step": 26903 }, { "epoch": 1.4650303240693472, "grad_norm": 0.5071093456661097, "learning_rate": 3.523541853609704e-05, "loss": 11.6483, "step": 26904 }, { "epoch": 1.4650847780659302, "grad_norm": 0.5729737739930458, "learning_rate": 3.522869979077723e-05, "loss": 11.9562, "step": 26905 }, { "epoch": 1.4651392320625132, "grad_norm": 0.4999666939485268, "learning_rate": 3.52219815491299e-05, "loss": 11.8352, "step": 26906 }, { "epoch": 1.4651936860590962, "grad_norm": 0.5454996946055474, "learning_rate": 3.5215263811207276e-05, "loss": 11.8997, "step": 26907 }, { "epoch": 1.4652481400556792, "grad_norm": 0.5751158072911432, "learning_rate": 3.5208546577061666e-05, "loss": 11.7981, "step": 26908 }, { "epoch": 1.4653025940522622, "grad_norm": 0.6659713664357423, "learning_rate": 3.520182984674522e-05, "loss": 11.8795, "step": 26909 }, { "epoch": 1.4653570480488454, "grad_norm": 0.5503720926795402, "learning_rate": 3.519511362031021e-05, "loss": 11.8338, "step": 26910 }, { "epoch": 1.4654115020454284, "grad_norm": 0.5594942983638554, "learning_rate": 3.518839789780891e-05, "loss": 11.9687, "step": 26911 }, { "epoch": 1.4654659560420114, "grad_norm": 0.5921855345390399, "learning_rate": 3.518168267929345e-05, "loss": 11.8966, "step": 26912 }, { "epoch": 1.4655204100385943, "grad_norm": 0.5396709006485998, "learning_rate": 3.517496796481614e-05, "loss": 11.9207, "step": 26913 }, { "epoch": 1.4655748640351773, "grad_norm": 0.5722722756055318, "learning_rate": 3.516825375442912e-05, "loss": 11.943, "step": 26914 }, { "epoch": 1.4656293180317603, "grad_norm": 0.541991721209282, "learning_rate": 3.516154004818465e-05, "loss": 11.8641, "step": 26915 }, { "epoch": 1.4656837720283433, "grad_norm": 0.5808604813469752, "learning_rate": 3.5154826846134925e-05, "loss": 11.777, "step": 26916 }, { "epoch": 1.4657382260249263, "grad_norm": 0.5337465519442687, "learning_rate": 3.51481141483321e-05, "loss": 11.8277, "step": 26917 }, { "epoch": 1.4657926800215093, "grad_norm": 0.5614730591651275, "learning_rate": 3.514140195482846e-05, "loss": 11.8785, "step": 26918 }, { "epoch": 1.4658471340180923, "grad_norm": 0.5212476666733246, "learning_rate": 3.513469026567612e-05, "loss": 11.8603, "step": 26919 }, { "epoch": 1.4659015880146753, "grad_norm": 0.5539456935912866, "learning_rate": 3.512797908092733e-05, "loss": 11.8429, "step": 26920 }, { "epoch": 1.4659560420112583, "grad_norm": 0.5525381910997136, "learning_rate": 3.5121268400634235e-05, "loss": 11.9267, "step": 26921 }, { "epoch": 1.4660104960078413, "grad_norm": 0.5257762624339773, "learning_rate": 3.5114558224849025e-05, "loss": 11.8649, "step": 26922 }, { "epoch": 1.4660649500044243, "grad_norm": 0.5861112674124763, "learning_rate": 3.510784855362392e-05, "loss": 11.9031, "step": 26923 }, { "epoch": 1.4661194040010073, "grad_norm": 0.5735585913197729, "learning_rate": 3.510113938701105e-05, "loss": 11.8264, "step": 26924 }, { "epoch": 1.4661738579975905, "grad_norm": 0.5952942463803762, "learning_rate": 3.509443072506263e-05, "loss": 11.8942, "step": 26925 }, { "epoch": 1.4662283119941735, "grad_norm": 0.5060379458745126, "learning_rate": 3.5087722567830796e-05, "loss": 11.833, "step": 26926 }, { "epoch": 1.4662827659907565, "grad_norm": 0.5227254367275919, "learning_rate": 3.508101491536769e-05, "loss": 11.8399, "step": 26927 }, { "epoch": 1.4663372199873395, "grad_norm": 0.5728525605740037, "learning_rate": 3.507430776772553e-05, "loss": 11.8202, "step": 26928 }, { "epoch": 1.4663916739839225, "grad_norm": 0.578307121397152, "learning_rate": 3.506760112495642e-05, "loss": 11.9659, "step": 26929 }, { "epoch": 1.4664461279805054, "grad_norm": 0.5611285752605445, "learning_rate": 3.5060894987112544e-05, "loss": 11.6997, "step": 26930 }, { "epoch": 1.4665005819770884, "grad_norm": 0.6009530812971448, "learning_rate": 3.5054189354246015e-05, "loss": 11.8917, "step": 26931 }, { "epoch": 1.4665550359736714, "grad_norm": 0.49966910990061286, "learning_rate": 3.504748422640904e-05, "loss": 11.8914, "step": 26932 }, { "epoch": 1.4666094899702546, "grad_norm": 0.6037952896568908, "learning_rate": 3.5040779603653684e-05, "loss": 11.8515, "step": 26933 }, { "epoch": 1.4666639439668376, "grad_norm": 0.566280631163193, "learning_rate": 3.5034075486032115e-05, "loss": 11.7609, "step": 26934 }, { "epoch": 1.4667183979634206, "grad_norm": 0.571709024512032, "learning_rate": 3.502737187359651e-05, "loss": 11.8101, "step": 26935 }, { "epoch": 1.4667728519600036, "grad_norm": 0.5679542079821897, "learning_rate": 3.5020668766398955e-05, "loss": 11.8582, "step": 26936 }, { "epoch": 1.4668273059565866, "grad_norm": 0.5674774159137708, "learning_rate": 3.501396616449154e-05, "loss": 11.8548, "step": 26937 }, { "epoch": 1.4668817599531696, "grad_norm": 0.538239445264612, "learning_rate": 3.500726406792646e-05, "loss": 11.8107, "step": 26938 }, { "epoch": 1.4669362139497526, "grad_norm": 0.6377770860059915, "learning_rate": 3.500056247675575e-05, "loss": 11.749, "step": 26939 }, { "epoch": 1.4669906679463356, "grad_norm": 0.563975129858514, "learning_rate": 3.499386139103161e-05, "loss": 11.7855, "step": 26940 }, { "epoch": 1.4670451219429186, "grad_norm": 0.53408722706937, "learning_rate": 3.498716081080607e-05, "loss": 11.9532, "step": 26941 }, { "epoch": 1.4670995759395016, "grad_norm": 0.5466688182286152, "learning_rate": 3.49804607361313e-05, "loss": 11.8466, "step": 26942 }, { "epoch": 1.4671540299360846, "grad_norm": 0.6010772436464253, "learning_rate": 3.4973761167059346e-05, "loss": 11.9362, "step": 26943 }, { "epoch": 1.4672084839326676, "grad_norm": 0.5392786398843316, "learning_rate": 3.496706210364232e-05, "loss": 11.8418, "step": 26944 }, { "epoch": 1.4672629379292506, "grad_norm": 0.6659633762584934, "learning_rate": 3.4960363545932376e-05, "loss": 11.8143, "step": 26945 }, { "epoch": 1.4673173919258335, "grad_norm": 0.6231163485999179, "learning_rate": 3.495366549398154e-05, "loss": 11.775, "step": 26946 }, { "epoch": 1.4673718459224165, "grad_norm": 0.5094187662757288, "learning_rate": 3.49469679478419e-05, "loss": 11.818, "step": 26947 }, { "epoch": 1.4674262999189998, "grad_norm": 0.5494230557980302, "learning_rate": 3.494027090756552e-05, "loss": 11.9304, "step": 26948 }, { "epoch": 1.4674807539155827, "grad_norm": 0.5640995906812241, "learning_rate": 3.4933574373204515e-05, "loss": 11.9446, "step": 26949 }, { "epoch": 1.4675352079121657, "grad_norm": 0.5499026076881367, "learning_rate": 3.4926878344810964e-05, "loss": 11.9071, "step": 26950 }, { "epoch": 1.4675896619087487, "grad_norm": 0.5215088938960376, "learning_rate": 3.4920182822436895e-05, "loss": 11.8015, "step": 26951 }, { "epoch": 1.4676441159053317, "grad_norm": 0.5455027911489024, "learning_rate": 3.491348780613444e-05, "loss": 11.8753, "step": 26952 }, { "epoch": 1.4676985699019147, "grad_norm": 0.6132704465202785, "learning_rate": 3.490679329595558e-05, "loss": 11.9248, "step": 26953 }, { "epoch": 1.4677530238984977, "grad_norm": 0.5588797520801592, "learning_rate": 3.4900099291952435e-05, "loss": 11.9458, "step": 26954 }, { "epoch": 1.4678074778950807, "grad_norm": 0.5575655682178964, "learning_rate": 3.4893405794177e-05, "loss": 11.7532, "step": 26955 }, { "epoch": 1.4678619318916637, "grad_norm": 0.5791931871644307, "learning_rate": 3.4886712802681406e-05, "loss": 11.888, "step": 26956 }, { "epoch": 1.467916385888247, "grad_norm": 0.520258128178175, "learning_rate": 3.4880020317517645e-05, "loss": 11.8153, "step": 26957 }, { "epoch": 1.46797083988483, "grad_norm": 0.5632911075780257, "learning_rate": 3.4873328338737746e-05, "loss": 11.9586, "step": 26958 }, { "epoch": 1.468025293881413, "grad_norm": 0.5242617532428729, "learning_rate": 3.4866636866393757e-05, "loss": 11.8639, "step": 26959 }, { "epoch": 1.4680797478779959, "grad_norm": 0.5768426935427615, "learning_rate": 3.4859945900537746e-05, "loss": 11.8118, "step": 26960 }, { "epoch": 1.4681342018745789, "grad_norm": 0.5871673347753809, "learning_rate": 3.485325544122171e-05, "loss": 11.9576, "step": 26961 }, { "epoch": 1.4681886558711619, "grad_norm": 0.5450289652016302, "learning_rate": 3.484656548849771e-05, "loss": 11.6982, "step": 26962 }, { "epoch": 1.4682431098677449, "grad_norm": 0.5727478440550319, "learning_rate": 3.4839876042417695e-05, "loss": 11.8777, "step": 26963 }, { "epoch": 1.4682975638643279, "grad_norm": 0.547667895974564, "learning_rate": 3.483318710303378e-05, "loss": 11.9015, "step": 26964 }, { "epoch": 1.4683520178609109, "grad_norm": 0.5668453309871445, "learning_rate": 3.482649867039789e-05, "loss": 11.9573, "step": 26965 }, { "epoch": 1.4684064718574938, "grad_norm": 0.5466427199304342, "learning_rate": 3.481981074456211e-05, "loss": 11.9304, "step": 26966 }, { "epoch": 1.4684609258540768, "grad_norm": 0.5118188742957974, "learning_rate": 3.4813123325578415e-05, "loss": 11.7512, "step": 26967 }, { "epoch": 1.4685153798506598, "grad_norm": 0.5745895014674879, "learning_rate": 3.480643641349877e-05, "loss": 11.9672, "step": 26968 }, { "epoch": 1.4685698338472428, "grad_norm": 0.5358808814029702, "learning_rate": 3.479975000837524e-05, "loss": 11.8356, "step": 26969 }, { "epoch": 1.4686242878438258, "grad_norm": 0.5066478546280682, "learning_rate": 3.4793064110259755e-05, "loss": 11.8727, "step": 26970 }, { "epoch": 1.468678741840409, "grad_norm": 0.5347859064300481, "learning_rate": 3.478637871920435e-05, "loss": 11.884, "step": 26971 }, { "epoch": 1.468733195836992, "grad_norm": 0.5496757316401885, "learning_rate": 3.4779693835261015e-05, "loss": 11.8785, "step": 26972 }, { "epoch": 1.468787649833575, "grad_norm": 0.5679918776674071, "learning_rate": 3.47730094584817e-05, "loss": 11.7205, "step": 26973 }, { "epoch": 1.468842103830158, "grad_norm": 0.5752347630178402, "learning_rate": 3.476632558891843e-05, "loss": 11.7147, "step": 26974 }, { "epoch": 1.468896557826741, "grad_norm": 0.5453418953600246, "learning_rate": 3.475964222662311e-05, "loss": 11.9417, "step": 26975 }, { "epoch": 1.468951011823324, "grad_norm": 0.5675759849355251, "learning_rate": 3.47529593716478e-05, "loss": 11.823, "step": 26976 }, { "epoch": 1.469005465819907, "grad_norm": 0.5538553395942424, "learning_rate": 3.474627702404441e-05, "loss": 11.8549, "step": 26977 }, { "epoch": 1.46905991981649, "grad_norm": 0.4988055556439985, "learning_rate": 3.473959518386488e-05, "loss": 11.8171, "step": 26978 }, { "epoch": 1.469114373813073, "grad_norm": 0.5308248569857584, "learning_rate": 3.473291385116124e-05, "loss": 11.8448, "step": 26979 }, { "epoch": 1.4691688278096562, "grad_norm": 0.5654780362187766, "learning_rate": 3.4726233025985375e-05, "loss": 11.7154, "step": 26980 }, { "epoch": 1.4692232818062392, "grad_norm": 0.4706152556619751, "learning_rate": 3.47195527083893e-05, "loss": 11.8449, "step": 26981 }, { "epoch": 1.4692777358028222, "grad_norm": 0.5511450909412954, "learning_rate": 3.47128728984249e-05, "loss": 11.9013, "step": 26982 }, { "epoch": 1.4693321897994052, "grad_norm": 0.5091049474093662, "learning_rate": 3.4706193596144144e-05, "loss": 11.8955, "step": 26983 }, { "epoch": 1.4693866437959882, "grad_norm": 0.5733711417322171, "learning_rate": 3.469951480159901e-05, "loss": 11.8904, "step": 26984 }, { "epoch": 1.4694410977925711, "grad_norm": 0.50403405339265, "learning_rate": 3.469283651484136e-05, "loss": 11.951, "step": 26985 }, { "epoch": 1.4694955517891541, "grad_norm": 0.551467789854702, "learning_rate": 3.4686158735923216e-05, "loss": 11.9534, "step": 26986 }, { "epoch": 1.4695500057857371, "grad_norm": 0.5554771604025316, "learning_rate": 3.4679481464896446e-05, "loss": 11.8636, "step": 26987 }, { "epoch": 1.4696044597823201, "grad_norm": 0.5434516447398801, "learning_rate": 3.4672804701812945e-05, "loss": 11.9037, "step": 26988 }, { "epoch": 1.4696589137789031, "grad_norm": 0.5314844299161825, "learning_rate": 3.466612844672471e-05, "loss": 11.8527, "step": 26989 }, { "epoch": 1.4697133677754861, "grad_norm": 0.5504031324251071, "learning_rate": 3.465945269968358e-05, "loss": 12.0211, "step": 26990 }, { "epoch": 1.469767821772069, "grad_norm": 0.5704168540629599, "learning_rate": 3.465277746074154e-05, "loss": 11.8902, "step": 26991 }, { "epoch": 1.469822275768652, "grad_norm": 0.4879255321437034, "learning_rate": 3.464610272995043e-05, "loss": 11.7967, "step": 26992 }, { "epoch": 1.469876729765235, "grad_norm": 0.5395397518607035, "learning_rate": 3.4639428507362174e-05, "loss": 11.7737, "step": 26993 }, { "epoch": 1.469931183761818, "grad_norm": 0.5583342025955563, "learning_rate": 3.463275479302872e-05, "loss": 11.7656, "step": 26994 }, { "epoch": 1.4699856377584013, "grad_norm": 0.5333749188630824, "learning_rate": 3.46260815870019e-05, "loss": 11.8344, "step": 26995 }, { "epoch": 1.4700400917549843, "grad_norm": 0.5947621372013558, "learning_rate": 3.4619408889333696e-05, "loss": 11.835, "step": 26996 }, { "epoch": 1.4700945457515673, "grad_norm": 0.5218967318383265, "learning_rate": 3.461273670007587e-05, "loss": 11.835, "step": 26997 }, { "epoch": 1.4701489997481503, "grad_norm": 0.5692122610369998, "learning_rate": 3.460606501928035e-05, "loss": 11.8929, "step": 26998 }, { "epoch": 1.4702034537447333, "grad_norm": 0.4741647367532856, "learning_rate": 3.4599393846999087e-05, "loss": 11.6957, "step": 26999 }, { "epoch": 1.4702579077413163, "grad_norm": 0.5489315259413341, "learning_rate": 3.459272318328387e-05, "loss": 11.8165, "step": 27000 }, { "epoch": 1.4703123617378993, "grad_norm": 0.5828263256267341, "learning_rate": 3.4586053028186636e-05, "loss": 11.8819, "step": 27001 }, { "epoch": 1.4703668157344822, "grad_norm": 0.574516434369927, "learning_rate": 3.457938338175919e-05, "loss": 11.9278, "step": 27002 }, { "epoch": 1.4704212697310655, "grad_norm": 0.5907239366505275, "learning_rate": 3.457271424405346e-05, "loss": 11.928, "step": 27003 }, { "epoch": 1.4704757237276485, "grad_norm": 0.5465245700889383, "learning_rate": 3.4566045615121246e-05, "loss": 11.9369, "step": 27004 }, { "epoch": 1.4705301777242314, "grad_norm": 0.5332316149709876, "learning_rate": 3.455937749501445e-05, "loss": 11.96, "step": 27005 }, { "epoch": 1.4705846317208144, "grad_norm": 0.5861251101772974, "learning_rate": 3.455270988378495e-05, "loss": 11.8555, "step": 27006 }, { "epoch": 1.4706390857173974, "grad_norm": 0.5417709300912483, "learning_rate": 3.45460427814845e-05, "loss": 11.8343, "step": 27007 }, { "epoch": 1.4706935397139804, "grad_norm": 0.5464346475311613, "learning_rate": 3.4539376188165e-05, "loss": 12.0091, "step": 27008 }, { "epoch": 1.4707479937105634, "grad_norm": 0.5347709733589778, "learning_rate": 3.453271010387832e-05, "loss": 11.8109, "step": 27009 }, { "epoch": 1.4708024477071464, "grad_norm": 0.5393284119988231, "learning_rate": 3.4526044528676225e-05, "loss": 11.9394, "step": 27010 }, { "epoch": 1.4708569017037294, "grad_norm": 0.5806957972972273, "learning_rate": 3.4519379462610625e-05, "loss": 11.7509, "step": 27011 }, { "epoch": 1.4709113557003124, "grad_norm": 0.5465121406012963, "learning_rate": 3.4512714905733277e-05, "loss": 11.9348, "step": 27012 }, { "epoch": 1.4709658096968954, "grad_norm": 0.5724799408772205, "learning_rate": 3.450605085809607e-05, "loss": 11.9405, "step": 27013 }, { "epoch": 1.4710202636934784, "grad_norm": 0.5955513575638715, "learning_rate": 3.449938731975078e-05, "loss": 11.9518, "step": 27014 }, { "epoch": 1.4710747176900614, "grad_norm": 0.6018328515717035, "learning_rate": 3.4492724290749224e-05, "loss": 11.9525, "step": 27015 }, { "epoch": 1.4711291716866444, "grad_norm": 0.5015215623563867, "learning_rate": 3.4486061771143296e-05, "loss": 11.7717, "step": 27016 }, { "epoch": 1.4711836256832274, "grad_norm": 0.49988841942712686, "learning_rate": 3.4479399760984664e-05, "loss": 11.8735, "step": 27017 }, { "epoch": 1.4712380796798106, "grad_norm": 0.5245592975923902, "learning_rate": 3.447273826032525e-05, "loss": 11.8615, "step": 27018 }, { "epoch": 1.4712925336763936, "grad_norm": 0.649376184336951, "learning_rate": 3.446607726921679e-05, "loss": 11.9087, "step": 27019 }, { "epoch": 1.4713469876729766, "grad_norm": 0.5830823200606501, "learning_rate": 3.4459416787711084e-05, "loss": 11.947, "step": 27020 }, { "epoch": 1.4714014416695596, "grad_norm": 0.5540168823321698, "learning_rate": 3.445275681585999e-05, "loss": 11.7104, "step": 27021 }, { "epoch": 1.4714558956661425, "grad_norm": 0.5830936253241314, "learning_rate": 3.444609735371521e-05, "loss": 11.8026, "step": 27022 }, { "epoch": 1.4715103496627255, "grad_norm": 0.5401627797490386, "learning_rate": 3.4439438401328606e-05, "loss": 11.8363, "step": 27023 }, { "epoch": 1.4715648036593085, "grad_norm": 0.5427904249373643, "learning_rate": 3.443277995875189e-05, "loss": 11.7379, "step": 27024 }, { "epoch": 1.4716192576558915, "grad_norm": 0.5524654577776319, "learning_rate": 3.442612202603691e-05, "loss": 11.9151, "step": 27025 }, { "epoch": 1.4716737116524745, "grad_norm": 0.5471381024249221, "learning_rate": 3.4419464603235364e-05, "loss": 11.8217, "step": 27026 }, { "epoch": 1.4717281656490577, "grad_norm": 0.5546715885633946, "learning_rate": 3.4412807690399096e-05, "loss": 11.7589, "step": 27027 }, { "epoch": 1.4717826196456407, "grad_norm": 0.7117188961593346, "learning_rate": 3.440615128757984e-05, "loss": 11.8712, "step": 27028 }, { "epoch": 1.4718370736422237, "grad_norm": 0.5420285753965889, "learning_rate": 3.4399495394829305e-05, "loss": 11.9132, "step": 27029 }, { "epoch": 1.4718915276388067, "grad_norm": 0.543851358459557, "learning_rate": 3.4392840012199304e-05, "loss": 11.7777, "step": 27030 }, { "epoch": 1.4719459816353897, "grad_norm": 0.49003595741190664, "learning_rate": 3.438618513974161e-05, "loss": 11.801, "step": 27031 }, { "epoch": 1.4720004356319727, "grad_norm": 0.514922000773794, "learning_rate": 3.437953077750791e-05, "loss": 11.7983, "step": 27032 }, { "epoch": 1.4720548896285557, "grad_norm": 0.5555140945535076, "learning_rate": 3.437287692555003e-05, "loss": 11.7921, "step": 27033 }, { "epoch": 1.4721093436251387, "grad_norm": 0.5197018757079596, "learning_rate": 3.436622358391961e-05, "loss": 11.8424, "step": 27034 }, { "epoch": 1.4721637976217217, "grad_norm": 0.5746961787467441, "learning_rate": 3.435957075266849e-05, "loss": 11.8774, "step": 27035 }, { "epoch": 1.4722182516183047, "grad_norm": 0.5523608801389424, "learning_rate": 3.4352918431848313e-05, "loss": 11.9063, "step": 27036 }, { "epoch": 1.4722727056148877, "grad_norm": 0.6098172617612233, "learning_rate": 3.434626662151089e-05, "loss": 11.9233, "step": 27037 }, { "epoch": 1.4723271596114706, "grad_norm": 0.6189974377781735, "learning_rate": 3.4339615321707904e-05, "loss": 11.8561, "step": 27038 }, { "epoch": 1.4723816136080536, "grad_norm": 0.6098684049964824, "learning_rate": 3.433296453249105e-05, "loss": 12.0428, "step": 27039 }, { "epoch": 1.4724360676046366, "grad_norm": 0.5231442378536864, "learning_rate": 3.4326314253912114e-05, "loss": 11.9368, "step": 27040 }, { "epoch": 1.4724905216012198, "grad_norm": 0.5554595187181036, "learning_rate": 3.431966448602274e-05, "loss": 11.8724, "step": 27041 }, { "epoch": 1.4725449755978028, "grad_norm": 0.5093832129644811, "learning_rate": 3.431301522887467e-05, "loss": 11.8652, "step": 27042 }, { "epoch": 1.4725994295943858, "grad_norm": 0.532916989178289, "learning_rate": 3.4306366482519624e-05, "loss": 11.7608, "step": 27043 }, { "epoch": 1.4726538835909688, "grad_norm": 0.5299826967679284, "learning_rate": 3.429971824700927e-05, "loss": 11.7297, "step": 27044 }, { "epoch": 1.4727083375875518, "grad_norm": 0.5470163791515646, "learning_rate": 3.429307052239536e-05, "loss": 11.8786, "step": 27045 }, { "epoch": 1.4727627915841348, "grad_norm": 0.5429542506097446, "learning_rate": 3.4286423308729524e-05, "loss": 11.7596, "step": 27046 }, { "epoch": 1.4728172455807178, "grad_norm": 0.5193121313872467, "learning_rate": 3.4279776606063496e-05, "loss": 11.8058, "step": 27047 }, { "epoch": 1.4728716995773008, "grad_norm": 0.569015986442028, "learning_rate": 3.427313041444896e-05, "loss": 11.921, "step": 27048 }, { "epoch": 1.4729261535738838, "grad_norm": 0.5396491219347833, "learning_rate": 3.4266484733937546e-05, "loss": 11.9579, "step": 27049 }, { "epoch": 1.472980607570467, "grad_norm": 0.5072043815273053, "learning_rate": 3.425983956458101e-05, "loss": 11.8264, "step": 27050 }, { "epoch": 1.47303506156705, "grad_norm": 0.5150497274906632, "learning_rate": 3.425319490643094e-05, "loss": 11.8747, "step": 27051 }, { "epoch": 1.473089515563633, "grad_norm": 0.5666219037141724, "learning_rate": 3.42465507595391e-05, "loss": 11.9068, "step": 27052 }, { "epoch": 1.473143969560216, "grad_norm": 0.4999371458346644, "learning_rate": 3.423990712395707e-05, "loss": 11.8077, "step": 27053 }, { "epoch": 1.473198423556799, "grad_norm": 0.5375262988369534, "learning_rate": 3.4233263999736543e-05, "loss": 11.8238, "step": 27054 }, { "epoch": 1.473252877553382, "grad_norm": 0.5524602798318212, "learning_rate": 3.4226621386929224e-05, "loss": 11.9338, "step": 27055 }, { "epoch": 1.473307331549965, "grad_norm": 0.5441144221728975, "learning_rate": 3.421997928558669e-05, "loss": 11.8395, "step": 27056 }, { "epoch": 1.473361785546548, "grad_norm": 0.5611645080034972, "learning_rate": 3.421333769576067e-05, "loss": 11.9071, "step": 27057 }, { "epoch": 1.473416239543131, "grad_norm": 0.5502104309668763, "learning_rate": 3.420669661750277e-05, "loss": 11.7065, "step": 27058 }, { "epoch": 1.473470693539714, "grad_norm": 0.5715937041666886, "learning_rate": 3.4200056050864584e-05, "loss": 11.7687, "step": 27059 }, { "epoch": 1.473525147536297, "grad_norm": 0.6181960696255637, "learning_rate": 3.419341599589784e-05, "loss": 11.8218, "step": 27060 }, { "epoch": 1.47357960153288, "grad_norm": 0.5294926288327422, "learning_rate": 3.418677645265409e-05, "loss": 11.7405, "step": 27061 }, { "epoch": 1.473634055529463, "grad_norm": 0.8029805969217824, "learning_rate": 3.4180137421185035e-05, "loss": 11.9257, "step": 27062 }, { "epoch": 1.473688509526046, "grad_norm": 0.5349932841043552, "learning_rate": 3.417349890154224e-05, "loss": 11.8885, "step": 27063 }, { "epoch": 1.473742963522629, "grad_norm": 0.5586481148900625, "learning_rate": 3.416686089377735e-05, "loss": 11.8633, "step": 27064 }, { "epoch": 1.4737974175192121, "grad_norm": 0.5273718362697052, "learning_rate": 3.4160223397942037e-05, "loss": 11.8968, "step": 27065 }, { "epoch": 1.473851871515795, "grad_norm": 0.5669173366538417, "learning_rate": 3.4153586414087824e-05, "loss": 11.9404, "step": 27066 }, { "epoch": 1.473906325512378, "grad_norm": 0.6056961832563036, "learning_rate": 3.414694994226644e-05, "loss": 11.8837, "step": 27067 }, { "epoch": 1.473960779508961, "grad_norm": 0.5432162802028999, "learning_rate": 3.414031398252935e-05, "loss": 11.9145, "step": 27068 }, { "epoch": 1.474015233505544, "grad_norm": 0.5311333608968709, "learning_rate": 3.4133678534928216e-05, "loss": 11.8465, "step": 27069 }, { "epoch": 1.474069687502127, "grad_norm": 0.535893314131085, "learning_rate": 3.412704359951467e-05, "loss": 11.8586, "step": 27070 }, { "epoch": 1.47412414149871, "grad_norm": 0.524757750295078, "learning_rate": 3.412040917634026e-05, "loss": 11.8801, "step": 27071 }, { "epoch": 1.474178595495293, "grad_norm": 0.5756576209868174, "learning_rate": 3.4113775265456626e-05, "loss": 11.9087, "step": 27072 }, { "epoch": 1.4742330494918763, "grad_norm": 0.6782042329146925, "learning_rate": 3.4107141866915294e-05, "loss": 11.9043, "step": 27073 }, { "epoch": 1.4742875034884593, "grad_norm": 0.5520135168377776, "learning_rate": 3.410050898076791e-05, "loss": 11.9487, "step": 27074 }, { "epoch": 1.4743419574850423, "grad_norm": 0.4930499779384862, "learning_rate": 3.4093876607065976e-05, "loss": 11.8126, "step": 27075 }, { "epoch": 1.4743964114816253, "grad_norm": 0.5479393717310947, "learning_rate": 3.4087244745861125e-05, "loss": 11.8866, "step": 27076 }, { "epoch": 1.4744508654782083, "grad_norm": 0.5562895428702546, "learning_rate": 3.4080613397204977e-05, "loss": 11.906, "step": 27077 }, { "epoch": 1.4745053194747912, "grad_norm": 0.5444885985476847, "learning_rate": 3.407398256114896e-05, "loss": 11.6886, "step": 27078 }, { "epoch": 1.4745597734713742, "grad_norm": 0.5548738264640611, "learning_rate": 3.406735223774471e-05, "loss": 11.836, "step": 27079 }, { "epoch": 1.4746142274679572, "grad_norm": 0.5348139660551197, "learning_rate": 3.406072242704382e-05, "loss": 11.8202, "step": 27080 }, { "epoch": 1.4746686814645402, "grad_norm": 0.511501103488106, "learning_rate": 3.405409312909777e-05, "loss": 11.6727, "step": 27081 }, { "epoch": 1.4747231354611232, "grad_norm": 0.5598396385414746, "learning_rate": 3.40474643439582e-05, "loss": 11.8583, "step": 27082 }, { "epoch": 1.4747775894577062, "grad_norm": 0.5076146583988852, "learning_rate": 3.404083607167657e-05, "loss": 11.9152, "step": 27083 }, { "epoch": 1.4748320434542892, "grad_norm": 0.5537755440569728, "learning_rate": 3.403420831230449e-05, "loss": 11.8594, "step": 27084 }, { "epoch": 1.4748864974508722, "grad_norm": 0.5293285022532415, "learning_rate": 3.402758106589343e-05, "loss": 11.9193, "step": 27085 }, { "epoch": 1.4749409514474552, "grad_norm": 0.5408443603695325, "learning_rate": 3.402095433249502e-05, "loss": 11.7753, "step": 27086 }, { "epoch": 1.4749954054440382, "grad_norm": 0.5749917278803268, "learning_rate": 3.401432811216071e-05, "loss": 11.9071, "step": 27087 }, { "epoch": 1.4750498594406214, "grad_norm": 0.5156306979181503, "learning_rate": 3.400770240494202e-05, "loss": 11.749, "step": 27088 }, { "epoch": 1.4751043134372044, "grad_norm": 0.559651083921084, "learning_rate": 3.400107721089054e-05, "loss": 11.765, "step": 27089 }, { "epoch": 1.4751587674337874, "grad_norm": 0.612724934333478, "learning_rate": 3.399445253005772e-05, "loss": 11.8683, "step": 27090 }, { "epoch": 1.4752132214303704, "grad_norm": 0.59789341134019, "learning_rate": 3.39878283624951e-05, "loss": 11.946, "step": 27091 }, { "epoch": 1.4752676754269534, "grad_norm": 0.5544059016115417, "learning_rate": 3.398120470825423e-05, "loss": 11.8487, "step": 27092 }, { "epoch": 1.4753221294235364, "grad_norm": 0.5617610908798364, "learning_rate": 3.3974581567386564e-05, "loss": 11.8222, "step": 27093 }, { "epoch": 1.4753765834201193, "grad_norm": 0.5125756027948952, "learning_rate": 3.396795893994365e-05, "loss": 11.8395, "step": 27094 }, { "epoch": 1.4754310374167023, "grad_norm": 0.5559725961950754, "learning_rate": 3.396133682597692e-05, "loss": 11.9392, "step": 27095 }, { "epoch": 1.4754854914132853, "grad_norm": 0.5762186452667207, "learning_rate": 3.395471522553795e-05, "loss": 11.7305, "step": 27096 }, { "epoch": 1.4755399454098685, "grad_norm": 0.531698633923273, "learning_rate": 3.394809413867819e-05, "loss": 11.8277, "step": 27097 }, { "epoch": 1.4755943994064515, "grad_norm": 0.5482631295246632, "learning_rate": 3.394147356544909e-05, "loss": 11.8116, "step": 27098 }, { "epoch": 1.4756488534030345, "grad_norm": 0.6278240149208363, "learning_rate": 3.393485350590221e-05, "loss": 11.8782, "step": 27099 }, { "epoch": 1.4757033073996175, "grad_norm": 0.5759202327727695, "learning_rate": 3.392823396008895e-05, "loss": 11.7914, "step": 27100 }, { "epoch": 1.4757577613962005, "grad_norm": 0.5773957919324828, "learning_rate": 3.392161492806085e-05, "loss": 11.8509, "step": 27101 }, { "epoch": 1.4758122153927835, "grad_norm": 0.5272776305146423, "learning_rate": 3.3914996409869335e-05, "loss": 11.8905, "step": 27102 }, { "epoch": 1.4758666693893665, "grad_norm": 0.6199382628951214, "learning_rate": 3.3908378405565876e-05, "loss": 11.9084, "step": 27103 }, { "epoch": 1.4759211233859495, "grad_norm": 0.5278872701414912, "learning_rate": 3.3901760915201995e-05, "loss": 11.8079, "step": 27104 }, { "epoch": 1.4759755773825325, "grad_norm": 0.542572280098002, "learning_rate": 3.389514393882906e-05, "loss": 11.9388, "step": 27105 }, { "epoch": 1.4760300313791155, "grad_norm": 0.5820755273708499, "learning_rate": 3.388852747649862e-05, "loss": 11.899, "step": 27106 }, { "epoch": 1.4760844853756985, "grad_norm": 0.5562626129763535, "learning_rate": 3.388191152826207e-05, "loss": 11.7676, "step": 27107 }, { "epoch": 1.4761389393722815, "grad_norm": 0.5507773169644343, "learning_rate": 3.3875296094170826e-05, "loss": 11.6364, "step": 27108 }, { "epoch": 1.4761933933688645, "grad_norm": 0.5315939561427894, "learning_rate": 3.38686811742764e-05, "loss": 11.7449, "step": 27109 }, { "epoch": 1.4762478473654475, "grad_norm": 0.5459717502917776, "learning_rate": 3.3862066768630184e-05, "loss": 11.7552, "step": 27110 }, { "epoch": 1.4763023013620307, "grad_norm": 0.5277619271971716, "learning_rate": 3.385545287728364e-05, "loss": 11.8546, "step": 27111 }, { "epoch": 1.4763567553586137, "grad_norm": 0.5954012509501209, "learning_rate": 3.3848839500288166e-05, "loss": 11.8795, "step": 27112 }, { "epoch": 1.4764112093551967, "grad_norm": 0.5124358491319496, "learning_rate": 3.384222663769521e-05, "loss": 11.8748, "step": 27113 }, { "epoch": 1.4764656633517796, "grad_norm": 0.6025619047189447, "learning_rate": 3.383561428955623e-05, "loss": 11.9018, "step": 27114 }, { "epoch": 1.4765201173483626, "grad_norm": 0.5699948163670611, "learning_rate": 3.382900245592257e-05, "loss": 11.9429, "step": 27115 }, { "epoch": 1.4765745713449456, "grad_norm": 0.517490357130833, "learning_rate": 3.382239113684571e-05, "loss": 11.8018, "step": 27116 }, { "epoch": 1.4766290253415286, "grad_norm": 0.5466848815284313, "learning_rate": 3.3815780332377044e-05, "loss": 11.7851, "step": 27117 }, { "epoch": 1.4766834793381116, "grad_norm": 0.5606494793356981, "learning_rate": 3.3809170042567925e-05, "loss": 11.9099, "step": 27118 }, { "epoch": 1.4767379333346946, "grad_norm": 0.5845511369310461, "learning_rate": 3.3802560267469855e-05, "loss": 11.9356, "step": 27119 }, { "epoch": 1.4767923873312778, "grad_norm": 0.5967304989228428, "learning_rate": 3.379595100713413e-05, "loss": 11.924, "step": 27120 }, { "epoch": 1.4768468413278608, "grad_norm": 0.513447914758633, "learning_rate": 3.3789342261612224e-05, "loss": 11.9511, "step": 27121 }, { "epoch": 1.4769012953244438, "grad_norm": 0.565957331964041, "learning_rate": 3.378273403095547e-05, "loss": 11.6514, "step": 27122 }, { "epoch": 1.4769557493210268, "grad_norm": 0.5842253427170512, "learning_rate": 3.37761263152153e-05, "loss": 11.962, "step": 27123 }, { "epoch": 1.4770102033176098, "grad_norm": 0.5845901353598117, "learning_rate": 3.376951911444306e-05, "loss": 11.9118, "step": 27124 }, { "epoch": 1.4770646573141928, "grad_norm": 0.5141469175908666, "learning_rate": 3.376291242869014e-05, "loss": 11.8073, "step": 27125 }, { "epoch": 1.4771191113107758, "grad_norm": 0.5259914534495058, "learning_rate": 3.3756306258007954e-05, "loss": 11.8461, "step": 27126 }, { "epoch": 1.4771735653073588, "grad_norm": 0.5459010431752896, "learning_rate": 3.374970060244784e-05, "loss": 11.8832, "step": 27127 }, { "epoch": 1.4772280193039418, "grad_norm": 0.5528517960244562, "learning_rate": 3.374309546206113e-05, "loss": 11.823, "step": 27128 }, { "epoch": 1.4772824733005248, "grad_norm": 0.5271159165239279, "learning_rate": 3.373649083689926e-05, "loss": 11.904, "step": 27129 }, { "epoch": 1.4773369272971077, "grad_norm": 0.5966994098474412, "learning_rate": 3.372988672701351e-05, "loss": 11.9556, "step": 27130 }, { "epoch": 1.4773913812936907, "grad_norm": 0.5744693204930843, "learning_rate": 3.372328313245531e-05, "loss": 11.8177, "step": 27131 }, { "epoch": 1.4774458352902737, "grad_norm": 0.5765051641289127, "learning_rate": 3.371668005327594e-05, "loss": 11.8644, "step": 27132 }, { "epoch": 1.4775002892868567, "grad_norm": 0.5233858411786155, "learning_rate": 3.371007748952681e-05, "loss": 11.8243, "step": 27133 }, { "epoch": 1.47755474328344, "grad_norm": 0.6552093420357177, "learning_rate": 3.370347544125921e-05, "loss": 11.9979, "step": 27134 }, { "epoch": 1.477609197280023, "grad_norm": 0.559480773537894, "learning_rate": 3.369687390852454e-05, "loss": 11.865, "step": 27135 }, { "epoch": 1.477663651276606, "grad_norm": 0.5642823448196956, "learning_rate": 3.369027289137405e-05, "loss": 11.9018, "step": 27136 }, { "epoch": 1.477718105273189, "grad_norm": 0.5322223232586138, "learning_rate": 3.3683672389859166e-05, "loss": 11.8025, "step": 27137 }, { "epoch": 1.477772559269772, "grad_norm": 0.587293504402518, "learning_rate": 3.367707240403115e-05, "loss": 11.8876, "step": 27138 }, { "epoch": 1.477827013266355, "grad_norm": 0.5464093118301003, "learning_rate": 3.367047293394132e-05, "loss": 11.9181, "step": 27139 }, { "epoch": 1.477881467262938, "grad_norm": 0.5373653748455144, "learning_rate": 3.366387397964101e-05, "loss": 11.8001, "step": 27140 }, { "epoch": 1.477935921259521, "grad_norm": 0.5878366181510546, "learning_rate": 3.365727554118159e-05, "loss": 11.9296, "step": 27141 }, { "epoch": 1.4779903752561039, "grad_norm": 0.5284923666358852, "learning_rate": 3.365067761861427e-05, "loss": 11.8898, "step": 27142 }, { "epoch": 1.478044829252687, "grad_norm": 0.5664966967686911, "learning_rate": 3.364408021199045e-05, "loss": 11.8015, "step": 27143 }, { "epoch": 1.47809928324927, "grad_norm": 0.566726081494145, "learning_rate": 3.363748332136135e-05, "loss": 11.79, "step": 27144 }, { "epoch": 1.478153737245853, "grad_norm": 0.508897171974987, "learning_rate": 3.363088694677834e-05, "loss": 11.7891, "step": 27145 }, { "epoch": 1.478208191242436, "grad_norm": 0.5855396399672527, "learning_rate": 3.362429108829266e-05, "loss": 12.0, "step": 27146 }, { "epoch": 1.478262645239019, "grad_norm": 0.5839440654246064, "learning_rate": 3.3617695745955646e-05, "loss": 11.7482, "step": 27147 }, { "epoch": 1.478317099235602, "grad_norm": 0.5294692175472873, "learning_rate": 3.361110091981857e-05, "loss": 11.9796, "step": 27148 }, { "epoch": 1.478371553232185, "grad_norm": 0.5490297186905433, "learning_rate": 3.3604506609932674e-05, "loss": 11.6764, "step": 27149 }, { "epoch": 1.478426007228768, "grad_norm": 0.5190701710804367, "learning_rate": 3.35979128163493e-05, "loss": 12.0312, "step": 27150 }, { "epoch": 1.478480461225351, "grad_norm": 0.5108576585974903, "learning_rate": 3.3591319539119656e-05, "loss": 11.8501, "step": 27151 }, { "epoch": 1.478534915221934, "grad_norm": 0.5622735264811781, "learning_rate": 3.358472677829505e-05, "loss": 11.9249, "step": 27152 }, { "epoch": 1.478589369218517, "grad_norm": 0.5439652232085131, "learning_rate": 3.357813453392679e-05, "loss": 11.8032, "step": 27153 }, { "epoch": 1.4786438232151, "grad_norm": 0.5089868565051143, "learning_rate": 3.3571542806066047e-05, "loss": 11.8561, "step": 27154 }, { "epoch": 1.478698277211683, "grad_norm": 0.5547826449591965, "learning_rate": 3.356495159476416e-05, "loss": 11.774, "step": 27155 }, { "epoch": 1.478752731208266, "grad_norm": 0.5473036932778695, "learning_rate": 3.3558360900072325e-05, "loss": 11.8986, "step": 27156 }, { "epoch": 1.478807185204849, "grad_norm": 0.4951983523661329, "learning_rate": 3.355177072204184e-05, "loss": 11.88, "step": 27157 }, { "epoch": 1.4788616392014322, "grad_norm": 0.5196955795091953, "learning_rate": 3.354518106072394e-05, "loss": 11.845, "step": 27158 }, { "epoch": 1.4789160931980152, "grad_norm": 0.5520316211913328, "learning_rate": 3.353859191616982e-05, "loss": 11.8677, "step": 27159 }, { "epoch": 1.4789705471945982, "grad_norm": 0.6203878430456891, "learning_rate": 3.353200328843079e-05, "loss": 11.92, "step": 27160 }, { "epoch": 1.4790250011911812, "grad_norm": 0.5786738160243766, "learning_rate": 3.352541517755802e-05, "loss": 11.9021, "step": 27161 }, { "epoch": 1.4790794551877642, "grad_norm": 0.5143903061802476, "learning_rate": 3.351882758360276e-05, "loss": 11.8808, "step": 27162 }, { "epoch": 1.4791339091843472, "grad_norm": 0.5973229100998572, "learning_rate": 3.351224050661629e-05, "loss": 11.9367, "step": 27163 }, { "epoch": 1.4791883631809302, "grad_norm": 0.528959301008005, "learning_rate": 3.350565394664974e-05, "loss": 11.8738, "step": 27164 }, { "epoch": 1.4792428171775132, "grad_norm": 0.5771028782922764, "learning_rate": 3.349906790375442e-05, "loss": 11.9089, "step": 27165 }, { "epoch": 1.4792972711740962, "grad_norm": 0.591567422479549, "learning_rate": 3.349248237798146e-05, "loss": 11.9304, "step": 27166 }, { "epoch": 1.4793517251706794, "grad_norm": 0.48787454573548533, "learning_rate": 3.3485897369382146e-05, "loss": 11.781, "step": 27167 }, { "epoch": 1.4794061791672624, "grad_norm": 0.5086313821470303, "learning_rate": 3.347931287800765e-05, "loss": 11.7327, "step": 27168 }, { "epoch": 1.4794606331638454, "grad_norm": 0.5750569091876963, "learning_rate": 3.347272890390915e-05, "loss": 11.9718, "step": 27169 }, { "epoch": 1.4795150871604283, "grad_norm": 0.5910052756781702, "learning_rate": 3.346614544713789e-05, "loss": 11.7934, "step": 27170 }, { "epoch": 1.4795695411570113, "grad_norm": 0.7537333335813179, "learning_rate": 3.3459562507745e-05, "loss": 11.8531, "step": 27171 }, { "epoch": 1.4796239951535943, "grad_norm": 0.5711350890046221, "learning_rate": 3.345298008578175e-05, "loss": 11.8902, "step": 27172 }, { "epoch": 1.4796784491501773, "grad_norm": 0.5146417451766456, "learning_rate": 3.344639818129926e-05, "loss": 11.8732, "step": 27173 }, { "epoch": 1.4797329031467603, "grad_norm": 0.5226190157213103, "learning_rate": 3.343981679434873e-05, "loss": 11.7984, "step": 27174 }, { "epoch": 1.4797873571433433, "grad_norm": 0.5976252647813438, "learning_rate": 3.343323592498138e-05, "loss": 11.995, "step": 27175 }, { "epoch": 1.4798418111399263, "grad_norm": 0.6078734907301634, "learning_rate": 3.3426655573248324e-05, "loss": 11.7342, "step": 27176 }, { "epoch": 1.4798962651365093, "grad_norm": 0.5852577048129345, "learning_rate": 3.342007573920078e-05, "loss": 11.8116, "step": 27177 }, { "epoch": 1.4799507191330923, "grad_norm": 0.5894873191799361, "learning_rate": 3.34134964228899e-05, "loss": 11.8434, "step": 27178 }, { "epoch": 1.4800051731296753, "grad_norm": 0.5910698681153836, "learning_rate": 3.340691762436681e-05, "loss": 11.688, "step": 27179 }, { "epoch": 1.4800596271262583, "grad_norm": 0.6702628794636158, "learning_rate": 3.340033934368273e-05, "loss": 11.8892, "step": 27180 }, { "epoch": 1.4801140811228415, "grad_norm": 0.5375409497292924, "learning_rate": 3.3393761580888736e-05, "loss": 11.7689, "step": 27181 }, { "epoch": 1.4801685351194245, "grad_norm": 0.5686594922761874, "learning_rate": 3.338718433603606e-05, "loss": 11.7577, "step": 27182 }, { "epoch": 1.4802229891160075, "grad_norm": 0.5169845193658228, "learning_rate": 3.3380607609175785e-05, "loss": 11.824, "step": 27183 }, { "epoch": 1.4802774431125905, "grad_norm": 0.5274731427167206, "learning_rate": 3.3374031400359095e-05, "loss": 11.8404, "step": 27184 }, { "epoch": 1.4803318971091735, "grad_norm": 0.567724516103824, "learning_rate": 3.336745570963709e-05, "loss": 11.7453, "step": 27185 }, { "epoch": 1.4803863511057564, "grad_norm": 0.5501182615912221, "learning_rate": 3.336088053706092e-05, "loss": 11.8781, "step": 27186 }, { "epoch": 1.4804408051023394, "grad_norm": 0.5725508482541988, "learning_rate": 3.3354305882681746e-05, "loss": 11.9257, "step": 27187 }, { "epoch": 1.4804952590989224, "grad_norm": 0.6068025079210324, "learning_rate": 3.334773174655067e-05, "loss": 11.9728, "step": 27188 }, { "epoch": 1.4805497130955054, "grad_norm": 0.5027437770027873, "learning_rate": 3.3341158128718776e-05, "loss": 11.7719, "step": 27189 }, { "epoch": 1.4806041670920886, "grad_norm": 0.5177253109463887, "learning_rate": 3.333458502923725e-05, "loss": 11.7254, "step": 27190 }, { "epoch": 1.4806586210886716, "grad_norm": 0.5774954096402204, "learning_rate": 3.332801244815714e-05, "loss": 11.8728, "step": 27191 }, { "epoch": 1.4807130750852546, "grad_norm": 0.6294710587232561, "learning_rate": 3.332144038552961e-05, "loss": 12.0012, "step": 27192 }, { "epoch": 1.4807675290818376, "grad_norm": 0.5595296786294132, "learning_rate": 3.331486884140572e-05, "loss": 11.8264, "step": 27193 }, { "epoch": 1.4808219830784206, "grad_norm": 0.527091612607428, "learning_rate": 3.330829781583662e-05, "loss": 11.8461, "step": 27194 }, { "epoch": 1.4808764370750036, "grad_norm": 0.5274658985174875, "learning_rate": 3.330172730887334e-05, "loss": 11.7673, "step": 27195 }, { "epoch": 1.4809308910715866, "grad_norm": 0.5809138311464556, "learning_rate": 3.329515732056703e-05, "loss": 11.9266, "step": 27196 }, { "epoch": 1.4809853450681696, "grad_norm": 0.5390868402140865, "learning_rate": 3.328858785096878e-05, "loss": 11.8711, "step": 27197 }, { "epoch": 1.4810397990647526, "grad_norm": 0.4756542962327035, "learning_rate": 3.328201890012966e-05, "loss": 11.8575, "step": 27198 }, { "epoch": 1.4810942530613356, "grad_norm": 0.5462471967775758, "learning_rate": 3.3275450468100764e-05, "loss": 11.8843, "step": 27199 }, { "epoch": 1.4811487070579186, "grad_norm": 0.5040527822746074, "learning_rate": 3.3268882554933114e-05, "loss": 11.8946, "step": 27200 }, { "epoch": 1.4812031610545016, "grad_norm": 0.5543580122913088, "learning_rate": 3.326231516067782e-05, "loss": 11.8207, "step": 27201 }, { "epoch": 1.4812576150510846, "grad_norm": 0.6029877438599717, "learning_rate": 3.325574828538599e-05, "loss": 11.9576, "step": 27202 }, { "epoch": 1.4813120690476675, "grad_norm": 0.5570732431349827, "learning_rate": 3.324918192910863e-05, "loss": 11.6534, "step": 27203 }, { "epoch": 1.4813665230442508, "grad_norm": 0.5318652584892374, "learning_rate": 3.324261609189685e-05, "loss": 11.9389, "step": 27204 }, { "epoch": 1.4814209770408338, "grad_norm": 0.5413052857900214, "learning_rate": 3.323605077380164e-05, "loss": 11.7172, "step": 27205 }, { "epoch": 1.4814754310374167, "grad_norm": 0.5925252601545924, "learning_rate": 3.322948597487414e-05, "loss": 11.8808, "step": 27206 }, { "epoch": 1.4815298850339997, "grad_norm": 0.5347771775967283, "learning_rate": 3.322292169516532e-05, "loss": 11.9355, "step": 27207 }, { "epoch": 1.4815843390305827, "grad_norm": 0.5609134224122345, "learning_rate": 3.3216357934726294e-05, "loss": 11.8862, "step": 27208 }, { "epoch": 1.4816387930271657, "grad_norm": 0.5312910095736958, "learning_rate": 3.320979469360808e-05, "loss": 11.8217, "step": 27209 }, { "epoch": 1.4816932470237487, "grad_norm": 0.5650646943245703, "learning_rate": 3.320323197186165e-05, "loss": 11.8769, "step": 27210 }, { "epoch": 1.4817477010203317, "grad_norm": 0.5883610555504796, "learning_rate": 3.31966697695381e-05, "loss": 11.8561, "step": 27211 }, { "epoch": 1.4818021550169147, "grad_norm": 0.5382984898612555, "learning_rate": 3.319010808668848e-05, "loss": 11.7359, "step": 27212 }, { "epoch": 1.481856609013498, "grad_norm": 0.6328079032372491, "learning_rate": 3.318354692336375e-05, "loss": 11.9215, "step": 27213 }, { "epoch": 1.481911063010081, "grad_norm": 0.5521219231718056, "learning_rate": 3.3176986279614994e-05, "loss": 11.7316, "step": 27214 }, { "epoch": 1.481965517006664, "grad_norm": 0.5810596107266891, "learning_rate": 3.317042615549317e-05, "loss": 12.0288, "step": 27215 }, { "epoch": 1.482019971003247, "grad_norm": 0.5162672593751939, "learning_rate": 3.316386655104935e-05, "loss": 11.8178, "step": 27216 }, { "epoch": 1.4820744249998299, "grad_norm": 0.5837226705935764, "learning_rate": 3.315730746633448e-05, "loss": 11.9309, "step": 27217 }, { "epoch": 1.4821288789964129, "grad_norm": 0.5838394899791232, "learning_rate": 3.3150748901399634e-05, "loss": 11.8939, "step": 27218 }, { "epoch": 1.4821833329929959, "grad_norm": 0.49102244179581195, "learning_rate": 3.314419085629577e-05, "loss": 11.893, "step": 27219 }, { "epoch": 1.4822377869895789, "grad_norm": 0.5228855868017309, "learning_rate": 3.3137633331073855e-05, "loss": 11.7481, "step": 27220 }, { "epoch": 1.4822922409861619, "grad_norm": 0.5828162795906983, "learning_rate": 3.313107632578495e-05, "loss": 11.9261, "step": 27221 }, { "epoch": 1.4823466949827449, "grad_norm": 0.5306854490185376, "learning_rate": 3.3124519840479985e-05, "loss": 11.8847, "step": 27222 }, { "epoch": 1.4824011489793278, "grad_norm": 0.49920585092632636, "learning_rate": 3.3117963875209955e-05, "loss": 11.9139, "step": 27223 }, { "epoch": 1.4824556029759108, "grad_norm": 0.5191636265092698, "learning_rate": 3.3111408430025906e-05, "loss": 11.8197, "step": 27224 }, { "epoch": 1.4825100569724938, "grad_norm": 0.5373763862972796, "learning_rate": 3.310485350497871e-05, "loss": 11.9339, "step": 27225 }, { "epoch": 1.4825645109690768, "grad_norm": 0.5363460680014575, "learning_rate": 3.3098299100119435e-05, "loss": 11.8584, "step": 27226 }, { "epoch": 1.4826189649656598, "grad_norm": 0.602641280895511, "learning_rate": 3.309174521549897e-05, "loss": 11.8895, "step": 27227 }, { "epoch": 1.482673418962243, "grad_norm": 0.4825496469900355, "learning_rate": 3.308519185116834e-05, "loss": 11.8072, "step": 27228 }, { "epoch": 1.482727872958826, "grad_norm": 0.5247850017060505, "learning_rate": 3.3078639007178495e-05, "loss": 11.8737, "step": 27229 }, { "epoch": 1.482782326955409, "grad_norm": 0.5279798067277318, "learning_rate": 3.307208668358033e-05, "loss": 11.8489, "step": 27230 }, { "epoch": 1.482836780951992, "grad_norm": 0.5190133333592362, "learning_rate": 3.306553488042489e-05, "loss": 11.8621, "step": 27231 }, { "epoch": 1.482891234948575, "grad_norm": 0.5516550293797355, "learning_rate": 3.3058983597763026e-05, "loss": 11.9184, "step": 27232 }, { "epoch": 1.482945688945158, "grad_norm": 0.4975954920367842, "learning_rate": 3.3052432835645744e-05, "loss": 11.8549, "step": 27233 }, { "epoch": 1.483000142941741, "grad_norm": 0.5627263185035444, "learning_rate": 3.304588259412399e-05, "loss": 11.8656, "step": 27234 }, { "epoch": 1.483054596938324, "grad_norm": 0.5895512964169496, "learning_rate": 3.3039332873248666e-05, "loss": 12.0113, "step": 27235 }, { "epoch": 1.4831090509349072, "grad_norm": 0.54498354106306, "learning_rate": 3.303278367307073e-05, "loss": 11.7448, "step": 27236 }, { "epoch": 1.4831635049314902, "grad_norm": 0.5789294053716956, "learning_rate": 3.3026234993641095e-05, "loss": 11.9806, "step": 27237 }, { "epoch": 1.4832179589280732, "grad_norm": 0.5721138297062113, "learning_rate": 3.30196868350107e-05, "loss": 11.8589, "step": 27238 }, { "epoch": 1.4832724129246562, "grad_norm": 0.5906386088344636, "learning_rate": 3.301313919723046e-05, "loss": 11.9199, "step": 27239 }, { "epoch": 1.4833268669212392, "grad_norm": 0.5347278439720046, "learning_rate": 3.300659208035124e-05, "loss": 11.8485, "step": 27240 }, { "epoch": 1.4833813209178222, "grad_norm": 0.5051752633238162, "learning_rate": 3.300004548442404e-05, "loss": 11.815, "step": 27241 }, { "epoch": 1.4834357749144051, "grad_norm": 0.6329251790011808, "learning_rate": 3.299349940949968e-05, "loss": 11.9331, "step": 27242 }, { "epoch": 1.4834902289109881, "grad_norm": 0.5489740006734455, "learning_rate": 3.2986953855629145e-05, "loss": 11.7848, "step": 27243 }, { "epoch": 1.4835446829075711, "grad_norm": 0.5848667235338048, "learning_rate": 3.2980408822863264e-05, "loss": 11.8227, "step": 27244 }, { "epoch": 1.4835991369041541, "grad_norm": 0.5789435272763168, "learning_rate": 3.297386431125296e-05, "loss": 11.8886, "step": 27245 }, { "epoch": 1.4836535909007371, "grad_norm": 0.529242405696322, "learning_rate": 3.296732032084916e-05, "loss": 11.724, "step": 27246 }, { "epoch": 1.48370804489732, "grad_norm": 0.5779624953404515, "learning_rate": 3.296077685170269e-05, "loss": 11.8703, "step": 27247 }, { "epoch": 1.483762498893903, "grad_norm": 0.5223023362809981, "learning_rate": 3.2954233903864515e-05, "loss": 11.7858, "step": 27248 }, { "epoch": 1.483816952890486, "grad_norm": 0.5962154089421761, "learning_rate": 3.294769147738541e-05, "loss": 11.888, "step": 27249 }, { "epoch": 1.483871406887069, "grad_norm": 0.5160660724809407, "learning_rate": 3.2941149572316286e-05, "loss": 11.8307, "step": 27250 }, { "epoch": 1.4839258608836523, "grad_norm": 0.5809806957806917, "learning_rate": 3.2934608188708075e-05, "loss": 11.8838, "step": 27251 }, { "epoch": 1.4839803148802353, "grad_norm": 0.48361595968755866, "learning_rate": 3.292806732661156e-05, "loss": 11.8405, "step": 27252 }, { "epoch": 1.4840347688768183, "grad_norm": 0.4974798421988643, "learning_rate": 3.292152698607768e-05, "loss": 11.8206, "step": 27253 }, { "epoch": 1.4840892228734013, "grad_norm": 0.5968044592205556, "learning_rate": 3.2914987167157206e-05, "loss": 11.8325, "step": 27254 }, { "epoch": 1.4841436768699843, "grad_norm": 0.5455372703310573, "learning_rate": 3.2908447869901095e-05, "loss": 11.8612, "step": 27255 }, { "epoch": 1.4841981308665673, "grad_norm": 0.616961306700275, "learning_rate": 3.2901909094360096e-05, "loss": 12.0473, "step": 27256 }, { "epoch": 1.4842525848631503, "grad_norm": 0.5819114537843179, "learning_rate": 3.2895370840585104e-05, "loss": 11.7873, "step": 27257 }, { "epoch": 1.4843070388597333, "grad_norm": 0.5389869348792732, "learning_rate": 3.2888833108627035e-05, "loss": 11.851, "step": 27258 }, { "epoch": 1.4843614928563162, "grad_norm": 0.5731211277536921, "learning_rate": 3.2882295898536584e-05, "loss": 11.8152, "step": 27259 }, { "epoch": 1.4844159468528995, "grad_norm": 0.5025569124630986, "learning_rate": 3.287575921036467e-05, "loss": 11.8047, "step": 27260 }, { "epoch": 1.4844704008494825, "grad_norm": 0.5881762750936129, "learning_rate": 3.2869223044162126e-05, "loss": 11.8476, "step": 27261 }, { "epoch": 1.4845248548460654, "grad_norm": 0.5608032378207175, "learning_rate": 3.286268739997973e-05, "loss": 11.863, "step": 27262 }, { "epoch": 1.4845793088426484, "grad_norm": 0.5110285164030312, "learning_rate": 3.285615227786838e-05, "loss": 11.8661, "step": 27263 }, { "epoch": 1.4846337628392314, "grad_norm": 0.704162504125869, "learning_rate": 3.2849617677878805e-05, "loss": 11.7386, "step": 27264 }, { "epoch": 1.4846882168358144, "grad_norm": 0.5746161075372516, "learning_rate": 3.2843083600061895e-05, "loss": 11.6459, "step": 27265 }, { "epoch": 1.4847426708323974, "grad_norm": 0.5199599932562631, "learning_rate": 3.28365500444684e-05, "loss": 11.73, "step": 27266 }, { "epoch": 1.4847971248289804, "grad_norm": 0.5906330491008318, "learning_rate": 3.283001701114916e-05, "loss": 12.0179, "step": 27267 }, { "epoch": 1.4848515788255634, "grad_norm": 0.5395303467151444, "learning_rate": 3.282348450015503e-05, "loss": 12.0826, "step": 27268 }, { "epoch": 1.4849060328221464, "grad_norm": 0.51679973935423, "learning_rate": 3.2816952511536705e-05, "loss": 11.7768, "step": 27269 }, { "epoch": 1.4849604868187294, "grad_norm": 0.5479399524253692, "learning_rate": 3.281042104534503e-05, "loss": 11.8409, "step": 27270 }, { "epoch": 1.4850149408153124, "grad_norm": 0.5411507669843482, "learning_rate": 3.280389010163077e-05, "loss": 11.9591, "step": 27271 }, { "epoch": 1.4850693948118954, "grad_norm": 0.5923250906597739, "learning_rate": 3.279735968044473e-05, "loss": 11.976, "step": 27272 }, { "epoch": 1.4851238488084784, "grad_norm": 0.4882082373529916, "learning_rate": 3.279082978183772e-05, "loss": 11.8757, "step": 27273 }, { "epoch": 1.4851783028050616, "grad_norm": 0.5303569538341536, "learning_rate": 3.2784300405860455e-05, "loss": 11.9316, "step": 27274 }, { "epoch": 1.4852327568016446, "grad_norm": 0.5347246877158316, "learning_rate": 3.277777155256378e-05, "loss": 11.7729, "step": 27275 }, { "epoch": 1.4852872107982276, "grad_norm": 0.5595970988754599, "learning_rate": 3.2771243221998384e-05, "loss": 11.869, "step": 27276 }, { "epoch": 1.4853416647948106, "grad_norm": 0.5650652753851264, "learning_rate": 3.2764715414215106e-05, "loss": 11.8208, "step": 27277 }, { "epoch": 1.4853961187913935, "grad_norm": 0.5484065079949246, "learning_rate": 3.275818812926469e-05, "loss": 11.8264, "step": 27278 }, { "epoch": 1.4854505727879765, "grad_norm": 0.5189689296845389, "learning_rate": 3.2751661367197836e-05, "loss": 11.667, "step": 27279 }, { "epoch": 1.4855050267845595, "grad_norm": 0.5436340697901008, "learning_rate": 3.2745135128065386e-05, "loss": 11.914, "step": 27280 }, { "epoch": 1.4855594807811425, "grad_norm": 0.5124001145040258, "learning_rate": 3.2738609411918e-05, "loss": 11.951, "step": 27281 }, { "epoch": 1.4856139347777255, "grad_norm": 0.5277418345053874, "learning_rate": 3.2732084218806467e-05, "loss": 11.8618, "step": 27282 }, { "epoch": 1.4856683887743087, "grad_norm": 0.6326896215702091, "learning_rate": 3.272555954878157e-05, "loss": 11.7973, "step": 27283 }, { "epoch": 1.4857228427708917, "grad_norm": 0.5674423458562502, "learning_rate": 3.271903540189396e-05, "loss": 11.9219, "step": 27284 }, { "epoch": 1.4857772967674747, "grad_norm": 0.5087084102268827, "learning_rate": 3.271251177819446e-05, "loss": 11.9181, "step": 27285 }, { "epoch": 1.4858317507640577, "grad_norm": 0.8901096032229812, "learning_rate": 3.27059886777337e-05, "loss": 11.8686, "step": 27286 }, { "epoch": 1.4858862047606407, "grad_norm": 0.5073872482333592, "learning_rate": 3.2699466100562504e-05, "loss": 11.7898, "step": 27287 }, { "epoch": 1.4859406587572237, "grad_norm": 0.5211698973456306, "learning_rate": 3.269294404673155e-05, "loss": 11.8486, "step": 27288 }, { "epoch": 1.4859951127538067, "grad_norm": 0.5368291141901312, "learning_rate": 3.2686422516291504e-05, "loss": 11.8866, "step": 27289 }, { "epoch": 1.4860495667503897, "grad_norm": 0.5693300384485326, "learning_rate": 3.2679901509293164e-05, "loss": 11.8772, "step": 27290 }, { "epoch": 1.4861040207469727, "grad_norm": 0.5215937928340507, "learning_rate": 3.2673381025787165e-05, "loss": 11.869, "step": 27291 }, { "epoch": 1.4861584747435557, "grad_norm": 0.5708249218961806, "learning_rate": 3.266686106582428e-05, "loss": 11.8806, "step": 27292 }, { "epoch": 1.4862129287401387, "grad_norm": 0.63677097635389, "learning_rate": 3.266034162945515e-05, "loss": 11.8537, "step": 27293 }, { "epoch": 1.4862673827367217, "grad_norm": 0.5857846513150303, "learning_rate": 3.265382271673048e-05, "loss": 11.8732, "step": 27294 }, { "epoch": 1.4863218367333046, "grad_norm": 0.520796712326528, "learning_rate": 3.2647304327701034e-05, "loss": 11.7786, "step": 27295 }, { "epoch": 1.4863762907298876, "grad_norm": 0.5983854752980385, "learning_rate": 3.264078646241739e-05, "loss": 11.9918, "step": 27296 }, { "epoch": 1.4864307447264706, "grad_norm": 0.5101282818263799, "learning_rate": 3.263426912093033e-05, "loss": 11.6416, "step": 27297 }, { "epoch": 1.4864851987230538, "grad_norm": 0.5038209482467594, "learning_rate": 3.2627752303290496e-05, "loss": 11.7531, "step": 27298 }, { "epoch": 1.4865396527196368, "grad_norm": 0.5320127817485272, "learning_rate": 3.262123600954852e-05, "loss": 11.7726, "step": 27299 }, { "epoch": 1.4865941067162198, "grad_norm": 0.5398572104230178, "learning_rate": 3.2614720239755135e-05, "loss": 11.7259, "step": 27300 }, { "epoch": 1.4866485607128028, "grad_norm": 0.5973062249520685, "learning_rate": 3.2608204993960964e-05, "loss": 11.9197, "step": 27301 }, { "epoch": 1.4867030147093858, "grad_norm": 0.5978273206228052, "learning_rate": 3.260169027221672e-05, "loss": 11.8774, "step": 27302 }, { "epoch": 1.4867574687059688, "grad_norm": 0.6872509022041908, "learning_rate": 3.259517607457301e-05, "loss": 11.906, "step": 27303 }, { "epoch": 1.4868119227025518, "grad_norm": 0.5169416606520894, "learning_rate": 3.258866240108054e-05, "loss": 11.8721, "step": 27304 }, { "epoch": 1.4868663766991348, "grad_norm": 0.5240611112350078, "learning_rate": 3.25821492517899e-05, "loss": 11.7707, "step": 27305 }, { "epoch": 1.486920830695718, "grad_norm": 0.5654095582004188, "learning_rate": 3.257563662675178e-05, "loss": 11.8115, "step": 27306 }, { "epoch": 1.486975284692301, "grad_norm": 0.5085421091455207, "learning_rate": 3.256912452601685e-05, "loss": 11.8982, "step": 27307 }, { "epoch": 1.487029738688884, "grad_norm": 0.5707968444123652, "learning_rate": 3.256261294963572e-05, "loss": 11.7993, "step": 27308 }, { "epoch": 1.487084192685467, "grad_norm": 0.50828426846241, "learning_rate": 3.2556101897658976e-05, "loss": 11.9359, "step": 27309 }, { "epoch": 1.48713864668205, "grad_norm": 0.5131446602261969, "learning_rate": 3.254959137013733e-05, "loss": 11.7652, "step": 27310 }, { "epoch": 1.487193100678633, "grad_norm": 0.5417230565673917, "learning_rate": 3.254308136712135e-05, "loss": 12.0064, "step": 27311 }, { "epoch": 1.487247554675216, "grad_norm": 0.557403104913808, "learning_rate": 3.2536571888661706e-05, "loss": 11.9038, "step": 27312 }, { "epoch": 1.487302008671799, "grad_norm": 0.5115471585697475, "learning_rate": 3.253006293480897e-05, "loss": 11.8616, "step": 27313 }, { "epoch": 1.487356462668382, "grad_norm": 0.5656769013152173, "learning_rate": 3.2523554505613796e-05, "loss": 11.8502, "step": 27314 }, { "epoch": 1.487410916664965, "grad_norm": 0.4902086284984136, "learning_rate": 3.251704660112675e-05, "loss": 11.773, "step": 27315 }, { "epoch": 1.487465370661548, "grad_norm": 0.5067827185346057, "learning_rate": 3.2510539221398475e-05, "loss": 11.8763, "step": 27316 }, { "epoch": 1.487519824658131, "grad_norm": 0.5197292409505939, "learning_rate": 3.25040323664796e-05, "loss": 11.8191, "step": 27317 }, { "epoch": 1.487574278654714, "grad_norm": 0.5697866881209924, "learning_rate": 3.2497526036420644e-05, "loss": 11.8394, "step": 27318 }, { "epoch": 1.487628732651297, "grad_norm": 0.5313578729803738, "learning_rate": 3.2491020231272317e-05, "loss": 11.7565, "step": 27319 }, { "epoch": 1.48768318664788, "grad_norm": 0.5369869662143938, "learning_rate": 3.2484514951085065e-05, "loss": 11.9415, "step": 27320 }, { "epoch": 1.4877376406444631, "grad_norm": 0.5238407392481245, "learning_rate": 3.2478010195909556e-05, "loss": 11.8026, "step": 27321 }, { "epoch": 1.4877920946410461, "grad_norm": 0.5856891411990525, "learning_rate": 3.247150596579639e-05, "loss": 11.9195, "step": 27322 }, { "epoch": 1.487846548637629, "grad_norm": 0.5187091122853142, "learning_rate": 3.24650022607961e-05, "loss": 11.9496, "step": 27323 }, { "epoch": 1.487901002634212, "grad_norm": 0.5612849698542438, "learning_rate": 3.245849908095929e-05, "loss": 11.8396, "step": 27324 }, { "epoch": 1.487955456630795, "grad_norm": 0.5008938401427246, "learning_rate": 3.245199642633649e-05, "loss": 11.7777, "step": 27325 }, { "epoch": 1.488009910627378, "grad_norm": 0.5242050525716808, "learning_rate": 3.244549429697834e-05, "loss": 11.962, "step": 27326 }, { "epoch": 1.488064364623961, "grad_norm": 0.5871022906923379, "learning_rate": 3.243899269293531e-05, "loss": 11.8592, "step": 27327 }, { "epoch": 1.488118818620544, "grad_norm": 0.5388255460765787, "learning_rate": 3.243249161425801e-05, "loss": 11.6366, "step": 27328 }, { "epoch": 1.488173272617127, "grad_norm": 0.5691555834204939, "learning_rate": 3.242599106099704e-05, "loss": 11.8457, "step": 27329 }, { "epoch": 1.4882277266137103, "grad_norm": 0.53414967703767, "learning_rate": 3.2419491033202843e-05, "loss": 11.8391, "step": 27330 }, { "epoch": 1.4882821806102933, "grad_norm": 0.5408069509464369, "learning_rate": 3.241299153092601e-05, "loss": 11.8947, "step": 27331 }, { "epoch": 1.4883366346068763, "grad_norm": 0.5642851220105962, "learning_rate": 3.240649255421713e-05, "loss": 11.8473, "step": 27332 }, { "epoch": 1.4883910886034593, "grad_norm": 0.5484846794469815, "learning_rate": 3.239999410312665e-05, "loss": 11.9208, "step": 27333 }, { "epoch": 1.4884455426000422, "grad_norm": 0.47128622223821753, "learning_rate": 3.23934961777052e-05, "loss": 11.8681, "step": 27334 }, { "epoch": 1.4884999965966252, "grad_norm": 0.5297904201959726, "learning_rate": 3.238699877800322e-05, "loss": 11.9234, "step": 27335 }, { "epoch": 1.4885544505932082, "grad_norm": 0.4994056879425562, "learning_rate": 3.2380501904071315e-05, "loss": 11.8726, "step": 27336 }, { "epoch": 1.4886089045897912, "grad_norm": 0.6012553319637874, "learning_rate": 3.237400555595993e-05, "loss": 11.8352, "step": 27337 }, { "epoch": 1.4886633585863742, "grad_norm": 0.5717342952793103, "learning_rate": 3.236750973371966e-05, "loss": 11.7764, "step": 27338 }, { "epoch": 1.4887178125829572, "grad_norm": 0.5721044207906831, "learning_rate": 3.236101443740097e-05, "loss": 11.9964, "step": 27339 }, { "epoch": 1.4887722665795402, "grad_norm": 0.543351701834864, "learning_rate": 3.235451966705434e-05, "loss": 11.6909, "step": 27340 }, { "epoch": 1.4888267205761232, "grad_norm": 0.5593434669192275, "learning_rate": 3.234802542273034e-05, "loss": 11.8715, "step": 27341 }, { "epoch": 1.4888811745727062, "grad_norm": 0.5956820363149719, "learning_rate": 3.234153170447941e-05, "loss": 11.8743, "step": 27342 }, { "epoch": 1.4889356285692892, "grad_norm": 0.5586765541934234, "learning_rate": 3.233503851235208e-05, "loss": 11.86, "step": 27343 }, { "epoch": 1.4889900825658724, "grad_norm": 0.5375570701571059, "learning_rate": 3.232854584639887e-05, "loss": 11.8627, "step": 27344 }, { "epoch": 1.4890445365624554, "grad_norm": 0.5788137075225459, "learning_rate": 3.2322053706670194e-05, "loss": 11.8417, "step": 27345 }, { "epoch": 1.4890989905590384, "grad_norm": 0.5739870435953575, "learning_rate": 3.231556209321662e-05, "loss": 11.8072, "step": 27346 }, { "epoch": 1.4891534445556214, "grad_norm": 0.5200634454133051, "learning_rate": 3.2309071006088554e-05, "loss": 11.8978, "step": 27347 }, { "epoch": 1.4892078985522044, "grad_norm": 0.5267569385414985, "learning_rate": 3.230258044533653e-05, "loss": 11.791, "step": 27348 }, { "epoch": 1.4892623525487874, "grad_norm": 0.5257919766051812, "learning_rate": 3.229609041101099e-05, "loss": 11.7765, "step": 27349 }, { "epoch": 1.4893168065453704, "grad_norm": 0.5379618945486823, "learning_rate": 3.228960090316239e-05, "loss": 11.7958, "step": 27350 }, { "epoch": 1.4893712605419533, "grad_norm": 0.5464085762061488, "learning_rate": 3.228311192184122e-05, "loss": 11.9128, "step": 27351 }, { "epoch": 1.4894257145385363, "grad_norm": 0.5120607901818375, "learning_rate": 3.227662346709791e-05, "loss": 11.8575, "step": 27352 }, { "epoch": 1.4894801685351196, "grad_norm": 0.555661724649654, "learning_rate": 3.227013553898296e-05, "loss": 11.8716, "step": 27353 }, { "epoch": 1.4895346225317025, "grad_norm": 0.5199119991369252, "learning_rate": 3.2263648137546765e-05, "loss": 11.8153, "step": 27354 }, { "epoch": 1.4895890765282855, "grad_norm": 0.514488265265781, "learning_rate": 3.22571612628398e-05, "loss": 11.7517, "step": 27355 }, { "epoch": 1.4896435305248685, "grad_norm": 0.5500673005441195, "learning_rate": 3.2250674914912536e-05, "loss": 11.918, "step": 27356 }, { "epoch": 1.4896979845214515, "grad_norm": 0.6316095639101168, "learning_rate": 3.224418909381536e-05, "loss": 11.861, "step": 27357 }, { "epoch": 1.4897524385180345, "grad_norm": 0.5401492324875024, "learning_rate": 3.223770379959876e-05, "loss": 11.827, "step": 27358 }, { "epoch": 1.4898068925146175, "grad_norm": 0.5351032328964, "learning_rate": 3.2231219032313144e-05, "loss": 11.917, "step": 27359 }, { "epoch": 1.4898613465112005, "grad_norm": 0.5419580382352344, "learning_rate": 3.222473479200889e-05, "loss": 11.8503, "step": 27360 }, { "epoch": 1.4899158005077835, "grad_norm": 0.5611074554911892, "learning_rate": 3.2218251078736505e-05, "loss": 11.9644, "step": 27361 }, { "epoch": 1.4899702545043665, "grad_norm": 0.5479997393111928, "learning_rate": 3.2211767892546344e-05, "loss": 11.8396, "step": 27362 }, { "epoch": 1.4900247085009495, "grad_norm": 0.566210287760261, "learning_rate": 3.220528523348886e-05, "loss": 11.7726, "step": 27363 }, { "epoch": 1.4900791624975325, "grad_norm": 0.5335510246762156, "learning_rate": 3.2198803101614425e-05, "loss": 11.9138, "step": 27364 }, { "epoch": 1.4901336164941155, "grad_norm": 0.5369260579828239, "learning_rate": 3.2192321496973465e-05, "loss": 11.6365, "step": 27365 }, { "epoch": 1.4901880704906985, "grad_norm": 0.5763778858222389, "learning_rate": 3.218584041961641e-05, "loss": 11.9058, "step": 27366 }, { "epoch": 1.4902425244872815, "grad_norm": 0.5695051398147223, "learning_rate": 3.217935986959361e-05, "loss": 11.9074, "step": 27367 }, { "epoch": 1.4902969784838647, "grad_norm": 0.5087613081774589, "learning_rate": 3.217287984695551e-05, "loss": 11.7241, "step": 27368 }, { "epoch": 1.4903514324804477, "grad_norm": 0.5395579202495738, "learning_rate": 3.2166400351752465e-05, "loss": 11.7826, "step": 27369 }, { "epoch": 1.4904058864770307, "grad_norm": 0.5027300400031914, "learning_rate": 3.215992138403484e-05, "loss": 11.7519, "step": 27370 }, { "epoch": 1.4904603404736136, "grad_norm": 0.5456917563778277, "learning_rate": 3.215344294385307e-05, "loss": 11.8791, "step": 27371 }, { "epoch": 1.4905147944701966, "grad_norm": 0.5098160299614349, "learning_rate": 3.214696503125748e-05, "loss": 11.7655, "step": 27372 }, { "epoch": 1.4905692484667796, "grad_norm": 0.5985508874443944, "learning_rate": 3.21404876462985e-05, "loss": 11.8313, "step": 27373 }, { "epoch": 1.4906237024633626, "grad_norm": 0.5881527697539021, "learning_rate": 3.213401078902644e-05, "loss": 11.9214, "step": 27374 }, { "epoch": 1.4906781564599456, "grad_norm": 0.5772487234516996, "learning_rate": 3.212753445949172e-05, "loss": 11.7601, "step": 27375 }, { "epoch": 1.4907326104565288, "grad_norm": 0.5953524314552884, "learning_rate": 3.212105865774464e-05, "loss": 11.8854, "step": 27376 }, { "epoch": 1.4907870644531118, "grad_norm": 0.5253379304485232, "learning_rate": 3.2114583383835595e-05, "loss": 11.7607, "step": 27377 }, { "epoch": 1.4908415184496948, "grad_norm": 0.5686002338526486, "learning_rate": 3.210810863781496e-05, "loss": 11.7952, "step": 27378 }, { "epoch": 1.4908959724462778, "grad_norm": 0.5781451795322011, "learning_rate": 3.210163441973306e-05, "loss": 11.8081, "step": 27379 }, { "epoch": 1.4909504264428608, "grad_norm": 0.550138091581712, "learning_rate": 3.209516072964022e-05, "loss": 11.8952, "step": 27380 }, { "epoch": 1.4910048804394438, "grad_norm": 0.5790573792479207, "learning_rate": 3.2088687567586804e-05, "loss": 11.7505, "step": 27381 }, { "epoch": 1.4910593344360268, "grad_norm": 0.5289096816977832, "learning_rate": 3.2082214933623134e-05, "loss": 11.7907, "step": 27382 }, { "epoch": 1.4911137884326098, "grad_norm": 0.46653037365099215, "learning_rate": 3.207574282779957e-05, "loss": 11.8671, "step": 27383 }, { "epoch": 1.4911682424291928, "grad_norm": 0.5352656293213426, "learning_rate": 3.2069271250166385e-05, "loss": 11.8939, "step": 27384 }, { "epoch": 1.4912226964257758, "grad_norm": 0.4975280705881872, "learning_rate": 3.206280020077398e-05, "loss": 11.9769, "step": 27385 }, { "epoch": 1.4912771504223588, "grad_norm": 0.5429788044732909, "learning_rate": 3.2056329679672606e-05, "loss": 11.8679, "step": 27386 }, { "epoch": 1.4913316044189417, "grad_norm": 0.551843620543023, "learning_rate": 3.2049859686912584e-05, "loss": 11.7358, "step": 27387 }, { "epoch": 1.4913860584155247, "grad_norm": 0.52908555993108, "learning_rate": 3.20433902225443e-05, "loss": 11.7486, "step": 27388 }, { "epoch": 1.4914405124121077, "grad_norm": 0.8751017592020649, "learning_rate": 3.2036921286618e-05, "loss": 11.9371, "step": 27389 }, { "epoch": 1.4914949664086907, "grad_norm": 0.6231913141871868, "learning_rate": 3.2030452879184014e-05, "loss": 11.8704, "step": 27390 }, { "epoch": 1.491549420405274, "grad_norm": 0.5603397525587256, "learning_rate": 3.2023985000292575e-05, "loss": 11.8782, "step": 27391 }, { "epoch": 1.491603874401857, "grad_norm": 0.6754076910478701, "learning_rate": 3.201751764999403e-05, "loss": 11.9525, "step": 27392 }, { "epoch": 1.49165832839844, "grad_norm": 0.5935902729391448, "learning_rate": 3.201105082833872e-05, "loss": 11.8441, "step": 27393 }, { "epoch": 1.491712782395023, "grad_norm": 0.5449317540943305, "learning_rate": 3.200458453537682e-05, "loss": 11.8019, "step": 27394 }, { "epoch": 1.491767236391606, "grad_norm": 0.5499611987491785, "learning_rate": 3.199811877115873e-05, "loss": 11.8566, "step": 27395 }, { "epoch": 1.491821690388189, "grad_norm": 0.5725463749876839, "learning_rate": 3.199165353573462e-05, "loss": 11.7469, "step": 27396 }, { "epoch": 1.491876144384772, "grad_norm": 0.5345352879477128, "learning_rate": 3.198518882915487e-05, "loss": 11.8076, "step": 27397 }, { "epoch": 1.4919305983813549, "grad_norm": 0.5150328479226377, "learning_rate": 3.1978724651469647e-05, "loss": 11.8037, "step": 27398 }, { "epoch": 1.4919850523779379, "grad_norm": 0.5672482133986002, "learning_rate": 3.197226100272931e-05, "loss": 11.9698, "step": 27399 }, { "epoch": 1.492039506374521, "grad_norm": 0.540766156130379, "learning_rate": 3.196579788298407e-05, "loss": 11.7337, "step": 27400 }, { "epoch": 1.492093960371104, "grad_norm": 0.5686484632273681, "learning_rate": 3.1959335292284175e-05, "loss": 11.9007, "step": 27401 }, { "epoch": 1.492148414367687, "grad_norm": 0.5120570343678219, "learning_rate": 3.1952873230679926e-05, "loss": 11.7939, "step": 27402 }, { "epoch": 1.49220286836427, "grad_norm": 0.5517134705371104, "learning_rate": 3.194641169822152e-05, "loss": 11.6883, "step": 27403 }, { "epoch": 1.492257322360853, "grad_norm": 0.5242483670753166, "learning_rate": 3.193995069495922e-05, "loss": 11.7722, "step": 27404 }, { "epoch": 1.492311776357436, "grad_norm": 0.6385521546705989, "learning_rate": 3.193349022094332e-05, "loss": 11.9355, "step": 27405 }, { "epoch": 1.492366230354019, "grad_norm": 0.5338069368985067, "learning_rate": 3.1927030276223966e-05, "loss": 11.7156, "step": 27406 }, { "epoch": 1.492420684350602, "grad_norm": 0.5373913974375919, "learning_rate": 3.192057086085148e-05, "loss": 11.8471, "step": 27407 }, { "epoch": 1.492475138347185, "grad_norm": 0.5928493974126836, "learning_rate": 3.1914111974876026e-05, "loss": 11.7267, "step": 27408 }, { "epoch": 1.492529592343768, "grad_norm": 0.5168927346027926, "learning_rate": 3.1907653618347886e-05, "loss": 11.8028, "step": 27409 }, { "epoch": 1.492584046340351, "grad_norm": 0.5313684037282164, "learning_rate": 3.190119579131725e-05, "loss": 11.8494, "step": 27410 }, { "epoch": 1.492638500336934, "grad_norm": 0.5136660413035197, "learning_rate": 3.18947384938343e-05, "loss": 11.7845, "step": 27411 }, { "epoch": 1.492692954333517, "grad_norm": 0.5357556278925202, "learning_rate": 3.188828172594932e-05, "loss": 11.8182, "step": 27412 }, { "epoch": 1.4927474083301, "grad_norm": 0.5275353878504169, "learning_rate": 3.188182548771245e-05, "loss": 11.7841, "step": 27413 }, { "epoch": 1.4928018623266832, "grad_norm": 0.5064172315746848, "learning_rate": 3.1875369779173924e-05, "loss": 11.7459, "step": 27414 }, { "epoch": 1.4928563163232662, "grad_norm": 0.5470100177461124, "learning_rate": 3.1868914600383994e-05, "loss": 11.9278, "step": 27415 }, { "epoch": 1.4929107703198492, "grad_norm": 0.4988042980869149, "learning_rate": 3.186245995139276e-05, "loss": 11.8884, "step": 27416 }, { "epoch": 1.4929652243164322, "grad_norm": 0.542600917398713, "learning_rate": 3.18560058322505e-05, "loss": 11.7948, "step": 27417 }, { "epoch": 1.4930196783130152, "grad_norm": 0.5337346171684353, "learning_rate": 3.184955224300734e-05, "loss": 11.8023, "step": 27418 }, { "epoch": 1.4930741323095982, "grad_norm": 0.5068117506775958, "learning_rate": 3.184309918371352e-05, "loss": 11.751, "step": 27419 }, { "epoch": 1.4931285863061812, "grad_norm": 0.5335797519704031, "learning_rate": 3.18366466544192e-05, "loss": 11.76, "step": 27420 }, { "epoch": 1.4931830403027642, "grad_norm": 0.5469394051136713, "learning_rate": 3.1830194655174505e-05, "loss": 11.7379, "step": 27421 }, { "epoch": 1.4932374942993472, "grad_norm": 0.5380651376103969, "learning_rate": 3.1823743186029675e-05, "loss": 11.9291, "step": 27422 }, { "epoch": 1.4932919482959304, "grad_norm": 0.50377001974569, "learning_rate": 3.1817292247034824e-05, "loss": 11.8882, "step": 27423 }, { "epoch": 1.4933464022925134, "grad_norm": 0.5398332871826095, "learning_rate": 3.181084183824018e-05, "loss": 11.8276, "step": 27424 }, { "epoch": 1.4934008562890964, "grad_norm": 0.5402220267565918, "learning_rate": 3.180439195969582e-05, "loss": 11.8085, "step": 27425 }, { "epoch": 1.4934553102856793, "grad_norm": 0.5355265862902128, "learning_rate": 3.179794261145196e-05, "loss": 11.8738, "step": 27426 }, { "epoch": 1.4935097642822623, "grad_norm": 0.55237386828305, "learning_rate": 3.179149379355877e-05, "loss": 11.8119, "step": 27427 }, { "epoch": 1.4935642182788453, "grad_norm": 0.497504691561615, "learning_rate": 3.178504550606631e-05, "loss": 11.9204, "step": 27428 }, { "epoch": 1.4936186722754283, "grad_norm": 0.5127452387862489, "learning_rate": 3.177859774902483e-05, "loss": 11.8487, "step": 27429 }, { "epoch": 1.4936731262720113, "grad_norm": 0.5758347834761272, "learning_rate": 3.17721505224844e-05, "loss": 11.9149, "step": 27430 }, { "epoch": 1.4937275802685943, "grad_norm": 0.5547243846772557, "learning_rate": 3.1765703826495144e-05, "loss": 11.7884, "step": 27431 }, { "epoch": 1.4937820342651773, "grad_norm": 0.5571913268408929, "learning_rate": 3.1759257661107245e-05, "loss": 11.715, "step": 27432 }, { "epoch": 1.4938364882617603, "grad_norm": 0.5442979747736589, "learning_rate": 3.175281202637077e-05, "loss": 11.8839, "step": 27433 }, { "epoch": 1.4938909422583433, "grad_norm": 0.5704327202416556, "learning_rate": 3.174636692233591e-05, "loss": 11.8487, "step": 27434 }, { "epoch": 1.4939453962549263, "grad_norm": 0.5372537584832451, "learning_rate": 3.173992234905272e-05, "loss": 11.8977, "step": 27435 }, { "epoch": 1.4939998502515093, "grad_norm": 0.575209458369802, "learning_rate": 3.173347830657133e-05, "loss": 11.9214, "step": 27436 }, { "epoch": 1.4940543042480925, "grad_norm": 0.5796686629508384, "learning_rate": 3.17270347949419e-05, "loss": 11.8689, "step": 27437 }, { "epoch": 1.4941087582446755, "grad_norm": 0.5433666967461873, "learning_rate": 3.1720591814214464e-05, "loss": 11.9079, "step": 27438 }, { "epoch": 1.4941632122412585, "grad_norm": 0.5236638767969538, "learning_rate": 3.1714149364439215e-05, "loss": 11.852, "step": 27439 }, { "epoch": 1.4942176662378415, "grad_norm": 0.5577906353201473, "learning_rate": 3.1707707445666135e-05, "loss": 11.8241, "step": 27440 }, { "epoch": 1.4942721202344245, "grad_norm": 0.49045688724140346, "learning_rate": 3.1701266057945376e-05, "loss": 11.8646, "step": 27441 }, { "epoch": 1.4943265742310075, "grad_norm": 0.5170163146468425, "learning_rate": 3.169482520132705e-05, "loss": 11.8163, "step": 27442 }, { "epoch": 1.4943810282275904, "grad_norm": 0.5074604276050942, "learning_rate": 3.168838487586119e-05, "loss": 11.8348, "step": 27443 }, { "epoch": 1.4944354822241734, "grad_norm": 0.4994581872740865, "learning_rate": 3.168194508159794e-05, "loss": 11.8019, "step": 27444 }, { "epoch": 1.4944899362207564, "grad_norm": 0.5539482960695543, "learning_rate": 3.16755058185873e-05, "loss": 11.9257, "step": 27445 }, { "epoch": 1.4945443902173396, "grad_norm": 0.5394138641393187, "learning_rate": 3.166906708687943e-05, "loss": 11.8341, "step": 27446 }, { "epoch": 1.4945988442139226, "grad_norm": 0.5958720947355558, "learning_rate": 3.1662628886524314e-05, "loss": 11.7851, "step": 27447 }, { "epoch": 1.4946532982105056, "grad_norm": 0.5055716382702036, "learning_rate": 3.165619121757206e-05, "loss": 11.9507, "step": 27448 }, { "epoch": 1.4947077522070886, "grad_norm": 0.5850583357709767, "learning_rate": 3.164975408007279e-05, "loss": 11.8939, "step": 27449 }, { "epoch": 1.4947622062036716, "grad_norm": 0.5091230101697962, "learning_rate": 3.1643317474076415e-05, "loss": 11.8763, "step": 27450 }, { "epoch": 1.4948166602002546, "grad_norm": 0.5378135812842038, "learning_rate": 3.163688139963311e-05, "loss": 11.8288, "step": 27451 }, { "epoch": 1.4948711141968376, "grad_norm": 0.550914214934827, "learning_rate": 3.163044585679286e-05, "loss": 11.957, "step": 27452 }, { "epoch": 1.4949255681934206, "grad_norm": 0.5402679003309571, "learning_rate": 3.162401084560571e-05, "loss": 11.8529, "step": 27453 }, { "epoch": 1.4949800221900036, "grad_norm": 0.5627741211877516, "learning_rate": 3.161757636612176e-05, "loss": 11.8402, "step": 27454 }, { "epoch": 1.4950344761865866, "grad_norm": 0.5389489879191961, "learning_rate": 3.161114241839096e-05, "loss": 11.7944, "step": 27455 }, { "epoch": 1.4950889301831696, "grad_norm": 0.5577135803377378, "learning_rate": 3.1604709002463426e-05, "loss": 11.8685, "step": 27456 }, { "epoch": 1.4951433841797526, "grad_norm": 0.5315667289223625, "learning_rate": 3.159827611838912e-05, "loss": 11.853, "step": 27457 }, { "epoch": 1.4951978381763356, "grad_norm": 0.5531299342302193, "learning_rate": 3.159184376621811e-05, "loss": 11.8088, "step": 27458 }, { "epoch": 1.4952522921729186, "grad_norm": 0.5639067210014592, "learning_rate": 3.15854119460004e-05, "loss": 11.87, "step": 27459 }, { "epoch": 1.4953067461695015, "grad_norm": 0.5060470268627262, "learning_rate": 3.1578980657785975e-05, "loss": 11.9204, "step": 27460 }, { "epoch": 1.4953612001660848, "grad_norm": 0.7839953306743349, "learning_rate": 3.1572549901624895e-05, "loss": 11.9339, "step": 27461 }, { "epoch": 1.4954156541626678, "grad_norm": 0.5774357181060241, "learning_rate": 3.156611967756711e-05, "loss": 11.6995, "step": 27462 }, { "epoch": 1.4954701081592507, "grad_norm": 0.5582189399702306, "learning_rate": 3.155968998566264e-05, "loss": 11.823, "step": 27463 }, { "epoch": 1.4955245621558337, "grad_norm": 0.53568887973156, "learning_rate": 3.155326082596155e-05, "loss": 11.8139, "step": 27464 }, { "epoch": 1.4955790161524167, "grad_norm": 0.5031229417344054, "learning_rate": 3.1546832198513754e-05, "loss": 11.9377, "step": 27465 }, { "epoch": 1.4956334701489997, "grad_norm": 0.5423070313971351, "learning_rate": 3.154040410336929e-05, "loss": 11.8203, "step": 27466 }, { "epoch": 1.4956879241455827, "grad_norm": 0.5539134542696412, "learning_rate": 3.1533976540578095e-05, "loss": 11.8151, "step": 27467 }, { "epoch": 1.4957423781421657, "grad_norm": 0.6590430201518052, "learning_rate": 3.152754951019021e-05, "loss": 12.0469, "step": 27468 }, { "epoch": 1.495796832138749, "grad_norm": 0.5380775390880129, "learning_rate": 3.152112301225556e-05, "loss": 11.8812, "step": 27469 }, { "epoch": 1.495851286135332, "grad_norm": 0.5424455925133175, "learning_rate": 3.151469704682416e-05, "loss": 11.8462, "step": 27470 }, { "epoch": 1.495905740131915, "grad_norm": 0.5286895412193657, "learning_rate": 3.150827161394597e-05, "loss": 11.8277, "step": 27471 }, { "epoch": 1.495960194128498, "grad_norm": 0.5286892757526992, "learning_rate": 3.150184671367091e-05, "loss": 11.8305, "step": 27472 }, { "epoch": 1.496014648125081, "grad_norm": 0.5745537850910586, "learning_rate": 3.149542234604902e-05, "loss": 11.9197, "step": 27473 }, { "epoch": 1.4960691021216639, "grad_norm": 0.48156888306081325, "learning_rate": 3.148899851113018e-05, "loss": 11.7803, "step": 27474 }, { "epoch": 1.4961235561182469, "grad_norm": 0.5226758497171498, "learning_rate": 3.148257520896436e-05, "loss": 11.8624, "step": 27475 }, { "epoch": 1.4961780101148299, "grad_norm": 0.6284035486208138, "learning_rate": 3.147615243960157e-05, "loss": 11.9189, "step": 27476 }, { "epoch": 1.4962324641114129, "grad_norm": 0.5525820937708962, "learning_rate": 3.146973020309168e-05, "loss": 11.8693, "step": 27477 }, { "epoch": 1.4962869181079959, "grad_norm": 0.5280852682684858, "learning_rate": 3.146330849948468e-05, "loss": 11.8642, "step": 27478 }, { "epoch": 1.4963413721045788, "grad_norm": 0.5388026049611822, "learning_rate": 3.145688732883047e-05, "loss": 11.8511, "step": 27479 }, { "epoch": 1.4963958261011618, "grad_norm": 0.5225958946227364, "learning_rate": 3.1450466691179014e-05, "loss": 11.8485, "step": 27480 }, { "epoch": 1.4964502800977448, "grad_norm": 0.5239720890394016, "learning_rate": 3.144404658658024e-05, "loss": 11.9028, "step": 27481 }, { "epoch": 1.4965047340943278, "grad_norm": 0.5550563471128637, "learning_rate": 3.1437627015084016e-05, "loss": 11.8473, "step": 27482 }, { "epoch": 1.4965591880909108, "grad_norm": 0.5317330658116989, "learning_rate": 3.143120797674034e-05, "loss": 11.8971, "step": 27483 }, { "epoch": 1.496613642087494, "grad_norm": 0.5109791195007246, "learning_rate": 3.142478947159906e-05, "loss": 12.0197, "step": 27484 }, { "epoch": 1.496668096084077, "grad_norm": 0.5453036897113615, "learning_rate": 3.141837149971011e-05, "loss": 11.6837, "step": 27485 }, { "epoch": 1.49672255008066, "grad_norm": 0.5486934779186424, "learning_rate": 3.141195406112344e-05, "loss": 11.8049, "step": 27486 }, { "epoch": 1.496777004077243, "grad_norm": 0.5505034490465567, "learning_rate": 3.1405537155888876e-05, "loss": 11.9074, "step": 27487 }, { "epoch": 1.496831458073826, "grad_norm": 0.4991785681869255, "learning_rate": 3.1399120784056404e-05, "loss": 11.8099, "step": 27488 }, { "epoch": 1.496885912070409, "grad_norm": 0.483806822887898, "learning_rate": 3.1392704945675835e-05, "loss": 11.6764, "step": 27489 }, { "epoch": 1.496940366066992, "grad_norm": 0.5662450510412216, "learning_rate": 3.1386289640797126e-05, "loss": 11.9674, "step": 27490 }, { "epoch": 1.496994820063575, "grad_norm": 0.4846114197861738, "learning_rate": 3.137987486947015e-05, "loss": 11.7917, "step": 27491 }, { "epoch": 1.497049274060158, "grad_norm": 0.5326804141127223, "learning_rate": 3.137346063174472e-05, "loss": 11.8509, "step": 27492 }, { "epoch": 1.4971037280567412, "grad_norm": 0.54381282457603, "learning_rate": 3.1367046927670815e-05, "loss": 11.7573, "step": 27493 }, { "epoch": 1.4971581820533242, "grad_norm": 0.6531117726889754, "learning_rate": 3.136063375729823e-05, "loss": 12.0029, "step": 27494 }, { "epoch": 1.4972126360499072, "grad_norm": 0.7901619653358373, "learning_rate": 3.135422112067691e-05, "loss": 11.8016, "step": 27495 }, { "epoch": 1.4972670900464902, "grad_norm": 0.547921772757661, "learning_rate": 3.134780901785663e-05, "loss": 11.8898, "step": 27496 }, { "epoch": 1.4973215440430732, "grad_norm": 0.581070980445588, "learning_rate": 3.13413974488873e-05, "loss": 11.8743, "step": 27497 }, { "epoch": 1.4973759980396562, "grad_norm": 0.5626893852553062, "learning_rate": 3.1334986413818826e-05, "loss": 11.9637, "step": 27498 }, { "epoch": 1.4974304520362391, "grad_norm": 0.56260109220869, "learning_rate": 3.132857591270096e-05, "loss": 11.9424, "step": 27499 }, { "epoch": 1.4974849060328221, "grad_norm": 0.5682717579984866, "learning_rate": 3.132216594558368e-05, "loss": 11.9458, "step": 27500 }, { "epoch": 1.4975393600294051, "grad_norm": 0.5087272968067512, "learning_rate": 3.1315756512516694e-05, "loss": 11.8205, "step": 27501 }, { "epoch": 1.4975938140259881, "grad_norm": 0.5251530675231519, "learning_rate": 3.130934761354989e-05, "loss": 11.8179, "step": 27502 }, { "epoch": 1.4976482680225711, "grad_norm": 0.6019276926576036, "learning_rate": 3.1302939248733164e-05, "loss": 11.9282, "step": 27503 }, { "epoch": 1.497702722019154, "grad_norm": 0.5623108223998298, "learning_rate": 3.129653141811626e-05, "loss": 11.8528, "step": 27504 }, { "epoch": 1.497757176015737, "grad_norm": 0.5165853157628106, "learning_rate": 3.1290124121749087e-05, "loss": 11.8269, "step": 27505 }, { "epoch": 1.49781163001232, "grad_norm": 0.5454551760674319, "learning_rate": 3.1283717359681394e-05, "loss": 11.8371, "step": 27506 }, { "epoch": 1.4978660840089033, "grad_norm": 0.513751383959271, "learning_rate": 3.127731113196308e-05, "loss": 11.8529, "step": 27507 }, { "epoch": 1.4979205380054863, "grad_norm": 0.7296877830900464, "learning_rate": 3.1270905438643885e-05, "loss": 11.7038, "step": 27508 }, { "epoch": 1.4979749920020693, "grad_norm": 0.5451329658973117, "learning_rate": 3.126450027977366e-05, "loss": 11.8812, "step": 27509 }, { "epoch": 1.4980294459986523, "grad_norm": 0.5304672373545064, "learning_rate": 3.125809565540225e-05, "loss": 11.9342, "step": 27510 }, { "epoch": 1.4980838999952353, "grad_norm": 0.5229017499272024, "learning_rate": 3.1251691565579376e-05, "loss": 11.8463, "step": 27511 }, { "epoch": 1.4981383539918183, "grad_norm": 0.5460362783778945, "learning_rate": 3.124528801035487e-05, "loss": 11.8818, "step": 27512 }, { "epoch": 1.4981928079884013, "grad_norm": 0.5116544705483668, "learning_rate": 3.123888498977856e-05, "loss": 11.9706, "step": 27513 }, { "epoch": 1.4982472619849843, "grad_norm": 0.5778308106786579, "learning_rate": 3.1232482503900185e-05, "loss": 11.8801, "step": 27514 }, { "epoch": 1.4983017159815673, "grad_norm": 0.6156861772561578, "learning_rate": 3.122608055276959e-05, "loss": 11.9433, "step": 27515 }, { "epoch": 1.4983561699781505, "grad_norm": 0.6010348328230253, "learning_rate": 3.1219679136436494e-05, "loss": 11.933, "step": 27516 }, { "epoch": 1.4984106239747335, "grad_norm": 0.6208290159700537, "learning_rate": 3.121327825495074e-05, "loss": 11.9482, "step": 27517 }, { "epoch": 1.4984650779713165, "grad_norm": 0.5385137364443083, "learning_rate": 3.120687790836204e-05, "loss": 11.7787, "step": 27518 }, { "epoch": 1.4985195319678994, "grad_norm": 0.5725944104336594, "learning_rate": 3.1200478096720185e-05, "loss": 11.8878, "step": 27519 }, { "epoch": 1.4985739859644824, "grad_norm": 0.6063329120900592, "learning_rate": 3.1194078820075026e-05, "loss": 11.7532, "step": 27520 }, { "epoch": 1.4986284399610654, "grad_norm": 0.5704832095542197, "learning_rate": 3.118768007847618e-05, "loss": 11.8878, "step": 27521 }, { "epoch": 1.4986828939576484, "grad_norm": 0.5636863502859587, "learning_rate": 3.1181281871973514e-05, "loss": 11.9443, "step": 27522 }, { "epoch": 1.4987373479542314, "grad_norm": 0.5147934802112546, "learning_rate": 3.117488420061669e-05, "loss": 11.9173, "step": 27523 }, { "epoch": 1.4987918019508144, "grad_norm": 0.51464273422368, "learning_rate": 3.1168487064455524e-05, "loss": 11.9362, "step": 27524 }, { "epoch": 1.4988462559473974, "grad_norm": 0.4749948706665351, "learning_rate": 3.1162090463539773e-05, "loss": 11.7872, "step": 27525 }, { "epoch": 1.4989007099439804, "grad_norm": 0.5811691982443821, "learning_rate": 3.115569439791911e-05, "loss": 11.7787, "step": 27526 }, { "epoch": 1.4989551639405634, "grad_norm": 0.5207677503485039, "learning_rate": 3.114929886764335e-05, "loss": 11.8194, "step": 27527 }, { "epoch": 1.4990096179371464, "grad_norm": 0.5618206163224754, "learning_rate": 3.114290387276216e-05, "loss": 11.8607, "step": 27528 }, { "epoch": 1.4990640719337294, "grad_norm": 0.6222593230654404, "learning_rate": 3.113650941332533e-05, "loss": 11.8066, "step": 27529 }, { "epoch": 1.4991185259303124, "grad_norm": 0.5485997733806728, "learning_rate": 3.113011548938255e-05, "loss": 11.7379, "step": 27530 }, { "epoch": 1.4991729799268956, "grad_norm": 0.5699558382653481, "learning_rate": 3.112372210098351e-05, "loss": 11.9523, "step": 27531 }, { "epoch": 1.4992274339234786, "grad_norm": 0.5336572321578744, "learning_rate": 3.1117329248177984e-05, "loss": 11.8289, "step": 27532 }, { "epoch": 1.4992818879200616, "grad_norm": 0.5966800422283287, "learning_rate": 3.111093693101563e-05, "loss": 11.9526, "step": 27533 }, { "epoch": 1.4993363419166446, "grad_norm": 0.5315714128725967, "learning_rate": 3.1104545149546184e-05, "loss": 11.8079, "step": 27534 }, { "epoch": 1.4993907959132275, "grad_norm": 0.5280188799287588, "learning_rate": 3.109815390381938e-05, "loss": 11.9168, "step": 27535 }, { "epoch": 1.4994452499098105, "grad_norm": 0.4887410793402791, "learning_rate": 3.109176319388485e-05, "loss": 11.8738, "step": 27536 }, { "epoch": 1.4994997039063935, "grad_norm": 0.606420131416671, "learning_rate": 3.1085373019792366e-05, "loss": 11.8706, "step": 27537 }, { "epoch": 1.4995541579029765, "grad_norm": 0.5491822896002987, "learning_rate": 3.107898338159153e-05, "loss": 11.773, "step": 27538 }, { "epoch": 1.4996086118995597, "grad_norm": 0.5137174202577975, "learning_rate": 3.107259427933212e-05, "loss": 11.7138, "step": 27539 }, { "epoch": 1.4996630658961427, "grad_norm": 0.5568255752004765, "learning_rate": 3.106620571306378e-05, "loss": 11.78, "step": 27540 }, { "epoch": 1.4997175198927257, "grad_norm": 0.5316097749800234, "learning_rate": 3.105981768283614e-05, "loss": 11.8818, "step": 27541 }, { "epoch": 1.4997719738893087, "grad_norm": 0.5207641553822407, "learning_rate": 3.1053430188698976e-05, "loss": 11.7427, "step": 27542 }, { "epoch": 1.4998264278858917, "grad_norm": 0.5309694357002798, "learning_rate": 3.1047043230701844e-05, "loss": 11.8229, "step": 27543 }, { "epoch": 1.4998808818824747, "grad_norm": 0.5583661611807208, "learning_rate": 3.1040656808894505e-05, "loss": 11.8332, "step": 27544 }, { "epoch": 1.4999353358790577, "grad_norm": 0.5743922278384787, "learning_rate": 3.103427092332656e-05, "loss": 11.8431, "step": 27545 }, { "epoch": 1.4999897898756407, "grad_norm": 0.5261854774839165, "learning_rate": 3.1027885574047687e-05, "loss": 11.8617, "step": 27546 }, { "epoch": 1.5000442438722237, "grad_norm": 0.5468380951743005, "learning_rate": 3.102150076110757e-05, "loss": 11.7561, "step": 27547 }, { "epoch": 1.5000986978688067, "grad_norm": 0.5654126809901402, "learning_rate": 3.101511648455579e-05, "loss": 11.8551, "step": 27548 }, { "epoch": 1.5001531518653897, "grad_norm": 0.5495863535567939, "learning_rate": 3.100873274444208e-05, "loss": 11.809, "step": 27549 }, { "epoch": 1.5002076058619727, "grad_norm": 0.5647235853031947, "learning_rate": 3.1002349540816036e-05, "loss": 11.8476, "step": 27550 }, { "epoch": 1.5002620598585557, "grad_norm": 0.5435244911535195, "learning_rate": 3.0995966873727244e-05, "loss": 11.7316, "step": 27551 }, { "epoch": 1.5003165138551386, "grad_norm": 0.529410883315443, "learning_rate": 3.098958474322543e-05, "loss": 11.8691, "step": 27552 }, { "epoch": 1.5003709678517216, "grad_norm": 0.570198110631626, "learning_rate": 3.098320314936015e-05, "loss": 11.7387, "step": 27553 }, { "epoch": 1.5004254218483046, "grad_norm": 0.5399444238497815, "learning_rate": 3.0976822092181076e-05, "loss": 11.7691, "step": 27554 }, { "epoch": 1.5004798758448878, "grad_norm": 0.6302267951639604, "learning_rate": 3.097044157173778e-05, "loss": 11.8268, "step": 27555 }, { "epoch": 1.5005343298414708, "grad_norm": 0.5475409955087848, "learning_rate": 3.096406158807995e-05, "loss": 11.7685, "step": 27556 }, { "epoch": 1.5005887838380538, "grad_norm": 0.5299594529500552, "learning_rate": 3.0957682141257104e-05, "loss": 11.8292, "step": 27557 }, { "epoch": 1.5006432378346368, "grad_norm": 0.5538666764621011, "learning_rate": 3.0951303231318916e-05, "loss": 11.6462, "step": 27558 }, { "epoch": 1.5006976918312198, "grad_norm": 0.5317166062254615, "learning_rate": 3.0944924858314994e-05, "loss": 11.7942, "step": 27559 }, { "epoch": 1.5007521458278028, "grad_norm": 0.581170481397359, "learning_rate": 3.093854702229493e-05, "loss": 11.7875, "step": 27560 }, { "epoch": 1.500806599824386, "grad_norm": 0.5594560718388232, "learning_rate": 3.093216972330827e-05, "loss": 11.7466, "step": 27561 }, { "epoch": 1.500861053820969, "grad_norm": 0.5414119794473163, "learning_rate": 3.092579296140467e-05, "loss": 11.8654, "step": 27562 }, { "epoch": 1.500915507817552, "grad_norm": 0.5203958651338018, "learning_rate": 3.0919416736633646e-05, "loss": 11.9641, "step": 27563 }, { "epoch": 1.500969961814135, "grad_norm": 0.5535061061668877, "learning_rate": 3.091304104904487e-05, "loss": 11.8742, "step": 27564 }, { "epoch": 1.501024415810718, "grad_norm": 0.5362178767221011, "learning_rate": 3.0906665898687826e-05, "loss": 11.8637, "step": 27565 }, { "epoch": 1.501078869807301, "grad_norm": 0.5127350859931332, "learning_rate": 3.090029128561218e-05, "loss": 11.8934, "step": 27566 }, { "epoch": 1.501133323803884, "grad_norm": 0.5298054538718383, "learning_rate": 3.089391720986742e-05, "loss": 11.8764, "step": 27567 }, { "epoch": 1.501187777800467, "grad_norm": 0.6092771664732751, "learning_rate": 3.088754367150315e-05, "loss": 11.8243, "step": 27568 }, { "epoch": 1.50124223179705, "grad_norm": 0.5284515644674422, "learning_rate": 3.088117067056896e-05, "loss": 11.8535, "step": 27569 }, { "epoch": 1.501296685793633, "grad_norm": 0.571331530496941, "learning_rate": 3.0874798207114374e-05, "loss": 11.9048, "step": 27570 }, { "epoch": 1.501351139790216, "grad_norm": 0.6116606438283967, "learning_rate": 3.0868426281188953e-05, "loss": 11.9523, "step": 27571 }, { "epoch": 1.501405593786799, "grad_norm": 0.5388547942213677, "learning_rate": 3.0862054892842215e-05, "loss": 11.8108, "step": 27572 }, { "epoch": 1.501460047783382, "grad_norm": 0.5460617994491654, "learning_rate": 3.0855684042123734e-05, "loss": 11.8827, "step": 27573 }, { "epoch": 1.501514501779965, "grad_norm": 0.5466345431925912, "learning_rate": 3.084931372908307e-05, "loss": 11.662, "step": 27574 }, { "epoch": 1.501568955776548, "grad_norm": 0.5919496457101667, "learning_rate": 3.0842943953769724e-05, "loss": 11.9597, "step": 27575 }, { "epoch": 1.501623409773131, "grad_norm": 0.5290296969291574, "learning_rate": 3.083657471623326e-05, "loss": 11.8332, "step": 27576 }, { "epoch": 1.501677863769714, "grad_norm": 0.5476704734827672, "learning_rate": 3.0830206016523165e-05, "loss": 11.913, "step": 27577 }, { "epoch": 1.501732317766297, "grad_norm": 0.5052560576496058, "learning_rate": 3.0823837854689016e-05, "loss": 11.9057, "step": 27578 }, { "epoch": 1.5017867717628801, "grad_norm": 0.5616683807127564, "learning_rate": 3.081747023078028e-05, "loss": 11.8198, "step": 27579 }, { "epoch": 1.501841225759463, "grad_norm": 0.5784261645398846, "learning_rate": 3.081110314484652e-05, "loss": 11.697, "step": 27580 }, { "epoch": 1.501895679756046, "grad_norm": 0.4839773202934603, "learning_rate": 3.0804736596937225e-05, "loss": 11.8623, "step": 27581 }, { "epoch": 1.501950133752629, "grad_norm": 0.5926065659373663, "learning_rate": 3.079837058710188e-05, "loss": 11.8601, "step": 27582 }, { "epoch": 1.502004587749212, "grad_norm": 0.5667121528056517, "learning_rate": 3.079200511539e-05, "loss": 11.8723, "step": 27583 }, { "epoch": 1.502059041745795, "grad_norm": 0.5492222967762245, "learning_rate": 3.0785640181851125e-05, "loss": 11.8033, "step": 27584 }, { "epoch": 1.5021134957423783, "grad_norm": 0.5894426805620854, "learning_rate": 3.07792757865347e-05, "loss": 11.8386, "step": 27585 }, { "epoch": 1.5021679497389613, "grad_norm": 0.5891161954158775, "learning_rate": 3.0772911929490265e-05, "loss": 11.8987, "step": 27586 }, { "epoch": 1.5022224037355443, "grad_norm": 0.5071649840710323, "learning_rate": 3.076654861076723e-05, "loss": 11.8914, "step": 27587 }, { "epoch": 1.5022768577321273, "grad_norm": 0.5372359479170464, "learning_rate": 3.076018583041517e-05, "loss": 11.8784, "step": 27588 }, { "epoch": 1.5023313117287103, "grad_norm": 0.53300440436641, "learning_rate": 3.075382358848348e-05, "loss": 11.8932, "step": 27589 }, { "epoch": 1.5023857657252933, "grad_norm": 0.5549183348199073, "learning_rate": 3.07474618850217e-05, "loss": 11.8055, "step": 27590 }, { "epoch": 1.5024402197218762, "grad_norm": 0.5364623809143403, "learning_rate": 3.074110072007927e-05, "loss": 11.8346, "step": 27591 }, { "epoch": 1.5024946737184592, "grad_norm": 0.5980968037882382, "learning_rate": 3.073474009370563e-05, "loss": 11.8335, "step": 27592 }, { "epoch": 1.5025491277150422, "grad_norm": 0.5319294149873626, "learning_rate": 3.07283800059503e-05, "loss": 11.7779, "step": 27593 }, { "epoch": 1.5026035817116252, "grad_norm": 0.5135358376415784, "learning_rate": 3.072202045686265e-05, "loss": 11.7976, "step": 27594 }, { "epoch": 1.5026580357082082, "grad_norm": 0.5577114057239492, "learning_rate": 3.0715661446492217e-05, "loss": 11.8907, "step": 27595 }, { "epoch": 1.5027124897047912, "grad_norm": 0.5472486261440314, "learning_rate": 3.070930297488843e-05, "loss": 11.9129, "step": 27596 }, { "epoch": 1.5027669437013742, "grad_norm": 0.5574978807079234, "learning_rate": 3.0702945042100706e-05, "loss": 11.8204, "step": 27597 }, { "epoch": 1.5028213976979572, "grad_norm": 0.5722485819548241, "learning_rate": 3.0696587648178523e-05, "loss": 11.8558, "step": 27598 }, { "epoch": 1.5028758516945402, "grad_norm": 0.5395526032028541, "learning_rate": 3.069023079317127e-05, "loss": 11.8176, "step": 27599 }, { "epoch": 1.5029303056911232, "grad_norm": 0.5277757089922277, "learning_rate": 3.0683874477128436e-05, "loss": 11.8558, "step": 27600 }, { "epoch": 1.5029847596877062, "grad_norm": 0.5240624033785303, "learning_rate": 3.067751870009942e-05, "loss": 11.9188, "step": 27601 }, { "epoch": 1.5030392136842894, "grad_norm": 0.5084139043867187, "learning_rate": 3.067116346213361e-05, "loss": 11.8472, "step": 27602 }, { "epoch": 1.5030936676808724, "grad_norm": 0.5409512735160412, "learning_rate": 3.06648087632805e-05, "loss": 11.8957, "step": 27603 }, { "epoch": 1.5031481216774554, "grad_norm": 0.5711421532119695, "learning_rate": 3.0658454603589416e-05, "loss": 11.9514, "step": 27604 }, { "epoch": 1.5032025756740384, "grad_norm": 0.5798785861361018, "learning_rate": 3.065210098310985e-05, "loss": 11.9032, "step": 27605 }, { "epoch": 1.5032570296706214, "grad_norm": 0.5517773569951979, "learning_rate": 3.0645747901891164e-05, "loss": 11.7489, "step": 27606 }, { "epoch": 1.5033114836672044, "grad_norm": 0.60294794538028, "learning_rate": 3.063939535998276e-05, "loss": 11.8538, "step": 27607 }, { "epoch": 1.5033659376637876, "grad_norm": 0.5379968417713321, "learning_rate": 3.0633043357434074e-05, "loss": 11.7519, "step": 27608 }, { "epoch": 1.5034203916603706, "grad_norm": 0.5558431679438995, "learning_rate": 3.0626691894294456e-05, "loss": 11.8692, "step": 27609 }, { "epoch": 1.5034748456569536, "grad_norm": 0.56468562645044, "learning_rate": 3.0620340970613345e-05, "loss": 11.7864, "step": 27610 }, { "epoch": 1.5035292996535365, "grad_norm": 0.5267122393320834, "learning_rate": 3.06139905864401e-05, "loss": 11.7804, "step": 27611 }, { "epoch": 1.5035837536501195, "grad_norm": 0.5499475642516489, "learning_rate": 3.060764074182406e-05, "loss": 11.775, "step": 27612 }, { "epoch": 1.5036382076467025, "grad_norm": 0.5143490316338917, "learning_rate": 3.0601291436814684e-05, "loss": 11.8559, "step": 27613 }, { "epoch": 1.5036926616432855, "grad_norm": 0.5706754059374372, "learning_rate": 3.059494267146127e-05, "loss": 11.8854, "step": 27614 }, { "epoch": 1.5037471156398685, "grad_norm": 0.523767343686082, "learning_rate": 3.058859444581326e-05, "loss": 11.885, "step": 27615 }, { "epoch": 1.5038015696364515, "grad_norm": 0.5624296865878132, "learning_rate": 3.058224675991993e-05, "loss": 11.6968, "step": 27616 }, { "epoch": 1.5038560236330345, "grad_norm": 0.49422260520353717, "learning_rate": 3.0575899613830706e-05, "loss": 11.7223, "step": 27617 }, { "epoch": 1.5039104776296175, "grad_norm": 0.5247207589597501, "learning_rate": 3.0569553007594956e-05, "loss": 11.8117, "step": 27618 }, { "epoch": 1.5039649316262005, "grad_norm": 0.5646454568774119, "learning_rate": 3.056320694126197e-05, "loss": 11.8483, "step": 27619 }, { "epoch": 1.5040193856227835, "grad_norm": 0.5403338629392758, "learning_rate": 3.0556861414881154e-05, "loss": 11.8416, "step": 27620 }, { "epoch": 1.5040738396193665, "grad_norm": 0.5505254544530112, "learning_rate": 3.0550516428501854e-05, "loss": 11.8749, "step": 27621 }, { "epoch": 1.5041282936159495, "grad_norm": 0.4985326885128025, "learning_rate": 3.054417198217333e-05, "loss": 11.7613, "step": 27622 }, { "epoch": 1.5041827476125325, "grad_norm": 0.5408151241099561, "learning_rate": 3.0537828075945016e-05, "loss": 11.9199, "step": 27623 }, { "epoch": 1.5042372016091154, "grad_norm": 0.5483937991568044, "learning_rate": 3.053148470986617e-05, "loss": 11.9476, "step": 27624 }, { "epoch": 1.5042916556056987, "grad_norm": 0.6080094206391734, "learning_rate": 3.052514188398617e-05, "loss": 11.7556, "step": 27625 }, { "epoch": 1.5043461096022817, "grad_norm": 0.5883997187373496, "learning_rate": 3.05187995983543e-05, "loss": 11.8255, "step": 27626 }, { "epoch": 1.5044005635988646, "grad_norm": 0.563012064872864, "learning_rate": 3.0512457853019917e-05, "loss": 11.9002, "step": 27627 }, { "epoch": 1.5044550175954476, "grad_norm": 0.566417441781685, "learning_rate": 3.050611664803228e-05, "loss": 11.7473, "step": 27628 }, { "epoch": 1.5045094715920306, "grad_norm": 0.5262720486523642, "learning_rate": 3.049977598344075e-05, "loss": 11.9355, "step": 27629 }, { "epoch": 1.5045639255886136, "grad_norm": 0.5155669848516421, "learning_rate": 3.0493435859294628e-05, "loss": 11.7297, "step": 27630 }, { "epoch": 1.5046183795851968, "grad_norm": 0.5817023527518846, "learning_rate": 3.0487096275643224e-05, "loss": 11.8661, "step": 27631 }, { "epoch": 1.5046728335817798, "grad_norm": 0.5217576368753092, "learning_rate": 3.0480757232535772e-05, "loss": 11.8647, "step": 27632 }, { "epoch": 1.5047272875783628, "grad_norm": 0.5230937332016243, "learning_rate": 3.0474418730021648e-05, "loss": 11.8024, "step": 27633 }, { "epoch": 1.5047817415749458, "grad_norm": 0.5179976205184285, "learning_rate": 3.0468080768150076e-05, "loss": 11.7769, "step": 27634 }, { "epoch": 1.5048361955715288, "grad_norm": 0.5224160824425776, "learning_rate": 3.0461743346970395e-05, "loss": 11.9731, "step": 27635 }, { "epoch": 1.5048906495681118, "grad_norm": 0.5364814966305287, "learning_rate": 3.045540646653182e-05, "loss": 11.7945, "step": 27636 }, { "epoch": 1.5049451035646948, "grad_norm": 0.5789394424236494, "learning_rate": 3.0449070126883707e-05, "loss": 11.8448, "step": 27637 }, { "epoch": 1.5049995575612778, "grad_norm": 0.5730519306389764, "learning_rate": 3.0442734328075263e-05, "loss": 11.8303, "step": 27638 }, { "epoch": 1.5050540115578608, "grad_norm": 0.5712768170623889, "learning_rate": 3.0436399070155774e-05, "loss": 11.9416, "step": 27639 }, { "epoch": 1.5051084655544438, "grad_norm": 0.5352127555076341, "learning_rate": 3.0430064353174538e-05, "loss": 11.8445, "step": 27640 }, { "epoch": 1.5051629195510268, "grad_norm": 0.5426642678899108, "learning_rate": 3.0423730177180797e-05, "loss": 11.8103, "step": 27641 }, { "epoch": 1.5052173735476098, "grad_norm": 0.5461720622822323, "learning_rate": 3.0417396542223798e-05, "loss": 11.9051, "step": 27642 }, { "epoch": 1.5052718275441928, "grad_norm": 0.5544016741604803, "learning_rate": 3.041106344835275e-05, "loss": 11.8508, "step": 27643 }, { "epoch": 1.5053262815407757, "grad_norm": 0.5802545894786407, "learning_rate": 3.040473089561695e-05, "loss": 11.7987, "step": 27644 }, { "epoch": 1.5053807355373587, "grad_norm": 0.5435056876544804, "learning_rate": 3.039839888406567e-05, "loss": 11.7582, "step": 27645 }, { "epoch": 1.5054351895339417, "grad_norm": 0.5498441096741119, "learning_rate": 3.0392067413748083e-05, "loss": 11.8857, "step": 27646 }, { "epoch": 1.5054896435305247, "grad_norm": 0.5868732495443124, "learning_rate": 3.0385736484713477e-05, "loss": 11.7373, "step": 27647 }, { "epoch": 1.5055440975271077, "grad_norm": 0.5631848433435084, "learning_rate": 3.037940609701102e-05, "loss": 11.7864, "step": 27648 }, { "epoch": 1.505598551523691, "grad_norm": 0.5860801684071715, "learning_rate": 3.0373076250690026e-05, "loss": 11.7146, "step": 27649 }, { "epoch": 1.505653005520274, "grad_norm": 0.5585283707571668, "learning_rate": 3.036674694579962e-05, "loss": 11.8924, "step": 27650 }, { "epoch": 1.505707459516857, "grad_norm": 0.5920091119800553, "learning_rate": 3.0360418182389105e-05, "loss": 11.9214, "step": 27651 }, { "epoch": 1.50576191351344, "grad_norm": 0.5741591184526227, "learning_rate": 3.035408996050766e-05, "loss": 11.8055, "step": 27652 }, { "epoch": 1.505816367510023, "grad_norm": 0.4973276984223243, "learning_rate": 3.0347762280204462e-05, "loss": 11.7137, "step": 27653 }, { "epoch": 1.5058708215066061, "grad_norm": 0.5516625562789796, "learning_rate": 3.0341435141528763e-05, "loss": 11.7877, "step": 27654 }, { "epoch": 1.505925275503189, "grad_norm": 0.5708887913135768, "learning_rate": 3.033510854452972e-05, "loss": 11.9488, "step": 27655 }, { "epoch": 1.505979729499772, "grad_norm": 0.5741516223263851, "learning_rate": 3.032878248925657e-05, "loss": 11.9446, "step": 27656 }, { "epoch": 1.506034183496355, "grad_norm": 0.5531204656209718, "learning_rate": 3.0322456975758505e-05, "loss": 11.8402, "step": 27657 }, { "epoch": 1.506088637492938, "grad_norm": 0.6218445852612702, "learning_rate": 3.0316132004084674e-05, "loss": 11.9344, "step": 27658 }, { "epoch": 1.506143091489521, "grad_norm": 0.5769309911162305, "learning_rate": 3.030980757428432e-05, "loss": 11.9579, "step": 27659 }, { "epoch": 1.506197545486104, "grad_norm": 0.5402758990034201, "learning_rate": 3.0303483686406555e-05, "loss": 11.889, "step": 27660 }, { "epoch": 1.506251999482687, "grad_norm": 0.6158061534664355, "learning_rate": 3.029716034050062e-05, "loss": 11.792, "step": 27661 }, { "epoch": 1.50630645347927, "grad_norm": 0.5213652094806597, "learning_rate": 3.0290837536615656e-05, "loss": 11.7095, "step": 27662 }, { "epoch": 1.506360907475853, "grad_norm": 0.5524929572467947, "learning_rate": 3.0284515274800807e-05, "loss": 11.8645, "step": 27663 }, { "epoch": 1.506415361472436, "grad_norm": 0.5287964101011609, "learning_rate": 3.0278193555105283e-05, "loss": 11.9561, "step": 27664 }, { "epoch": 1.506469815469019, "grad_norm": 0.5456827261862065, "learning_rate": 3.027187237757818e-05, "loss": 11.7935, "step": 27665 }, { "epoch": 1.506524269465602, "grad_norm": 0.5560675880179424, "learning_rate": 3.0265551742268693e-05, "loss": 11.8534, "step": 27666 }, { "epoch": 1.506578723462185, "grad_norm": 0.4839734380980397, "learning_rate": 3.0259231649226015e-05, "loss": 11.7421, "step": 27667 }, { "epoch": 1.506633177458768, "grad_norm": 0.5314535741315, "learning_rate": 3.0252912098499207e-05, "loss": 11.7904, "step": 27668 }, { "epoch": 1.506687631455351, "grad_norm": 0.5362603181572028, "learning_rate": 3.0246593090137476e-05, "loss": 11.803, "step": 27669 }, { "epoch": 1.506742085451934, "grad_norm": 0.5808088826584501, "learning_rate": 3.0240274624189913e-05, "loss": 11.9047, "step": 27670 }, { "epoch": 1.506796539448517, "grad_norm": 0.578753416107352, "learning_rate": 3.02339567007057e-05, "loss": 11.8853, "step": 27671 }, { "epoch": 1.5068509934451002, "grad_norm": 0.508534977626483, "learning_rate": 3.0227639319733936e-05, "loss": 11.8025, "step": 27672 }, { "epoch": 1.5069054474416832, "grad_norm": 0.5839536048259578, "learning_rate": 3.0221322481323723e-05, "loss": 11.8539, "step": 27673 }, { "epoch": 1.5069599014382662, "grad_norm": 0.5056608957084763, "learning_rate": 3.021500618552424e-05, "loss": 11.8683, "step": 27674 }, { "epoch": 1.5070143554348492, "grad_norm": 0.48905232113237707, "learning_rate": 3.0208690432384546e-05, "loss": 11.7323, "step": 27675 }, { "epoch": 1.5070688094314322, "grad_norm": 0.5823855342076538, "learning_rate": 3.0202375221953805e-05, "loss": 11.8104, "step": 27676 }, { "epoch": 1.5071232634280152, "grad_norm": 0.545191595494048, "learning_rate": 3.019606055428106e-05, "loss": 11.9079, "step": 27677 }, { "epoch": 1.5071777174245984, "grad_norm": 0.5396027113593479, "learning_rate": 3.0189746429415468e-05, "loss": 11.7344, "step": 27678 }, { "epoch": 1.5072321714211814, "grad_norm": 0.5411755554388232, "learning_rate": 3.0183432847406134e-05, "loss": 12.0117, "step": 27679 }, { "epoch": 1.5072866254177644, "grad_norm": 0.5251088975317232, "learning_rate": 3.0177119808302113e-05, "loss": 11.8839, "step": 27680 }, { "epoch": 1.5073410794143474, "grad_norm": 0.5613355898052612, "learning_rate": 3.0170807312152537e-05, "loss": 11.7858, "step": 27681 }, { "epoch": 1.5073955334109304, "grad_norm": 0.5848409840479958, "learning_rate": 3.0164495359006484e-05, "loss": 11.9158, "step": 27682 }, { "epoch": 1.5074499874075133, "grad_norm": 0.5313528696351432, "learning_rate": 3.0158183948912988e-05, "loss": 11.8243, "step": 27683 }, { "epoch": 1.5075044414040963, "grad_norm": 0.5753534768744759, "learning_rate": 3.0151873081921213e-05, "loss": 11.905, "step": 27684 }, { "epoch": 1.5075588954006793, "grad_norm": 0.5783521566718891, "learning_rate": 3.0145562758080137e-05, "loss": 11.8615, "step": 27685 }, { "epoch": 1.5076133493972623, "grad_norm": 0.5092573533801059, "learning_rate": 3.0139252977438914e-05, "loss": 11.8508, "step": 27686 }, { "epoch": 1.5076678033938453, "grad_norm": 0.5199298621276031, "learning_rate": 3.013294374004655e-05, "loss": 11.6786, "step": 27687 }, { "epoch": 1.5077222573904283, "grad_norm": 0.562591696967236, "learning_rate": 3.0126635045952133e-05, "loss": 11.7552, "step": 27688 }, { "epoch": 1.5077767113870113, "grad_norm": 0.567123205368442, "learning_rate": 3.0120326895204753e-05, "loss": 11.9026, "step": 27689 }, { "epoch": 1.5078311653835943, "grad_norm": 0.5685875255988978, "learning_rate": 3.011401928785339e-05, "loss": 11.7773, "step": 27690 }, { "epoch": 1.5078856193801773, "grad_norm": 0.5327316258202117, "learning_rate": 3.0107712223947203e-05, "loss": 11.7722, "step": 27691 }, { "epoch": 1.5079400733767603, "grad_norm": 0.5250633768345059, "learning_rate": 3.0101405703535103e-05, "loss": 11.8296, "step": 27692 }, { "epoch": 1.5079945273733433, "grad_norm": 0.5514445851102612, "learning_rate": 3.0095099726666187e-05, "loss": 11.8859, "step": 27693 }, { "epoch": 1.5080489813699263, "grad_norm": 0.6038059950739845, "learning_rate": 3.0088794293389532e-05, "loss": 11.8462, "step": 27694 }, { "epoch": 1.5081034353665095, "grad_norm": 0.5387197878737624, "learning_rate": 3.008248940375411e-05, "loss": 11.9464, "step": 27695 }, { "epoch": 1.5081578893630925, "grad_norm": 0.5045327878282647, "learning_rate": 3.0076185057809003e-05, "loss": 11.8414, "step": 27696 }, { "epoch": 1.5082123433596755, "grad_norm": 0.543435502192343, "learning_rate": 3.0069881255603182e-05, "loss": 11.7743, "step": 27697 }, { "epoch": 1.5082667973562585, "grad_norm": 0.49555653997269605, "learning_rate": 3.006357799718572e-05, "loss": 11.8638, "step": 27698 }, { "epoch": 1.5083212513528415, "grad_norm": 0.6831213653704303, "learning_rate": 3.005727528260557e-05, "loss": 11.8205, "step": 27699 }, { "epoch": 1.5083757053494244, "grad_norm": 0.5093461458203183, "learning_rate": 3.0050973111911772e-05, "loss": 11.8062, "step": 27700 }, { "epoch": 1.5084301593460077, "grad_norm": 0.5545139771784615, "learning_rate": 3.004467148515341e-05, "loss": 11.8289, "step": 27701 }, { "epoch": 1.5084846133425907, "grad_norm": 0.6564560130369709, "learning_rate": 3.0038370402379344e-05, "loss": 11.7958, "step": 27702 }, { "epoch": 1.5085390673391736, "grad_norm": 0.5399395695077479, "learning_rate": 3.0032069863638678e-05, "loss": 11.8155, "step": 27703 }, { "epoch": 1.5085935213357566, "grad_norm": 0.5239996833966908, "learning_rate": 3.0025769868980335e-05, "loss": 11.8158, "step": 27704 }, { "epoch": 1.5086479753323396, "grad_norm": 0.527652555018816, "learning_rate": 3.0019470418453345e-05, "loss": 11.877, "step": 27705 }, { "epoch": 1.5087024293289226, "grad_norm": 0.5482037668581788, "learning_rate": 3.0013171512106718e-05, "loss": 11.9077, "step": 27706 }, { "epoch": 1.5087568833255056, "grad_norm": 0.5593091392044128, "learning_rate": 3.0006873149989377e-05, "loss": 11.8471, "step": 27707 }, { "epoch": 1.5088113373220886, "grad_norm": 0.5489678883026025, "learning_rate": 3.000057533215036e-05, "loss": 11.9846, "step": 27708 }, { "epoch": 1.5088657913186716, "grad_norm": 0.5048261198947238, "learning_rate": 2.999427805863858e-05, "loss": 11.767, "step": 27709 }, { "epoch": 1.5089202453152546, "grad_norm": 0.5385155346180013, "learning_rate": 2.9987981329503056e-05, "loss": 11.802, "step": 27710 }, { "epoch": 1.5089746993118376, "grad_norm": 0.5682527811224917, "learning_rate": 2.9981685144792737e-05, "loss": 11.7803, "step": 27711 }, { "epoch": 1.5090291533084206, "grad_norm": 0.5867884195522941, "learning_rate": 2.9975389504556538e-05, "loss": 11.7611, "step": 27712 }, { "epoch": 1.5090836073050036, "grad_norm": 0.5388582827725946, "learning_rate": 2.996909440884349e-05, "loss": 11.8301, "step": 27713 }, { "epoch": 1.5091380613015866, "grad_norm": 0.5467698982091564, "learning_rate": 2.9962799857702474e-05, "loss": 11.7841, "step": 27714 }, { "epoch": 1.5091925152981696, "grad_norm": 0.6212780696901338, "learning_rate": 2.995650585118247e-05, "loss": 11.985, "step": 27715 }, { "epoch": 1.5092469692947525, "grad_norm": 0.5645021872395943, "learning_rate": 2.9950212389332466e-05, "loss": 11.8428, "step": 27716 }, { "epoch": 1.5093014232913355, "grad_norm": 0.5347293351712962, "learning_rate": 2.994391947220131e-05, "loss": 11.7633, "step": 27717 }, { "epoch": 1.5093558772879185, "grad_norm": 0.6065627903828362, "learning_rate": 2.993762709983803e-05, "loss": 11.8202, "step": 27718 }, { "epoch": 1.5094103312845017, "grad_norm": 0.5478554910871952, "learning_rate": 2.9931335272291472e-05, "loss": 11.8343, "step": 27719 }, { "epoch": 1.5094647852810847, "grad_norm": 0.5887533294741611, "learning_rate": 2.9925043989610635e-05, "loss": 11.9709, "step": 27720 }, { "epoch": 1.5095192392776677, "grad_norm": 0.5367691135942377, "learning_rate": 2.99187532518444e-05, "loss": 11.7524, "step": 27721 }, { "epoch": 1.5095736932742507, "grad_norm": 0.5920294223461078, "learning_rate": 2.9912463059041673e-05, "loss": 11.8158, "step": 27722 }, { "epoch": 1.5096281472708337, "grad_norm": 0.5799886961127216, "learning_rate": 2.9906173411251414e-05, "loss": 11.9282, "step": 27723 }, { "epoch": 1.509682601267417, "grad_norm": 0.5420188804440307, "learning_rate": 2.9899884308522475e-05, "loss": 11.7917, "step": 27724 }, { "epoch": 1.509737055264, "grad_norm": 0.6077614752035385, "learning_rate": 2.9893595750903813e-05, "loss": 11.8512, "step": 27725 }, { "epoch": 1.509791509260583, "grad_norm": 0.500352912740398, "learning_rate": 2.9887307738444293e-05, "loss": 11.8185, "step": 27726 }, { "epoch": 1.509845963257166, "grad_norm": 0.6414151378311607, "learning_rate": 2.9881020271192806e-05, "loss": 11.8646, "step": 27727 }, { "epoch": 1.509900417253749, "grad_norm": 0.562359974934004, "learning_rate": 2.9874733349198315e-05, "loss": 11.8485, "step": 27728 }, { "epoch": 1.509954871250332, "grad_norm": 0.5324743453276087, "learning_rate": 2.9868446972509612e-05, "loss": 11.9092, "step": 27729 }, { "epoch": 1.510009325246915, "grad_norm": 0.6062978713611572, "learning_rate": 2.986216114117566e-05, "loss": 11.7869, "step": 27730 }, { "epoch": 1.5100637792434979, "grad_norm": 0.5741654731406654, "learning_rate": 2.985587585524532e-05, "loss": 11.872, "step": 27731 }, { "epoch": 1.5101182332400809, "grad_norm": 0.5104704082013496, "learning_rate": 2.9849591114767406e-05, "loss": 11.7474, "step": 27732 }, { "epoch": 1.5101726872366639, "grad_norm": 0.674178818188482, "learning_rate": 2.984330691979087e-05, "loss": 11.9635, "step": 27733 }, { "epoch": 1.5102271412332469, "grad_norm": 0.5652969158957791, "learning_rate": 2.9837023270364506e-05, "loss": 11.9251, "step": 27734 }, { "epoch": 1.5102815952298299, "grad_norm": 0.5609797645699056, "learning_rate": 2.9830740166537264e-05, "loss": 11.8837, "step": 27735 }, { "epoch": 1.5103360492264128, "grad_norm": 0.5159221518667632, "learning_rate": 2.9824457608357902e-05, "loss": 11.8842, "step": 27736 }, { "epoch": 1.5103905032229958, "grad_norm": 0.5129693179919498, "learning_rate": 2.9818175595875342e-05, "loss": 11.6384, "step": 27737 }, { "epoch": 1.5104449572195788, "grad_norm": 0.606667141464947, "learning_rate": 2.9811894129138452e-05, "loss": 11.9409, "step": 27738 }, { "epoch": 1.5104994112161618, "grad_norm": 0.5693205394135664, "learning_rate": 2.9805613208196003e-05, "loss": 11.8966, "step": 27739 }, { "epoch": 1.5105538652127448, "grad_norm": 0.5250548845680687, "learning_rate": 2.9799332833096906e-05, "loss": 11.711, "step": 27740 }, { "epoch": 1.5106083192093278, "grad_norm": 0.5524343060557678, "learning_rate": 2.979305300388997e-05, "loss": 11.8614, "step": 27741 }, { "epoch": 1.510662773205911, "grad_norm": 0.5485382355577155, "learning_rate": 2.9786773720624007e-05, "loss": 11.8623, "step": 27742 }, { "epoch": 1.510717227202494, "grad_norm": 0.5195175777305084, "learning_rate": 2.9780494983347885e-05, "loss": 11.8537, "step": 27743 }, { "epoch": 1.510771681199077, "grad_norm": 0.5646632657230068, "learning_rate": 2.9774216792110386e-05, "loss": 11.9142, "step": 27744 }, { "epoch": 1.51082613519566, "grad_norm": 0.5326355239330797, "learning_rate": 2.976793914696039e-05, "loss": 11.7431, "step": 27745 }, { "epoch": 1.510880589192243, "grad_norm": 0.5640738171778821, "learning_rate": 2.9761662047946638e-05, "loss": 11.8633, "step": 27746 }, { "epoch": 1.510935043188826, "grad_norm": 0.6548038237412614, "learning_rate": 2.9755385495118014e-05, "loss": 11.8872, "step": 27747 }, { "epoch": 1.5109894971854092, "grad_norm": 0.6111004721881695, "learning_rate": 2.9749109488523265e-05, "loss": 11.8724, "step": 27748 }, { "epoch": 1.5110439511819922, "grad_norm": 0.5840157481701392, "learning_rate": 2.9742834028211207e-05, "loss": 11.888, "step": 27749 }, { "epoch": 1.5110984051785752, "grad_norm": 0.512270983141964, "learning_rate": 2.97365591142307e-05, "loss": 11.8493, "step": 27750 }, { "epoch": 1.5111528591751582, "grad_norm": 0.5047354628998809, "learning_rate": 2.9730284746630454e-05, "loss": 11.7814, "step": 27751 }, { "epoch": 1.5112073131717412, "grad_norm": 0.5192742936666448, "learning_rate": 2.9724010925459368e-05, "loss": 11.9035, "step": 27752 }, { "epoch": 1.5112617671683242, "grad_norm": 0.5700787671591359, "learning_rate": 2.9717737650766085e-05, "loss": 11.7511, "step": 27753 }, { "epoch": 1.5113162211649072, "grad_norm": 0.4802551680400022, "learning_rate": 2.9711464922599474e-05, "loss": 11.6913, "step": 27754 }, { "epoch": 1.5113706751614902, "grad_norm": 0.5204942213256752, "learning_rate": 2.9705192741008325e-05, "loss": 11.7288, "step": 27755 }, { "epoch": 1.5114251291580731, "grad_norm": 0.5405846651405644, "learning_rate": 2.9698921106041354e-05, "loss": 11.797, "step": 27756 }, { "epoch": 1.5114795831546561, "grad_norm": 0.5781230033029797, "learning_rate": 2.969265001774739e-05, "loss": 11.7932, "step": 27757 }, { "epoch": 1.5115340371512391, "grad_norm": 0.5185954624122529, "learning_rate": 2.968637947617514e-05, "loss": 11.756, "step": 27758 }, { "epoch": 1.5115884911478221, "grad_norm": 0.5698397862436131, "learning_rate": 2.968010948137343e-05, "loss": 11.8523, "step": 27759 }, { "epoch": 1.5116429451444051, "grad_norm": 0.5756019090718169, "learning_rate": 2.9673840033390943e-05, "loss": 11.8631, "step": 27760 }, { "epoch": 1.511697399140988, "grad_norm": 0.5585769282172337, "learning_rate": 2.9667571132276474e-05, "loss": 11.8178, "step": 27761 }, { "epoch": 1.511751853137571, "grad_norm": 0.5598159438504424, "learning_rate": 2.9661302778078826e-05, "loss": 11.8843, "step": 27762 }, { "epoch": 1.511806307134154, "grad_norm": 0.5718899983595637, "learning_rate": 2.965503497084663e-05, "loss": 11.9957, "step": 27763 }, { "epoch": 1.511860761130737, "grad_norm": 0.5435589226540075, "learning_rate": 2.9648767710628665e-05, "loss": 11.9282, "step": 27764 }, { "epoch": 1.5119152151273203, "grad_norm": 0.5583589531844237, "learning_rate": 2.964250099747372e-05, "loss": 11.8914, "step": 27765 }, { "epoch": 1.5119696691239033, "grad_norm": 0.6002200385594159, "learning_rate": 2.9636234831430454e-05, "loss": 11.7539, "step": 27766 }, { "epoch": 1.5120241231204863, "grad_norm": 0.5466406512985008, "learning_rate": 2.962996921254766e-05, "loss": 11.797, "step": 27767 }, { "epoch": 1.5120785771170693, "grad_norm": 0.49651751791199267, "learning_rate": 2.9623704140873986e-05, "loss": 11.8796, "step": 27768 }, { "epoch": 1.5121330311136523, "grad_norm": 0.5640010234271187, "learning_rate": 2.961743961645823e-05, "loss": 11.9479, "step": 27769 }, { "epoch": 1.5121874851102353, "grad_norm": 0.4978289056008298, "learning_rate": 2.961117563934903e-05, "loss": 11.758, "step": 27770 }, { "epoch": 1.5122419391068185, "grad_norm": 0.5258343399477016, "learning_rate": 2.9604912209595136e-05, "loss": 11.7486, "step": 27771 }, { "epoch": 1.5122963931034015, "grad_norm": 0.5353552731019385, "learning_rate": 2.959864932724532e-05, "loss": 11.8828, "step": 27772 }, { "epoch": 1.5123508470999845, "grad_norm": 0.5380838597696318, "learning_rate": 2.959238699234814e-05, "loss": 11.9841, "step": 27773 }, { "epoch": 1.5124053010965675, "grad_norm": 0.5472535488504849, "learning_rate": 2.9586125204952398e-05, "loss": 11.7373, "step": 27774 }, { "epoch": 1.5124597550931504, "grad_norm": 0.49168558506863086, "learning_rate": 2.9579863965106724e-05, "loss": 11.677, "step": 27775 }, { "epoch": 1.5125142090897334, "grad_norm": 0.6033380433241394, "learning_rate": 2.957360327285984e-05, "loss": 11.9156, "step": 27776 }, { "epoch": 1.5125686630863164, "grad_norm": 0.5972369825776153, "learning_rate": 2.956734312826046e-05, "loss": 11.9468, "step": 27777 }, { "epoch": 1.5126231170828994, "grad_norm": 0.5765631999622778, "learning_rate": 2.9561083531357203e-05, "loss": 11.8256, "step": 27778 }, { "epoch": 1.5126775710794824, "grad_norm": 0.5718339356550207, "learning_rate": 2.9554824482198794e-05, "loss": 11.9699, "step": 27779 }, { "epoch": 1.5127320250760654, "grad_norm": 0.5322174718543619, "learning_rate": 2.9548565980833854e-05, "loss": 11.8512, "step": 27780 }, { "epoch": 1.5127864790726484, "grad_norm": 0.5482797675858028, "learning_rate": 2.9542308027311116e-05, "loss": 11.9528, "step": 27781 }, { "epoch": 1.5128409330692314, "grad_norm": 0.5400244281915337, "learning_rate": 2.953605062167921e-05, "loss": 11.7887, "step": 27782 }, { "epoch": 1.5128953870658144, "grad_norm": 0.6011909227453015, "learning_rate": 2.9529793763986758e-05, "loss": 11.8717, "step": 27783 }, { "epoch": 1.5129498410623974, "grad_norm": 0.5506693073369622, "learning_rate": 2.952353745428247e-05, "loss": 11.7844, "step": 27784 }, { "epoch": 1.5130042950589804, "grad_norm": 0.5740683367698717, "learning_rate": 2.9517281692614952e-05, "loss": 11.9149, "step": 27785 }, { "epoch": 1.5130587490555634, "grad_norm": 0.5624485248051482, "learning_rate": 2.9511026479032867e-05, "loss": 11.9163, "step": 27786 }, { "epoch": 1.5131132030521464, "grad_norm": 0.54204018303972, "learning_rate": 2.9504771813584887e-05, "loss": 11.9727, "step": 27787 }, { "epoch": 1.5131676570487296, "grad_norm": 0.5594505465196601, "learning_rate": 2.9498517696319605e-05, "loss": 11.9219, "step": 27788 }, { "epoch": 1.5132221110453126, "grad_norm": 0.6689923677952158, "learning_rate": 2.9492264127285695e-05, "loss": 12.0173, "step": 27789 }, { "epoch": 1.5132765650418956, "grad_norm": 0.5000079040973773, "learning_rate": 2.948601110653173e-05, "loss": 11.8615, "step": 27790 }, { "epoch": 1.5133310190384786, "grad_norm": 0.5409981726138181, "learning_rate": 2.94797586341064e-05, "loss": 11.7796, "step": 27791 }, { "epoch": 1.5133854730350615, "grad_norm": 0.5024907674996787, "learning_rate": 2.947350671005831e-05, "loss": 11.8399, "step": 27792 }, { "epoch": 1.5134399270316445, "grad_norm": 0.5657093388356965, "learning_rate": 2.9467255334436006e-05, "loss": 11.8278, "step": 27793 }, { "epoch": 1.5134943810282278, "grad_norm": 0.512819115467977, "learning_rate": 2.9461004507288194e-05, "loss": 11.8364, "step": 27794 }, { "epoch": 1.5135488350248107, "grad_norm": 0.5853750031511613, "learning_rate": 2.9454754228663407e-05, "loss": 11.7854, "step": 27795 }, { "epoch": 1.5136032890213937, "grad_norm": 0.5658294543457406, "learning_rate": 2.9448504498610307e-05, "loss": 11.9533, "step": 27796 }, { "epoch": 1.5136577430179767, "grad_norm": 0.516569400584985, "learning_rate": 2.9442255317177448e-05, "loss": 11.7878, "step": 27797 }, { "epoch": 1.5137121970145597, "grad_norm": 0.5623119523680566, "learning_rate": 2.9436006684413444e-05, "loss": 11.8565, "step": 27798 }, { "epoch": 1.5137666510111427, "grad_norm": 0.5305491088570766, "learning_rate": 2.9429758600366906e-05, "loss": 11.7577, "step": 27799 }, { "epoch": 1.5138211050077257, "grad_norm": 0.4661915877437278, "learning_rate": 2.9423511065086375e-05, "loss": 11.6817, "step": 27800 }, { "epoch": 1.5138755590043087, "grad_norm": 0.6026615705906947, "learning_rate": 2.941726407862049e-05, "loss": 11.9328, "step": 27801 }, { "epoch": 1.5139300130008917, "grad_norm": 0.5188263326926184, "learning_rate": 2.941101764101779e-05, "loss": 11.8143, "step": 27802 }, { "epoch": 1.5139844669974747, "grad_norm": 0.5143942788249989, "learning_rate": 2.940477175232683e-05, "loss": 11.8826, "step": 27803 }, { "epoch": 1.5140389209940577, "grad_norm": 0.5289844667408242, "learning_rate": 2.9398526412596228e-05, "loss": 11.6707, "step": 27804 }, { "epoch": 1.5140933749906407, "grad_norm": 0.5190835605898211, "learning_rate": 2.9392281621874495e-05, "loss": 11.8505, "step": 27805 }, { "epoch": 1.5141478289872237, "grad_norm": 0.6307976520850446, "learning_rate": 2.938603738021026e-05, "loss": 11.8924, "step": 27806 }, { "epoch": 1.5142022829838067, "grad_norm": 0.5463165588990535, "learning_rate": 2.9379793687652003e-05, "loss": 11.8728, "step": 27807 }, { "epoch": 1.5142567369803897, "grad_norm": 0.5091881255534446, "learning_rate": 2.937355054424835e-05, "loss": 11.7606, "step": 27808 }, { "epoch": 1.5143111909769726, "grad_norm": 0.5107749718863238, "learning_rate": 2.9367307950047775e-05, "loss": 11.6613, "step": 27809 }, { "epoch": 1.5143656449735556, "grad_norm": 0.605009771388253, "learning_rate": 2.9361065905098862e-05, "loss": 11.8747, "step": 27810 }, { "epoch": 1.5144200989701386, "grad_norm": 0.6343331401690552, "learning_rate": 2.9354824409450165e-05, "loss": 11.9454, "step": 27811 }, { "epoch": 1.5144745529667218, "grad_norm": 0.5961261935633988, "learning_rate": 2.9348583463150215e-05, "loss": 11.9897, "step": 27812 }, { "epoch": 1.5145290069633048, "grad_norm": 0.5260408723621902, "learning_rate": 2.934234306624749e-05, "loss": 11.8594, "step": 27813 }, { "epoch": 1.5145834609598878, "grad_norm": 0.5676865393284748, "learning_rate": 2.933610321879059e-05, "loss": 11.6873, "step": 27814 }, { "epoch": 1.5146379149564708, "grad_norm": 0.5789886000789263, "learning_rate": 2.9329863920827973e-05, "loss": 12.0022, "step": 27815 }, { "epoch": 1.5146923689530538, "grad_norm": 0.5660964656367419, "learning_rate": 2.932362517240822e-05, "loss": 11.9032, "step": 27816 }, { "epoch": 1.5147468229496368, "grad_norm": 0.5911587393227951, "learning_rate": 2.9317386973579764e-05, "loss": 11.9297, "step": 27817 }, { "epoch": 1.51480127694622, "grad_norm": 0.5413495498279537, "learning_rate": 2.931114932439121e-05, "loss": 11.8286, "step": 27818 }, { "epoch": 1.514855730942803, "grad_norm": 0.5625383941350024, "learning_rate": 2.930491222489097e-05, "loss": 11.887, "step": 27819 }, { "epoch": 1.514910184939386, "grad_norm": 0.5705753135611374, "learning_rate": 2.9298675675127586e-05, "loss": 11.9516, "step": 27820 }, { "epoch": 1.514964638935969, "grad_norm": 0.5495970961301904, "learning_rate": 2.9292439675149587e-05, "loss": 11.8987, "step": 27821 }, { "epoch": 1.515019092932552, "grad_norm": 0.5799716159335289, "learning_rate": 2.928620422500544e-05, "loss": 11.807, "step": 27822 }, { "epoch": 1.515073546929135, "grad_norm": 0.5299167729543424, "learning_rate": 2.927996932474363e-05, "loss": 11.8671, "step": 27823 }, { "epoch": 1.515128000925718, "grad_norm": 0.549514754283632, "learning_rate": 2.9273734974412605e-05, "loss": 11.9484, "step": 27824 }, { "epoch": 1.515182454922301, "grad_norm": 0.6565246978050049, "learning_rate": 2.9267501174060873e-05, "loss": 11.8285, "step": 27825 }, { "epoch": 1.515236908918884, "grad_norm": 0.5094424060972945, "learning_rate": 2.9261267923736958e-05, "loss": 11.8731, "step": 27826 }, { "epoch": 1.515291362915467, "grad_norm": 0.5401169161084873, "learning_rate": 2.9255035223489236e-05, "loss": 11.8489, "step": 27827 }, { "epoch": 1.51534581691205, "grad_norm": 0.534441901673353, "learning_rate": 2.9248803073366272e-05, "loss": 11.8566, "step": 27828 }, { "epoch": 1.515400270908633, "grad_norm": 0.5261033805036803, "learning_rate": 2.9242571473416436e-05, "loss": 11.8294, "step": 27829 }, { "epoch": 1.515454724905216, "grad_norm": 0.6081218612704858, "learning_rate": 2.9236340423688268e-05, "loss": 11.9544, "step": 27830 }, { "epoch": 1.515509178901799, "grad_norm": 0.5769409886346919, "learning_rate": 2.9230109924230152e-05, "loss": 11.925, "step": 27831 }, { "epoch": 1.515563632898382, "grad_norm": 0.5710254215367669, "learning_rate": 2.9223879975090606e-05, "loss": 11.9083, "step": 27832 }, { "epoch": 1.515618086894965, "grad_norm": 0.5514124110800704, "learning_rate": 2.9217650576318036e-05, "loss": 11.8565, "step": 27833 }, { "epoch": 1.515672540891548, "grad_norm": 0.5206379089906886, "learning_rate": 2.9211421727960854e-05, "loss": 11.7541, "step": 27834 }, { "epoch": 1.5157269948881311, "grad_norm": 0.537492180588417, "learning_rate": 2.9205193430067525e-05, "loss": 11.8885, "step": 27835 }, { "epoch": 1.515781448884714, "grad_norm": 0.5459418827556682, "learning_rate": 2.919896568268652e-05, "loss": 11.741, "step": 27836 }, { "epoch": 1.515835902881297, "grad_norm": 0.5690263196747039, "learning_rate": 2.919273848586619e-05, "loss": 11.7847, "step": 27837 }, { "epoch": 1.51589035687788, "grad_norm": 0.518363252654648, "learning_rate": 2.918651183965504e-05, "loss": 11.7857, "step": 27838 }, { "epoch": 1.515944810874463, "grad_norm": 0.5698791668369638, "learning_rate": 2.918028574410141e-05, "loss": 11.724, "step": 27839 }, { "epoch": 1.515999264871046, "grad_norm": 0.6821101984848678, "learning_rate": 2.9174060199253794e-05, "loss": 11.8832, "step": 27840 }, { "epoch": 1.5160537188676293, "grad_norm": 0.5393663450828089, "learning_rate": 2.9167835205160532e-05, "loss": 11.7887, "step": 27841 }, { "epoch": 1.5161081728642123, "grad_norm": 0.6874066226660146, "learning_rate": 2.9161610761870082e-05, "loss": 11.7755, "step": 27842 }, { "epoch": 1.5161626268607953, "grad_norm": 0.5503605609436028, "learning_rate": 2.9155386869430847e-05, "loss": 11.8203, "step": 27843 }, { "epoch": 1.5162170808573783, "grad_norm": 0.5233963511568601, "learning_rate": 2.9149163527891154e-05, "loss": 11.8154, "step": 27844 }, { "epoch": 1.5162715348539613, "grad_norm": 0.635581410885624, "learning_rate": 2.9142940737299485e-05, "loss": 11.8079, "step": 27845 }, { "epoch": 1.5163259888505443, "grad_norm": 0.6003910004603072, "learning_rate": 2.9136718497704164e-05, "loss": 11.8677, "step": 27846 }, { "epoch": 1.5163804428471273, "grad_norm": 0.55421737256066, "learning_rate": 2.9130496809153597e-05, "loss": 11.8329, "step": 27847 }, { "epoch": 1.5164348968437102, "grad_norm": 0.5580708026299369, "learning_rate": 2.9124275671696212e-05, "loss": 11.9821, "step": 27848 }, { "epoch": 1.5164893508402932, "grad_norm": 0.521589600014856, "learning_rate": 2.9118055085380303e-05, "loss": 11.8927, "step": 27849 }, { "epoch": 1.5165438048368762, "grad_norm": 0.5953309181436236, "learning_rate": 2.9111835050254323e-05, "loss": 11.9729, "step": 27850 }, { "epoch": 1.5165982588334592, "grad_norm": 0.5634892078543181, "learning_rate": 2.9105615566366563e-05, "loss": 11.8954, "step": 27851 }, { "epoch": 1.5166527128300422, "grad_norm": 0.5375423234571668, "learning_rate": 2.9099396633765464e-05, "loss": 11.7917, "step": 27852 }, { "epoch": 1.5167071668266252, "grad_norm": 0.7058152558283332, "learning_rate": 2.9093178252499344e-05, "loss": 12.0143, "step": 27853 }, { "epoch": 1.5167616208232082, "grad_norm": 0.627399877302454, "learning_rate": 2.9086960422616527e-05, "loss": 11.8652, "step": 27854 }, { "epoch": 1.5168160748197912, "grad_norm": 0.5432207752183109, "learning_rate": 2.9080743144165433e-05, "loss": 11.9182, "step": 27855 }, { "epoch": 1.5168705288163742, "grad_norm": 0.5795550600974179, "learning_rate": 2.9074526417194347e-05, "loss": 11.918, "step": 27856 }, { "epoch": 1.5169249828129572, "grad_norm": 0.6469021761797537, "learning_rate": 2.906831024175166e-05, "loss": 11.8331, "step": 27857 }, { "epoch": 1.5169794368095404, "grad_norm": 0.5351030400617772, "learning_rate": 2.906209461788566e-05, "loss": 11.8457, "step": 27858 }, { "epoch": 1.5170338908061234, "grad_norm": 0.5242390532435152, "learning_rate": 2.9055879545644716e-05, "loss": 11.8352, "step": 27859 }, { "epoch": 1.5170883448027064, "grad_norm": 0.536360864304305, "learning_rate": 2.9049665025077178e-05, "loss": 11.7729, "step": 27860 }, { "epoch": 1.5171427987992894, "grad_norm": 0.5182475054363096, "learning_rate": 2.9043451056231295e-05, "loss": 11.8093, "step": 27861 }, { "epoch": 1.5171972527958724, "grad_norm": 0.5674892112315953, "learning_rate": 2.9037237639155492e-05, "loss": 11.9089, "step": 27862 }, { "epoch": 1.5172517067924554, "grad_norm": 0.48246487052542764, "learning_rate": 2.9031024773898018e-05, "loss": 11.7977, "step": 27863 }, { "epoch": 1.5173061607890386, "grad_norm": 0.6185070494429271, "learning_rate": 2.9024812460507154e-05, "loss": 11.7855, "step": 27864 }, { "epoch": 1.5173606147856216, "grad_norm": 0.5718141434432262, "learning_rate": 2.9018600699031294e-05, "loss": 11.9102, "step": 27865 }, { "epoch": 1.5174150687822046, "grad_norm": 0.5511896418875497, "learning_rate": 2.9012389489518677e-05, "loss": 11.823, "step": 27866 }, { "epoch": 1.5174695227787875, "grad_norm": 0.5754939719354365, "learning_rate": 2.9006178832017638e-05, "loss": 11.9012, "step": 27867 }, { "epoch": 1.5175239767753705, "grad_norm": 0.5322506960199332, "learning_rate": 2.8999968726576442e-05, "loss": 11.8449, "step": 27868 }, { "epoch": 1.5175784307719535, "grad_norm": 0.5396397627818631, "learning_rate": 2.8993759173243386e-05, "loss": 11.8189, "step": 27869 }, { "epoch": 1.5176328847685365, "grad_norm": 0.5185878582681657, "learning_rate": 2.89875501720668e-05, "loss": 11.895, "step": 27870 }, { "epoch": 1.5176873387651195, "grad_norm": 0.5792286258753058, "learning_rate": 2.8981341723094902e-05, "loss": 11.8972, "step": 27871 }, { "epoch": 1.5177417927617025, "grad_norm": 0.5322116546301725, "learning_rate": 2.8975133826376057e-05, "loss": 11.9257, "step": 27872 }, { "epoch": 1.5177962467582855, "grad_norm": 0.5851757597525883, "learning_rate": 2.8968926481958424e-05, "loss": 11.8221, "step": 27873 }, { "epoch": 1.5178507007548685, "grad_norm": 0.5829732504888988, "learning_rate": 2.896271968989034e-05, "loss": 11.9159, "step": 27874 }, { "epoch": 1.5179051547514515, "grad_norm": 0.5144869012805328, "learning_rate": 2.895651345022008e-05, "loss": 11.6957, "step": 27875 }, { "epoch": 1.5179596087480345, "grad_norm": 0.5280357587995841, "learning_rate": 2.8950307762995853e-05, "loss": 11.8242, "step": 27876 }, { "epoch": 1.5180140627446175, "grad_norm": 0.5394983149057343, "learning_rate": 2.894410262826599e-05, "loss": 11.8521, "step": 27877 }, { "epoch": 1.5180685167412005, "grad_norm": 0.5396967352632152, "learning_rate": 2.893789804607866e-05, "loss": 11.8221, "step": 27878 }, { "epoch": 1.5181229707377835, "grad_norm": 0.5640942151315489, "learning_rate": 2.893169401648218e-05, "loss": 11.9148, "step": 27879 }, { "epoch": 1.5181774247343665, "grad_norm": 0.5478598688414913, "learning_rate": 2.8925490539524746e-05, "loss": 11.8535, "step": 27880 }, { "epoch": 1.5182318787309494, "grad_norm": 0.5769870862339115, "learning_rate": 2.891928761525461e-05, "loss": 12.0046, "step": 27881 }, { "epoch": 1.5182863327275327, "grad_norm": 0.5464094118829763, "learning_rate": 2.8913085243720085e-05, "loss": 11.9371, "step": 27882 }, { "epoch": 1.5183407867241157, "grad_norm": 0.5639966166131771, "learning_rate": 2.8906883424969257e-05, "loss": 11.9645, "step": 27883 }, { "epoch": 1.5183952407206986, "grad_norm": 0.561523889345088, "learning_rate": 2.890068215905043e-05, "loss": 11.8737, "step": 27884 }, { "epoch": 1.5184496947172816, "grad_norm": 0.5584318488250063, "learning_rate": 2.8894481446011847e-05, "loss": 11.7879, "step": 27885 }, { "epoch": 1.5185041487138646, "grad_norm": 0.5195656150142299, "learning_rate": 2.8888281285901674e-05, "loss": 11.8961, "step": 27886 }, { "epoch": 1.5185586027104476, "grad_norm": 0.5258584797947724, "learning_rate": 2.8882081678768193e-05, "loss": 11.8562, "step": 27887 }, { "epoch": 1.5186130567070308, "grad_norm": 0.5705927095621556, "learning_rate": 2.8875882624659524e-05, "loss": 11.9243, "step": 27888 }, { "epoch": 1.5186675107036138, "grad_norm": 0.5482577633922507, "learning_rate": 2.8869684123623963e-05, "loss": 11.8742, "step": 27889 }, { "epoch": 1.5187219647001968, "grad_norm": 0.5771281166583392, "learning_rate": 2.886348617570963e-05, "loss": 11.8703, "step": 27890 }, { "epoch": 1.5187764186967798, "grad_norm": 0.5262282254611751, "learning_rate": 2.8857288780964753e-05, "loss": 11.8149, "step": 27891 }, { "epoch": 1.5188308726933628, "grad_norm": 0.5398936561700859, "learning_rate": 2.8851091939437602e-05, "loss": 11.7759, "step": 27892 }, { "epoch": 1.5188853266899458, "grad_norm": 0.5436349613015419, "learning_rate": 2.8844895651176218e-05, "loss": 11.9125, "step": 27893 }, { "epoch": 1.5189397806865288, "grad_norm": 0.5211540971772278, "learning_rate": 2.8838699916228894e-05, "loss": 11.8642, "step": 27894 }, { "epoch": 1.5189942346831118, "grad_norm": 0.5274338892558537, "learning_rate": 2.883250473464374e-05, "loss": 11.8229, "step": 27895 }, { "epoch": 1.5190486886796948, "grad_norm": 0.5211417927003539, "learning_rate": 2.8826310106468968e-05, "loss": 11.7099, "step": 27896 }, { "epoch": 1.5191031426762778, "grad_norm": 0.5494026998814043, "learning_rate": 2.8820116031752774e-05, "loss": 11.807, "step": 27897 }, { "epoch": 1.5191575966728608, "grad_norm": 0.6055377194136393, "learning_rate": 2.8813922510543267e-05, "loss": 11.8004, "step": 27898 }, { "epoch": 1.5192120506694438, "grad_norm": 0.5225621783353847, "learning_rate": 2.8807729542888662e-05, "loss": 11.7967, "step": 27899 }, { "epoch": 1.5192665046660268, "grad_norm": 0.5567172433033473, "learning_rate": 2.8801537128837065e-05, "loss": 11.7802, "step": 27900 }, { "epoch": 1.5193209586626097, "grad_norm": 0.5196774715295803, "learning_rate": 2.879534526843668e-05, "loss": 11.8116, "step": 27901 }, { "epoch": 1.5193754126591927, "grad_norm": 0.5388871073186456, "learning_rate": 2.8789153961735605e-05, "loss": 11.8597, "step": 27902 }, { "epoch": 1.5194298666557757, "grad_norm": 0.5610179797228153, "learning_rate": 2.8782963208782042e-05, "loss": 11.847, "step": 27903 }, { "epoch": 1.5194843206523587, "grad_norm": 0.5846937180348672, "learning_rate": 2.87767730096241e-05, "loss": 11.8652, "step": 27904 }, { "epoch": 1.519538774648942, "grad_norm": 0.5537353097572987, "learning_rate": 2.877058336430989e-05, "loss": 11.8942, "step": 27905 }, { "epoch": 1.519593228645525, "grad_norm": 0.5957336635846588, "learning_rate": 2.8764394272887584e-05, "loss": 11.8114, "step": 27906 }, { "epoch": 1.519647682642108, "grad_norm": 0.48896804956338424, "learning_rate": 2.8758205735405276e-05, "loss": 11.8408, "step": 27907 }, { "epoch": 1.519702136638691, "grad_norm": 0.5693470935341843, "learning_rate": 2.875201775191111e-05, "loss": 11.6823, "step": 27908 }, { "epoch": 1.519756590635274, "grad_norm": 0.5409164269378668, "learning_rate": 2.8745830322453226e-05, "loss": 11.8604, "step": 27909 }, { "epoch": 1.519811044631857, "grad_norm": 0.5218839293379983, "learning_rate": 2.873964344707968e-05, "loss": 11.8008, "step": 27910 }, { "epoch": 1.5198654986284401, "grad_norm": 0.5377627599129235, "learning_rate": 2.8733457125838658e-05, "loss": 11.7381, "step": 27911 }, { "epoch": 1.519919952625023, "grad_norm": 0.5765601938153482, "learning_rate": 2.8727271358778185e-05, "loss": 11.8799, "step": 27912 }, { "epoch": 1.519974406621606, "grad_norm": 0.5392376194512571, "learning_rate": 2.872108614594644e-05, "loss": 11.766, "step": 27913 }, { "epoch": 1.520028860618189, "grad_norm": 0.5889251853275724, "learning_rate": 2.8714901487391477e-05, "loss": 11.8027, "step": 27914 }, { "epoch": 1.520083314614772, "grad_norm": 0.5743403154080624, "learning_rate": 2.8708717383161366e-05, "loss": 11.8232, "step": 27915 }, { "epoch": 1.520137768611355, "grad_norm": 0.6280411365971715, "learning_rate": 2.8702533833304256e-05, "loss": 11.9877, "step": 27916 }, { "epoch": 1.520192222607938, "grad_norm": 0.5101934537252294, "learning_rate": 2.8696350837868168e-05, "loss": 11.8981, "step": 27917 }, { "epoch": 1.520246676604521, "grad_norm": 0.5393295124478232, "learning_rate": 2.869016839690122e-05, "loss": 11.7954, "step": 27918 }, { "epoch": 1.520301130601104, "grad_norm": 0.5665424341421378, "learning_rate": 2.86839865104515e-05, "loss": 11.9649, "step": 27919 }, { "epoch": 1.520355584597687, "grad_norm": 0.5372327587059348, "learning_rate": 2.867780517856703e-05, "loss": 11.8867, "step": 27920 }, { "epoch": 1.52041003859427, "grad_norm": 0.49457536122256185, "learning_rate": 2.8671624401295947e-05, "loss": 11.7953, "step": 27921 }, { "epoch": 1.520464492590853, "grad_norm": 0.5682484408441838, "learning_rate": 2.866544417868624e-05, "loss": 11.865, "step": 27922 }, { "epoch": 1.520518946587436, "grad_norm": 0.5497355483267916, "learning_rate": 2.865926451078603e-05, "loss": 11.7453, "step": 27923 }, { "epoch": 1.520573400584019, "grad_norm": 0.5312350452988377, "learning_rate": 2.8653085397643355e-05, "loss": 11.7752, "step": 27924 }, { "epoch": 1.520627854580602, "grad_norm": 0.5337459565912904, "learning_rate": 2.864690683930621e-05, "loss": 11.7479, "step": 27925 }, { "epoch": 1.520682308577185, "grad_norm": 0.5492033095210955, "learning_rate": 2.8640728835822715e-05, "loss": 11.8602, "step": 27926 }, { "epoch": 1.520736762573768, "grad_norm": 0.5882903796976917, "learning_rate": 2.863455138724085e-05, "loss": 11.9274, "step": 27927 }, { "epoch": 1.5207912165703512, "grad_norm": 0.5464474040295534, "learning_rate": 2.862837449360871e-05, "loss": 11.8038, "step": 27928 }, { "epoch": 1.5208456705669342, "grad_norm": 0.5259964522455184, "learning_rate": 2.862219815497428e-05, "loss": 11.8672, "step": 27929 }, { "epoch": 1.5209001245635172, "grad_norm": 0.5164772302638427, "learning_rate": 2.86160223713856e-05, "loss": 11.8341, "step": 27930 }, { "epoch": 1.5209545785601002, "grad_norm": 0.5307716165980209, "learning_rate": 2.860984714289072e-05, "loss": 11.7679, "step": 27931 }, { "epoch": 1.5210090325566832, "grad_norm": 0.6043738213104092, "learning_rate": 2.8603672469537622e-05, "loss": 11.8279, "step": 27932 }, { "epoch": 1.5210634865532662, "grad_norm": 0.5445614445443454, "learning_rate": 2.8597498351374376e-05, "loss": 11.7401, "step": 27933 }, { "epoch": 1.5211179405498494, "grad_norm": 0.5289380992588216, "learning_rate": 2.8591324788448948e-05, "loss": 11.8751, "step": 27934 }, { "epoch": 1.5211723945464324, "grad_norm": 0.533654251638464, "learning_rate": 2.8585151780809328e-05, "loss": 11.8401, "step": 27935 }, { "epoch": 1.5212268485430154, "grad_norm": 0.5220964872865659, "learning_rate": 2.8578979328503576e-05, "loss": 11.8242, "step": 27936 }, { "epoch": 1.5212813025395984, "grad_norm": 0.5114364612323417, "learning_rate": 2.857280743157962e-05, "loss": 11.8001, "step": 27937 }, { "epoch": 1.5213357565361814, "grad_norm": 0.5523660842971195, "learning_rate": 2.856663609008553e-05, "loss": 11.8084, "step": 27938 }, { "epoch": 1.5213902105327644, "grad_norm": 0.52333885065501, "learning_rate": 2.856046530406922e-05, "loss": 11.8418, "step": 27939 }, { "epoch": 1.5214446645293473, "grad_norm": 0.5295371740160958, "learning_rate": 2.8554295073578708e-05, "loss": 11.8021, "step": 27940 }, { "epoch": 1.5214991185259303, "grad_norm": 0.4960778756772864, "learning_rate": 2.8548125398662017e-05, "loss": 11.8328, "step": 27941 }, { "epoch": 1.5215535725225133, "grad_norm": 0.46425175646156275, "learning_rate": 2.8541956279367053e-05, "loss": 11.729, "step": 27942 }, { "epoch": 1.5216080265190963, "grad_norm": 0.5012255929276607, "learning_rate": 2.853578771574189e-05, "loss": 11.8035, "step": 27943 }, { "epoch": 1.5216624805156793, "grad_norm": 0.5888817324911219, "learning_rate": 2.8529619707834355e-05, "loss": 11.8283, "step": 27944 }, { "epoch": 1.5217169345122623, "grad_norm": 0.6008255980546373, "learning_rate": 2.8523452255692485e-05, "loss": 11.9394, "step": 27945 }, { "epoch": 1.5217713885088453, "grad_norm": 0.6249937246002554, "learning_rate": 2.8517285359364266e-05, "loss": 11.92, "step": 27946 }, { "epoch": 1.5218258425054283, "grad_norm": 0.5441509701530162, "learning_rate": 2.851111901889759e-05, "loss": 11.9523, "step": 27947 }, { "epoch": 1.5218802965020113, "grad_norm": 0.5510325862380181, "learning_rate": 2.850495323434048e-05, "loss": 11.8965, "step": 27948 }, { "epoch": 1.5219347504985943, "grad_norm": 0.5338604703249146, "learning_rate": 2.8498788005740816e-05, "loss": 11.863, "step": 27949 }, { "epoch": 1.5219892044951773, "grad_norm": 0.6182806408308447, "learning_rate": 2.8492623333146585e-05, "loss": 12.011, "step": 27950 }, { "epoch": 1.5220436584917603, "grad_norm": 0.5586257192807768, "learning_rate": 2.8486459216605688e-05, "loss": 12.0284, "step": 27951 }, { "epoch": 1.5220981124883435, "grad_norm": 0.5386383629515035, "learning_rate": 2.848029565616607e-05, "loss": 11.6983, "step": 27952 }, { "epoch": 1.5221525664849265, "grad_norm": 0.6378948206143068, "learning_rate": 2.847413265187573e-05, "loss": 12.0341, "step": 27953 }, { "epoch": 1.5222070204815095, "grad_norm": 0.6468948500218227, "learning_rate": 2.8467970203782467e-05, "loss": 11.8029, "step": 27954 }, { "epoch": 1.5222614744780925, "grad_norm": 0.5759773580548446, "learning_rate": 2.846180831193429e-05, "loss": 11.8949, "step": 27955 }, { "epoch": 1.5223159284746755, "grad_norm": 0.6236305897816501, "learning_rate": 2.8455646976379068e-05, "loss": 11.9684, "step": 27956 }, { "epoch": 1.5223703824712587, "grad_norm": 0.6303340620397581, "learning_rate": 2.844948619716473e-05, "loss": 11.6356, "step": 27957 }, { "epoch": 1.5224248364678417, "grad_norm": 0.5853771207077727, "learning_rate": 2.844332597433921e-05, "loss": 11.7633, "step": 27958 }, { "epoch": 1.5224792904644247, "grad_norm": 0.5212751154680605, "learning_rate": 2.8437166307950368e-05, "loss": 11.9005, "step": 27959 }, { "epoch": 1.5225337444610076, "grad_norm": 0.5753545971375285, "learning_rate": 2.8431007198046144e-05, "loss": 11.848, "step": 27960 }, { "epoch": 1.5225881984575906, "grad_norm": 0.5314260131438653, "learning_rate": 2.8424848644674385e-05, "loss": 11.6512, "step": 27961 }, { "epoch": 1.5226426524541736, "grad_norm": 0.5560664066073572, "learning_rate": 2.841869064788303e-05, "loss": 11.8517, "step": 27962 }, { "epoch": 1.5226971064507566, "grad_norm": 0.5235610838337695, "learning_rate": 2.841253320771994e-05, "loss": 11.8405, "step": 27963 }, { "epoch": 1.5227515604473396, "grad_norm": 0.6135400382910773, "learning_rate": 2.8406376324232974e-05, "loss": 11.6442, "step": 27964 }, { "epoch": 1.5228060144439226, "grad_norm": 0.6182186923824203, "learning_rate": 2.8400219997470056e-05, "loss": 11.7927, "step": 27965 }, { "epoch": 1.5228604684405056, "grad_norm": 0.5503423157192251, "learning_rate": 2.839406422747901e-05, "loss": 11.8811, "step": 27966 }, { "epoch": 1.5229149224370886, "grad_norm": 0.5613776072349622, "learning_rate": 2.8387909014307722e-05, "loss": 11.8915, "step": 27967 }, { "epoch": 1.5229693764336716, "grad_norm": 0.5330578499553932, "learning_rate": 2.8381754358004097e-05, "loss": 11.7234, "step": 27968 }, { "epoch": 1.5230238304302546, "grad_norm": 0.556692852189812, "learning_rate": 2.837560025861593e-05, "loss": 11.6645, "step": 27969 }, { "epoch": 1.5230782844268376, "grad_norm": 0.529759914988266, "learning_rate": 2.836944671619114e-05, "loss": 11.8636, "step": 27970 }, { "epoch": 1.5231327384234206, "grad_norm": 0.5558955504395425, "learning_rate": 2.8363293730777517e-05, "loss": 11.9139, "step": 27971 }, { "epoch": 1.5231871924200036, "grad_norm": 0.5073574032386341, "learning_rate": 2.8357141302422962e-05, "loss": 11.8774, "step": 27972 }, { "epoch": 1.5232416464165865, "grad_norm": 0.5408827877794644, "learning_rate": 2.835098943117529e-05, "loss": 11.8919, "step": 27973 }, { "epoch": 1.5232961004131695, "grad_norm": 0.5485433703136475, "learning_rate": 2.8344838117082306e-05, "loss": 11.6821, "step": 27974 }, { "epoch": 1.5233505544097528, "grad_norm": 0.5524202991264017, "learning_rate": 2.8338687360191906e-05, "loss": 11.7799, "step": 27975 }, { "epoch": 1.5234050084063357, "grad_norm": 0.5407395598972906, "learning_rate": 2.8332537160551864e-05, "loss": 11.7938, "step": 27976 }, { "epoch": 1.5234594624029187, "grad_norm": 0.5985515785085527, "learning_rate": 2.8326387518210063e-05, "loss": 11.9475, "step": 27977 }, { "epoch": 1.5235139163995017, "grad_norm": 0.5743204461344149, "learning_rate": 2.8320238433214263e-05, "loss": 11.9857, "step": 27978 }, { "epoch": 1.5235683703960847, "grad_norm": 0.5341807793487325, "learning_rate": 2.8314089905612306e-05, "loss": 11.8742, "step": 27979 }, { "epoch": 1.5236228243926677, "grad_norm": 0.5275568888986025, "learning_rate": 2.8307941935452043e-05, "loss": 11.8501, "step": 27980 }, { "epoch": 1.523677278389251, "grad_norm": 0.5393791917241146, "learning_rate": 2.830179452278121e-05, "loss": 11.766, "step": 27981 }, { "epoch": 1.523731732385834, "grad_norm": 0.5768720464720104, "learning_rate": 2.8295647667647685e-05, "loss": 11.9164, "step": 27982 }, { "epoch": 1.523786186382417, "grad_norm": 0.5896321753178678, "learning_rate": 2.8289501370099225e-05, "loss": 11.7657, "step": 27983 }, { "epoch": 1.523840640379, "grad_norm": 0.5534454910447092, "learning_rate": 2.8283355630183593e-05, "loss": 11.8392, "step": 27984 }, { "epoch": 1.523895094375583, "grad_norm": 0.5516639208822186, "learning_rate": 2.8277210447948653e-05, "loss": 11.8244, "step": 27985 }, { "epoch": 1.523949548372166, "grad_norm": 0.686931899725569, "learning_rate": 2.8271065823442123e-05, "loss": 11.8538, "step": 27986 }, { "epoch": 1.524004002368749, "grad_norm": 0.5315660720617317, "learning_rate": 2.8264921756711837e-05, "loss": 11.9026, "step": 27987 }, { "epoch": 1.5240584563653319, "grad_norm": 0.6007169368610384, "learning_rate": 2.8258778247805517e-05, "loss": 11.8662, "step": 27988 }, { "epoch": 1.5241129103619149, "grad_norm": 0.5597611080799025, "learning_rate": 2.825263529677097e-05, "loss": 11.8968, "step": 27989 }, { "epoch": 1.5241673643584979, "grad_norm": 0.5920000808107831, "learning_rate": 2.8246492903655998e-05, "loss": 11.8771, "step": 27990 }, { "epoch": 1.5242218183550809, "grad_norm": 0.5544028723552854, "learning_rate": 2.8240351068508297e-05, "loss": 11.8372, "step": 27991 }, { "epoch": 1.5242762723516639, "grad_norm": 0.5826707811122956, "learning_rate": 2.82342097913757e-05, "loss": 11.8462, "step": 27992 }, { "epoch": 1.5243307263482468, "grad_norm": 0.5593061795049994, "learning_rate": 2.8228069072305907e-05, "loss": 11.7339, "step": 27993 }, { "epoch": 1.5243851803448298, "grad_norm": 0.555009710492283, "learning_rate": 2.8221928911346663e-05, "loss": 11.7679, "step": 27994 }, { "epoch": 1.5244396343414128, "grad_norm": 0.588777087305147, "learning_rate": 2.821578930854577e-05, "loss": 11.8769, "step": 27995 }, { "epoch": 1.5244940883379958, "grad_norm": 0.5612662130441314, "learning_rate": 2.8209650263950905e-05, "loss": 11.6895, "step": 27996 }, { "epoch": 1.5245485423345788, "grad_norm": 0.5517453495317268, "learning_rate": 2.8203511777609858e-05, "loss": 11.7565, "step": 27997 }, { "epoch": 1.524602996331162, "grad_norm": 0.5360699191588418, "learning_rate": 2.819737384957033e-05, "loss": 11.8972, "step": 27998 }, { "epoch": 1.524657450327745, "grad_norm": 0.5216156089956738, "learning_rate": 2.819123647988009e-05, "loss": 11.7221, "step": 27999 }, { "epoch": 1.524711904324328, "grad_norm": 0.5835597939173611, "learning_rate": 2.81850996685868e-05, "loss": 11.8809, "step": 28000 }, { "epoch": 1.524766358320911, "grad_norm": 0.5840532266995864, "learning_rate": 2.8178963415738225e-05, "loss": 11.8049, "step": 28001 }, { "epoch": 1.524820812317494, "grad_norm": 0.6183406611645063, "learning_rate": 2.81728277213821e-05, "loss": 11.8851, "step": 28002 }, { "epoch": 1.524875266314077, "grad_norm": 0.5232583144111126, "learning_rate": 2.816669258556611e-05, "loss": 11.8841, "step": 28003 }, { "epoch": 1.5249297203106602, "grad_norm": 0.5773888316303224, "learning_rate": 2.8160558008337977e-05, "loss": 11.8857, "step": 28004 }, { "epoch": 1.5249841743072432, "grad_norm": 0.576444798531799, "learning_rate": 2.815442398974536e-05, "loss": 11.8206, "step": 28005 }, { "epoch": 1.5250386283038262, "grad_norm": 0.475299752941559, "learning_rate": 2.8148290529835987e-05, "loss": 11.8208, "step": 28006 }, { "epoch": 1.5250930823004092, "grad_norm": 0.5790512256596804, "learning_rate": 2.8142157628657583e-05, "loss": 11.8499, "step": 28007 }, { "epoch": 1.5251475362969922, "grad_norm": 0.5234532016890456, "learning_rate": 2.8136025286257784e-05, "loss": 11.8159, "step": 28008 }, { "epoch": 1.5252019902935752, "grad_norm": 0.5630963999270417, "learning_rate": 2.8129893502684334e-05, "loss": 11.8672, "step": 28009 }, { "epoch": 1.5252564442901582, "grad_norm": 0.5455318970182873, "learning_rate": 2.8123762277984856e-05, "loss": 11.8057, "step": 28010 }, { "epoch": 1.5253108982867412, "grad_norm": 0.5171199728328628, "learning_rate": 2.8117631612207084e-05, "loss": 11.721, "step": 28011 }, { "epoch": 1.5253653522833241, "grad_norm": 0.5221933409063921, "learning_rate": 2.811150150539863e-05, "loss": 11.8984, "step": 28012 }, { "epoch": 1.5254198062799071, "grad_norm": 0.5187042053234086, "learning_rate": 2.8105371957607228e-05, "loss": 11.8053, "step": 28013 }, { "epoch": 1.5254742602764901, "grad_norm": 0.5429878060486782, "learning_rate": 2.8099242968880512e-05, "loss": 11.8103, "step": 28014 }, { "epoch": 1.5255287142730731, "grad_norm": 0.6503001961805629, "learning_rate": 2.809311453926612e-05, "loss": 11.9886, "step": 28015 }, { "epoch": 1.5255831682696561, "grad_norm": 0.5230869261194098, "learning_rate": 2.808698666881171e-05, "loss": 11.8418, "step": 28016 }, { "epoch": 1.5256376222662391, "grad_norm": 0.5723371956038334, "learning_rate": 2.8080859357564993e-05, "loss": 11.9124, "step": 28017 }, { "epoch": 1.525692076262822, "grad_norm": 0.5966982453619117, "learning_rate": 2.807473260557355e-05, "loss": 11.9901, "step": 28018 }, { "epoch": 1.525746530259405, "grad_norm": 0.49105321129198276, "learning_rate": 2.806860641288507e-05, "loss": 11.849, "step": 28019 }, { "epoch": 1.525800984255988, "grad_norm": 0.5817851378221217, "learning_rate": 2.806248077954714e-05, "loss": 11.7378, "step": 28020 }, { "epoch": 1.525855438252571, "grad_norm": 0.5241202288140655, "learning_rate": 2.8056355705607452e-05, "loss": 11.7037, "step": 28021 }, { "epoch": 1.5259098922491543, "grad_norm": 0.49166570019066175, "learning_rate": 2.8050231191113573e-05, "loss": 11.795, "step": 28022 }, { "epoch": 1.5259643462457373, "grad_norm": 0.5136362026482226, "learning_rate": 2.8044107236113203e-05, "loss": 11.8897, "step": 28023 }, { "epoch": 1.5260188002423203, "grad_norm": 0.5261733293137251, "learning_rate": 2.8037983840653913e-05, "loss": 11.8093, "step": 28024 }, { "epoch": 1.5260732542389033, "grad_norm": 0.4760619588143287, "learning_rate": 2.80318610047833e-05, "loss": 11.7524, "step": 28025 }, { "epoch": 1.5261277082354863, "grad_norm": 0.5216907813608113, "learning_rate": 2.8025738728549046e-05, "loss": 11.8057, "step": 28026 }, { "epoch": 1.5261821622320695, "grad_norm": 0.4826666149391475, "learning_rate": 2.8019617011998677e-05, "loss": 11.759, "step": 28027 }, { "epoch": 1.5262366162286525, "grad_norm": 0.5486646520149847, "learning_rate": 2.8013495855179837e-05, "loss": 11.8722, "step": 28028 }, { "epoch": 1.5262910702252355, "grad_norm": 0.5339442745290974, "learning_rate": 2.8007375258140156e-05, "loss": 11.8051, "step": 28029 }, { "epoch": 1.5263455242218185, "grad_norm": 0.5702150244759141, "learning_rate": 2.8001255220927168e-05, "loss": 11.8569, "step": 28030 }, { "epoch": 1.5263999782184015, "grad_norm": 0.5393840986818391, "learning_rate": 2.7995135743588528e-05, "loss": 11.8107, "step": 28031 }, { "epoch": 1.5264544322149844, "grad_norm": 0.5263479220858477, "learning_rate": 2.7989016826171755e-05, "loss": 11.9613, "step": 28032 }, { "epoch": 1.5265088862115674, "grad_norm": 0.5609368998722691, "learning_rate": 2.7982898468724493e-05, "loss": 11.9058, "step": 28033 }, { "epoch": 1.5265633402081504, "grad_norm": 0.5145333261289492, "learning_rate": 2.797678067129429e-05, "loss": 11.8689, "step": 28034 }, { "epoch": 1.5266177942047334, "grad_norm": 0.5112366708808339, "learning_rate": 2.797066343392869e-05, "loss": 11.7795, "step": 28035 }, { "epoch": 1.5266722482013164, "grad_norm": 0.5292153657436052, "learning_rate": 2.7964546756675313e-05, "loss": 11.8977, "step": 28036 }, { "epoch": 1.5267267021978994, "grad_norm": 0.5187700953438499, "learning_rate": 2.795843063958168e-05, "loss": 11.885, "step": 28037 }, { "epoch": 1.5267811561944824, "grad_norm": 0.5260785610507742, "learning_rate": 2.7952315082695367e-05, "loss": 11.7061, "step": 28038 }, { "epoch": 1.5268356101910654, "grad_norm": 0.5926034175885343, "learning_rate": 2.794620008606397e-05, "loss": 11.8106, "step": 28039 }, { "epoch": 1.5268900641876484, "grad_norm": 0.5698490004315689, "learning_rate": 2.794008564973497e-05, "loss": 11.8, "step": 28040 }, { "epoch": 1.5269445181842314, "grad_norm": 0.6678278354852758, "learning_rate": 2.7933971773755986e-05, "loss": 11.8865, "step": 28041 }, { "epoch": 1.5269989721808144, "grad_norm": 0.5213235120952687, "learning_rate": 2.7927858458174483e-05, "loss": 11.752, "step": 28042 }, { "epoch": 1.5270534261773974, "grad_norm": 0.5463359333407151, "learning_rate": 2.7921745703038072e-05, "loss": 11.6669, "step": 28043 }, { "epoch": 1.5271078801739804, "grad_norm": 0.5344727131368201, "learning_rate": 2.7915633508394258e-05, "loss": 11.8769, "step": 28044 }, { "epoch": 1.5271623341705636, "grad_norm": 0.5402139975432181, "learning_rate": 2.7909521874290524e-05, "loss": 11.9086, "step": 28045 }, { "epoch": 1.5272167881671466, "grad_norm": 0.6095364033149826, "learning_rate": 2.7903410800774478e-05, "loss": 11.901, "step": 28046 }, { "epoch": 1.5272712421637296, "grad_norm": 0.5795695853077685, "learning_rate": 2.7897300287893568e-05, "loss": 11.9074, "step": 28047 }, { "epoch": 1.5273256961603126, "grad_norm": 0.541051857417764, "learning_rate": 2.7891190335695373e-05, "loss": 11.8282, "step": 28048 }, { "epoch": 1.5273801501568955, "grad_norm": 0.6023298925941373, "learning_rate": 2.788508094422735e-05, "loss": 11.9771, "step": 28049 }, { "epoch": 1.5274346041534785, "grad_norm": 0.590164425957543, "learning_rate": 2.7878972113537017e-05, "loss": 11.8233, "step": 28050 }, { "epoch": 1.5274890581500618, "grad_norm": 0.5883901291502481, "learning_rate": 2.787286384367194e-05, "loss": 11.859, "step": 28051 }, { "epoch": 1.5275435121466447, "grad_norm": 0.5598461273887382, "learning_rate": 2.7866756134679528e-05, "loss": 11.8201, "step": 28052 }, { "epoch": 1.5275979661432277, "grad_norm": 0.5510855141903137, "learning_rate": 2.786064898660734e-05, "loss": 11.9274, "step": 28053 }, { "epoch": 1.5276524201398107, "grad_norm": 0.6171418019889008, "learning_rate": 2.785454239950286e-05, "loss": 11.9351, "step": 28054 }, { "epoch": 1.5277068741363937, "grad_norm": 0.5441721929752072, "learning_rate": 2.784843637341351e-05, "loss": 11.8461, "step": 28055 }, { "epoch": 1.5277613281329767, "grad_norm": 0.550427509481184, "learning_rate": 2.784233090838686e-05, "loss": 11.7901, "step": 28056 }, { "epoch": 1.5278157821295597, "grad_norm": 0.5388240833165424, "learning_rate": 2.7836226004470322e-05, "loss": 11.9641, "step": 28057 }, { "epoch": 1.5278702361261427, "grad_norm": 0.554171740403026, "learning_rate": 2.7830121661711405e-05, "loss": 11.837, "step": 28058 }, { "epoch": 1.5279246901227257, "grad_norm": 0.5383805673000276, "learning_rate": 2.782401788015755e-05, "loss": 11.8651, "step": 28059 }, { "epoch": 1.5279791441193087, "grad_norm": 0.5657370833017105, "learning_rate": 2.781791465985626e-05, "loss": 11.8957, "step": 28060 }, { "epoch": 1.5280335981158917, "grad_norm": 0.5216301810143582, "learning_rate": 2.781181200085494e-05, "loss": 11.7694, "step": 28061 }, { "epoch": 1.5280880521124747, "grad_norm": 0.4991395494787023, "learning_rate": 2.7805709903201073e-05, "loss": 11.8382, "step": 28062 }, { "epoch": 1.5281425061090577, "grad_norm": 0.5198154381445942, "learning_rate": 2.7799608366942143e-05, "loss": 11.8512, "step": 28063 }, { "epoch": 1.5281969601056407, "grad_norm": 0.5222690177492786, "learning_rate": 2.779350739212557e-05, "loss": 11.8377, "step": 28064 }, { "epoch": 1.5282514141022236, "grad_norm": 0.4910046177255441, "learning_rate": 2.778740697879877e-05, "loss": 11.7966, "step": 28065 }, { "epoch": 1.5283058680988066, "grad_norm": 0.5890142611054503, "learning_rate": 2.7781307127009226e-05, "loss": 11.8705, "step": 28066 }, { "epoch": 1.5283603220953896, "grad_norm": 0.5150666867395313, "learning_rate": 2.7775207836804317e-05, "loss": 11.635, "step": 28067 }, { "epoch": 1.5284147760919728, "grad_norm": 0.5877921587720709, "learning_rate": 2.7769109108231528e-05, "loss": 11.8718, "step": 28068 }, { "epoch": 1.5284692300885558, "grad_norm": 0.5905701907420984, "learning_rate": 2.776301094133824e-05, "loss": 11.8247, "step": 28069 }, { "epoch": 1.5285236840851388, "grad_norm": 0.5319275146743632, "learning_rate": 2.7756913336171917e-05, "loss": 11.885, "step": 28070 }, { "epoch": 1.5285781380817218, "grad_norm": 0.5710362753324053, "learning_rate": 2.7750816292779926e-05, "loss": 11.9469, "step": 28071 }, { "epoch": 1.5286325920783048, "grad_norm": 0.5839829945359638, "learning_rate": 2.7744719811209695e-05, "loss": 11.8371, "step": 28072 }, { "epoch": 1.5286870460748878, "grad_norm": 0.5087264130086645, "learning_rate": 2.7738623891508688e-05, "loss": 11.7981, "step": 28073 }, { "epoch": 1.528741500071471, "grad_norm": 0.6097435097493412, "learning_rate": 2.7732528533724245e-05, "loss": 12.018, "step": 28074 }, { "epoch": 1.528795954068054, "grad_norm": 0.581255067665826, "learning_rate": 2.772643373790378e-05, "loss": 11.9142, "step": 28075 }, { "epoch": 1.528850408064637, "grad_norm": 0.5529909667015764, "learning_rate": 2.772033950409466e-05, "loss": 11.6942, "step": 28076 }, { "epoch": 1.52890486206122, "grad_norm": 0.5507846355961354, "learning_rate": 2.7714245832344298e-05, "loss": 11.9129, "step": 28077 }, { "epoch": 1.528959316057803, "grad_norm": 0.5263795313275622, "learning_rate": 2.770815272270012e-05, "loss": 11.8114, "step": 28078 }, { "epoch": 1.529013770054386, "grad_norm": 0.5915882574630816, "learning_rate": 2.7702060175209433e-05, "loss": 11.947, "step": 28079 }, { "epoch": 1.529068224050969, "grad_norm": 0.5420336359228751, "learning_rate": 2.7695968189919684e-05, "loss": 11.6884, "step": 28080 }, { "epoch": 1.529122678047552, "grad_norm": 0.5761177601026898, "learning_rate": 2.7689876766878177e-05, "loss": 11.9456, "step": 28081 }, { "epoch": 1.529177132044135, "grad_norm": 0.6179788515082738, "learning_rate": 2.7683785906132353e-05, "loss": 11.9586, "step": 28082 }, { "epoch": 1.529231586040718, "grad_norm": 0.5376348725469609, "learning_rate": 2.7677695607729505e-05, "loss": 11.7374, "step": 28083 }, { "epoch": 1.529286040037301, "grad_norm": 0.5875898380240233, "learning_rate": 2.7671605871717044e-05, "loss": 11.8818, "step": 28084 }, { "epoch": 1.529340494033884, "grad_norm": 0.5319121946915005, "learning_rate": 2.7665516698142314e-05, "loss": 11.812, "step": 28085 }, { "epoch": 1.529394948030467, "grad_norm": 0.5232715675086153, "learning_rate": 2.765942808705263e-05, "loss": 11.6767, "step": 28086 }, { "epoch": 1.52944940202705, "grad_norm": 0.587033249086594, "learning_rate": 2.7653340038495358e-05, "loss": 11.9199, "step": 28087 }, { "epoch": 1.529503856023633, "grad_norm": 0.5670701469594571, "learning_rate": 2.7647252552517878e-05, "loss": 11.8395, "step": 28088 }, { "epoch": 1.529558310020216, "grad_norm": 0.5456216942507206, "learning_rate": 2.7641165629167464e-05, "loss": 11.8476, "step": 28089 }, { "epoch": 1.529612764016799, "grad_norm": 0.6133476880784436, "learning_rate": 2.7635079268491516e-05, "loss": 11.7352, "step": 28090 }, { "epoch": 1.5296672180133821, "grad_norm": 0.5689587520370178, "learning_rate": 2.762899347053729e-05, "loss": 11.9563, "step": 28091 }, { "epoch": 1.5297216720099651, "grad_norm": 0.5608064813813687, "learning_rate": 2.7622908235352185e-05, "loss": 11.9223, "step": 28092 }, { "epoch": 1.529776126006548, "grad_norm": 0.5265785301100568, "learning_rate": 2.7616823562983453e-05, "loss": 11.8348, "step": 28093 }, { "epoch": 1.529830580003131, "grad_norm": 0.5261958437030664, "learning_rate": 2.761073945347846e-05, "loss": 11.901, "step": 28094 }, { "epoch": 1.529885033999714, "grad_norm": 0.5544943092947802, "learning_rate": 2.7604655906884502e-05, "loss": 11.8705, "step": 28095 }, { "epoch": 1.529939487996297, "grad_norm": 0.5932595399334087, "learning_rate": 2.7598572923248857e-05, "loss": 11.9126, "step": 28096 }, { "epoch": 1.5299939419928803, "grad_norm": 0.5241433903541007, "learning_rate": 2.759249050261887e-05, "loss": 11.8082, "step": 28097 }, { "epoch": 1.5300483959894633, "grad_norm": 0.508394728574014, "learning_rate": 2.7586408645041796e-05, "loss": 11.8364, "step": 28098 }, { "epoch": 1.5301028499860463, "grad_norm": 0.5660475859610954, "learning_rate": 2.758032735056495e-05, "loss": 11.9086, "step": 28099 }, { "epoch": 1.5301573039826293, "grad_norm": 0.5854390292446331, "learning_rate": 2.7574246619235654e-05, "loss": 11.892, "step": 28100 }, { "epoch": 1.5302117579792123, "grad_norm": 0.6135300347394426, "learning_rate": 2.756816645110113e-05, "loss": 11.8013, "step": 28101 }, { "epoch": 1.5302662119757953, "grad_norm": 0.47022733445491777, "learning_rate": 2.7562086846208723e-05, "loss": 11.7453, "step": 28102 }, { "epoch": 1.5303206659723783, "grad_norm": 0.5604935484080049, "learning_rate": 2.7556007804605654e-05, "loss": 11.8919, "step": 28103 }, { "epoch": 1.5303751199689613, "grad_norm": 0.5560539287446726, "learning_rate": 2.754992932633924e-05, "loss": 11.9655, "step": 28104 }, { "epoch": 1.5304295739655442, "grad_norm": 0.5035252030556944, "learning_rate": 2.7543851411456722e-05, "loss": 11.845, "step": 28105 }, { "epoch": 1.5304840279621272, "grad_norm": 0.5998247291009688, "learning_rate": 2.753777406000534e-05, "loss": 11.8718, "step": 28106 }, { "epoch": 1.5305384819587102, "grad_norm": 0.6277000221854228, "learning_rate": 2.7531697272032408e-05, "loss": 11.8319, "step": 28107 }, { "epoch": 1.5305929359552932, "grad_norm": 0.5311575974019177, "learning_rate": 2.7525621047585115e-05, "loss": 11.862, "step": 28108 }, { "epoch": 1.5306473899518762, "grad_norm": 0.5240917378904438, "learning_rate": 2.7519545386710787e-05, "loss": 11.6807, "step": 28109 }, { "epoch": 1.5307018439484592, "grad_norm": 0.5482481864827582, "learning_rate": 2.75134702894566e-05, "loss": 11.7755, "step": 28110 }, { "epoch": 1.5307562979450422, "grad_norm": 0.5199580600118068, "learning_rate": 2.7507395755869813e-05, "loss": 11.7715, "step": 28111 }, { "epoch": 1.5308107519416252, "grad_norm": 0.46853550384032233, "learning_rate": 2.7501321785997713e-05, "loss": 11.8175, "step": 28112 }, { "epoch": 1.5308652059382082, "grad_norm": 0.5378687057721647, "learning_rate": 2.7495248379887462e-05, "loss": 11.794, "step": 28113 }, { "epoch": 1.5309196599347912, "grad_norm": 0.6076040383716326, "learning_rate": 2.7489175537586342e-05, "loss": 11.9345, "step": 28114 }, { "epoch": 1.5309741139313744, "grad_norm": 0.5302827915958461, "learning_rate": 2.748310325914155e-05, "loss": 11.78, "step": 28115 }, { "epoch": 1.5310285679279574, "grad_norm": 0.5206254842623453, "learning_rate": 2.7477031544600297e-05, "loss": 11.844, "step": 28116 }, { "epoch": 1.5310830219245404, "grad_norm": 0.5782492612681784, "learning_rate": 2.7470960394009826e-05, "loss": 11.6907, "step": 28117 }, { "epoch": 1.5311374759211234, "grad_norm": 0.5816187740888729, "learning_rate": 2.7464889807417293e-05, "loss": 11.7165, "step": 28118 }, { "epoch": 1.5311919299177064, "grad_norm": 0.5147939602641817, "learning_rate": 2.7458819784869983e-05, "loss": 11.7735, "step": 28119 }, { "epoch": 1.5312463839142894, "grad_norm": 0.5354121280135299, "learning_rate": 2.745275032641502e-05, "loss": 11.6952, "step": 28120 }, { "epoch": 1.5313008379108726, "grad_norm": 0.5344169272867186, "learning_rate": 2.7446681432099642e-05, "loss": 11.8144, "step": 28121 }, { "epoch": 1.5313552919074556, "grad_norm": 0.5251465455208807, "learning_rate": 2.744061310197106e-05, "loss": 11.766, "step": 28122 }, { "epoch": 1.5314097459040386, "grad_norm": 0.5721180654907436, "learning_rate": 2.743454533607641e-05, "loss": 11.814, "step": 28123 }, { "epoch": 1.5314641999006215, "grad_norm": 0.5198986708643252, "learning_rate": 2.7428478134462963e-05, "loss": 11.801, "step": 28124 }, { "epoch": 1.5315186538972045, "grad_norm": 0.6349634894030276, "learning_rate": 2.742241149717779e-05, "loss": 11.8605, "step": 28125 }, { "epoch": 1.5315731078937875, "grad_norm": 0.5476776064162383, "learning_rate": 2.7416345424268107e-05, "loss": 11.8861, "step": 28126 }, { "epoch": 1.5316275618903705, "grad_norm": 0.5389532605450704, "learning_rate": 2.7410279915781123e-05, "loss": 11.9331, "step": 28127 }, { "epoch": 1.5316820158869535, "grad_norm": 0.7137800699458078, "learning_rate": 2.7404214971763952e-05, "loss": 11.8757, "step": 28128 }, { "epoch": 1.5317364698835365, "grad_norm": 0.6023696650918151, "learning_rate": 2.7398150592263805e-05, "loss": 11.7711, "step": 28129 }, { "epoch": 1.5317909238801195, "grad_norm": 0.5275100704514966, "learning_rate": 2.7392086777327787e-05, "loss": 11.9261, "step": 28130 }, { "epoch": 1.5318453778767025, "grad_norm": 0.5377998200518751, "learning_rate": 2.738602352700311e-05, "loss": 11.8072, "step": 28131 }, { "epoch": 1.5318998318732855, "grad_norm": 0.5857096227630232, "learning_rate": 2.737996084133686e-05, "loss": 11.9087, "step": 28132 }, { "epoch": 1.5319542858698685, "grad_norm": 0.608249892848749, "learning_rate": 2.7373898720376212e-05, "loss": 11.8523, "step": 28133 }, { "epoch": 1.5320087398664515, "grad_norm": 0.5743496564238021, "learning_rate": 2.7367837164168375e-05, "loss": 11.7277, "step": 28134 }, { "epoch": 1.5320631938630345, "grad_norm": 0.5774906890183263, "learning_rate": 2.7361776172760346e-05, "loss": 11.8523, "step": 28135 }, { "epoch": 1.5321176478596175, "grad_norm": 0.5693847254039437, "learning_rate": 2.7355715746199327e-05, "loss": 11.8029, "step": 28136 }, { "epoch": 1.5321721018562005, "grad_norm": 0.48642514926955, "learning_rate": 2.734965588453248e-05, "loss": 11.7881, "step": 28137 }, { "epoch": 1.5322265558527837, "grad_norm": 0.5208125835512221, "learning_rate": 2.7343596587806865e-05, "loss": 11.769, "step": 28138 }, { "epoch": 1.5322810098493667, "grad_norm": 0.5756246884970783, "learning_rate": 2.7337537856069652e-05, "loss": 11.8422, "step": 28139 }, { "epoch": 1.5323354638459497, "grad_norm": 0.5510921702394357, "learning_rate": 2.7331479689367888e-05, "loss": 11.8868, "step": 28140 }, { "epoch": 1.5323899178425326, "grad_norm": 0.6054078622464308, "learning_rate": 2.7325422087748776e-05, "loss": 11.8649, "step": 28141 }, { "epoch": 1.5324443718391156, "grad_norm": 0.5215026850061641, "learning_rate": 2.7319365051259326e-05, "loss": 11.848, "step": 28142 }, { "epoch": 1.5324988258356986, "grad_norm": 0.5448259488081963, "learning_rate": 2.7313308579946684e-05, "loss": 11.8352, "step": 28143 }, { "epoch": 1.5325532798322818, "grad_norm": 0.536583365184915, "learning_rate": 2.7307252673858007e-05, "loss": 11.7611, "step": 28144 }, { "epoch": 1.5326077338288648, "grad_norm": 0.5328057211891392, "learning_rate": 2.7301197333040264e-05, "loss": 11.8555, "step": 28145 }, { "epoch": 1.5326621878254478, "grad_norm": 0.5382358859349715, "learning_rate": 2.729514255754063e-05, "loss": 11.8034, "step": 28146 }, { "epoch": 1.5327166418220308, "grad_norm": 0.54372606929296, "learning_rate": 2.7289088347406135e-05, "loss": 11.8172, "step": 28147 }, { "epoch": 1.5327710958186138, "grad_norm": 0.5071314621892234, "learning_rate": 2.7283034702683885e-05, "loss": 11.771, "step": 28148 }, { "epoch": 1.5328255498151968, "grad_norm": 0.47760232581095086, "learning_rate": 2.7276981623420974e-05, "loss": 11.7506, "step": 28149 }, { "epoch": 1.5328800038117798, "grad_norm": 0.5779126067885992, "learning_rate": 2.7270929109664423e-05, "loss": 11.776, "step": 28150 }, { "epoch": 1.5329344578083628, "grad_norm": 0.532756986419023, "learning_rate": 2.7264877161461343e-05, "loss": 11.8969, "step": 28151 }, { "epoch": 1.5329889118049458, "grad_norm": 0.5301360810526304, "learning_rate": 2.7258825778858764e-05, "loss": 11.909, "step": 28152 }, { "epoch": 1.5330433658015288, "grad_norm": 0.5409486462875552, "learning_rate": 2.725277496190377e-05, "loss": 11.855, "step": 28153 }, { "epoch": 1.5330978197981118, "grad_norm": 0.5483761385219517, "learning_rate": 2.724672471064341e-05, "loss": 11.8397, "step": 28154 }, { "epoch": 1.5331522737946948, "grad_norm": 0.5196613633243852, "learning_rate": 2.7240675025124684e-05, "loss": 11.8376, "step": 28155 }, { "epoch": 1.5332067277912778, "grad_norm": 0.5921240861695857, "learning_rate": 2.7234625905394697e-05, "loss": 11.9667, "step": 28156 }, { "epoch": 1.5332611817878607, "grad_norm": 0.5912985763166101, "learning_rate": 2.7228577351500427e-05, "loss": 11.991, "step": 28157 }, { "epoch": 1.5333156357844437, "grad_norm": 0.54196239765597, "learning_rate": 2.7222529363488992e-05, "loss": 11.7573, "step": 28158 }, { "epoch": 1.5333700897810267, "grad_norm": 0.5850025102559534, "learning_rate": 2.7216481941407323e-05, "loss": 11.9364, "step": 28159 }, { "epoch": 1.5334245437776097, "grad_norm": 0.5977439025733949, "learning_rate": 2.721043508530251e-05, "loss": 11.9675, "step": 28160 }, { "epoch": 1.533478997774193, "grad_norm": 0.5388939351892333, "learning_rate": 2.720438879522158e-05, "loss": 11.8457, "step": 28161 }, { "epoch": 1.533533451770776, "grad_norm": 0.5213198958516342, "learning_rate": 2.7198343071211508e-05, "loss": 11.789, "step": 28162 }, { "epoch": 1.533587905767359, "grad_norm": 0.5201233933753517, "learning_rate": 2.7192297913319363e-05, "loss": 11.7326, "step": 28163 }, { "epoch": 1.533642359763942, "grad_norm": 0.5696029144591914, "learning_rate": 2.7186253321592114e-05, "loss": 11.8743, "step": 28164 }, { "epoch": 1.533696813760525, "grad_norm": 0.5661831990271644, "learning_rate": 2.718020929607673e-05, "loss": 11.5473, "step": 28165 }, { "epoch": 1.533751267757108, "grad_norm": 0.5733737118565293, "learning_rate": 2.7174165836820297e-05, "loss": 11.897, "step": 28166 }, { "epoch": 1.5338057217536911, "grad_norm": 0.5742035351847482, "learning_rate": 2.7168122943869723e-05, "loss": 11.8856, "step": 28167 }, { "epoch": 1.5338601757502741, "grad_norm": 0.5999607268231191, "learning_rate": 2.716208061727208e-05, "loss": 11.8658, "step": 28168 }, { "epoch": 1.533914629746857, "grad_norm": 0.5218650452531289, "learning_rate": 2.715603885707427e-05, "loss": 11.7596, "step": 28169 }, { "epoch": 1.53396908374344, "grad_norm": 0.5579045882793019, "learning_rate": 2.7149997663323323e-05, "loss": 11.8881, "step": 28170 }, { "epoch": 1.534023537740023, "grad_norm": 0.5119751726399356, "learning_rate": 2.7143957036066238e-05, "loss": 11.8144, "step": 28171 }, { "epoch": 1.534077991736606, "grad_norm": 0.528054243795056, "learning_rate": 2.713791697534994e-05, "loss": 11.7638, "step": 28172 }, { "epoch": 1.534132445733189, "grad_norm": 0.6120665347017282, "learning_rate": 2.7131877481221448e-05, "loss": 11.6907, "step": 28173 }, { "epoch": 1.534186899729772, "grad_norm": 0.5020852473806008, "learning_rate": 2.712583855372769e-05, "loss": 11.791, "step": 28174 }, { "epoch": 1.534241353726355, "grad_norm": 0.5512264992726531, "learning_rate": 2.71198001929156e-05, "loss": 11.7513, "step": 28175 }, { "epoch": 1.534295807722938, "grad_norm": 0.5514363641125494, "learning_rate": 2.71137623988322e-05, "loss": 11.8073, "step": 28176 }, { "epoch": 1.534350261719521, "grad_norm": 0.5312078707535559, "learning_rate": 2.7107725171524378e-05, "loss": 11.8327, "step": 28177 }, { "epoch": 1.534404715716104, "grad_norm": 0.5247512696484431, "learning_rate": 2.7101688511039137e-05, "loss": 11.7713, "step": 28178 }, { "epoch": 1.534459169712687, "grad_norm": 0.5459385675423508, "learning_rate": 2.7095652417423357e-05, "loss": 11.8542, "step": 28179 }, { "epoch": 1.53451362370927, "grad_norm": 0.5749601248779559, "learning_rate": 2.7089616890724044e-05, "loss": 11.8236, "step": 28180 }, { "epoch": 1.534568077705853, "grad_norm": 0.5664622210684159, "learning_rate": 2.7083581930988065e-05, "loss": 11.8597, "step": 28181 }, { "epoch": 1.534622531702436, "grad_norm": 0.5690130145344745, "learning_rate": 2.7077547538262392e-05, "loss": 11.6244, "step": 28182 }, { "epoch": 1.534676985699019, "grad_norm": 0.6098614195320473, "learning_rate": 2.7071513712593954e-05, "loss": 11.8586, "step": 28183 }, { "epoch": 1.534731439695602, "grad_norm": 0.5345772468011127, "learning_rate": 2.7065480454029635e-05, "loss": 11.8058, "step": 28184 }, { "epoch": 1.5347858936921852, "grad_norm": 0.5004064107412757, "learning_rate": 2.7059447762616396e-05, "loss": 11.692, "step": 28185 }, { "epoch": 1.5348403476887682, "grad_norm": 0.5509148116583732, "learning_rate": 2.7053415638401125e-05, "loss": 11.8781, "step": 28186 }, { "epoch": 1.5348948016853512, "grad_norm": 0.5388835013163247, "learning_rate": 2.7047384081430706e-05, "loss": 11.9216, "step": 28187 }, { "epoch": 1.5349492556819342, "grad_norm": 0.5831123752442173, "learning_rate": 2.7041353091752076e-05, "loss": 11.878, "step": 28188 }, { "epoch": 1.5350037096785172, "grad_norm": 0.5942715795712129, "learning_rate": 2.70353226694121e-05, "loss": 11.8724, "step": 28189 }, { "epoch": 1.5350581636751004, "grad_norm": 0.576071406714683, "learning_rate": 2.7029292814457717e-05, "loss": 11.8425, "step": 28190 }, { "epoch": 1.5351126176716834, "grad_norm": 0.5763627257223947, "learning_rate": 2.702326352693576e-05, "loss": 11.7898, "step": 28191 }, { "epoch": 1.5351670716682664, "grad_norm": 0.543371020897107, "learning_rate": 2.7017234806893143e-05, "loss": 11.8641, "step": 28192 }, { "epoch": 1.5352215256648494, "grad_norm": 0.6112162290672735, "learning_rate": 2.7011206654376787e-05, "loss": 11.8811, "step": 28193 }, { "epoch": 1.5352759796614324, "grad_norm": 0.5228178862268024, "learning_rate": 2.7005179069433482e-05, "loss": 11.7781, "step": 28194 }, { "epoch": 1.5353304336580154, "grad_norm": 0.5842052961124035, "learning_rate": 2.6999152052110222e-05, "loss": 11.9354, "step": 28195 }, { "epoch": 1.5353848876545984, "grad_norm": 0.5523614778190836, "learning_rate": 2.6993125602453728e-05, "loss": 11.8795, "step": 28196 }, { "epoch": 1.5354393416511813, "grad_norm": 0.561699747227993, "learning_rate": 2.6987099720510933e-05, "loss": 11.7798, "step": 28197 }, { "epoch": 1.5354937956477643, "grad_norm": 0.5862358470888648, "learning_rate": 2.6981074406328732e-05, "loss": 11.894, "step": 28198 }, { "epoch": 1.5355482496443473, "grad_norm": 0.5816568902861724, "learning_rate": 2.697504965995391e-05, "loss": 11.6072, "step": 28199 }, { "epoch": 1.5356027036409303, "grad_norm": 0.5415710531098747, "learning_rate": 2.6969025481433375e-05, "loss": 11.8295, "step": 28200 }, { "epoch": 1.5356571576375133, "grad_norm": 0.5528077817902902, "learning_rate": 2.6963001870813907e-05, "loss": 11.9264, "step": 28201 }, { "epoch": 1.5357116116340963, "grad_norm": 0.5498728942280875, "learning_rate": 2.695697882814243e-05, "loss": 11.808, "step": 28202 }, { "epoch": 1.5357660656306793, "grad_norm": 0.5274560242694658, "learning_rate": 2.6950956353465697e-05, "loss": 11.847, "step": 28203 }, { "epoch": 1.5358205196272623, "grad_norm": 0.7194185033400121, "learning_rate": 2.6944934446830584e-05, "loss": 11.9278, "step": 28204 }, { "epoch": 1.5358749736238453, "grad_norm": 0.5294529582944018, "learning_rate": 2.693891310828396e-05, "loss": 11.8302, "step": 28205 }, { "epoch": 1.5359294276204283, "grad_norm": 0.5764854018767049, "learning_rate": 2.693289233787255e-05, "loss": 11.8552, "step": 28206 }, { "epoch": 1.5359838816170113, "grad_norm": 0.5182438175989933, "learning_rate": 2.6926872135643244e-05, "loss": 11.7975, "step": 28207 }, { "epoch": 1.5360383356135945, "grad_norm": 0.5586890704804036, "learning_rate": 2.6920852501642812e-05, "loss": 11.9141, "step": 28208 }, { "epoch": 1.5360927896101775, "grad_norm": 0.5515382589733375, "learning_rate": 2.6914833435918085e-05, "loss": 11.6309, "step": 28209 }, { "epoch": 1.5361472436067605, "grad_norm": 0.5639640398732918, "learning_rate": 2.6908814938515904e-05, "loss": 11.7213, "step": 28210 }, { "epoch": 1.5362016976033435, "grad_norm": 0.5368623491940676, "learning_rate": 2.6902797009482992e-05, "loss": 11.7836, "step": 28211 }, { "epoch": 1.5362561515999265, "grad_norm": 0.5347723030275935, "learning_rate": 2.6896779648866234e-05, "loss": 11.7712, "step": 28212 }, { "epoch": 1.5363106055965094, "grad_norm": 0.561264192490462, "learning_rate": 2.6890762856712338e-05, "loss": 11.9036, "step": 28213 }, { "epoch": 1.5363650595930927, "grad_norm": 0.5754915513990667, "learning_rate": 2.688474663306816e-05, "loss": 11.867, "step": 28214 }, { "epoch": 1.5364195135896757, "grad_norm": 0.5205203897511063, "learning_rate": 2.687873097798046e-05, "loss": 11.8342, "step": 28215 }, { "epoch": 1.5364739675862586, "grad_norm": 0.5193764182969883, "learning_rate": 2.6872715891495982e-05, "loss": 11.7938, "step": 28216 }, { "epoch": 1.5365284215828416, "grad_norm": 0.593055727163197, "learning_rate": 2.6866701373661552e-05, "loss": 11.9238, "step": 28217 }, { "epoch": 1.5365828755794246, "grad_norm": 0.5334897617058839, "learning_rate": 2.686068742452389e-05, "loss": 11.8103, "step": 28218 }, { "epoch": 1.5366373295760076, "grad_norm": 0.5530211029638681, "learning_rate": 2.685467404412978e-05, "loss": 11.9189, "step": 28219 }, { "epoch": 1.5366917835725906, "grad_norm": 0.5918491619100299, "learning_rate": 2.684866123252603e-05, "loss": 11.916, "step": 28220 }, { "epoch": 1.5367462375691736, "grad_norm": 0.6564766542400814, "learning_rate": 2.684264898975932e-05, "loss": 11.9384, "step": 28221 }, { "epoch": 1.5368006915657566, "grad_norm": 0.5300217143991779, "learning_rate": 2.683663731587648e-05, "loss": 11.8974, "step": 28222 }, { "epoch": 1.5368551455623396, "grad_norm": 0.6238055965656362, "learning_rate": 2.6830626210924182e-05, "loss": 11.8434, "step": 28223 }, { "epoch": 1.5369095995589226, "grad_norm": 0.4747300939338794, "learning_rate": 2.6824615674949227e-05, "loss": 11.876, "step": 28224 }, { "epoch": 1.5369640535555056, "grad_norm": 0.5202331247292236, "learning_rate": 2.6818605707998345e-05, "loss": 11.7362, "step": 28225 }, { "epoch": 1.5370185075520886, "grad_norm": 0.6266174323981987, "learning_rate": 2.681259631011822e-05, "loss": 11.8971, "step": 28226 }, { "epoch": 1.5370729615486716, "grad_norm": 0.5401195218607323, "learning_rate": 2.680658748135566e-05, "loss": 11.8222, "step": 28227 }, { "epoch": 1.5371274155452546, "grad_norm": 0.5287654703216333, "learning_rate": 2.6800579221757306e-05, "loss": 11.8862, "step": 28228 }, { "epoch": 1.5371818695418376, "grad_norm": 0.5382665462414589, "learning_rate": 2.679457153136996e-05, "loss": 11.838, "step": 28229 }, { "epoch": 1.5372363235384205, "grad_norm": 0.5244955209680553, "learning_rate": 2.678856441024028e-05, "loss": 11.7849, "step": 28230 }, { "epoch": 1.5372907775350038, "grad_norm": 0.5285389177552661, "learning_rate": 2.6782557858414992e-05, "loss": 11.8859, "step": 28231 }, { "epoch": 1.5373452315315868, "grad_norm": 0.5646328371339733, "learning_rate": 2.6776551875940847e-05, "loss": 11.8143, "step": 28232 }, { "epoch": 1.5373996855281697, "grad_norm": 0.5363893936188058, "learning_rate": 2.6770546462864478e-05, "loss": 11.8303, "step": 28233 }, { "epoch": 1.5374541395247527, "grad_norm": 0.5184131565912657, "learning_rate": 2.676454161923265e-05, "loss": 11.9607, "step": 28234 }, { "epoch": 1.5375085935213357, "grad_norm": 0.5467243664910497, "learning_rate": 2.6758537345092037e-05, "loss": 11.7396, "step": 28235 }, { "epoch": 1.5375630475179187, "grad_norm": 0.5002391440336198, "learning_rate": 2.6752533640489284e-05, "loss": 11.6786, "step": 28236 }, { "epoch": 1.537617501514502, "grad_norm": 0.5546305245961396, "learning_rate": 2.6746530505471147e-05, "loss": 11.8863, "step": 28237 }, { "epoch": 1.537671955511085, "grad_norm": 0.5463276192766239, "learning_rate": 2.6740527940084236e-05, "loss": 11.7478, "step": 28238 }, { "epoch": 1.537726409507668, "grad_norm": 0.6154320058044087, "learning_rate": 2.67345259443753e-05, "loss": 11.8903, "step": 28239 }, { "epoch": 1.537780863504251, "grad_norm": 0.5863681216047066, "learning_rate": 2.6728524518390952e-05, "loss": 11.8566, "step": 28240 }, { "epoch": 1.537835317500834, "grad_norm": 0.5603478206952835, "learning_rate": 2.672252366217788e-05, "loss": 11.8774, "step": 28241 }, { "epoch": 1.537889771497417, "grad_norm": 0.5593682753283273, "learning_rate": 2.6716523375782776e-05, "loss": 11.7486, "step": 28242 }, { "epoch": 1.537944225494, "grad_norm": 0.5579982858200164, "learning_rate": 2.6710523659252252e-05, "loss": 11.6957, "step": 28243 }, { "epoch": 1.5379986794905829, "grad_norm": 0.5395499117612986, "learning_rate": 2.670452451263301e-05, "loss": 11.9338, "step": 28244 }, { "epoch": 1.5380531334871659, "grad_norm": 0.5455376572211799, "learning_rate": 2.6698525935971685e-05, "loss": 11.7827, "step": 28245 }, { "epoch": 1.5381075874837489, "grad_norm": 0.5247406524040752, "learning_rate": 2.6692527929314893e-05, "loss": 11.8625, "step": 28246 }, { "epoch": 1.5381620414803319, "grad_norm": 0.5160767431977465, "learning_rate": 2.6686530492709316e-05, "loss": 11.7607, "step": 28247 }, { "epoch": 1.5382164954769149, "grad_norm": 0.6140882322238727, "learning_rate": 2.6680533626201544e-05, "loss": 11.9011, "step": 28248 }, { "epoch": 1.5382709494734979, "grad_norm": 0.5190865977042493, "learning_rate": 2.6674537329838267e-05, "loss": 11.8215, "step": 28249 }, { "epoch": 1.5383254034700808, "grad_norm": 0.5606977311297707, "learning_rate": 2.6668541603666052e-05, "loss": 11.7624, "step": 28250 }, { "epoch": 1.5383798574666638, "grad_norm": 0.5544608244481404, "learning_rate": 2.6662546447731594e-05, "loss": 11.866, "step": 28251 }, { "epoch": 1.5384343114632468, "grad_norm": 0.5405060388636937, "learning_rate": 2.6656551862081437e-05, "loss": 11.8598, "step": 28252 }, { "epoch": 1.5384887654598298, "grad_norm": 0.5814147566330362, "learning_rate": 2.6650557846762237e-05, "loss": 11.7744, "step": 28253 }, { "epoch": 1.5385432194564128, "grad_norm": 0.4946928011055275, "learning_rate": 2.6644564401820626e-05, "loss": 11.7776, "step": 28254 }, { "epoch": 1.538597673452996, "grad_norm": 0.5513304184837484, "learning_rate": 2.663857152730319e-05, "loss": 11.8948, "step": 28255 }, { "epoch": 1.538652127449579, "grad_norm": 0.5325891969231507, "learning_rate": 2.6632579223256515e-05, "loss": 11.8826, "step": 28256 }, { "epoch": 1.538706581446162, "grad_norm": 0.5272251203217627, "learning_rate": 2.6626587489727184e-05, "loss": 11.6687, "step": 28257 }, { "epoch": 1.538761035442745, "grad_norm": 0.5104341806424597, "learning_rate": 2.6620596326761814e-05, "loss": 11.7976, "step": 28258 }, { "epoch": 1.538815489439328, "grad_norm": 0.6044950523686626, "learning_rate": 2.6614605734407016e-05, "loss": 12.0231, "step": 28259 }, { "epoch": 1.5388699434359112, "grad_norm": 0.509313261244652, "learning_rate": 2.660861571270933e-05, "loss": 11.8104, "step": 28260 }, { "epoch": 1.5389243974324942, "grad_norm": 0.5832287795387571, "learning_rate": 2.660262626171538e-05, "loss": 11.9543, "step": 28261 }, { "epoch": 1.5389788514290772, "grad_norm": 0.5313190113670253, "learning_rate": 2.659663738147169e-05, "loss": 11.9242, "step": 28262 }, { "epoch": 1.5390333054256602, "grad_norm": 0.5440514986939275, "learning_rate": 2.659064907202489e-05, "loss": 11.9818, "step": 28263 }, { "epoch": 1.5390877594222432, "grad_norm": 0.5605878741267645, "learning_rate": 2.658466133342147e-05, "loss": 11.9153, "step": 28264 }, { "epoch": 1.5391422134188262, "grad_norm": 0.5532632315372058, "learning_rate": 2.6578674165708073e-05, "loss": 11.8579, "step": 28265 }, { "epoch": 1.5391966674154092, "grad_norm": 0.5740420566631218, "learning_rate": 2.657268756893122e-05, "loss": 11.8121, "step": 28266 }, { "epoch": 1.5392511214119922, "grad_norm": 0.5150297688552529, "learning_rate": 2.656670154313744e-05, "loss": 11.7826, "step": 28267 }, { "epoch": 1.5393055754085752, "grad_norm": 0.5225642522836905, "learning_rate": 2.6560716088373294e-05, "loss": 11.7241, "step": 28268 }, { "epoch": 1.5393600294051581, "grad_norm": 0.5335641716258503, "learning_rate": 2.655473120468537e-05, "loss": 11.8361, "step": 28269 }, { "epoch": 1.5394144834017411, "grad_norm": 0.5868537446099409, "learning_rate": 2.6548746892120136e-05, "loss": 11.8187, "step": 28270 }, { "epoch": 1.5394689373983241, "grad_norm": 0.5275408793199444, "learning_rate": 2.6542763150724192e-05, "loss": 11.8821, "step": 28271 }, { "epoch": 1.5395233913949071, "grad_norm": 0.5738982718035953, "learning_rate": 2.6536779980544024e-05, "loss": 11.7754, "step": 28272 }, { "epoch": 1.5395778453914901, "grad_norm": 0.550355726212609, "learning_rate": 2.6530797381626194e-05, "loss": 11.8614, "step": 28273 }, { "epoch": 1.5396322993880731, "grad_norm": 0.5776006089965645, "learning_rate": 2.6524815354017175e-05, "loss": 11.8015, "step": 28274 }, { "epoch": 1.539686753384656, "grad_norm": 0.5433736193633831, "learning_rate": 2.651883389776355e-05, "loss": 11.8397, "step": 28275 }, { "epoch": 1.539741207381239, "grad_norm": 0.5378596487178605, "learning_rate": 2.6512853012911787e-05, "loss": 11.7706, "step": 28276 }, { "epoch": 1.539795661377822, "grad_norm": 0.5774975567190883, "learning_rate": 2.650687269950838e-05, "loss": 11.8928, "step": 28277 }, { "epoch": 1.5398501153744053, "grad_norm": 0.670412656161244, "learning_rate": 2.650089295759989e-05, "loss": 11.7932, "step": 28278 }, { "epoch": 1.5399045693709883, "grad_norm": 0.6209052285760718, "learning_rate": 2.6494913787232735e-05, "loss": 12.094, "step": 28279 }, { "epoch": 1.5399590233675713, "grad_norm": 0.5434472006128124, "learning_rate": 2.648893518845347e-05, "loss": 11.7558, "step": 28280 }, { "epoch": 1.5400134773641543, "grad_norm": 0.507490407696909, "learning_rate": 2.6482957161308598e-05, "loss": 11.7851, "step": 28281 }, { "epoch": 1.5400679313607373, "grad_norm": 0.5185829514338437, "learning_rate": 2.6476979705844552e-05, "loss": 11.8925, "step": 28282 }, { "epoch": 1.5401223853573203, "grad_norm": 0.5818697985336496, "learning_rate": 2.647100282210787e-05, "loss": 11.7104, "step": 28283 }, { "epoch": 1.5401768393539035, "grad_norm": 0.5285137941617885, "learning_rate": 2.6465026510144975e-05, "loss": 11.8305, "step": 28284 }, { "epoch": 1.5402312933504865, "grad_norm": 0.5613587002440537, "learning_rate": 2.645905077000239e-05, "loss": 11.8033, "step": 28285 }, { "epoch": 1.5402857473470695, "grad_norm": 0.5563490881689548, "learning_rate": 2.6453075601726562e-05, "loss": 11.981, "step": 28286 }, { "epoch": 1.5403402013436525, "grad_norm": 0.5663931025263615, "learning_rate": 2.6447101005363927e-05, "loss": 11.826, "step": 28287 }, { "epoch": 1.5403946553402355, "grad_norm": 0.5531980679284013, "learning_rate": 2.6441126980960994e-05, "loss": 11.8848, "step": 28288 }, { "epoch": 1.5404491093368184, "grad_norm": 0.5424048762741972, "learning_rate": 2.643515352856416e-05, "loss": 11.7974, "step": 28289 }, { "epoch": 1.5405035633334014, "grad_norm": 0.533448142546899, "learning_rate": 2.642918064821992e-05, "loss": 11.8026, "step": 28290 }, { "epoch": 1.5405580173299844, "grad_norm": 0.5165036958476561, "learning_rate": 2.6423208339974725e-05, "loss": 11.9402, "step": 28291 }, { "epoch": 1.5406124713265674, "grad_norm": 0.5354052136056343, "learning_rate": 2.6417236603874982e-05, "loss": 11.8617, "step": 28292 }, { "epoch": 1.5406669253231504, "grad_norm": 0.592659981834093, "learning_rate": 2.641126543996717e-05, "loss": 11.8951, "step": 28293 }, { "epoch": 1.5407213793197334, "grad_norm": 0.5192023515452038, "learning_rate": 2.640529484829768e-05, "loss": 11.7969, "step": 28294 }, { "epoch": 1.5407758333163164, "grad_norm": 0.5714755675377791, "learning_rate": 2.6399324828912973e-05, "loss": 11.9247, "step": 28295 }, { "epoch": 1.5408302873128994, "grad_norm": 0.5223828682124572, "learning_rate": 2.6393355381859475e-05, "loss": 11.7491, "step": 28296 }, { "epoch": 1.5408847413094824, "grad_norm": 0.6046746840026958, "learning_rate": 2.638738650718354e-05, "loss": 11.8696, "step": 28297 }, { "epoch": 1.5409391953060654, "grad_norm": 0.5080778644015773, "learning_rate": 2.6381418204931684e-05, "loss": 11.8089, "step": 28298 }, { "epoch": 1.5409936493026484, "grad_norm": 0.5254248962893969, "learning_rate": 2.637545047515022e-05, "loss": 11.7532, "step": 28299 }, { "epoch": 1.5410481032992314, "grad_norm": 0.5641932917252481, "learning_rate": 2.6369483317885637e-05, "loss": 11.8597, "step": 28300 }, { "epoch": 1.5411025572958146, "grad_norm": 0.5175369706337605, "learning_rate": 2.636351673318427e-05, "loss": 11.7573, "step": 28301 }, { "epoch": 1.5411570112923976, "grad_norm": 0.6062429143215914, "learning_rate": 2.6357550721092538e-05, "loss": 11.9393, "step": 28302 }, { "epoch": 1.5412114652889806, "grad_norm": 0.5595891504648797, "learning_rate": 2.6351585281656875e-05, "loss": 11.7512, "step": 28303 }, { "epoch": 1.5412659192855636, "grad_norm": 0.5788012801230924, "learning_rate": 2.634562041492361e-05, "loss": 11.9586, "step": 28304 }, { "epoch": 1.5413203732821465, "grad_norm": 0.5805037700823928, "learning_rate": 2.6339656120939182e-05, "loss": 11.9215, "step": 28305 }, { "epoch": 1.5413748272787295, "grad_norm": 0.5127059839878486, "learning_rate": 2.6333692399749942e-05, "loss": 11.8413, "step": 28306 }, { "epoch": 1.5414292812753128, "grad_norm": 0.5395360288185435, "learning_rate": 2.6327729251402233e-05, "loss": 11.7857, "step": 28307 }, { "epoch": 1.5414837352718957, "grad_norm": 0.5152761726323076, "learning_rate": 2.6321766675942483e-05, "loss": 11.8383, "step": 28308 }, { "epoch": 1.5415381892684787, "grad_norm": 0.5600835552397703, "learning_rate": 2.6315804673416998e-05, "loss": 11.805, "step": 28309 }, { "epoch": 1.5415926432650617, "grad_norm": 0.5454841709735719, "learning_rate": 2.630984324387221e-05, "loss": 11.8448, "step": 28310 }, { "epoch": 1.5416470972616447, "grad_norm": 0.6253522379181685, "learning_rate": 2.6303882387354406e-05, "loss": 11.9069, "step": 28311 }, { "epoch": 1.5417015512582277, "grad_norm": 0.6219127188901813, "learning_rate": 2.629792210391e-05, "loss": 11.8694, "step": 28312 }, { "epoch": 1.5417560052548107, "grad_norm": 0.5797193422957406, "learning_rate": 2.6291962393585278e-05, "loss": 11.9399, "step": 28313 }, { "epoch": 1.5418104592513937, "grad_norm": 0.5359935669224456, "learning_rate": 2.6286003256426628e-05, "loss": 11.7806, "step": 28314 }, { "epoch": 1.5418649132479767, "grad_norm": 0.5482595131675084, "learning_rate": 2.6280044692480432e-05, "loss": 11.9322, "step": 28315 }, { "epoch": 1.5419193672445597, "grad_norm": 0.5412211758477348, "learning_rate": 2.627408670179291e-05, "loss": 11.8118, "step": 28316 }, { "epoch": 1.5419738212411427, "grad_norm": 0.5031157443018751, "learning_rate": 2.6268129284410458e-05, "loss": 11.724, "step": 28317 }, { "epoch": 1.5420282752377257, "grad_norm": 0.48082930373461896, "learning_rate": 2.6262172440379416e-05, "loss": 11.9277, "step": 28318 }, { "epoch": 1.5420827292343087, "grad_norm": 0.5756715142540825, "learning_rate": 2.625621616974607e-05, "loss": 11.8244, "step": 28319 }, { "epoch": 1.5421371832308917, "grad_norm": 0.5387402752687224, "learning_rate": 2.6250260472556777e-05, "loss": 11.8202, "step": 28320 }, { "epoch": 1.5421916372274747, "grad_norm": 0.5338966248872453, "learning_rate": 2.62443053488578e-05, "loss": 11.9323, "step": 28321 }, { "epoch": 1.5422460912240576, "grad_norm": 0.49394165854848227, "learning_rate": 2.623835079869551e-05, "loss": 11.8239, "step": 28322 }, { "epoch": 1.5423005452206406, "grad_norm": 0.5510301241991037, "learning_rate": 2.6232396822116134e-05, "loss": 11.7766, "step": 28323 }, { "epoch": 1.5423549992172239, "grad_norm": 0.5920767057702554, "learning_rate": 2.6226443419166012e-05, "loss": 11.9232, "step": 28324 }, { "epoch": 1.5424094532138068, "grad_norm": 0.5743929819507603, "learning_rate": 2.62204905898915e-05, "loss": 11.8195, "step": 28325 }, { "epoch": 1.5424639072103898, "grad_norm": 0.5439752153702302, "learning_rate": 2.6214538334338766e-05, "loss": 11.9344, "step": 28326 }, { "epoch": 1.5425183612069728, "grad_norm": 0.5646101969548867, "learning_rate": 2.620858665255418e-05, "loss": 11.8105, "step": 28327 }, { "epoch": 1.5425728152035558, "grad_norm": 0.5761307663888735, "learning_rate": 2.6202635544583975e-05, "loss": 11.7824, "step": 28328 }, { "epoch": 1.5426272692001388, "grad_norm": 0.532423942765136, "learning_rate": 2.6196685010474452e-05, "loss": 11.9044, "step": 28329 }, { "epoch": 1.542681723196722, "grad_norm": 0.5435841278020633, "learning_rate": 2.619073505027192e-05, "loss": 11.9201, "step": 28330 }, { "epoch": 1.542736177193305, "grad_norm": 0.5538508421217291, "learning_rate": 2.618478566402257e-05, "loss": 11.9541, "step": 28331 }, { "epoch": 1.542790631189888, "grad_norm": 0.6162874260229597, "learning_rate": 2.6178836851772736e-05, "loss": 11.9137, "step": 28332 }, { "epoch": 1.542845085186471, "grad_norm": 0.5373943280467628, "learning_rate": 2.6172888613568612e-05, "loss": 11.8199, "step": 28333 }, { "epoch": 1.542899539183054, "grad_norm": 0.5179828636771145, "learning_rate": 2.616694094945653e-05, "loss": 11.8554, "step": 28334 }, { "epoch": 1.542953993179637, "grad_norm": 0.48028407935061235, "learning_rate": 2.6160993859482665e-05, "loss": 11.9126, "step": 28335 }, { "epoch": 1.54300844717622, "grad_norm": 0.5225454847238858, "learning_rate": 2.6155047343693317e-05, "loss": 11.8516, "step": 28336 }, { "epoch": 1.543062901172803, "grad_norm": 0.563803946682016, "learning_rate": 2.6149101402134702e-05, "loss": 11.8689, "step": 28337 }, { "epoch": 1.543117355169386, "grad_norm": 0.5273704956752464, "learning_rate": 2.6143156034853034e-05, "loss": 11.8562, "step": 28338 }, { "epoch": 1.543171809165969, "grad_norm": 0.599175341602558, "learning_rate": 2.6137211241894576e-05, "loss": 11.8287, "step": 28339 }, { "epoch": 1.543226263162552, "grad_norm": 0.5615828578947035, "learning_rate": 2.613126702330557e-05, "loss": 11.7832, "step": 28340 }, { "epoch": 1.543280717159135, "grad_norm": 0.5481125681678248, "learning_rate": 2.6125323379132193e-05, "loss": 11.6501, "step": 28341 }, { "epoch": 1.543335171155718, "grad_norm": 0.5813305801399807, "learning_rate": 2.6119380309420727e-05, "loss": 11.8198, "step": 28342 }, { "epoch": 1.543389625152301, "grad_norm": 0.5535303105664144, "learning_rate": 2.6113437814217313e-05, "loss": 11.9125, "step": 28343 }, { "epoch": 1.543444079148884, "grad_norm": 0.5820910869739198, "learning_rate": 2.6107495893568236e-05, "loss": 11.7817, "step": 28344 }, { "epoch": 1.543498533145467, "grad_norm": 0.614254078685859, "learning_rate": 2.6101554547519626e-05, "loss": 11.8894, "step": 28345 }, { "epoch": 1.54355298714205, "grad_norm": 0.5264325906590919, "learning_rate": 2.6095613776117743e-05, "loss": 11.8236, "step": 28346 }, { "epoch": 1.543607441138633, "grad_norm": 0.5658556435382978, "learning_rate": 2.608967357940877e-05, "loss": 11.8537, "step": 28347 }, { "epoch": 1.5436618951352161, "grad_norm": 0.5796891032107981, "learning_rate": 2.608373395743886e-05, "loss": 11.9766, "step": 28348 }, { "epoch": 1.5437163491317991, "grad_norm": 0.5228768863041379, "learning_rate": 2.607779491025426e-05, "loss": 11.8632, "step": 28349 }, { "epoch": 1.543770803128382, "grad_norm": 0.5562725029840014, "learning_rate": 2.607185643790109e-05, "loss": 11.7465, "step": 28350 }, { "epoch": 1.543825257124965, "grad_norm": 0.524910704869663, "learning_rate": 2.606591854042556e-05, "loss": 11.8588, "step": 28351 }, { "epoch": 1.543879711121548, "grad_norm": 0.5637855734704784, "learning_rate": 2.6059981217873885e-05, "loss": 11.9517, "step": 28352 }, { "epoch": 1.543934165118131, "grad_norm": 0.6208971999782115, "learning_rate": 2.605404447029216e-05, "loss": 11.8617, "step": 28353 }, { "epoch": 1.5439886191147143, "grad_norm": 0.5726753186084881, "learning_rate": 2.6048108297726613e-05, "loss": 11.9586, "step": 28354 }, { "epoch": 1.5440430731112973, "grad_norm": 0.5358515479739011, "learning_rate": 2.6042172700223357e-05, "loss": 11.8994, "step": 28355 }, { "epoch": 1.5440975271078803, "grad_norm": 0.548695997930056, "learning_rate": 2.603623767782859e-05, "loss": 11.9534, "step": 28356 }, { "epoch": 1.5441519811044633, "grad_norm": 0.5525914459172369, "learning_rate": 2.603030323058845e-05, "loss": 11.8632, "step": 28357 }, { "epoch": 1.5442064351010463, "grad_norm": 0.5346785184587497, "learning_rate": 2.602436935854904e-05, "loss": 11.7681, "step": 28358 }, { "epoch": 1.5442608890976293, "grad_norm": 0.5612780373793211, "learning_rate": 2.601843606175658e-05, "loss": 11.9691, "step": 28359 }, { "epoch": 1.5443153430942123, "grad_norm": 0.5176343562940072, "learning_rate": 2.6012503340257123e-05, "loss": 11.6743, "step": 28360 }, { "epoch": 1.5443697970907952, "grad_norm": 0.5351190897668885, "learning_rate": 2.6006571194096885e-05, "loss": 11.8922, "step": 28361 }, { "epoch": 1.5444242510873782, "grad_norm": 0.522983848147743, "learning_rate": 2.6000639623321933e-05, "loss": 11.5706, "step": 28362 }, { "epoch": 1.5444787050839612, "grad_norm": 0.5916990401567385, "learning_rate": 2.5994708627978415e-05, "loss": 11.8936, "step": 28363 }, { "epoch": 1.5445331590805442, "grad_norm": 0.5807024279553158, "learning_rate": 2.5988778208112476e-05, "loss": 11.8119, "step": 28364 }, { "epoch": 1.5445876130771272, "grad_norm": 0.5416564826302289, "learning_rate": 2.5982848363770184e-05, "loss": 11.7792, "step": 28365 }, { "epoch": 1.5446420670737102, "grad_norm": 0.545381216888862, "learning_rate": 2.597691909499771e-05, "loss": 11.7271, "step": 28366 }, { "epoch": 1.5446965210702932, "grad_norm": 0.5237571080501712, "learning_rate": 2.5970990401841123e-05, "loss": 11.8412, "step": 28367 }, { "epoch": 1.5447509750668762, "grad_norm": 0.5145817796750763, "learning_rate": 2.5965062284346496e-05, "loss": 11.6807, "step": 28368 }, { "epoch": 1.5448054290634592, "grad_norm": 0.5881393218023219, "learning_rate": 2.5959134742559998e-05, "loss": 11.7546, "step": 28369 }, { "epoch": 1.5448598830600422, "grad_norm": 0.5752119891535337, "learning_rate": 2.5953207776527642e-05, "loss": 11.8851, "step": 28370 }, { "epoch": 1.5449143370566254, "grad_norm": 0.6303330141537662, "learning_rate": 2.594728138629561e-05, "loss": 11.8358, "step": 28371 }, { "epoch": 1.5449687910532084, "grad_norm": 0.5962712900911351, "learning_rate": 2.5941355571909886e-05, "loss": 11.7304, "step": 28372 }, { "epoch": 1.5450232450497914, "grad_norm": 0.5654189325468316, "learning_rate": 2.593543033341661e-05, "loss": 11.8266, "step": 28373 }, { "epoch": 1.5450776990463744, "grad_norm": 0.5788037406236803, "learning_rate": 2.5929505670861875e-05, "loss": 11.7691, "step": 28374 }, { "epoch": 1.5451321530429574, "grad_norm": 0.5022287286365017, "learning_rate": 2.59235815842917e-05, "loss": 11.7054, "step": 28375 }, { "epoch": 1.5451866070395404, "grad_norm": 0.5976819508719686, "learning_rate": 2.591765807375224e-05, "loss": 11.8247, "step": 28376 }, { "epoch": 1.5452410610361236, "grad_norm": 0.5356904181928719, "learning_rate": 2.591173513928943e-05, "loss": 11.7389, "step": 28377 }, { "epoch": 1.5452955150327066, "grad_norm": 0.5956826321487316, "learning_rate": 2.590581278094939e-05, "loss": 11.8591, "step": 28378 }, { "epoch": 1.5453499690292896, "grad_norm": 0.5405582684955066, "learning_rate": 2.589989099877821e-05, "loss": 11.7199, "step": 28379 }, { "epoch": 1.5454044230258726, "grad_norm": 0.58390632451909, "learning_rate": 2.589396979282187e-05, "loss": 11.9566, "step": 28380 }, { "epoch": 1.5454588770224555, "grad_norm": 0.5505994202984245, "learning_rate": 2.588804916312648e-05, "loss": 11.766, "step": 28381 }, { "epoch": 1.5455133310190385, "grad_norm": 0.6379255049520043, "learning_rate": 2.5882129109738018e-05, "loss": 11.895, "step": 28382 }, { "epoch": 1.5455677850156215, "grad_norm": 0.5558429276491551, "learning_rate": 2.5876209632702586e-05, "loss": 11.8519, "step": 28383 }, { "epoch": 1.5456222390122045, "grad_norm": 0.5415143297445657, "learning_rate": 2.587029073206614e-05, "loss": 11.8384, "step": 28384 }, { "epoch": 1.5456766930087875, "grad_norm": 0.5644130794259087, "learning_rate": 2.586437240787475e-05, "loss": 11.8423, "step": 28385 }, { "epoch": 1.5457311470053705, "grad_norm": 0.5344614842194936, "learning_rate": 2.585845466017448e-05, "loss": 11.8722, "step": 28386 }, { "epoch": 1.5457856010019535, "grad_norm": 0.5564494369049631, "learning_rate": 2.585253748901125e-05, "loss": 11.8582, "step": 28387 }, { "epoch": 1.5458400549985365, "grad_norm": 0.6049125728363719, "learning_rate": 2.5846620894431128e-05, "loss": 11.7804, "step": 28388 }, { "epoch": 1.5458945089951195, "grad_norm": 0.5961697799108411, "learning_rate": 2.584070487648015e-05, "loss": 11.9494, "step": 28389 }, { "epoch": 1.5459489629917025, "grad_norm": 0.5841081208904166, "learning_rate": 2.5834789435204243e-05, "loss": 11.8902, "step": 28390 }, { "epoch": 1.5460034169882855, "grad_norm": 0.5050965529600596, "learning_rate": 2.5828874570649486e-05, "loss": 11.9289, "step": 28391 }, { "epoch": 1.5460578709848685, "grad_norm": 0.5294184189231204, "learning_rate": 2.582296028286182e-05, "loss": 11.7554, "step": 28392 }, { "epoch": 1.5461123249814515, "grad_norm": 0.5135657146092494, "learning_rate": 2.581704657188727e-05, "loss": 11.7776, "step": 28393 }, { "epoch": 1.5461667789780347, "grad_norm": 0.5651805158509523, "learning_rate": 2.5811133437771785e-05, "loss": 11.8092, "step": 28394 }, { "epoch": 1.5462212329746177, "grad_norm": 0.5587154402497274, "learning_rate": 2.5805220880561364e-05, "loss": 11.8334, "step": 28395 }, { "epoch": 1.5462756869712007, "grad_norm": 0.5270724338500411, "learning_rate": 2.5799308900302044e-05, "loss": 11.7817, "step": 28396 }, { "epoch": 1.5463301409677837, "grad_norm": 0.5263739900704169, "learning_rate": 2.5793397497039683e-05, "loss": 11.7137, "step": 28397 }, { "epoch": 1.5463845949643666, "grad_norm": 0.5602872287487679, "learning_rate": 2.5787486670820348e-05, "loss": 11.9086, "step": 28398 }, { "epoch": 1.5464390489609496, "grad_norm": 0.517047938860014, "learning_rate": 2.5781576421689923e-05, "loss": 11.7993, "step": 28399 }, { "epoch": 1.5464935029575329, "grad_norm": 0.5108722857351483, "learning_rate": 2.5775666749694406e-05, "loss": 11.8566, "step": 28400 }, { "epoch": 1.5465479569541158, "grad_norm": 0.6174935293929322, "learning_rate": 2.5769757654879777e-05, "loss": 11.7961, "step": 28401 }, { "epoch": 1.5466024109506988, "grad_norm": 0.5302922625882244, "learning_rate": 2.576384913729194e-05, "loss": 11.9029, "step": 28402 }, { "epoch": 1.5466568649472818, "grad_norm": 0.5783597376976017, "learning_rate": 2.5757941196976887e-05, "loss": 11.8606, "step": 28403 }, { "epoch": 1.5467113189438648, "grad_norm": 0.5795724963024235, "learning_rate": 2.5752033833980515e-05, "loss": 11.9066, "step": 28404 }, { "epoch": 1.5467657729404478, "grad_norm": 0.5695245194364605, "learning_rate": 2.57461270483488e-05, "loss": 11.9406, "step": 28405 }, { "epoch": 1.5468202269370308, "grad_norm": 0.5543568927912289, "learning_rate": 2.574022084012765e-05, "loss": 11.8522, "step": 28406 }, { "epoch": 1.5468746809336138, "grad_norm": 0.5389756714339937, "learning_rate": 2.573431520936297e-05, "loss": 11.7696, "step": 28407 }, { "epoch": 1.5469291349301968, "grad_norm": 0.5439451675427848, "learning_rate": 2.5728410156100735e-05, "loss": 11.7786, "step": 28408 }, { "epoch": 1.5469835889267798, "grad_norm": 0.5777718417749071, "learning_rate": 2.572250568038681e-05, "loss": 11.8458, "step": 28409 }, { "epoch": 1.5470380429233628, "grad_norm": 0.5440717683471554, "learning_rate": 2.5716601782267168e-05, "loss": 11.7257, "step": 28410 }, { "epoch": 1.5470924969199458, "grad_norm": 0.5503235949718415, "learning_rate": 2.5710698461787664e-05, "loss": 11.875, "step": 28411 }, { "epoch": 1.5471469509165288, "grad_norm": 0.5311155260575879, "learning_rate": 2.5704795718994225e-05, "loss": 11.854, "step": 28412 }, { "epoch": 1.5472014049131118, "grad_norm": 0.5580733513631506, "learning_rate": 2.5698893553932778e-05, "loss": 11.8475, "step": 28413 }, { "epoch": 1.5472558589096947, "grad_norm": 0.5629690841127116, "learning_rate": 2.569299196664916e-05, "loss": 11.8143, "step": 28414 }, { "epoch": 1.5473103129062777, "grad_norm": 0.5358089708920208, "learning_rate": 2.568709095718934e-05, "loss": 11.8512, "step": 28415 }, { "epoch": 1.5473647669028607, "grad_norm": 0.5409977297627018, "learning_rate": 2.5681190525599163e-05, "loss": 11.9244, "step": 28416 }, { "epoch": 1.5474192208994437, "grad_norm": 0.638433176437136, "learning_rate": 2.5675290671924468e-05, "loss": 11.9127, "step": 28417 }, { "epoch": 1.547473674896027, "grad_norm": 0.5756050355483072, "learning_rate": 2.566939139621122e-05, "loss": 11.9142, "step": 28418 }, { "epoch": 1.54752812889261, "grad_norm": 0.5431040821555536, "learning_rate": 2.566349269850522e-05, "loss": 11.7574, "step": 28419 }, { "epoch": 1.547582582889193, "grad_norm": 0.5511736086108341, "learning_rate": 2.5657594578852386e-05, "loss": 11.8797, "step": 28420 }, { "epoch": 1.547637036885776, "grad_norm": 0.5323173722084245, "learning_rate": 2.565169703729854e-05, "loss": 11.8559, "step": 28421 }, { "epoch": 1.547691490882359, "grad_norm": 0.5596644594630502, "learning_rate": 2.5645800073889568e-05, "loss": 11.823, "step": 28422 }, { "epoch": 1.547745944878942, "grad_norm": 0.549082073241267, "learning_rate": 2.5639903688671352e-05, "loss": 11.9428, "step": 28423 }, { "epoch": 1.5478003988755251, "grad_norm": 0.5475631173156968, "learning_rate": 2.563400788168969e-05, "loss": 11.6797, "step": 28424 }, { "epoch": 1.5478548528721081, "grad_norm": 0.5149174796601396, "learning_rate": 2.5628112652990478e-05, "loss": 11.7562, "step": 28425 }, { "epoch": 1.547909306868691, "grad_norm": 0.550137163536973, "learning_rate": 2.5622218002619537e-05, "loss": 11.9227, "step": 28426 }, { "epoch": 1.547963760865274, "grad_norm": 0.6155296306761008, "learning_rate": 2.5616323930622665e-05, "loss": 12.0026, "step": 28427 }, { "epoch": 1.548018214861857, "grad_norm": 0.5078000253024684, "learning_rate": 2.5610430437045775e-05, "loss": 11.7096, "step": 28428 }, { "epoch": 1.54807266885844, "grad_norm": 0.5655854998697462, "learning_rate": 2.5604537521934614e-05, "loss": 11.7819, "step": 28429 }, { "epoch": 1.548127122855023, "grad_norm": 0.5253510272861863, "learning_rate": 2.559864518533508e-05, "loss": 11.7842, "step": 28430 }, { "epoch": 1.548181576851606, "grad_norm": 0.5411136871300276, "learning_rate": 2.5592753427292927e-05, "loss": 11.932, "step": 28431 }, { "epoch": 1.548236030848189, "grad_norm": 0.6140542561957026, "learning_rate": 2.5586862247854027e-05, "loss": 11.8188, "step": 28432 }, { "epoch": 1.548290484844772, "grad_norm": 0.5187459708397918, "learning_rate": 2.5580971647064134e-05, "loss": 11.7711, "step": 28433 }, { "epoch": 1.548344938841355, "grad_norm": 0.5237170621281736, "learning_rate": 2.5575081624969087e-05, "loss": 11.9784, "step": 28434 }, { "epoch": 1.548399392837938, "grad_norm": 0.5275190190654206, "learning_rate": 2.556919218161472e-05, "loss": 11.7082, "step": 28435 }, { "epoch": 1.548453846834521, "grad_norm": 0.5589132428169242, "learning_rate": 2.5563303317046795e-05, "loss": 11.8246, "step": 28436 }, { "epoch": 1.548508300831104, "grad_norm": 0.5084913066271427, "learning_rate": 2.5557415031311082e-05, "loss": 11.862, "step": 28437 }, { "epoch": 1.548562754827687, "grad_norm": 0.522068060313236, "learning_rate": 2.5551527324453417e-05, "loss": 11.7411, "step": 28438 }, { "epoch": 1.54861720882427, "grad_norm": 0.5739219829998441, "learning_rate": 2.554564019651954e-05, "loss": 11.8917, "step": 28439 }, { "epoch": 1.548671662820853, "grad_norm": 0.5725865084331427, "learning_rate": 2.5539753647555266e-05, "loss": 11.9694, "step": 28440 }, { "epoch": 1.5487261168174362, "grad_norm": 0.5222121633979407, "learning_rate": 2.5533867677606337e-05, "loss": 11.7907, "step": 28441 }, { "epoch": 1.5487805708140192, "grad_norm": 0.5343862732679546, "learning_rate": 2.5527982286718567e-05, "loss": 11.9875, "step": 28442 }, { "epoch": 1.5488350248106022, "grad_norm": 0.5173530999527631, "learning_rate": 2.5522097474937658e-05, "loss": 11.8422, "step": 28443 }, { "epoch": 1.5488894788071852, "grad_norm": 0.5505206385587831, "learning_rate": 2.5516213242309418e-05, "loss": 11.8241, "step": 28444 }, { "epoch": 1.5489439328037682, "grad_norm": 0.5601652186740096, "learning_rate": 2.5510329588879624e-05, "loss": 11.8184, "step": 28445 }, { "epoch": 1.5489983868003512, "grad_norm": 0.5260787115235537, "learning_rate": 2.5504446514693992e-05, "loss": 11.9064, "step": 28446 }, { "epoch": 1.5490528407969344, "grad_norm": 0.5527946293696209, "learning_rate": 2.549856401979829e-05, "loss": 11.8156, "step": 28447 }, { "epoch": 1.5491072947935174, "grad_norm": 0.5783068020689694, "learning_rate": 2.5492682104238207e-05, "loss": 11.8586, "step": 28448 }, { "epoch": 1.5491617487901004, "grad_norm": 0.5013085575154154, "learning_rate": 2.5486800768059527e-05, "loss": 11.7434, "step": 28449 }, { "epoch": 1.5492162027866834, "grad_norm": 0.4907623554578605, "learning_rate": 2.548092001130801e-05, "loss": 11.788, "step": 28450 }, { "epoch": 1.5492706567832664, "grad_norm": 0.60379728605464, "learning_rate": 2.5475039834029324e-05, "loss": 11.7984, "step": 28451 }, { "epoch": 1.5493251107798494, "grad_norm": 0.5735537390763572, "learning_rate": 2.546916023626925e-05, "loss": 11.832, "step": 28452 }, { "epoch": 1.5493795647764323, "grad_norm": 0.5880665310677062, "learning_rate": 2.5463281218073466e-05, "loss": 11.8112, "step": 28453 }, { "epoch": 1.5494340187730153, "grad_norm": 0.5662966118919047, "learning_rate": 2.5457402779487728e-05, "loss": 12.0215, "step": 28454 }, { "epoch": 1.5494884727695983, "grad_norm": 0.5487430492166419, "learning_rate": 2.54515249205577e-05, "loss": 11.8194, "step": 28455 }, { "epoch": 1.5495429267661813, "grad_norm": 0.6073259437457524, "learning_rate": 2.544564764132915e-05, "loss": 11.9199, "step": 28456 }, { "epoch": 1.5495973807627643, "grad_norm": 0.5110416125123185, "learning_rate": 2.543977094184775e-05, "loss": 11.9007, "step": 28457 }, { "epoch": 1.5496518347593473, "grad_norm": 0.6010475075449443, "learning_rate": 2.5433894822159155e-05, "loss": 11.7367, "step": 28458 }, { "epoch": 1.5497062887559303, "grad_norm": 0.5799295765125994, "learning_rate": 2.54280192823091e-05, "loss": 11.7556, "step": 28459 }, { "epoch": 1.5497607427525133, "grad_norm": 0.5737195536592768, "learning_rate": 2.542214432234331e-05, "loss": 11.8661, "step": 28460 }, { "epoch": 1.5498151967490963, "grad_norm": 0.5060511930665154, "learning_rate": 2.5416269942307404e-05, "loss": 11.8723, "step": 28461 }, { "epoch": 1.5498696507456793, "grad_norm": 0.4857564571140649, "learning_rate": 2.5410396142247107e-05, "loss": 11.8094, "step": 28462 }, { "epoch": 1.5499241047422623, "grad_norm": 0.5395258894578068, "learning_rate": 2.5404522922208064e-05, "loss": 11.8783, "step": 28463 }, { "epoch": 1.5499785587388455, "grad_norm": 0.5520990718572701, "learning_rate": 2.539865028223598e-05, "loss": 11.8394, "step": 28464 }, { "epoch": 1.5500330127354285, "grad_norm": 0.5408865396162762, "learning_rate": 2.5392778222376467e-05, "loss": 11.9677, "step": 28465 }, { "epoch": 1.5500874667320115, "grad_norm": 0.5381429081715795, "learning_rate": 2.5386906742675265e-05, "loss": 11.8065, "step": 28466 }, { "epoch": 1.5501419207285945, "grad_norm": 0.5616159830219871, "learning_rate": 2.538103584317798e-05, "loss": 11.9628, "step": 28467 }, { "epoch": 1.5501963747251775, "grad_norm": 0.5400137642162649, "learning_rate": 2.5375165523930256e-05, "loss": 11.7801, "step": 28468 }, { "epoch": 1.5502508287217605, "grad_norm": 0.5158123918662787, "learning_rate": 2.5369295784977776e-05, "loss": 11.8542, "step": 28469 }, { "epoch": 1.5503052827183437, "grad_norm": 0.5271437661328544, "learning_rate": 2.536342662636614e-05, "loss": 11.7967, "step": 28470 }, { "epoch": 1.5503597367149267, "grad_norm": 0.5534959222742973, "learning_rate": 2.5357558048141017e-05, "loss": 11.8952, "step": 28471 }, { "epoch": 1.5504141907115097, "grad_norm": 0.5482585284101907, "learning_rate": 2.5351690050348075e-05, "loss": 11.9437, "step": 28472 }, { "epoch": 1.5504686447080926, "grad_norm": 0.5579770962441959, "learning_rate": 2.534582263303288e-05, "loss": 11.8728, "step": 28473 }, { "epoch": 1.5505230987046756, "grad_norm": 0.8902138915915079, "learning_rate": 2.533995579624111e-05, "loss": 11.7841, "step": 28474 }, { "epoch": 1.5505775527012586, "grad_norm": 0.5406013223693736, "learning_rate": 2.5334089540018336e-05, "loss": 11.6726, "step": 28475 }, { "epoch": 1.5506320066978416, "grad_norm": 0.6255594525097159, "learning_rate": 2.5328223864410227e-05, "loss": 11.8823, "step": 28476 }, { "epoch": 1.5506864606944246, "grad_norm": 0.5325474589054756, "learning_rate": 2.5322358769462383e-05, "loss": 11.8107, "step": 28477 }, { "epoch": 1.5507409146910076, "grad_norm": 0.5368601359246209, "learning_rate": 2.531649425522037e-05, "loss": 11.68, "step": 28478 }, { "epoch": 1.5507953686875906, "grad_norm": 0.5529466446931488, "learning_rate": 2.5310630321729846e-05, "loss": 11.8102, "step": 28479 }, { "epoch": 1.5508498226841736, "grad_norm": 0.6059228591987602, "learning_rate": 2.5304766969036363e-05, "loss": 11.9629, "step": 28480 }, { "epoch": 1.5509042766807566, "grad_norm": 0.5819827990530834, "learning_rate": 2.5298904197185557e-05, "loss": 11.8608, "step": 28481 }, { "epoch": 1.5509587306773396, "grad_norm": 0.5142102163683073, "learning_rate": 2.5293042006222976e-05, "loss": 11.8563, "step": 28482 }, { "epoch": 1.5510131846739226, "grad_norm": 0.5319599480372926, "learning_rate": 2.5287180396194233e-05, "loss": 11.6907, "step": 28483 }, { "epoch": 1.5510676386705056, "grad_norm": 0.5819049921814107, "learning_rate": 2.528131936714493e-05, "loss": 11.9163, "step": 28484 }, { "epoch": 1.5511220926670886, "grad_norm": 0.5380853151840385, "learning_rate": 2.5275458919120598e-05, "loss": 11.7327, "step": 28485 }, { "epoch": 1.5511765466636716, "grad_norm": 0.5537818658038224, "learning_rate": 2.526959905216685e-05, "loss": 11.7454, "step": 28486 }, { "epoch": 1.5512310006602545, "grad_norm": 0.6021263448844547, "learning_rate": 2.526373976632923e-05, "loss": 11.9231, "step": 28487 }, { "epoch": 1.5512854546568378, "grad_norm": 0.6145604956693037, "learning_rate": 2.5257881061653277e-05, "loss": 11.9234, "step": 28488 }, { "epoch": 1.5513399086534208, "grad_norm": 0.5299598670829708, "learning_rate": 2.5252022938184594e-05, "loss": 11.8107, "step": 28489 }, { "epoch": 1.5513943626500037, "grad_norm": 0.5900073754156276, "learning_rate": 2.5246165395968702e-05, "loss": 11.843, "step": 28490 }, { "epoch": 1.5514488166465867, "grad_norm": 0.587429098817428, "learning_rate": 2.5240308435051187e-05, "loss": 11.9314, "step": 28491 }, { "epoch": 1.5515032706431697, "grad_norm": 0.5925412366155998, "learning_rate": 2.523445205547753e-05, "loss": 11.9291, "step": 28492 }, { "epoch": 1.551557724639753, "grad_norm": 0.4975812315264232, "learning_rate": 2.5228596257293325e-05, "loss": 11.6459, "step": 28493 }, { "epoch": 1.551612178636336, "grad_norm": 0.5454202643079502, "learning_rate": 2.522274104054413e-05, "loss": 11.6712, "step": 28494 }, { "epoch": 1.551666632632919, "grad_norm": 0.6599507266548589, "learning_rate": 2.5216886405275396e-05, "loss": 11.8713, "step": 28495 }, { "epoch": 1.551721086629502, "grad_norm": 0.5091180273869543, "learning_rate": 2.5211032351532738e-05, "loss": 11.6595, "step": 28496 }, { "epoch": 1.551775540626085, "grad_norm": 0.5563702209320073, "learning_rate": 2.5205178879361624e-05, "loss": 11.8779, "step": 28497 }, { "epoch": 1.551829994622668, "grad_norm": 0.5664771627422197, "learning_rate": 2.5199325988807553e-05, "loss": 11.8834, "step": 28498 }, { "epoch": 1.551884448619251, "grad_norm": 0.6076489917821639, "learning_rate": 2.5193473679916103e-05, "loss": 11.87, "step": 28499 }, { "epoch": 1.551938902615834, "grad_norm": 0.5669662502693399, "learning_rate": 2.518762195273272e-05, "loss": 11.8221, "step": 28500 }, { "epoch": 1.5519933566124169, "grad_norm": 0.5707755767582692, "learning_rate": 2.5181770807302953e-05, "loss": 11.7684, "step": 28501 }, { "epoch": 1.5520478106089999, "grad_norm": 0.5518388417723734, "learning_rate": 2.517592024367226e-05, "loss": 11.8613, "step": 28502 }, { "epoch": 1.5521022646055829, "grad_norm": 0.5008864884307273, "learning_rate": 2.517007026188619e-05, "loss": 11.7114, "step": 28503 }, { "epoch": 1.5521567186021659, "grad_norm": 0.4934029122753554, "learning_rate": 2.5164220861990174e-05, "loss": 11.7556, "step": 28504 }, { "epoch": 1.5522111725987489, "grad_norm": 0.5584005164458216, "learning_rate": 2.515837204402972e-05, "loss": 11.8764, "step": 28505 }, { "epoch": 1.5522656265953318, "grad_norm": 0.5847427836113182, "learning_rate": 2.5152523808050353e-05, "loss": 11.7411, "step": 28506 }, { "epoch": 1.5523200805919148, "grad_norm": 0.5587139265149721, "learning_rate": 2.5146676154097504e-05, "loss": 11.7873, "step": 28507 }, { "epoch": 1.5523745345884978, "grad_norm": 0.5743506563430758, "learning_rate": 2.5140829082216634e-05, "loss": 11.9409, "step": 28508 }, { "epoch": 1.5524289885850808, "grad_norm": 0.6032488970331529, "learning_rate": 2.5134982592453248e-05, "loss": 11.9309, "step": 28509 }, { "epoch": 1.5524834425816638, "grad_norm": 0.5852034334633738, "learning_rate": 2.5129136684852773e-05, "loss": 11.8758, "step": 28510 }, { "epoch": 1.552537896578247, "grad_norm": 0.5936254615136347, "learning_rate": 2.512329135946072e-05, "loss": 11.7461, "step": 28511 }, { "epoch": 1.55259235057483, "grad_norm": 0.6038112322972872, "learning_rate": 2.511744661632247e-05, "loss": 11.9481, "step": 28512 }, { "epoch": 1.552646804571413, "grad_norm": 0.57374905226439, "learning_rate": 2.5111602455483552e-05, "loss": 11.8354, "step": 28513 }, { "epoch": 1.552701258567996, "grad_norm": 0.6271858490672713, "learning_rate": 2.510575887698934e-05, "loss": 11.9047, "step": 28514 }, { "epoch": 1.552755712564579, "grad_norm": 0.5534109474902617, "learning_rate": 2.5099915880885328e-05, "loss": 11.8675, "step": 28515 }, { "epoch": 1.552810166561162, "grad_norm": 0.6694132274300042, "learning_rate": 2.5094073467216906e-05, "loss": 11.8989, "step": 28516 }, { "epoch": 1.5528646205577452, "grad_norm": 0.571900046650996, "learning_rate": 2.508823163602957e-05, "loss": 11.9213, "step": 28517 }, { "epoch": 1.5529190745543282, "grad_norm": 0.526527084733199, "learning_rate": 2.5082390387368694e-05, "loss": 11.8168, "step": 28518 }, { "epoch": 1.5529735285509112, "grad_norm": 0.5603474863790836, "learning_rate": 2.5076549721279685e-05, "loss": 11.9179, "step": 28519 }, { "epoch": 1.5530279825474942, "grad_norm": 0.5400150236607307, "learning_rate": 2.5070709637807998e-05, "loss": 11.7396, "step": 28520 }, { "epoch": 1.5530824365440772, "grad_norm": 0.5325796354521942, "learning_rate": 2.5064870136999064e-05, "loss": 11.785, "step": 28521 }, { "epoch": 1.5531368905406602, "grad_norm": 0.5709758062936658, "learning_rate": 2.5059031218898243e-05, "loss": 11.92, "step": 28522 }, { "epoch": 1.5531913445372432, "grad_norm": 0.5419729540075258, "learning_rate": 2.505319288355098e-05, "loss": 11.8677, "step": 28523 }, { "epoch": 1.5532457985338262, "grad_norm": 0.5044308542791872, "learning_rate": 2.5047355131002638e-05, "loss": 11.9088, "step": 28524 }, { "epoch": 1.5533002525304092, "grad_norm": 0.5875763905242495, "learning_rate": 2.5041517961298655e-05, "loss": 11.8522, "step": 28525 }, { "epoch": 1.5533547065269921, "grad_norm": 0.5436673170751153, "learning_rate": 2.5035681374484375e-05, "loss": 11.6491, "step": 28526 }, { "epoch": 1.5534091605235751, "grad_norm": 0.5018654095517917, "learning_rate": 2.502984537060524e-05, "loss": 11.8711, "step": 28527 }, { "epoch": 1.5534636145201581, "grad_norm": 0.5694929635891186, "learning_rate": 2.5024009949706596e-05, "loss": 11.8014, "step": 28528 }, { "epoch": 1.5535180685167411, "grad_norm": 0.555056874138216, "learning_rate": 2.5018175111833796e-05, "loss": 11.6892, "step": 28529 }, { "epoch": 1.5535725225133241, "grad_norm": 0.54490926612928, "learning_rate": 2.5012340857032267e-05, "loss": 11.819, "step": 28530 }, { "epoch": 1.553626976509907, "grad_norm": 0.5463372306875046, "learning_rate": 2.5006507185347317e-05, "loss": 11.8369, "step": 28531 }, { "epoch": 1.55368143050649, "grad_norm": 0.5448397787529085, "learning_rate": 2.500067409682435e-05, "loss": 11.8249, "step": 28532 }, { "epoch": 1.553735884503073, "grad_norm": 0.5675516686988226, "learning_rate": 2.4994841591508754e-05, "loss": 11.9393, "step": 28533 }, { "epoch": 1.5537903384996563, "grad_norm": 0.5267371689428354, "learning_rate": 2.4989009669445807e-05, "loss": 11.5129, "step": 28534 }, { "epoch": 1.5538447924962393, "grad_norm": 0.5425475610079252, "learning_rate": 2.4983178330680936e-05, "loss": 11.8204, "step": 28535 }, { "epoch": 1.5538992464928223, "grad_norm": 0.549845211647986, "learning_rate": 2.4977347575259425e-05, "loss": 11.8349, "step": 28536 }, { "epoch": 1.5539537004894053, "grad_norm": 0.4935824476753059, "learning_rate": 2.4971517403226663e-05, "loss": 11.867, "step": 28537 }, { "epoch": 1.5540081544859883, "grad_norm": 0.6923932588157574, "learning_rate": 2.496568781462797e-05, "loss": 11.9196, "step": 28538 }, { "epoch": 1.5540626084825713, "grad_norm": 0.5260881989908586, "learning_rate": 2.495985880950863e-05, "loss": 11.773, "step": 28539 }, { "epoch": 1.5541170624791545, "grad_norm": 0.5364431472614594, "learning_rate": 2.495403038791405e-05, "loss": 11.7791, "step": 28540 }, { "epoch": 1.5541715164757375, "grad_norm": 0.5812548888784491, "learning_rate": 2.4948202549889488e-05, "loss": 11.7457, "step": 28541 }, { "epoch": 1.5542259704723205, "grad_norm": 0.5919997638028225, "learning_rate": 2.494237529548029e-05, "loss": 11.9624, "step": 28542 }, { "epoch": 1.5542804244689035, "grad_norm": 0.5281734432144186, "learning_rate": 2.4936548624731792e-05, "loss": 11.9087, "step": 28543 }, { "epoch": 1.5543348784654865, "grad_norm": 0.5807438940712529, "learning_rate": 2.4930722537689256e-05, "loss": 11.803, "step": 28544 }, { "epoch": 1.5543893324620695, "grad_norm": 0.5800437925530684, "learning_rate": 2.4924897034398033e-05, "loss": 11.782, "step": 28545 }, { "epoch": 1.5544437864586524, "grad_norm": 0.5268069571755901, "learning_rate": 2.4919072114903374e-05, "loss": 11.8909, "step": 28546 }, { "epoch": 1.5544982404552354, "grad_norm": 0.545833204562664, "learning_rate": 2.4913247779250636e-05, "loss": 11.8384, "step": 28547 }, { "epoch": 1.5545526944518184, "grad_norm": 0.6160011127069638, "learning_rate": 2.490742402748507e-05, "loss": 11.7852, "step": 28548 }, { "epoch": 1.5546071484484014, "grad_norm": 0.5290535096343847, "learning_rate": 2.490160085965194e-05, "loss": 11.9017, "step": 28549 }, { "epoch": 1.5546616024449844, "grad_norm": 0.571680133415217, "learning_rate": 2.4895778275796587e-05, "loss": 11.8059, "step": 28550 }, { "epoch": 1.5547160564415674, "grad_norm": 0.6052844268196166, "learning_rate": 2.4889956275964232e-05, "loss": 11.8639, "step": 28551 }, { "epoch": 1.5547705104381504, "grad_norm": 0.6723872804078066, "learning_rate": 2.4884134860200194e-05, "loss": 11.8993, "step": 28552 }, { "epoch": 1.5548249644347334, "grad_norm": 0.5220939478677491, "learning_rate": 2.4878314028549686e-05, "loss": 11.6449, "step": 28553 }, { "epoch": 1.5548794184313164, "grad_norm": 0.5959196608776058, "learning_rate": 2.4872493781058015e-05, "loss": 11.8946, "step": 28554 }, { "epoch": 1.5549338724278994, "grad_norm": 0.5283774698462669, "learning_rate": 2.486667411777046e-05, "loss": 11.7955, "step": 28555 }, { "epoch": 1.5549883264244824, "grad_norm": 0.5808798509977745, "learning_rate": 2.4860855038732223e-05, "loss": 11.8956, "step": 28556 }, { "epoch": 1.5550427804210654, "grad_norm": 0.5713598747774712, "learning_rate": 2.4855036543988598e-05, "loss": 11.9146, "step": 28557 }, { "epoch": 1.5550972344176486, "grad_norm": 0.5475176573672785, "learning_rate": 2.4849218633584814e-05, "loss": 11.8442, "step": 28558 }, { "epoch": 1.5551516884142316, "grad_norm": 0.5350549055637662, "learning_rate": 2.4843401307566082e-05, "loss": 11.8207, "step": 28559 }, { "epoch": 1.5552061424108146, "grad_norm": 0.704467150637401, "learning_rate": 2.483758456597769e-05, "loss": 11.8277, "step": 28560 }, { "epoch": 1.5552605964073976, "grad_norm": 0.5585654171172186, "learning_rate": 2.483176840886482e-05, "loss": 11.8686, "step": 28561 }, { "epoch": 1.5553150504039805, "grad_norm": 0.6053319547903152, "learning_rate": 2.4825952836272747e-05, "loss": 11.8209, "step": 28562 }, { "epoch": 1.5553695044005638, "grad_norm": 0.58797267331446, "learning_rate": 2.4820137848246648e-05, "loss": 11.8193, "step": 28563 }, { "epoch": 1.5554239583971468, "grad_norm": 0.5585981773514379, "learning_rate": 2.4814323444831787e-05, "loss": 11.7418, "step": 28564 }, { "epoch": 1.5554784123937297, "grad_norm": 0.632299090688014, "learning_rate": 2.480850962607334e-05, "loss": 11.8569, "step": 28565 }, { "epoch": 1.5555328663903127, "grad_norm": 0.5404204742042362, "learning_rate": 2.4802696392016522e-05, "loss": 11.7614, "step": 28566 }, { "epoch": 1.5555873203868957, "grad_norm": 0.5307115052866692, "learning_rate": 2.4796883742706613e-05, "loss": 11.8431, "step": 28567 }, { "epoch": 1.5556417743834787, "grad_norm": 0.5748895070945195, "learning_rate": 2.479107167818868e-05, "loss": 11.8516, "step": 28568 }, { "epoch": 1.5556962283800617, "grad_norm": 0.5185364565502456, "learning_rate": 2.4785260198507987e-05, "loss": 11.7532, "step": 28569 }, { "epoch": 1.5557506823766447, "grad_norm": 0.5824939631145019, "learning_rate": 2.4779449303709758e-05, "loss": 11.8828, "step": 28570 }, { "epoch": 1.5558051363732277, "grad_norm": 0.5823898208825926, "learning_rate": 2.477363899383911e-05, "loss": 11.8927, "step": 28571 }, { "epoch": 1.5558595903698107, "grad_norm": 0.498832121471464, "learning_rate": 2.4767829268941302e-05, "loss": 11.8058, "step": 28572 }, { "epoch": 1.5559140443663937, "grad_norm": 0.5967633998287475, "learning_rate": 2.4762020129061427e-05, "loss": 11.8112, "step": 28573 }, { "epoch": 1.5559684983629767, "grad_norm": 0.5864411539195628, "learning_rate": 2.4756211574244735e-05, "loss": 11.9025, "step": 28574 }, { "epoch": 1.5560229523595597, "grad_norm": 0.5456037698325555, "learning_rate": 2.4750403604536333e-05, "loss": 11.8419, "step": 28575 }, { "epoch": 1.5560774063561427, "grad_norm": 0.5512963607380309, "learning_rate": 2.4744596219981418e-05, "loss": 11.7866, "step": 28576 }, { "epoch": 1.5561318603527257, "grad_norm": 0.5508067364195802, "learning_rate": 2.4738789420625195e-05, "loss": 11.7468, "step": 28577 }, { "epoch": 1.5561863143493087, "grad_norm": 0.6184534697758933, "learning_rate": 2.4732983206512706e-05, "loss": 11.9362, "step": 28578 }, { "epoch": 1.5562407683458916, "grad_norm": 0.5479859137355779, "learning_rate": 2.47271775776892e-05, "loss": 11.8337, "step": 28579 }, { "epoch": 1.5562952223424746, "grad_norm": 0.5082027032151528, "learning_rate": 2.472137253419974e-05, "loss": 11.7432, "step": 28580 }, { "epoch": 1.5563496763390579, "grad_norm": 0.49428153199139857, "learning_rate": 2.4715568076089522e-05, "loss": 11.7991, "step": 28581 }, { "epoch": 1.5564041303356408, "grad_norm": 0.5589649387211867, "learning_rate": 2.4709764203403697e-05, "loss": 11.8683, "step": 28582 }, { "epoch": 1.5564585843322238, "grad_norm": 0.53255396563382, "learning_rate": 2.470396091618734e-05, "loss": 11.7046, "step": 28583 }, { "epoch": 1.5565130383288068, "grad_norm": 0.4977437695350165, "learning_rate": 2.4698158214485644e-05, "loss": 11.8066, "step": 28584 }, { "epoch": 1.5565674923253898, "grad_norm": 0.5626315967222058, "learning_rate": 2.4692356098343673e-05, "loss": 11.8381, "step": 28585 }, { "epoch": 1.5566219463219728, "grad_norm": 0.5286919067641952, "learning_rate": 2.4686554567806597e-05, "loss": 11.8094, "step": 28586 }, { "epoch": 1.556676400318556, "grad_norm": 0.553498917671147, "learning_rate": 2.4680753622919496e-05, "loss": 11.8322, "step": 28587 }, { "epoch": 1.556730854315139, "grad_norm": 0.5602149080639953, "learning_rate": 2.4674953263727464e-05, "loss": 11.9096, "step": 28588 }, { "epoch": 1.556785308311722, "grad_norm": 0.5276623364621802, "learning_rate": 2.4669153490275652e-05, "loss": 11.7724, "step": 28589 }, { "epoch": 1.556839762308305, "grad_norm": 0.49386661278834587, "learning_rate": 2.466335430260911e-05, "loss": 11.7903, "step": 28590 }, { "epoch": 1.556894216304888, "grad_norm": 0.5721983479039223, "learning_rate": 2.4657555700772962e-05, "loss": 11.7868, "step": 28591 }, { "epoch": 1.556948670301471, "grad_norm": 0.5269906038780431, "learning_rate": 2.4651757684812317e-05, "loss": 11.9258, "step": 28592 }, { "epoch": 1.557003124298054, "grad_norm": 0.6267277822180448, "learning_rate": 2.4645960254772215e-05, "loss": 11.8316, "step": 28593 }, { "epoch": 1.557057578294637, "grad_norm": 0.5675082189744377, "learning_rate": 2.4640163410697793e-05, "loss": 11.654, "step": 28594 }, { "epoch": 1.55711203229122, "grad_norm": 0.5715847666698621, "learning_rate": 2.4634367152634062e-05, "loss": 11.7683, "step": 28595 }, { "epoch": 1.557166486287803, "grad_norm": 0.5335686527000096, "learning_rate": 2.4628571480626163e-05, "loss": 11.8012, "step": 28596 }, { "epoch": 1.557220940284386, "grad_norm": 0.5619368982704767, "learning_rate": 2.4622776394719137e-05, "loss": 11.7206, "step": 28597 }, { "epoch": 1.557275394280969, "grad_norm": 0.5735706142849618, "learning_rate": 2.4616981894958013e-05, "loss": 11.8803, "step": 28598 }, { "epoch": 1.557329848277552, "grad_norm": 0.5575511692095365, "learning_rate": 2.4611187981387907e-05, "loss": 11.717, "step": 28599 }, { "epoch": 1.557384302274135, "grad_norm": 0.5332991999274577, "learning_rate": 2.4605394654053814e-05, "loss": 11.6635, "step": 28600 }, { "epoch": 1.557438756270718, "grad_norm": 0.5375788513377285, "learning_rate": 2.4599601913000846e-05, "loss": 11.8682, "step": 28601 }, { "epoch": 1.557493210267301, "grad_norm": 0.5642020985469542, "learning_rate": 2.459380975827399e-05, "loss": 11.8607, "step": 28602 }, { "epoch": 1.557547664263884, "grad_norm": 0.5423063000863227, "learning_rate": 2.4588018189918317e-05, "loss": 11.8366, "step": 28603 }, { "epoch": 1.5576021182604671, "grad_norm": 0.5343092928001854, "learning_rate": 2.458222720797888e-05, "loss": 11.944, "step": 28604 }, { "epoch": 1.5576565722570501, "grad_norm": 0.4843757052977693, "learning_rate": 2.457643681250068e-05, "loss": 11.9183, "step": 28605 }, { "epoch": 1.5577110262536331, "grad_norm": 0.548516745397688, "learning_rate": 2.457064700352877e-05, "loss": 11.8403, "step": 28606 }, { "epoch": 1.557765480250216, "grad_norm": 0.5216081875371917, "learning_rate": 2.4564857781108165e-05, "loss": 11.7289, "step": 28607 }, { "epoch": 1.557819934246799, "grad_norm": 0.508128508668198, "learning_rate": 2.4559069145283852e-05, "loss": 11.7655, "step": 28608 }, { "epoch": 1.557874388243382, "grad_norm": 0.5711385745905865, "learning_rate": 2.4553281096100888e-05, "loss": 11.7131, "step": 28609 }, { "epoch": 1.5579288422399653, "grad_norm": 0.6333255154336631, "learning_rate": 2.4547493633604235e-05, "loss": 11.7489, "step": 28610 }, { "epoch": 1.5579832962365483, "grad_norm": 0.5312599057644889, "learning_rate": 2.4541706757838957e-05, "loss": 11.7314, "step": 28611 }, { "epoch": 1.5580377502331313, "grad_norm": 0.5378413349580727, "learning_rate": 2.4535920468849993e-05, "loss": 11.8509, "step": 28612 }, { "epoch": 1.5580922042297143, "grad_norm": 0.5637246262646303, "learning_rate": 2.45301347666824e-05, "loss": 11.8517, "step": 28613 }, { "epoch": 1.5581466582262973, "grad_norm": 0.5419334294028164, "learning_rate": 2.4524349651381095e-05, "loss": 11.6795, "step": 28614 }, { "epoch": 1.5582011122228803, "grad_norm": 0.6493976749345264, "learning_rate": 2.4518565122991112e-05, "loss": 11.8643, "step": 28615 }, { "epoch": 1.5582555662194633, "grad_norm": 0.5473265317011902, "learning_rate": 2.4512781181557455e-05, "loss": 11.6634, "step": 28616 }, { "epoch": 1.5583100202160463, "grad_norm": 0.5643660364536964, "learning_rate": 2.4506997827125067e-05, "loss": 11.879, "step": 28617 }, { "epoch": 1.5583644742126292, "grad_norm": 0.5333052829511333, "learning_rate": 2.4501215059738902e-05, "loss": 11.8666, "step": 28618 }, { "epoch": 1.5584189282092122, "grad_norm": 0.533843454397603, "learning_rate": 2.449543287944397e-05, "loss": 11.7767, "step": 28619 }, { "epoch": 1.5584733822057952, "grad_norm": 0.5117656452129317, "learning_rate": 2.4489651286285186e-05, "loss": 11.8108, "step": 28620 }, { "epoch": 1.5585278362023782, "grad_norm": 0.5018256139928625, "learning_rate": 2.4483870280307576e-05, "loss": 11.7305, "step": 28621 }, { "epoch": 1.5585822901989612, "grad_norm": 0.5154778160752138, "learning_rate": 2.4478089861556007e-05, "loss": 11.7754, "step": 28622 }, { "epoch": 1.5586367441955442, "grad_norm": 0.5008525101542722, "learning_rate": 2.4472310030075518e-05, "loss": 11.9024, "step": 28623 }, { "epoch": 1.5586911981921272, "grad_norm": 0.5203044454204292, "learning_rate": 2.446653078591099e-05, "loss": 11.8236, "step": 28624 }, { "epoch": 1.5587456521887102, "grad_norm": 0.5297525290030621, "learning_rate": 2.4460752129107377e-05, "loss": 11.865, "step": 28625 }, { "epoch": 1.5588001061852932, "grad_norm": 0.5086130964742646, "learning_rate": 2.4454974059709644e-05, "loss": 11.7829, "step": 28626 }, { "epoch": 1.5588545601818764, "grad_norm": 0.5712787289226434, "learning_rate": 2.444919657776269e-05, "loss": 11.8218, "step": 28627 }, { "epoch": 1.5589090141784594, "grad_norm": 0.5381708369746944, "learning_rate": 2.4443419683311496e-05, "loss": 11.9287, "step": 28628 }, { "epoch": 1.5589634681750424, "grad_norm": 0.5295786602324816, "learning_rate": 2.4437643376400888e-05, "loss": 11.8455, "step": 28629 }, { "epoch": 1.5590179221716254, "grad_norm": 0.5203389601693851, "learning_rate": 2.4431867657075846e-05, "loss": 11.8296, "step": 28630 }, { "epoch": 1.5590723761682084, "grad_norm": 0.6216417078768269, "learning_rate": 2.4426092525381294e-05, "loss": 11.8956, "step": 28631 }, { "epoch": 1.5591268301647914, "grad_norm": 0.5659958596684848, "learning_rate": 2.4420317981362094e-05, "loss": 11.8099, "step": 28632 }, { "epoch": 1.5591812841613746, "grad_norm": 0.5445559846984509, "learning_rate": 2.4414544025063202e-05, "loss": 11.8889, "step": 28633 }, { "epoch": 1.5592357381579576, "grad_norm": 0.5817520990401447, "learning_rate": 2.4408770656529468e-05, "loss": 11.9012, "step": 28634 }, { "epoch": 1.5592901921545406, "grad_norm": 0.5697807847156862, "learning_rate": 2.440299787580583e-05, "loss": 11.825, "step": 28635 }, { "epoch": 1.5593446461511236, "grad_norm": 0.524748583111397, "learning_rate": 2.439722568293713e-05, "loss": 11.9108, "step": 28636 }, { "epoch": 1.5593991001477066, "grad_norm": 0.5784674837697873, "learning_rate": 2.439145407796829e-05, "loss": 11.7483, "step": 28637 }, { "epoch": 1.5594535541442895, "grad_norm": 0.5499970080038136, "learning_rate": 2.4385683060944244e-05, "loss": 11.6448, "step": 28638 }, { "epoch": 1.5595080081408725, "grad_norm": 0.5523570417335587, "learning_rate": 2.437991263190974e-05, "loss": 11.8614, "step": 28639 }, { "epoch": 1.5595624621374555, "grad_norm": 0.5371483363138093, "learning_rate": 2.4374142790909716e-05, "loss": 11.6866, "step": 28640 }, { "epoch": 1.5596169161340385, "grad_norm": 0.5225912993809458, "learning_rate": 2.4368373537989065e-05, "loss": 11.7559, "step": 28641 }, { "epoch": 1.5596713701306215, "grad_norm": 0.5885400975090556, "learning_rate": 2.4362604873192608e-05, "loss": 11.9619, "step": 28642 }, { "epoch": 1.5597258241272045, "grad_norm": 0.6045698927878795, "learning_rate": 2.4356836796565242e-05, "loss": 11.9076, "step": 28643 }, { "epoch": 1.5597802781237875, "grad_norm": 0.5977187037410151, "learning_rate": 2.4351069308151775e-05, "loss": 11.8175, "step": 28644 }, { "epoch": 1.5598347321203705, "grad_norm": 0.629120694430693, "learning_rate": 2.4345302407997106e-05, "loss": 11.7188, "step": 28645 }, { "epoch": 1.5598891861169535, "grad_norm": 0.5351879694628767, "learning_rate": 2.433953609614602e-05, "loss": 11.8421, "step": 28646 }, { "epoch": 1.5599436401135365, "grad_norm": 0.526824232359522, "learning_rate": 2.4333770372643382e-05, "loss": 11.8083, "step": 28647 }, { "epoch": 1.5599980941101195, "grad_norm": 0.5323823091066532, "learning_rate": 2.4328005237534112e-05, "loss": 11.7918, "step": 28648 }, { "epoch": 1.5600525481067025, "grad_norm": 0.5136817842203153, "learning_rate": 2.4322240690862896e-05, "loss": 11.8293, "step": 28649 }, { "epoch": 1.5601070021032855, "grad_norm": 0.6024753448170723, "learning_rate": 2.4316476732674654e-05, "loss": 11.8917, "step": 28650 }, { "epoch": 1.5601614560998687, "grad_norm": 0.6061125754433051, "learning_rate": 2.4310713363014147e-05, "loss": 11.8552, "step": 28651 }, { "epoch": 1.5602159100964517, "grad_norm": 0.5350605569422646, "learning_rate": 2.4304950581926246e-05, "loss": 11.7366, "step": 28652 }, { "epoch": 1.5602703640930347, "grad_norm": 0.5338812927349402, "learning_rate": 2.429918838945575e-05, "loss": 11.6912, "step": 28653 }, { "epoch": 1.5603248180896176, "grad_norm": 0.5487878983649543, "learning_rate": 2.4293426785647444e-05, "loss": 11.8467, "step": 28654 }, { "epoch": 1.5603792720862006, "grad_norm": 0.6095213129678665, "learning_rate": 2.4287665770546176e-05, "loss": 11.8893, "step": 28655 }, { "epoch": 1.5604337260827836, "grad_norm": 0.5485124508683139, "learning_rate": 2.4281905344196686e-05, "loss": 11.8722, "step": 28656 }, { "epoch": 1.5604881800793668, "grad_norm": 0.5454587961981922, "learning_rate": 2.4276145506643822e-05, "loss": 11.8044, "step": 28657 }, { "epoch": 1.5605426340759498, "grad_norm": 0.5391338900803166, "learning_rate": 2.4270386257932353e-05, "loss": 11.9079, "step": 28658 }, { "epoch": 1.5605970880725328, "grad_norm": 0.5377075082659929, "learning_rate": 2.4264627598107025e-05, "loss": 11.8558, "step": 28659 }, { "epoch": 1.5606515420691158, "grad_norm": 0.5359101307237248, "learning_rate": 2.425886952721269e-05, "loss": 11.8104, "step": 28660 }, { "epoch": 1.5607059960656988, "grad_norm": 0.5198996882303529, "learning_rate": 2.4253112045294056e-05, "loss": 11.7534, "step": 28661 }, { "epoch": 1.5607604500622818, "grad_norm": 0.6005535960545674, "learning_rate": 2.4247355152395933e-05, "loss": 11.753, "step": 28662 }, { "epoch": 1.5608149040588648, "grad_norm": 0.5374859344460433, "learning_rate": 2.4241598848563095e-05, "loss": 11.8552, "step": 28663 }, { "epoch": 1.5608693580554478, "grad_norm": 0.5435563746272594, "learning_rate": 2.4235843133840276e-05, "loss": 11.8205, "step": 28664 }, { "epoch": 1.5609238120520308, "grad_norm": 0.514249889739879, "learning_rate": 2.4230088008272267e-05, "loss": 11.8019, "step": 28665 }, { "epoch": 1.5609782660486138, "grad_norm": 0.5519709215934762, "learning_rate": 2.422433347190377e-05, "loss": 11.8763, "step": 28666 }, { "epoch": 1.5610327200451968, "grad_norm": 0.6500519150146741, "learning_rate": 2.4218579524779596e-05, "loss": 11.8816, "step": 28667 }, { "epoch": 1.5610871740417798, "grad_norm": 0.540962738663549, "learning_rate": 2.4212826166944445e-05, "loss": 11.7951, "step": 28668 }, { "epoch": 1.5611416280383628, "grad_norm": 0.5331272631238472, "learning_rate": 2.420707339844305e-05, "loss": 11.7081, "step": 28669 }, { "epoch": 1.5611960820349458, "grad_norm": 0.5311968185544254, "learning_rate": 2.4201321219320194e-05, "loss": 11.8584, "step": 28670 }, { "epoch": 1.5612505360315287, "grad_norm": 0.5329710549646063, "learning_rate": 2.419556962962054e-05, "loss": 11.7512, "step": 28671 }, { "epoch": 1.5613049900281117, "grad_norm": 0.5494203517761478, "learning_rate": 2.4189818629388884e-05, "loss": 11.9823, "step": 28672 }, { "epoch": 1.5613594440246947, "grad_norm": 0.5331030500575514, "learning_rate": 2.4184068218669877e-05, "loss": 11.7479, "step": 28673 }, { "epoch": 1.561413898021278, "grad_norm": 0.565972514856663, "learning_rate": 2.417831839750827e-05, "loss": 11.8592, "step": 28674 }, { "epoch": 1.561468352017861, "grad_norm": 0.5680900215380795, "learning_rate": 2.41725691659488e-05, "loss": 12.0057, "step": 28675 }, { "epoch": 1.561522806014444, "grad_norm": 0.5255724644831455, "learning_rate": 2.416682052403613e-05, "loss": 11.8153, "step": 28676 }, { "epoch": 1.561577260011027, "grad_norm": 0.5279055876700216, "learning_rate": 2.4161072471814995e-05, "loss": 11.8486, "step": 28677 }, { "epoch": 1.56163171400761, "grad_norm": 0.595591677256108, "learning_rate": 2.415532500933009e-05, "loss": 11.8528, "step": 28678 }, { "epoch": 1.561686168004193, "grad_norm": 0.5061505186200804, "learning_rate": 2.4149578136626062e-05, "loss": 11.7846, "step": 28679 }, { "epoch": 1.5617406220007761, "grad_norm": 0.5060773506766636, "learning_rate": 2.4143831853747668e-05, "loss": 11.8524, "step": 28680 }, { "epoch": 1.5617950759973591, "grad_norm": 0.584715774655512, "learning_rate": 2.4138086160739525e-05, "loss": 11.7046, "step": 28681 }, { "epoch": 1.561849529993942, "grad_norm": 0.6205679596133787, "learning_rate": 2.413234105764638e-05, "loss": 11.8545, "step": 28682 }, { "epoch": 1.561903983990525, "grad_norm": 0.558499851873605, "learning_rate": 2.4126596544512846e-05, "loss": 11.7731, "step": 28683 }, { "epoch": 1.561958437987108, "grad_norm": 0.49533448777764894, "learning_rate": 2.4120852621383648e-05, "loss": 11.9185, "step": 28684 }, { "epoch": 1.562012891983691, "grad_norm": 0.5125488555076525, "learning_rate": 2.4115109288303395e-05, "loss": 11.7386, "step": 28685 }, { "epoch": 1.562067345980274, "grad_norm": 0.5737346535227084, "learning_rate": 2.410936654531678e-05, "loss": 11.8959, "step": 28686 }, { "epoch": 1.562121799976857, "grad_norm": 0.5614848846577719, "learning_rate": 2.410362439246848e-05, "loss": 11.6778, "step": 28687 }, { "epoch": 1.56217625397344, "grad_norm": 0.5149510034252667, "learning_rate": 2.409788282980313e-05, "loss": 11.7197, "step": 28688 }, { "epoch": 1.562230707970023, "grad_norm": 0.5971872967791887, "learning_rate": 2.4092141857365348e-05, "loss": 11.8822, "step": 28689 }, { "epoch": 1.562285161966606, "grad_norm": 0.5987782035748999, "learning_rate": 2.408640147519983e-05, "loss": 11.716, "step": 28690 }, { "epoch": 1.562339615963189, "grad_norm": 0.5242083412362638, "learning_rate": 2.4080661683351147e-05, "loss": 11.7833, "step": 28691 }, { "epoch": 1.562394069959772, "grad_norm": 0.6180979334920832, "learning_rate": 2.4074922481864005e-05, "loss": 11.8354, "step": 28692 }, { "epoch": 1.562448523956355, "grad_norm": 0.5467262921957935, "learning_rate": 2.4069183870782962e-05, "loss": 11.8422, "step": 28693 }, { "epoch": 1.562502977952938, "grad_norm": 0.5297791975345009, "learning_rate": 2.406344585015271e-05, "loss": 11.7195, "step": 28694 }, { "epoch": 1.562557431949521, "grad_norm": 0.5166941071554032, "learning_rate": 2.405770842001781e-05, "loss": 11.7458, "step": 28695 }, { "epoch": 1.562611885946104, "grad_norm": 0.5626793895017317, "learning_rate": 2.4051971580422917e-05, "loss": 11.898, "step": 28696 }, { "epoch": 1.5626663399426872, "grad_norm": 0.5460906787316868, "learning_rate": 2.4046235331412648e-05, "loss": 11.8376, "step": 28697 }, { "epoch": 1.5627207939392702, "grad_norm": 0.5257022270095185, "learning_rate": 2.404049967303158e-05, "loss": 11.8347, "step": 28698 }, { "epoch": 1.5627752479358532, "grad_norm": 0.5000313753235595, "learning_rate": 2.4034764605324344e-05, "loss": 11.8122, "step": 28699 }, { "epoch": 1.5628297019324362, "grad_norm": 0.5569986287852403, "learning_rate": 2.402903012833547e-05, "loss": 11.793, "step": 28700 }, { "epoch": 1.5628841559290192, "grad_norm": 0.5336683536513435, "learning_rate": 2.4023296242109616e-05, "loss": 11.8115, "step": 28701 }, { "epoch": 1.5629386099256022, "grad_norm": 0.5475684687673211, "learning_rate": 2.4017562946691376e-05, "loss": 11.9115, "step": 28702 }, { "epoch": 1.5629930639221854, "grad_norm": 0.5337789947071677, "learning_rate": 2.4011830242125277e-05, "loss": 11.8374, "step": 28703 }, { "epoch": 1.5630475179187684, "grad_norm": 0.5904045267095109, "learning_rate": 2.4006098128455957e-05, "loss": 11.8028, "step": 28704 }, { "epoch": 1.5631019719153514, "grad_norm": 0.5083725581979951, "learning_rate": 2.400036660572793e-05, "loss": 11.8354, "step": 28705 }, { "epoch": 1.5631564259119344, "grad_norm": 0.5801147322330663, "learning_rate": 2.3994635673985832e-05, "loss": 11.8596, "step": 28706 }, { "epoch": 1.5632108799085174, "grad_norm": 0.6197128490651064, "learning_rate": 2.398890533327416e-05, "loss": 11.724, "step": 28707 }, { "epoch": 1.5632653339051004, "grad_norm": 0.509327581039951, "learning_rate": 2.398317558363754e-05, "loss": 11.7433, "step": 28708 }, { "epoch": 1.5633197879016834, "grad_norm": 0.5578360607636319, "learning_rate": 2.3977446425120498e-05, "loss": 11.8905, "step": 28709 }, { "epoch": 1.5633742418982663, "grad_norm": 0.568355764591363, "learning_rate": 2.397171785776755e-05, "loss": 11.8355, "step": 28710 }, { "epoch": 1.5634286958948493, "grad_norm": 0.4980275299516433, "learning_rate": 2.3965989881623263e-05, "loss": 11.7844, "step": 28711 }, { "epoch": 1.5634831498914323, "grad_norm": 0.541978878698937, "learning_rate": 2.3960262496732234e-05, "loss": 11.8814, "step": 28712 }, { "epoch": 1.5635376038880153, "grad_norm": 0.501969711216694, "learning_rate": 2.3954535703138914e-05, "loss": 11.9145, "step": 28713 }, { "epoch": 1.5635920578845983, "grad_norm": 0.5280477172839124, "learning_rate": 2.3948809500887914e-05, "loss": 11.7605, "step": 28714 }, { "epoch": 1.5636465118811813, "grad_norm": 0.5200954490724421, "learning_rate": 2.39430838900237e-05, "loss": 11.7397, "step": 28715 }, { "epoch": 1.5637009658777643, "grad_norm": 0.5243192315248342, "learning_rate": 2.3937358870590842e-05, "loss": 11.9174, "step": 28716 }, { "epoch": 1.5637554198743473, "grad_norm": 0.5686220440936848, "learning_rate": 2.393163444263381e-05, "loss": 11.9139, "step": 28717 }, { "epoch": 1.5638098738709303, "grad_norm": 0.5883168712787715, "learning_rate": 2.392591060619718e-05, "loss": 11.861, "step": 28718 }, { "epoch": 1.5638643278675133, "grad_norm": 0.5133332182405562, "learning_rate": 2.392018736132542e-05, "loss": 11.7445, "step": 28719 }, { "epoch": 1.5639187818640963, "grad_norm": 0.5413224039592875, "learning_rate": 2.391446470806301e-05, "loss": 11.9039, "step": 28720 }, { "epoch": 1.5639732358606795, "grad_norm": 0.5726298650546662, "learning_rate": 2.3908742646454517e-05, "loss": 11.8146, "step": 28721 }, { "epoch": 1.5640276898572625, "grad_norm": 0.5707113450541269, "learning_rate": 2.3903021176544372e-05, "loss": 11.8959, "step": 28722 }, { "epoch": 1.5640821438538455, "grad_norm": 0.5102116110964123, "learning_rate": 2.389730029837709e-05, "loss": 11.8204, "step": 28723 }, { "epoch": 1.5641365978504285, "grad_norm": 0.5654603646911585, "learning_rate": 2.3891580011997194e-05, "loss": 11.7731, "step": 28724 }, { "epoch": 1.5641910518470115, "grad_norm": 0.5402573401043785, "learning_rate": 2.3885860317449116e-05, "loss": 11.7931, "step": 28725 }, { "epoch": 1.5642455058435947, "grad_norm": 0.5231421169027981, "learning_rate": 2.3880141214777375e-05, "loss": 11.8217, "step": 28726 }, { "epoch": 1.5642999598401777, "grad_norm": 0.5214264989303993, "learning_rate": 2.387442270402639e-05, "loss": 11.8492, "step": 28727 }, { "epoch": 1.5643544138367607, "grad_norm": 0.5118071365436173, "learning_rate": 2.3868704785240693e-05, "loss": 11.8548, "step": 28728 }, { "epoch": 1.5644088678333437, "grad_norm": 0.5094263838292173, "learning_rate": 2.3862987458464704e-05, "loss": 11.8949, "step": 28729 }, { "epoch": 1.5644633218299266, "grad_norm": 0.5738672324059615, "learning_rate": 2.3857270723742874e-05, "loss": 11.8904, "step": 28730 }, { "epoch": 1.5645177758265096, "grad_norm": 0.5148932033644436, "learning_rate": 2.3851554581119708e-05, "loss": 11.8232, "step": 28731 }, { "epoch": 1.5645722298230926, "grad_norm": 0.5352406901516237, "learning_rate": 2.3845839030639582e-05, "loss": 11.822, "step": 28732 }, { "epoch": 1.5646266838196756, "grad_norm": 0.49418822148510005, "learning_rate": 2.3840124072347027e-05, "loss": 11.8528, "step": 28733 }, { "epoch": 1.5646811378162586, "grad_norm": 0.5375250399413598, "learning_rate": 2.3834409706286397e-05, "loss": 11.7332, "step": 28734 }, { "epoch": 1.5647355918128416, "grad_norm": 0.58188569843298, "learning_rate": 2.3828695932502177e-05, "loss": 11.8946, "step": 28735 }, { "epoch": 1.5647900458094246, "grad_norm": 0.5830529094803893, "learning_rate": 2.3822982751038825e-05, "loss": 11.894, "step": 28736 }, { "epoch": 1.5648444998060076, "grad_norm": 0.5446066079976716, "learning_rate": 2.3817270161940696e-05, "loss": 11.9443, "step": 28737 }, { "epoch": 1.5648989538025906, "grad_norm": 0.5943059947098568, "learning_rate": 2.381155816525228e-05, "loss": 11.8442, "step": 28738 }, { "epoch": 1.5649534077991736, "grad_norm": 0.5221723910675296, "learning_rate": 2.380584676101797e-05, "loss": 11.7763, "step": 28739 }, { "epoch": 1.5650078617957566, "grad_norm": 0.5839712836393786, "learning_rate": 2.380013594928214e-05, "loss": 11.8905, "step": 28740 }, { "epoch": 1.5650623157923396, "grad_norm": 0.5390609129932482, "learning_rate": 2.3794425730089263e-05, "loss": 11.8606, "step": 28741 }, { "epoch": 1.5651167697889226, "grad_norm": 0.507204504875137, "learning_rate": 2.3788716103483677e-05, "loss": 11.7389, "step": 28742 }, { "epoch": 1.5651712237855055, "grad_norm": 0.5677803797688926, "learning_rate": 2.3783007069509856e-05, "loss": 11.9405, "step": 28743 }, { "epoch": 1.5652256777820888, "grad_norm": 0.48342845713468824, "learning_rate": 2.377729862821212e-05, "loss": 11.7874, "step": 28744 }, { "epoch": 1.5652801317786718, "grad_norm": 0.5616391977923335, "learning_rate": 2.377159077963489e-05, "loss": 11.9251, "step": 28745 }, { "epoch": 1.5653345857752547, "grad_norm": 0.5282233862160536, "learning_rate": 2.3765883523822575e-05, "loss": 11.9397, "step": 28746 }, { "epoch": 1.5653890397718377, "grad_norm": 0.5284359597713528, "learning_rate": 2.3760176860819517e-05, "loss": 11.7287, "step": 28747 }, { "epoch": 1.5654434937684207, "grad_norm": 0.5204217061484021, "learning_rate": 2.3754470790670158e-05, "loss": 11.9262, "step": 28748 }, { "epoch": 1.5654979477650037, "grad_norm": 0.5371181748420035, "learning_rate": 2.3748765313418765e-05, "loss": 11.8768, "step": 28749 }, { "epoch": 1.565552401761587, "grad_norm": 0.5137552902370959, "learning_rate": 2.3743060429109765e-05, "loss": 11.727, "step": 28750 }, { "epoch": 1.56560685575817, "grad_norm": 0.5572033761507299, "learning_rate": 2.3737356137787548e-05, "loss": 11.8423, "step": 28751 }, { "epoch": 1.565661309754753, "grad_norm": 0.5129204105216301, "learning_rate": 2.3731652439496398e-05, "loss": 11.7398, "step": 28752 }, { "epoch": 1.565715763751336, "grad_norm": 0.6097920625736223, "learning_rate": 2.372594933428074e-05, "loss": 11.8881, "step": 28753 }, { "epoch": 1.565770217747919, "grad_norm": 0.5645443131079321, "learning_rate": 2.3720246822184866e-05, "loss": 11.824, "step": 28754 }, { "epoch": 1.565824671744502, "grad_norm": 0.5537955200202854, "learning_rate": 2.371454490325318e-05, "loss": 11.908, "step": 28755 }, { "epoch": 1.565879125741085, "grad_norm": 0.5893560843152212, "learning_rate": 2.370884357752995e-05, "loss": 11.9121, "step": 28756 }, { "epoch": 1.565933579737668, "grad_norm": 0.5274576306479433, "learning_rate": 2.3703142845059545e-05, "loss": 11.8951, "step": 28757 }, { "epoch": 1.5659880337342509, "grad_norm": 0.540132917346285, "learning_rate": 2.369744270588635e-05, "loss": 11.791, "step": 28758 }, { "epoch": 1.5660424877308339, "grad_norm": 0.5511282991532422, "learning_rate": 2.369174316005459e-05, "loss": 11.7187, "step": 28759 }, { "epoch": 1.5660969417274169, "grad_norm": 0.5231365561063569, "learning_rate": 2.3686044207608626e-05, "loss": 11.8112, "step": 28760 }, { "epoch": 1.5661513957239999, "grad_norm": 0.48191017846474765, "learning_rate": 2.368034584859281e-05, "loss": 11.8063, "step": 28761 }, { "epoch": 1.5662058497205829, "grad_norm": 0.5128220263647797, "learning_rate": 2.3674648083051387e-05, "loss": 11.7613, "step": 28762 }, { "epoch": 1.5662603037171658, "grad_norm": 0.5198616359674489, "learning_rate": 2.3668950911028743e-05, "loss": 11.8377, "step": 28763 }, { "epoch": 1.5663147577137488, "grad_norm": 0.5331805507713756, "learning_rate": 2.3663254332569094e-05, "loss": 11.8622, "step": 28764 }, { "epoch": 1.5663692117103318, "grad_norm": 0.5312240906252309, "learning_rate": 2.3657558347716824e-05, "loss": 11.7363, "step": 28765 }, { "epoch": 1.5664236657069148, "grad_norm": 0.5669592320093093, "learning_rate": 2.3651862956516146e-05, "loss": 11.8808, "step": 28766 }, { "epoch": 1.566478119703498, "grad_norm": 0.5556726740596933, "learning_rate": 2.3646168159011418e-05, "loss": 11.6917, "step": 28767 }, { "epoch": 1.566532573700081, "grad_norm": 0.6241357849272708, "learning_rate": 2.3640473955246856e-05, "loss": 11.6563, "step": 28768 }, { "epoch": 1.566587027696664, "grad_norm": 0.5384150007817814, "learning_rate": 2.3634780345266806e-05, "loss": 11.8801, "step": 28769 }, { "epoch": 1.566641481693247, "grad_norm": 0.6633948331576193, "learning_rate": 2.3629087329115517e-05, "loss": 11.907, "step": 28770 }, { "epoch": 1.56669593568983, "grad_norm": 0.5392172734467195, "learning_rate": 2.3623394906837216e-05, "loss": 11.8092, "step": 28771 }, { "epoch": 1.566750389686413, "grad_norm": 0.5751667334222652, "learning_rate": 2.3617703078476204e-05, "loss": 11.7699, "step": 28772 }, { "epoch": 1.5668048436829962, "grad_norm": 0.5564432575087046, "learning_rate": 2.3612011844076776e-05, "loss": 11.7863, "step": 28773 }, { "epoch": 1.5668592976795792, "grad_norm": 0.555151398696221, "learning_rate": 2.3606321203683125e-05, "loss": 11.758, "step": 28774 }, { "epoch": 1.5669137516761622, "grad_norm": 0.5823342213344037, "learning_rate": 2.360063115733956e-05, "loss": 11.786, "step": 28775 }, { "epoch": 1.5669682056727452, "grad_norm": 0.5084271925018297, "learning_rate": 2.359494170509027e-05, "loss": 11.8565, "step": 28776 }, { "epoch": 1.5670226596693282, "grad_norm": 0.5624785039688724, "learning_rate": 2.3589252846979547e-05, "loss": 11.8468, "step": 28777 }, { "epoch": 1.5670771136659112, "grad_norm": 0.5853940280131675, "learning_rate": 2.3583564583051586e-05, "loss": 11.8713, "step": 28778 }, { "epoch": 1.5671315676624942, "grad_norm": 0.5133279565725016, "learning_rate": 2.357787691335067e-05, "loss": 11.7816, "step": 28779 }, { "epoch": 1.5671860216590772, "grad_norm": 0.6258315850356592, "learning_rate": 2.3572189837920998e-05, "loss": 11.7263, "step": 28780 }, { "epoch": 1.5672404756556602, "grad_norm": 0.5319551282542647, "learning_rate": 2.3566503356806768e-05, "loss": 11.7758, "step": 28781 }, { "epoch": 1.5672949296522432, "grad_norm": 0.5462647364412913, "learning_rate": 2.3560817470052253e-05, "loss": 11.8098, "step": 28782 }, { "epoch": 1.5673493836488261, "grad_norm": 0.5138441339635628, "learning_rate": 2.3555132177701612e-05, "loss": 11.8368, "step": 28783 }, { "epoch": 1.5674038376454091, "grad_norm": 0.5074516251173836, "learning_rate": 2.3549447479799093e-05, "loss": 11.8303, "step": 28784 }, { "epoch": 1.5674582916419921, "grad_norm": 0.5123121406713322, "learning_rate": 2.3543763376388904e-05, "loss": 11.8151, "step": 28785 }, { "epoch": 1.5675127456385751, "grad_norm": 0.5743749465006065, "learning_rate": 2.3538079867515217e-05, "loss": 11.8573, "step": 28786 }, { "epoch": 1.5675671996351581, "grad_norm": 0.5419491477759231, "learning_rate": 2.3532396953222257e-05, "loss": 11.8738, "step": 28787 }, { "epoch": 1.567621653631741, "grad_norm": 0.5100219492146478, "learning_rate": 2.352671463355418e-05, "loss": 11.8024, "step": 28788 }, { "epoch": 1.567676107628324, "grad_norm": 0.6024295216855875, "learning_rate": 2.352103290855523e-05, "loss": 11.8999, "step": 28789 }, { "epoch": 1.567730561624907, "grad_norm": 0.5244918516681831, "learning_rate": 2.3515351778269547e-05, "loss": 11.8695, "step": 28790 }, { "epoch": 1.5677850156214903, "grad_norm": 0.5251132217855241, "learning_rate": 2.3509671242741293e-05, "loss": 11.8705, "step": 28791 }, { "epoch": 1.5678394696180733, "grad_norm": 0.5617429267671153, "learning_rate": 2.3503991302014682e-05, "loss": 11.7922, "step": 28792 }, { "epoch": 1.5678939236146563, "grad_norm": 0.5013374606752228, "learning_rate": 2.3498311956133845e-05, "loss": 11.8182, "step": 28793 }, { "epoch": 1.5679483776112393, "grad_norm": 0.529311636265472, "learning_rate": 2.3492633205142957e-05, "loss": 11.8551, "step": 28794 }, { "epoch": 1.5680028316078223, "grad_norm": 0.5522262472677143, "learning_rate": 2.348695504908621e-05, "loss": 11.8119, "step": 28795 }, { "epoch": 1.5680572856044055, "grad_norm": 0.5637816975019991, "learning_rate": 2.34812774880077e-05, "loss": 11.9208, "step": 28796 }, { "epoch": 1.5681117396009885, "grad_norm": 0.5124110850407693, "learning_rate": 2.3475600521951646e-05, "loss": 11.8226, "step": 28797 }, { "epoch": 1.5681661935975715, "grad_norm": 0.5061190377644172, "learning_rate": 2.346992415096213e-05, "loss": 11.7736, "step": 28798 }, { "epoch": 1.5682206475941545, "grad_norm": 0.5952185542731735, "learning_rate": 2.3464248375083343e-05, "loss": 11.9282, "step": 28799 }, { "epoch": 1.5682751015907375, "grad_norm": 0.5735068389300485, "learning_rate": 2.345857319435939e-05, "loss": 11.9166, "step": 28800 }, { "epoch": 1.5683295555873205, "grad_norm": 0.5137494152933586, "learning_rate": 2.3452898608834385e-05, "loss": 11.8012, "step": 28801 }, { "epoch": 1.5683840095839034, "grad_norm": 0.5625967930993502, "learning_rate": 2.3447224618552498e-05, "loss": 11.8578, "step": 28802 }, { "epoch": 1.5684384635804864, "grad_norm": 0.5146389222455794, "learning_rate": 2.344155122355781e-05, "loss": 11.8517, "step": 28803 }, { "epoch": 1.5684929175770694, "grad_norm": 0.5264964698827507, "learning_rate": 2.3435878423894488e-05, "loss": 11.7719, "step": 28804 }, { "epoch": 1.5685473715736524, "grad_norm": 0.5464145866372988, "learning_rate": 2.3430206219606578e-05, "loss": 11.7619, "step": 28805 }, { "epoch": 1.5686018255702354, "grad_norm": 0.5585292284540881, "learning_rate": 2.3424534610738235e-05, "loss": 11.8803, "step": 28806 }, { "epoch": 1.5686562795668184, "grad_norm": 0.5819457914709306, "learning_rate": 2.3418863597333573e-05, "loss": 11.8771, "step": 28807 }, { "epoch": 1.5687107335634014, "grad_norm": 0.5685507530431541, "learning_rate": 2.3413193179436644e-05, "loss": 11.8529, "step": 28808 }, { "epoch": 1.5687651875599844, "grad_norm": 0.6031635409779674, "learning_rate": 2.34075233570916e-05, "loss": 11.8167, "step": 28809 }, { "epoch": 1.5688196415565674, "grad_norm": 0.5577260190987443, "learning_rate": 2.340185413034249e-05, "loss": 11.9123, "step": 28810 }, { "epoch": 1.5688740955531504, "grad_norm": 0.5429463809666294, "learning_rate": 2.3396185499233393e-05, "loss": 11.7829, "step": 28811 }, { "epoch": 1.5689285495497334, "grad_norm": 0.5210444587902235, "learning_rate": 2.3390517463808427e-05, "loss": 11.7514, "step": 28812 }, { "epoch": 1.5689830035463164, "grad_norm": 0.562737098318813, "learning_rate": 2.3384850024111616e-05, "loss": 11.8266, "step": 28813 }, { "epoch": 1.5690374575428996, "grad_norm": 0.6035665160900674, "learning_rate": 2.337918318018708e-05, "loss": 11.8448, "step": 28814 }, { "epoch": 1.5690919115394826, "grad_norm": 0.6524794324622821, "learning_rate": 2.3373516932078843e-05, "loss": 11.9193, "step": 28815 }, { "epoch": 1.5691463655360656, "grad_norm": 0.5289140152754468, "learning_rate": 2.3367851279830988e-05, "loss": 11.5499, "step": 28816 }, { "epoch": 1.5692008195326486, "grad_norm": 0.615430679097053, "learning_rate": 2.3362186223487603e-05, "loss": 11.8119, "step": 28817 }, { "epoch": 1.5692552735292316, "grad_norm": 0.5461840473533941, "learning_rate": 2.3356521763092687e-05, "loss": 11.7733, "step": 28818 }, { "epoch": 1.5693097275258145, "grad_norm": 0.5904324219559542, "learning_rate": 2.335085789869036e-05, "loss": 11.8883, "step": 28819 }, { "epoch": 1.5693641815223978, "grad_norm": 0.5416018914944514, "learning_rate": 2.3345194630324563e-05, "loss": 11.7112, "step": 28820 }, { "epoch": 1.5694186355189808, "grad_norm": 0.5592029661413549, "learning_rate": 2.3339531958039384e-05, "loss": 11.8843, "step": 28821 }, { "epoch": 1.5694730895155637, "grad_norm": 0.6197204075331203, "learning_rate": 2.333386988187889e-05, "loss": 11.918, "step": 28822 }, { "epoch": 1.5695275435121467, "grad_norm": 0.5527706793530026, "learning_rate": 2.3328208401887054e-05, "loss": 11.8334, "step": 28823 }, { "epoch": 1.5695819975087297, "grad_norm": 0.5986971658936749, "learning_rate": 2.332254751810795e-05, "loss": 11.815, "step": 28824 }, { "epoch": 1.5696364515053127, "grad_norm": 0.54743412591157, "learning_rate": 2.3316887230585548e-05, "loss": 11.7309, "step": 28825 }, { "epoch": 1.5696909055018957, "grad_norm": 0.5232788361556617, "learning_rate": 2.331122753936392e-05, "loss": 11.884, "step": 28826 }, { "epoch": 1.5697453594984787, "grad_norm": 0.5132268042937693, "learning_rate": 2.3305568444487026e-05, "loss": 11.7641, "step": 28827 }, { "epoch": 1.5697998134950617, "grad_norm": 0.5243162193204141, "learning_rate": 2.329990994599889e-05, "loss": 11.8505, "step": 28828 }, { "epoch": 1.5698542674916447, "grad_norm": 0.5632512571249487, "learning_rate": 2.3294252043943565e-05, "loss": 11.7132, "step": 28829 }, { "epoch": 1.5699087214882277, "grad_norm": 0.558958465573504, "learning_rate": 2.3288594738364955e-05, "loss": 11.7075, "step": 28830 }, { "epoch": 1.5699631754848107, "grad_norm": 0.5638982990361233, "learning_rate": 2.328293802930712e-05, "loss": 11.916, "step": 28831 }, { "epoch": 1.5700176294813937, "grad_norm": 0.5584317871891189, "learning_rate": 2.3277281916813998e-05, "loss": 11.8094, "step": 28832 }, { "epoch": 1.5700720834779767, "grad_norm": 0.534225741168446, "learning_rate": 2.3271626400929592e-05, "loss": 11.7414, "step": 28833 }, { "epoch": 1.5701265374745597, "grad_norm": 0.5439174665096898, "learning_rate": 2.3265971481697925e-05, "loss": 11.8063, "step": 28834 }, { "epoch": 1.5701809914711427, "grad_norm": 0.4844349434575177, "learning_rate": 2.3260317159162893e-05, "loss": 11.8095, "step": 28835 }, { "epoch": 1.5702354454677256, "grad_norm": 0.5011890767572778, "learning_rate": 2.325466343336854e-05, "loss": 11.7949, "step": 28836 }, { "epoch": 1.5702898994643089, "grad_norm": 0.5429282307084502, "learning_rate": 2.3249010304358767e-05, "loss": 11.8371, "step": 28837 }, { "epoch": 1.5703443534608919, "grad_norm": 0.5885668486888262, "learning_rate": 2.3243357772177587e-05, "loss": 11.8381, "step": 28838 }, { "epoch": 1.5703988074574748, "grad_norm": 0.516917885066894, "learning_rate": 2.3237705836868928e-05, "loss": 11.6619, "step": 28839 }, { "epoch": 1.5704532614540578, "grad_norm": 0.5392968565847012, "learning_rate": 2.323205449847671e-05, "loss": 11.8768, "step": 28840 }, { "epoch": 1.5705077154506408, "grad_norm": 0.5249524459624826, "learning_rate": 2.3226403757044936e-05, "loss": 11.8003, "step": 28841 }, { "epoch": 1.5705621694472238, "grad_norm": 0.5771607004204238, "learning_rate": 2.3220753612617495e-05, "loss": 11.9423, "step": 28842 }, { "epoch": 1.570616623443807, "grad_norm": 0.538561955421746, "learning_rate": 2.3215104065238357e-05, "loss": 11.784, "step": 28843 }, { "epoch": 1.57067107744039, "grad_norm": 0.5394213655913459, "learning_rate": 2.3209455114951474e-05, "loss": 11.8195, "step": 28844 }, { "epoch": 1.570725531436973, "grad_norm": 0.5664551279352269, "learning_rate": 2.3203806761800717e-05, "loss": 11.8103, "step": 28845 }, { "epoch": 1.570779985433556, "grad_norm": 0.5625213965904582, "learning_rate": 2.3198159005830055e-05, "loss": 11.8841, "step": 28846 }, { "epoch": 1.570834439430139, "grad_norm": 0.565816915360413, "learning_rate": 2.3192511847083375e-05, "loss": 11.7987, "step": 28847 }, { "epoch": 1.570888893426722, "grad_norm": 0.5448686503596304, "learning_rate": 2.318686528560462e-05, "loss": 11.6822, "step": 28848 }, { "epoch": 1.570943347423305, "grad_norm": 0.5553673942431313, "learning_rate": 2.3181219321437687e-05, "loss": 11.9259, "step": 28849 }, { "epoch": 1.570997801419888, "grad_norm": 0.5950616031474728, "learning_rate": 2.3175573954626452e-05, "loss": 11.8688, "step": 28850 }, { "epoch": 1.571052255416471, "grad_norm": 0.4919833826596297, "learning_rate": 2.3169929185214856e-05, "loss": 11.7691, "step": 28851 }, { "epoch": 1.571106709413054, "grad_norm": 0.5778741528414014, "learning_rate": 2.3164285013246755e-05, "loss": 11.8501, "step": 28852 }, { "epoch": 1.571161163409637, "grad_norm": 0.5534783101305234, "learning_rate": 2.3158641438766083e-05, "loss": 11.796, "step": 28853 }, { "epoch": 1.57121561740622, "grad_norm": 0.569115913555047, "learning_rate": 2.315299846181668e-05, "loss": 11.9566, "step": 28854 }, { "epoch": 1.571270071402803, "grad_norm": 0.5468155802080271, "learning_rate": 2.314735608244244e-05, "loss": 11.8586, "step": 28855 }, { "epoch": 1.571324525399386, "grad_norm": 0.5401501043180141, "learning_rate": 2.3141714300687278e-05, "loss": 11.8065, "step": 28856 }, { "epoch": 1.571378979395969, "grad_norm": 0.4796767885691581, "learning_rate": 2.3136073116595003e-05, "loss": 11.7506, "step": 28857 }, { "epoch": 1.571433433392552, "grad_norm": 0.5759294664512107, "learning_rate": 2.3130432530209545e-05, "loss": 11.776, "step": 28858 }, { "epoch": 1.571487887389135, "grad_norm": 0.5283198939236153, "learning_rate": 2.3124792541574737e-05, "loss": 11.7619, "step": 28859 }, { "epoch": 1.5715423413857181, "grad_norm": 0.5819955804357554, "learning_rate": 2.3119153150734397e-05, "loss": 11.9188, "step": 28860 }, { "epoch": 1.5715967953823011, "grad_norm": 0.5253213142452875, "learning_rate": 2.311351435773246e-05, "loss": 11.766, "step": 28861 }, { "epoch": 1.5716512493788841, "grad_norm": 0.5531674322889195, "learning_rate": 2.3107876162612684e-05, "loss": 11.8874, "step": 28862 }, { "epoch": 1.5717057033754671, "grad_norm": 0.5201859640964867, "learning_rate": 2.3102238565419e-05, "loss": 11.7146, "step": 28863 }, { "epoch": 1.57176015737205, "grad_norm": 0.5856481414476795, "learning_rate": 2.3096601566195163e-05, "loss": 11.9887, "step": 28864 }, { "epoch": 1.571814611368633, "grad_norm": 0.5451818598652349, "learning_rate": 2.3090965164985058e-05, "loss": 11.7176, "step": 28865 }, { "epoch": 1.5718690653652163, "grad_norm": 0.5155272465145542, "learning_rate": 2.308532936183253e-05, "loss": 11.7139, "step": 28866 }, { "epoch": 1.5719235193617993, "grad_norm": 0.5971233770583798, "learning_rate": 2.3079694156781352e-05, "loss": 11.9034, "step": 28867 }, { "epoch": 1.5719779733583823, "grad_norm": 0.5359770605652687, "learning_rate": 2.3074059549875393e-05, "loss": 11.9564, "step": 28868 }, { "epoch": 1.5720324273549653, "grad_norm": 0.6001300294057664, "learning_rate": 2.3068425541158456e-05, "loss": 11.7861, "step": 28869 }, { "epoch": 1.5720868813515483, "grad_norm": 0.49531358904600387, "learning_rate": 2.3062792130674314e-05, "loss": 11.7003, "step": 28870 }, { "epoch": 1.5721413353481313, "grad_norm": 0.588119631962856, "learning_rate": 2.3057159318466836e-05, "loss": 11.9108, "step": 28871 }, { "epoch": 1.5721957893447143, "grad_norm": 0.5981029271313729, "learning_rate": 2.305152710457975e-05, "loss": 11.7787, "step": 28872 }, { "epoch": 1.5722502433412973, "grad_norm": 0.5604200596126128, "learning_rate": 2.3045895489056934e-05, "loss": 11.8497, "step": 28873 }, { "epoch": 1.5723046973378803, "grad_norm": 0.5760121192842886, "learning_rate": 2.3040264471942108e-05, "loss": 11.7729, "step": 28874 }, { "epoch": 1.5723591513344632, "grad_norm": 0.5054891954410827, "learning_rate": 2.3034634053279115e-05, "loss": 11.691, "step": 28875 }, { "epoch": 1.5724136053310462, "grad_norm": 0.6305847167877786, "learning_rate": 2.3029004233111694e-05, "loss": 11.8834, "step": 28876 }, { "epoch": 1.5724680593276292, "grad_norm": 0.5231289623932586, "learning_rate": 2.3023375011483638e-05, "loss": 11.8407, "step": 28877 }, { "epoch": 1.5725225133242122, "grad_norm": 0.5354428442379003, "learning_rate": 2.3017746388438753e-05, "loss": 11.9022, "step": 28878 }, { "epoch": 1.5725769673207952, "grad_norm": 0.529924526298394, "learning_rate": 2.3012118364020787e-05, "loss": 11.8539, "step": 28879 }, { "epoch": 1.5726314213173782, "grad_norm": 0.49937226386890826, "learning_rate": 2.3006490938273507e-05, "loss": 11.773, "step": 28880 }, { "epoch": 1.5726858753139612, "grad_norm": 0.600246857127341, "learning_rate": 2.3000864111240627e-05, "loss": 11.9778, "step": 28881 }, { "epoch": 1.5727403293105442, "grad_norm": 0.5150061147575434, "learning_rate": 2.2995237882965936e-05, "loss": 11.7229, "step": 28882 }, { "epoch": 1.5727947833071272, "grad_norm": 0.5389269690688803, "learning_rate": 2.298961225349322e-05, "loss": 11.7591, "step": 28883 }, { "epoch": 1.5728492373037104, "grad_norm": 0.5496834218271008, "learning_rate": 2.2983987222866176e-05, "loss": 11.8051, "step": 28884 }, { "epoch": 1.5729036913002934, "grad_norm": 0.5408526547759474, "learning_rate": 2.2978362791128582e-05, "loss": 11.8004, "step": 28885 }, { "epoch": 1.5729581452968764, "grad_norm": 0.48644510699505733, "learning_rate": 2.2972738958324123e-05, "loss": 11.7983, "step": 28886 }, { "epoch": 1.5730125992934594, "grad_norm": 0.5125963878475169, "learning_rate": 2.29671157244966e-05, "loss": 11.7825, "step": 28887 }, { "epoch": 1.5730670532900424, "grad_norm": 0.5456381862501376, "learning_rate": 2.2961493089689677e-05, "loss": 11.8283, "step": 28888 }, { "epoch": 1.5731215072866254, "grad_norm": 0.5832132101324694, "learning_rate": 2.295587105394713e-05, "loss": 11.9061, "step": 28889 }, { "epoch": 1.5731759612832086, "grad_norm": 0.5117851731701568, "learning_rate": 2.2950249617312648e-05, "loss": 11.8274, "step": 28890 }, { "epoch": 1.5732304152797916, "grad_norm": 0.7988447022716587, "learning_rate": 2.2944628779829913e-05, "loss": 12.0, "step": 28891 }, { "epoch": 1.5732848692763746, "grad_norm": 0.5515940665282327, "learning_rate": 2.293900854154267e-05, "loss": 11.8128, "step": 28892 }, { "epoch": 1.5733393232729576, "grad_norm": 0.5458483583398712, "learning_rate": 2.2933388902494646e-05, "loss": 11.9411, "step": 28893 }, { "epoch": 1.5733937772695405, "grad_norm": 0.5000229674779554, "learning_rate": 2.2927769862729497e-05, "loss": 11.8226, "step": 28894 }, { "epoch": 1.5734482312661235, "grad_norm": 0.5421644200612199, "learning_rate": 2.292215142229095e-05, "loss": 11.9527, "step": 28895 }, { "epoch": 1.5735026852627065, "grad_norm": 0.48402343152433586, "learning_rate": 2.2916533581222655e-05, "loss": 11.7499, "step": 28896 }, { "epoch": 1.5735571392592895, "grad_norm": 0.5165782833986247, "learning_rate": 2.2910916339568354e-05, "loss": 11.8198, "step": 28897 }, { "epoch": 1.5736115932558725, "grad_norm": 0.5627621105234702, "learning_rate": 2.290529969737166e-05, "loss": 11.7158, "step": 28898 }, { "epoch": 1.5736660472524555, "grad_norm": 0.5331697384083023, "learning_rate": 2.289968365467632e-05, "loss": 11.7072, "step": 28899 }, { "epoch": 1.5737205012490385, "grad_norm": 0.5808663226201757, "learning_rate": 2.2894068211525964e-05, "loss": 11.8476, "step": 28900 }, { "epoch": 1.5737749552456215, "grad_norm": 0.5411353924438677, "learning_rate": 2.2888453367964235e-05, "loss": 11.8446, "step": 28901 }, { "epoch": 1.5738294092422045, "grad_norm": 0.5439941125721316, "learning_rate": 2.288283912403486e-05, "loss": 11.8777, "step": 28902 }, { "epoch": 1.5738838632387875, "grad_norm": 0.5354142726054871, "learning_rate": 2.2877225479781427e-05, "loss": 11.8446, "step": 28903 }, { "epoch": 1.5739383172353705, "grad_norm": 0.5238275096977838, "learning_rate": 2.2871612435247625e-05, "loss": 11.8189, "step": 28904 }, { "epoch": 1.5739927712319535, "grad_norm": 0.5314271423713879, "learning_rate": 2.2865999990477128e-05, "loss": 11.8378, "step": 28905 }, { "epoch": 1.5740472252285365, "grad_norm": 0.5406769231803211, "learning_rate": 2.286038814551351e-05, "loss": 11.7432, "step": 28906 }, { "epoch": 1.5741016792251197, "grad_norm": 0.5294794764373592, "learning_rate": 2.28547769004005e-05, "loss": 11.6909, "step": 28907 }, { "epoch": 1.5741561332217027, "grad_norm": 0.5633527580065322, "learning_rate": 2.284916625518163e-05, "loss": 11.7931, "step": 28908 }, { "epoch": 1.5742105872182857, "grad_norm": 0.5253651236233929, "learning_rate": 2.2843556209900628e-05, "loss": 11.8092, "step": 28909 }, { "epoch": 1.5742650412148687, "grad_norm": 0.537188452610232, "learning_rate": 2.283794676460107e-05, "loss": 11.9092, "step": 28910 }, { "epoch": 1.5743194952114516, "grad_norm": 0.5943887836728728, "learning_rate": 2.283233791932654e-05, "loss": 11.9433, "step": 28911 }, { "epoch": 1.5743739492080346, "grad_norm": 0.5744023067687126, "learning_rate": 2.2826729674120728e-05, "loss": 11.8958, "step": 28912 }, { "epoch": 1.5744284032046179, "grad_norm": 0.5141809582026872, "learning_rate": 2.2821122029027176e-05, "loss": 11.8696, "step": 28913 }, { "epoch": 1.5744828572012008, "grad_norm": 0.5748075044435534, "learning_rate": 2.2815514984089526e-05, "loss": 11.858, "step": 28914 }, { "epoch": 1.5745373111977838, "grad_norm": 0.560832134890702, "learning_rate": 2.28099085393514e-05, "loss": 11.8423, "step": 28915 }, { "epoch": 1.5745917651943668, "grad_norm": 0.5371437284242473, "learning_rate": 2.280430269485634e-05, "loss": 11.9207, "step": 28916 }, { "epoch": 1.5746462191909498, "grad_norm": 0.53380184172574, "learning_rate": 2.2798697450648e-05, "loss": 11.8518, "step": 28917 }, { "epoch": 1.5747006731875328, "grad_norm": 0.540136552633758, "learning_rate": 2.27930928067699e-05, "loss": 11.8117, "step": 28918 }, { "epoch": 1.5747551271841158, "grad_norm": 0.5279692252142438, "learning_rate": 2.2787488763265697e-05, "loss": 11.828, "step": 28919 }, { "epoch": 1.5748095811806988, "grad_norm": 0.5667267210614443, "learning_rate": 2.278188532017892e-05, "loss": 11.7413, "step": 28920 }, { "epoch": 1.5748640351772818, "grad_norm": 0.5641548462221883, "learning_rate": 2.2776282477553125e-05, "loss": 11.8776, "step": 28921 }, { "epoch": 1.5749184891738648, "grad_norm": 0.5165989644735156, "learning_rate": 2.277068023543194e-05, "loss": 11.7896, "step": 28922 }, { "epoch": 1.5749729431704478, "grad_norm": 0.5499267654843936, "learning_rate": 2.2765078593858868e-05, "loss": 11.8233, "step": 28923 }, { "epoch": 1.5750273971670308, "grad_norm": 0.5689880773549624, "learning_rate": 2.275947755287753e-05, "loss": 11.8807, "step": 28924 }, { "epoch": 1.5750818511636138, "grad_norm": 0.5704698987243558, "learning_rate": 2.275387711253142e-05, "loss": 11.9132, "step": 28925 }, { "epoch": 1.5751363051601968, "grad_norm": 0.5903502315610777, "learning_rate": 2.2748277272864106e-05, "loss": 11.8719, "step": 28926 }, { "epoch": 1.5751907591567798, "grad_norm": 0.5582185909323738, "learning_rate": 2.2742678033919175e-05, "loss": 11.83, "step": 28927 }, { "epoch": 1.5752452131533627, "grad_norm": 0.5236327125435509, "learning_rate": 2.2737079395740114e-05, "loss": 11.8483, "step": 28928 }, { "epoch": 1.5752996671499457, "grad_norm": 0.517888638304181, "learning_rate": 2.2731481358370498e-05, "loss": 11.7218, "step": 28929 }, { "epoch": 1.575354121146529, "grad_norm": 0.5806616262289762, "learning_rate": 2.272588392185384e-05, "loss": 11.8007, "step": 28930 }, { "epoch": 1.575408575143112, "grad_norm": 0.5388379469177362, "learning_rate": 2.272028708623365e-05, "loss": 11.7921, "step": 28931 }, { "epoch": 1.575463029139695, "grad_norm": 0.5361134179981693, "learning_rate": 2.2714690851553488e-05, "loss": 11.7761, "step": 28932 }, { "epoch": 1.575517483136278, "grad_norm": 0.5423441461487326, "learning_rate": 2.2709095217856836e-05, "loss": 11.8017, "step": 28933 }, { "epoch": 1.575571937132861, "grad_norm": 0.5245884311961633, "learning_rate": 2.270350018518723e-05, "loss": 11.7721, "step": 28934 }, { "epoch": 1.575626391129444, "grad_norm": 0.5602690809245169, "learning_rate": 2.2697905753588156e-05, "loss": 11.8095, "step": 28935 }, { "epoch": 1.5756808451260271, "grad_norm": 0.6439708276703576, "learning_rate": 2.269231192310315e-05, "loss": 11.8879, "step": 28936 }, { "epoch": 1.5757352991226101, "grad_norm": 0.6756873643787613, "learning_rate": 2.2686718693775664e-05, "loss": 11.8543, "step": 28937 }, { "epoch": 1.5757897531191931, "grad_norm": 0.5729403690540208, "learning_rate": 2.2681126065649227e-05, "loss": 11.9723, "step": 28938 }, { "epoch": 1.575844207115776, "grad_norm": 0.5200836804350855, "learning_rate": 2.2675534038767342e-05, "loss": 11.8063, "step": 28939 }, { "epoch": 1.575898661112359, "grad_norm": 0.5864393205334614, "learning_rate": 2.2669942613173465e-05, "loss": 11.9725, "step": 28940 }, { "epoch": 1.575953115108942, "grad_norm": 0.5406517164507786, "learning_rate": 2.2664351788911063e-05, "loss": 11.8057, "step": 28941 }, { "epoch": 1.576007569105525, "grad_norm": 0.5329268958336405, "learning_rate": 2.2658761566023655e-05, "loss": 11.7887, "step": 28942 }, { "epoch": 1.576062023102108, "grad_norm": 0.5332785095692129, "learning_rate": 2.2653171944554662e-05, "loss": 11.8428, "step": 28943 }, { "epoch": 1.576116477098691, "grad_norm": 0.5469093885972705, "learning_rate": 2.2647582924547606e-05, "loss": 11.8983, "step": 28944 }, { "epoch": 1.576170931095274, "grad_norm": 0.5199717403612628, "learning_rate": 2.2641994506045883e-05, "loss": 11.8231, "step": 28945 }, { "epoch": 1.576225385091857, "grad_norm": 0.5879884774777993, "learning_rate": 2.2636406689093014e-05, "loss": 11.8622, "step": 28946 }, { "epoch": 1.57627983908844, "grad_norm": 0.5686514620226485, "learning_rate": 2.263081947373239e-05, "loss": 11.649, "step": 28947 }, { "epoch": 1.576334293085023, "grad_norm": 0.507483113874304, "learning_rate": 2.2625232860007495e-05, "loss": 11.8155, "step": 28948 }, { "epoch": 1.576388747081606, "grad_norm": 0.6117196971041503, "learning_rate": 2.2619646847961785e-05, "loss": 11.8837, "step": 28949 }, { "epoch": 1.576443201078189, "grad_norm": 0.5720458429327793, "learning_rate": 2.261406143763868e-05, "loss": 11.8197, "step": 28950 }, { "epoch": 1.576497655074772, "grad_norm": 0.5534445260877011, "learning_rate": 2.260847662908161e-05, "loss": 11.8195, "step": 28951 }, { "epoch": 1.576552109071355, "grad_norm": 0.5583281817128195, "learning_rate": 2.2602892422333976e-05, "loss": 11.6677, "step": 28952 }, { "epoch": 1.576606563067938, "grad_norm": 0.531915341939756, "learning_rate": 2.2597308817439232e-05, "loss": 11.7879, "step": 28953 }, { "epoch": 1.5766610170645212, "grad_norm": 0.5098542220047537, "learning_rate": 2.2591725814440835e-05, "loss": 11.7829, "step": 28954 }, { "epoch": 1.5767154710611042, "grad_norm": 0.4992514116282411, "learning_rate": 2.258614341338211e-05, "loss": 11.7571, "step": 28955 }, { "epoch": 1.5767699250576872, "grad_norm": 0.5449980587058868, "learning_rate": 2.258056161430656e-05, "loss": 11.9823, "step": 28956 }, { "epoch": 1.5768243790542702, "grad_norm": 0.6281234219372464, "learning_rate": 2.257498041725751e-05, "loss": 11.8211, "step": 28957 }, { "epoch": 1.5768788330508532, "grad_norm": 0.5436626228492178, "learning_rate": 2.2569399822278425e-05, "loss": 11.8588, "step": 28958 }, { "epoch": 1.5769332870474362, "grad_norm": 0.631549265387338, "learning_rate": 2.2563819829412647e-05, "loss": 11.8126, "step": 28959 }, { "epoch": 1.5769877410440194, "grad_norm": 0.5676873706665952, "learning_rate": 2.2558240438703625e-05, "loss": 11.7445, "step": 28960 }, { "epoch": 1.5770421950406024, "grad_norm": 0.5769554586685568, "learning_rate": 2.2552661650194706e-05, "loss": 11.7879, "step": 28961 }, { "epoch": 1.5770966490371854, "grad_norm": 0.5362000100321781, "learning_rate": 2.2547083463929242e-05, "loss": 11.8459, "step": 28962 }, { "epoch": 1.5771511030337684, "grad_norm": 0.5134673533249084, "learning_rate": 2.2541505879950665e-05, "loss": 11.7598, "step": 28963 }, { "epoch": 1.5772055570303514, "grad_norm": 0.5357515394780799, "learning_rate": 2.253592889830234e-05, "loss": 11.9005, "step": 28964 }, { "epoch": 1.5772600110269344, "grad_norm": 0.524983362575471, "learning_rate": 2.2530352519027596e-05, "loss": 11.8162, "step": 28965 }, { "epoch": 1.5773144650235174, "grad_norm": 0.6154698033924738, "learning_rate": 2.252477674216986e-05, "loss": 11.8156, "step": 28966 }, { "epoch": 1.5773689190201003, "grad_norm": 0.5504062250391509, "learning_rate": 2.2519201567772418e-05, "loss": 11.6462, "step": 28967 }, { "epoch": 1.5774233730166833, "grad_norm": 0.5430983524423318, "learning_rate": 2.2513626995878688e-05, "loss": 11.7889, "step": 28968 }, { "epoch": 1.5774778270132663, "grad_norm": 0.5188498487490496, "learning_rate": 2.2508053026531962e-05, "loss": 11.7778, "step": 28969 }, { "epoch": 1.5775322810098493, "grad_norm": 0.5121725059802743, "learning_rate": 2.2502479659775632e-05, "loss": 11.866, "step": 28970 }, { "epoch": 1.5775867350064323, "grad_norm": 0.5290015315785176, "learning_rate": 2.2496906895653026e-05, "loss": 11.7747, "step": 28971 }, { "epoch": 1.5776411890030153, "grad_norm": 0.6569043931659422, "learning_rate": 2.2491334734207436e-05, "loss": 11.9028, "step": 28972 }, { "epoch": 1.5776956429995983, "grad_norm": 0.5543234105763295, "learning_rate": 2.2485763175482255e-05, "loss": 11.7604, "step": 28973 }, { "epoch": 1.5777500969961813, "grad_norm": 0.49838727870874877, "learning_rate": 2.2480192219520745e-05, "loss": 11.7692, "step": 28974 }, { "epoch": 1.5778045509927643, "grad_norm": 0.5135857097967986, "learning_rate": 2.2474621866366265e-05, "loss": 11.746, "step": 28975 }, { "epoch": 1.5778590049893473, "grad_norm": 0.5333201044484813, "learning_rate": 2.2469052116062162e-05, "loss": 11.7192, "step": 28976 }, { "epoch": 1.5779134589859305, "grad_norm": 0.529647847874188, "learning_rate": 2.2463482968651673e-05, "loss": 11.693, "step": 28977 }, { "epoch": 1.5779679129825135, "grad_norm": 0.5440209011817407, "learning_rate": 2.245791442417817e-05, "loss": 11.7874, "step": 28978 }, { "epoch": 1.5780223669790965, "grad_norm": 0.6376308003114978, "learning_rate": 2.2452346482684904e-05, "loss": 11.7973, "step": 28979 }, { "epoch": 1.5780768209756795, "grad_norm": 0.5519943422912124, "learning_rate": 2.2446779144215226e-05, "loss": 11.8261, "step": 28980 }, { "epoch": 1.5781312749722625, "grad_norm": 0.5295756531059973, "learning_rate": 2.2441212408812406e-05, "loss": 11.8154, "step": 28981 }, { "epoch": 1.5781857289688455, "grad_norm": 0.6778275703681259, "learning_rate": 2.2435646276519684e-05, "loss": 11.8203, "step": 28982 }, { "epoch": 1.5782401829654287, "grad_norm": 0.6029728680115989, "learning_rate": 2.243008074738042e-05, "loss": 11.6193, "step": 28983 }, { "epoch": 1.5782946369620117, "grad_norm": 0.5648586442144191, "learning_rate": 2.242451582143782e-05, "loss": 11.8152, "step": 28984 }, { "epoch": 1.5783490909585947, "grad_norm": 0.5750749028367127, "learning_rate": 2.241895149873523e-05, "loss": 11.8914, "step": 28985 }, { "epoch": 1.5784035449551777, "grad_norm": 0.5815848033836704, "learning_rate": 2.2413387779315854e-05, "loss": 11.8193, "step": 28986 }, { "epoch": 1.5784579989517606, "grad_norm": 0.5911850440102399, "learning_rate": 2.2407824663222988e-05, "loss": 11.9349, "step": 28987 }, { "epoch": 1.5785124529483436, "grad_norm": 0.5667578798371958, "learning_rate": 2.240226215049992e-05, "loss": 11.8732, "step": 28988 }, { "epoch": 1.5785669069449266, "grad_norm": 0.5261334268084475, "learning_rate": 2.239670024118984e-05, "loss": 11.7511, "step": 28989 }, { "epoch": 1.5786213609415096, "grad_norm": 0.5301617304382541, "learning_rate": 2.2391138935336065e-05, "loss": 11.9436, "step": 28990 }, { "epoch": 1.5786758149380926, "grad_norm": 0.6135552732462849, "learning_rate": 2.2385578232981807e-05, "loss": 12.0065, "step": 28991 }, { "epoch": 1.5787302689346756, "grad_norm": 0.5642411080239568, "learning_rate": 2.2380018134170278e-05, "loss": 11.8878, "step": 28992 }, { "epoch": 1.5787847229312586, "grad_norm": 0.5857130422423739, "learning_rate": 2.2374458638944775e-05, "loss": 11.8529, "step": 28993 }, { "epoch": 1.5788391769278416, "grad_norm": 0.6379855297922655, "learning_rate": 2.236889974734847e-05, "loss": 11.9326, "step": 28994 }, { "epoch": 1.5788936309244246, "grad_norm": 0.5219813063400816, "learning_rate": 2.236334145942465e-05, "loss": 11.7913, "step": 28995 }, { "epoch": 1.5789480849210076, "grad_norm": 0.5256263252262056, "learning_rate": 2.2357783775216478e-05, "loss": 11.7567, "step": 28996 }, { "epoch": 1.5790025389175906, "grad_norm": 0.6304368816838619, "learning_rate": 2.2352226694767198e-05, "loss": 11.8917, "step": 28997 }, { "epoch": 1.5790569929141736, "grad_norm": 0.5439879866079371, "learning_rate": 2.234667021812006e-05, "loss": 11.8921, "step": 28998 }, { "epoch": 1.5791114469107566, "grad_norm": 0.5229767730065559, "learning_rate": 2.2341114345318192e-05, "loss": 11.7656, "step": 28999 }, { "epoch": 1.5791659009073398, "grad_norm": 0.5446884088217604, "learning_rate": 2.233555907640491e-05, "loss": 11.8311, "step": 29000 }, { "epoch": 1.5792203549039228, "grad_norm": 0.506451489020867, "learning_rate": 2.2330004411423288e-05, "loss": 11.6914, "step": 29001 }, { "epoch": 1.5792748089005058, "grad_norm": 0.5656469671271962, "learning_rate": 2.232445035041658e-05, "loss": 11.9168, "step": 29002 }, { "epoch": 1.5793292628970887, "grad_norm": 0.6068004920605006, "learning_rate": 2.2318896893427998e-05, "loss": 11.7953, "step": 29003 }, { "epoch": 1.5793837168936717, "grad_norm": 0.6278159447890614, "learning_rate": 2.2313344040500673e-05, "loss": 11.9229, "step": 29004 }, { "epoch": 1.5794381708902547, "grad_norm": 0.5440617499970428, "learning_rate": 2.2307791791677845e-05, "loss": 11.7915, "step": 29005 }, { "epoch": 1.579492624886838, "grad_norm": 0.781416627615001, "learning_rate": 2.2302240147002616e-05, "loss": 11.7311, "step": 29006 }, { "epoch": 1.579547078883421, "grad_norm": 0.5246497992509671, "learning_rate": 2.229668910651824e-05, "loss": 11.6722, "step": 29007 }, { "epoch": 1.579601532880004, "grad_norm": 0.5731466136407526, "learning_rate": 2.2291138670267808e-05, "loss": 11.9134, "step": 29008 }, { "epoch": 1.579655986876587, "grad_norm": 0.5130475726398709, "learning_rate": 2.2285588838294514e-05, "loss": 11.8347, "step": 29009 }, { "epoch": 1.57971044087317, "grad_norm": 0.5376548533099087, "learning_rate": 2.2280039610641556e-05, "loss": 11.7917, "step": 29010 }, { "epoch": 1.579764894869753, "grad_norm": 0.6086318740399723, "learning_rate": 2.2274490987352003e-05, "loss": 11.901, "step": 29011 }, { "epoch": 1.579819348866336, "grad_norm": 0.6488987762219234, "learning_rate": 2.2268942968469043e-05, "loss": 11.8067, "step": 29012 }, { "epoch": 1.579873802862919, "grad_norm": 0.549697596344946, "learning_rate": 2.226339555403584e-05, "loss": 11.9265, "step": 29013 }, { "epoch": 1.579928256859502, "grad_norm": 0.4918710492757763, "learning_rate": 2.225784874409549e-05, "loss": 11.8429, "step": 29014 }, { "epoch": 1.5799827108560849, "grad_norm": 0.5244451401497985, "learning_rate": 2.225230253869116e-05, "loss": 11.7575, "step": 29015 }, { "epoch": 1.5800371648526679, "grad_norm": 0.5493826641505074, "learning_rate": 2.2246756937865943e-05, "loss": 11.7747, "step": 29016 }, { "epoch": 1.5800916188492509, "grad_norm": 0.5741686517478704, "learning_rate": 2.224121194166301e-05, "loss": 11.9342, "step": 29017 }, { "epoch": 1.5801460728458339, "grad_norm": 0.5597580542195663, "learning_rate": 2.2235667550125427e-05, "loss": 11.8798, "step": 29018 }, { "epoch": 1.5802005268424169, "grad_norm": 0.5389378037250077, "learning_rate": 2.223012376329633e-05, "loss": 11.8639, "step": 29019 }, { "epoch": 1.5802549808389998, "grad_norm": 0.5279581187588729, "learning_rate": 2.222458058121889e-05, "loss": 11.712, "step": 29020 }, { "epoch": 1.5803094348355828, "grad_norm": 0.5616077014528822, "learning_rate": 2.221903800393611e-05, "loss": 11.9719, "step": 29021 }, { "epoch": 1.5803638888321658, "grad_norm": 0.5172516190327713, "learning_rate": 2.2213496031491142e-05, "loss": 11.7983, "step": 29022 }, { "epoch": 1.5804183428287488, "grad_norm": 0.5774367667137277, "learning_rate": 2.2207954663927066e-05, "loss": 11.8706, "step": 29023 }, { "epoch": 1.580472796825332, "grad_norm": 0.5558358176704967, "learning_rate": 2.2202413901286968e-05, "loss": 11.8833, "step": 29024 }, { "epoch": 1.580527250821915, "grad_norm": 0.5207789713979787, "learning_rate": 2.219687374361398e-05, "loss": 11.7573, "step": 29025 }, { "epoch": 1.580581704818498, "grad_norm": 0.5306067291703788, "learning_rate": 2.2191334190951118e-05, "loss": 11.8661, "step": 29026 }, { "epoch": 1.580636158815081, "grad_norm": 0.5335870808746337, "learning_rate": 2.218579524334151e-05, "loss": 11.8255, "step": 29027 }, { "epoch": 1.580690612811664, "grad_norm": 0.518464436909379, "learning_rate": 2.218025690082819e-05, "loss": 11.7333, "step": 29028 }, { "epoch": 1.5807450668082472, "grad_norm": 0.5065462281628947, "learning_rate": 2.217471916345427e-05, "loss": 11.8099, "step": 29029 }, { "epoch": 1.5807995208048302, "grad_norm": 0.5028103002169194, "learning_rate": 2.2169182031262782e-05, "loss": 11.7827, "step": 29030 }, { "epoch": 1.5808539748014132, "grad_norm": 0.4958842347440862, "learning_rate": 2.216364550429676e-05, "loss": 11.8149, "step": 29031 }, { "epoch": 1.5809084287979962, "grad_norm": 0.6093113753707403, "learning_rate": 2.2158109582599305e-05, "loss": 11.7194, "step": 29032 }, { "epoch": 1.5809628827945792, "grad_norm": 0.5681429641056872, "learning_rate": 2.2152574266213434e-05, "loss": 11.9397, "step": 29033 }, { "epoch": 1.5810173367911622, "grad_norm": 0.6107264754564226, "learning_rate": 2.2147039555182216e-05, "loss": 11.5987, "step": 29034 }, { "epoch": 1.5810717907877452, "grad_norm": 0.5332209531813399, "learning_rate": 2.214150544954865e-05, "loss": 11.8597, "step": 29035 }, { "epoch": 1.5811262447843282, "grad_norm": 0.5354953413126309, "learning_rate": 2.213597194935578e-05, "loss": 11.8647, "step": 29036 }, { "epoch": 1.5811806987809112, "grad_norm": 0.5808049495861983, "learning_rate": 2.21304390546467e-05, "loss": 11.9228, "step": 29037 }, { "epoch": 1.5812351527774942, "grad_norm": 0.5742406354612188, "learning_rate": 2.2124906765464347e-05, "loss": 11.8291, "step": 29038 }, { "epoch": 1.5812896067740771, "grad_norm": 0.5044437219628712, "learning_rate": 2.21193750818518e-05, "loss": 11.8074, "step": 29039 }, { "epoch": 1.5813440607706601, "grad_norm": 0.4933525140210394, "learning_rate": 2.2113844003852057e-05, "loss": 11.8077, "step": 29040 }, { "epoch": 1.5813985147672431, "grad_norm": 0.5170722586985054, "learning_rate": 2.2108313531508108e-05, "loss": 11.7912, "step": 29041 }, { "epoch": 1.5814529687638261, "grad_norm": 0.54294628014179, "learning_rate": 2.2102783664862992e-05, "loss": 11.812, "step": 29042 }, { "epoch": 1.5815074227604091, "grad_norm": 0.5359145055436966, "learning_rate": 2.2097254403959666e-05, "loss": 11.7214, "step": 29043 }, { "epoch": 1.5815618767569921, "grad_norm": 0.5144911463018499, "learning_rate": 2.2091725748841187e-05, "loss": 11.6378, "step": 29044 }, { "epoch": 1.581616330753575, "grad_norm": 0.5511888396833232, "learning_rate": 2.208619769955048e-05, "loss": 11.8841, "step": 29045 }, { "epoch": 1.581670784750158, "grad_norm": 0.5605769639697662, "learning_rate": 2.2080670256130564e-05, "loss": 11.7991, "step": 29046 }, { "epoch": 1.5817252387467413, "grad_norm": 0.5613098356939531, "learning_rate": 2.2075143418624454e-05, "loss": 11.6674, "step": 29047 }, { "epoch": 1.5817796927433243, "grad_norm": 0.5292845915904641, "learning_rate": 2.2069617187075076e-05, "loss": 11.6973, "step": 29048 }, { "epoch": 1.5818341467399073, "grad_norm": 0.5340355912272015, "learning_rate": 2.2064091561525445e-05, "loss": 11.7223, "step": 29049 }, { "epoch": 1.5818886007364903, "grad_norm": 0.5426519904518304, "learning_rate": 2.205856654201851e-05, "loss": 11.8386, "step": 29050 }, { "epoch": 1.5819430547330733, "grad_norm": 0.5843536996058114, "learning_rate": 2.20530421285972e-05, "loss": 11.7543, "step": 29051 }, { "epoch": 1.5819975087296563, "grad_norm": 0.5281374525024105, "learning_rate": 2.2047518321304538e-05, "loss": 11.8029, "step": 29052 }, { "epoch": 1.5820519627262395, "grad_norm": 0.5468818828836998, "learning_rate": 2.2041995120183424e-05, "loss": 11.8517, "step": 29053 }, { "epoch": 1.5821064167228225, "grad_norm": 0.5696897498571843, "learning_rate": 2.2036472525276852e-05, "loss": 11.7462, "step": 29054 }, { "epoch": 1.5821608707194055, "grad_norm": 0.548390241746292, "learning_rate": 2.2030950536627715e-05, "loss": 11.9156, "step": 29055 }, { "epoch": 1.5822153247159885, "grad_norm": 0.5841769023178146, "learning_rate": 2.2025429154279008e-05, "loss": 11.7717, "step": 29056 }, { "epoch": 1.5822697787125715, "grad_norm": 0.536544838418208, "learning_rate": 2.2019908378273612e-05, "loss": 11.8411, "step": 29057 }, { "epoch": 1.5823242327091545, "grad_norm": 0.546668705865174, "learning_rate": 2.2014388208654492e-05, "loss": 11.7562, "step": 29058 }, { "epoch": 1.5823786867057374, "grad_norm": 0.5637228763955602, "learning_rate": 2.2008868645464586e-05, "loss": 11.7531, "step": 29059 }, { "epoch": 1.5824331407023204, "grad_norm": 0.5349452483227524, "learning_rate": 2.2003349688746776e-05, "loss": 11.8587, "step": 29060 }, { "epoch": 1.5824875946989034, "grad_norm": 0.5177356234154459, "learning_rate": 2.199783133854403e-05, "loss": 11.7372, "step": 29061 }, { "epoch": 1.5825420486954864, "grad_norm": 0.5816474828550473, "learning_rate": 2.1992313594899216e-05, "loss": 11.8017, "step": 29062 }, { "epoch": 1.5825965026920694, "grad_norm": 0.5381057753374475, "learning_rate": 2.198679645785524e-05, "loss": 11.8486, "step": 29063 }, { "epoch": 1.5826509566886524, "grad_norm": 0.548465577977049, "learning_rate": 2.1981279927455034e-05, "loss": 11.8801, "step": 29064 }, { "epoch": 1.5827054106852354, "grad_norm": 0.5347820313151733, "learning_rate": 2.1975764003741462e-05, "loss": 11.8356, "step": 29065 }, { "epoch": 1.5827598646818184, "grad_norm": 0.5559862770546784, "learning_rate": 2.1970248686757454e-05, "loss": 11.8921, "step": 29066 }, { "epoch": 1.5828143186784014, "grad_norm": 0.6062493227172571, "learning_rate": 2.1964733976545847e-05, "loss": 11.7938, "step": 29067 }, { "epoch": 1.5828687726749844, "grad_norm": 0.5570617421374567, "learning_rate": 2.195921987314956e-05, "loss": 11.8624, "step": 29068 }, { "epoch": 1.5829232266715674, "grad_norm": 0.5493201048040416, "learning_rate": 2.1953706376611495e-05, "loss": 11.853, "step": 29069 }, { "epoch": 1.5829776806681506, "grad_norm": 0.6152634032043247, "learning_rate": 2.1948193486974466e-05, "loss": 11.793, "step": 29070 }, { "epoch": 1.5830321346647336, "grad_norm": 0.5396840445424315, "learning_rate": 2.1942681204281433e-05, "loss": 11.9135, "step": 29071 }, { "epoch": 1.5830865886613166, "grad_norm": 0.5363726241797955, "learning_rate": 2.193716952857515e-05, "loss": 11.81, "step": 29072 }, { "epoch": 1.5831410426578996, "grad_norm": 0.5262139466133963, "learning_rate": 2.1931658459898518e-05, "loss": 11.844, "step": 29073 }, { "epoch": 1.5831954966544826, "grad_norm": 0.5416015297460568, "learning_rate": 2.1926147998294433e-05, "loss": 11.7217, "step": 29074 }, { "epoch": 1.5832499506510656, "grad_norm": 0.6406901360328437, "learning_rate": 2.1920638143805695e-05, "loss": 11.9512, "step": 29075 }, { "epoch": 1.5833044046476488, "grad_norm": 0.58056937267003, "learning_rate": 2.1915128896475188e-05, "loss": 11.7572, "step": 29076 }, { "epoch": 1.5833588586442318, "grad_norm": 0.5641658998258171, "learning_rate": 2.1909620256345708e-05, "loss": 11.7556, "step": 29077 }, { "epoch": 1.5834133126408148, "grad_norm": 0.47841146649147465, "learning_rate": 2.1904112223460138e-05, "loss": 11.7228, "step": 29078 }, { "epoch": 1.5834677666373977, "grad_norm": 0.5374175110604116, "learning_rate": 2.1898604797861267e-05, "loss": 11.7036, "step": 29079 }, { "epoch": 1.5835222206339807, "grad_norm": 0.5960610157409361, "learning_rate": 2.1893097979591937e-05, "loss": 11.7403, "step": 29080 }, { "epoch": 1.5835766746305637, "grad_norm": 0.5297496147641881, "learning_rate": 2.1887591768695036e-05, "loss": 11.7162, "step": 29081 }, { "epoch": 1.5836311286271467, "grad_norm": 0.5736564245092398, "learning_rate": 2.1882086165213268e-05, "loss": 11.8737, "step": 29082 }, { "epoch": 1.5836855826237297, "grad_norm": 0.5862986527536662, "learning_rate": 2.1876581169189527e-05, "loss": 11.7519, "step": 29083 }, { "epoch": 1.5837400366203127, "grad_norm": 0.5418749097848605, "learning_rate": 2.1871076780666556e-05, "loss": 11.8964, "step": 29084 }, { "epoch": 1.5837944906168957, "grad_norm": 0.4936289500691851, "learning_rate": 2.18655729996872e-05, "loss": 11.8897, "step": 29085 }, { "epoch": 1.5838489446134787, "grad_norm": 0.5013235215036588, "learning_rate": 2.186006982629427e-05, "loss": 11.8063, "step": 29086 }, { "epoch": 1.5839033986100617, "grad_norm": 0.5099163128588902, "learning_rate": 2.185456726053052e-05, "loss": 11.8442, "step": 29087 }, { "epoch": 1.5839578526066447, "grad_norm": 0.5917029892828737, "learning_rate": 2.1849065302438797e-05, "loss": 11.8341, "step": 29088 }, { "epoch": 1.5840123066032277, "grad_norm": 0.5021925925260688, "learning_rate": 2.1843563952061808e-05, "loss": 11.8131, "step": 29089 }, { "epoch": 1.5840667605998107, "grad_norm": 0.497188094143562, "learning_rate": 2.1838063209442407e-05, "loss": 11.8229, "step": 29090 }, { "epoch": 1.5841212145963937, "grad_norm": 0.626381462049018, "learning_rate": 2.1832563074623335e-05, "loss": 11.8201, "step": 29091 }, { "epoch": 1.5841756685929766, "grad_norm": 0.5784104874652187, "learning_rate": 2.182706354764733e-05, "loss": 11.9355, "step": 29092 }, { "epoch": 1.5842301225895596, "grad_norm": 0.5532987423111362, "learning_rate": 2.182156462855721e-05, "loss": 11.7749, "step": 29093 }, { "epoch": 1.5842845765861429, "grad_norm": 0.5022861028858535, "learning_rate": 2.18160663173957e-05, "loss": 11.7734, "step": 29094 }, { "epoch": 1.5843390305827258, "grad_norm": 0.5617752136216879, "learning_rate": 2.1810568614205562e-05, "loss": 11.8503, "step": 29095 }, { "epoch": 1.5843934845793088, "grad_norm": 0.5429460549899313, "learning_rate": 2.1805071519029586e-05, "loss": 11.8458, "step": 29096 }, { "epoch": 1.5844479385758918, "grad_norm": 0.5316821255156369, "learning_rate": 2.1799575031910447e-05, "loss": 11.8789, "step": 29097 }, { "epoch": 1.5845023925724748, "grad_norm": 0.5462723679800302, "learning_rate": 2.1794079152890966e-05, "loss": 11.8165, "step": 29098 }, { "epoch": 1.584556846569058, "grad_norm": 0.5281100326559773, "learning_rate": 2.1788583882013812e-05, "loss": 11.7381, "step": 29099 }, { "epoch": 1.584611300565641, "grad_norm": 0.5652817255406261, "learning_rate": 2.178308921932177e-05, "loss": 11.8184, "step": 29100 }, { "epoch": 1.584665754562224, "grad_norm": 0.559364291169176, "learning_rate": 2.1777595164857544e-05, "loss": 11.77, "step": 29101 }, { "epoch": 1.584720208558807, "grad_norm": 0.5517743434517466, "learning_rate": 2.1772101718663827e-05, "loss": 11.7682, "step": 29102 }, { "epoch": 1.58477466255539, "grad_norm": 0.6718982106305822, "learning_rate": 2.17666088807834e-05, "loss": 11.8081, "step": 29103 }, { "epoch": 1.584829116551973, "grad_norm": 0.5600334935057784, "learning_rate": 2.1761116651258918e-05, "loss": 11.8599, "step": 29104 }, { "epoch": 1.584883570548556, "grad_norm": 0.5398688980994854, "learning_rate": 2.175562503013313e-05, "loss": 11.8, "step": 29105 }, { "epoch": 1.584938024545139, "grad_norm": 0.520637674409149, "learning_rate": 2.17501340174487e-05, "loss": 11.7779, "step": 29106 }, { "epoch": 1.584992478541722, "grad_norm": 0.6543683148255448, "learning_rate": 2.174464361324835e-05, "loss": 11.6661, "step": 29107 }, { "epoch": 1.585046932538305, "grad_norm": 0.4959228769340991, "learning_rate": 2.1739153817574796e-05, "loss": 11.7957, "step": 29108 }, { "epoch": 1.585101386534888, "grad_norm": 0.5685651267143339, "learning_rate": 2.1733664630470685e-05, "loss": 11.8713, "step": 29109 }, { "epoch": 1.585155840531471, "grad_norm": 0.5651378296195563, "learning_rate": 2.1728176051978754e-05, "loss": 11.9213, "step": 29110 }, { "epoch": 1.585210294528054, "grad_norm": 0.5153589105640635, "learning_rate": 2.1722688082141652e-05, "loss": 11.795, "step": 29111 }, { "epoch": 1.585264748524637, "grad_norm": 0.6036755918870512, "learning_rate": 2.1717200721002017e-05, "loss": 11.8628, "step": 29112 }, { "epoch": 1.58531920252122, "grad_norm": 0.6678899420844301, "learning_rate": 2.17117139686026e-05, "loss": 11.8299, "step": 29113 }, { "epoch": 1.585373656517803, "grad_norm": 0.5068025624269648, "learning_rate": 2.170622782498598e-05, "loss": 11.8039, "step": 29114 }, { "epoch": 1.585428110514386, "grad_norm": 0.5893143485657308, "learning_rate": 2.170074229019491e-05, "loss": 11.7748, "step": 29115 }, { "epoch": 1.585482564510969, "grad_norm": 0.5289392164569375, "learning_rate": 2.1695257364271948e-05, "loss": 11.7358, "step": 29116 }, { "epoch": 1.5855370185075521, "grad_norm": 0.5227695138565449, "learning_rate": 2.168977304725981e-05, "loss": 11.8593, "step": 29117 }, { "epoch": 1.5855914725041351, "grad_norm": 0.5659794584806885, "learning_rate": 2.168428933920116e-05, "loss": 11.9605, "step": 29118 }, { "epoch": 1.5856459265007181, "grad_norm": 0.5498119223535392, "learning_rate": 2.167880624013857e-05, "loss": 11.7594, "step": 29119 }, { "epoch": 1.585700380497301, "grad_norm": 0.5337225061114226, "learning_rate": 2.167332375011476e-05, "loss": 11.8674, "step": 29120 }, { "epoch": 1.585754834493884, "grad_norm": 0.5580952724767342, "learning_rate": 2.1667841869172313e-05, "loss": 11.7955, "step": 29121 }, { "epoch": 1.585809288490467, "grad_norm": 0.5462561153115113, "learning_rate": 2.1662360597353826e-05, "loss": 11.8734, "step": 29122 }, { "epoch": 1.5858637424870503, "grad_norm": 0.5461428899994698, "learning_rate": 2.1656879934702e-05, "loss": 11.8885, "step": 29123 }, { "epoch": 1.5859181964836333, "grad_norm": 0.5583712414235588, "learning_rate": 2.165139988125938e-05, "loss": 11.8426, "step": 29124 }, { "epoch": 1.5859726504802163, "grad_norm": 0.6166205933702942, "learning_rate": 2.1645920437068645e-05, "loss": 11.6789, "step": 29125 }, { "epoch": 1.5860271044767993, "grad_norm": 0.5771649355488141, "learning_rate": 2.1640441602172347e-05, "loss": 11.8718, "step": 29126 }, { "epoch": 1.5860815584733823, "grad_norm": 0.5711758625196081, "learning_rate": 2.1634963376613136e-05, "loss": 11.7602, "step": 29127 }, { "epoch": 1.5861360124699653, "grad_norm": 0.5728149068220495, "learning_rate": 2.1629485760433575e-05, "loss": 11.7312, "step": 29128 }, { "epoch": 1.5861904664665483, "grad_norm": 0.6268439389283404, "learning_rate": 2.1624008753676262e-05, "loss": 11.8792, "step": 29129 }, { "epoch": 1.5862449204631313, "grad_norm": 0.6069611203707442, "learning_rate": 2.1618532356383835e-05, "loss": 11.831, "step": 29130 }, { "epoch": 1.5862993744597143, "grad_norm": 0.5386232493514849, "learning_rate": 2.1613056568598843e-05, "loss": 11.8296, "step": 29131 }, { "epoch": 1.5863538284562972, "grad_norm": 0.5265429702693016, "learning_rate": 2.1607581390363873e-05, "loss": 11.6696, "step": 29132 }, { "epoch": 1.5864082824528802, "grad_norm": 0.5530855200515045, "learning_rate": 2.160210682172147e-05, "loss": 11.7573, "step": 29133 }, { "epoch": 1.5864627364494632, "grad_norm": 0.5583801460465538, "learning_rate": 2.1596632862714228e-05, "loss": 11.9413, "step": 29134 }, { "epoch": 1.5865171904460462, "grad_norm": 0.565629203525206, "learning_rate": 2.159115951338475e-05, "loss": 11.8124, "step": 29135 }, { "epoch": 1.5865716444426292, "grad_norm": 0.6122724835057962, "learning_rate": 2.1585686773775525e-05, "loss": 11.949, "step": 29136 }, { "epoch": 1.5866260984392122, "grad_norm": 0.5596665136297907, "learning_rate": 2.1580214643929187e-05, "loss": 11.8034, "step": 29137 }, { "epoch": 1.5866805524357952, "grad_norm": 0.5271993444831127, "learning_rate": 2.1574743123888218e-05, "loss": 11.7126, "step": 29138 }, { "epoch": 1.5867350064323782, "grad_norm": 0.5547945712060374, "learning_rate": 2.1569272213695236e-05, "loss": 11.8732, "step": 29139 }, { "epoch": 1.5867894604289614, "grad_norm": 0.5334795982773989, "learning_rate": 2.156380191339271e-05, "loss": 11.7308, "step": 29140 }, { "epoch": 1.5868439144255444, "grad_norm": 0.5445794762733351, "learning_rate": 2.1558332223023247e-05, "loss": 11.6873, "step": 29141 }, { "epoch": 1.5868983684221274, "grad_norm": 0.5507707318790223, "learning_rate": 2.1552863142629344e-05, "loss": 11.7586, "step": 29142 }, { "epoch": 1.5869528224187104, "grad_norm": 0.5147203627377872, "learning_rate": 2.1547394672253496e-05, "loss": 11.765, "step": 29143 }, { "epoch": 1.5870072764152934, "grad_norm": 0.7584100790072255, "learning_rate": 2.1541926811938274e-05, "loss": 11.6893, "step": 29144 }, { "epoch": 1.5870617304118764, "grad_norm": 0.5401167545297461, "learning_rate": 2.153645956172622e-05, "loss": 11.7704, "step": 29145 }, { "epoch": 1.5871161844084596, "grad_norm": 0.558777060546608, "learning_rate": 2.1530992921659775e-05, "loss": 11.7655, "step": 29146 }, { "epoch": 1.5871706384050426, "grad_norm": 0.49229068922422564, "learning_rate": 2.1525526891781522e-05, "loss": 11.7972, "step": 29147 }, { "epoch": 1.5872250924016256, "grad_norm": 0.5533981064329909, "learning_rate": 2.1520061472133902e-05, "loss": 11.8416, "step": 29148 }, { "epoch": 1.5872795463982086, "grad_norm": 0.5013605542073992, "learning_rate": 2.1514596662759467e-05, "loss": 11.8212, "step": 29149 }, { "epoch": 1.5873340003947916, "grad_norm": 0.5900255638176347, "learning_rate": 2.1509132463700677e-05, "loss": 11.6981, "step": 29150 }, { "epoch": 1.5873884543913745, "grad_norm": 0.5330474399920727, "learning_rate": 2.150366887500005e-05, "loss": 11.8208, "step": 29151 }, { "epoch": 1.5874429083879575, "grad_norm": 0.5134170634657004, "learning_rate": 2.1498205896700063e-05, "loss": 11.6273, "step": 29152 }, { "epoch": 1.5874973623845405, "grad_norm": 0.5683029043787393, "learning_rate": 2.1492743528843173e-05, "loss": 11.8019, "step": 29153 }, { "epoch": 1.5875518163811235, "grad_norm": 0.5519986747922824, "learning_rate": 2.148728177147189e-05, "loss": 11.6967, "step": 29154 }, { "epoch": 1.5876062703777065, "grad_norm": 0.5351536096407199, "learning_rate": 2.1481820624628644e-05, "loss": 11.789, "step": 29155 }, { "epoch": 1.5876607243742895, "grad_norm": 0.514282314891165, "learning_rate": 2.1476360088355928e-05, "loss": 11.8068, "step": 29156 }, { "epoch": 1.5877151783708725, "grad_norm": 0.5509461107819521, "learning_rate": 2.147090016269624e-05, "loss": 11.8408, "step": 29157 }, { "epoch": 1.5877696323674555, "grad_norm": 0.5855389748059536, "learning_rate": 2.1465440847691975e-05, "loss": 11.8164, "step": 29158 }, { "epoch": 1.5878240863640385, "grad_norm": 0.5449832269316832, "learning_rate": 2.1459982143385627e-05, "loss": 11.8003, "step": 29159 }, { "epoch": 1.5878785403606215, "grad_norm": 0.5963238684244109, "learning_rate": 2.1454524049819613e-05, "loss": 11.9584, "step": 29160 }, { "epoch": 1.5879329943572045, "grad_norm": 0.5565491697938304, "learning_rate": 2.1449066567036413e-05, "loss": 11.7727, "step": 29161 }, { "epoch": 1.5879874483537875, "grad_norm": 0.5219938308345949, "learning_rate": 2.144360969507845e-05, "loss": 11.875, "step": 29162 }, { "epoch": 1.5880419023503707, "grad_norm": 0.5408579161222582, "learning_rate": 2.1438153433988117e-05, "loss": 11.8056, "step": 29163 }, { "epoch": 1.5880963563469537, "grad_norm": 0.4886017432595483, "learning_rate": 2.143269778380791e-05, "loss": 11.8131, "step": 29164 }, { "epoch": 1.5881508103435367, "grad_norm": 0.5771394631036637, "learning_rate": 2.14272427445802e-05, "loss": 11.8153, "step": 29165 }, { "epoch": 1.5882052643401197, "grad_norm": 0.5217238601603127, "learning_rate": 2.1421788316347415e-05, "loss": 11.819, "step": 29166 }, { "epoch": 1.5882597183367027, "grad_norm": 0.5630323168814597, "learning_rate": 2.1416334499152013e-05, "loss": 11.842, "step": 29167 }, { "epoch": 1.5883141723332856, "grad_norm": 0.5376945597360916, "learning_rate": 2.1410881293036344e-05, "loss": 11.7791, "step": 29168 }, { "epoch": 1.5883686263298689, "grad_norm": 0.5248237679813952, "learning_rate": 2.1405428698042874e-05, "loss": 11.7955, "step": 29169 }, { "epoch": 1.5884230803264519, "grad_norm": 0.5436701563811874, "learning_rate": 2.1399976714213942e-05, "loss": 11.8776, "step": 29170 }, { "epoch": 1.5884775343230348, "grad_norm": 0.5128821896212716, "learning_rate": 2.1394525341591997e-05, "loss": 11.8182, "step": 29171 }, { "epoch": 1.5885319883196178, "grad_norm": 0.4992722527555482, "learning_rate": 2.1389074580219402e-05, "loss": 11.7359, "step": 29172 }, { "epoch": 1.5885864423162008, "grad_norm": 0.5467252671302966, "learning_rate": 2.138362443013853e-05, "loss": 11.8906, "step": 29173 }, { "epoch": 1.5886408963127838, "grad_norm": 0.5843163581667822, "learning_rate": 2.137817489139179e-05, "loss": 11.8127, "step": 29174 }, { "epoch": 1.5886953503093668, "grad_norm": 0.629587246009668, "learning_rate": 2.1372725964021534e-05, "loss": 11.9161, "step": 29175 }, { "epoch": 1.5887498043059498, "grad_norm": 0.5397705353121288, "learning_rate": 2.136727764807016e-05, "loss": 11.8118, "step": 29176 }, { "epoch": 1.5888042583025328, "grad_norm": 0.540204443399126, "learning_rate": 2.1361829943580004e-05, "loss": 11.8334, "step": 29177 }, { "epoch": 1.5888587122991158, "grad_norm": 0.6136391249747147, "learning_rate": 2.135638285059345e-05, "loss": 11.7844, "step": 29178 }, { "epoch": 1.5889131662956988, "grad_norm": 0.5118044243007966, "learning_rate": 2.1350936369152873e-05, "loss": 11.9372, "step": 29179 }, { "epoch": 1.5889676202922818, "grad_norm": 0.5571087194325227, "learning_rate": 2.134549049930058e-05, "loss": 11.773, "step": 29180 }, { "epoch": 1.5890220742888648, "grad_norm": 0.5520172025077966, "learning_rate": 2.134004524107899e-05, "loss": 11.8087, "step": 29181 }, { "epoch": 1.5890765282854478, "grad_norm": 0.5189616400894338, "learning_rate": 2.1334600594530353e-05, "loss": 11.8258, "step": 29182 }, { "epoch": 1.5891309822820308, "grad_norm": 0.5060035468310032, "learning_rate": 2.132915655969705e-05, "loss": 11.8142, "step": 29183 }, { "epoch": 1.5891854362786138, "grad_norm": 0.5444357482948762, "learning_rate": 2.1323713136621447e-05, "loss": 11.9714, "step": 29184 }, { "epoch": 1.5892398902751967, "grad_norm": 0.5036453223751051, "learning_rate": 2.1318270325345813e-05, "loss": 11.7883, "step": 29185 }, { "epoch": 1.5892943442717797, "grad_norm": 0.5272278688885228, "learning_rate": 2.1312828125912542e-05, "loss": 11.7632, "step": 29186 }, { "epoch": 1.589348798268363, "grad_norm": 0.5516678752768197, "learning_rate": 2.1307386538363872e-05, "loss": 11.9416, "step": 29187 }, { "epoch": 1.589403252264946, "grad_norm": 0.5390981805405269, "learning_rate": 2.1301945562742198e-05, "loss": 11.8479, "step": 29188 }, { "epoch": 1.589457706261529, "grad_norm": 0.5455874018990825, "learning_rate": 2.1296505199089767e-05, "loss": 11.745, "step": 29189 }, { "epoch": 1.589512160258112, "grad_norm": 0.5358756695688616, "learning_rate": 2.129106544744891e-05, "loss": 11.9746, "step": 29190 }, { "epoch": 1.589566614254695, "grad_norm": 0.5572858884820283, "learning_rate": 2.1285626307861985e-05, "loss": 11.684, "step": 29191 }, { "epoch": 1.589621068251278, "grad_norm": 0.5749974252294444, "learning_rate": 2.1280187780371164e-05, "loss": 11.7451, "step": 29192 }, { "epoch": 1.5896755222478611, "grad_norm": 0.6094768452017864, "learning_rate": 2.1274749865018817e-05, "loss": 11.784, "step": 29193 }, { "epoch": 1.5897299762444441, "grad_norm": 0.5155799518670798, "learning_rate": 2.1269312561847243e-05, "loss": 11.8573, "step": 29194 }, { "epoch": 1.5897844302410271, "grad_norm": 0.554418934830859, "learning_rate": 2.1263875870898663e-05, "loss": 11.8116, "step": 29195 }, { "epoch": 1.58983888423761, "grad_norm": 0.5677755816917791, "learning_rate": 2.1258439792215424e-05, "loss": 11.9431, "step": 29196 }, { "epoch": 1.589893338234193, "grad_norm": 0.5491318303962497, "learning_rate": 2.125300432583972e-05, "loss": 11.8706, "step": 29197 }, { "epoch": 1.589947792230776, "grad_norm": 0.5536616969800391, "learning_rate": 2.12475694718139e-05, "loss": 11.7946, "step": 29198 }, { "epoch": 1.590002246227359, "grad_norm": 0.5421698118841684, "learning_rate": 2.124213523018016e-05, "loss": 11.7416, "step": 29199 }, { "epoch": 1.590056700223942, "grad_norm": 0.5689022927176461, "learning_rate": 2.1236701600980778e-05, "loss": 11.9269, "step": 29200 }, { "epoch": 1.590111154220525, "grad_norm": 0.6153413866925239, "learning_rate": 2.1231268584258045e-05, "loss": 11.8882, "step": 29201 }, { "epoch": 1.590165608217108, "grad_norm": 0.5124053512110116, "learning_rate": 2.122583618005417e-05, "loss": 11.8088, "step": 29202 }, { "epoch": 1.590220062213691, "grad_norm": 0.580719462557604, "learning_rate": 2.122040438841141e-05, "loss": 11.7163, "step": 29203 }, { "epoch": 1.590274516210274, "grad_norm": 0.5151472244442663, "learning_rate": 2.1214973209371968e-05, "loss": 11.7885, "step": 29204 }, { "epoch": 1.590328970206857, "grad_norm": 0.5076383471266717, "learning_rate": 2.1209542642978108e-05, "loss": 11.7529, "step": 29205 }, { "epoch": 1.59038342420344, "grad_norm": 0.5193509259548776, "learning_rate": 2.1204112689272084e-05, "loss": 11.7794, "step": 29206 }, { "epoch": 1.590437878200023, "grad_norm": 0.5828889025431375, "learning_rate": 2.1198683348296066e-05, "loss": 11.7939, "step": 29207 }, { "epoch": 1.590492332196606, "grad_norm": 0.5650755250368186, "learning_rate": 2.119325462009233e-05, "loss": 11.8566, "step": 29208 }, { "epoch": 1.590546786193189, "grad_norm": 0.5297085796436609, "learning_rate": 2.1187826504703035e-05, "loss": 11.887, "step": 29209 }, { "epoch": 1.5906012401897722, "grad_norm": 0.5064611510433685, "learning_rate": 2.118239900217044e-05, "loss": 11.7344, "step": 29210 }, { "epoch": 1.5906556941863552, "grad_norm": 0.5342162214270872, "learning_rate": 2.1176972112536707e-05, "loss": 11.8243, "step": 29211 }, { "epoch": 1.5907101481829382, "grad_norm": 0.5392048206008597, "learning_rate": 2.1171545835844074e-05, "loss": 11.7277, "step": 29212 }, { "epoch": 1.5907646021795212, "grad_norm": 0.5489343232386106, "learning_rate": 2.116612017213473e-05, "loss": 11.7642, "step": 29213 }, { "epoch": 1.5908190561761042, "grad_norm": 0.5556368544405371, "learning_rate": 2.1160695121450835e-05, "loss": 11.9001, "step": 29214 }, { "epoch": 1.5908735101726872, "grad_norm": 0.5637806875218498, "learning_rate": 2.1155270683834583e-05, "loss": 11.846, "step": 29215 }, { "epoch": 1.5909279641692704, "grad_norm": 0.5675488791609382, "learning_rate": 2.1149846859328204e-05, "loss": 11.8661, "step": 29216 }, { "epoch": 1.5909824181658534, "grad_norm": 0.5517703220420297, "learning_rate": 2.1144423647973798e-05, "loss": 11.8339, "step": 29217 }, { "epoch": 1.5910368721624364, "grad_norm": 0.5623212189048679, "learning_rate": 2.1139001049813623e-05, "loss": 11.7671, "step": 29218 }, { "epoch": 1.5910913261590194, "grad_norm": 0.49704509406640446, "learning_rate": 2.1133579064889764e-05, "loss": 11.8135, "step": 29219 }, { "epoch": 1.5911457801556024, "grad_norm": 0.5886064269462271, "learning_rate": 2.1128157693244454e-05, "loss": 11.8631, "step": 29220 }, { "epoch": 1.5912002341521854, "grad_norm": 0.5392727739873534, "learning_rate": 2.1122736934919783e-05, "loss": 11.7098, "step": 29221 }, { "epoch": 1.5912546881487684, "grad_norm": 0.565132586191543, "learning_rate": 2.1117316789957962e-05, "loss": 11.8217, "step": 29222 }, { "epoch": 1.5913091421453514, "grad_norm": 0.542112994531766, "learning_rate": 2.1111897258401125e-05, "loss": 11.795, "step": 29223 }, { "epoch": 1.5913635961419343, "grad_norm": 0.5161550032683274, "learning_rate": 2.110647834029137e-05, "loss": 11.8477, "step": 29224 }, { "epoch": 1.5914180501385173, "grad_norm": 0.5026830349343652, "learning_rate": 2.1101060035670893e-05, "loss": 11.8038, "step": 29225 }, { "epoch": 1.5914725041351003, "grad_norm": 0.5379469062411203, "learning_rate": 2.1095642344581778e-05, "loss": 11.8555, "step": 29226 }, { "epoch": 1.5915269581316833, "grad_norm": 0.5024891120704371, "learning_rate": 2.1090225267066187e-05, "loss": 11.6885, "step": 29227 }, { "epoch": 1.5915814121282663, "grad_norm": 0.56469021355504, "learning_rate": 2.1084808803166267e-05, "loss": 11.9066, "step": 29228 }, { "epoch": 1.5916358661248493, "grad_norm": 0.5676993194955461, "learning_rate": 2.107939295292407e-05, "loss": 11.7328, "step": 29229 }, { "epoch": 1.5916903201214323, "grad_norm": 0.5052910014520902, "learning_rate": 2.1073977716381787e-05, "loss": 11.7752, "step": 29230 }, { "epoch": 1.5917447741180153, "grad_norm": 0.5207248151621681, "learning_rate": 2.106856309358145e-05, "loss": 11.8814, "step": 29231 }, { "epoch": 1.5917992281145983, "grad_norm": 0.5204707893107874, "learning_rate": 2.1063149084565238e-05, "loss": 11.8628, "step": 29232 }, { "epoch": 1.5918536821111815, "grad_norm": 0.604036827527067, "learning_rate": 2.1057735689375212e-05, "loss": 11.9725, "step": 29233 }, { "epoch": 1.5919081361077645, "grad_norm": 0.5089674645710892, "learning_rate": 2.1052322908053457e-05, "loss": 11.7684, "step": 29234 }, { "epoch": 1.5919625901043475, "grad_norm": 0.5745763659233781, "learning_rate": 2.104691074064209e-05, "loss": 11.8978, "step": 29235 }, { "epoch": 1.5920170441009305, "grad_norm": 0.591309404312403, "learning_rate": 2.1041499187183167e-05, "loss": 11.8323, "step": 29236 }, { "epoch": 1.5920714980975135, "grad_norm": 0.5545029480301127, "learning_rate": 2.103608824771881e-05, "loss": 11.796, "step": 29237 }, { "epoch": 1.5921259520940965, "grad_norm": 0.5184792550490716, "learning_rate": 2.1030677922291054e-05, "loss": 11.79, "step": 29238 }, { "epoch": 1.5921804060906797, "grad_norm": 0.5539966035999682, "learning_rate": 2.1025268210941984e-05, "loss": 11.9322, "step": 29239 }, { "epoch": 1.5922348600872627, "grad_norm": 0.576787261578154, "learning_rate": 2.1019859113713702e-05, "loss": 11.8311, "step": 29240 }, { "epoch": 1.5922893140838457, "grad_norm": 0.5353127817297225, "learning_rate": 2.101445063064821e-05, "loss": 11.8213, "step": 29241 }, { "epoch": 1.5923437680804287, "grad_norm": 0.5028518180042723, "learning_rate": 2.1009042761787622e-05, "loss": 11.6888, "step": 29242 }, { "epoch": 1.5923982220770116, "grad_norm": 0.5227168638947924, "learning_rate": 2.100363550717397e-05, "loss": 11.7396, "step": 29243 }, { "epoch": 1.5924526760735946, "grad_norm": 0.5207687491171965, "learning_rate": 2.0998228866849258e-05, "loss": 11.8756, "step": 29244 }, { "epoch": 1.5925071300701776, "grad_norm": 0.5689433497115669, "learning_rate": 2.0992822840855607e-05, "loss": 11.9099, "step": 29245 }, { "epoch": 1.5925615840667606, "grad_norm": 0.5170718065392603, "learning_rate": 2.0987417429234978e-05, "loss": 11.6968, "step": 29246 }, { "epoch": 1.5926160380633436, "grad_norm": 0.7161047318327063, "learning_rate": 2.098201263202948e-05, "loss": 11.9096, "step": 29247 }, { "epoch": 1.5926704920599266, "grad_norm": 0.5449667988099567, "learning_rate": 2.0976608449281065e-05, "loss": 11.7962, "step": 29248 }, { "epoch": 1.5927249460565096, "grad_norm": 0.5422448402646335, "learning_rate": 2.0971204881031804e-05, "loss": 11.7914, "step": 29249 }, { "epoch": 1.5927794000530926, "grad_norm": 0.5204730984461148, "learning_rate": 2.0965801927323723e-05, "loss": 11.8461, "step": 29250 }, { "epoch": 1.5928338540496756, "grad_norm": 0.5911032488345888, "learning_rate": 2.09603995881988e-05, "loss": 11.9048, "step": 29251 }, { "epoch": 1.5928883080462586, "grad_norm": 0.5380247928248235, "learning_rate": 2.095499786369912e-05, "loss": 11.8832, "step": 29252 }, { "epoch": 1.5929427620428416, "grad_norm": 0.5702427190662462, "learning_rate": 2.0949596753866573e-05, "loss": 11.7932, "step": 29253 }, { "epoch": 1.5929972160394246, "grad_norm": 0.5117243886033823, "learning_rate": 2.094419625874322e-05, "loss": 11.8682, "step": 29254 }, { "epoch": 1.5930516700360076, "grad_norm": 0.562316585517797, "learning_rate": 2.0938796378371084e-05, "loss": 11.7828, "step": 29255 }, { "epoch": 1.5931061240325906, "grad_norm": 0.564703827891049, "learning_rate": 2.0933397112792097e-05, "loss": 11.8802, "step": 29256 }, { "epoch": 1.5931605780291738, "grad_norm": 0.5840082363291428, "learning_rate": 2.0927998462048305e-05, "loss": 11.8813, "step": 29257 }, { "epoch": 1.5932150320257568, "grad_norm": 0.5169333620750146, "learning_rate": 2.0922600426181627e-05, "loss": 11.8397, "step": 29258 }, { "epoch": 1.5932694860223398, "grad_norm": 0.5618592799405008, "learning_rate": 2.09172030052341e-05, "loss": 11.742, "step": 29259 }, { "epoch": 1.5933239400189227, "grad_norm": 0.6014043592532105, "learning_rate": 2.091180619924763e-05, "loss": 11.7218, "step": 29260 }, { "epoch": 1.5933783940155057, "grad_norm": 0.5450336197543947, "learning_rate": 2.090641000826422e-05, "loss": 11.8609, "step": 29261 }, { "epoch": 1.593432848012089, "grad_norm": 0.5122395469798758, "learning_rate": 2.0901014432325894e-05, "loss": 11.758, "step": 29262 }, { "epoch": 1.593487302008672, "grad_norm": 0.5099102735598482, "learning_rate": 2.0895619471474482e-05, "loss": 11.8422, "step": 29263 }, { "epoch": 1.593541756005255, "grad_norm": 0.6136624409372344, "learning_rate": 2.0890225125751996e-05, "loss": 11.7182, "step": 29264 }, { "epoch": 1.593596210001838, "grad_norm": 0.6155525934292949, "learning_rate": 2.088483139520042e-05, "loss": 11.9642, "step": 29265 }, { "epoch": 1.593650663998421, "grad_norm": 0.5131323241283007, "learning_rate": 2.087943827986163e-05, "loss": 11.7876, "step": 29266 }, { "epoch": 1.593705117995004, "grad_norm": 0.592275942631102, "learning_rate": 2.087404577977763e-05, "loss": 11.7975, "step": 29267 }, { "epoch": 1.593759571991587, "grad_norm": 0.5590714359972297, "learning_rate": 2.0868653894990286e-05, "loss": 11.7961, "step": 29268 }, { "epoch": 1.59381402598817, "grad_norm": 0.5687129917076879, "learning_rate": 2.086326262554159e-05, "loss": 11.8583, "step": 29269 }, { "epoch": 1.593868479984753, "grad_norm": 0.5440672765535223, "learning_rate": 2.0857871971473396e-05, "loss": 11.7144, "step": 29270 }, { "epoch": 1.5939229339813359, "grad_norm": 0.5096900234979592, "learning_rate": 2.0852481932827683e-05, "loss": 11.8566, "step": 29271 }, { "epoch": 1.5939773879779189, "grad_norm": 0.6039441842701397, "learning_rate": 2.0847092509646382e-05, "loss": 11.8094, "step": 29272 }, { "epoch": 1.5940318419745019, "grad_norm": 0.5224844081263593, "learning_rate": 2.0841703701971317e-05, "loss": 11.8094, "step": 29273 }, { "epoch": 1.5940862959710849, "grad_norm": 0.5925389058157018, "learning_rate": 2.0836315509844462e-05, "loss": 11.8046, "step": 29274 }, { "epoch": 1.5941407499676679, "grad_norm": 0.5479508959563254, "learning_rate": 2.0830927933307666e-05, "loss": 11.7138, "step": 29275 }, { "epoch": 1.5941952039642509, "grad_norm": 0.637556042757401, "learning_rate": 2.0825540972402858e-05, "loss": 11.8174, "step": 29276 }, { "epoch": 1.5942496579608338, "grad_norm": 0.5866111568680433, "learning_rate": 2.082015462717194e-05, "loss": 11.844, "step": 29277 }, { "epoch": 1.5943041119574168, "grad_norm": 0.5123714898958694, "learning_rate": 2.0814768897656754e-05, "loss": 11.7639, "step": 29278 }, { "epoch": 1.5943585659539998, "grad_norm": 0.6224809269400543, "learning_rate": 2.080938378389923e-05, "loss": 11.9527, "step": 29279 }, { "epoch": 1.594413019950583, "grad_norm": 0.5313001344689005, "learning_rate": 2.080399928594119e-05, "loss": 11.8202, "step": 29280 }, { "epoch": 1.594467473947166, "grad_norm": 0.592526489518852, "learning_rate": 2.0798615403824562e-05, "loss": 11.8038, "step": 29281 }, { "epoch": 1.594521927943749, "grad_norm": 0.5396856684300272, "learning_rate": 2.079323213759119e-05, "loss": 11.8334, "step": 29282 }, { "epoch": 1.594576381940332, "grad_norm": 0.5204311666070865, "learning_rate": 2.0787849487282894e-05, "loss": 11.8591, "step": 29283 }, { "epoch": 1.594630835936915, "grad_norm": 0.5625748473460092, "learning_rate": 2.078246745294159e-05, "loss": 11.8796, "step": 29284 }, { "epoch": 1.594685289933498, "grad_norm": 0.5504402028504651, "learning_rate": 2.077708603460907e-05, "loss": 11.8352, "step": 29285 }, { "epoch": 1.5947397439300812, "grad_norm": 0.5938363247497671, "learning_rate": 2.0771705232327253e-05, "loss": 11.8516, "step": 29286 }, { "epoch": 1.5947941979266642, "grad_norm": 0.5375808369991738, "learning_rate": 2.0766325046137915e-05, "loss": 11.7668, "step": 29287 }, { "epoch": 1.5948486519232472, "grad_norm": 0.6255520458851344, "learning_rate": 2.0760945476082914e-05, "loss": 11.9198, "step": 29288 }, { "epoch": 1.5949031059198302, "grad_norm": 0.6078822174774229, "learning_rate": 2.075556652220413e-05, "loss": 11.861, "step": 29289 }, { "epoch": 1.5949575599164132, "grad_norm": 0.5727629181299997, "learning_rate": 2.0750188184543306e-05, "loss": 11.8628, "step": 29290 }, { "epoch": 1.5950120139129962, "grad_norm": 0.567216252456352, "learning_rate": 2.074481046314235e-05, "loss": 11.7328, "step": 29291 }, { "epoch": 1.5950664679095792, "grad_norm": 0.6022782443729904, "learning_rate": 2.0739433358043026e-05, "loss": 11.7829, "step": 29292 }, { "epoch": 1.5951209219061622, "grad_norm": 0.5616427075100456, "learning_rate": 2.0734056869287144e-05, "loss": 11.8856, "step": 29293 }, { "epoch": 1.5951753759027452, "grad_norm": 0.5327986980656418, "learning_rate": 2.072868099691655e-05, "loss": 11.9241, "step": 29294 }, { "epoch": 1.5952298298993282, "grad_norm": 0.5593695576862636, "learning_rate": 2.0723305740972996e-05, "loss": 11.8909, "step": 29295 }, { "epoch": 1.5952842838959111, "grad_norm": 0.5311426509712349, "learning_rate": 2.0717931101498344e-05, "loss": 11.9296, "step": 29296 }, { "epoch": 1.5953387378924941, "grad_norm": 0.5176813535764708, "learning_rate": 2.0712557078534335e-05, "loss": 11.8577, "step": 29297 }, { "epoch": 1.5953931918890771, "grad_norm": 0.5496162841096986, "learning_rate": 2.0707183672122765e-05, "loss": 11.8236, "step": 29298 }, { "epoch": 1.5954476458856601, "grad_norm": 0.504755071941615, "learning_rate": 2.0701810882305462e-05, "loss": 11.7105, "step": 29299 }, { "epoch": 1.5955020998822431, "grad_norm": 0.5039080260857132, "learning_rate": 2.0696438709124157e-05, "loss": 11.7858, "step": 29300 }, { "epoch": 1.5955565538788261, "grad_norm": 0.6188281055076668, "learning_rate": 2.069106715262067e-05, "loss": 11.8878, "step": 29301 }, { "epoch": 1.595611007875409, "grad_norm": 0.5333251973907444, "learning_rate": 2.0685696212836737e-05, "loss": 11.8109, "step": 29302 }, { "epoch": 1.5956654618719923, "grad_norm": 0.5721325884762738, "learning_rate": 2.0680325889814112e-05, "loss": 11.85, "step": 29303 }, { "epoch": 1.5957199158685753, "grad_norm": 0.5291263883005053, "learning_rate": 2.0674956183594595e-05, "loss": 11.8698, "step": 29304 }, { "epoch": 1.5957743698651583, "grad_norm": 0.5646301205861213, "learning_rate": 2.06695870942199e-05, "loss": 11.8938, "step": 29305 }, { "epoch": 1.5958288238617413, "grad_norm": 0.5432610891425843, "learning_rate": 2.0664218621731823e-05, "loss": 11.7214, "step": 29306 }, { "epoch": 1.5958832778583243, "grad_norm": 0.539769048527323, "learning_rate": 2.0658850766172054e-05, "loss": 11.9207, "step": 29307 }, { "epoch": 1.5959377318549073, "grad_norm": 0.588714758758604, "learning_rate": 2.0653483527582408e-05, "loss": 11.8795, "step": 29308 }, { "epoch": 1.5959921858514905, "grad_norm": 0.5916651483477288, "learning_rate": 2.0648116906004543e-05, "loss": 11.7702, "step": 29309 }, { "epoch": 1.5960466398480735, "grad_norm": 0.5108358492591021, "learning_rate": 2.0642750901480233e-05, "loss": 11.8893, "step": 29310 }, { "epoch": 1.5961010938446565, "grad_norm": 0.49544440087693337, "learning_rate": 2.0637385514051223e-05, "loss": 11.7864, "step": 29311 }, { "epoch": 1.5961555478412395, "grad_norm": 0.5423380161079329, "learning_rate": 2.0632020743759217e-05, "loss": 11.691, "step": 29312 }, { "epoch": 1.5962100018378225, "grad_norm": 0.5046285314022753, "learning_rate": 2.06266565906459e-05, "loss": 11.9099, "step": 29313 }, { "epoch": 1.5962644558344055, "grad_norm": 0.5434175528898965, "learning_rate": 2.0621293054753032e-05, "loss": 11.9292, "step": 29314 }, { "epoch": 1.5963189098309885, "grad_norm": 0.513969180975846, "learning_rate": 2.0615930136122286e-05, "loss": 11.8333, "step": 29315 }, { "epoch": 1.5963733638275714, "grad_norm": 0.5537772534660337, "learning_rate": 2.061056783479539e-05, "loss": 11.7639, "step": 29316 }, { "epoch": 1.5964278178241544, "grad_norm": 0.5658981839435101, "learning_rate": 2.0605206150814017e-05, "loss": 11.9302, "step": 29317 }, { "epoch": 1.5964822718207374, "grad_norm": 0.5156821553687051, "learning_rate": 2.0599845084219905e-05, "loss": 11.8463, "step": 29318 }, { "epoch": 1.5965367258173204, "grad_norm": 0.6033784564546595, "learning_rate": 2.059448463505468e-05, "loss": 11.8912, "step": 29319 }, { "epoch": 1.5965911798139034, "grad_norm": 0.5023804642949706, "learning_rate": 2.058912480336006e-05, "loss": 11.5914, "step": 29320 }, { "epoch": 1.5966456338104864, "grad_norm": 0.565739164961108, "learning_rate": 2.058376558917775e-05, "loss": 11.7791, "step": 29321 }, { "epoch": 1.5967000878070694, "grad_norm": 0.5280704510056182, "learning_rate": 2.0578406992549405e-05, "loss": 11.6932, "step": 29322 }, { "epoch": 1.5967545418036524, "grad_norm": 0.5126183917450234, "learning_rate": 2.0573049013516676e-05, "loss": 11.8379, "step": 29323 }, { "epoch": 1.5968089958002354, "grad_norm": 0.5440726386513677, "learning_rate": 2.056769165212121e-05, "loss": 11.8529, "step": 29324 }, { "epoch": 1.5968634497968184, "grad_norm": 0.5233461159863342, "learning_rate": 2.05623349084047e-05, "loss": 11.727, "step": 29325 }, { "epoch": 1.5969179037934014, "grad_norm": 0.5363070990828511, "learning_rate": 2.055697878240882e-05, "loss": 11.839, "step": 29326 }, { "epoch": 1.5969723577899846, "grad_norm": 0.614792689281094, "learning_rate": 2.0551623274175167e-05, "loss": 11.7179, "step": 29327 }, { "epoch": 1.5970268117865676, "grad_norm": 0.6002726534508523, "learning_rate": 2.0546268383745447e-05, "loss": 11.8518, "step": 29328 }, { "epoch": 1.5970812657831506, "grad_norm": 0.6787932544133423, "learning_rate": 2.0540914111161246e-05, "loss": 11.7234, "step": 29329 }, { "epoch": 1.5971357197797336, "grad_norm": 0.53390654356914, "learning_rate": 2.0535560456464244e-05, "loss": 11.7811, "step": 29330 }, { "epoch": 1.5971901737763166, "grad_norm": 0.5723515378116586, "learning_rate": 2.0530207419696014e-05, "loss": 11.8642, "step": 29331 }, { "epoch": 1.5972446277728998, "grad_norm": 0.5323370881988478, "learning_rate": 2.052485500089826e-05, "loss": 11.8854, "step": 29332 }, { "epoch": 1.5972990817694828, "grad_norm": 0.5355162973079043, "learning_rate": 2.0519503200112544e-05, "loss": 11.8937, "step": 29333 }, { "epoch": 1.5973535357660658, "grad_norm": 0.6096867267092838, "learning_rate": 2.0514152017380482e-05, "loss": 11.7952, "step": 29334 }, { "epoch": 1.5974079897626488, "grad_norm": 0.6575767372477747, "learning_rate": 2.050880145274373e-05, "loss": 11.9411, "step": 29335 }, { "epoch": 1.5974624437592317, "grad_norm": 0.5766491995228399, "learning_rate": 2.0503451506243844e-05, "loss": 11.8742, "step": 29336 }, { "epoch": 1.5975168977558147, "grad_norm": 0.5564938888422738, "learning_rate": 2.049810217792245e-05, "loss": 11.6938, "step": 29337 }, { "epoch": 1.5975713517523977, "grad_norm": 0.5633894880921775, "learning_rate": 2.049275346782118e-05, "loss": 11.8614, "step": 29338 }, { "epoch": 1.5976258057489807, "grad_norm": 0.6026291967793118, "learning_rate": 2.048740537598155e-05, "loss": 11.9225, "step": 29339 }, { "epoch": 1.5976802597455637, "grad_norm": 0.5454465730805981, "learning_rate": 2.048205790244523e-05, "loss": 11.806, "step": 29340 }, { "epoch": 1.5977347137421467, "grad_norm": 0.5324393254097145, "learning_rate": 2.047671104725373e-05, "loss": 11.7547, "step": 29341 }, { "epoch": 1.5977891677387297, "grad_norm": 0.539211253220576, "learning_rate": 2.047136481044869e-05, "loss": 11.8763, "step": 29342 }, { "epoch": 1.5978436217353127, "grad_norm": 0.4994208877902171, "learning_rate": 2.0466019192071652e-05, "loss": 11.8807, "step": 29343 }, { "epoch": 1.5978980757318957, "grad_norm": 0.5234971724937825, "learning_rate": 2.0460674192164163e-05, "loss": 11.8054, "step": 29344 }, { "epoch": 1.5979525297284787, "grad_norm": 0.6142188536115024, "learning_rate": 2.045532981076783e-05, "loss": 11.7835, "step": 29345 }, { "epoch": 1.5980069837250617, "grad_norm": 0.5165604775158151, "learning_rate": 2.0449986047924173e-05, "loss": 11.7835, "step": 29346 }, { "epoch": 1.5980614377216447, "grad_norm": 0.5374935990097058, "learning_rate": 2.044464290367476e-05, "loss": 11.6989, "step": 29347 }, { "epoch": 1.5981158917182277, "grad_norm": 0.5124067275860661, "learning_rate": 2.0439300378061178e-05, "loss": 11.7579, "step": 29348 }, { "epoch": 1.5981703457148106, "grad_norm": 0.5376089506350612, "learning_rate": 2.0433958471124902e-05, "loss": 11.77, "step": 29349 }, { "epoch": 1.5982247997113939, "grad_norm": 0.5845308154121257, "learning_rate": 2.042861718290754e-05, "loss": 11.831, "step": 29350 }, { "epoch": 1.5982792537079769, "grad_norm": 0.5422491709840996, "learning_rate": 2.0423276513450572e-05, "loss": 11.8305, "step": 29351 }, { "epoch": 1.5983337077045598, "grad_norm": 0.5882532223644485, "learning_rate": 2.041793646279557e-05, "loss": 11.8764, "step": 29352 }, { "epoch": 1.5983881617011428, "grad_norm": 0.5068251702623046, "learning_rate": 2.041259703098405e-05, "loss": 11.8794, "step": 29353 }, { "epoch": 1.5984426156977258, "grad_norm": 0.5423090314044817, "learning_rate": 2.040725821805749e-05, "loss": 11.6394, "step": 29354 }, { "epoch": 1.5984970696943088, "grad_norm": 0.5244575659651973, "learning_rate": 2.0401920024057464e-05, "loss": 11.8177, "step": 29355 }, { "epoch": 1.598551523690892, "grad_norm": 0.534760458178448, "learning_rate": 2.0396582449025438e-05, "loss": 11.8421, "step": 29356 }, { "epoch": 1.598605977687475, "grad_norm": 0.595690836982202, "learning_rate": 2.0391245493002952e-05, "loss": 11.9205, "step": 29357 }, { "epoch": 1.598660431684058, "grad_norm": 0.5297888333733959, "learning_rate": 2.0385909156031467e-05, "loss": 11.8155, "step": 29358 }, { "epoch": 1.598714885680641, "grad_norm": 0.5589288586725722, "learning_rate": 2.0380573438152507e-05, "loss": 11.6377, "step": 29359 }, { "epoch": 1.598769339677224, "grad_norm": 0.5116527023082919, "learning_rate": 2.0375238339407577e-05, "loss": 11.7126, "step": 29360 }, { "epoch": 1.598823793673807, "grad_norm": 0.5354004677928198, "learning_rate": 2.0369903859838135e-05, "loss": 11.7904, "step": 29361 }, { "epoch": 1.59887824767039, "grad_norm": 0.5822814331580956, "learning_rate": 2.0364569999485695e-05, "loss": 11.7983, "step": 29362 }, { "epoch": 1.598932701666973, "grad_norm": 0.4890187835699303, "learning_rate": 2.035923675839171e-05, "loss": 11.8121, "step": 29363 }, { "epoch": 1.598987155663556, "grad_norm": 0.5322577516379914, "learning_rate": 2.0353904136597635e-05, "loss": 11.7383, "step": 29364 }, { "epoch": 1.599041609660139, "grad_norm": 0.5383542847029062, "learning_rate": 2.034857213414497e-05, "loss": 11.6958, "step": 29365 }, { "epoch": 1.599096063656722, "grad_norm": 0.5210816974437572, "learning_rate": 2.0343240751075154e-05, "loss": 11.5566, "step": 29366 }, { "epoch": 1.599150517653305, "grad_norm": 0.5457318379706793, "learning_rate": 2.0337909987429683e-05, "loss": 11.6954, "step": 29367 }, { "epoch": 1.599204971649888, "grad_norm": 0.5168958739928255, "learning_rate": 2.0332579843249954e-05, "loss": 11.6211, "step": 29368 }, { "epoch": 1.599259425646471, "grad_norm": 0.5850823126483844, "learning_rate": 2.032725031857744e-05, "loss": 11.737, "step": 29369 }, { "epoch": 1.599313879643054, "grad_norm": 0.5253489496144531, "learning_rate": 2.0321921413453627e-05, "loss": 11.8412, "step": 29370 }, { "epoch": 1.599368333639637, "grad_norm": 0.5565454633267117, "learning_rate": 2.031659312791987e-05, "loss": 11.8185, "step": 29371 }, { "epoch": 1.59942278763622, "grad_norm": 0.5149642405061943, "learning_rate": 2.0311265462017693e-05, "loss": 11.8535, "step": 29372 }, { "epoch": 1.5994772416328031, "grad_norm": 0.5864200899043692, "learning_rate": 2.0305938415788472e-05, "loss": 11.7961, "step": 29373 }, { "epoch": 1.5995316956293861, "grad_norm": 0.5950075065925858, "learning_rate": 2.030061198927361e-05, "loss": 11.8858, "step": 29374 }, { "epoch": 1.5995861496259691, "grad_norm": 0.6078560209968946, "learning_rate": 2.0295286182514584e-05, "loss": 11.9283, "step": 29375 }, { "epoch": 1.5996406036225521, "grad_norm": 0.5447446069690238, "learning_rate": 2.028996099555275e-05, "loss": 11.7079, "step": 29376 }, { "epoch": 1.599695057619135, "grad_norm": 0.5171646235522743, "learning_rate": 2.028463642842957e-05, "loss": 11.7078, "step": 29377 }, { "epoch": 1.599749511615718, "grad_norm": 0.5406602315200316, "learning_rate": 2.0279312481186407e-05, "loss": 11.8501, "step": 29378 }, { "epoch": 1.5998039656123013, "grad_norm": 0.554756835062345, "learning_rate": 2.0273989153864703e-05, "loss": 11.9458, "step": 29379 }, { "epoch": 1.5998584196088843, "grad_norm": 0.6053443037557681, "learning_rate": 2.0268666446505803e-05, "loss": 11.8822, "step": 29380 }, { "epoch": 1.5999128736054673, "grad_norm": 0.5052093775294951, "learning_rate": 2.0263344359151114e-05, "loss": 11.8362, "step": 29381 }, { "epoch": 1.5999673276020503, "grad_norm": 0.5497470843840734, "learning_rate": 2.0258022891842066e-05, "loss": 11.7865, "step": 29382 }, { "epoch": 1.6000217815986333, "grad_norm": 0.5696991347372337, "learning_rate": 2.0252702044620007e-05, "loss": 11.7343, "step": 29383 }, { "epoch": 1.6000762355952163, "grad_norm": 0.5391041147615704, "learning_rate": 2.024738181752631e-05, "loss": 11.8043, "step": 29384 }, { "epoch": 1.6001306895917993, "grad_norm": 0.5445163514808162, "learning_rate": 2.0242062210602318e-05, "loss": 11.8279, "step": 29385 }, { "epoch": 1.6001851435883823, "grad_norm": 0.4901077404302885, "learning_rate": 2.0236743223889422e-05, "loss": 11.7292, "step": 29386 }, { "epoch": 1.6002395975849653, "grad_norm": 0.5709519578301949, "learning_rate": 2.0231424857429026e-05, "loss": 11.8579, "step": 29387 }, { "epoch": 1.6002940515815482, "grad_norm": 0.5521014017851856, "learning_rate": 2.0226107111262404e-05, "loss": 11.7892, "step": 29388 }, { "epoch": 1.6003485055781312, "grad_norm": 0.5981844161381498, "learning_rate": 2.0220789985430986e-05, "loss": 11.8641, "step": 29389 }, { "epoch": 1.6004029595747142, "grad_norm": 0.6060426741470232, "learning_rate": 2.0215473479976067e-05, "loss": 11.8387, "step": 29390 }, { "epoch": 1.6004574135712972, "grad_norm": 0.5382350872141481, "learning_rate": 2.0210157594939016e-05, "loss": 11.7363, "step": 29391 }, { "epoch": 1.6005118675678802, "grad_norm": 0.5575719557209478, "learning_rate": 2.020484233036114e-05, "loss": 11.9765, "step": 29392 }, { "epoch": 1.6005663215644632, "grad_norm": 0.5301358191553304, "learning_rate": 2.0199527686283827e-05, "loss": 11.673, "step": 29393 }, { "epoch": 1.6006207755610462, "grad_norm": 0.5860651116209377, "learning_rate": 2.0194213662748362e-05, "loss": 11.7878, "step": 29394 }, { "epoch": 1.6006752295576292, "grad_norm": 0.5871124440341244, "learning_rate": 2.018890025979604e-05, "loss": 11.7139, "step": 29395 }, { "epoch": 1.6007296835542122, "grad_norm": 0.5616677883384459, "learning_rate": 2.0183587477468226e-05, "loss": 11.8742, "step": 29396 }, { "epoch": 1.6007841375507954, "grad_norm": 0.5191381149894279, "learning_rate": 2.0178275315806237e-05, "loss": 11.7855, "step": 29397 }, { "epoch": 1.6008385915473784, "grad_norm": 0.5091419315561139, "learning_rate": 2.017296377485135e-05, "loss": 11.8683, "step": 29398 }, { "epoch": 1.6008930455439614, "grad_norm": 0.5352089856035932, "learning_rate": 2.0167652854644902e-05, "loss": 11.8727, "step": 29399 }, { "epoch": 1.6009474995405444, "grad_norm": 0.5537442666076666, "learning_rate": 2.0162342555228152e-05, "loss": 11.8444, "step": 29400 }, { "epoch": 1.6010019535371274, "grad_norm": 0.5936712654447079, "learning_rate": 2.0157032876642445e-05, "loss": 11.7059, "step": 29401 }, { "epoch": 1.6010564075337106, "grad_norm": 0.5149627350136136, "learning_rate": 2.0151723818929002e-05, "loss": 11.7851, "step": 29402 }, { "epoch": 1.6011108615302936, "grad_norm": 0.5368922638871556, "learning_rate": 2.014641538212918e-05, "loss": 11.763, "step": 29403 }, { "epoch": 1.6011653155268766, "grad_norm": 0.5987494576207734, "learning_rate": 2.0141107566284224e-05, "loss": 11.7101, "step": 29404 }, { "epoch": 1.6012197695234596, "grad_norm": 0.5622285933443621, "learning_rate": 2.0135800371435377e-05, "loss": 11.7634, "step": 29405 }, { "epoch": 1.6012742235200426, "grad_norm": 0.587119010358452, "learning_rate": 2.013049379762397e-05, "loss": 11.8866, "step": 29406 }, { "epoch": 1.6013286775166256, "grad_norm": 0.5303848846970265, "learning_rate": 2.0125187844891214e-05, "loss": 11.8284, "step": 29407 }, { "epoch": 1.6013831315132085, "grad_norm": 0.5169890060139251, "learning_rate": 2.0119882513278376e-05, "loss": 11.7693, "step": 29408 }, { "epoch": 1.6014375855097915, "grad_norm": 0.4841309745431037, "learning_rate": 2.011457780282677e-05, "loss": 11.6597, "step": 29409 }, { "epoch": 1.6014920395063745, "grad_norm": 0.5296250126575548, "learning_rate": 2.010927371357757e-05, "loss": 11.8657, "step": 29410 }, { "epoch": 1.6015464935029575, "grad_norm": 0.5264449967244791, "learning_rate": 2.010397024557209e-05, "loss": 11.902, "step": 29411 }, { "epoch": 1.6016009474995405, "grad_norm": 0.5242921438198742, "learning_rate": 2.0098667398851502e-05, "loss": 11.761, "step": 29412 }, { "epoch": 1.6016554014961235, "grad_norm": 0.5567536072830178, "learning_rate": 2.0093365173457102e-05, "loss": 11.9027, "step": 29413 }, { "epoch": 1.6017098554927065, "grad_norm": 0.5864266023755338, "learning_rate": 2.0088063569430093e-05, "loss": 11.9558, "step": 29414 }, { "epoch": 1.6017643094892895, "grad_norm": 0.5302354177762099, "learning_rate": 2.0082762586811686e-05, "loss": 11.8522, "step": 29415 }, { "epoch": 1.6018187634858725, "grad_norm": 0.5340893847277921, "learning_rate": 2.007746222564314e-05, "loss": 11.8762, "step": 29416 }, { "epoch": 1.6018732174824555, "grad_norm": 0.5841347553224232, "learning_rate": 2.0072162485965618e-05, "loss": 11.5254, "step": 29417 }, { "epoch": 1.6019276714790385, "grad_norm": 0.5277587417691794, "learning_rate": 2.0066863367820376e-05, "loss": 11.8683, "step": 29418 }, { "epoch": 1.6019821254756215, "grad_norm": 0.5513675047556913, "learning_rate": 2.006156487124863e-05, "loss": 11.8087, "step": 29419 }, { "epoch": 1.6020365794722047, "grad_norm": 0.523210258162122, "learning_rate": 2.005626699629153e-05, "loss": 11.8505, "step": 29420 }, { "epoch": 1.6020910334687877, "grad_norm": 0.5376344267347557, "learning_rate": 2.0050969742990346e-05, "loss": 11.7899, "step": 29421 }, { "epoch": 1.6021454874653707, "grad_norm": 0.535397752464809, "learning_rate": 2.0045673111386187e-05, "loss": 11.7908, "step": 29422 }, { "epoch": 1.6021999414619537, "grad_norm": 0.5702444878502699, "learning_rate": 2.0040377101520313e-05, "loss": 11.7889, "step": 29423 }, { "epoch": 1.6022543954585367, "grad_norm": 0.6231034304487845, "learning_rate": 2.0035081713433878e-05, "loss": 11.8418, "step": 29424 }, { "epoch": 1.6023088494551196, "grad_norm": 0.49722995197810943, "learning_rate": 2.0029786947168028e-05, "loss": 11.8859, "step": 29425 }, { "epoch": 1.6023633034517029, "grad_norm": 0.5580957798454991, "learning_rate": 2.0024492802763996e-05, "loss": 11.746, "step": 29426 }, { "epoch": 1.6024177574482859, "grad_norm": 0.5889126367332558, "learning_rate": 2.001919928026289e-05, "loss": 11.8205, "step": 29427 }, { "epoch": 1.6024722114448688, "grad_norm": 0.5209238836756184, "learning_rate": 2.0013906379705925e-05, "loss": 11.9455, "step": 29428 }, { "epoch": 1.6025266654414518, "grad_norm": 0.5677628338776259, "learning_rate": 2.0008614101134228e-05, "loss": 11.8863, "step": 29429 }, { "epoch": 1.6025811194380348, "grad_norm": 0.6074607586573398, "learning_rate": 2.0003322444588945e-05, "loss": 11.7899, "step": 29430 }, { "epoch": 1.6026355734346178, "grad_norm": 0.5618640876632571, "learning_rate": 1.9998031410111275e-05, "loss": 11.7676, "step": 29431 }, { "epoch": 1.6026900274312008, "grad_norm": 0.5084939224750649, "learning_rate": 1.999274099774231e-05, "loss": 11.7653, "step": 29432 }, { "epoch": 1.6027444814277838, "grad_norm": 0.5814571428367679, "learning_rate": 1.9987451207523245e-05, "loss": 11.8396, "step": 29433 }, { "epoch": 1.6027989354243668, "grad_norm": 0.5685096359853066, "learning_rate": 1.9982162039495133e-05, "loss": 11.818, "step": 29434 }, { "epoch": 1.6028533894209498, "grad_norm": 0.5745809363328269, "learning_rate": 1.9976873493699155e-05, "loss": 11.6109, "step": 29435 }, { "epoch": 1.6029078434175328, "grad_norm": 0.5289668498802075, "learning_rate": 1.9971585570176443e-05, "loss": 11.705, "step": 29436 }, { "epoch": 1.6029622974141158, "grad_norm": 0.6143617425459307, "learning_rate": 1.9966298268968086e-05, "loss": 11.8005, "step": 29437 }, { "epoch": 1.6030167514106988, "grad_norm": 0.49538255166652645, "learning_rate": 1.996101159011524e-05, "loss": 11.7992, "step": 29438 }, { "epoch": 1.6030712054072818, "grad_norm": 0.5543726132688722, "learning_rate": 1.9955725533658954e-05, "loss": 11.7016, "step": 29439 }, { "epoch": 1.6031256594038648, "grad_norm": 0.6354922580447147, "learning_rate": 1.995044009964041e-05, "loss": 11.8876, "step": 29440 }, { "epoch": 1.6031801134004477, "grad_norm": 0.5291584829323384, "learning_rate": 1.9945155288100636e-05, "loss": 11.8345, "step": 29441 }, { "epoch": 1.6032345673970307, "grad_norm": 0.5579322998389947, "learning_rate": 1.9939871099080753e-05, "loss": 11.8444, "step": 29442 }, { "epoch": 1.603289021393614, "grad_norm": 0.5522996991215278, "learning_rate": 1.993458753262192e-05, "loss": 11.9134, "step": 29443 }, { "epoch": 1.603343475390197, "grad_norm": 0.5340596804135831, "learning_rate": 1.9929304588765107e-05, "loss": 11.8661, "step": 29444 }, { "epoch": 1.60339792938678, "grad_norm": 0.5900912133232146, "learning_rate": 1.9924022267551444e-05, "loss": 11.9336, "step": 29445 }, { "epoch": 1.603452383383363, "grad_norm": 0.6204722787699938, "learning_rate": 1.9918740569022042e-05, "loss": 11.954, "step": 29446 }, { "epoch": 1.603506837379946, "grad_norm": 0.5473150052355028, "learning_rate": 1.9913459493217923e-05, "loss": 11.7859, "step": 29447 }, { "epoch": 1.603561291376529, "grad_norm": 0.6360911442389331, "learning_rate": 1.9908179040180197e-05, "loss": 11.8471, "step": 29448 }, { "epoch": 1.6036157453731121, "grad_norm": 0.5718820184914463, "learning_rate": 1.9902899209949865e-05, "loss": 11.8596, "step": 29449 }, { "epoch": 1.6036701993696951, "grad_norm": 0.5702905523345672, "learning_rate": 1.9897620002568064e-05, "loss": 11.9062, "step": 29450 }, { "epoch": 1.6037246533662781, "grad_norm": 0.674056427309686, "learning_rate": 1.9892341418075766e-05, "loss": 12.0222, "step": 29451 }, { "epoch": 1.6037791073628611, "grad_norm": 0.5430100968272906, "learning_rate": 1.9887063456514055e-05, "loss": 11.6578, "step": 29452 }, { "epoch": 1.603833561359444, "grad_norm": 0.5437180308953273, "learning_rate": 1.9881786117924027e-05, "loss": 11.7666, "step": 29453 }, { "epoch": 1.603888015356027, "grad_norm": 0.5978516870845708, "learning_rate": 1.9876509402346623e-05, "loss": 11.7347, "step": 29454 }, { "epoch": 1.60394246935261, "grad_norm": 0.5734227760594006, "learning_rate": 1.9871233309822935e-05, "loss": 11.9542, "step": 29455 }, { "epoch": 1.603996923349193, "grad_norm": 0.5099606994702084, "learning_rate": 1.9865957840393945e-05, "loss": 11.858, "step": 29456 }, { "epoch": 1.604051377345776, "grad_norm": 0.5513875227445373, "learning_rate": 1.986068299410071e-05, "loss": 11.7734, "step": 29457 }, { "epoch": 1.604105831342359, "grad_norm": 0.571443913331018, "learning_rate": 1.9855408770984274e-05, "loss": 11.7681, "step": 29458 }, { "epoch": 1.604160285338942, "grad_norm": 0.6271376497212182, "learning_rate": 1.9850135171085582e-05, "loss": 11.7576, "step": 29459 }, { "epoch": 1.604214739335525, "grad_norm": 0.5429512264294071, "learning_rate": 1.9844862194445713e-05, "loss": 11.8022, "step": 29460 }, { "epoch": 1.604269193332108, "grad_norm": 0.5231861899643807, "learning_rate": 1.9839589841105623e-05, "loss": 11.8079, "step": 29461 }, { "epoch": 1.604323647328691, "grad_norm": 0.530296903557611, "learning_rate": 1.983431811110633e-05, "loss": 11.8469, "step": 29462 }, { "epoch": 1.604378101325274, "grad_norm": 0.5534393628654909, "learning_rate": 1.9829047004488832e-05, "loss": 11.7574, "step": 29463 }, { "epoch": 1.604432555321857, "grad_norm": 0.5181474191870714, "learning_rate": 1.982377652129409e-05, "loss": 11.7387, "step": 29464 }, { "epoch": 1.60448700931844, "grad_norm": 0.5370931947527635, "learning_rate": 1.981850666156313e-05, "loss": 11.7648, "step": 29465 }, { "epoch": 1.6045414633150232, "grad_norm": 0.5592638879431615, "learning_rate": 1.9813237425336883e-05, "loss": 11.8377, "step": 29466 }, { "epoch": 1.6045959173116062, "grad_norm": 0.6084962679463427, "learning_rate": 1.9807968812656342e-05, "loss": 11.7357, "step": 29467 }, { "epoch": 1.6046503713081892, "grad_norm": 0.5625876362901717, "learning_rate": 1.9802700823562514e-05, "loss": 11.8689, "step": 29468 }, { "epoch": 1.6047048253047722, "grad_norm": 0.5597426028816667, "learning_rate": 1.9797433458096304e-05, "loss": 11.825, "step": 29469 }, { "epoch": 1.6047592793013552, "grad_norm": 0.5074386905928788, "learning_rate": 1.979216671629873e-05, "loss": 11.7852, "step": 29470 }, { "epoch": 1.6048137332979382, "grad_norm": 0.5922805815533806, "learning_rate": 1.97869005982107e-05, "loss": 11.8745, "step": 29471 }, { "epoch": 1.6048681872945214, "grad_norm": 0.5292795437817838, "learning_rate": 1.978163510387321e-05, "loss": 11.7987, "step": 29472 }, { "epoch": 1.6049226412911044, "grad_norm": 0.5729941697010434, "learning_rate": 1.977637023332717e-05, "loss": 11.8809, "step": 29473 }, { "epoch": 1.6049770952876874, "grad_norm": 0.6235051491056085, "learning_rate": 1.9771105986613492e-05, "loss": 11.8302, "step": 29474 }, { "epoch": 1.6050315492842704, "grad_norm": 0.5094087735677347, "learning_rate": 1.976584236377319e-05, "loss": 11.7308, "step": 29475 }, { "epoch": 1.6050860032808534, "grad_norm": 0.5222837277982497, "learning_rate": 1.976057936484712e-05, "loss": 11.7958, "step": 29476 }, { "epoch": 1.6051404572774364, "grad_norm": 0.5484101553209316, "learning_rate": 1.9755316989876273e-05, "loss": 11.7901, "step": 29477 }, { "epoch": 1.6051949112740194, "grad_norm": 0.5685191554377844, "learning_rate": 1.975005523890151e-05, "loss": 11.7697, "step": 29478 }, { "epoch": 1.6052493652706024, "grad_norm": 0.538431149989638, "learning_rate": 1.974479411196377e-05, "loss": 11.7302, "step": 29479 }, { "epoch": 1.6053038192671854, "grad_norm": 0.5511395883280289, "learning_rate": 1.9739533609104e-05, "loss": 11.8739, "step": 29480 }, { "epoch": 1.6053582732637683, "grad_norm": 0.5394145093542255, "learning_rate": 1.973427373036305e-05, "loss": 11.7719, "step": 29481 }, { "epoch": 1.6054127272603513, "grad_norm": 0.5821856532083591, "learning_rate": 1.972901447578188e-05, "loss": 11.6762, "step": 29482 }, { "epoch": 1.6054671812569343, "grad_norm": 0.554589386980011, "learning_rate": 1.9723755845401348e-05, "loss": 11.6822, "step": 29483 }, { "epoch": 1.6055216352535173, "grad_norm": 0.6017702453644517, "learning_rate": 1.9718497839262327e-05, "loss": 11.8896, "step": 29484 }, { "epoch": 1.6055760892501003, "grad_norm": 0.5876700246333384, "learning_rate": 1.9713240457405756e-05, "loss": 11.8857, "step": 29485 }, { "epoch": 1.6056305432466833, "grad_norm": 0.5051138645146366, "learning_rate": 1.9707983699872467e-05, "loss": 11.7911, "step": 29486 }, { "epoch": 1.6056849972432663, "grad_norm": 0.5804991215949988, "learning_rate": 1.9702727566703383e-05, "loss": 11.8263, "step": 29487 }, { "epoch": 1.6057394512398493, "grad_norm": 0.5762805663177812, "learning_rate": 1.9697472057939336e-05, "loss": 11.7438, "step": 29488 }, { "epoch": 1.6057939052364323, "grad_norm": 0.5301925356628214, "learning_rate": 1.969221717362124e-05, "loss": 11.8418, "step": 29489 }, { "epoch": 1.6058483592330155, "grad_norm": 0.4838981274097415, "learning_rate": 1.9686962913789897e-05, "loss": 11.8709, "step": 29490 }, { "epoch": 1.6059028132295985, "grad_norm": 0.5256166653957722, "learning_rate": 1.9681709278486204e-05, "loss": 11.7395, "step": 29491 }, { "epoch": 1.6059572672261815, "grad_norm": 0.5874138183369193, "learning_rate": 1.967645626775103e-05, "loss": 11.6483, "step": 29492 }, { "epoch": 1.6060117212227645, "grad_norm": 0.619175014361116, "learning_rate": 1.9671203881625187e-05, "loss": 11.7709, "step": 29493 }, { "epoch": 1.6060661752193475, "grad_norm": 0.5397721233361413, "learning_rate": 1.9665952120149557e-05, "loss": 11.841, "step": 29494 }, { "epoch": 1.6061206292159305, "grad_norm": 0.5133565440623458, "learning_rate": 1.9660700983364943e-05, "loss": 11.635, "step": 29495 }, { "epoch": 1.6061750832125137, "grad_norm": 0.5781675428524408, "learning_rate": 1.9655450471312176e-05, "loss": 11.8864, "step": 29496 }, { "epoch": 1.6062295372090967, "grad_norm": 0.5758039603189183, "learning_rate": 1.9650200584032118e-05, "loss": 11.8107, "step": 29497 }, { "epoch": 1.6062839912056797, "grad_norm": 0.5710336443251128, "learning_rate": 1.9644951321565564e-05, "loss": 11.8368, "step": 29498 }, { "epoch": 1.6063384452022627, "grad_norm": 0.6379260319670289, "learning_rate": 1.9639702683953355e-05, "loss": 11.9761, "step": 29499 }, { "epoch": 1.6063928991988456, "grad_norm": 0.5398940733126778, "learning_rate": 1.963445467123628e-05, "loss": 11.8037, "step": 29500 }, { "epoch": 1.6064473531954286, "grad_norm": 0.560128835292989, "learning_rate": 1.9629207283455152e-05, "loss": 11.8461, "step": 29501 }, { "epoch": 1.6065018071920116, "grad_norm": 0.5908217534337917, "learning_rate": 1.9623960520650818e-05, "loss": 11.8833, "step": 29502 }, { "epoch": 1.6065562611885946, "grad_norm": 0.6062157160382261, "learning_rate": 1.9618714382864022e-05, "loss": 11.9305, "step": 29503 }, { "epoch": 1.6066107151851776, "grad_norm": 0.5354432956893059, "learning_rate": 1.9613468870135632e-05, "loss": 11.7287, "step": 29504 }, { "epoch": 1.6066651691817606, "grad_norm": 0.5175288607008623, "learning_rate": 1.9608223982506336e-05, "loss": 11.7869, "step": 29505 }, { "epoch": 1.6067196231783436, "grad_norm": 0.5830635355602947, "learning_rate": 1.9602979720016966e-05, "loss": 11.9349, "step": 29506 }, { "epoch": 1.6067740771749266, "grad_norm": 0.5880066495957835, "learning_rate": 1.959773608270835e-05, "loss": 11.6871, "step": 29507 }, { "epoch": 1.6068285311715096, "grad_norm": 0.5211816113989604, "learning_rate": 1.9592493070621186e-05, "loss": 11.8174, "step": 29508 }, { "epoch": 1.6068829851680926, "grad_norm": 0.5881013403450657, "learning_rate": 1.95872506837963e-05, "loss": 11.7841, "step": 29509 }, { "epoch": 1.6069374391646756, "grad_norm": 0.5116156019886727, "learning_rate": 1.9582008922274418e-05, "loss": 11.8707, "step": 29510 }, { "epoch": 1.6069918931612586, "grad_norm": 0.556690276586919, "learning_rate": 1.9576767786096352e-05, "loss": 11.7254, "step": 29511 }, { "epoch": 1.6070463471578416, "grad_norm": 0.6215590417171741, "learning_rate": 1.9571527275302792e-05, "loss": 11.905, "step": 29512 }, { "epoch": 1.6071008011544248, "grad_norm": 0.5608214710341549, "learning_rate": 1.9566287389934535e-05, "loss": 11.9164, "step": 29513 }, { "epoch": 1.6071552551510078, "grad_norm": 0.5599770801378372, "learning_rate": 1.9561048130032357e-05, "loss": 11.8274, "step": 29514 }, { "epoch": 1.6072097091475908, "grad_norm": 0.531641870382106, "learning_rate": 1.9555809495636912e-05, "loss": 11.6633, "step": 29515 }, { "epoch": 1.6072641631441738, "grad_norm": 0.5398785670105353, "learning_rate": 1.9550571486788984e-05, "loss": 11.829, "step": 29516 }, { "epoch": 1.6073186171407567, "grad_norm": 0.5244099261527798, "learning_rate": 1.9545334103529324e-05, "loss": 11.7315, "step": 29517 }, { "epoch": 1.6073730711373397, "grad_norm": 0.5923949215629317, "learning_rate": 1.954009734589861e-05, "loss": 11.8393, "step": 29518 }, { "epoch": 1.607427525133923, "grad_norm": 0.5406979365669363, "learning_rate": 1.9534861213937627e-05, "loss": 11.7673, "step": 29519 }, { "epoch": 1.607481979130506, "grad_norm": 0.5557786652710994, "learning_rate": 1.9529625707687027e-05, "loss": 11.8586, "step": 29520 }, { "epoch": 1.607536433127089, "grad_norm": 0.5494921665501551, "learning_rate": 1.9524390827187577e-05, "loss": 11.8828, "step": 29521 }, { "epoch": 1.607590887123672, "grad_norm": 0.5415854479649429, "learning_rate": 1.9519156572479934e-05, "loss": 11.8841, "step": 29522 }, { "epoch": 1.607645341120255, "grad_norm": 0.5048770497296734, "learning_rate": 1.9513922943604834e-05, "loss": 11.8813, "step": 29523 }, { "epoch": 1.607699795116838, "grad_norm": 0.4881420841267677, "learning_rate": 1.9508689940603008e-05, "loss": 11.9356, "step": 29524 }, { "epoch": 1.607754249113421, "grad_norm": 0.5381087130994184, "learning_rate": 1.950345756351506e-05, "loss": 11.8552, "step": 29525 }, { "epoch": 1.607808703110004, "grad_norm": 0.5556975514909981, "learning_rate": 1.9498225812381755e-05, "loss": 11.7983, "step": 29526 }, { "epoch": 1.607863157106587, "grad_norm": 0.5920559232220721, "learning_rate": 1.9492994687243714e-05, "loss": 11.876, "step": 29527 }, { "epoch": 1.6079176111031699, "grad_norm": 0.5737121249998529, "learning_rate": 1.9487764188141655e-05, "loss": 11.7988, "step": 29528 }, { "epoch": 1.6079720650997529, "grad_norm": 0.5837325675912988, "learning_rate": 1.948253431511626e-05, "loss": 11.8061, "step": 29529 }, { "epoch": 1.6080265190963359, "grad_norm": 0.5208751115569004, "learning_rate": 1.9477305068208163e-05, "loss": 11.8655, "step": 29530 }, { "epoch": 1.6080809730929189, "grad_norm": 0.5807413749379425, "learning_rate": 1.9472076447458064e-05, "loss": 11.8194, "step": 29531 }, { "epoch": 1.6081354270895019, "grad_norm": 0.5780867307509088, "learning_rate": 1.946684845290658e-05, "loss": 11.748, "step": 29532 }, { "epoch": 1.6081898810860848, "grad_norm": 0.5718270503096778, "learning_rate": 1.9461621084594418e-05, "loss": 11.6538, "step": 29533 }, { "epoch": 1.6082443350826678, "grad_norm": 0.5256356144512199, "learning_rate": 1.945639434256219e-05, "loss": 11.8155, "step": 29534 }, { "epoch": 1.6082987890792508, "grad_norm": 0.5758285504641771, "learning_rate": 1.9451168226850524e-05, "loss": 11.7021, "step": 29535 }, { "epoch": 1.608353243075834, "grad_norm": 0.5559875475881851, "learning_rate": 1.9445942737500113e-05, "loss": 11.8845, "step": 29536 }, { "epoch": 1.608407697072417, "grad_norm": 0.534015103734107, "learning_rate": 1.9440717874551528e-05, "loss": 11.883, "step": 29537 }, { "epoch": 1.608462151069, "grad_norm": 0.5331950711834742, "learning_rate": 1.9435493638045455e-05, "loss": 11.7914, "step": 29538 }, { "epoch": 1.608516605065583, "grad_norm": 0.5458694608076182, "learning_rate": 1.943027002802247e-05, "loss": 11.725, "step": 29539 }, { "epoch": 1.608571059062166, "grad_norm": 0.5828304155005862, "learning_rate": 1.9425047044523226e-05, "loss": 11.8955, "step": 29540 }, { "epoch": 1.608625513058749, "grad_norm": 0.5225958157818917, "learning_rate": 1.941982468758834e-05, "loss": 11.7342, "step": 29541 }, { "epoch": 1.6086799670553322, "grad_norm": 0.5496488900618468, "learning_rate": 1.941460295725839e-05, "loss": 11.8381, "step": 29542 }, { "epoch": 1.6087344210519152, "grad_norm": 0.5062961263757868, "learning_rate": 1.9409381853574026e-05, "loss": 11.8887, "step": 29543 }, { "epoch": 1.6087888750484982, "grad_norm": 0.5302396278303994, "learning_rate": 1.9404161376575835e-05, "loss": 11.813, "step": 29544 }, { "epoch": 1.6088433290450812, "grad_norm": 0.49799933580786143, "learning_rate": 1.939894152630436e-05, "loss": 11.8171, "step": 29545 }, { "epoch": 1.6088977830416642, "grad_norm": 0.5289288469149018, "learning_rate": 1.9393722302800266e-05, "loss": 11.7155, "step": 29546 }, { "epoch": 1.6089522370382472, "grad_norm": 0.6175431664817955, "learning_rate": 1.9388503706104077e-05, "loss": 11.8465, "step": 29547 }, { "epoch": 1.6090066910348302, "grad_norm": 0.5713663245887997, "learning_rate": 1.938328573625643e-05, "loss": 11.9048, "step": 29548 }, { "epoch": 1.6090611450314132, "grad_norm": 0.7576427256232343, "learning_rate": 1.9378068393297844e-05, "loss": 11.9706, "step": 29549 }, { "epoch": 1.6091155990279962, "grad_norm": 0.5443327726839043, "learning_rate": 1.937285167726892e-05, "loss": 11.9013, "step": 29550 }, { "epoch": 1.6091700530245792, "grad_norm": 0.5647505781259433, "learning_rate": 1.9367635588210253e-05, "loss": 11.9367, "step": 29551 }, { "epoch": 1.6092245070211622, "grad_norm": 0.5655720292030478, "learning_rate": 1.936242012616234e-05, "loss": 11.8224, "step": 29552 }, { "epoch": 1.6092789610177451, "grad_norm": 0.5978227737092479, "learning_rate": 1.9357205291165802e-05, "loss": 11.8153, "step": 29553 }, { "epoch": 1.6093334150143281, "grad_norm": 0.5091386406530556, "learning_rate": 1.9351991083261156e-05, "loss": 11.828, "step": 29554 }, { "epoch": 1.6093878690109111, "grad_norm": 0.5762205756489719, "learning_rate": 1.9346777502488923e-05, "loss": 11.8939, "step": 29555 }, { "epoch": 1.6094423230074941, "grad_norm": 0.5396369529761386, "learning_rate": 1.9341564548889713e-05, "loss": 11.5902, "step": 29556 }, { "epoch": 1.6094967770040771, "grad_norm": 0.5152461717621627, "learning_rate": 1.9336352222503984e-05, "loss": 11.713, "step": 29557 }, { "epoch": 1.60955123100066, "grad_norm": 0.5538421050177275, "learning_rate": 1.933114052337234e-05, "loss": 11.8857, "step": 29558 }, { "epoch": 1.609605684997243, "grad_norm": 0.5159800025316812, "learning_rate": 1.932592945153524e-05, "loss": 11.8314, "step": 29559 }, { "epoch": 1.6096601389938263, "grad_norm": 0.5413113498923491, "learning_rate": 1.9320719007033282e-05, "loss": 11.8667, "step": 29560 }, { "epoch": 1.6097145929904093, "grad_norm": 0.5531103888116603, "learning_rate": 1.931550918990691e-05, "loss": 11.7705, "step": 29561 }, { "epoch": 1.6097690469869923, "grad_norm": 0.5782706319008367, "learning_rate": 1.9310300000196668e-05, "loss": 11.926, "step": 29562 }, { "epoch": 1.6098235009835753, "grad_norm": 0.5584620417309196, "learning_rate": 1.930509143794309e-05, "loss": 11.8614, "step": 29563 }, { "epoch": 1.6098779549801583, "grad_norm": 0.5381583087166442, "learning_rate": 1.9299883503186656e-05, "loss": 11.7382, "step": 29564 }, { "epoch": 1.6099324089767415, "grad_norm": 0.5125804308240364, "learning_rate": 1.9294676195967833e-05, "loss": 11.8325, "step": 29565 }, { "epoch": 1.6099868629733245, "grad_norm": 0.5481157917892993, "learning_rate": 1.9289469516327175e-05, "loss": 11.838, "step": 29566 }, { "epoch": 1.6100413169699075, "grad_norm": 0.5936471282787849, "learning_rate": 1.9284263464305108e-05, "loss": 11.9087, "step": 29567 }, { "epoch": 1.6100957709664905, "grad_norm": 0.5251934267782864, "learning_rate": 1.9279058039942165e-05, "loss": 11.8108, "step": 29568 }, { "epoch": 1.6101502249630735, "grad_norm": 0.6441650840009321, "learning_rate": 1.9273853243278782e-05, "loss": 11.9299, "step": 29569 }, { "epoch": 1.6102046789596565, "grad_norm": 0.5996784450447684, "learning_rate": 1.9268649074355484e-05, "loss": 11.8155, "step": 29570 }, { "epoch": 1.6102591329562395, "grad_norm": 0.5240673711257662, "learning_rate": 1.9263445533212677e-05, "loss": 11.8043, "step": 29571 }, { "epoch": 1.6103135869528225, "grad_norm": 0.5922263540896808, "learning_rate": 1.9258242619890854e-05, "loss": 11.8052, "step": 29572 }, { "epoch": 1.6103680409494054, "grad_norm": 0.5400260135135027, "learning_rate": 1.9253040334430505e-05, "loss": 11.7609, "step": 29573 }, { "epoch": 1.6104224949459884, "grad_norm": 0.5622988521635481, "learning_rate": 1.9247838676872064e-05, "loss": 11.7748, "step": 29574 }, { "epoch": 1.6104769489425714, "grad_norm": 0.47277871171211544, "learning_rate": 1.9242637647255967e-05, "loss": 11.7018, "step": 29575 }, { "epoch": 1.6105314029391544, "grad_norm": 0.5598215265791844, "learning_rate": 1.9237437245622635e-05, "loss": 11.977, "step": 29576 }, { "epoch": 1.6105858569357374, "grad_norm": 0.5066079895315062, "learning_rate": 1.923223747201254e-05, "loss": 11.8769, "step": 29577 }, { "epoch": 1.6106403109323204, "grad_norm": 0.5737905323530964, "learning_rate": 1.922703832646613e-05, "loss": 11.8221, "step": 29578 }, { "epoch": 1.6106947649289034, "grad_norm": 0.5371741190676275, "learning_rate": 1.922183980902379e-05, "loss": 11.8263, "step": 29579 }, { "epoch": 1.6107492189254864, "grad_norm": 0.5470637761312328, "learning_rate": 1.9216641919725996e-05, "loss": 11.829, "step": 29580 }, { "epoch": 1.6108036729220694, "grad_norm": 0.5755290994175941, "learning_rate": 1.9211444658613122e-05, "loss": 11.8048, "step": 29581 }, { "epoch": 1.6108581269186524, "grad_norm": 0.6298942677106962, "learning_rate": 1.9206248025725614e-05, "loss": 11.9161, "step": 29582 }, { "epoch": 1.6109125809152356, "grad_norm": 0.5313163553259902, "learning_rate": 1.920105202110386e-05, "loss": 11.9056, "step": 29583 }, { "epoch": 1.6109670349118186, "grad_norm": 0.5472964463557821, "learning_rate": 1.91958566447883e-05, "loss": 11.905, "step": 29584 }, { "epoch": 1.6110214889084016, "grad_norm": 0.5912321516673279, "learning_rate": 1.9190661896819307e-05, "loss": 11.8834, "step": 29585 }, { "epoch": 1.6110759429049846, "grad_norm": 0.6536873624048212, "learning_rate": 1.9185467777237254e-05, "loss": 11.923, "step": 29586 }, { "epoch": 1.6111303969015676, "grad_norm": 0.5090252953567536, "learning_rate": 1.9180274286082578e-05, "loss": 11.833, "step": 29587 }, { "epoch": 1.6111848508981506, "grad_norm": 0.5001710337858857, "learning_rate": 1.9175081423395612e-05, "loss": 11.8627, "step": 29588 }, { "epoch": 1.6112393048947338, "grad_norm": 0.5291093489575781, "learning_rate": 1.9169889189216783e-05, "loss": 11.8392, "step": 29589 }, { "epoch": 1.6112937588913168, "grad_norm": 0.5202906696704613, "learning_rate": 1.9164697583586468e-05, "loss": 11.776, "step": 29590 }, { "epoch": 1.6113482128878998, "grad_norm": 0.5246389746290668, "learning_rate": 1.9159506606544984e-05, "loss": 11.8681, "step": 29591 }, { "epoch": 1.6114026668844827, "grad_norm": 0.4898275564259432, "learning_rate": 1.9154316258132777e-05, "loss": 11.6848, "step": 29592 }, { "epoch": 1.6114571208810657, "grad_norm": 0.5413377027556796, "learning_rate": 1.9149126538390127e-05, "loss": 11.8868, "step": 29593 }, { "epoch": 1.6115115748776487, "grad_norm": 0.5192466163693963, "learning_rate": 1.9143937447357462e-05, "loss": 11.761, "step": 29594 }, { "epoch": 1.6115660288742317, "grad_norm": 0.5587969031217869, "learning_rate": 1.9138748985075094e-05, "loss": 11.8258, "step": 29595 }, { "epoch": 1.6116204828708147, "grad_norm": 0.5353248257573864, "learning_rate": 1.9133561151583355e-05, "loss": 11.7902, "step": 29596 }, { "epoch": 1.6116749368673977, "grad_norm": 0.5227399056629031, "learning_rate": 1.912837394692262e-05, "loss": 11.7976, "step": 29597 }, { "epoch": 1.6117293908639807, "grad_norm": 0.5508571534806835, "learning_rate": 1.91231873711332e-05, "loss": 11.6627, "step": 29598 }, { "epoch": 1.6117838448605637, "grad_norm": 0.5102923687347977, "learning_rate": 1.9118001424255427e-05, "loss": 11.8478, "step": 29599 }, { "epoch": 1.6118382988571467, "grad_norm": 0.531953171103753, "learning_rate": 1.9112816106329667e-05, "loss": 11.7117, "step": 29600 }, { "epoch": 1.6118927528537297, "grad_norm": 0.5297788108626057, "learning_rate": 1.9107631417396188e-05, "loss": 11.8004, "step": 29601 }, { "epoch": 1.6119472068503127, "grad_norm": 0.7068541638325369, "learning_rate": 1.9102447357495355e-05, "loss": 11.7185, "step": 29602 }, { "epoch": 1.6120016608468957, "grad_norm": 0.49332287462322977, "learning_rate": 1.909726392666744e-05, "loss": 11.8006, "step": 29603 }, { "epoch": 1.6120561148434787, "grad_norm": 0.5468416721993087, "learning_rate": 1.9092081124952787e-05, "loss": 11.7957, "step": 29604 }, { "epoch": 1.6121105688400617, "grad_norm": 0.5464211206930017, "learning_rate": 1.9086898952391686e-05, "loss": 11.7933, "step": 29605 }, { "epoch": 1.6121650228366449, "grad_norm": 0.5639125354698105, "learning_rate": 1.90817174090244e-05, "loss": 11.8746, "step": 29606 }, { "epoch": 1.6122194768332279, "grad_norm": 0.5275710519658461, "learning_rate": 1.9076536494891272e-05, "loss": 11.8444, "step": 29607 }, { "epoch": 1.6122739308298109, "grad_norm": 0.5395387033152028, "learning_rate": 1.9071356210032544e-05, "loss": 11.7913, "step": 29608 }, { "epoch": 1.6123283848263938, "grad_norm": 0.627910051876697, "learning_rate": 1.9066176554488545e-05, "loss": 12.0437, "step": 29609 }, { "epoch": 1.6123828388229768, "grad_norm": 0.6087750569054463, "learning_rate": 1.9060997528299505e-05, "loss": 11.8598, "step": 29610 }, { "epoch": 1.6124372928195598, "grad_norm": 0.5898224923045239, "learning_rate": 1.9055819131505724e-05, "loss": 11.8917, "step": 29611 }, { "epoch": 1.612491746816143, "grad_norm": 0.5226180741584564, "learning_rate": 1.9050641364147493e-05, "loss": 11.7502, "step": 29612 }, { "epoch": 1.612546200812726, "grad_norm": 0.5586843605855791, "learning_rate": 1.904546422626502e-05, "loss": 11.7246, "step": 29613 }, { "epoch": 1.612600654809309, "grad_norm": 0.5259545980686726, "learning_rate": 1.904028771789863e-05, "loss": 11.823, "step": 29614 }, { "epoch": 1.612655108805892, "grad_norm": 0.5658945433406595, "learning_rate": 1.9035111839088527e-05, "loss": 11.8673, "step": 29615 }, { "epoch": 1.612709562802475, "grad_norm": 0.5527330441003985, "learning_rate": 1.902993658987495e-05, "loss": 11.7979, "step": 29616 }, { "epoch": 1.612764016799058, "grad_norm": 0.5096908852313172, "learning_rate": 1.9024761970298187e-05, "loss": 11.7417, "step": 29617 }, { "epoch": 1.612818470795641, "grad_norm": 0.5566952959549946, "learning_rate": 1.9019587980398445e-05, "loss": 11.9029, "step": 29618 }, { "epoch": 1.612872924792224, "grad_norm": 0.5164784893964677, "learning_rate": 1.901441462021598e-05, "loss": 11.7604, "step": 29619 }, { "epoch": 1.612927378788807, "grad_norm": 0.505202894194634, "learning_rate": 1.9009241889790984e-05, "loss": 11.8244, "step": 29620 }, { "epoch": 1.61298183278539, "grad_norm": 0.49931685006218834, "learning_rate": 1.9004069789163702e-05, "loss": 11.7195, "step": 29621 }, { "epoch": 1.613036286781973, "grad_norm": 0.5455425210517945, "learning_rate": 1.8998898318374382e-05, "loss": 11.7581, "step": 29622 }, { "epoch": 1.613090740778556, "grad_norm": 0.5432936625568962, "learning_rate": 1.8993727477463197e-05, "loss": 11.8066, "step": 29623 }, { "epoch": 1.613145194775139, "grad_norm": 0.5828583096469647, "learning_rate": 1.8988557266470408e-05, "loss": 11.8331, "step": 29624 }, { "epoch": 1.613199648771722, "grad_norm": 0.526464711439938, "learning_rate": 1.898338768543615e-05, "loss": 11.8147, "step": 29625 }, { "epoch": 1.613254102768305, "grad_norm": 0.51089263905331, "learning_rate": 1.8978218734400645e-05, "loss": 11.8426, "step": 29626 }, { "epoch": 1.613308556764888, "grad_norm": 0.5337080174071616, "learning_rate": 1.8973050413404126e-05, "loss": 11.8138, "step": 29627 }, { "epoch": 1.613363010761471, "grad_norm": 0.6126107910181418, "learning_rate": 1.896788272248673e-05, "loss": 11.8685, "step": 29628 }, { "epoch": 1.613417464758054, "grad_norm": 0.5444234316369566, "learning_rate": 1.896271566168869e-05, "loss": 11.7673, "step": 29629 }, { "epoch": 1.6134719187546371, "grad_norm": 0.6123301985222828, "learning_rate": 1.895754923105013e-05, "loss": 11.7898, "step": 29630 }, { "epoch": 1.6135263727512201, "grad_norm": 0.5254807159606458, "learning_rate": 1.8952383430611298e-05, "loss": 11.8347, "step": 29631 }, { "epoch": 1.6135808267478031, "grad_norm": 0.5005848331393221, "learning_rate": 1.894721826041229e-05, "loss": 11.8329, "step": 29632 }, { "epoch": 1.6136352807443861, "grad_norm": 0.5490648289902342, "learning_rate": 1.8942053720493314e-05, "loss": 11.8345, "step": 29633 }, { "epoch": 1.613689734740969, "grad_norm": 0.5419209812540147, "learning_rate": 1.8936889810894542e-05, "loss": 11.8052, "step": 29634 }, { "epoch": 1.6137441887375523, "grad_norm": 0.5832089670673982, "learning_rate": 1.89317265316561e-05, "loss": 11.6428, "step": 29635 }, { "epoch": 1.6137986427341353, "grad_norm": 0.553729642072451, "learning_rate": 1.892656388281816e-05, "loss": 11.9131, "step": 29636 }, { "epoch": 1.6138530967307183, "grad_norm": 0.5502991842329528, "learning_rate": 1.892140186442083e-05, "loss": 11.8427, "step": 29637 }, { "epoch": 1.6139075507273013, "grad_norm": 0.555463008611924, "learning_rate": 1.8916240476504276e-05, "loss": 11.9569, "step": 29638 }, { "epoch": 1.6139620047238843, "grad_norm": 0.6543402161789428, "learning_rate": 1.891107971910865e-05, "loss": 11.9743, "step": 29639 }, { "epoch": 1.6140164587204673, "grad_norm": 0.5126772533103994, "learning_rate": 1.8905919592274046e-05, "loss": 11.5509, "step": 29640 }, { "epoch": 1.6140709127170503, "grad_norm": 0.559791595103359, "learning_rate": 1.890076009604064e-05, "loss": 11.872, "step": 29641 }, { "epoch": 1.6141253667136333, "grad_norm": 0.6019338330864715, "learning_rate": 1.8895601230448502e-05, "loss": 11.8674, "step": 29642 }, { "epoch": 1.6141798207102163, "grad_norm": 0.5680816470425887, "learning_rate": 1.8890442995537782e-05, "loss": 11.8414, "step": 29643 }, { "epoch": 1.6142342747067993, "grad_norm": 0.5496959065861055, "learning_rate": 1.888528539134856e-05, "loss": 11.805, "step": 29644 }, { "epoch": 1.6142887287033822, "grad_norm": 0.5399562832626262, "learning_rate": 1.8880128417920993e-05, "loss": 11.8105, "step": 29645 }, { "epoch": 1.6143431826999652, "grad_norm": 0.5282321670642584, "learning_rate": 1.887497207529514e-05, "loss": 11.7994, "step": 29646 }, { "epoch": 1.6143976366965482, "grad_norm": 0.5414942430792258, "learning_rate": 1.8869816363511095e-05, "loss": 11.7747, "step": 29647 }, { "epoch": 1.6144520906931312, "grad_norm": 0.5670112969663105, "learning_rate": 1.8864661282608954e-05, "loss": 11.8084, "step": 29648 }, { "epoch": 1.6145065446897142, "grad_norm": 0.543724242751264, "learning_rate": 1.8859506832628848e-05, "loss": 11.6512, "step": 29649 }, { "epoch": 1.6145609986862972, "grad_norm": 0.6046962502639556, "learning_rate": 1.885435301361079e-05, "loss": 11.881, "step": 29650 }, { "epoch": 1.6146154526828802, "grad_norm": 0.5295983012501382, "learning_rate": 1.8849199825594922e-05, "loss": 11.6617, "step": 29651 }, { "epoch": 1.6146699066794632, "grad_norm": 0.5356367137513399, "learning_rate": 1.8844047268621256e-05, "loss": 11.8382, "step": 29652 }, { "epoch": 1.6147243606760464, "grad_norm": 0.5692716482021974, "learning_rate": 1.8838895342729913e-05, "loss": 11.7674, "step": 29653 }, { "epoch": 1.6147788146726294, "grad_norm": 0.5406897308559924, "learning_rate": 1.8833744047960912e-05, "loss": 11.832, "step": 29654 }, { "epoch": 1.6148332686692124, "grad_norm": 0.5131634859804876, "learning_rate": 1.882859338435434e-05, "loss": 11.7801, "step": 29655 }, { "epoch": 1.6148877226657954, "grad_norm": 0.5414352540346842, "learning_rate": 1.8823443351950254e-05, "loss": 11.8916, "step": 29656 }, { "epoch": 1.6149421766623784, "grad_norm": 0.6039948249554931, "learning_rate": 1.8818293950788656e-05, "loss": 11.8279, "step": 29657 }, { "epoch": 1.6149966306589614, "grad_norm": 0.5631411197719803, "learning_rate": 1.881314518090964e-05, "loss": 11.771, "step": 29658 }, { "epoch": 1.6150510846555446, "grad_norm": 0.5250399020585735, "learning_rate": 1.8807997042353198e-05, "loss": 11.7496, "step": 29659 }, { "epoch": 1.6151055386521276, "grad_norm": 0.559565163232491, "learning_rate": 1.8802849535159393e-05, "loss": 11.8464, "step": 29660 }, { "epoch": 1.6151599926487106, "grad_norm": 0.5732934200833762, "learning_rate": 1.8797702659368265e-05, "loss": 11.8802, "step": 29661 }, { "epoch": 1.6152144466452936, "grad_norm": 0.5437281490005131, "learning_rate": 1.879255641501979e-05, "loss": 11.8055, "step": 29662 }, { "epoch": 1.6152689006418766, "grad_norm": 0.5146443961078804, "learning_rate": 1.8787410802154048e-05, "loss": 11.7519, "step": 29663 }, { "epoch": 1.6153233546384596, "grad_norm": 0.5901833914398004, "learning_rate": 1.8782265820810995e-05, "loss": 11.824, "step": 29664 }, { "epoch": 1.6153778086350425, "grad_norm": 0.49409431228392275, "learning_rate": 1.877712147103069e-05, "loss": 11.8383, "step": 29665 }, { "epoch": 1.6154322626316255, "grad_norm": 0.5510657008505888, "learning_rate": 1.87719777528531e-05, "loss": 11.843, "step": 29666 }, { "epoch": 1.6154867166282085, "grad_norm": 0.5682934847729291, "learning_rate": 1.876683466631821e-05, "loss": 11.8976, "step": 29667 }, { "epoch": 1.6155411706247915, "grad_norm": 0.5586429230938673, "learning_rate": 1.8761692211466063e-05, "loss": 11.6376, "step": 29668 }, { "epoch": 1.6155956246213745, "grad_norm": 0.5575927369629163, "learning_rate": 1.8756550388336603e-05, "loss": 11.8644, "step": 29669 }, { "epoch": 1.6156500786179575, "grad_norm": 0.5522321182844298, "learning_rate": 1.8751409196969817e-05, "loss": 11.8512, "step": 29670 }, { "epoch": 1.6157045326145405, "grad_norm": 0.5691194886311696, "learning_rate": 1.874626863740574e-05, "loss": 11.7803, "step": 29671 }, { "epoch": 1.6157589866111235, "grad_norm": 0.5387213643850569, "learning_rate": 1.8741128709684264e-05, "loss": 11.8652, "step": 29672 }, { "epoch": 1.6158134406077065, "grad_norm": 0.5629550626198992, "learning_rate": 1.8735989413845432e-05, "loss": 11.8926, "step": 29673 }, { "epoch": 1.6158678946042895, "grad_norm": 0.5400894612339091, "learning_rate": 1.873085074992915e-05, "loss": 11.6906, "step": 29674 }, { "epoch": 1.6159223486008725, "grad_norm": 0.5797810667066575, "learning_rate": 1.8725712717975418e-05, "loss": 11.8288, "step": 29675 }, { "epoch": 1.6159768025974557, "grad_norm": 0.583512170594587, "learning_rate": 1.872057531802419e-05, "loss": 11.9047, "step": 29676 }, { "epoch": 1.6160312565940387, "grad_norm": 0.5796086398446647, "learning_rate": 1.871543855011536e-05, "loss": 11.8306, "step": 29677 }, { "epoch": 1.6160857105906217, "grad_norm": 0.6026315869755731, "learning_rate": 1.871030241428894e-05, "loss": 11.9264, "step": 29678 }, { "epoch": 1.6161401645872047, "grad_norm": 0.5549657449014572, "learning_rate": 1.8705166910584815e-05, "loss": 11.8103, "step": 29679 }, { "epoch": 1.6161946185837877, "grad_norm": 0.5021729527965968, "learning_rate": 1.870003203904297e-05, "loss": 11.7316, "step": 29680 }, { "epoch": 1.6162490725803706, "grad_norm": 0.5175615033962297, "learning_rate": 1.8694897799703282e-05, "loss": 11.9144, "step": 29681 }, { "epoch": 1.6163035265769539, "grad_norm": 0.5262945184296868, "learning_rate": 1.8689764192605707e-05, "loss": 11.8049, "step": 29682 }, { "epoch": 1.6163579805735369, "grad_norm": 0.4973390433921258, "learning_rate": 1.8684631217790194e-05, "loss": 11.8239, "step": 29683 }, { "epoch": 1.6164124345701198, "grad_norm": 0.5495698019580566, "learning_rate": 1.8679498875296588e-05, "loss": 11.808, "step": 29684 }, { "epoch": 1.6164668885667028, "grad_norm": 0.5129640458517766, "learning_rate": 1.8674367165164898e-05, "loss": 11.8014, "step": 29685 }, { "epoch": 1.6165213425632858, "grad_norm": 0.5501871845800342, "learning_rate": 1.8669236087434904e-05, "loss": 11.7824, "step": 29686 }, { "epoch": 1.6165757965598688, "grad_norm": 0.5263849480792924, "learning_rate": 1.866410564214658e-05, "loss": 11.651, "step": 29687 }, { "epoch": 1.6166302505564518, "grad_norm": 0.5654000764992241, "learning_rate": 1.8658975829339832e-05, "loss": 11.6673, "step": 29688 }, { "epoch": 1.6166847045530348, "grad_norm": 0.5329454968180634, "learning_rate": 1.8653846649054497e-05, "loss": 11.7625, "step": 29689 }, { "epoch": 1.6167391585496178, "grad_norm": 0.5874770734532969, "learning_rate": 1.864871810133053e-05, "loss": 11.8525, "step": 29690 }, { "epoch": 1.6167936125462008, "grad_norm": 0.5889592799692791, "learning_rate": 1.8643590186207737e-05, "loss": 11.8978, "step": 29691 }, { "epoch": 1.6168480665427838, "grad_norm": 0.5247054306891863, "learning_rate": 1.8638462903726062e-05, "loss": 11.7815, "step": 29692 }, { "epoch": 1.6169025205393668, "grad_norm": 0.5405937891158469, "learning_rate": 1.8633336253925314e-05, "loss": 11.8829, "step": 29693 }, { "epoch": 1.6169569745359498, "grad_norm": 0.5864781772542362, "learning_rate": 1.862821023684539e-05, "loss": 11.8152, "step": 29694 }, { "epoch": 1.6170114285325328, "grad_norm": 0.5940501313197909, "learning_rate": 1.8623084852526208e-05, "loss": 11.9119, "step": 29695 }, { "epoch": 1.6170658825291158, "grad_norm": 0.5226921153191229, "learning_rate": 1.8617960101007504e-05, "loss": 11.761, "step": 29696 }, { "epoch": 1.6171203365256988, "grad_norm": 0.5452004933781307, "learning_rate": 1.861283598232919e-05, "loss": 11.7848, "step": 29697 }, { "epoch": 1.6171747905222817, "grad_norm": 0.5318178540595372, "learning_rate": 1.8607712496531148e-05, "loss": 11.7976, "step": 29698 }, { "epoch": 1.617229244518865, "grad_norm": 0.5477669497775027, "learning_rate": 1.8602589643653144e-05, "loss": 11.8655, "step": 29699 }, { "epoch": 1.617283698515448, "grad_norm": 0.5344043123632418, "learning_rate": 1.8597467423735092e-05, "loss": 11.8005, "step": 29700 }, { "epoch": 1.617338152512031, "grad_norm": 0.5575845681308145, "learning_rate": 1.859234583681675e-05, "loss": 11.852, "step": 29701 }, { "epoch": 1.617392606508614, "grad_norm": 0.5262517809572043, "learning_rate": 1.8587224882938003e-05, "loss": 11.78, "step": 29702 }, { "epoch": 1.617447060505197, "grad_norm": 0.5882528780569757, "learning_rate": 1.8582104562138637e-05, "loss": 11.838, "step": 29703 }, { "epoch": 1.61750151450178, "grad_norm": 0.6035131653248957, "learning_rate": 1.857698487445846e-05, "loss": 11.8683, "step": 29704 }, { "epoch": 1.6175559684983631, "grad_norm": 0.5144802107133795, "learning_rate": 1.857186581993737e-05, "loss": 11.7094, "step": 29705 }, { "epoch": 1.6176104224949461, "grad_norm": 0.5667786980611123, "learning_rate": 1.8566747398615048e-05, "loss": 11.8018, "step": 29706 }, { "epoch": 1.6176648764915291, "grad_norm": 0.5400424489562431, "learning_rate": 1.8561629610531385e-05, "loss": 11.8653, "step": 29707 }, { "epoch": 1.6177193304881121, "grad_norm": 0.563215643402406, "learning_rate": 1.8556512455726115e-05, "loss": 11.9103, "step": 29708 }, { "epoch": 1.617773784484695, "grad_norm": 0.5516286598899117, "learning_rate": 1.8551395934239068e-05, "loss": 11.7368, "step": 29709 }, { "epoch": 1.617828238481278, "grad_norm": 0.5328982898114879, "learning_rate": 1.8546280046110042e-05, "loss": 11.7824, "step": 29710 }, { "epoch": 1.617882692477861, "grad_norm": 0.5870227609709985, "learning_rate": 1.8541164791378786e-05, "loss": 11.9064, "step": 29711 }, { "epoch": 1.617937146474444, "grad_norm": 0.5749743102814574, "learning_rate": 1.8536050170085117e-05, "loss": 11.9272, "step": 29712 }, { "epoch": 1.617991600471027, "grad_norm": 0.5111909105755295, "learning_rate": 1.8530936182268755e-05, "loss": 11.6833, "step": 29713 }, { "epoch": 1.61804605446761, "grad_norm": 0.5118690492104214, "learning_rate": 1.852582282796952e-05, "loss": 11.7455, "step": 29714 }, { "epoch": 1.618100508464193, "grad_norm": 0.5177070100658223, "learning_rate": 1.8520710107227156e-05, "loss": 11.8511, "step": 29715 }, { "epoch": 1.618154962460776, "grad_norm": 0.5429501437094439, "learning_rate": 1.8515598020081383e-05, "loss": 11.9224, "step": 29716 }, { "epoch": 1.618209416457359, "grad_norm": 0.5475021896792563, "learning_rate": 1.8510486566572017e-05, "loss": 11.6796, "step": 29717 }, { "epoch": 1.618263870453942, "grad_norm": 0.5924743382317234, "learning_rate": 1.850537574673874e-05, "loss": 11.8485, "step": 29718 }, { "epoch": 1.618318324450525, "grad_norm": 0.5013354626793133, "learning_rate": 1.8500265560621334e-05, "loss": 11.8067, "step": 29719 }, { "epoch": 1.618372778447108, "grad_norm": 0.5114146283955324, "learning_rate": 1.8495156008259552e-05, "loss": 11.6471, "step": 29720 }, { "epoch": 1.618427232443691, "grad_norm": 0.5786062154256025, "learning_rate": 1.8490047089693084e-05, "loss": 11.934, "step": 29721 }, { "epoch": 1.618481686440274, "grad_norm": 0.5626412667315792, "learning_rate": 1.8484938804961706e-05, "loss": 11.8384, "step": 29722 }, { "epoch": 1.6185361404368572, "grad_norm": 0.5544560528376635, "learning_rate": 1.8479831154105075e-05, "loss": 11.6767, "step": 29723 }, { "epoch": 1.6185905944334402, "grad_norm": 0.5589589299532259, "learning_rate": 1.847472413716298e-05, "loss": 11.8483, "step": 29724 }, { "epoch": 1.6186450484300232, "grad_norm": 0.5928141170937883, "learning_rate": 1.8469617754175107e-05, "loss": 11.7703, "step": 29725 }, { "epoch": 1.6186995024266062, "grad_norm": 0.5576237658610851, "learning_rate": 1.8464512005181133e-05, "loss": 11.8894, "step": 29726 }, { "epoch": 1.6187539564231892, "grad_norm": 0.5290010663145447, "learning_rate": 1.845940689022081e-05, "loss": 11.7963, "step": 29727 }, { "epoch": 1.6188084104197722, "grad_norm": 0.5617072598563205, "learning_rate": 1.845430240933379e-05, "loss": 11.7636, "step": 29728 }, { "epoch": 1.6188628644163554, "grad_norm": 0.5955959921049784, "learning_rate": 1.844919856255981e-05, "loss": 11.879, "step": 29729 }, { "epoch": 1.6189173184129384, "grad_norm": 0.5005111701661928, "learning_rate": 1.8444095349938518e-05, "loss": 11.8425, "step": 29730 }, { "epoch": 1.6189717724095214, "grad_norm": 0.5753867860748185, "learning_rate": 1.843899277150961e-05, "loss": 11.8455, "step": 29731 }, { "epoch": 1.6190262264061044, "grad_norm": 0.6022810675614024, "learning_rate": 1.8433890827312806e-05, "loss": 11.7727, "step": 29732 }, { "epoch": 1.6190806804026874, "grad_norm": 0.5737669885927515, "learning_rate": 1.8428789517387723e-05, "loss": 11.6892, "step": 29733 }, { "epoch": 1.6191351343992704, "grad_norm": 0.5383511411239242, "learning_rate": 1.8423688841774067e-05, "loss": 11.7437, "step": 29734 }, { "epoch": 1.6191895883958534, "grad_norm": 0.6560133762952048, "learning_rate": 1.8418588800511493e-05, "loss": 11.9499, "step": 29735 }, { "epoch": 1.6192440423924364, "grad_norm": 0.5467763424292883, "learning_rate": 1.841348939363963e-05, "loss": 11.8131, "step": 29736 }, { "epoch": 1.6192984963890193, "grad_norm": 0.5654688761193011, "learning_rate": 1.840839062119818e-05, "loss": 11.6356, "step": 29737 }, { "epoch": 1.6193529503856023, "grad_norm": 0.5736787912331952, "learning_rate": 1.840329248322673e-05, "loss": 11.7675, "step": 29738 }, { "epoch": 1.6194074043821853, "grad_norm": 0.7146954815898924, "learning_rate": 1.8398194979765005e-05, "loss": 11.8188, "step": 29739 }, { "epoch": 1.6194618583787683, "grad_norm": 0.5270226949178375, "learning_rate": 1.839309811085257e-05, "loss": 11.651, "step": 29740 }, { "epoch": 1.6195163123753513, "grad_norm": 0.5697433592279761, "learning_rate": 1.8388001876529105e-05, "loss": 11.8665, "step": 29741 }, { "epoch": 1.6195707663719343, "grad_norm": 0.5792422543746626, "learning_rate": 1.8382906276834212e-05, "loss": 11.8301, "step": 29742 }, { "epoch": 1.6196252203685173, "grad_norm": 0.5314354702809112, "learning_rate": 1.8377811311807514e-05, "loss": 11.7925, "step": 29743 }, { "epoch": 1.6196796743651003, "grad_norm": 0.5408031908187323, "learning_rate": 1.837271698148868e-05, "loss": 11.9217, "step": 29744 }, { "epoch": 1.6197341283616833, "grad_norm": 0.5682392557677713, "learning_rate": 1.836762328591728e-05, "loss": 11.7606, "step": 29745 }, { "epoch": 1.6197885823582665, "grad_norm": 0.47152443855339354, "learning_rate": 1.8362530225132902e-05, "loss": 11.817, "step": 29746 }, { "epoch": 1.6198430363548495, "grad_norm": 0.5890601620756605, "learning_rate": 1.8357437799175216e-05, "loss": 11.9369, "step": 29747 }, { "epoch": 1.6198974903514325, "grad_norm": 0.512614243080736, "learning_rate": 1.8352346008083753e-05, "loss": 11.7113, "step": 29748 }, { "epoch": 1.6199519443480155, "grad_norm": 0.5001521175061336, "learning_rate": 1.8347254851898166e-05, "loss": 11.7671, "step": 29749 }, { "epoch": 1.6200063983445985, "grad_norm": 0.5526447643640972, "learning_rate": 1.8342164330657997e-05, "loss": 11.6839, "step": 29750 }, { "epoch": 1.6200608523411815, "grad_norm": 0.5244986183923309, "learning_rate": 1.833707444440288e-05, "loss": 11.6831, "step": 29751 }, { "epoch": 1.6201153063377647, "grad_norm": 0.5890321091260146, "learning_rate": 1.833198519317233e-05, "loss": 11.8685, "step": 29752 }, { "epoch": 1.6201697603343477, "grad_norm": 0.5218310798077371, "learning_rate": 1.832689657700597e-05, "loss": 11.7616, "step": 29753 }, { "epoch": 1.6202242143309307, "grad_norm": 0.5239361598665088, "learning_rate": 1.832180859594338e-05, "loss": 11.7665, "step": 29754 }, { "epoch": 1.6202786683275137, "grad_norm": 0.5561360692793038, "learning_rate": 1.8316721250024093e-05, "loss": 11.8565, "step": 29755 }, { "epoch": 1.6203331223240967, "grad_norm": 0.58588166861285, "learning_rate": 1.8311634539287692e-05, "loss": 11.8331, "step": 29756 }, { "epoch": 1.6203875763206796, "grad_norm": 0.47225103007604813, "learning_rate": 1.83065484637737e-05, "loss": 11.7213, "step": 29757 }, { "epoch": 1.6204420303172626, "grad_norm": 0.5730737085838575, "learning_rate": 1.830146302352168e-05, "loss": 11.7649, "step": 29758 }, { "epoch": 1.6204964843138456, "grad_norm": 0.480864723884432, "learning_rate": 1.8296378218571207e-05, "loss": 11.7468, "step": 29759 }, { "epoch": 1.6205509383104286, "grad_norm": 0.6027636164304947, "learning_rate": 1.829129404896177e-05, "loss": 11.8949, "step": 29760 }, { "epoch": 1.6206053923070116, "grad_norm": 0.515908200547813, "learning_rate": 1.8286210514732973e-05, "loss": 11.6664, "step": 29761 }, { "epoch": 1.6206598463035946, "grad_norm": 0.5588522111663349, "learning_rate": 1.8281127615924264e-05, "loss": 11.7865, "step": 29762 }, { "epoch": 1.6207143003001776, "grad_norm": 0.6389388471645864, "learning_rate": 1.8276045352575244e-05, "loss": 11.8541, "step": 29763 }, { "epoch": 1.6207687542967606, "grad_norm": 0.5136481223944924, "learning_rate": 1.8270963724725375e-05, "loss": 11.8994, "step": 29764 }, { "epoch": 1.6208232082933436, "grad_norm": 0.5349080581632016, "learning_rate": 1.826588273241423e-05, "loss": 11.7461, "step": 29765 }, { "epoch": 1.6208776622899266, "grad_norm": 0.5986740150879933, "learning_rate": 1.8260802375681286e-05, "loss": 11.9312, "step": 29766 }, { "epoch": 1.6209321162865096, "grad_norm": 0.5560229739189517, "learning_rate": 1.825572265456602e-05, "loss": 11.8586, "step": 29767 }, { "epoch": 1.6209865702830926, "grad_norm": 0.582516417404331, "learning_rate": 1.825064356910796e-05, "loss": 11.8453, "step": 29768 }, { "epoch": 1.6210410242796758, "grad_norm": 0.6281607864308909, "learning_rate": 1.8245565119346632e-05, "loss": 11.795, "step": 29769 }, { "epoch": 1.6210954782762588, "grad_norm": 0.5695811605645611, "learning_rate": 1.8240487305321473e-05, "loss": 11.7029, "step": 29770 }, { "epoch": 1.6211499322728418, "grad_norm": 0.5451215618444492, "learning_rate": 1.8235410127072027e-05, "loss": 11.7372, "step": 29771 }, { "epoch": 1.6212043862694248, "grad_norm": 0.5705223579328994, "learning_rate": 1.8230333584637716e-05, "loss": 11.9475, "step": 29772 }, { "epoch": 1.6212588402660078, "grad_norm": 0.5490309796689397, "learning_rate": 1.8225257678058072e-05, "loss": 11.8289, "step": 29773 }, { "epoch": 1.6213132942625907, "grad_norm": 0.5834407945646138, "learning_rate": 1.822018240737251e-05, "loss": 11.9484, "step": 29774 }, { "epoch": 1.621367748259174, "grad_norm": 0.5774574060540624, "learning_rate": 1.8215107772620554e-05, "loss": 11.9275, "step": 29775 }, { "epoch": 1.621422202255757, "grad_norm": 0.5749672081949425, "learning_rate": 1.8210033773841627e-05, "loss": 11.8054, "step": 29776 }, { "epoch": 1.62147665625234, "grad_norm": 0.5108241365433569, "learning_rate": 1.820496041107518e-05, "loss": 11.7704, "step": 29777 }, { "epoch": 1.621531110248923, "grad_norm": 0.5355894214491904, "learning_rate": 1.8199887684360696e-05, "loss": 11.7478, "step": 29778 }, { "epoch": 1.621585564245506, "grad_norm": 0.5344683838749086, "learning_rate": 1.819481559373757e-05, "loss": 11.721, "step": 29779 }, { "epoch": 1.621640018242089, "grad_norm": 0.5484251651408244, "learning_rate": 1.818974413924528e-05, "loss": 11.7141, "step": 29780 }, { "epoch": 1.621694472238672, "grad_norm": 0.5330372194509918, "learning_rate": 1.8184673320923296e-05, "loss": 11.847, "step": 29781 }, { "epoch": 1.621748926235255, "grad_norm": 0.54969047449176, "learning_rate": 1.8179603138810973e-05, "loss": 11.9113, "step": 29782 }, { "epoch": 1.621803380231838, "grad_norm": 0.5540264519426792, "learning_rate": 1.8174533592947817e-05, "loss": 11.8589, "step": 29783 }, { "epoch": 1.621857834228421, "grad_norm": 0.5027733970076889, "learning_rate": 1.8169464683373172e-05, "loss": 11.8258, "step": 29784 }, { "epoch": 1.6219122882250039, "grad_norm": 0.5338662523738564, "learning_rate": 1.8164396410126527e-05, "loss": 11.8409, "step": 29785 }, { "epoch": 1.6219667422215869, "grad_norm": 0.5355778796105455, "learning_rate": 1.8159328773247254e-05, "loss": 11.6876, "step": 29786 }, { "epoch": 1.6220211962181699, "grad_norm": 0.5506582771023127, "learning_rate": 1.8154261772774738e-05, "loss": 11.7884, "step": 29787 }, { "epoch": 1.6220756502147529, "grad_norm": 0.5706165051098933, "learning_rate": 1.8149195408748443e-05, "loss": 11.8531, "step": 29788 }, { "epoch": 1.6221301042113359, "grad_norm": 0.5373574049088574, "learning_rate": 1.8144129681207688e-05, "loss": 11.7053, "step": 29789 }, { "epoch": 1.6221845582079188, "grad_norm": 0.5458616252224401, "learning_rate": 1.8139064590191946e-05, "loss": 11.7965, "step": 29790 }, { "epoch": 1.6222390122045018, "grad_norm": 0.5840944206279342, "learning_rate": 1.8134000135740536e-05, "loss": 11.8291, "step": 29791 }, { "epoch": 1.6222934662010848, "grad_norm": 0.5795284200751835, "learning_rate": 1.8128936317892875e-05, "loss": 11.7689, "step": 29792 }, { "epoch": 1.622347920197668, "grad_norm": 0.5292672808196972, "learning_rate": 1.8123873136688364e-05, "loss": 11.774, "step": 29793 }, { "epoch": 1.622402374194251, "grad_norm": 0.505826063359344, "learning_rate": 1.8118810592166314e-05, "loss": 11.8555, "step": 29794 }, { "epoch": 1.622456828190834, "grad_norm": 0.5177697343648606, "learning_rate": 1.811374868436615e-05, "loss": 11.7862, "step": 29795 }, { "epoch": 1.622511282187417, "grad_norm": 0.6289377222011117, "learning_rate": 1.810868741332721e-05, "loss": 11.8154, "step": 29796 }, { "epoch": 1.622565736184, "grad_norm": 0.5185660813581372, "learning_rate": 1.810362677908882e-05, "loss": 11.7476, "step": 29797 }, { "epoch": 1.622620190180583, "grad_norm": 0.8677192061925832, "learning_rate": 1.8098566781690397e-05, "loss": 11.6404, "step": 29798 }, { "epoch": 1.6226746441771662, "grad_norm": 0.5562605901423329, "learning_rate": 1.8093507421171217e-05, "loss": 11.8005, "step": 29799 }, { "epoch": 1.6227290981737492, "grad_norm": 0.5416724699517385, "learning_rate": 1.8088448697570693e-05, "loss": 11.8058, "step": 29800 }, { "epoch": 1.6227835521703322, "grad_norm": 0.5413622332538577, "learning_rate": 1.8083390610928097e-05, "loss": 11.8775, "step": 29801 }, { "epoch": 1.6228380061669152, "grad_norm": 0.5086184504807458, "learning_rate": 1.807833316128279e-05, "loss": 11.8459, "step": 29802 }, { "epoch": 1.6228924601634982, "grad_norm": 0.5248310507755106, "learning_rate": 1.807327634867414e-05, "loss": 11.7233, "step": 29803 }, { "epoch": 1.6229469141600812, "grad_norm": 0.5570920371789084, "learning_rate": 1.8068220173141394e-05, "loss": 11.8058, "step": 29804 }, { "epoch": 1.6230013681566642, "grad_norm": 0.504956431398303, "learning_rate": 1.806316463472394e-05, "loss": 11.7385, "step": 29805 }, { "epoch": 1.6230558221532472, "grad_norm": 0.5343066618336901, "learning_rate": 1.805810973346106e-05, "loss": 11.8999, "step": 29806 }, { "epoch": 1.6231102761498302, "grad_norm": 0.5745609893400554, "learning_rate": 1.8053055469392034e-05, "loss": 11.9275, "step": 29807 }, { "epoch": 1.6231647301464132, "grad_norm": 0.5751834413970852, "learning_rate": 1.8048001842556216e-05, "loss": 11.8573, "step": 29808 }, { "epoch": 1.6232191841429962, "grad_norm": 0.5415861978949785, "learning_rate": 1.804294885299286e-05, "loss": 11.792, "step": 29809 }, { "epoch": 1.6232736381395791, "grad_norm": 0.5979319586553563, "learning_rate": 1.8037896500741292e-05, "loss": 11.8604, "step": 29810 }, { "epoch": 1.6233280921361621, "grad_norm": 0.5332030842511283, "learning_rate": 1.8032844785840765e-05, "loss": 11.825, "step": 29811 }, { "epoch": 1.6233825461327451, "grad_norm": 0.5672826300173477, "learning_rate": 1.802779370833061e-05, "loss": 11.7771, "step": 29812 }, { "epoch": 1.6234370001293281, "grad_norm": 0.5456811727174755, "learning_rate": 1.8022743268250053e-05, "loss": 11.7762, "step": 29813 }, { "epoch": 1.6234914541259111, "grad_norm": 0.5188778840167558, "learning_rate": 1.8017693465638385e-05, "loss": 11.8128, "step": 29814 }, { "epoch": 1.623545908122494, "grad_norm": 0.6029305953430277, "learning_rate": 1.8012644300534908e-05, "loss": 11.9242, "step": 29815 }, { "epoch": 1.6236003621190773, "grad_norm": 0.576940166603129, "learning_rate": 1.800759577297887e-05, "loss": 11.8113, "step": 29816 }, { "epoch": 1.6236548161156603, "grad_norm": 0.5358719089609392, "learning_rate": 1.800254788300948e-05, "loss": 11.8687, "step": 29817 }, { "epoch": 1.6237092701122433, "grad_norm": 0.5202429387029124, "learning_rate": 1.7997500630666053e-05, "loss": 11.6968, "step": 29818 }, { "epoch": 1.6237637241088263, "grad_norm": 0.5431275583205528, "learning_rate": 1.7992454015987793e-05, "loss": 11.7837, "step": 29819 }, { "epoch": 1.6238181781054093, "grad_norm": 0.5793584771153201, "learning_rate": 1.7987408039013987e-05, "loss": 11.7659, "step": 29820 }, { "epoch": 1.6238726321019923, "grad_norm": 0.5579550905480155, "learning_rate": 1.7982362699783827e-05, "loss": 11.6604, "step": 29821 }, { "epoch": 1.6239270860985755, "grad_norm": 0.5744584766801781, "learning_rate": 1.7977317998336597e-05, "loss": 12.0137, "step": 29822 }, { "epoch": 1.6239815400951585, "grad_norm": 0.6315104965522051, "learning_rate": 1.797227393471147e-05, "loss": 11.8804, "step": 29823 }, { "epoch": 1.6240359940917415, "grad_norm": 0.562141376184874, "learning_rate": 1.7967230508947687e-05, "loss": 11.8801, "step": 29824 }, { "epoch": 1.6240904480883245, "grad_norm": 0.538474797622595, "learning_rate": 1.796218772108451e-05, "loss": 11.7563, "step": 29825 }, { "epoch": 1.6241449020849075, "grad_norm": 0.5280226524983459, "learning_rate": 1.7957145571161117e-05, "loss": 11.8238, "step": 29826 }, { "epoch": 1.6241993560814905, "grad_norm": 0.5367644806566167, "learning_rate": 1.7952104059216722e-05, "loss": 11.8238, "step": 29827 }, { "epoch": 1.6242538100780735, "grad_norm": 0.536063174405373, "learning_rate": 1.7947063185290502e-05, "loss": 11.8473, "step": 29828 }, { "epoch": 1.6243082640746564, "grad_norm": 0.5080321495151783, "learning_rate": 1.7942022949421665e-05, "loss": 11.8336, "step": 29829 }, { "epoch": 1.6243627180712394, "grad_norm": 0.5543109604420468, "learning_rate": 1.793698335164946e-05, "loss": 11.8157, "step": 29830 }, { "epoch": 1.6244171720678224, "grad_norm": 0.5446841505257469, "learning_rate": 1.7931944392013e-05, "loss": 11.7777, "step": 29831 }, { "epoch": 1.6244716260644054, "grad_norm": 0.5390297565914725, "learning_rate": 1.792690607055153e-05, "loss": 11.6937, "step": 29832 }, { "epoch": 1.6245260800609884, "grad_norm": 0.6024428999512876, "learning_rate": 1.7921868387304187e-05, "loss": 11.715, "step": 29833 }, { "epoch": 1.6245805340575714, "grad_norm": 0.4685025058250226, "learning_rate": 1.791683134231017e-05, "loss": 11.8521, "step": 29834 }, { "epoch": 1.6246349880541544, "grad_norm": 0.537252562932653, "learning_rate": 1.7911794935608616e-05, "loss": 11.8172, "step": 29835 }, { "epoch": 1.6246894420507374, "grad_norm": 0.5740644328894018, "learning_rate": 1.790675916723874e-05, "loss": 11.7572, "step": 29836 }, { "epoch": 1.6247438960473204, "grad_norm": 0.49532925050483767, "learning_rate": 1.7901724037239675e-05, "loss": 11.7963, "step": 29837 }, { "epoch": 1.6247983500439034, "grad_norm": 0.4812212648939318, "learning_rate": 1.7896689545650548e-05, "loss": 11.6105, "step": 29838 }, { "epoch": 1.6248528040404866, "grad_norm": 0.5808507902498412, "learning_rate": 1.7891655692510547e-05, "loss": 11.7792, "step": 29839 }, { "epoch": 1.6249072580370696, "grad_norm": 0.5716854153082225, "learning_rate": 1.7886622477858783e-05, "loss": 11.8356, "step": 29840 }, { "epoch": 1.6249617120336526, "grad_norm": 0.5438784820571364, "learning_rate": 1.7881589901734408e-05, "loss": 11.7404, "step": 29841 }, { "epoch": 1.6250161660302356, "grad_norm": 0.5522321049595004, "learning_rate": 1.7876557964176587e-05, "loss": 11.8787, "step": 29842 }, { "epoch": 1.6250706200268186, "grad_norm": 0.5385128981210782, "learning_rate": 1.7871526665224404e-05, "loss": 11.9742, "step": 29843 }, { "epoch": 1.6251250740234016, "grad_norm": 0.5642394369165362, "learning_rate": 1.786649600491702e-05, "loss": 11.8231, "step": 29844 }, { "epoch": 1.6251795280199848, "grad_norm": 0.5282025949759572, "learning_rate": 1.7861465983293514e-05, "loss": 11.8951, "step": 29845 }, { "epoch": 1.6252339820165678, "grad_norm": 0.5067521434221105, "learning_rate": 1.7856436600393046e-05, "loss": 11.8502, "step": 29846 }, { "epoch": 1.6252884360131508, "grad_norm": 0.5880429059922319, "learning_rate": 1.785140785625471e-05, "loss": 11.8383, "step": 29847 }, { "epoch": 1.6253428900097338, "grad_norm": 0.532984901509907, "learning_rate": 1.7846379750917576e-05, "loss": 11.8345, "step": 29848 }, { "epoch": 1.6253973440063167, "grad_norm": 0.5544920082952747, "learning_rate": 1.7841352284420786e-05, "loss": 11.7894, "step": 29849 }, { "epoch": 1.6254517980028997, "grad_norm": 0.5217674602501226, "learning_rate": 1.7836325456803404e-05, "loss": 11.7569, "step": 29850 }, { "epoch": 1.6255062519994827, "grad_norm": 0.5172372220391482, "learning_rate": 1.783129926810453e-05, "loss": 11.7676, "step": 29851 }, { "epoch": 1.6255607059960657, "grad_norm": 0.5156677079660713, "learning_rate": 1.7826273718363274e-05, "loss": 11.6364, "step": 29852 }, { "epoch": 1.6256151599926487, "grad_norm": 0.5500854937460841, "learning_rate": 1.7821248807618674e-05, "loss": 11.7937, "step": 29853 }, { "epoch": 1.6256696139892317, "grad_norm": 0.6009562225192525, "learning_rate": 1.7816224535909854e-05, "loss": 11.8653, "step": 29854 }, { "epoch": 1.6257240679858147, "grad_norm": 0.5897278157514485, "learning_rate": 1.7811200903275826e-05, "loss": 11.9097, "step": 29855 }, { "epoch": 1.6257785219823977, "grad_norm": 0.5196931484303855, "learning_rate": 1.7806177909755706e-05, "loss": 11.7866, "step": 29856 }, { "epoch": 1.6258329759789807, "grad_norm": 0.5259285433385095, "learning_rate": 1.7801155555388528e-05, "loss": 11.9078, "step": 29857 }, { "epoch": 1.6258874299755637, "grad_norm": 0.5379667007032992, "learning_rate": 1.7796133840213323e-05, "loss": 11.8614, "step": 29858 }, { "epoch": 1.6259418839721467, "grad_norm": 0.5596329701440825, "learning_rate": 1.77911127642692e-05, "loss": 11.7767, "step": 29859 }, { "epoch": 1.6259963379687297, "grad_norm": 0.5160637975508303, "learning_rate": 1.778609232759514e-05, "loss": 11.8189, "step": 29860 }, { "epoch": 1.6260507919653127, "grad_norm": 0.565438200037967, "learning_rate": 1.7781072530230236e-05, "loss": 11.7822, "step": 29861 }, { "epoch": 1.6261052459618957, "grad_norm": 0.58634501271915, "learning_rate": 1.7776053372213486e-05, "loss": 11.8256, "step": 29862 }, { "epoch": 1.6261596999584789, "grad_norm": 0.557731556141301, "learning_rate": 1.7771034853583923e-05, "loss": 11.7614, "step": 29863 }, { "epoch": 1.6262141539550619, "grad_norm": 0.5498987337364991, "learning_rate": 1.7766016974380606e-05, "loss": 11.8434, "step": 29864 }, { "epoch": 1.6262686079516449, "grad_norm": 0.5814591000650591, "learning_rate": 1.7760999734642513e-05, "loss": 11.8587, "step": 29865 }, { "epoch": 1.6263230619482278, "grad_norm": 0.5252663649406915, "learning_rate": 1.7755983134408703e-05, "loss": 11.7993, "step": 29866 }, { "epoch": 1.6263775159448108, "grad_norm": 0.5483473578770199, "learning_rate": 1.775096717371816e-05, "loss": 11.8305, "step": 29867 }, { "epoch": 1.626431969941394, "grad_norm": 0.5179828168760365, "learning_rate": 1.7745951852609865e-05, "loss": 11.8467, "step": 29868 }, { "epoch": 1.626486423937977, "grad_norm": 0.6214975217796868, "learning_rate": 1.7740937171122864e-05, "loss": 11.7297, "step": 29869 }, { "epoch": 1.62654087793456, "grad_norm": 0.5395443085330129, "learning_rate": 1.7735923129296107e-05, "loss": 11.805, "step": 29870 }, { "epoch": 1.626595331931143, "grad_norm": 0.6039017919913058, "learning_rate": 1.7730909727168632e-05, "loss": 11.8849, "step": 29871 }, { "epoch": 1.626649785927726, "grad_norm": 0.5699838813347412, "learning_rate": 1.7725896964779375e-05, "loss": 11.8384, "step": 29872 }, { "epoch": 1.626704239924309, "grad_norm": 0.5390644551119237, "learning_rate": 1.772088484216734e-05, "loss": 11.8132, "step": 29873 }, { "epoch": 1.626758693920892, "grad_norm": 0.581202814320493, "learning_rate": 1.7715873359371526e-05, "loss": 11.885, "step": 29874 }, { "epoch": 1.626813147917475, "grad_norm": 0.4968017133864991, "learning_rate": 1.7710862516430858e-05, "loss": 11.6651, "step": 29875 }, { "epoch": 1.626867601914058, "grad_norm": 0.5312199139071078, "learning_rate": 1.7705852313384374e-05, "loss": 11.8351, "step": 29876 }, { "epoch": 1.626922055910641, "grad_norm": 0.5009578686172519, "learning_rate": 1.770084275027093e-05, "loss": 11.773, "step": 29877 }, { "epoch": 1.626976509907224, "grad_norm": 0.5437621951448142, "learning_rate": 1.7695833827129538e-05, "loss": 11.7318, "step": 29878 }, { "epoch": 1.627030963903807, "grad_norm": 0.5263257599612827, "learning_rate": 1.7690825543999167e-05, "loss": 11.86, "step": 29879 }, { "epoch": 1.62708541790039, "grad_norm": 0.529554063485207, "learning_rate": 1.7685817900918723e-05, "loss": 11.7946, "step": 29880 }, { "epoch": 1.627139871896973, "grad_norm": 0.5981416094577404, "learning_rate": 1.768081089792717e-05, "loss": 11.9005, "step": 29881 }, { "epoch": 1.627194325893556, "grad_norm": 0.589911703001778, "learning_rate": 1.7675804535063424e-05, "loss": 11.8847, "step": 29882 }, { "epoch": 1.627248779890139, "grad_norm": 0.5577245965792366, "learning_rate": 1.767079881236645e-05, "loss": 11.843, "step": 29883 }, { "epoch": 1.627303233886722, "grad_norm": 0.5370008557756406, "learning_rate": 1.7665793729875125e-05, "loss": 11.7251, "step": 29884 }, { "epoch": 1.627357687883305, "grad_norm": 0.55426542388383, "learning_rate": 1.76607892876284e-05, "loss": 11.8755, "step": 29885 }, { "epoch": 1.6274121418798881, "grad_norm": 0.5663218347031582, "learning_rate": 1.7655785485665233e-05, "loss": 11.7826, "step": 29886 }, { "epoch": 1.6274665958764711, "grad_norm": 0.5194411723218578, "learning_rate": 1.765078232402444e-05, "loss": 11.7238, "step": 29887 }, { "epoch": 1.6275210498730541, "grad_norm": 0.5452031387886116, "learning_rate": 1.7645779802744965e-05, "loss": 11.8034, "step": 29888 }, { "epoch": 1.6275755038696371, "grad_norm": 0.625163580505798, "learning_rate": 1.7640777921865746e-05, "loss": 11.7914, "step": 29889 }, { "epoch": 1.6276299578662201, "grad_norm": 0.562152490599785, "learning_rate": 1.7635776681425632e-05, "loss": 11.8573, "step": 29890 }, { "epoch": 1.627684411862803, "grad_norm": 0.5631411373166973, "learning_rate": 1.763077608146355e-05, "loss": 11.8009, "step": 29891 }, { "epoch": 1.6277388658593863, "grad_norm": 0.5004948928729843, "learning_rate": 1.7625776122018333e-05, "loss": 11.824, "step": 29892 }, { "epoch": 1.6277933198559693, "grad_norm": 0.5780593933229375, "learning_rate": 1.7620776803128936e-05, "loss": 11.9005, "step": 29893 }, { "epoch": 1.6278477738525523, "grad_norm": 0.543401389222193, "learning_rate": 1.761577812483416e-05, "loss": 11.7474, "step": 29894 }, { "epoch": 1.6279022278491353, "grad_norm": 0.5595408395569539, "learning_rate": 1.761078008717293e-05, "loss": 11.8585, "step": 29895 }, { "epoch": 1.6279566818457183, "grad_norm": 0.5635121012342469, "learning_rate": 1.7605782690184103e-05, "loss": 11.9676, "step": 29896 }, { "epoch": 1.6280111358423013, "grad_norm": 0.5949180173017401, "learning_rate": 1.7600785933906494e-05, "loss": 11.9263, "step": 29897 }, { "epoch": 1.6280655898388843, "grad_norm": 0.6192375097089666, "learning_rate": 1.7595789818379017e-05, "loss": 11.8609, "step": 29898 }, { "epoch": 1.6281200438354673, "grad_norm": 0.5377687262894562, "learning_rate": 1.7590794343640472e-05, "loss": 11.7447, "step": 29899 }, { "epoch": 1.6281744978320503, "grad_norm": 0.5328658151317262, "learning_rate": 1.7585799509729727e-05, "loss": 11.8867, "step": 29900 }, { "epoch": 1.6282289518286333, "grad_norm": 0.6000130189421203, "learning_rate": 1.7580805316685666e-05, "loss": 11.7832, "step": 29901 }, { "epoch": 1.6282834058252162, "grad_norm": 0.6347525803748942, "learning_rate": 1.7575811764547045e-05, "loss": 11.9967, "step": 29902 }, { "epoch": 1.6283378598217992, "grad_norm": 0.507984416260509, "learning_rate": 1.7570818853352766e-05, "loss": 11.6879, "step": 29903 }, { "epoch": 1.6283923138183822, "grad_norm": 0.5272719736698143, "learning_rate": 1.75658265831416e-05, "loss": 11.7738, "step": 29904 }, { "epoch": 1.6284467678149652, "grad_norm": 0.5171362303261697, "learning_rate": 1.756083495395241e-05, "loss": 11.7618, "step": 29905 }, { "epoch": 1.6285012218115482, "grad_norm": 0.573823690687866, "learning_rate": 1.7555843965823992e-05, "loss": 11.8225, "step": 29906 }, { "epoch": 1.6285556758081312, "grad_norm": 0.6025332639191686, "learning_rate": 1.7550853618795125e-05, "loss": 11.8546, "step": 29907 }, { "epoch": 1.6286101298047142, "grad_norm": 0.5876088671905414, "learning_rate": 1.7545863912904693e-05, "loss": 11.9995, "step": 29908 }, { "epoch": 1.6286645838012974, "grad_norm": 0.6659103166319886, "learning_rate": 1.7540874848191413e-05, "loss": 11.7968, "step": 29909 }, { "epoch": 1.6287190377978804, "grad_norm": 0.5783262871274817, "learning_rate": 1.753588642469414e-05, "loss": 11.7612, "step": 29910 }, { "epoch": 1.6287734917944634, "grad_norm": 0.49121689900863397, "learning_rate": 1.753089864245162e-05, "loss": 11.7456, "step": 29911 }, { "epoch": 1.6288279457910464, "grad_norm": 0.554789154861225, "learning_rate": 1.7525911501502666e-05, "loss": 11.6595, "step": 29912 }, { "epoch": 1.6288823997876294, "grad_norm": 0.4976445127465602, "learning_rate": 1.752092500188608e-05, "loss": 11.8202, "step": 29913 }, { "epoch": 1.6289368537842124, "grad_norm": 0.574762160355171, "learning_rate": 1.7515939143640592e-05, "loss": 11.7653, "step": 29914 }, { "epoch": 1.6289913077807956, "grad_norm": 0.54272193204427, "learning_rate": 1.7510953926805006e-05, "loss": 11.8093, "step": 29915 }, { "epoch": 1.6290457617773786, "grad_norm": 0.5370533238410519, "learning_rate": 1.7505969351418083e-05, "loss": 11.7391, "step": 29916 }, { "epoch": 1.6291002157739616, "grad_norm": 0.5176173028365498, "learning_rate": 1.7500985417518555e-05, "loss": 11.7328, "step": 29917 }, { "epoch": 1.6291546697705446, "grad_norm": 0.5533274271616844, "learning_rate": 1.7496002125145217e-05, "loss": 11.7789, "step": 29918 }, { "epoch": 1.6292091237671276, "grad_norm": 0.5672350726348384, "learning_rate": 1.7491019474336788e-05, "loss": 11.8602, "step": 29919 }, { "epoch": 1.6292635777637106, "grad_norm": 0.5632535034997334, "learning_rate": 1.7486037465132042e-05, "loss": 11.8678, "step": 29920 }, { "epoch": 1.6293180317602936, "grad_norm": 0.5189339183346685, "learning_rate": 1.7481056097569692e-05, "loss": 11.8659, "step": 29921 }, { "epoch": 1.6293724857568765, "grad_norm": 0.5896665295666436, "learning_rate": 1.7476075371688484e-05, "loss": 11.8734, "step": 29922 }, { "epoch": 1.6294269397534595, "grad_norm": 0.5639517663215442, "learning_rate": 1.7471095287527173e-05, "loss": 11.8643, "step": 29923 }, { "epoch": 1.6294813937500425, "grad_norm": 0.532895021995113, "learning_rate": 1.7466115845124443e-05, "loss": 11.783, "step": 29924 }, { "epoch": 1.6295358477466255, "grad_norm": 0.5485912463971948, "learning_rate": 1.746113704451906e-05, "loss": 11.7881, "step": 29925 }, { "epoch": 1.6295903017432085, "grad_norm": 0.5370351342773633, "learning_rate": 1.7456158885749698e-05, "loss": 11.8659, "step": 29926 }, { "epoch": 1.6296447557397915, "grad_norm": 0.5385903916694121, "learning_rate": 1.7451181368855108e-05, "loss": 11.8075, "step": 29927 }, { "epoch": 1.6296992097363745, "grad_norm": 0.5417105813799838, "learning_rate": 1.7446204493873985e-05, "loss": 11.8404, "step": 29928 }, { "epoch": 1.6297536637329575, "grad_norm": 0.5365161137756895, "learning_rate": 1.7441228260844977e-05, "loss": 11.8451, "step": 29929 }, { "epoch": 1.6298081177295405, "grad_norm": 0.5404649118432637, "learning_rate": 1.7436252669806864e-05, "loss": 11.6875, "step": 29930 }, { "epoch": 1.6298625717261235, "grad_norm": 0.5416142284054279, "learning_rate": 1.7431277720798266e-05, "loss": 11.7902, "step": 29931 }, { "epoch": 1.6299170257227065, "grad_norm": 0.5846514279150229, "learning_rate": 1.7426303413857924e-05, "loss": 11.7458, "step": 29932 }, { "epoch": 1.6299714797192897, "grad_norm": 0.526056418719975, "learning_rate": 1.742132974902446e-05, "loss": 11.6389, "step": 29933 }, { "epoch": 1.6300259337158727, "grad_norm": 0.5459289100184062, "learning_rate": 1.74163567263366e-05, "loss": 11.7018, "step": 29934 }, { "epoch": 1.6300803877124557, "grad_norm": 0.4947918508224737, "learning_rate": 1.7411384345833016e-05, "loss": 11.775, "step": 29935 }, { "epoch": 1.6301348417090387, "grad_norm": 0.5410534306996849, "learning_rate": 1.7406412607552335e-05, "loss": 11.7166, "step": 29936 }, { "epoch": 1.6301892957056217, "grad_norm": 0.6530609830229392, "learning_rate": 1.7401441511533268e-05, "loss": 11.9182, "step": 29937 }, { "epoch": 1.6302437497022049, "grad_norm": 0.5322161878776143, "learning_rate": 1.7396471057814433e-05, "loss": 11.8298, "step": 29938 }, { "epoch": 1.6302982036987879, "grad_norm": 0.5067145283136489, "learning_rate": 1.7391501246434472e-05, "loss": 11.8654, "step": 29939 }, { "epoch": 1.6303526576953709, "grad_norm": 0.5367518130966582, "learning_rate": 1.7386532077432084e-05, "loss": 11.7867, "step": 29940 }, { "epoch": 1.6304071116919538, "grad_norm": 0.5940582748469746, "learning_rate": 1.7381563550845836e-05, "loss": 11.8191, "step": 29941 }, { "epoch": 1.6304615656885368, "grad_norm": 0.5854313763757858, "learning_rate": 1.7376595666714444e-05, "loss": 11.7924, "step": 29942 }, { "epoch": 1.6305160196851198, "grad_norm": 0.586362723890138, "learning_rate": 1.737162842507647e-05, "loss": 11.6086, "step": 29943 }, { "epoch": 1.6305704736817028, "grad_norm": 0.513156294977651, "learning_rate": 1.73666618259706e-05, "loss": 11.7184, "step": 29944 }, { "epoch": 1.6306249276782858, "grad_norm": 0.553937100181963, "learning_rate": 1.736169586943539e-05, "loss": 12.0196, "step": 29945 }, { "epoch": 1.6306793816748688, "grad_norm": 0.5563188406615613, "learning_rate": 1.7356730555509494e-05, "loss": 11.8782, "step": 29946 }, { "epoch": 1.6307338356714518, "grad_norm": 0.5886290555930446, "learning_rate": 1.7351765884231575e-05, "loss": 11.9051, "step": 29947 }, { "epoch": 1.6307882896680348, "grad_norm": 0.5713424153146086, "learning_rate": 1.7346801855640138e-05, "loss": 11.9171, "step": 29948 }, { "epoch": 1.6308427436646178, "grad_norm": 0.5629399749142001, "learning_rate": 1.7341838469773818e-05, "loss": 11.7363, "step": 29949 }, { "epoch": 1.6308971976612008, "grad_norm": 0.5192354403945891, "learning_rate": 1.7336875726671265e-05, "loss": 11.9123, "step": 29950 }, { "epoch": 1.6309516516577838, "grad_norm": 0.4996096798522264, "learning_rate": 1.733191362637099e-05, "loss": 11.8283, "step": 29951 }, { "epoch": 1.6310061056543668, "grad_norm": 0.5517685381132719, "learning_rate": 1.732695216891165e-05, "loss": 11.7819, "step": 29952 }, { "epoch": 1.6310605596509498, "grad_norm": 0.5743685710000227, "learning_rate": 1.7321991354331768e-05, "loss": 11.8194, "step": 29953 }, { "epoch": 1.6311150136475328, "grad_norm": 0.502849034050244, "learning_rate": 1.7317031182669975e-05, "loss": 11.7695, "step": 29954 }, { "epoch": 1.6311694676441157, "grad_norm": 0.557595428652575, "learning_rate": 1.7312071653964778e-05, "loss": 11.9792, "step": 29955 }, { "epoch": 1.631223921640699, "grad_norm": 0.5255916282851473, "learning_rate": 1.7307112768254785e-05, "loss": 11.7699, "step": 29956 }, { "epoch": 1.631278375637282, "grad_norm": 0.5822218988876974, "learning_rate": 1.7302154525578607e-05, "loss": 11.6688, "step": 29957 }, { "epoch": 1.631332829633865, "grad_norm": 0.5970869466234149, "learning_rate": 1.729719692597468e-05, "loss": 11.9408, "step": 29958 }, { "epoch": 1.631387283630448, "grad_norm": 0.5221473922026875, "learning_rate": 1.729223996948166e-05, "loss": 11.7715, "step": 29959 }, { "epoch": 1.631441737627031, "grad_norm": 0.5946245539765873, "learning_rate": 1.728728365613801e-05, "loss": 11.7498, "step": 29960 }, { "epoch": 1.631496191623614, "grad_norm": 0.5729044952909914, "learning_rate": 1.728232798598233e-05, "loss": 11.9128, "step": 29961 }, { "epoch": 1.6315506456201971, "grad_norm": 0.6188856348056977, "learning_rate": 1.727737295905315e-05, "loss": 11.8017, "step": 29962 }, { "epoch": 1.6316050996167801, "grad_norm": 0.5219248367702782, "learning_rate": 1.727241857538897e-05, "loss": 11.7027, "step": 29963 }, { "epoch": 1.6316595536133631, "grad_norm": 0.5771117140186215, "learning_rate": 1.7267464835028368e-05, "loss": 11.757, "step": 29964 }, { "epoch": 1.6317140076099461, "grad_norm": 0.534418423431781, "learning_rate": 1.72625117380098e-05, "loss": 11.8091, "step": 29965 }, { "epoch": 1.631768461606529, "grad_norm": 0.5382609954575446, "learning_rate": 1.725755928437185e-05, "loss": 11.6708, "step": 29966 }, { "epoch": 1.631822915603112, "grad_norm": 0.573478177342807, "learning_rate": 1.725260747415299e-05, "loss": 11.787, "step": 29967 }, { "epoch": 1.631877369599695, "grad_norm": 0.5501560654196728, "learning_rate": 1.7247656307391703e-05, "loss": 11.8144, "step": 29968 }, { "epoch": 1.631931823596278, "grad_norm": 0.5329772046645241, "learning_rate": 1.7242705784126545e-05, "loss": 11.7624, "step": 29969 }, { "epoch": 1.631986277592861, "grad_norm": 0.5403872408862898, "learning_rate": 1.723775590439596e-05, "loss": 11.843, "step": 29970 }, { "epoch": 1.632040731589444, "grad_norm": 0.6013170532934077, "learning_rate": 1.7232806668238466e-05, "loss": 11.8641, "step": 29971 }, { "epoch": 1.632095185586027, "grad_norm": 0.5806003289194821, "learning_rate": 1.7227858075692572e-05, "loss": 11.8855, "step": 29972 }, { "epoch": 1.63214963958261, "grad_norm": 0.5536369154366305, "learning_rate": 1.722291012679671e-05, "loss": 11.6345, "step": 29973 }, { "epoch": 1.632204093579193, "grad_norm": 0.5051355613369085, "learning_rate": 1.7217962821589405e-05, "loss": 11.8649, "step": 29974 }, { "epoch": 1.632258547575776, "grad_norm": 0.5162418393512304, "learning_rate": 1.7213016160109075e-05, "loss": 11.8208, "step": 29975 }, { "epoch": 1.632313001572359, "grad_norm": 0.5359214026692442, "learning_rate": 1.720807014239425e-05, "loss": 11.8198, "step": 29976 }, { "epoch": 1.632367455568942, "grad_norm": 0.5661563304303698, "learning_rate": 1.7203124768483347e-05, "loss": 11.8668, "step": 29977 }, { "epoch": 1.632421909565525, "grad_norm": 0.5385703886541486, "learning_rate": 1.7198180038414814e-05, "loss": 11.81, "step": 29978 }, { "epoch": 1.6324763635621082, "grad_norm": 0.5266639322681721, "learning_rate": 1.7193235952227127e-05, "loss": 11.8471, "step": 29979 }, { "epoch": 1.6325308175586912, "grad_norm": 0.5411447754752525, "learning_rate": 1.7188292509958714e-05, "loss": 11.7058, "step": 29980 }, { "epoch": 1.6325852715552742, "grad_norm": 0.5698511022891626, "learning_rate": 1.7183349711648045e-05, "loss": 11.795, "step": 29981 }, { "epoch": 1.6326397255518572, "grad_norm": 0.5468815026330077, "learning_rate": 1.717840755733352e-05, "loss": 11.7333, "step": 29982 }, { "epoch": 1.6326941795484402, "grad_norm": 0.5504093319599296, "learning_rate": 1.7173466047053576e-05, "loss": 11.7767, "step": 29983 }, { "epoch": 1.6327486335450232, "grad_norm": 0.6155818544855903, "learning_rate": 1.7168525180846683e-05, "loss": 11.9369, "step": 29984 }, { "epoch": 1.6328030875416064, "grad_norm": 0.5670589884714078, "learning_rate": 1.71635849587512e-05, "loss": 11.8164, "step": 29985 }, { "epoch": 1.6328575415381894, "grad_norm": 0.607814367470063, "learning_rate": 1.7158645380805595e-05, "loss": 11.7662, "step": 29986 }, { "epoch": 1.6329119955347724, "grad_norm": 0.5492429874772824, "learning_rate": 1.7153706447048256e-05, "loss": 11.8007, "step": 29987 }, { "epoch": 1.6329664495313554, "grad_norm": 0.5837794851697979, "learning_rate": 1.714876815751757e-05, "loss": 11.8114, "step": 29988 }, { "epoch": 1.6330209035279384, "grad_norm": 0.6224607450061648, "learning_rate": 1.714383051225198e-05, "loss": 11.7728, "step": 29989 }, { "epoch": 1.6330753575245214, "grad_norm": 0.5772532331114856, "learning_rate": 1.7138893511289833e-05, "loss": 11.8376, "step": 29990 }, { "epoch": 1.6331298115211044, "grad_norm": 0.5929361195225834, "learning_rate": 1.7133957154669566e-05, "loss": 11.7147, "step": 29991 }, { "epoch": 1.6331842655176874, "grad_norm": 0.5619559270786373, "learning_rate": 1.7129021442429526e-05, "loss": 11.8793, "step": 29992 }, { "epoch": 1.6332387195142704, "grad_norm": 0.5084116920288405, "learning_rate": 1.712408637460814e-05, "loss": 11.6987, "step": 29993 }, { "epoch": 1.6332931735108533, "grad_norm": 0.6164379057478121, "learning_rate": 1.711915195124373e-05, "loss": 11.8974, "step": 29994 }, { "epoch": 1.6333476275074363, "grad_norm": 0.8771008723158041, "learning_rate": 1.7114218172374687e-05, "loss": 11.771, "step": 29995 }, { "epoch": 1.6334020815040193, "grad_norm": 0.6433309937496873, "learning_rate": 1.7109285038039414e-05, "loss": 11.9548, "step": 29996 }, { "epoch": 1.6334565355006023, "grad_norm": 0.5610217464026294, "learning_rate": 1.7104352548276238e-05, "loss": 11.8948, "step": 29997 }, { "epoch": 1.6335109894971853, "grad_norm": 0.5950032725760158, "learning_rate": 1.7099420703123492e-05, "loss": 11.9114, "step": 29998 }, { "epoch": 1.6335654434937683, "grad_norm": 0.5461232231821463, "learning_rate": 1.7094489502619583e-05, "loss": 11.7725, "step": 29999 }, { "epoch": 1.6336198974903513, "grad_norm": 0.6059168275598296, "learning_rate": 1.7089558946802808e-05, "loss": 11.9028, "step": 30000 }, { "epoch": 1.6336743514869343, "grad_norm": 0.6105096510741903, "learning_rate": 1.708462903571154e-05, "loss": 11.7295, "step": 30001 }, { "epoch": 1.6337288054835175, "grad_norm": 0.5402355451457493, "learning_rate": 1.7079699769384084e-05, "loss": 11.8044, "step": 30002 }, { "epoch": 1.6337832594801005, "grad_norm": 0.5329796961915664, "learning_rate": 1.7074771147858805e-05, "loss": 11.8456, "step": 30003 }, { "epoch": 1.6338377134766835, "grad_norm": 0.6321166473939128, "learning_rate": 1.7069843171173993e-05, "loss": 11.8749, "step": 30004 }, { "epoch": 1.6338921674732665, "grad_norm": 0.5538225118784644, "learning_rate": 1.706491583936799e-05, "loss": 11.7861, "step": 30005 }, { "epoch": 1.6339466214698495, "grad_norm": 0.5914003584658366, "learning_rate": 1.7059989152479126e-05, "loss": 11.7819, "step": 30006 }, { "epoch": 1.6340010754664325, "grad_norm": 0.5376738789432051, "learning_rate": 1.7055063110545698e-05, "loss": 11.7034, "step": 30007 }, { "epoch": 1.6340555294630157, "grad_norm": 0.5155795542157379, "learning_rate": 1.705013771360602e-05, "loss": 11.8084, "step": 30008 }, { "epoch": 1.6341099834595987, "grad_norm": 0.5703404727879696, "learning_rate": 1.7045212961698342e-05, "loss": 11.8154, "step": 30009 }, { "epoch": 1.6341644374561817, "grad_norm": 0.5368328682726928, "learning_rate": 1.704028885486101e-05, "loss": 11.849, "step": 30010 }, { "epoch": 1.6342188914527647, "grad_norm": 0.5565666632153429, "learning_rate": 1.7035365393132328e-05, "loss": 11.8997, "step": 30011 }, { "epoch": 1.6342733454493477, "grad_norm": 0.5732944831513871, "learning_rate": 1.7030442576550533e-05, "loss": 11.8533, "step": 30012 }, { "epoch": 1.6343277994459307, "grad_norm": 0.563069851826406, "learning_rate": 1.702552040515395e-05, "loss": 11.7898, "step": 30013 }, { "epoch": 1.6343822534425136, "grad_norm": 0.6865294138891473, "learning_rate": 1.702059887898081e-05, "loss": 11.859, "step": 30014 }, { "epoch": 1.6344367074390966, "grad_norm": 0.6114203703375761, "learning_rate": 1.7015677998069436e-05, "loss": 11.898, "step": 30015 }, { "epoch": 1.6344911614356796, "grad_norm": 0.5619269326249757, "learning_rate": 1.701075776245804e-05, "loss": 11.8784, "step": 30016 }, { "epoch": 1.6345456154322626, "grad_norm": 0.5496292266512631, "learning_rate": 1.7005838172184942e-05, "loss": 11.6915, "step": 30017 }, { "epoch": 1.6346000694288456, "grad_norm": 0.61453076425597, "learning_rate": 1.700091922728836e-05, "loss": 11.9333, "step": 30018 }, { "epoch": 1.6346545234254286, "grad_norm": 0.5557187034857363, "learning_rate": 1.6996000927806522e-05, "loss": 11.7656, "step": 30019 }, { "epoch": 1.6347089774220116, "grad_norm": 0.544668931532488, "learning_rate": 1.69910832737777e-05, "loss": 11.7086, "step": 30020 }, { "epoch": 1.6347634314185946, "grad_norm": 0.5320903825442154, "learning_rate": 1.6986166265240165e-05, "loss": 11.9186, "step": 30021 }, { "epoch": 1.6348178854151776, "grad_norm": 0.5975450327333364, "learning_rate": 1.698124990223209e-05, "loss": 11.8909, "step": 30022 }, { "epoch": 1.6348723394117606, "grad_norm": 0.6641660955700938, "learning_rate": 1.6976334184791766e-05, "loss": 11.7259, "step": 30023 }, { "epoch": 1.6349267934083436, "grad_norm": 0.565918647424538, "learning_rate": 1.6971419112957366e-05, "loss": 11.833, "step": 30024 }, { "epoch": 1.6349812474049266, "grad_norm": 0.5664943585391604, "learning_rate": 1.696650468676716e-05, "loss": 11.8858, "step": 30025 }, { "epoch": 1.6350357014015098, "grad_norm": 0.5758799834049017, "learning_rate": 1.6961590906259317e-05, "loss": 11.7644, "step": 30026 }, { "epoch": 1.6350901553980928, "grad_norm": 0.5871389275039496, "learning_rate": 1.6956677771472086e-05, "loss": 11.6974, "step": 30027 }, { "epoch": 1.6351446093946758, "grad_norm": 0.5864890693843933, "learning_rate": 1.6951765282443665e-05, "loss": 11.8469, "step": 30028 }, { "epoch": 1.6351990633912588, "grad_norm": 0.5496732357600271, "learning_rate": 1.6946853439212208e-05, "loss": 11.7803, "step": 30029 }, { "epoch": 1.6352535173878417, "grad_norm": 0.54321256875059, "learning_rate": 1.6941942241815976e-05, "loss": 11.8153, "step": 30030 }, { "epoch": 1.6353079713844247, "grad_norm": 0.49234013000421545, "learning_rate": 1.6937031690293114e-05, "loss": 11.8611, "step": 30031 }, { "epoch": 1.635362425381008, "grad_norm": 0.5436932201135376, "learning_rate": 1.693212178468181e-05, "loss": 11.7982, "step": 30032 }, { "epoch": 1.635416879377591, "grad_norm": 0.6021818624904028, "learning_rate": 1.6927212525020286e-05, "loss": 11.8615, "step": 30033 }, { "epoch": 1.635471333374174, "grad_norm": 0.5271136833375871, "learning_rate": 1.6922303911346672e-05, "loss": 11.6763, "step": 30034 }, { "epoch": 1.635525787370757, "grad_norm": 0.5546249424141428, "learning_rate": 1.6917395943699167e-05, "loss": 11.7943, "step": 30035 }, { "epoch": 1.63558024136734, "grad_norm": 0.5544407333531863, "learning_rate": 1.6912488622115906e-05, "loss": 11.798, "step": 30036 }, { "epoch": 1.635634695363923, "grad_norm": 0.55837627104082, "learning_rate": 1.69075819466351e-05, "loss": 11.7274, "step": 30037 }, { "epoch": 1.635689149360506, "grad_norm": 0.4966642026542986, "learning_rate": 1.6902675917294864e-05, "loss": 11.7882, "step": 30038 }, { "epoch": 1.635743603357089, "grad_norm": 0.5733304750743955, "learning_rate": 1.689777053413333e-05, "loss": 11.9074, "step": 30039 }, { "epoch": 1.635798057353672, "grad_norm": 0.5606136053844597, "learning_rate": 1.6892865797188683e-05, "loss": 11.854, "step": 30040 }, { "epoch": 1.635852511350255, "grad_norm": 0.5457936238075886, "learning_rate": 1.6887961706499033e-05, "loss": 11.8125, "step": 30041 }, { "epoch": 1.6359069653468379, "grad_norm": 0.5306080361527131, "learning_rate": 1.6883058262102558e-05, "loss": 11.8243, "step": 30042 }, { "epoch": 1.6359614193434209, "grad_norm": 0.5193432870429455, "learning_rate": 1.6878155464037338e-05, "loss": 11.7919, "step": 30043 }, { "epoch": 1.6360158733400039, "grad_norm": 0.6253338911875089, "learning_rate": 1.687325331234152e-05, "loss": 11.9306, "step": 30044 }, { "epoch": 1.6360703273365869, "grad_norm": 0.5701814352057775, "learning_rate": 1.686835180705324e-05, "loss": 11.8268, "step": 30045 }, { "epoch": 1.6361247813331699, "grad_norm": 0.5394099316510441, "learning_rate": 1.686345094821058e-05, "loss": 11.8104, "step": 30046 }, { "epoch": 1.6361792353297528, "grad_norm": 0.5045219247526627, "learning_rate": 1.6858550735851695e-05, "loss": 11.6289, "step": 30047 }, { "epoch": 1.6362336893263358, "grad_norm": 0.5671328828710982, "learning_rate": 1.685365117001466e-05, "loss": 11.6058, "step": 30048 }, { "epoch": 1.636288143322919, "grad_norm": 0.538110442919234, "learning_rate": 1.6848752250737554e-05, "loss": 11.8118, "step": 30049 }, { "epoch": 1.636342597319502, "grad_norm": 0.522312921593881, "learning_rate": 1.684385397805851e-05, "loss": 11.7837, "step": 30050 }, { "epoch": 1.636397051316085, "grad_norm": 0.5498876554399803, "learning_rate": 1.683895635201559e-05, "loss": 11.7524, "step": 30051 }, { "epoch": 1.636451505312668, "grad_norm": 0.5556707509641179, "learning_rate": 1.6834059372646904e-05, "loss": 11.8493, "step": 30052 }, { "epoch": 1.636505959309251, "grad_norm": 0.5582342262947769, "learning_rate": 1.6829163039990503e-05, "loss": 11.7411, "step": 30053 }, { "epoch": 1.636560413305834, "grad_norm": 0.5381200686560274, "learning_rate": 1.682426735408448e-05, "loss": 11.8162, "step": 30054 }, { "epoch": 1.6366148673024172, "grad_norm": 0.5421399558650414, "learning_rate": 1.6819372314966932e-05, "loss": 11.8233, "step": 30055 }, { "epoch": 1.6366693212990002, "grad_norm": 0.5242904898004394, "learning_rate": 1.6814477922675853e-05, "loss": 11.7721, "step": 30056 }, { "epoch": 1.6367237752955832, "grad_norm": 0.5682402609569833, "learning_rate": 1.6809584177249405e-05, "loss": 11.8661, "step": 30057 }, { "epoch": 1.6367782292921662, "grad_norm": 0.5518682391718505, "learning_rate": 1.6804691078725533e-05, "loss": 11.7571, "step": 30058 }, { "epoch": 1.6368326832887492, "grad_norm": 0.5388755418216244, "learning_rate": 1.6799798627142326e-05, "loss": 11.8295, "step": 30059 }, { "epoch": 1.6368871372853322, "grad_norm": 0.5676185457478856, "learning_rate": 1.679490682253787e-05, "loss": 11.8375, "step": 30060 }, { "epoch": 1.6369415912819152, "grad_norm": 0.5274055204742559, "learning_rate": 1.679001566495014e-05, "loss": 11.8399, "step": 30061 }, { "epoch": 1.6369960452784982, "grad_norm": 0.5763032762948113, "learning_rate": 1.6785125154417235e-05, "loss": 11.8082, "step": 30062 }, { "epoch": 1.6370504992750812, "grad_norm": 0.5071738550733418, "learning_rate": 1.678023529097712e-05, "loss": 11.8153, "step": 30063 }, { "epoch": 1.6371049532716642, "grad_norm": 0.5430073858298432, "learning_rate": 1.6775346074667887e-05, "loss": 11.736, "step": 30064 }, { "epoch": 1.6371594072682472, "grad_norm": 0.5404312233014523, "learning_rate": 1.677045750552748e-05, "loss": 11.8809, "step": 30065 }, { "epoch": 1.6372138612648302, "grad_norm": 0.5368471376442372, "learning_rate": 1.6765569583593964e-05, "loss": 11.7555, "step": 30066 }, { "epoch": 1.6372683152614131, "grad_norm": 0.537025994929866, "learning_rate": 1.676068230890536e-05, "loss": 11.6391, "step": 30067 }, { "epoch": 1.6373227692579961, "grad_norm": 0.5311610970612696, "learning_rate": 1.6755795681499653e-05, "loss": 11.7425, "step": 30068 }, { "epoch": 1.6373772232545791, "grad_norm": 0.5370816819715026, "learning_rate": 1.6750909701414808e-05, "loss": 11.7935, "step": 30069 }, { "epoch": 1.6374316772511621, "grad_norm": 0.5189362084418445, "learning_rate": 1.6746024368688872e-05, "loss": 11.8864, "step": 30070 }, { "epoch": 1.6374861312477451, "grad_norm": 0.5576943627089798, "learning_rate": 1.6741139683359796e-05, "loss": 11.616, "step": 30071 }, { "epoch": 1.6375405852443283, "grad_norm": 0.5894375453579125, "learning_rate": 1.6736255645465602e-05, "loss": 11.7119, "step": 30072 }, { "epoch": 1.6375950392409113, "grad_norm": 0.5471688132992194, "learning_rate": 1.673137225504422e-05, "loss": 11.6751, "step": 30073 }, { "epoch": 1.6376494932374943, "grad_norm": 0.5673981566929037, "learning_rate": 1.672648951213368e-05, "loss": 11.8766, "step": 30074 }, { "epoch": 1.6377039472340773, "grad_norm": 0.5408324528312617, "learning_rate": 1.672160741677189e-05, "loss": 11.7351, "step": 30075 }, { "epoch": 1.6377584012306603, "grad_norm": 0.5820788572291198, "learning_rate": 1.671672596899686e-05, "loss": 11.8543, "step": 30076 }, { "epoch": 1.6378128552272433, "grad_norm": 0.557561109908252, "learning_rate": 1.6711845168846553e-05, "loss": 11.6013, "step": 30077 }, { "epoch": 1.6378673092238265, "grad_norm": 0.5336726556037551, "learning_rate": 1.67069650163589e-05, "loss": 11.845, "step": 30078 }, { "epoch": 1.6379217632204095, "grad_norm": 0.6094915021122238, "learning_rate": 1.6702085511571863e-05, "loss": 11.8543, "step": 30079 }, { "epoch": 1.6379762172169925, "grad_norm": 0.5317362788737234, "learning_rate": 1.6697206654523344e-05, "loss": 11.8369, "step": 30080 }, { "epoch": 1.6380306712135755, "grad_norm": 0.5327956438040591, "learning_rate": 1.6692328445251325e-05, "loss": 11.7825, "step": 30081 }, { "epoch": 1.6380851252101585, "grad_norm": 0.5461396379068768, "learning_rate": 1.6687450883793753e-05, "loss": 11.8955, "step": 30082 }, { "epoch": 1.6381395792067415, "grad_norm": 0.5050090392465167, "learning_rate": 1.668257397018851e-05, "loss": 11.8116, "step": 30083 }, { "epoch": 1.6381940332033245, "grad_norm": 0.5690084558942501, "learning_rate": 1.667769770447357e-05, "loss": 11.6996, "step": 30084 }, { "epoch": 1.6382484871999075, "grad_norm": 0.5827578713703137, "learning_rate": 1.6672822086686803e-05, "loss": 11.8359, "step": 30085 }, { "epoch": 1.6383029411964904, "grad_norm": 0.542803027702863, "learning_rate": 1.6667947116866168e-05, "loss": 11.7655, "step": 30086 }, { "epoch": 1.6383573951930734, "grad_norm": 0.5515404128187872, "learning_rate": 1.6663072795049515e-05, "loss": 11.8565, "step": 30087 }, { "epoch": 1.6384118491896564, "grad_norm": 0.5164036976239055, "learning_rate": 1.665819912127482e-05, "loss": 11.8333, "step": 30088 }, { "epoch": 1.6384663031862394, "grad_norm": 0.592115041557363, "learning_rate": 1.6653326095579946e-05, "loss": 11.6284, "step": 30089 }, { "epoch": 1.6385207571828224, "grad_norm": 0.5585561045423707, "learning_rate": 1.6648453718002765e-05, "loss": 11.8202, "step": 30090 }, { "epoch": 1.6385752111794054, "grad_norm": 0.5429523758392679, "learning_rate": 1.664358198858117e-05, "loss": 11.7837, "step": 30091 }, { "epoch": 1.6386296651759884, "grad_norm": 0.5787009703870182, "learning_rate": 1.6638710907353095e-05, "loss": 11.9051, "step": 30092 }, { "epoch": 1.6386841191725714, "grad_norm": 0.5372462408997021, "learning_rate": 1.6633840474356366e-05, "loss": 11.5681, "step": 30093 }, { "epoch": 1.6387385731691544, "grad_norm": 0.5849190833703195, "learning_rate": 1.6628970689628887e-05, "loss": 11.9292, "step": 30094 }, { "epoch": 1.6387930271657374, "grad_norm": 0.5209925881249817, "learning_rate": 1.6624101553208492e-05, "loss": 11.882, "step": 30095 }, { "epoch": 1.6388474811623206, "grad_norm": 0.5124654393783703, "learning_rate": 1.661923306513309e-05, "loss": 11.7205, "step": 30096 }, { "epoch": 1.6389019351589036, "grad_norm": 0.560753906045654, "learning_rate": 1.6614365225440488e-05, "loss": 11.805, "step": 30097 }, { "epoch": 1.6389563891554866, "grad_norm": 0.5489227700795934, "learning_rate": 1.6609498034168603e-05, "loss": 11.9082, "step": 30098 }, { "epoch": 1.6390108431520696, "grad_norm": 0.5765246165371151, "learning_rate": 1.6604631491355226e-05, "loss": 11.8043, "step": 30099 }, { "epoch": 1.6390652971486526, "grad_norm": 0.5548932072939367, "learning_rate": 1.6599765597038207e-05, "loss": 11.8307, "step": 30100 }, { "epoch": 1.6391197511452358, "grad_norm": 0.530195112560014, "learning_rate": 1.6594900351255428e-05, "loss": 11.841, "step": 30101 }, { "epoch": 1.6391742051418188, "grad_norm": 0.5312002150356473, "learning_rate": 1.659003575404465e-05, "loss": 11.6799, "step": 30102 }, { "epoch": 1.6392286591384018, "grad_norm": 0.5707074796569253, "learning_rate": 1.6585171805443754e-05, "loss": 11.8772, "step": 30103 }, { "epoch": 1.6392831131349848, "grad_norm": 0.6623149603267436, "learning_rate": 1.658030850549057e-05, "loss": 11.9273, "step": 30104 }, { "epoch": 1.6393375671315678, "grad_norm": 0.5247996329746817, "learning_rate": 1.657544585422287e-05, "loss": 11.7142, "step": 30105 }, { "epoch": 1.6393920211281507, "grad_norm": 0.5231783955010734, "learning_rate": 1.6570583851678524e-05, "loss": 11.8849, "step": 30106 }, { "epoch": 1.6394464751247337, "grad_norm": 0.5614133123226214, "learning_rate": 1.6565722497895287e-05, "loss": 11.7412, "step": 30107 }, { "epoch": 1.6395009291213167, "grad_norm": 0.575766303853106, "learning_rate": 1.6560861792910997e-05, "loss": 11.9719, "step": 30108 }, { "epoch": 1.6395553831178997, "grad_norm": 0.5500778176237447, "learning_rate": 1.655600173676345e-05, "loss": 11.7736, "step": 30109 }, { "epoch": 1.6396098371144827, "grad_norm": 0.5503625238016333, "learning_rate": 1.655114232949039e-05, "loss": 11.8682, "step": 30110 }, { "epoch": 1.6396642911110657, "grad_norm": 0.5653805119479136, "learning_rate": 1.6546283571129672e-05, "loss": 11.9648, "step": 30111 }, { "epoch": 1.6397187451076487, "grad_norm": 0.5157511129896286, "learning_rate": 1.6541425461719027e-05, "loss": 11.7993, "step": 30112 }, { "epoch": 1.6397731991042317, "grad_norm": 0.5705645957327735, "learning_rate": 1.653656800129628e-05, "loss": 11.7648, "step": 30113 }, { "epoch": 1.6398276531008147, "grad_norm": 0.5515964624708107, "learning_rate": 1.6531711189899145e-05, "loss": 11.7286, "step": 30114 }, { "epoch": 1.6398821070973977, "grad_norm": 0.5069048453469294, "learning_rate": 1.6526855027565423e-05, "loss": 11.8409, "step": 30115 }, { "epoch": 1.6399365610939807, "grad_norm": 0.5606669819797543, "learning_rate": 1.652199951433291e-05, "loss": 11.8605, "step": 30116 }, { "epoch": 1.6399910150905637, "grad_norm": 0.6099907291433857, "learning_rate": 1.651714465023929e-05, "loss": 11.9511, "step": 30117 }, { "epoch": 1.6400454690871467, "grad_norm": 0.5727123118523072, "learning_rate": 1.6512290435322385e-05, "loss": 11.6288, "step": 30118 }, { "epoch": 1.6400999230837299, "grad_norm": 0.4939985069499722, "learning_rate": 1.6507436869619907e-05, "loss": 11.7311, "step": 30119 }, { "epoch": 1.6401543770803129, "grad_norm": 0.5129205285412832, "learning_rate": 1.650258395316957e-05, "loss": 11.6636, "step": 30120 }, { "epoch": 1.6402088310768959, "grad_norm": 0.5085701211172086, "learning_rate": 1.6497731686009176e-05, "loss": 11.7575, "step": 30121 }, { "epoch": 1.6402632850734788, "grad_norm": 0.5101237714626635, "learning_rate": 1.6492880068176396e-05, "loss": 11.7947, "step": 30122 }, { "epoch": 1.6403177390700618, "grad_norm": 0.5512918945382997, "learning_rate": 1.6488029099709023e-05, "loss": 11.8164, "step": 30123 }, { "epoch": 1.6403721930666448, "grad_norm": 0.5242432731708799, "learning_rate": 1.6483178780644702e-05, "loss": 11.7004, "step": 30124 }, { "epoch": 1.640426647063228, "grad_norm": 0.568338929432154, "learning_rate": 1.6478329111021185e-05, "loss": 11.9587, "step": 30125 }, { "epoch": 1.640481101059811, "grad_norm": 0.507992246282294, "learning_rate": 1.6473480090876226e-05, "loss": 11.7301, "step": 30126 }, { "epoch": 1.640535555056394, "grad_norm": 0.5983717861836699, "learning_rate": 1.646863172024746e-05, "loss": 11.8204, "step": 30127 }, { "epoch": 1.640590009052977, "grad_norm": 0.5772742570237682, "learning_rate": 1.6463783999172665e-05, "loss": 11.7378, "step": 30128 }, { "epoch": 1.64064446304956, "grad_norm": 0.5491561143340316, "learning_rate": 1.6458936927689462e-05, "loss": 11.8633, "step": 30129 }, { "epoch": 1.640698917046143, "grad_norm": 0.5221765408554284, "learning_rate": 1.6454090505835562e-05, "loss": 11.8537, "step": 30130 }, { "epoch": 1.640753371042726, "grad_norm": 0.5578128299948817, "learning_rate": 1.6449244733648695e-05, "loss": 11.8589, "step": 30131 }, { "epoch": 1.640807825039309, "grad_norm": 0.6028340515247499, "learning_rate": 1.6444399611166484e-05, "loss": 11.8208, "step": 30132 }, { "epoch": 1.640862279035892, "grad_norm": 0.5399518459484299, "learning_rate": 1.6439555138426666e-05, "loss": 11.7228, "step": 30133 }, { "epoch": 1.640916733032475, "grad_norm": 0.5902670897304149, "learning_rate": 1.6434711315466844e-05, "loss": 11.8582, "step": 30134 }, { "epoch": 1.640971187029058, "grad_norm": 0.5743644022290435, "learning_rate": 1.6429868142324757e-05, "loss": 11.8942, "step": 30135 }, { "epoch": 1.641025641025641, "grad_norm": 0.5386308223798555, "learning_rate": 1.642502561903799e-05, "loss": 11.7774, "step": 30136 }, { "epoch": 1.641080095022224, "grad_norm": 0.5331663092826117, "learning_rate": 1.6420183745644258e-05, "loss": 11.8669, "step": 30137 }, { "epoch": 1.641134549018807, "grad_norm": 0.5882156028799918, "learning_rate": 1.6415342522181233e-05, "loss": 11.8053, "step": 30138 }, { "epoch": 1.64118900301539, "grad_norm": 0.5164712670058067, "learning_rate": 1.6410501948686473e-05, "loss": 11.8903, "step": 30139 }, { "epoch": 1.641243457011973, "grad_norm": 0.596815269015819, "learning_rate": 1.6405662025197665e-05, "loss": 11.8447, "step": 30140 }, { "epoch": 1.641297911008556, "grad_norm": 0.5299777840801392, "learning_rate": 1.6400822751752475e-05, "loss": 11.7045, "step": 30141 }, { "epoch": 1.6413523650051391, "grad_norm": 0.5109550589165297, "learning_rate": 1.6395984128388487e-05, "loss": 11.8194, "step": 30142 }, { "epoch": 1.6414068190017221, "grad_norm": 0.600931446336842, "learning_rate": 1.6391146155143367e-05, "loss": 11.857, "step": 30143 }, { "epoch": 1.6414612729983051, "grad_norm": 0.5144424977095644, "learning_rate": 1.63863088320547e-05, "loss": 11.6053, "step": 30144 }, { "epoch": 1.6415157269948881, "grad_norm": 0.5553563956708736, "learning_rate": 1.6381472159160126e-05, "loss": 11.8207, "step": 30145 }, { "epoch": 1.6415701809914711, "grad_norm": 0.5406254046813125, "learning_rate": 1.6376636136497235e-05, "loss": 11.8224, "step": 30146 }, { "epoch": 1.641624634988054, "grad_norm": 0.6141331659409237, "learning_rate": 1.6371800764103663e-05, "loss": 11.8021, "step": 30147 }, { "epoch": 1.6416790889846373, "grad_norm": 0.5601358481483191, "learning_rate": 1.6366966042016996e-05, "loss": 11.6448, "step": 30148 }, { "epoch": 1.6417335429812203, "grad_norm": 0.484738020824408, "learning_rate": 1.6362131970274796e-05, "loss": 11.8006, "step": 30149 }, { "epoch": 1.6417879969778033, "grad_norm": 0.5427513378769239, "learning_rate": 1.6357298548914722e-05, "loss": 11.8511, "step": 30150 }, { "epoch": 1.6418424509743863, "grad_norm": 0.5756763379337209, "learning_rate": 1.635246577797428e-05, "loss": 11.8875, "step": 30151 }, { "epoch": 1.6418969049709693, "grad_norm": 0.5385630663061246, "learning_rate": 1.63476336574911e-05, "loss": 11.7996, "step": 30152 }, { "epoch": 1.6419513589675523, "grad_norm": 0.6060538780518313, "learning_rate": 1.6342802187502782e-05, "loss": 11.7774, "step": 30153 }, { "epoch": 1.6420058129641353, "grad_norm": 0.5927889484959951, "learning_rate": 1.6337971368046823e-05, "loss": 11.7934, "step": 30154 }, { "epoch": 1.6420602669607183, "grad_norm": 0.5672123417583669, "learning_rate": 1.633314119916086e-05, "loss": 11.8023, "step": 30155 }, { "epoch": 1.6421147209573013, "grad_norm": 0.5047756720524929, "learning_rate": 1.63283116808824e-05, "loss": 11.8242, "step": 30156 }, { "epoch": 1.6421691749538843, "grad_norm": 0.5481889386605263, "learning_rate": 1.6323482813249046e-05, "loss": 11.8706, "step": 30157 }, { "epoch": 1.6422236289504673, "grad_norm": 0.5739071727362628, "learning_rate": 1.631865459629832e-05, "loss": 11.9472, "step": 30158 }, { "epoch": 1.6422780829470502, "grad_norm": 0.5230641715863068, "learning_rate": 1.6313827030067742e-05, "loss": 11.7824, "step": 30159 }, { "epoch": 1.6423325369436332, "grad_norm": 0.533055946320717, "learning_rate": 1.6309000114594907e-05, "loss": 11.7953, "step": 30160 }, { "epoch": 1.6423869909402162, "grad_norm": 0.5262768491491955, "learning_rate": 1.6304173849917293e-05, "loss": 11.7402, "step": 30161 }, { "epoch": 1.6424414449367992, "grad_norm": 0.5180209889053046, "learning_rate": 1.6299348236072487e-05, "loss": 11.7873, "step": 30162 }, { "epoch": 1.6424958989333822, "grad_norm": 0.5472664669211041, "learning_rate": 1.629452327309796e-05, "loss": 11.7478, "step": 30163 }, { "epoch": 1.6425503529299652, "grad_norm": 0.5622909712539453, "learning_rate": 1.6289698961031265e-05, "loss": 11.917, "step": 30164 }, { "epoch": 1.6426048069265482, "grad_norm": 0.5305997335205761, "learning_rate": 1.628487529990992e-05, "loss": 11.813, "step": 30165 }, { "epoch": 1.6426592609231314, "grad_norm": 0.49457213538677725, "learning_rate": 1.6280052289771407e-05, "loss": 11.7294, "step": 30166 }, { "epoch": 1.6427137149197144, "grad_norm": 0.5504786587383643, "learning_rate": 1.627522993065327e-05, "loss": 11.8793, "step": 30167 }, { "epoch": 1.6427681689162974, "grad_norm": 0.5563024258222868, "learning_rate": 1.6270408222592992e-05, "loss": 11.7451, "step": 30168 }, { "epoch": 1.6428226229128804, "grad_norm": 0.5538065961453419, "learning_rate": 1.6265587165628026e-05, "loss": 11.8441, "step": 30169 }, { "epoch": 1.6428770769094634, "grad_norm": 0.5133949483620408, "learning_rate": 1.6260766759795932e-05, "loss": 11.8452, "step": 30170 }, { "epoch": 1.6429315309060466, "grad_norm": 0.537967420213983, "learning_rate": 1.625594700513413e-05, "loss": 11.7883, "step": 30171 }, { "epoch": 1.6429859849026296, "grad_norm": 0.5286085004048998, "learning_rate": 1.6251127901680154e-05, "loss": 11.8057, "step": 30172 }, { "epoch": 1.6430404388992126, "grad_norm": 0.5052098441762602, "learning_rate": 1.624630944947142e-05, "loss": 11.702, "step": 30173 }, { "epoch": 1.6430948928957956, "grad_norm": 0.6221990772921728, "learning_rate": 1.6241491648545425e-05, "loss": 12.0073, "step": 30174 }, { "epoch": 1.6431493468923786, "grad_norm": 0.5572868259678778, "learning_rate": 1.6236674498939674e-05, "loss": 11.7741, "step": 30175 }, { "epoch": 1.6432038008889616, "grad_norm": 0.6730592903415611, "learning_rate": 1.623185800069157e-05, "loss": 11.8505, "step": 30176 }, { "epoch": 1.6432582548855446, "grad_norm": 0.5398495166799504, "learning_rate": 1.6227042153838602e-05, "loss": 11.8462, "step": 30177 }, { "epoch": 1.6433127088821275, "grad_norm": 0.5434122483300948, "learning_rate": 1.6222226958418196e-05, "loss": 11.8473, "step": 30178 }, { "epoch": 1.6433671628787105, "grad_norm": 0.5141177947597703, "learning_rate": 1.621741241446778e-05, "loss": 11.7393, "step": 30179 }, { "epoch": 1.6434216168752935, "grad_norm": 0.5144025252010122, "learning_rate": 1.6212598522024846e-05, "loss": 11.7931, "step": 30180 }, { "epoch": 1.6434760708718765, "grad_norm": 0.5644670811107217, "learning_rate": 1.6207785281126776e-05, "loss": 11.7436, "step": 30181 }, { "epoch": 1.6435305248684595, "grad_norm": 0.5343302417104526, "learning_rate": 1.6202972691811036e-05, "loss": 11.8889, "step": 30182 }, { "epoch": 1.6435849788650425, "grad_norm": 0.5538776457357333, "learning_rate": 1.619816075411501e-05, "loss": 11.7634, "step": 30183 }, { "epoch": 1.6436394328616255, "grad_norm": 0.5331346245091763, "learning_rate": 1.619334946807617e-05, "loss": 11.6585, "step": 30184 }, { "epoch": 1.6436938868582085, "grad_norm": 0.5369501897949668, "learning_rate": 1.618853883373187e-05, "loss": 11.5974, "step": 30185 }, { "epoch": 1.6437483408547915, "grad_norm": 0.549389119772309, "learning_rate": 1.618372885111955e-05, "loss": 11.8103, "step": 30186 }, { "epoch": 1.6438027948513745, "grad_norm": 0.5392353962985108, "learning_rate": 1.6178919520276636e-05, "loss": 11.8543, "step": 30187 }, { "epoch": 1.6438572488479575, "grad_norm": 0.5745473003725531, "learning_rate": 1.6174110841240498e-05, "loss": 11.8401, "step": 30188 }, { "epoch": 1.6439117028445407, "grad_norm": 0.5425281327084583, "learning_rate": 1.6169302814048505e-05, "loss": 11.756, "step": 30189 }, { "epoch": 1.6439661568411237, "grad_norm": 0.5404762589688521, "learning_rate": 1.6164495438738093e-05, "loss": 11.7027, "step": 30190 }, { "epoch": 1.6440206108377067, "grad_norm": 0.5224215118945175, "learning_rate": 1.615968871534661e-05, "loss": 11.7933, "step": 30191 }, { "epoch": 1.6440750648342897, "grad_norm": 0.5558364736720943, "learning_rate": 1.6154882643911462e-05, "loss": 11.8388, "step": 30192 }, { "epoch": 1.6441295188308727, "grad_norm": 0.5201611773068436, "learning_rate": 1.6150077224469982e-05, "loss": 11.7992, "step": 30193 }, { "epoch": 1.6441839728274557, "grad_norm": 0.6008427608907387, "learning_rate": 1.61452724570596e-05, "loss": 11.9217, "step": 30194 }, { "epoch": 1.6442384268240389, "grad_norm": 0.5242592097140267, "learning_rate": 1.6140468341717606e-05, "loss": 11.8247, "step": 30195 }, { "epoch": 1.6442928808206219, "grad_norm": 0.5179011124989028, "learning_rate": 1.6135664878481427e-05, "loss": 11.7884, "step": 30196 }, { "epoch": 1.6443473348172049, "grad_norm": 0.5513964123639697, "learning_rate": 1.6130862067388353e-05, "loss": 11.8346, "step": 30197 }, { "epoch": 1.6444017888137878, "grad_norm": 0.5532758824760037, "learning_rate": 1.6126059908475778e-05, "loss": 11.8882, "step": 30198 }, { "epoch": 1.6444562428103708, "grad_norm": 0.5063132610517528, "learning_rate": 1.6121258401781035e-05, "loss": 11.7788, "step": 30199 }, { "epoch": 1.6445106968069538, "grad_norm": 0.5780013179652417, "learning_rate": 1.6116457547341425e-05, "loss": 11.7088, "step": 30200 }, { "epoch": 1.6445651508035368, "grad_norm": 0.5981226697750344, "learning_rate": 1.6111657345194308e-05, "loss": 11.7495, "step": 30201 }, { "epoch": 1.6446196048001198, "grad_norm": 0.5244753579200823, "learning_rate": 1.6106857795377038e-05, "loss": 11.8968, "step": 30202 }, { "epoch": 1.6446740587967028, "grad_norm": 0.5259705428364313, "learning_rate": 1.6102058897926886e-05, "loss": 11.747, "step": 30203 }, { "epoch": 1.6447285127932858, "grad_norm": 0.6379847630238716, "learning_rate": 1.609726065288122e-05, "loss": 11.8394, "step": 30204 }, { "epoch": 1.6447829667898688, "grad_norm": 0.5361327029031906, "learning_rate": 1.609246306027731e-05, "loss": 11.8516, "step": 30205 }, { "epoch": 1.6448374207864518, "grad_norm": 0.46549869099427416, "learning_rate": 1.608766612015249e-05, "loss": 11.7019, "step": 30206 }, { "epoch": 1.6448918747830348, "grad_norm": 0.5261882995306035, "learning_rate": 1.6082869832544022e-05, "loss": 11.8061, "step": 30207 }, { "epoch": 1.6449463287796178, "grad_norm": 0.49370947612749594, "learning_rate": 1.607807419748927e-05, "loss": 11.7223, "step": 30208 }, { "epoch": 1.6450007827762008, "grad_norm": 0.5000206853764118, "learning_rate": 1.6073279215025473e-05, "loss": 11.816, "step": 30209 }, { "epoch": 1.6450552367727838, "grad_norm": 0.5787497720942635, "learning_rate": 1.6068484885189915e-05, "loss": 11.9479, "step": 30210 }, { "epoch": 1.6451096907693668, "grad_norm": 0.5372897855231669, "learning_rate": 1.6063691208019917e-05, "loss": 11.8536, "step": 30211 }, { "epoch": 1.64516414476595, "grad_norm": 0.5002106263799222, "learning_rate": 1.6058898183552708e-05, "loss": 11.661, "step": 30212 }, { "epoch": 1.645218598762533, "grad_norm": 0.5660293224008779, "learning_rate": 1.6054105811825582e-05, "loss": 11.8339, "step": 30213 }, { "epoch": 1.645273052759116, "grad_norm": 0.5275471143776981, "learning_rate": 1.6049314092875832e-05, "loss": 11.8119, "step": 30214 }, { "epoch": 1.645327506755699, "grad_norm": 0.5427198343184654, "learning_rate": 1.6044523026740666e-05, "loss": 11.8751, "step": 30215 }, { "epoch": 1.645381960752282, "grad_norm": 0.5249795762334607, "learning_rate": 1.603973261345739e-05, "loss": 11.7967, "step": 30216 }, { "epoch": 1.645436414748865, "grad_norm": 0.5720662556319112, "learning_rate": 1.6034942853063218e-05, "loss": 11.844, "step": 30217 }, { "epoch": 1.6454908687454481, "grad_norm": 0.6051020981225518, "learning_rate": 1.603015374559543e-05, "loss": 11.9617, "step": 30218 }, { "epoch": 1.6455453227420311, "grad_norm": 0.5839193438142962, "learning_rate": 1.6025365291091253e-05, "loss": 11.8775, "step": 30219 }, { "epoch": 1.6455997767386141, "grad_norm": 0.5290094140171435, "learning_rate": 1.6020577489587897e-05, "loss": 11.8442, "step": 30220 }, { "epoch": 1.6456542307351971, "grad_norm": 0.5457381959843188, "learning_rate": 1.6015790341122637e-05, "loss": 11.8472, "step": 30221 }, { "epoch": 1.6457086847317801, "grad_norm": 0.5302315325245227, "learning_rate": 1.601100384573264e-05, "loss": 11.7743, "step": 30222 }, { "epoch": 1.645763138728363, "grad_norm": 0.5443525039553707, "learning_rate": 1.6006218003455175e-05, "loss": 11.8231, "step": 30223 }, { "epoch": 1.645817592724946, "grad_norm": 0.5151473320863345, "learning_rate": 1.6001432814327467e-05, "loss": 11.7664, "step": 30224 }, { "epoch": 1.645872046721529, "grad_norm": 0.5958881746221502, "learning_rate": 1.599664827838667e-05, "loss": 11.846, "step": 30225 }, { "epoch": 1.645926500718112, "grad_norm": 0.5797504547148997, "learning_rate": 1.5991864395670052e-05, "loss": 11.8341, "step": 30226 }, { "epoch": 1.645980954714695, "grad_norm": 0.5883214560695113, "learning_rate": 1.598708116621477e-05, "loss": 11.8504, "step": 30227 }, { "epoch": 1.646035408711278, "grad_norm": 0.6053640051559289, "learning_rate": 1.598229859005804e-05, "loss": 11.8396, "step": 30228 }, { "epoch": 1.646089862707861, "grad_norm": 0.5922548361657342, "learning_rate": 1.597751666723706e-05, "loss": 11.8569, "step": 30229 }, { "epoch": 1.646144316704444, "grad_norm": 0.5600807084813513, "learning_rate": 1.5972735397788972e-05, "loss": 11.7831, "step": 30230 }, { "epoch": 1.646198770701027, "grad_norm": 0.509136169809699, "learning_rate": 1.5967954781751004e-05, "loss": 11.7968, "step": 30231 }, { "epoch": 1.64625322469761, "grad_norm": 0.5757226218075859, "learning_rate": 1.5963174819160297e-05, "loss": 11.8568, "step": 30232 }, { "epoch": 1.646307678694193, "grad_norm": 0.5645557112183426, "learning_rate": 1.5958395510054058e-05, "loss": 11.786, "step": 30233 }, { "epoch": 1.646362132690776, "grad_norm": 0.5352800378522243, "learning_rate": 1.59536168544694e-05, "loss": 11.66, "step": 30234 }, { "epoch": 1.6464165866873592, "grad_norm": 0.520343803438192, "learning_rate": 1.5948838852443515e-05, "loss": 11.8258, "step": 30235 }, { "epoch": 1.6464710406839422, "grad_norm": 0.5734099053021384, "learning_rate": 1.5944061504013584e-05, "loss": 11.9015, "step": 30236 }, { "epoch": 1.6465254946805252, "grad_norm": 0.5804734427098937, "learning_rate": 1.593928480921669e-05, "loss": 11.8885, "step": 30237 }, { "epoch": 1.6465799486771082, "grad_norm": 0.5559074652720587, "learning_rate": 1.593450876809005e-05, "loss": 11.8093, "step": 30238 }, { "epoch": 1.6466344026736912, "grad_norm": 0.5548681081568707, "learning_rate": 1.5929733380670763e-05, "loss": 11.7741, "step": 30239 }, { "epoch": 1.6466888566702742, "grad_norm": 0.5482775809889404, "learning_rate": 1.5924958646995948e-05, "loss": 11.8452, "step": 30240 }, { "epoch": 1.6467433106668574, "grad_norm": 0.515091895675061, "learning_rate": 1.5920184567102768e-05, "loss": 11.8557, "step": 30241 }, { "epoch": 1.6467977646634404, "grad_norm": 0.6422393359974997, "learning_rate": 1.5915411141028326e-05, "loss": 11.6629, "step": 30242 }, { "epoch": 1.6468522186600234, "grad_norm": 0.5747118067216739, "learning_rate": 1.591063836880976e-05, "loss": 11.7587, "step": 30243 }, { "epoch": 1.6469066726566064, "grad_norm": 0.5208001241281782, "learning_rate": 1.5905866250484146e-05, "loss": 11.764, "step": 30244 }, { "epoch": 1.6469611266531894, "grad_norm": 0.5943357567202925, "learning_rate": 1.590109478608862e-05, "loss": 11.6988, "step": 30245 }, { "epoch": 1.6470155806497724, "grad_norm": 0.6174542726047334, "learning_rate": 1.589632397566031e-05, "loss": 11.8192, "step": 30246 }, { "epoch": 1.6470700346463554, "grad_norm": 0.5589766098234975, "learning_rate": 1.589155381923627e-05, "loss": 11.8471, "step": 30247 }, { "epoch": 1.6471244886429384, "grad_norm": 0.526663364675429, "learning_rate": 1.588678431685363e-05, "loss": 11.833, "step": 30248 }, { "epoch": 1.6471789426395214, "grad_norm": 0.5420646816181991, "learning_rate": 1.588201546854946e-05, "loss": 11.7753, "step": 30249 }, { "epoch": 1.6472333966361044, "grad_norm": 0.5585573894236098, "learning_rate": 1.587724727436082e-05, "loss": 11.8415, "step": 30250 }, { "epoch": 1.6472878506326873, "grad_norm": 0.5718008066308626, "learning_rate": 1.5872479734324843e-05, "loss": 11.7366, "step": 30251 }, { "epoch": 1.6473423046292703, "grad_norm": 0.4943534299757422, "learning_rate": 1.5867712848478545e-05, "loss": 11.7126, "step": 30252 }, { "epoch": 1.6473967586258533, "grad_norm": 0.5023601942903341, "learning_rate": 1.586294661685904e-05, "loss": 11.7995, "step": 30253 }, { "epoch": 1.6474512126224363, "grad_norm": 0.5113537105801732, "learning_rate": 1.585818103950335e-05, "loss": 11.8232, "step": 30254 }, { "epoch": 1.6475056666190193, "grad_norm": 0.5339525225899784, "learning_rate": 1.5853416116448582e-05, "loss": 11.7476, "step": 30255 }, { "epoch": 1.6475601206156023, "grad_norm": 0.5436259771386474, "learning_rate": 1.5848651847731745e-05, "loss": 11.7871, "step": 30256 }, { "epoch": 1.6476145746121853, "grad_norm": 0.6170914582958583, "learning_rate": 1.5843888233389893e-05, "loss": 11.8158, "step": 30257 }, { "epoch": 1.6476690286087683, "grad_norm": 0.6716163108483342, "learning_rate": 1.58391252734601e-05, "loss": 11.836, "step": 30258 }, { "epoch": 1.6477234826053515, "grad_norm": 0.5310794370224393, "learning_rate": 1.5834362967979387e-05, "loss": 11.829, "step": 30259 }, { "epoch": 1.6477779366019345, "grad_norm": 0.481759676350806, "learning_rate": 1.582960131698479e-05, "loss": 11.8247, "step": 30260 }, { "epoch": 1.6478323905985175, "grad_norm": 0.5315280858473589, "learning_rate": 1.582484032051329e-05, "loss": 11.8731, "step": 30261 }, { "epoch": 1.6478868445951005, "grad_norm": 0.5378813463110909, "learning_rate": 1.5820079978601955e-05, "loss": 11.8076, "step": 30262 }, { "epoch": 1.6479412985916835, "grad_norm": 0.55903266225185, "learning_rate": 1.5815320291287816e-05, "loss": 11.7443, "step": 30263 }, { "epoch": 1.6479957525882665, "grad_norm": 0.6012034498040094, "learning_rate": 1.5810561258607847e-05, "loss": 11.7531, "step": 30264 }, { "epoch": 1.6480502065848497, "grad_norm": 0.5144155695918641, "learning_rate": 1.5805802880599097e-05, "loss": 11.9225, "step": 30265 }, { "epoch": 1.6481046605814327, "grad_norm": 0.5474350811292334, "learning_rate": 1.580104515729851e-05, "loss": 11.8487, "step": 30266 }, { "epoch": 1.6481591145780157, "grad_norm": 0.6174978287790941, "learning_rate": 1.579628808874315e-05, "loss": 11.8679, "step": 30267 }, { "epoch": 1.6482135685745987, "grad_norm": 0.5420979287960093, "learning_rate": 1.5791531674969938e-05, "loss": 11.8612, "step": 30268 }, { "epoch": 1.6482680225711817, "grad_norm": 0.5702792554346019, "learning_rate": 1.5786775916015938e-05, "loss": 11.8418, "step": 30269 }, { "epoch": 1.6483224765677646, "grad_norm": 0.5200834366442612, "learning_rate": 1.5782020811918075e-05, "loss": 11.8059, "step": 30270 }, { "epoch": 1.6483769305643476, "grad_norm": 0.5350795284900471, "learning_rate": 1.5777266362713327e-05, "loss": 11.8158, "step": 30271 }, { "epoch": 1.6484313845609306, "grad_norm": 0.5144431219346606, "learning_rate": 1.577251256843868e-05, "loss": 11.8311, "step": 30272 }, { "epoch": 1.6484858385575136, "grad_norm": 0.5051398509971821, "learning_rate": 1.5767759429131123e-05, "loss": 11.805, "step": 30273 }, { "epoch": 1.6485402925540966, "grad_norm": 0.5139305998432078, "learning_rate": 1.5763006944827564e-05, "loss": 11.8075, "step": 30274 }, { "epoch": 1.6485947465506796, "grad_norm": 0.5260812256509733, "learning_rate": 1.5758255115565023e-05, "loss": 11.7722, "step": 30275 }, { "epoch": 1.6486492005472626, "grad_norm": 0.5193301853136946, "learning_rate": 1.575350394138039e-05, "loss": 11.7299, "step": 30276 }, { "epoch": 1.6487036545438456, "grad_norm": 0.551471942030761, "learning_rate": 1.5748753422310657e-05, "loss": 11.7086, "step": 30277 }, { "epoch": 1.6487581085404286, "grad_norm": 0.5041331920105072, "learning_rate": 1.5744003558392727e-05, "loss": 11.7779, "step": 30278 }, { "epoch": 1.6488125625370116, "grad_norm": 0.5717935631152075, "learning_rate": 1.573925434966358e-05, "loss": 11.8936, "step": 30279 }, { "epoch": 1.6488670165335946, "grad_norm": 0.5568158823108074, "learning_rate": 1.5734505796160125e-05, "loss": 11.9016, "step": 30280 }, { "epoch": 1.6489214705301776, "grad_norm": 0.5757758848790586, "learning_rate": 1.572975789791925e-05, "loss": 11.8203, "step": 30281 }, { "epoch": 1.6489759245267608, "grad_norm": 0.5473651209505923, "learning_rate": 1.5725010654977944e-05, "loss": 11.7335, "step": 30282 }, { "epoch": 1.6490303785233438, "grad_norm": 0.5518298560932837, "learning_rate": 1.572026406737306e-05, "loss": 11.7585, "step": 30283 }, { "epoch": 1.6490848325199268, "grad_norm": 0.5395694848191006, "learning_rate": 1.5715518135141537e-05, "loss": 11.8149, "step": 30284 }, { "epoch": 1.6491392865165098, "grad_norm": 0.5357026261927198, "learning_rate": 1.57107728583203e-05, "loss": 11.8469, "step": 30285 }, { "epoch": 1.6491937405130928, "grad_norm": 0.5788451144378695, "learning_rate": 1.5706028236946213e-05, "loss": 11.9288, "step": 30286 }, { "epoch": 1.6492481945096757, "grad_norm": 0.49511969403709766, "learning_rate": 1.5701284271056206e-05, "loss": 11.7889, "step": 30287 }, { "epoch": 1.649302648506259, "grad_norm": 0.5333781080811473, "learning_rate": 1.5696540960687124e-05, "loss": 11.8837, "step": 30288 }, { "epoch": 1.649357102502842, "grad_norm": 0.485940058173729, "learning_rate": 1.5691798305875892e-05, "loss": 11.7135, "step": 30289 }, { "epoch": 1.649411556499425, "grad_norm": 0.5588762528727661, "learning_rate": 1.5687056306659385e-05, "loss": 11.7117, "step": 30290 }, { "epoch": 1.649466010496008, "grad_norm": 0.4969574205986947, "learning_rate": 1.568231496307445e-05, "loss": 11.8301, "step": 30291 }, { "epoch": 1.649520464492591, "grad_norm": 0.5388298627014794, "learning_rate": 1.567757427515799e-05, "loss": 11.7702, "step": 30292 }, { "epoch": 1.649574918489174, "grad_norm": 0.559164915027884, "learning_rate": 1.5672834242946833e-05, "loss": 11.8038, "step": 30293 }, { "epoch": 1.649629372485757, "grad_norm": 0.5254196074212372, "learning_rate": 1.5668094866477846e-05, "loss": 11.7543, "step": 30294 }, { "epoch": 1.64968382648234, "grad_norm": 0.5450844572147387, "learning_rate": 1.5663356145787932e-05, "loss": 11.8074, "step": 30295 }, { "epoch": 1.649738280478923, "grad_norm": 0.5800732777198301, "learning_rate": 1.5658618080913878e-05, "loss": 11.9646, "step": 30296 }, { "epoch": 1.649792734475506, "grad_norm": 0.5408316042486881, "learning_rate": 1.5653880671892573e-05, "loss": 11.7845, "step": 30297 }, { "epoch": 1.6498471884720889, "grad_norm": 0.5049158407211579, "learning_rate": 1.5649143918760822e-05, "loss": 11.7873, "step": 30298 }, { "epoch": 1.6499016424686719, "grad_norm": 0.5507430811172999, "learning_rate": 1.5644407821555495e-05, "loss": 11.9438, "step": 30299 }, { "epoch": 1.6499560964652549, "grad_norm": 0.537260845691589, "learning_rate": 1.5639672380313398e-05, "loss": 11.714, "step": 30300 }, { "epoch": 1.6500105504618379, "grad_norm": 0.5595521725432183, "learning_rate": 1.563493759507133e-05, "loss": 11.8271, "step": 30301 }, { "epoch": 1.6500650044584209, "grad_norm": 0.5486761625586757, "learning_rate": 1.5630203465866156e-05, "loss": 11.8217, "step": 30302 }, { "epoch": 1.6501194584550039, "grad_norm": 0.5580839278671575, "learning_rate": 1.5625469992734642e-05, "loss": 11.7532, "step": 30303 }, { "epoch": 1.6501739124515868, "grad_norm": 0.5809952505539281, "learning_rate": 1.5620737175713663e-05, "loss": 11.8901, "step": 30304 }, { "epoch": 1.65022836644817, "grad_norm": 0.543482058822921, "learning_rate": 1.561600501483994e-05, "loss": 11.7574, "step": 30305 }, { "epoch": 1.650282820444753, "grad_norm": 0.5544393298231484, "learning_rate": 1.5611273510150326e-05, "loss": 11.7676, "step": 30306 }, { "epoch": 1.650337274441336, "grad_norm": 0.5144533714967504, "learning_rate": 1.5606542661681622e-05, "loss": 11.79, "step": 30307 }, { "epoch": 1.650391728437919, "grad_norm": 0.5170192773414732, "learning_rate": 1.5601812469470565e-05, "loss": 11.7147, "step": 30308 }, { "epoch": 1.650446182434502, "grad_norm": 0.545316082602362, "learning_rate": 1.559708293355402e-05, "loss": 11.8639, "step": 30309 }, { "epoch": 1.650500636431085, "grad_norm": 0.5979883381590966, "learning_rate": 1.559235405396867e-05, "loss": 11.9237, "step": 30310 }, { "epoch": 1.6505550904276682, "grad_norm": 0.5953268062769637, "learning_rate": 1.558762583075133e-05, "loss": 11.7034, "step": 30311 }, { "epoch": 1.6506095444242512, "grad_norm": 0.6066605854287557, "learning_rate": 1.5582898263938784e-05, "loss": 11.858, "step": 30312 }, { "epoch": 1.6506639984208342, "grad_norm": 0.5447694093535638, "learning_rate": 1.557817135356775e-05, "loss": 11.8601, "step": 30313 }, { "epoch": 1.6507184524174172, "grad_norm": 0.6123454736841145, "learning_rate": 1.5573445099675045e-05, "loss": 11.8336, "step": 30314 }, { "epoch": 1.6507729064140002, "grad_norm": 0.5389667746472384, "learning_rate": 1.556871950229737e-05, "loss": 11.7927, "step": 30315 }, { "epoch": 1.6508273604105832, "grad_norm": 0.5308657024967885, "learning_rate": 1.5563994561471506e-05, "loss": 11.675, "step": 30316 }, { "epoch": 1.6508818144071662, "grad_norm": 0.525862668934587, "learning_rate": 1.555927027723416e-05, "loss": 11.7492, "step": 30317 }, { "epoch": 1.6509362684037492, "grad_norm": 0.5386362426469454, "learning_rate": 1.5554546649622094e-05, "loss": 11.6782, "step": 30318 }, { "epoch": 1.6509907224003322, "grad_norm": 0.5667891737255518, "learning_rate": 1.5549823678672072e-05, "loss": 11.9245, "step": 30319 }, { "epoch": 1.6510451763969152, "grad_norm": 0.5157830108174235, "learning_rate": 1.5545101364420744e-05, "loss": 11.5794, "step": 30320 }, { "epoch": 1.6510996303934982, "grad_norm": 0.6135712245199829, "learning_rate": 1.5540379706904862e-05, "loss": 11.9485, "step": 30321 }, { "epoch": 1.6511540843900812, "grad_norm": 0.5558481003182353, "learning_rate": 1.5535658706161183e-05, "loss": 11.8048, "step": 30322 }, { "epoch": 1.6512085383866641, "grad_norm": 0.5875586621045195, "learning_rate": 1.5530938362226353e-05, "loss": 11.8165, "step": 30323 }, { "epoch": 1.6512629923832471, "grad_norm": 0.537000074397403, "learning_rate": 1.552621867513715e-05, "loss": 11.8471, "step": 30324 }, { "epoch": 1.6513174463798301, "grad_norm": 0.557250138866359, "learning_rate": 1.5521499644930203e-05, "loss": 11.7105, "step": 30325 }, { "epoch": 1.6513719003764131, "grad_norm": 0.5779101757630384, "learning_rate": 1.5516781271642265e-05, "loss": 11.6837, "step": 30326 }, { "epoch": 1.6514263543729961, "grad_norm": 0.5137701020962699, "learning_rate": 1.5512063555309985e-05, "loss": 11.7143, "step": 30327 }, { "epoch": 1.6514808083695791, "grad_norm": 0.5416251137432319, "learning_rate": 1.5507346495970075e-05, "loss": 11.7964, "step": 30328 }, { "epoch": 1.6515352623661623, "grad_norm": 0.5257476400477481, "learning_rate": 1.5502630093659243e-05, "loss": 11.7226, "step": 30329 }, { "epoch": 1.6515897163627453, "grad_norm": 0.5530633292834322, "learning_rate": 1.549791434841409e-05, "loss": 11.7645, "step": 30330 }, { "epoch": 1.6516441703593283, "grad_norm": 0.4887682575319276, "learning_rate": 1.549319926027134e-05, "loss": 11.7486, "step": 30331 }, { "epoch": 1.6516986243559113, "grad_norm": 0.5035435737874265, "learning_rate": 1.5488484829267626e-05, "loss": 11.6502, "step": 30332 }, { "epoch": 1.6517530783524943, "grad_norm": 0.5236023357418601, "learning_rate": 1.5483771055439634e-05, "loss": 11.8346, "step": 30333 }, { "epoch": 1.6518075323490773, "grad_norm": 0.5720862268818151, "learning_rate": 1.5479057938824025e-05, "loss": 11.7759, "step": 30334 }, { "epoch": 1.6518619863456605, "grad_norm": 0.5189373404562976, "learning_rate": 1.5474345479457408e-05, "loss": 11.794, "step": 30335 }, { "epoch": 1.6519164403422435, "grad_norm": 0.596743891031663, "learning_rate": 1.5469633677376495e-05, "loss": 11.8006, "step": 30336 }, { "epoch": 1.6519708943388265, "grad_norm": 0.6333154097587338, "learning_rate": 1.5464922532617853e-05, "loss": 11.9669, "step": 30337 }, { "epoch": 1.6520253483354095, "grad_norm": 0.5352705912976505, "learning_rate": 1.546021204521817e-05, "loss": 11.8758, "step": 30338 }, { "epoch": 1.6520798023319925, "grad_norm": 0.5458485553451803, "learning_rate": 1.5455502215214057e-05, "loss": 11.8238, "step": 30339 }, { "epoch": 1.6521342563285755, "grad_norm": 0.5712397342395055, "learning_rate": 1.545079304264212e-05, "loss": 11.7973, "step": 30340 }, { "epoch": 1.6521887103251585, "grad_norm": 0.666926277135753, "learning_rate": 1.544608452753902e-05, "loss": 11.8262, "step": 30341 }, { "epoch": 1.6522431643217415, "grad_norm": 0.543368776603423, "learning_rate": 1.544137666994131e-05, "loss": 11.8499, "step": 30342 }, { "epoch": 1.6522976183183244, "grad_norm": 0.5593660809025408, "learning_rate": 1.5436669469885645e-05, "loss": 11.8503, "step": 30343 }, { "epoch": 1.6523520723149074, "grad_norm": 0.5784121439909142, "learning_rate": 1.543196292740864e-05, "loss": 11.7, "step": 30344 }, { "epoch": 1.6524065263114904, "grad_norm": 0.5811856404692756, "learning_rate": 1.542725704254685e-05, "loss": 11.8399, "step": 30345 }, { "epoch": 1.6524609803080734, "grad_norm": 0.5337992690973378, "learning_rate": 1.5422551815336916e-05, "loss": 11.7704, "step": 30346 }, { "epoch": 1.6525154343046564, "grad_norm": 0.5361280886355042, "learning_rate": 1.5417847245815387e-05, "loss": 11.6659, "step": 30347 }, { "epoch": 1.6525698883012394, "grad_norm": 0.5805437058398001, "learning_rate": 1.5413143334018875e-05, "loss": 11.8149, "step": 30348 }, { "epoch": 1.6526243422978224, "grad_norm": 0.5204293584801625, "learning_rate": 1.5408440079983945e-05, "loss": 11.7907, "step": 30349 }, { "epoch": 1.6526787962944054, "grad_norm": 0.5598694718774125, "learning_rate": 1.540373748374715e-05, "loss": 11.7298, "step": 30350 }, { "epoch": 1.6527332502909884, "grad_norm": 0.5730257959846164, "learning_rate": 1.539903554534511e-05, "loss": 11.7879, "step": 30351 }, { "epoch": 1.6527877042875716, "grad_norm": 0.5471553983808888, "learning_rate": 1.5394334264814326e-05, "loss": 11.796, "step": 30352 }, { "epoch": 1.6528421582841546, "grad_norm": 0.6051570299155735, "learning_rate": 1.5389633642191415e-05, "loss": 11.8685, "step": 30353 }, { "epoch": 1.6528966122807376, "grad_norm": 0.5979375283709545, "learning_rate": 1.5384933677512868e-05, "loss": 11.8454, "step": 30354 }, { "epoch": 1.6529510662773206, "grad_norm": 0.5348757715511375, "learning_rate": 1.538023437081528e-05, "loss": 11.883, "step": 30355 }, { "epoch": 1.6530055202739036, "grad_norm": 0.705201060188786, "learning_rate": 1.5375535722135203e-05, "loss": 11.8147, "step": 30356 }, { "epoch": 1.6530599742704866, "grad_norm": 0.581476274725346, "learning_rate": 1.5370837731509125e-05, "loss": 11.8792, "step": 30357 }, { "epoch": 1.6531144282670698, "grad_norm": 0.5791998452877999, "learning_rate": 1.5366140398973627e-05, "loss": 11.8992, "step": 30358 }, { "epoch": 1.6531688822636528, "grad_norm": 0.56410517208088, "learning_rate": 1.5361443724565183e-05, "loss": 11.8791, "step": 30359 }, { "epoch": 1.6532233362602358, "grad_norm": 0.6183107415967306, "learning_rate": 1.5356747708320386e-05, "loss": 11.7988, "step": 30360 }, { "epoch": 1.6532777902568188, "grad_norm": 0.6875640496560392, "learning_rate": 1.5352052350275703e-05, "loss": 11.8829, "step": 30361 }, { "epoch": 1.6533322442534018, "grad_norm": 0.5497935646659076, "learning_rate": 1.534735765046763e-05, "loss": 11.791, "step": 30362 }, { "epoch": 1.6533866982499847, "grad_norm": 0.5619957708161861, "learning_rate": 1.5342663608932738e-05, "loss": 11.8213, "step": 30363 }, { "epoch": 1.6534411522465677, "grad_norm": 0.5555121280641989, "learning_rate": 1.5337970225707456e-05, "loss": 11.882, "step": 30364 }, { "epoch": 1.6534956062431507, "grad_norm": 0.48188241317873043, "learning_rate": 1.5333277500828337e-05, "loss": 11.8077, "step": 30365 }, { "epoch": 1.6535500602397337, "grad_norm": 0.6001084996859429, "learning_rate": 1.5328585434331834e-05, "loss": 11.8115, "step": 30366 }, { "epoch": 1.6536045142363167, "grad_norm": 0.5428956276223575, "learning_rate": 1.5323894026254448e-05, "loss": 11.8563, "step": 30367 }, { "epoch": 1.6536589682328997, "grad_norm": 0.5685570349966749, "learning_rate": 1.531920327663269e-05, "loss": 11.7507, "step": 30368 }, { "epoch": 1.6537134222294827, "grad_norm": 0.5485690021047516, "learning_rate": 1.5314513185502976e-05, "loss": 11.8139, "step": 30369 }, { "epoch": 1.6537678762260657, "grad_norm": 0.5509489858868376, "learning_rate": 1.530982375290184e-05, "loss": 11.8292, "step": 30370 }, { "epoch": 1.6538223302226487, "grad_norm": 0.5656163722575241, "learning_rate": 1.530513497886571e-05, "loss": 11.7971, "step": 30371 }, { "epoch": 1.6538767842192317, "grad_norm": 0.5823733713054023, "learning_rate": 1.530044686343104e-05, "loss": 11.8057, "step": 30372 }, { "epoch": 1.6539312382158147, "grad_norm": 0.5984367437471964, "learning_rate": 1.5295759406634312e-05, "loss": 11.8007, "step": 30373 }, { "epoch": 1.6539856922123977, "grad_norm": 0.5833388668835244, "learning_rate": 1.5291072608511946e-05, "loss": 11.7687, "step": 30374 }, { "epoch": 1.6540401462089809, "grad_norm": 0.614076766546919, "learning_rate": 1.5286386469100434e-05, "loss": 11.9744, "step": 30375 }, { "epoch": 1.6540946002055639, "grad_norm": 0.5085048132639166, "learning_rate": 1.5281700988436153e-05, "loss": 11.6403, "step": 30376 }, { "epoch": 1.6541490542021469, "grad_norm": 0.5215632385558154, "learning_rate": 1.527701616655558e-05, "loss": 11.7214, "step": 30377 }, { "epoch": 1.6542035081987299, "grad_norm": 0.532165653013137, "learning_rate": 1.527233200349516e-05, "loss": 11.7481, "step": 30378 }, { "epoch": 1.6542579621953128, "grad_norm": 0.6044869715802822, "learning_rate": 1.5267648499291266e-05, "loss": 11.8685, "step": 30379 }, { "epoch": 1.6543124161918958, "grad_norm": 0.5182815276832565, "learning_rate": 1.52629656539804e-05, "loss": 11.8329, "step": 30380 }, { "epoch": 1.654366870188479, "grad_norm": 0.5775482579320368, "learning_rate": 1.5258283467598878e-05, "loss": 11.8831, "step": 30381 }, { "epoch": 1.654421324185062, "grad_norm": 0.5034491015701649, "learning_rate": 1.525360194018315e-05, "loss": 11.7911, "step": 30382 }, { "epoch": 1.654475778181645, "grad_norm": 0.52738638170991, "learning_rate": 1.524892107176964e-05, "loss": 11.6845, "step": 30383 }, { "epoch": 1.654530232178228, "grad_norm": 0.587513577590588, "learning_rate": 1.5244240862394721e-05, "loss": 11.806, "step": 30384 }, { "epoch": 1.654584686174811, "grad_norm": 0.6151047411750562, "learning_rate": 1.523956131209482e-05, "loss": 11.7422, "step": 30385 }, { "epoch": 1.654639140171394, "grad_norm": 0.4971705865165115, "learning_rate": 1.5234882420906282e-05, "loss": 11.7241, "step": 30386 }, { "epoch": 1.654693594167977, "grad_norm": 0.6124434570136407, "learning_rate": 1.5230204188865538e-05, "loss": 11.8893, "step": 30387 }, { "epoch": 1.65474804816456, "grad_norm": 0.5647274186823761, "learning_rate": 1.5225526616008912e-05, "loss": 11.7533, "step": 30388 }, { "epoch": 1.654802502161143, "grad_norm": 0.5205825535187503, "learning_rate": 1.5220849702372819e-05, "loss": 11.8398, "step": 30389 }, { "epoch": 1.654856956157726, "grad_norm": 0.5137468990488969, "learning_rate": 1.5216173447993654e-05, "loss": 11.7658, "step": 30390 }, { "epoch": 1.654911410154309, "grad_norm": 0.5846701781531567, "learning_rate": 1.5211497852907697e-05, "loss": 11.712, "step": 30391 }, { "epoch": 1.654965864150892, "grad_norm": 0.5596722748678072, "learning_rate": 1.5206822917151353e-05, "loss": 11.8297, "step": 30392 }, { "epoch": 1.655020318147475, "grad_norm": 0.6166217791286542, "learning_rate": 1.5202148640760993e-05, "loss": 11.888, "step": 30393 }, { "epoch": 1.655074772144058, "grad_norm": 0.5143980147394284, "learning_rate": 1.5197475023772933e-05, "loss": 11.7012, "step": 30394 }, { "epoch": 1.655129226140641, "grad_norm": 0.6011240633541712, "learning_rate": 1.5192802066223543e-05, "loss": 11.9945, "step": 30395 }, { "epoch": 1.655183680137224, "grad_norm": 0.5237221533972257, "learning_rate": 1.5188129768149129e-05, "loss": 11.7944, "step": 30396 }, { "epoch": 1.655238134133807, "grad_norm": 0.5219806364379025, "learning_rate": 1.518345812958606e-05, "loss": 11.7039, "step": 30397 }, { "epoch": 1.65529258813039, "grad_norm": 0.5315686986719056, "learning_rate": 1.5178787150570617e-05, "loss": 11.8195, "step": 30398 }, { "epoch": 1.6553470421269731, "grad_norm": 0.5243077468399364, "learning_rate": 1.5174116831139185e-05, "loss": 11.7269, "step": 30399 }, { "epoch": 1.6554014961235561, "grad_norm": 0.4962328515717563, "learning_rate": 1.5169447171328032e-05, "loss": 11.7835, "step": 30400 }, { "epoch": 1.6554559501201391, "grad_norm": 0.5297610904104818, "learning_rate": 1.5164778171173465e-05, "loss": 12.0246, "step": 30401 }, { "epoch": 1.6555104041167221, "grad_norm": 0.5402781603697728, "learning_rate": 1.5160109830711832e-05, "loss": 11.855, "step": 30402 }, { "epoch": 1.6555648581133051, "grad_norm": 0.7292855827356006, "learning_rate": 1.5155442149979393e-05, "loss": 11.8603, "step": 30403 }, { "epoch": 1.6556193121098883, "grad_norm": 0.574548793554654, "learning_rate": 1.5150775129012473e-05, "loss": 11.7707, "step": 30404 }, { "epoch": 1.6556737661064713, "grad_norm": 0.5715973146866069, "learning_rate": 1.5146108767847367e-05, "loss": 11.8918, "step": 30405 }, { "epoch": 1.6557282201030543, "grad_norm": 0.5483522380286712, "learning_rate": 1.5141443066520323e-05, "loss": 11.7617, "step": 30406 }, { "epoch": 1.6557826740996373, "grad_norm": 0.49708881440381997, "learning_rate": 1.5136778025067678e-05, "loss": 11.7585, "step": 30407 }, { "epoch": 1.6558371280962203, "grad_norm": 0.5767574518299988, "learning_rate": 1.5132113643525658e-05, "loss": 11.7842, "step": 30408 }, { "epoch": 1.6558915820928033, "grad_norm": 0.553466801046347, "learning_rate": 1.512744992193057e-05, "loss": 11.7271, "step": 30409 }, { "epoch": 1.6559460360893863, "grad_norm": 0.5508168131996588, "learning_rate": 1.5122786860318671e-05, "loss": 11.7983, "step": 30410 }, { "epoch": 1.6560004900859693, "grad_norm": 0.5412522124930113, "learning_rate": 1.511812445872619e-05, "loss": 11.8493, "step": 30411 }, { "epoch": 1.6560549440825523, "grad_norm": 0.762679716974236, "learning_rate": 1.511346271718943e-05, "loss": 11.753, "step": 30412 }, { "epoch": 1.6561093980791353, "grad_norm": 0.5304688621580921, "learning_rate": 1.5108801635744596e-05, "loss": 11.9053, "step": 30413 }, { "epoch": 1.6561638520757183, "grad_norm": 0.5631865236162934, "learning_rate": 1.510414121442798e-05, "loss": 11.7591, "step": 30414 }, { "epoch": 1.6562183060723012, "grad_norm": 0.5897639777853539, "learning_rate": 1.5099481453275777e-05, "loss": 11.7983, "step": 30415 }, { "epoch": 1.6562727600688842, "grad_norm": 0.5405331228809653, "learning_rate": 1.5094822352324246e-05, "loss": 11.7762, "step": 30416 }, { "epoch": 1.6563272140654672, "grad_norm": 0.5257861697223148, "learning_rate": 1.5090163911609633e-05, "loss": 11.7752, "step": 30417 }, { "epoch": 1.6563816680620502, "grad_norm": 0.5135910112359288, "learning_rate": 1.5085506131168125e-05, "loss": 11.6758, "step": 30418 }, { "epoch": 1.6564361220586332, "grad_norm": 0.5315099764161411, "learning_rate": 1.508084901103598e-05, "loss": 11.7313, "step": 30419 }, { "epoch": 1.6564905760552162, "grad_norm": 0.5490301029222859, "learning_rate": 1.5076192551249402e-05, "loss": 11.8143, "step": 30420 }, { "epoch": 1.6565450300517992, "grad_norm": 0.5209767159520465, "learning_rate": 1.5071536751844562e-05, "loss": 11.7725, "step": 30421 }, { "epoch": 1.6565994840483824, "grad_norm": 0.5816017991475442, "learning_rate": 1.506688161285772e-05, "loss": 11.8754, "step": 30422 }, { "epoch": 1.6566539380449654, "grad_norm": 0.5109652757838231, "learning_rate": 1.5062227134325024e-05, "loss": 11.5619, "step": 30423 }, { "epoch": 1.6567083920415484, "grad_norm": 0.5540279756642714, "learning_rate": 1.5057573316282725e-05, "loss": 11.7301, "step": 30424 }, { "epoch": 1.6567628460381314, "grad_norm": 0.524685815297852, "learning_rate": 1.5052920158766948e-05, "loss": 11.8003, "step": 30425 }, { "epoch": 1.6568173000347144, "grad_norm": 0.5097022926707467, "learning_rate": 1.5048267661813919e-05, "loss": 11.7585, "step": 30426 }, { "epoch": 1.6568717540312974, "grad_norm": 0.545251790768531, "learning_rate": 1.5043615825459823e-05, "loss": 11.8773, "step": 30427 }, { "epoch": 1.6569262080278806, "grad_norm": 0.5084444355519825, "learning_rate": 1.5038964649740806e-05, "loss": 11.8479, "step": 30428 }, { "epoch": 1.6569806620244636, "grad_norm": 0.5251794274872539, "learning_rate": 1.5034314134693061e-05, "loss": 11.6698, "step": 30429 }, { "epoch": 1.6570351160210466, "grad_norm": 0.6197933700010179, "learning_rate": 1.502966428035274e-05, "loss": 11.9312, "step": 30430 }, { "epoch": 1.6570895700176296, "grad_norm": 0.5446950480380597, "learning_rate": 1.5025015086755968e-05, "loss": 11.8244, "step": 30431 }, { "epoch": 1.6571440240142126, "grad_norm": 0.49786773049131056, "learning_rate": 1.5020366553938958e-05, "loss": 11.7073, "step": 30432 }, { "epoch": 1.6571984780107956, "grad_norm": 0.5520210459942693, "learning_rate": 1.5015718681937818e-05, "loss": 11.867, "step": 30433 }, { "epoch": 1.6572529320073786, "grad_norm": 0.5915950089523143, "learning_rate": 1.5011071470788706e-05, "loss": 11.8472, "step": 30434 }, { "epoch": 1.6573073860039615, "grad_norm": 0.5431760172320985, "learning_rate": 1.500642492052774e-05, "loss": 11.9128, "step": 30435 }, { "epoch": 1.6573618400005445, "grad_norm": 0.5358030956140224, "learning_rate": 1.5001779031191087e-05, "loss": 11.7488, "step": 30436 }, { "epoch": 1.6574162939971275, "grad_norm": 0.6157510286347626, "learning_rate": 1.4997133802814845e-05, "loss": 11.8955, "step": 30437 }, { "epoch": 1.6574707479937105, "grad_norm": 0.5774760449390086, "learning_rate": 1.4992489235435126e-05, "loss": 11.8392, "step": 30438 }, { "epoch": 1.6575252019902935, "grad_norm": 0.5401170132973687, "learning_rate": 1.4987845329088101e-05, "loss": 11.7123, "step": 30439 }, { "epoch": 1.6575796559868765, "grad_norm": 0.5495759029012905, "learning_rate": 1.4983202083809844e-05, "loss": 11.9677, "step": 30440 }, { "epoch": 1.6576341099834595, "grad_norm": 0.5041097356224132, "learning_rate": 1.4978559499636447e-05, "loss": 11.7353, "step": 30441 }, { "epoch": 1.6576885639800425, "grad_norm": 0.5175543807236078, "learning_rate": 1.4973917576604046e-05, "loss": 11.7952, "step": 30442 }, { "epoch": 1.6577430179766255, "grad_norm": 0.5684874834799495, "learning_rate": 1.4969276314748692e-05, "loss": 11.7802, "step": 30443 }, { "epoch": 1.6577974719732085, "grad_norm": 0.5264629618291342, "learning_rate": 1.4964635714106545e-05, "loss": 11.7771, "step": 30444 }, { "epoch": 1.6578519259697917, "grad_norm": 0.6101260725520791, "learning_rate": 1.495999577471362e-05, "loss": 11.7677, "step": 30445 }, { "epoch": 1.6579063799663747, "grad_norm": 0.5461996462767426, "learning_rate": 1.4955356496606042e-05, "loss": 11.8557, "step": 30446 }, { "epoch": 1.6579608339629577, "grad_norm": 0.5690858666358461, "learning_rate": 1.4950717879819864e-05, "loss": 11.7328, "step": 30447 }, { "epoch": 1.6580152879595407, "grad_norm": 0.5042772098131838, "learning_rate": 1.4946079924391165e-05, "loss": 11.7353, "step": 30448 }, { "epoch": 1.6580697419561237, "grad_norm": 0.5551249340875368, "learning_rate": 1.4941442630356028e-05, "loss": 11.8102, "step": 30449 }, { "epoch": 1.6581241959527067, "grad_norm": 0.6822976424863264, "learning_rate": 1.4936805997750503e-05, "loss": 11.9755, "step": 30450 }, { "epoch": 1.6581786499492899, "grad_norm": 0.5217195502125209, "learning_rate": 1.4932170026610636e-05, "loss": 11.7261, "step": 30451 }, { "epoch": 1.6582331039458729, "grad_norm": 0.5350002273576602, "learning_rate": 1.4927534716972468e-05, "loss": 11.8111, "step": 30452 }, { "epoch": 1.6582875579424559, "grad_norm": 0.5409593969053802, "learning_rate": 1.4922900068872048e-05, "loss": 11.8507, "step": 30453 }, { "epoch": 1.6583420119390389, "grad_norm": 0.5343336682199202, "learning_rate": 1.4918266082345444e-05, "loss": 11.8041, "step": 30454 }, { "epoch": 1.6583964659356218, "grad_norm": 0.5439666242708063, "learning_rate": 1.4913632757428652e-05, "loss": 11.8396, "step": 30455 }, { "epoch": 1.6584509199322048, "grad_norm": 0.5078023638394096, "learning_rate": 1.4909000094157743e-05, "loss": 11.7087, "step": 30456 }, { "epoch": 1.6585053739287878, "grad_norm": 0.5040080596064688, "learning_rate": 1.4904368092568699e-05, "loss": 11.7599, "step": 30457 }, { "epoch": 1.6585598279253708, "grad_norm": 0.5456376162897237, "learning_rate": 1.489973675269758e-05, "loss": 11.7348, "step": 30458 }, { "epoch": 1.6586142819219538, "grad_norm": 0.5226602620290465, "learning_rate": 1.4895106074580355e-05, "loss": 11.8768, "step": 30459 }, { "epoch": 1.6586687359185368, "grad_norm": 0.5421098455850706, "learning_rate": 1.4890476058253089e-05, "loss": 11.8537, "step": 30460 }, { "epoch": 1.6587231899151198, "grad_norm": 0.52238904348751, "learning_rate": 1.4885846703751749e-05, "loss": 11.8057, "step": 30461 }, { "epoch": 1.6587776439117028, "grad_norm": 0.5643276611856208, "learning_rate": 1.4881218011112308e-05, "loss": 11.84, "step": 30462 }, { "epoch": 1.6588320979082858, "grad_norm": 0.5108632330040422, "learning_rate": 1.4876589980370825e-05, "loss": 11.7637, "step": 30463 }, { "epoch": 1.6588865519048688, "grad_norm": 0.5748429324447424, "learning_rate": 1.4871962611563218e-05, "loss": 11.6599, "step": 30464 }, { "epoch": 1.6589410059014518, "grad_norm": 0.5685506754204068, "learning_rate": 1.486733590472551e-05, "loss": 11.7119, "step": 30465 }, { "epoch": 1.6589954598980348, "grad_norm": 0.5518315507953145, "learning_rate": 1.4862709859893708e-05, "loss": 11.8669, "step": 30466 }, { "epoch": 1.6590499138946178, "grad_norm": 0.5161797691907695, "learning_rate": 1.4858084477103717e-05, "loss": 11.8546, "step": 30467 }, { "epoch": 1.6591043678912007, "grad_norm": 0.5500540514779502, "learning_rate": 1.4853459756391564e-05, "loss": 11.891, "step": 30468 }, { "epoch": 1.659158821887784, "grad_norm": 0.5261406948857814, "learning_rate": 1.4848835697793174e-05, "loss": 11.8261, "step": 30469 }, { "epoch": 1.659213275884367, "grad_norm": 0.5221450123639628, "learning_rate": 1.484421230134453e-05, "loss": 11.8626, "step": 30470 }, { "epoch": 1.65926772988095, "grad_norm": 0.5370308777913975, "learning_rate": 1.483958956708157e-05, "loss": 11.7647, "step": 30471 }, { "epoch": 1.659322183877533, "grad_norm": 0.5451917737767658, "learning_rate": 1.483496749504022e-05, "loss": 11.7655, "step": 30472 }, { "epoch": 1.659376637874116, "grad_norm": 0.5512427098784394, "learning_rate": 1.4830346085256475e-05, "loss": 11.728, "step": 30473 }, { "epoch": 1.6594310918706991, "grad_norm": 0.48879493807625274, "learning_rate": 1.4825725337766217e-05, "loss": 11.7241, "step": 30474 }, { "epoch": 1.6594855458672821, "grad_norm": 0.5323193600760803, "learning_rate": 1.4821105252605404e-05, "loss": 11.6926, "step": 30475 }, { "epoch": 1.6595399998638651, "grad_norm": 0.5357552343034483, "learning_rate": 1.481648582980998e-05, "loss": 11.873, "step": 30476 }, { "epoch": 1.6595944538604481, "grad_norm": 0.5843543350729047, "learning_rate": 1.481186706941583e-05, "loss": 11.749, "step": 30477 }, { "epoch": 1.6596489078570311, "grad_norm": 0.6312598436209041, "learning_rate": 1.4807248971458898e-05, "loss": 11.9846, "step": 30478 }, { "epoch": 1.6597033618536141, "grad_norm": 0.6070430916918202, "learning_rate": 1.4802631535975076e-05, "loss": 11.8512, "step": 30479 }, { "epoch": 1.659757815850197, "grad_norm": 0.5799074330813278, "learning_rate": 1.47980147630003e-05, "loss": 11.8646, "step": 30480 }, { "epoch": 1.65981226984678, "grad_norm": 0.5673598018127015, "learning_rate": 1.4793398652570445e-05, "loss": 11.7869, "step": 30481 }, { "epoch": 1.659866723843363, "grad_norm": 0.5577040261014342, "learning_rate": 1.478878320472139e-05, "loss": 11.8044, "step": 30482 }, { "epoch": 1.659921177839946, "grad_norm": 0.5914851761444279, "learning_rate": 1.4784168419489065e-05, "loss": 11.926, "step": 30483 }, { "epoch": 1.659975631836529, "grad_norm": 0.5864248361351323, "learning_rate": 1.4779554296909315e-05, "loss": 11.8559, "step": 30484 }, { "epoch": 1.660030085833112, "grad_norm": 0.5727600462604755, "learning_rate": 1.477494083701807e-05, "loss": 11.7601, "step": 30485 }, { "epoch": 1.660084539829695, "grad_norm": 0.5837628149486116, "learning_rate": 1.4770328039851155e-05, "loss": 11.8826, "step": 30486 }, { "epoch": 1.660138993826278, "grad_norm": 0.5257373739101987, "learning_rate": 1.4765715905444455e-05, "loss": 11.7619, "step": 30487 }, { "epoch": 1.660193447822861, "grad_norm": 0.5569929326785522, "learning_rate": 1.4761104433833873e-05, "loss": 11.7252, "step": 30488 }, { "epoch": 1.660247901819444, "grad_norm": 0.5596793453691836, "learning_rate": 1.4756493625055211e-05, "loss": 11.5962, "step": 30489 }, { "epoch": 1.660302355816027, "grad_norm": 0.5268308993081, "learning_rate": 1.475188347914438e-05, "loss": 11.8873, "step": 30490 }, { "epoch": 1.66035680981261, "grad_norm": 0.5503512179842955, "learning_rate": 1.474727399613719e-05, "loss": 11.9062, "step": 30491 }, { "epoch": 1.6604112638091932, "grad_norm": 0.5652508792980745, "learning_rate": 1.474266517606947e-05, "loss": 11.92, "step": 30492 }, { "epoch": 1.6604657178057762, "grad_norm": 0.4919906257929225, "learning_rate": 1.4738057018977114e-05, "loss": 11.8253, "step": 30493 }, { "epoch": 1.6605201718023592, "grad_norm": 0.5038695577291233, "learning_rate": 1.4733449524895893e-05, "loss": 11.8345, "step": 30494 }, { "epoch": 1.6605746257989422, "grad_norm": 0.5230842360938279, "learning_rate": 1.47288426938617e-05, "loss": 11.7675, "step": 30495 }, { "epoch": 1.6606290797955252, "grad_norm": 0.5818535948817662, "learning_rate": 1.4724236525910296e-05, "loss": 11.876, "step": 30496 }, { "epoch": 1.6606835337921082, "grad_norm": 0.5106333140474635, "learning_rate": 1.471963102107753e-05, "loss": 11.5331, "step": 30497 }, { "epoch": 1.6607379877886914, "grad_norm": 0.5344151145727453, "learning_rate": 1.4715026179399239e-05, "loss": 11.7591, "step": 30498 }, { "epoch": 1.6607924417852744, "grad_norm": 0.500887120599743, "learning_rate": 1.4710422000911183e-05, "loss": 11.7601, "step": 30499 }, { "epoch": 1.6608468957818574, "grad_norm": 0.5596855613702822, "learning_rate": 1.4705818485649237e-05, "loss": 11.6184, "step": 30500 }, { "epoch": 1.6609013497784404, "grad_norm": 0.6390268041634257, "learning_rate": 1.4701215633649102e-05, "loss": 11.8665, "step": 30501 }, { "epoch": 1.6609558037750234, "grad_norm": 0.5300312005429197, "learning_rate": 1.4696613444946627e-05, "loss": 11.7044, "step": 30502 }, { "epoch": 1.6610102577716064, "grad_norm": 0.5479748636290885, "learning_rate": 1.4692011919577609e-05, "loss": 11.6887, "step": 30503 }, { "epoch": 1.6610647117681894, "grad_norm": 0.5865166959549861, "learning_rate": 1.4687411057577782e-05, "loss": 11.8067, "step": 30504 }, { "epoch": 1.6611191657647724, "grad_norm": 0.631559322131889, "learning_rate": 1.4682810858982986e-05, "loss": 11.8915, "step": 30505 }, { "epoch": 1.6611736197613554, "grad_norm": 0.6867109202353935, "learning_rate": 1.4678211323828939e-05, "loss": 11.8789, "step": 30506 }, { "epoch": 1.6612280737579384, "grad_norm": 0.5306137175143503, "learning_rate": 1.4673612452151441e-05, "loss": 11.8295, "step": 30507 }, { "epoch": 1.6612825277545213, "grad_norm": 0.5384906241963868, "learning_rate": 1.4669014243986224e-05, "loss": 11.7728, "step": 30508 }, { "epoch": 1.6613369817511043, "grad_norm": 0.5363580618189482, "learning_rate": 1.4664416699369065e-05, "loss": 11.643, "step": 30509 }, { "epoch": 1.6613914357476873, "grad_norm": 0.5276766457965232, "learning_rate": 1.4659819818335729e-05, "loss": 11.7706, "step": 30510 }, { "epoch": 1.6614458897442703, "grad_norm": 0.5972428001730457, "learning_rate": 1.4655223600921952e-05, "loss": 11.7991, "step": 30511 }, { "epoch": 1.6615003437408533, "grad_norm": 0.5463440321434727, "learning_rate": 1.4650628047163472e-05, "loss": 11.7979, "step": 30512 }, { "epoch": 1.6615547977374363, "grad_norm": 0.5086481462827724, "learning_rate": 1.4646033157095996e-05, "loss": 11.8116, "step": 30513 }, { "epoch": 1.6616092517340193, "grad_norm": 0.553074260326025, "learning_rate": 1.4641438930755268e-05, "loss": 11.7544, "step": 30514 }, { "epoch": 1.6616637057306025, "grad_norm": 0.5367150667301452, "learning_rate": 1.4636845368177065e-05, "loss": 11.8423, "step": 30515 }, { "epoch": 1.6617181597271855, "grad_norm": 0.56086017051629, "learning_rate": 1.4632252469397034e-05, "loss": 11.8746, "step": 30516 }, { "epoch": 1.6617726137237685, "grad_norm": 0.5090796710024545, "learning_rate": 1.4627660234450957e-05, "loss": 11.8088, "step": 30517 }, { "epoch": 1.6618270677203515, "grad_norm": 0.5647977886294014, "learning_rate": 1.4623068663374484e-05, "loss": 11.8778, "step": 30518 }, { "epoch": 1.6618815217169345, "grad_norm": 0.5852868407151046, "learning_rate": 1.4618477756203364e-05, "loss": 11.7201, "step": 30519 }, { "epoch": 1.6619359757135175, "grad_norm": 0.6172226511052394, "learning_rate": 1.4613887512973257e-05, "loss": 11.7215, "step": 30520 }, { "epoch": 1.6619904297101007, "grad_norm": 0.5306618304665179, "learning_rate": 1.4609297933719913e-05, "loss": 11.7218, "step": 30521 }, { "epoch": 1.6620448837066837, "grad_norm": 0.5640490787690716, "learning_rate": 1.460470901847898e-05, "loss": 11.8726, "step": 30522 }, { "epoch": 1.6620993377032667, "grad_norm": 0.589495949418483, "learning_rate": 1.460012076728613e-05, "loss": 11.8359, "step": 30523 }, { "epoch": 1.6621537916998497, "grad_norm": 0.5767911074678742, "learning_rate": 1.4595533180177057e-05, "loss": 11.8568, "step": 30524 }, { "epoch": 1.6622082456964327, "grad_norm": 0.5925122392277705, "learning_rate": 1.4590946257187465e-05, "loss": 11.7018, "step": 30525 }, { "epoch": 1.6622626996930157, "grad_norm": 0.5372430755545777, "learning_rate": 1.4586359998352984e-05, "loss": 11.6798, "step": 30526 }, { "epoch": 1.6623171536895986, "grad_norm": 0.5798060665421161, "learning_rate": 1.4581774403709303e-05, "loss": 11.8282, "step": 30527 }, { "epoch": 1.6623716076861816, "grad_norm": 0.5419721798492204, "learning_rate": 1.4577189473292053e-05, "loss": 11.8512, "step": 30528 }, { "epoch": 1.6624260616827646, "grad_norm": 0.5229119328459965, "learning_rate": 1.4572605207136925e-05, "loss": 11.8269, "step": 30529 }, { "epoch": 1.6624805156793476, "grad_norm": 0.5830340866341808, "learning_rate": 1.4568021605279525e-05, "loss": 11.8343, "step": 30530 }, { "epoch": 1.6625349696759306, "grad_norm": 0.5839352903112112, "learning_rate": 1.4563438667755536e-05, "loss": 11.7647, "step": 30531 }, { "epoch": 1.6625894236725136, "grad_norm": 0.5442136620128387, "learning_rate": 1.4558856394600573e-05, "loss": 11.7684, "step": 30532 }, { "epoch": 1.6626438776690966, "grad_norm": 0.5287246160487524, "learning_rate": 1.455427478585024e-05, "loss": 11.7827, "step": 30533 }, { "epoch": 1.6626983316656796, "grad_norm": 0.6048785959870898, "learning_rate": 1.4549693841540235e-05, "loss": 11.8265, "step": 30534 }, { "epoch": 1.6627527856622626, "grad_norm": 0.5371811579078356, "learning_rate": 1.4545113561706114e-05, "loss": 11.9294, "step": 30535 }, { "epoch": 1.6628072396588456, "grad_norm": 0.5189119070180531, "learning_rate": 1.4540533946383516e-05, "loss": 11.7807, "step": 30536 }, { "epoch": 1.6628616936554286, "grad_norm": 0.5601368480764733, "learning_rate": 1.4535954995608081e-05, "loss": 11.7246, "step": 30537 }, { "epoch": 1.6629161476520118, "grad_norm": 0.5494316509945332, "learning_rate": 1.4531376709415368e-05, "loss": 11.8384, "step": 30538 }, { "epoch": 1.6629706016485948, "grad_norm": 0.521643862118866, "learning_rate": 1.4526799087841037e-05, "loss": 11.7458, "step": 30539 }, { "epoch": 1.6630250556451778, "grad_norm": 0.5813182171851603, "learning_rate": 1.4522222130920616e-05, "loss": 11.8722, "step": 30540 }, { "epoch": 1.6630795096417608, "grad_norm": 0.515015291051856, "learning_rate": 1.4517645838689754e-05, "loss": 11.6671, "step": 30541 }, { "epoch": 1.6631339636383438, "grad_norm": 0.6300347934852151, "learning_rate": 1.451307021118401e-05, "loss": 11.933, "step": 30542 }, { "epoch": 1.6631884176349268, "grad_norm": 0.5629815631744695, "learning_rate": 1.4508495248438958e-05, "loss": 11.8467, "step": 30543 }, { "epoch": 1.66324287163151, "grad_norm": 0.5630607797950447, "learning_rate": 1.4503920950490202e-05, "loss": 11.7637, "step": 30544 }, { "epoch": 1.663297325628093, "grad_norm": 0.5409344508512145, "learning_rate": 1.4499347317373268e-05, "loss": 11.8202, "step": 30545 }, { "epoch": 1.663351779624676, "grad_norm": 0.5470298260052968, "learning_rate": 1.4494774349123741e-05, "loss": 11.834, "step": 30546 }, { "epoch": 1.663406233621259, "grad_norm": 0.5327489775189177, "learning_rate": 1.4490202045777224e-05, "loss": 11.6929, "step": 30547 }, { "epoch": 1.663460687617842, "grad_norm": 0.5302068927531721, "learning_rate": 1.4485630407369211e-05, "loss": 11.6785, "step": 30548 }, { "epoch": 1.663515141614425, "grad_norm": 0.523235391529584, "learning_rate": 1.4481059433935296e-05, "loss": 11.812, "step": 30549 }, { "epoch": 1.663569595611008, "grad_norm": 0.5708136537775274, "learning_rate": 1.4476489125510973e-05, "loss": 11.728, "step": 30550 }, { "epoch": 1.663624049607591, "grad_norm": 0.5440375571530032, "learning_rate": 1.4471919482131846e-05, "loss": 11.7661, "step": 30551 }, { "epoch": 1.663678503604174, "grad_norm": 0.5273290339034447, "learning_rate": 1.4467350503833421e-05, "loss": 11.7998, "step": 30552 }, { "epoch": 1.663732957600757, "grad_norm": 0.837060976833387, "learning_rate": 1.4462782190651191e-05, "loss": 11.6781, "step": 30553 }, { "epoch": 1.66378741159734, "grad_norm": 0.5517925530154327, "learning_rate": 1.4458214542620729e-05, "loss": 11.889, "step": 30554 }, { "epoch": 1.6638418655939229, "grad_norm": 0.5398113363513517, "learning_rate": 1.4453647559777527e-05, "loss": 11.8162, "step": 30555 }, { "epoch": 1.6638963195905059, "grad_norm": 0.48781454370799227, "learning_rate": 1.4449081242157126e-05, "loss": 11.6628, "step": 30556 }, { "epoch": 1.6639507735870889, "grad_norm": 0.5324761965955556, "learning_rate": 1.4444515589794982e-05, "loss": 11.7681, "step": 30557 }, { "epoch": 1.6640052275836719, "grad_norm": 0.5190118095315441, "learning_rate": 1.4439950602726648e-05, "loss": 11.7628, "step": 30558 }, { "epoch": 1.6640596815802549, "grad_norm": 0.562217111189288, "learning_rate": 1.4435386280987618e-05, "loss": 11.9103, "step": 30559 }, { "epoch": 1.6641141355768378, "grad_norm": 0.5883842957215601, "learning_rate": 1.443082262461336e-05, "loss": 11.9338, "step": 30560 }, { "epoch": 1.6641685895734208, "grad_norm": 0.6249725874539235, "learning_rate": 1.4426259633639416e-05, "loss": 11.8424, "step": 30561 }, { "epoch": 1.664223043570004, "grad_norm": 0.4985586351457106, "learning_rate": 1.4421697308101178e-05, "loss": 11.7519, "step": 30562 }, { "epoch": 1.664277497566587, "grad_norm": 0.5391981641148595, "learning_rate": 1.4417135648034186e-05, "loss": 11.8427, "step": 30563 }, { "epoch": 1.66433195156317, "grad_norm": 0.5288522672615051, "learning_rate": 1.441257465347391e-05, "loss": 11.7846, "step": 30564 }, { "epoch": 1.664386405559753, "grad_norm": 0.5518269616252821, "learning_rate": 1.4408014324455787e-05, "loss": 11.7347, "step": 30565 }, { "epoch": 1.664440859556336, "grad_norm": 0.6027580192488022, "learning_rate": 1.4403454661015326e-05, "loss": 11.8817, "step": 30566 }, { "epoch": 1.664495313552919, "grad_norm": 0.5729240918869509, "learning_rate": 1.4398895663187927e-05, "loss": 11.7502, "step": 30567 }, { "epoch": 1.6645497675495022, "grad_norm": 0.5349704726100336, "learning_rate": 1.43943373310091e-05, "loss": 11.6887, "step": 30568 }, { "epoch": 1.6646042215460852, "grad_norm": 0.6587897180079224, "learning_rate": 1.4389779664514235e-05, "loss": 11.7798, "step": 30569 }, { "epoch": 1.6646586755426682, "grad_norm": 0.5412107808277605, "learning_rate": 1.4385222663738796e-05, "loss": 11.8893, "step": 30570 }, { "epoch": 1.6647131295392512, "grad_norm": 0.5234676013799723, "learning_rate": 1.4380666328718274e-05, "loss": 11.8058, "step": 30571 }, { "epoch": 1.6647675835358342, "grad_norm": 0.5545345381822373, "learning_rate": 1.4376110659488006e-05, "loss": 11.8276, "step": 30572 }, { "epoch": 1.6648220375324172, "grad_norm": 0.5127483290730295, "learning_rate": 1.4371555656083457e-05, "loss": 11.7903, "step": 30573 }, { "epoch": 1.6648764915290002, "grad_norm": 0.6074805114055226, "learning_rate": 1.4367001318540075e-05, "loss": 11.7282, "step": 30574 }, { "epoch": 1.6649309455255832, "grad_norm": 0.524109377181651, "learning_rate": 1.4362447646893218e-05, "loss": 11.774, "step": 30575 }, { "epoch": 1.6649853995221662, "grad_norm": 0.5469659882901008, "learning_rate": 1.4357894641178371e-05, "loss": 11.7587, "step": 30576 }, { "epoch": 1.6650398535187492, "grad_norm": 0.5216704136518607, "learning_rate": 1.435334230143086e-05, "loss": 11.8727, "step": 30577 }, { "epoch": 1.6650943075153322, "grad_norm": 0.5255879142118899, "learning_rate": 1.4348790627686149e-05, "loss": 11.8424, "step": 30578 }, { "epoch": 1.6651487615119152, "grad_norm": 0.5120144482353609, "learning_rate": 1.4344239619979583e-05, "loss": 11.7848, "step": 30579 }, { "epoch": 1.6652032155084981, "grad_norm": 0.6554859104398678, "learning_rate": 1.433968927834658e-05, "loss": 11.7708, "step": 30580 }, { "epoch": 1.6652576695050811, "grad_norm": 0.5535995453122468, "learning_rate": 1.4335139602822557e-05, "loss": 11.6216, "step": 30581 }, { "epoch": 1.6653121235016641, "grad_norm": 0.5198100048389207, "learning_rate": 1.4330590593442817e-05, "loss": 11.5929, "step": 30582 }, { "epoch": 1.6653665774982471, "grad_norm": 0.5358411842760149, "learning_rate": 1.4326042250242789e-05, "loss": 11.8566, "step": 30583 }, { "epoch": 1.6654210314948301, "grad_norm": 0.5785564122028076, "learning_rate": 1.43214945732578e-05, "loss": 11.7175, "step": 30584 }, { "epoch": 1.6654754854914133, "grad_norm": 0.5435001051217152, "learning_rate": 1.431694756252323e-05, "loss": 11.7581, "step": 30585 }, { "epoch": 1.6655299394879963, "grad_norm": 0.5384336380334687, "learning_rate": 1.4312401218074478e-05, "loss": 11.7637, "step": 30586 }, { "epoch": 1.6655843934845793, "grad_norm": 0.5429834110530761, "learning_rate": 1.4307855539946847e-05, "loss": 11.8063, "step": 30587 }, { "epoch": 1.6656388474811623, "grad_norm": 0.5496962632683134, "learning_rate": 1.4303310528175717e-05, "loss": 11.8142, "step": 30588 }, { "epoch": 1.6656933014777453, "grad_norm": 0.5093237154352173, "learning_rate": 1.4298766182796386e-05, "loss": 11.7745, "step": 30589 }, { "epoch": 1.6657477554743283, "grad_norm": 0.5365372478799709, "learning_rate": 1.4294222503844257e-05, "loss": 11.8519, "step": 30590 }, { "epoch": 1.6658022094709115, "grad_norm": 0.5333590014600259, "learning_rate": 1.4289679491354613e-05, "loss": 11.8323, "step": 30591 }, { "epoch": 1.6658566634674945, "grad_norm": 0.5855486768965955, "learning_rate": 1.4285137145362781e-05, "loss": 11.8671, "step": 30592 }, { "epoch": 1.6659111174640775, "grad_norm": 0.5117628893373161, "learning_rate": 1.4280595465904123e-05, "loss": 11.8224, "step": 30593 }, { "epoch": 1.6659655714606605, "grad_norm": 0.5177405067725183, "learning_rate": 1.42760544530139e-05, "loss": 11.727, "step": 30594 }, { "epoch": 1.6660200254572435, "grad_norm": 0.590421014802051, "learning_rate": 1.4271514106727458e-05, "loss": 11.8403, "step": 30595 }, { "epoch": 1.6660744794538265, "grad_norm": 0.5205639485560986, "learning_rate": 1.4266974427080115e-05, "loss": 11.8655, "step": 30596 }, { "epoch": 1.6661289334504095, "grad_norm": 0.6257887917244348, "learning_rate": 1.4262435414107134e-05, "loss": 11.9091, "step": 30597 }, { "epoch": 1.6661833874469925, "grad_norm": 0.5213222188455704, "learning_rate": 1.4257897067843862e-05, "loss": 11.7735, "step": 30598 }, { "epoch": 1.6662378414435755, "grad_norm": 0.6016607595260474, "learning_rate": 1.4253359388325537e-05, "loss": 11.7408, "step": 30599 }, { "epoch": 1.6662922954401584, "grad_norm": 0.5728991967714202, "learning_rate": 1.4248822375587489e-05, "loss": 11.7922, "step": 30600 }, { "epoch": 1.6663467494367414, "grad_norm": 0.6421991607919589, "learning_rate": 1.4244286029664988e-05, "loss": 11.9538, "step": 30601 }, { "epoch": 1.6664012034333244, "grad_norm": 0.527930619823098, "learning_rate": 1.4239750350593273e-05, "loss": 11.8458, "step": 30602 }, { "epoch": 1.6664556574299074, "grad_norm": 0.5772038038487892, "learning_rate": 1.4235215338407658e-05, "loss": 11.7478, "step": 30603 }, { "epoch": 1.6665101114264904, "grad_norm": 0.5274345854094674, "learning_rate": 1.4230680993143376e-05, "loss": 11.7769, "step": 30604 }, { "epoch": 1.6665645654230734, "grad_norm": 0.5411191221571823, "learning_rate": 1.4226147314835714e-05, "loss": 11.7858, "step": 30605 }, { "epoch": 1.6666190194196564, "grad_norm": 0.4910491243074338, "learning_rate": 1.4221614303519904e-05, "loss": 11.7864, "step": 30606 }, { "epoch": 1.6666734734162394, "grad_norm": 0.5490921718782192, "learning_rate": 1.4217081959231204e-05, "loss": 11.8052, "step": 30607 }, { "epoch": 1.6667279274128226, "grad_norm": 0.5370539524170274, "learning_rate": 1.4212550282004878e-05, "loss": 11.8518, "step": 30608 }, { "epoch": 1.6667823814094056, "grad_norm": 0.5763943784481144, "learning_rate": 1.4208019271876128e-05, "loss": 11.8761, "step": 30609 }, { "epoch": 1.6668368354059886, "grad_norm": 0.5489631288190735, "learning_rate": 1.4203488928880226e-05, "loss": 11.7234, "step": 30610 }, { "epoch": 1.6668912894025716, "grad_norm": 0.6224037636667891, "learning_rate": 1.4198959253052391e-05, "loss": 11.8845, "step": 30611 }, { "epoch": 1.6669457433991546, "grad_norm": 0.5296441792635029, "learning_rate": 1.4194430244427802e-05, "loss": 11.8248, "step": 30612 }, { "epoch": 1.6670001973957376, "grad_norm": 0.6968604979288429, "learning_rate": 1.4189901903041746e-05, "loss": 11.8933, "step": 30613 }, { "epoch": 1.6670546513923208, "grad_norm": 0.5899802956762564, "learning_rate": 1.4185374228929382e-05, "loss": 11.9042, "step": 30614 }, { "epoch": 1.6671091053889038, "grad_norm": 0.5404336158935368, "learning_rate": 1.4180847222125959e-05, "loss": 11.834, "step": 30615 }, { "epoch": 1.6671635593854868, "grad_norm": 0.5609721932057188, "learning_rate": 1.4176320882666627e-05, "loss": 11.8251, "step": 30616 }, { "epoch": 1.6672180133820698, "grad_norm": 0.5526477556977242, "learning_rate": 1.4171795210586658e-05, "loss": 11.7074, "step": 30617 }, { "epoch": 1.6672724673786528, "grad_norm": 0.5548255554874686, "learning_rate": 1.4167270205921169e-05, "loss": 11.7813, "step": 30618 }, { "epoch": 1.6673269213752357, "grad_norm": 0.5542599397274358, "learning_rate": 1.4162745868705373e-05, "loss": 11.7384, "step": 30619 }, { "epoch": 1.6673813753718187, "grad_norm": 0.5542399215659537, "learning_rate": 1.4158222198974502e-05, "loss": 11.8433, "step": 30620 }, { "epoch": 1.6674358293684017, "grad_norm": 0.5338091516642297, "learning_rate": 1.415369919676368e-05, "loss": 11.8447, "step": 30621 }, { "epoch": 1.6674902833649847, "grad_norm": 0.5192170780996689, "learning_rate": 1.414917686210806e-05, "loss": 11.8986, "step": 30622 }, { "epoch": 1.6675447373615677, "grad_norm": 0.563720811981106, "learning_rate": 1.4144655195042877e-05, "loss": 11.8853, "step": 30623 }, { "epoch": 1.6675991913581507, "grad_norm": 0.5031909712320997, "learning_rate": 1.4140134195603216e-05, "loss": 11.7591, "step": 30624 }, { "epoch": 1.6676536453547337, "grad_norm": 0.6025047116870932, "learning_rate": 1.4135613863824304e-05, "loss": 11.7564, "step": 30625 }, { "epoch": 1.6677080993513167, "grad_norm": 0.4733156463199147, "learning_rate": 1.4131094199741224e-05, "loss": 11.8403, "step": 30626 }, { "epoch": 1.6677625533478997, "grad_norm": 0.5721359322113635, "learning_rate": 1.4126575203389181e-05, "loss": 11.8232, "step": 30627 }, { "epoch": 1.6678170073444827, "grad_norm": 0.534762221395335, "learning_rate": 1.412205687480328e-05, "loss": 11.779, "step": 30628 }, { "epoch": 1.6678714613410657, "grad_norm": 0.5322262854907802, "learning_rate": 1.411753921401865e-05, "loss": 11.7902, "step": 30629 }, { "epoch": 1.6679259153376487, "grad_norm": 0.5514355863362038, "learning_rate": 1.4113022221070472e-05, "loss": 11.8879, "step": 30630 }, { "epoch": 1.6679803693342317, "grad_norm": 0.502506217205064, "learning_rate": 1.410850589599383e-05, "loss": 11.7194, "step": 30631 }, { "epoch": 1.6680348233308149, "grad_norm": 0.5694251018926452, "learning_rate": 1.410399023882385e-05, "loss": 11.8151, "step": 30632 }, { "epoch": 1.6680892773273979, "grad_norm": 0.5125036313871394, "learning_rate": 1.4099475249595628e-05, "loss": 11.8312, "step": 30633 }, { "epoch": 1.6681437313239809, "grad_norm": 0.5450231604275171, "learning_rate": 1.4094960928344292e-05, "loss": 11.7312, "step": 30634 }, { "epoch": 1.6681981853205639, "grad_norm": 0.5849536903654996, "learning_rate": 1.4090447275104968e-05, "loss": 11.8918, "step": 30635 }, { "epoch": 1.6682526393171468, "grad_norm": 0.5251727664436304, "learning_rate": 1.4085934289912706e-05, "loss": 11.8149, "step": 30636 }, { "epoch": 1.66830709331373, "grad_norm": 0.6149918819302681, "learning_rate": 1.4081421972802655e-05, "loss": 11.8117, "step": 30637 }, { "epoch": 1.668361547310313, "grad_norm": 0.5665962930161643, "learning_rate": 1.4076910323809845e-05, "loss": 11.5848, "step": 30638 }, { "epoch": 1.668416001306896, "grad_norm": 0.5111726167768292, "learning_rate": 1.4072399342969422e-05, "loss": 11.8496, "step": 30639 }, { "epoch": 1.668470455303479, "grad_norm": 0.552531943121508, "learning_rate": 1.4067889030316406e-05, "loss": 11.6825, "step": 30640 }, { "epoch": 1.668524909300062, "grad_norm": 0.5175033775069464, "learning_rate": 1.4063379385885911e-05, "loss": 11.7762, "step": 30641 }, { "epoch": 1.668579363296645, "grad_norm": 0.6186484060029801, "learning_rate": 1.4058870409713e-05, "loss": 12.0445, "step": 30642 }, { "epoch": 1.668633817293228, "grad_norm": 0.5427269369432087, "learning_rate": 1.4054362101832696e-05, "loss": 11.8152, "step": 30643 }, { "epoch": 1.668688271289811, "grad_norm": 0.5530534526730182, "learning_rate": 1.4049854462280088e-05, "loss": 11.8655, "step": 30644 }, { "epoch": 1.668742725286394, "grad_norm": 0.5780781935152243, "learning_rate": 1.4045347491090254e-05, "loss": 11.7665, "step": 30645 }, { "epoch": 1.668797179282977, "grad_norm": 0.5181392305023769, "learning_rate": 1.4040841188298182e-05, "loss": 11.7054, "step": 30646 }, { "epoch": 1.66885163327956, "grad_norm": 0.636847789134479, "learning_rate": 1.4036335553938962e-05, "loss": 11.805, "step": 30647 }, { "epoch": 1.668906087276143, "grad_norm": 0.5660885977660897, "learning_rate": 1.4031830588047601e-05, "loss": 11.8311, "step": 30648 }, { "epoch": 1.668960541272726, "grad_norm": 0.6833788173018199, "learning_rate": 1.4027326290659159e-05, "loss": 11.8768, "step": 30649 }, { "epoch": 1.669014995269309, "grad_norm": 0.6139854201866528, "learning_rate": 1.4022822661808621e-05, "loss": 11.8321, "step": 30650 }, { "epoch": 1.669069449265892, "grad_norm": 0.5552076093213949, "learning_rate": 1.4018319701531035e-05, "loss": 11.7677, "step": 30651 }, { "epoch": 1.669123903262475, "grad_norm": 0.5209814904995312, "learning_rate": 1.4013817409861463e-05, "loss": 11.7055, "step": 30652 }, { "epoch": 1.669178357259058, "grad_norm": 0.5339816384169245, "learning_rate": 1.400931578683482e-05, "loss": 11.7872, "step": 30653 }, { "epoch": 1.669232811255641, "grad_norm": 0.5683988407237588, "learning_rate": 1.400481483248618e-05, "loss": 11.8208, "step": 30654 }, { "epoch": 1.6692872652522242, "grad_norm": 0.5907841922901259, "learning_rate": 1.4000314546850502e-05, "loss": 11.8629, "step": 30655 }, { "epoch": 1.6693417192488071, "grad_norm": 0.5407176440878831, "learning_rate": 1.3995814929962791e-05, "loss": 11.8693, "step": 30656 }, { "epoch": 1.6693961732453901, "grad_norm": 0.5387535016711819, "learning_rate": 1.3991315981858077e-05, "loss": 11.8178, "step": 30657 }, { "epoch": 1.6694506272419731, "grad_norm": 0.5622576450184463, "learning_rate": 1.3986817702571286e-05, "loss": 11.8224, "step": 30658 }, { "epoch": 1.6695050812385561, "grad_norm": 0.5118073203746123, "learning_rate": 1.3982320092137447e-05, "loss": 11.8534, "step": 30659 }, { "epoch": 1.6695595352351391, "grad_norm": 0.540542840608444, "learning_rate": 1.3977823150591496e-05, "loss": 11.8103, "step": 30660 }, { "epoch": 1.6696139892317223, "grad_norm": 0.5482135433940559, "learning_rate": 1.3973326877968429e-05, "loss": 11.7786, "step": 30661 }, { "epoch": 1.6696684432283053, "grad_norm": 0.553369146661749, "learning_rate": 1.3968831274303206e-05, "loss": 11.8318, "step": 30662 }, { "epoch": 1.6697228972248883, "grad_norm": 0.5073757333984068, "learning_rate": 1.3964336339630757e-05, "loss": 11.7957, "step": 30663 }, { "epoch": 1.6697773512214713, "grad_norm": 0.5055837732603168, "learning_rate": 1.3959842073986085e-05, "loss": 11.7707, "step": 30664 }, { "epoch": 1.6698318052180543, "grad_norm": 0.559876593933698, "learning_rate": 1.3955348477404072e-05, "loss": 11.7811, "step": 30665 }, { "epoch": 1.6698862592146373, "grad_norm": 0.5333976863578087, "learning_rate": 1.395085554991974e-05, "loss": 11.8541, "step": 30666 }, { "epoch": 1.6699407132112203, "grad_norm": 0.5454917870028871, "learning_rate": 1.3946363291567944e-05, "loss": 11.797, "step": 30667 }, { "epoch": 1.6699951672078033, "grad_norm": 0.530962618090314, "learning_rate": 1.3941871702383669e-05, "loss": 11.8457, "step": 30668 }, { "epoch": 1.6700496212043863, "grad_norm": 0.488430907721536, "learning_rate": 1.3937380782401855e-05, "loss": 11.7797, "step": 30669 }, { "epoch": 1.6701040752009693, "grad_norm": 0.5307821084548326, "learning_rate": 1.3932890531657373e-05, "loss": 11.796, "step": 30670 }, { "epoch": 1.6701585291975523, "grad_norm": 0.5233877365304789, "learning_rate": 1.3928400950185194e-05, "loss": 11.8846, "step": 30671 }, { "epoch": 1.6702129831941352, "grad_norm": 0.5273684616147256, "learning_rate": 1.3923912038020204e-05, "loss": 11.7697, "step": 30672 }, { "epoch": 1.6702674371907182, "grad_norm": 0.6265352816580725, "learning_rate": 1.3919423795197284e-05, "loss": 11.7663, "step": 30673 }, { "epoch": 1.6703218911873012, "grad_norm": 0.5377756673137262, "learning_rate": 1.3914936221751384e-05, "loss": 11.7565, "step": 30674 }, { "epoch": 1.6703763451838842, "grad_norm": 0.5160009649868486, "learning_rate": 1.3910449317717356e-05, "loss": 11.7871, "step": 30675 }, { "epoch": 1.6704307991804672, "grad_norm": 0.5258348428317202, "learning_rate": 1.3905963083130135e-05, "loss": 11.843, "step": 30676 }, { "epoch": 1.6704852531770502, "grad_norm": 0.5179978593831852, "learning_rate": 1.3901477518024552e-05, "loss": 11.7681, "step": 30677 }, { "epoch": 1.6705397071736334, "grad_norm": 0.5239347580002798, "learning_rate": 1.3896992622435523e-05, "loss": 11.7877, "step": 30678 }, { "epoch": 1.6705941611702164, "grad_norm": 0.5381226775521556, "learning_rate": 1.3892508396397941e-05, "loss": 11.7176, "step": 30679 }, { "epoch": 1.6706486151667994, "grad_norm": 0.5557597232340132, "learning_rate": 1.3888024839946635e-05, "loss": 11.8947, "step": 30680 }, { "epoch": 1.6707030691633824, "grad_norm": 0.5284770407387012, "learning_rate": 1.3883541953116508e-05, "loss": 11.7098, "step": 30681 }, { "epoch": 1.6707575231599654, "grad_norm": 0.5503145099653577, "learning_rate": 1.3879059735942401e-05, "loss": 11.8881, "step": 30682 }, { "epoch": 1.6708119771565484, "grad_norm": 0.6382435746497301, "learning_rate": 1.3874578188459153e-05, "loss": 11.8654, "step": 30683 }, { "epoch": 1.6708664311531316, "grad_norm": 0.5822616467219236, "learning_rate": 1.3870097310701636e-05, "loss": 11.9282, "step": 30684 }, { "epoch": 1.6709208851497146, "grad_norm": 0.5727434133047872, "learning_rate": 1.3865617102704676e-05, "loss": 11.7462, "step": 30685 }, { "epoch": 1.6709753391462976, "grad_norm": 0.515647104081518, "learning_rate": 1.3861137564503135e-05, "loss": 11.6965, "step": 30686 }, { "epoch": 1.6710297931428806, "grad_norm": 0.5183354230310832, "learning_rate": 1.385665869613182e-05, "loss": 11.7704, "step": 30687 }, { "epoch": 1.6710842471394636, "grad_norm": 0.6054209867642117, "learning_rate": 1.3852180497625588e-05, "loss": 11.7721, "step": 30688 }, { "epoch": 1.6711387011360466, "grad_norm": 0.5805249737938416, "learning_rate": 1.384770296901924e-05, "loss": 11.8295, "step": 30689 }, { "epoch": 1.6711931551326296, "grad_norm": 0.577409391021253, "learning_rate": 1.3843226110347584e-05, "loss": 11.714, "step": 30690 }, { "epoch": 1.6712476091292126, "grad_norm": 0.5656187536623469, "learning_rate": 1.3838749921645477e-05, "loss": 11.8524, "step": 30691 }, { "epoch": 1.6713020631257955, "grad_norm": 0.5594468803485352, "learning_rate": 1.3834274402947711e-05, "loss": 11.7565, "step": 30692 }, { "epoch": 1.6713565171223785, "grad_norm": 0.5624642442627585, "learning_rate": 1.3829799554289036e-05, "loss": 11.851, "step": 30693 }, { "epoch": 1.6714109711189615, "grad_norm": 0.6382464063271283, "learning_rate": 1.3825325375704323e-05, "loss": 11.7675, "step": 30694 }, { "epoch": 1.6714654251155445, "grad_norm": 0.5776129033826342, "learning_rate": 1.3820851867228313e-05, "loss": 11.8955, "step": 30695 }, { "epoch": 1.6715198791121275, "grad_norm": 0.5297254642489581, "learning_rate": 1.3816379028895832e-05, "loss": 11.8662, "step": 30696 }, { "epoch": 1.6715743331087105, "grad_norm": 0.5479233691876345, "learning_rate": 1.3811906860741608e-05, "loss": 11.8863, "step": 30697 }, { "epoch": 1.6716287871052935, "grad_norm": 0.6292991158702002, "learning_rate": 1.3807435362800481e-05, "loss": 11.7588, "step": 30698 }, { "epoch": 1.6716832411018765, "grad_norm": 0.517464277885571, "learning_rate": 1.3802964535107177e-05, "loss": 11.8557, "step": 30699 }, { "epoch": 1.6717376950984595, "grad_norm": 0.5444021249152353, "learning_rate": 1.3798494377696459e-05, "loss": 11.8088, "step": 30700 }, { "epoch": 1.6717921490950425, "grad_norm": 0.5918704828124932, "learning_rate": 1.379402489060314e-05, "loss": 11.7818, "step": 30701 }, { "epoch": 1.6718466030916257, "grad_norm": 0.5443720411630387, "learning_rate": 1.3789556073861931e-05, "loss": 11.9303, "step": 30702 }, { "epoch": 1.6719010570882087, "grad_norm": 0.5499557398483298, "learning_rate": 1.37850879275076e-05, "loss": 11.7787, "step": 30703 }, { "epoch": 1.6719555110847917, "grad_norm": 0.49798541994472717, "learning_rate": 1.3780620451574855e-05, "loss": 11.7193, "step": 30704 }, { "epoch": 1.6720099650813747, "grad_norm": 0.5402006959711375, "learning_rate": 1.3776153646098467e-05, "loss": 11.7147, "step": 30705 }, { "epoch": 1.6720644190779577, "grad_norm": 0.5664303938988359, "learning_rate": 1.3771687511113186e-05, "loss": 11.8356, "step": 30706 }, { "epoch": 1.6721188730745409, "grad_norm": 0.5531096500165288, "learning_rate": 1.3767222046653705e-05, "loss": 11.8854, "step": 30707 }, { "epoch": 1.6721733270711239, "grad_norm": 0.5545658808717064, "learning_rate": 1.3762757252754788e-05, "loss": 11.8637, "step": 30708 }, { "epoch": 1.6722277810677069, "grad_norm": 0.569745662900167, "learning_rate": 1.3758293129451116e-05, "loss": 11.884, "step": 30709 }, { "epoch": 1.6722822350642899, "grad_norm": 0.5800772003933736, "learning_rate": 1.375382967677743e-05, "loss": 11.8379, "step": 30710 }, { "epoch": 1.6723366890608728, "grad_norm": 0.5226995107064963, "learning_rate": 1.3749366894768412e-05, "loss": 11.8163, "step": 30711 }, { "epoch": 1.6723911430574558, "grad_norm": 0.5779024817809948, "learning_rate": 1.37449047834588e-05, "loss": 11.8702, "step": 30712 }, { "epoch": 1.6724455970540388, "grad_norm": 0.5495962175139693, "learning_rate": 1.374044334288328e-05, "loss": 11.742, "step": 30713 }, { "epoch": 1.6725000510506218, "grad_norm": 0.5564836727694991, "learning_rate": 1.3735982573076511e-05, "loss": 11.757, "step": 30714 }, { "epoch": 1.6725545050472048, "grad_norm": 0.5740085108021452, "learning_rate": 1.3731522474073233e-05, "loss": 11.7604, "step": 30715 }, { "epoch": 1.6726089590437878, "grad_norm": 0.5160515161411331, "learning_rate": 1.3727063045908084e-05, "loss": 11.8244, "step": 30716 }, { "epoch": 1.6726634130403708, "grad_norm": 0.543489470428873, "learning_rate": 1.3722604288615759e-05, "loss": 11.7809, "step": 30717 }, { "epoch": 1.6727178670369538, "grad_norm": 0.5175571927238948, "learning_rate": 1.3718146202230953e-05, "loss": 11.8779, "step": 30718 }, { "epoch": 1.6727723210335368, "grad_norm": 0.5198347335821604, "learning_rate": 1.371368878678828e-05, "loss": 11.7775, "step": 30719 }, { "epoch": 1.6728267750301198, "grad_norm": 0.5414702976639753, "learning_rate": 1.3709232042322472e-05, "loss": 11.8345, "step": 30720 }, { "epoch": 1.6728812290267028, "grad_norm": 0.6141249785605588, "learning_rate": 1.3704775968868111e-05, "loss": 11.9421, "step": 30721 }, { "epoch": 1.6729356830232858, "grad_norm": 0.7390792488911934, "learning_rate": 1.3700320566459912e-05, "loss": 11.7716, "step": 30722 }, { "epoch": 1.6729901370198688, "grad_norm": 0.5173868267241312, "learning_rate": 1.3695865835132493e-05, "loss": 11.7361, "step": 30723 }, { "epoch": 1.6730445910164518, "grad_norm": 0.5474029527500227, "learning_rate": 1.3691411774920471e-05, "loss": 11.8611, "step": 30724 }, { "epoch": 1.673099045013035, "grad_norm": 0.5514893474856272, "learning_rate": 1.3686958385858529e-05, "loss": 11.8024, "step": 30725 }, { "epoch": 1.673153499009618, "grad_norm": 0.577040005904867, "learning_rate": 1.368250566798125e-05, "loss": 11.7137, "step": 30726 }, { "epoch": 1.673207953006201, "grad_norm": 0.5475240789568734, "learning_rate": 1.3678053621323283e-05, "loss": 11.7408, "step": 30727 }, { "epoch": 1.673262407002784, "grad_norm": 0.530659666503884, "learning_rate": 1.367360224591927e-05, "loss": 11.8441, "step": 30728 }, { "epoch": 1.673316860999367, "grad_norm": 0.5611999792445183, "learning_rate": 1.3669151541803771e-05, "loss": 11.8717, "step": 30729 }, { "epoch": 1.67337131499595, "grad_norm": 0.5306339523517843, "learning_rate": 1.3664701509011457e-05, "loss": 11.806, "step": 30730 }, { "epoch": 1.6734257689925331, "grad_norm": 0.5219800102886839, "learning_rate": 1.3660252147576879e-05, "loss": 11.6249, "step": 30731 }, { "epoch": 1.6734802229891161, "grad_norm": 0.574986750708085, "learning_rate": 1.3655803457534688e-05, "loss": 11.7464, "step": 30732 }, { "epoch": 1.6735346769856991, "grad_norm": 0.6243519582976768, "learning_rate": 1.3651355438919444e-05, "loss": 11.9312, "step": 30733 }, { "epoch": 1.6735891309822821, "grad_norm": 0.5939908201828773, "learning_rate": 1.364690809176572e-05, "loss": 11.8336, "step": 30734 }, { "epoch": 1.6736435849788651, "grad_norm": 0.5744204053403791, "learning_rate": 1.3642461416108142e-05, "loss": 11.6633, "step": 30735 }, { "epoch": 1.673698038975448, "grad_norm": 0.5276219268542305, "learning_rate": 1.3638015411981242e-05, "loss": 11.7969, "step": 30736 }, { "epoch": 1.673752492972031, "grad_norm": 0.4895900857711987, "learning_rate": 1.3633570079419644e-05, "loss": 11.5451, "step": 30737 }, { "epoch": 1.673806946968614, "grad_norm": 0.6192604787760138, "learning_rate": 1.3629125418457867e-05, "loss": 11.8738, "step": 30738 }, { "epoch": 1.673861400965197, "grad_norm": 0.5183316054407755, "learning_rate": 1.3624681429130493e-05, "loss": 11.8157, "step": 30739 }, { "epoch": 1.67391585496178, "grad_norm": 0.5352572930781914, "learning_rate": 1.3620238111472095e-05, "loss": 11.7011, "step": 30740 }, { "epoch": 1.673970308958363, "grad_norm": 0.6608333963970415, "learning_rate": 1.3615795465517201e-05, "loss": 11.9022, "step": 30741 }, { "epoch": 1.674024762954946, "grad_norm": 0.5645761140980992, "learning_rate": 1.3611353491300383e-05, "loss": 11.8001, "step": 30742 }, { "epoch": 1.674079216951529, "grad_norm": 0.5175858894914743, "learning_rate": 1.360691218885617e-05, "loss": 11.7656, "step": 30743 }, { "epoch": 1.674133670948112, "grad_norm": 0.5380103375369542, "learning_rate": 1.3602471558219076e-05, "loss": 11.7123, "step": 30744 }, { "epoch": 1.674188124944695, "grad_norm": 0.5079453659542924, "learning_rate": 1.3598031599423666e-05, "loss": 11.7998, "step": 30745 }, { "epoch": 1.674242578941278, "grad_norm": 0.5437587357002618, "learning_rate": 1.3593592312504444e-05, "loss": 11.8777, "step": 30746 }, { "epoch": 1.674297032937861, "grad_norm": 0.5996576782136743, "learning_rate": 1.3589153697495948e-05, "loss": 11.8434, "step": 30747 }, { "epoch": 1.6743514869344442, "grad_norm": 0.5709378066021393, "learning_rate": 1.3584715754432664e-05, "loss": 11.79, "step": 30748 }, { "epoch": 1.6744059409310272, "grad_norm": 0.5601905928731062, "learning_rate": 1.3580278483349129e-05, "loss": 11.7178, "step": 30749 }, { "epoch": 1.6744603949276102, "grad_norm": 0.5323236474538788, "learning_rate": 1.3575841884279861e-05, "loss": 11.7674, "step": 30750 }, { "epoch": 1.6745148489241932, "grad_norm": 0.507929985239759, "learning_rate": 1.3571405957259309e-05, "loss": 11.7714, "step": 30751 }, { "epoch": 1.6745693029207762, "grad_norm": 0.48647216203174226, "learning_rate": 1.3566970702322058e-05, "loss": 11.8004, "step": 30752 }, { "epoch": 1.6746237569173592, "grad_norm": 0.5567592249661721, "learning_rate": 1.3562536119502477e-05, "loss": 11.5576, "step": 30753 }, { "epoch": 1.6746782109139424, "grad_norm": 0.611344391102703, "learning_rate": 1.3558102208835121e-05, "loss": 11.7727, "step": 30754 }, { "epoch": 1.6747326649105254, "grad_norm": 0.5741730647314339, "learning_rate": 1.3553668970354483e-05, "loss": 11.8161, "step": 30755 }, { "epoch": 1.6747871189071084, "grad_norm": 0.5089711773059751, "learning_rate": 1.3549236404094978e-05, "loss": 11.6753, "step": 30756 }, { "epoch": 1.6748415729036914, "grad_norm": 0.5474695100402294, "learning_rate": 1.3544804510091136e-05, "loss": 11.7483, "step": 30757 }, { "epoch": 1.6748960269002744, "grad_norm": 0.5201051846364081, "learning_rate": 1.3540373288377372e-05, "loss": 11.6236, "step": 30758 }, { "epoch": 1.6749504808968574, "grad_norm": 0.6008503605801372, "learning_rate": 1.3535942738988194e-05, "loss": 11.911, "step": 30759 }, { "epoch": 1.6750049348934404, "grad_norm": 0.5490633818844283, "learning_rate": 1.3531512861957995e-05, "loss": 11.8329, "step": 30760 }, { "epoch": 1.6750593888900234, "grad_norm": 0.5956673616462292, "learning_rate": 1.3527083657321248e-05, "loss": 11.6763, "step": 30761 }, { "epoch": 1.6751138428866064, "grad_norm": 0.5871668559190035, "learning_rate": 1.3522655125112449e-05, "loss": 11.8177, "step": 30762 }, { "epoch": 1.6751682968831894, "grad_norm": 0.5252319294898933, "learning_rate": 1.3518227265365945e-05, "loss": 11.8208, "step": 30763 }, { "epoch": 1.6752227508797723, "grad_norm": 0.6494343850464059, "learning_rate": 1.3513800078116234e-05, "loss": 11.8865, "step": 30764 }, { "epoch": 1.6752772048763553, "grad_norm": 0.5577113683637119, "learning_rate": 1.3509373563397688e-05, "loss": 11.8043, "step": 30765 }, { "epoch": 1.6753316588729383, "grad_norm": 0.5479970067312949, "learning_rate": 1.3504947721244753e-05, "loss": 11.8447, "step": 30766 }, { "epoch": 1.6753861128695213, "grad_norm": 0.5144120454661337, "learning_rate": 1.3500522551691885e-05, "loss": 11.7687, "step": 30767 }, { "epoch": 1.6754405668661043, "grad_norm": 0.5836956491599675, "learning_rate": 1.3496098054773432e-05, "loss": 11.8723, "step": 30768 }, { "epoch": 1.6754950208626873, "grad_norm": 0.5516998216749677, "learning_rate": 1.3491674230523842e-05, "loss": 11.7775, "step": 30769 }, { "epoch": 1.6755494748592703, "grad_norm": 0.6221994938327313, "learning_rate": 1.3487251078977492e-05, "loss": 11.9036, "step": 30770 }, { "epoch": 1.6756039288558535, "grad_norm": 0.4966220869176989, "learning_rate": 1.3482828600168795e-05, "loss": 11.7278, "step": 30771 }, { "epoch": 1.6756583828524365, "grad_norm": 0.5655745151714466, "learning_rate": 1.3478406794132137e-05, "loss": 11.791, "step": 30772 }, { "epoch": 1.6757128368490195, "grad_norm": 0.5737705742226658, "learning_rate": 1.3473985660901878e-05, "loss": 11.8163, "step": 30773 }, { "epoch": 1.6757672908456025, "grad_norm": 0.5724829551180888, "learning_rate": 1.3469565200512434e-05, "loss": 11.8458, "step": 30774 }, { "epoch": 1.6758217448421855, "grad_norm": 0.6026529526748534, "learning_rate": 1.3465145412998148e-05, "loss": 11.7957, "step": 30775 }, { "epoch": 1.6758761988387685, "grad_norm": 0.5668312546691957, "learning_rate": 1.34607262983934e-05, "loss": 11.7239, "step": 30776 }, { "epoch": 1.6759306528353517, "grad_norm": 0.5320837458259533, "learning_rate": 1.3456307856732575e-05, "loss": 11.7107, "step": 30777 }, { "epoch": 1.6759851068319347, "grad_norm": 0.5371011898510129, "learning_rate": 1.345189008804999e-05, "loss": 11.7957, "step": 30778 }, { "epoch": 1.6760395608285177, "grad_norm": 0.60628546475273, "learning_rate": 1.344747299238005e-05, "loss": 11.8693, "step": 30779 }, { "epoch": 1.6760940148251007, "grad_norm": 0.5945148256243175, "learning_rate": 1.344305656975705e-05, "loss": 11.8287, "step": 30780 }, { "epoch": 1.6761484688216837, "grad_norm": 0.5422130819635125, "learning_rate": 1.3438640820215376e-05, "loss": 11.7867, "step": 30781 }, { "epoch": 1.6762029228182667, "grad_norm": 0.502865469110512, "learning_rate": 1.3434225743789352e-05, "loss": 11.6306, "step": 30782 }, { "epoch": 1.6762573768148497, "grad_norm": 0.55338832640472, "learning_rate": 1.3429811340513287e-05, "loss": 11.7155, "step": 30783 }, { "epoch": 1.6763118308114326, "grad_norm": 0.5342679761102067, "learning_rate": 1.3425397610421541e-05, "loss": 11.7603, "step": 30784 }, { "epoch": 1.6763662848080156, "grad_norm": 0.5849556399975631, "learning_rate": 1.34209845535484e-05, "loss": 11.7956, "step": 30785 }, { "epoch": 1.6764207388045986, "grad_norm": 0.5567148521287311, "learning_rate": 1.3416572169928233e-05, "loss": 11.56, "step": 30786 }, { "epoch": 1.6764751928011816, "grad_norm": 0.5522869294628729, "learning_rate": 1.341216045959529e-05, "loss": 11.6991, "step": 30787 }, { "epoch": 1.6765296467977646, "grad_norm": 0.5534686532411146, "learning_rate": 1.3407749422583915e-05, "loss": 11.8312, "step": 30788 }, { "epoch": 1.6765841007943476, "grad_norm": 0.59627571237323, "learning_rate": 1.3403339058928422e-05, "loss": 11.9478, "step": 30789 }, { "epoch": 1.6766385547909306, "grad_norm": 0.4876074041422157, "learning_rate": 1.339892936866306e-05, "loss": 11.7527, "step": 30790 }, { "epoch": 1.6766930087875136, "grad_norm": 0.5432630204509983, "learning_rate": 1.339452035182217e-05, "loss": 11.819, "step": 30791 }, { "epoch": 1.6767474627840966, "grad_norm": 0.5341624086670461, "learning_rate": 1.3390112008439992e-05, "loss": 11.6182, "step": 30792 }, { "epoch": 1.6768019167806796, "grad_norm": 0.5474969809089916, "learning_rate": 1.3385704338550853e-05, "loss": 11.7691, "step": 30793 }, { "epoch": 1.6768563707772626, "grad_norm": 0.5223113431713742, "learning_rate": 1.3381297342189004e-05, "loss": 11.7281, "step": 30794 }, { "epoch": 1.6769108247738458, "grad_norm": 0.5546933604599444, "learning_rate": 1.3376891019388682e-05, "loss": 11.8756, "step": 30795 }, { "epoch": 1.6769652787704288, "grad_norm": 0.5150681125434099, "learning_rate": 1.3372485370184218e-05, "loss": 11.8583, "step": 30796 }, { "epoch": 1.6770197327670118, "grad_norm": 0.5345053336037571, "learning_rate": 1.3368080394609794e-05, "loss": 11.747, "step": 30797 }, { "epoch": 1.6770741867635948, "grad_norm": 0.5870743626595543, "learning_rate": 1.3363676092699718e-05, "loss": 11.9117, "step": 30798 }, { "epoch": 1.6771286407601778, "grad_norm": 0.5435534278676204, "learning_rate": 1.3359272464488248e-05, "loss": 11.8437, "step": 30799 }, { "epoch": 1.6771830947567608, "grad_norm": 0.4971165194270236, "learning_rate": 1.3354869510009583e-05, "loss": 11.7076, "step": 30800 }, { "epoch": 1.677237548753344, "grad_norm": 0.5501541279707652, "learning_rate": 1.3350467229298002e-05, "loss": 11.7002, "step": 30801 }, { "epoch": 1.677292002749927, "grad_norm": 0.5257373542374613, "learning_rate": 1.3346065622387694e-05, "loss": 11.7226, "step": 30802 }, { "epoch": 1.67734645674651, "grad_norm": 0.4854746856865274, "learning_rate": 1.3341664689312939e-05, "loss": 11.7435, "step": 30803 }, { "epoch": 1.677400910743093, "grad_norm": 0.5627802516140279, "learning_rate": 1.3337264430107921e-05, "loss": 11.8204, "step": 30804 }, { "epoch": 1.677455364739676, "grad_norm": 0.5372233772155028, "learning_rate": 1.3332864844806859e-05, "loss": 11.8013, "step": 30805 }, { "epoch": 1.677509818736259, "grad_norm": 0.5283210218300569, "learning_rate": 1.332846593344399e-05, "loss": 11.8013, "step": 30806 }, { "epoch": 1.677564272732842, "grad_norm": 0.4962134052480816, "learning_rate": 1.3324067696053488e-05, "loss": 11.6823, "step": 30807 }, { "epoch": 1.677618726729425, "grad_norm": 0.56457898796017, "learning_rate": 1.3319670132669593e-05, "loss": 11.7457, "step": 30808 }, { "epoch": 1.677673180726008, "grad_norm": 0.5815024347259764, "learning_rate": 1.3315273243326454e-05, "loss": 11.825, "step": 30809 }, { "epoch": 1.677727634722591, "grad_norm": 0.5140054589877632, "learning_rate": 1.3310877028058277e-05, "loss": 11.7688, "step": 30810 }, { "epoch": 1.677782088719174, "grad_norm": 0.5833100953870268, "learning_rate": 1.3306481486899292e-05, "loss": 11.7954, "step": 30811 }, { "epoch": 1.6778365427157569, "grad_norm": 0.6723215588131805, "learning_rate": 1.3302086619883625e-05, "loss": 12.0623, "step": 30812 }, { "epoch": 1.6778909967123399, "grad_norm": 0.530142616716068, "learning_rate": 1.3297692427045516e-05, "loss": 11.8834, "step": 30813 }, { "epoch": 1.6779454507089229, "grad_norm": 0.5224907506133561, "learning_rate": 1.329329890841904e-05, "loss": 11.7784, "step": 30814 }, { "epoch": 1.6779999047055059, "grad_norm": 0.5218275472829298, "learning_rate": 1.328890606403842e-05, "loss": 11.8317, "step": 30815 }, { "epoch": 1.6780543587020889, "grad_norm": 0.5489846031224762, "learning_rate": 1.3284513893937822e-05, "loss": 11.826, "step": 30816 }, { "epoch": 1.6781088126986718, "grad_norm": 0.5204790083568857, "learning_rate": 1.3280122398151363e-05, "loss": 11.7844, "step": 30817 }, { "epoch": 1.678163266695255, "grad_norm": 0.5194773603284347, "learning_rate": 1.3275731576713247e-05, "loss": 11.7438, "step": 30818 }, { "epoch": 1.678217720691838, "grad_norm": 0.5871451905002218, "learning_rate": 1.3271341429657557e-05, "loss": 11.5825, "step": 30819 }, { "epoch": 1.678272174688421, "grad_norm": 0.5159076570751091, "learning_rate": 1.3266951957018481e-05, "loss": 11.7884, "step": 30820 }, { "epoch": 1.678326628685004, "grad_norm": 0.6150612967492541, "learning_rate": 1.326256315883011e-05, "loss": 11.9415, "step": 30821 }, { "epoch": 1.678381082681587, "grad_norm": 0.4931136989370211, "learning_rate": 1.3258175035126596e-05, "loss": 11.8024, "step": 30822 }, { "epoch": 1.67843553667817, "grad_norm": 0.5725825767857251, "learning_rate": 1.3253787585942112e-05, "loss": 11.7904, "step": 30823 }, { "epoch": 1.6784899906747532, "grad_norm": 0.6390060461074529, "learning_rate": 1.3249400811310663e-05, "loss": 12.0097, "step": 30824 }, { "epoch": 1.6785444446713362, "grad_norm": 0.5314803990215526, "learning_rate": 1.3245014711266435e-05, "loss": 11.7944, "step": 30825 }, { "epoch": 1.6785988986679192, "grad_norm": 0.5038522679184477, "learning_rate": 1.3240629285843542e-05, "loss": 11.7378, "step": 30826 }, { "epoch": 1.6786533526645022, "grad_norm": 0.5454704986853873, "learning_rate": 1.3236244535076036e-05, "loss": 11.9079, "step": 30827 }, { "epoch": 1.6787078066610852, "grad_norm": 0.557951993907346, "learning_rate": 1.3231860458998069e-05, "loss": 11.8218, "step": 30828 }, { "epoch": 1.6787622606576682, "grad_norm": 0.555182406506991, "learning_rate": 1.3227477057643677e-05, "loss": 11.6933, "step": 30829 }, { "epoch": 1.6788167146542512, "grad_norm": 0.5695684130966864, "learning_rate": 1.3223094331047004e-05, "loss": 11.656, "step": 30830 }, { "epoch": 1.6788711686508342, "grad_norm": 0.5020493245916736, "learning_rate": 1.3218712279242084e-05, "loss": 11.8005, "step": 30831 }, { "epoch": 1.6789256226474172, "grad_norm": 0.5481198553243628, "learning_rate": 1.3214330902263006e-05, "loss": 11.8994, "step": 30832 }, { "epoch": 1.6789800766440002, "grad_norm": 0.5018290173423888, "learning_rate": 1.3209950200143884e-05, "loss": 11.7898, "step": 30833 }, { "epoch": 1.6790345306405832, "grad_norm": 0.502488793789876, "learning_rate": 1.3205570172918702e-05, "loss": 11.6313, "step": 30834 }, { "epoch": 1.6790889846371662, "grad_norm": 0.5579112840523157, "learning_rate": 1.320119082062159e-05, "loss": 11.8107, "step": 30835 }, { "epoch": 1.6791434386337492, "grad_norm": 0.6770367140611824, "learning_rate": 1.3196812143286551e-05, "loss": 11.8248, "step": 30836 }, { "epoch": 1.6791978926303321, "grad_norm": 0.5036100044091174, "learning_rate": 1.3192434140947652e-05, "loss": 11.7979, "step": 30837 }, { "epoch": 1.6792523466269151, "grad_norm": 0.5353437836758715, "learning_rate": 1.3188056813638971e-05, "loss": 11.7918, "step": 30838 }, { "epoch": 1.6793068006234981, "grad_norm": 0.5288200963388185, "learning_rate": 1.3183680161394495e-05, "loss": 11.6297, "step": 30839 }, { "epoch": 1.6793612546200811, "grad_norm": 0.5658695122389732, "learning_rate": 1.3179304184248298e-05, "loss": 11.8004, "step": 30840 }, { "epoch": 1.6794157086166643, "grad_norm": 0.544517652730449, "learning_rate": 1.3174928882234373e-05, "loss": 11.8579, "step": 30841 }, { "epoch": 1.6794701626132473, "grad_norm": 0.4962557362885047, "learning_rate": 1.3170554255386791e-05, "loss": 11.8731, "step": 30842 }, { "epoch": 1.6795246166098303, "grad_norm": 0.5188071007258321, "learning_rate": 1.3166180303739528e-05, "loss": 11.6215, "step": 30843 }, { "epoch": 1.6795790706064133, "grad_norm": 0.5188097641128784, "learning_rate": 1.316180702732659e-05, "loss": 11.8047, "step": 30844 }, { "epoch": 1.6796335246029963, "grad_norm": 0.5264623739112867, "learning_rate": 1.3157434426182025e-05, "loss": 11.8645, "step": 30845 }, { "epoch": 1.6796879785995793, "grad_norm": 0.5310097958857362, "learning_rate": 1.3153062500339796e-05, "loss": 11.7249, "step": 30846 }, { "epoch": 1.6797424325961625, "grad_norm": 0.5991591247823416, "learning_rate": 1.3148691249833922e-05, "loss": 11.8467, "step": 30847 }, { "epoch": 1.6797968865927455, "grad_norm": 0.5257352301499815, "learning_rate": 1.3144320674698396e-05, "loss": 11.8047, "step": 30848 }, { "epoch": 1.6798513405893285, "grad_norm": 0.5647495279696361, "learning_rate": 1.3139950774967192e-05, "loss": 11.8984, "step": 30849 }, { "epoch": 1.6799057945859115, "grad_norm": 0.5275975143919737, "learning_rate": 1.3135581550674315e-05, "loss": 11.709, "step": 30850 }, { "epoch": 1.6799602485824945, "grad_norm": 0.5936834863768895, "learning_rate": 1.3131213001853692e-05, "loss": 11.8145, "step": 30851 }, { "epoch": 1.6800147025790775, "grad_norm": 0.5436783695171021, "learning_rate": 1.3126845128539356e-05, "loss": 11.6715, "step": 30852 }, { "epoch": 1.6800691565756605, "grad_norm": 0.5401272411344255, "learning_rate": 1.3122477930765243e-05, "loss": 11.8293, "step": 30853 }, { "epoch": 1.6801236105722435, "grad_norm": 0.5071004671666826, "learning_rate": 1.311811140856528e-05, "loss": 11.7706, "step": 30854 }, { "epoch": 1.6801780645688265, "grad_norm": 0.5213372358570623, "learning_rate": 1.3113745561973478e-05, "loss": 11.8414, "step": 30855 }, { "epoch": 1.6802325185654094, "grad_norm": 0.5677481754254107, "learning_rate": 1.310938039102374e-05, "loss": 11.7646, "step": 30856 }, { "epoch": 1.6802869725619924, "grad_norm": 0.5898449492356662, "learning_rate": 1.310501589575005e-05, "loss": 11.7354, "step": 30857 }, { "epoch": 1.6803414265585754, "grad_norm": 0.5417890917817078, "learning_rate": 1.3100652076186314e-05, "loss": 11.7539, "step": 30858 }, { "epoch": 1.6803958805551584, "grad_norm": 0.5769759359254978, "learning_rate": 1.3096288932366474e-05, "loss": 11.7423, "step": 30859 }, { "epoch": 1.6804503345517414, "grad_norm": 0.6294268045759392, "learning_rate": 1.3091926464324488e-05, "loss": 11.8246, "step": 30860 }, { "epoch": 1.6805047885483244, "grad_norm": 0.5385612538260942, "learning_rate": 1.3087564672094243e-05, "loss": 11.7478, "step": 30861 }, { "epoch": 1.6805592425449074, "grad_norm": 0.5374740654419663, "learning_rate": 1.3083203555709677e-05, "loss": 11.7969, "step": 30862 }, { "epoch": 1.6806136965414904, "grad_norm": 0.527415784310097, "learning_rate": 1.3078843115204709e-05, "loss": 11.71, "step": 30863 }, { "epoch": 1.6806681505380734, "grad_norm": 0.5145511706149598, "learning_rate": 1.3074483350613209e-05, "loss": 11.73, "step": 30864 }, { "epoch": 1.6807226045346566, "grad_norm": 0.5551752795785434, "learning_rate": 1.307012426196912e-05, "loss": 11.866, "step": 30865 }, { "epoch": 1.6807770585312396, "grad_norm": 0.5474346226968603, "learning_rate": 1.3065765849306311e-05, "loss": 11.8227, "step": 30866 }, { "epoch": 1.6808315125278226, "grad_norm": 0.6086897915503154, "learning_rate": 1.3061408112658703e-05, "loss": 11.8433, "step": 30867 }, { "epoch": 1.6808859665244056, "grad_norm": 0.5770849409133813, "learning_rate": 1.3057051052060143e-05, "loss": 11.8486, "step": 30868 }, { "epoch": 1.6809404205209886, "grad_norm": 0.5439636611188174, "learning_rate": 1.3052694667544551e-05, "loss": 11.8306, "step": 30869 }, { "epoch": 1.6809948745175716, "grad_norm": 0.5498289907417206, "learning_rate": 1.3048338959145766e-05, "loss": 11.723, "step": 30870 }, { "epoch": 1.6810493285141548, "grad_norm": 0.5625196472309748, "learning_rate": 1.3043983926897684e-05, "loss": 11.8165, "step": 30871 }, { "epoch": 1.6811037825107378, "grad_norm": 0.5064244434293584, "learning_rate": 1.303962957083419e-05, "loss": 11.7715, "step": 30872 }, { "epoch": 1.6811582365073208, "grad_norm": 0.5110432577084042, "learning_rate": 1.3035275890989107e-05, "loss": 11.6278, "step": 30873 }, { "epoch": 1.6812126905039038, "grad_norm": 0.5213051899119718, "learning_rate": 1.3030922887396291e-05, "loss": 11.785, "step": 30874 }, { "epoch": 1.6812671445004868, "grad_norm": 0.5560173869588457, "learning_rate": 1.3026570560089624e-05, "loss": 11.6741, "step": 30875 }, { "epoch": 1.6813215984970697, "grad_norm": 0.5389905003713072, "learning_rate": 1.3022218909102901e-05, "loss": 11.8545, "step": 30876 }, { "epoch": 1.6813760524936527, "grad_norm": 0.5600843065563837, "learning_rate": 1.3017867934470018e-05, "loss": 11.7851, "step": 30877 }, { "epoch": 1.6814305064902357, "grad_norm": 0.5502635265922394, "learning_rate": 1.301351763622476e-05, "loss": 11.8169, "step": 30878 }, { "epoch": 1.6814849604868187, "grad_norm": 0.5284691962359876, "learning_rate": 1.3009168014400997e-05, "loss": 11.7361, "step": 30879 }, { "epoch": 1.6815394144834017, "grad_norm": 0.4789944056438769, "learning_rate": 1.3004819069032514e-05, "loss": 11.721, "step": 30880 }, { "epoch": 1.6815938684799847, "grad_norm": 0.5517250120970683, "learning_rate": 1.3000470800153141e-05, "loss": 11.7812, "step": 30881 }, { "epoch": 1.6816483224765677, "grad_norm": 0.5518930652654063, "learning_rate": 1.2996123207796718e-05, "loss": 11.8482, "step": 30882 }, { "epoch": 1.6817027764731507, "grad_norm": 0.5485477433553917, "learning_rate": 1.2991776291997037e-05, "loss": 11.802, "step": 30883 }, { "epoch": 1.6817572304697337, "grad_norm": 0.5263391224168971, "learning_rate": 1.2987430052787885e-05, "loss": 11.6938, "step": 30884 }, { "epoch": 1.6818116844663167, "grad_norm": 0.5706755975080879, "learning_rate": 1.2983084490203056e-05, "loss": 11.8174, "step": 30885 }, { "epoch": 1.6818661384628997, "grad_norm": 0.5285214526502554, "learning_rate": 1.2978739604276357e-05, "loss": 11.7778, "step": 30886 }, { "epoch": 1.6819205924594827, "grad_norm": 0.5909773457621915, "learning_rate": 1.2974395395041595e-05, "loss": 11.8949, "step": 30887 }, { "epoch": 1.6819750464560659, "grad_norm": 0.5747792388283205, "learning_rate": 1.2970051862532496e-05, "loss": 11.8241, "step": 30888 }, { "epoch": 1.6820295004526489, "grad_norm": 0.5925829241717875, "learning_rate": 1.29657090067829e-05, "loss": 11.8375, "step": 30889 }, { "epoch": 1.6820839544492319, "grad_norm": 0.5220072443568061, "learning_rate": 1.2961366827826515e-05, "loss": 11.8994, "step": 30890 }, { "epoch": 1.6821384084458149, "grad_norm": 0.5577779274779051, "learning_rate": 1.295702532569717e-05, "loss": 11.8704, "step": 30891 }, { "epoch": 1.6821928624423979, "grad_norm": 0.5051562087141184, "learning_rate": 1.2952684500428558e-05, "loss": 11.7841, "step": 30892 }, { "epoch": 1.6822473164389808, "grad_norm": 0.5518716408252478, "learning_rate": 1.294834435205451e-05, "loss": 11.7912, "step": 30893 }, { "epoch": 1.682301770435564, "grad_norm": 0.5066884801095428, "learning_rate": 1.2944004880608718e-05, "loss": 11.7375, "step": 30894 }, { "epoch": 1.682356224432147, "grad_norm": 0.5492426517534502, "learning_rate": 1.2939666086124936e-05, "loss": 11.8867, "step": 30895 }, { "epoch": 1.68241067842873, "grad_norm": 0.5693621790149476, "learning_rate": 1.29353279686369e-05, "loss": 11.8318, "step": 30896 }, { "epoch": 1.682465132425313, "grad_norm": 0.5165803167555809, "learning_rate": 1.2930990528178377e-05, "loss": 11.8264, "step": 30897 }, { "epoch": 1.682519586421896, "grad_norm": 0.5596161858222217, "learning_rate": 1.2926653764783047e-05, "loss": 11.8619, "step": 30898 }, { "epoch": 1.682574040418479, "grad_norm": 0.547827742362886, "learning_rate": 1.2922317678484697e-05, "loss": 11.7555, "step": 30899 }, { "epoch": 1.682628494415062, "grad_norm": 0.535879415983803, "learning_rate": 1.2917982269316975e-05, "loss": 11.7463, "step": 30900 }, { "epoch": 1.682682948411645, "grad_norm": 0.535413866828367, "learning_rate": 1.2913647537313644e-05, "loss": 11.8689, "step": 30901 }, { "epoch": 1.682737402408228, "grad_norm": 0.5805696529246693, "learning_rate": 1.2909313482508379e-05, "loss": 11.9161, "step": 30902 }, { "epoch": 1.682791856404811, "grad_norm": 0.5260264998409206, "learning_rate": 1.2904980104934917e-05, "loss": 11.8405, "step": 30903 }, { "epoch": 1.682846310401394, "grad_norm": 0.5854381143859504, "learning_rate": 1.2900647404626943e-05, "loss": 11.792, "step": 30904 }, { "epoch": 1.682900764397977, "grad_norm": 0.5253860655671488, "learning_rate": 1.2896315381618107e-05, "loss": 11.7813, "step": 30905 }, { "epoch": 1.68295521839456, "grad_norm": 0.554908637873242, "learning_rate": 1.2891984035942162e-05, "loss": 11.7716, "step": 30906 }, { "epoch": 1.683009672391143, "grad_norm": 0.5782336746781361, "learning_rate": 1.2887653367632735e-05, "loss": 11.6716, "step": 30907 }, { "epoch": 1.683064126387726, "grad_norm": 0.5665593192445647, "learning_rate": 1.2883323376723521e-05, "loss": 11.7545, "step": 30908 }, { "epoch": 1.683118580384309, "grad_norm": 0.6995263762196952, "learning_rate": 1.2878994063248218e-05, "loss": 11.9942, "step": 30909 }, { "epoch": 1.683173034380892, "grad_norm": 0.535471529613836, "learning_rate": 1.287466542724044e-05, "loss": 11.6526, "step": 30910 }, { "epoch": 1.6832274883774752, "grad_norm": 0.48303961442288756, "learning_rate": 1.2870337468733895e-05, "loss": 11.7168, "step": 30911 }, { "epoch": 1.6832819423740581, "grad_norm": 0.5542509936891705, "learning_rate": 1.28660101877622e-05, "loss": 11.8746, "step": 30912 }, { "epoch": 1.6833363963706411, "grad_norm": 0.5820860719517761, "learning_rate": 1.2861683584359041e-05, "loss": 11.7746, "step": 30913 }, { "epoch": 1.6833908503672241, "grad_norm": 0.6270610000900988, "learning_rate": 1.2857357658558056e-05, "loss": 11.691, "step": 30914 }, { "epoch": 1.6834453043638071, "grad_norm": 0.5736399326213332, "learning_rate": 1.285303241039284e-05, "loss": 11.8652, "step": 30915 }, { "epoch": 1.6834997583603901, "grad_norm": 0.5619805326157901, "learning_rate": 1.2848707839897078e-05, "loss": 11.7971, "step": 30916 }, { "epoch": 1.6835542123569733, "grad_norm": 0.479098233802878, "learning_rate": 1.2844383947104354e-05, "loss": 11.7201, "step": 30917 }, { "epoch": 1.6836086663535563, "grad_norm": 0.5459321419597545, "learning_rate": 1.2840060732048353e-05, "loss": 11.8616, "step": 30918 }, { "epoch": 1.6836631203501393, "grad_norm": 0.6303937182194855, "learning_rate": 1.2835738194762626e-05, "loss": 11.7714, "step": 30919 }, { "epoch": 1.6837175743467223, "grad_norm": 0.6063994481799019, "learning_rate": 1.283141633528081e-05, "loss": 11.8242, "step": 30920 }, { "epoch": 1.6837720283433053, "grad_norm": 0.5507727194406049, "learning_rate": 1.2827095153636548e-05, "loss": 11.8543, "step": 30921 }, { "epoch": 1.6838264823398883, "grad_norm": 0.5404127369170493, "learning_rate": 1.282277464986339e-05, "loss": 11.8502, "step": 30922 }, { "epoch": 1.6838809363364713, "grad_norm": 0.522691500546519, "learning_rate": 1.2818454823994974e-05, "loss": 11.7252, "step": 30923 }, { "epoch": 1.6839353903330543, "grad_norm": 0.5330287746979502, "learning_rate": 1.2814135676064887e-05, "loss": 11.8818, "step": 30924 }, { "epoch": 1.6839898443296373, "grad_norm": 0.5324819084756363, "learning_rate": 1.2809817206106667e-05, "loss": 11.7668, "step": 30925 }, { "epoch": 1.6840442983262203, "grad_norm": 0.5679799851672759, "learning_rate": 1.2805499414153954e-05, "loss": 11.7543, "step": 30926 }, { "epoch": 1.6840987523228033, "grad_norm": 0.5641950613952643, "learning_rate": 1.2801182300240277e-05, "loss": 11.8477, "step": 30927 }, { "epoch": 1.6841532063193863, "grad_norm": 0.5684721307222901, "learning_rate": 1.2796865864399254e-05, "loss": 11.9336, "step": 30928 }, { "epoch": 1.6842076603159692, "grad_norm": 0.5958884951547424, "learning_rate": 1.2792550106664415e-05, "loss": 11.8898, "step": 30929 }, { "epoch": 1.6842621143125522, "grad_norm": 0.5768648008155474, "learning_rate": 1.2788235027069318e-05, "loss": 11.8193, "step": 30930 }, { "epoch": 1.6843165683091352, "grad_norm": 0.5808263521474171, "learning_rate": 1.278392062564755e-05, "loss": 11.8038, "step": 30931 }, { "epoch": 1.6843710223057182, "grad_norm": 0.5670701911435472, "learning_rate": 1.2779606902432627e-05, "loss": 11.7317, "step": 30932 }, { "epoch": 1.6844254763023012, "grad_norm": 0.5128089869119639, "learning_rate": 1.2775293857458148e-05, "loss": 11.6964, "step": 30933 }, { "epoch": 1.6844799302988842, "grad_norm": 0.48434730096323336, "learning_rate": 1.2770981490757572e-05, "loss": 11.6702, "step": 30934 }, { "epoch": 1.6845343842954674, "grad_norm": 0.5543601897213132, "learning_rate": 1.2766669802364473e-05, "loss": 11.5906, "step": 30935 }, { "epoch": 1.6845888382920504, "grad_norm": 0.5149236057309247, "learning_rate": 1.276235879231239e-05, "loss": 11.8646, "step": 30936 }, { "epoch": 1.6846432922886334, "grad_norm": 0.5751090349175441, "learning_rate": 1.275804846063482e-05, "loss": 11.8037, "step": 30937 }, { "epoch": 1.6846977462852164, "grad_norm": 0.5163364506804589, "learning_rate": 1.2753738807365322e-05, "loss": 11.7896, "step": 30938 }, { "epoch": 1.6847522002817994, "grad_norm": 0.557297852222947, "learning_rate": 1.2749429832537352e-05, "loss": 11.8165, "step": 30939 }, { "epoch": 1.6848066542783826, "grad_norm": 0.5754185383229934, "learning_rate": 1.2745121536184468e-05, "loss": 11.8465, "step": 30940 }, { "epoch": 1.6848611082749656, "grad_norm": 0.5189316542291988, "learning_rate": 1.2740813918340144e-05, "loss": 11.6276, "step": 30941 }, { "epoch": 1.6849155622715486, "grad_norm": 0.5397203483892539, "learning_rate": 1.2736506979037866e-05, "loss": 11.814, "step": 30942 }, { "epoch": 1.6849700162681316, "grad_norm": 0.5248558922205516, "learning_rate": 1.2732200718311183e-05, "loss": 11.655, "step": 30943 }, { "epoch": 1.6850244702647146, "grad_norm": 0.569772147300932, "learning_rate": 1.2727895136193535e-05, "loss": 11.8354, "step": 30944 }, { "epoch": 1.6850789242612976, "grad_norm": 0.5107304223006778, "learning_rate": 1.2723590232718386e-05, "loss": 11.7712, "step": 30945 }, { "epoch": 1.6851333782578806, "grad_norm": 0.5517829892272162, "learning_rate": 1.2719286007919252e-05, "loss": 11.853, "step": 30946 }, { "epoch": 1.6851878322544636, "grad_norm": 0.5516534462677495, "learning_rate": 1.2714982461829571e-05, "loss": 11.9019, "step": 30947 }, { "epoch": 1.6852422862510466, "grad_norm": 0.5973626906897074, "learning_rate": 1.2710679594482855e-05, "loss": 11.8328, "step": 30948 }, { "epoch": 1.6852967402476295, "grad_norm": 0.486708149672646, "learning_rate": 1.2706377405912496e-05, "loss": 11.7596, "step": 30949 }, { "epoch": 1.6853511942442125, "grad_norm": 0.5392361116153961, "learning_rate": 1.270207589615201e-05, "loss": 11.8083, "step": 30950 }, { "epoch": 1.6854056482407955, "grad_norm": 0.6011225344433534, "learning_rate": 1.2697775065234807e-05, "loss": 11.8283, "step": 30951 }, { "epoch": 1.6854601022373785, "grad_norm": 0.4660341428272207, "learning_rate": 1.2693474913194347e-05, "loss": 11.7044, "step": 30952 }, { "epoch": 1.6855145562339615, "grad_norm": 0.5744173327336871, "learning_rate": 1.2689175440064083e-05, "loss": 11.8485, "step": 30953 }, { "epoch": 1.6855690102305445, "grad_norm": 0.49386697404015156, "learning_rate": 1.268487664587743e-05, "loss": 11.8396, "step": 30954 }, { "epoch": 1.6856234642271275, "grad_norm": 0.5649774813196295, "learning_rate": 1.268057853066783e-05, "loss": 11.7956, "step": 30955 }, { "epoch": 1.6856779182237105, "grad_norm": 0.5066835993067511, "learning_rate": 1.2676281094468667e-05, "loss": 11.9316, "step": 30956 }, { "epoch": 1.6857323722202935, "grad_norm": 0.5387885700361347, "learning_rate": 1.2671984337313381e-05, "loss": 11.809, "step": 30957 }, { "epoch": 1.6857868262168767, "grad_norm": 0.6158151256337171, "learning_rate": 1.266768825923541e-05, "loss": 11.7484, "step": 30958 }, { "epoch": 1.6858412802134597, "grad_norm": 0.5642077699526209, "learning_rate": 1.2663392860268131e-05, "loss": 11.7662, "step": 30959 }, { "epoch": 1.6858957342100427, "grad_norm": 0.5004019979844113, "learning_rate": 1.265909814044497e-05, "loss": 11.8103, "step": 30960 }, { "epoch": 1.6859501882066257, "grad_norm": 0.5326111269189654, "learning_rate": 1.265480409979929e-05, "loss": 11.7948, "step": 30961 }, { "epoch": 1.6860046422032087, "grad_norm": 0.503972500382625, "learning_rate": 1.2650510738364518e-05, "loss": 11.6654, "step": 30962 }, { "epoch": 1.6860590961997917, "grad_norm": 0.5222908121815988, "learning_rate": 1.2646218056174009e-05, "loss": 11.9125, "step": 30963 }, { "epoch": 1.6861135501963749, "grad_norm": 0.5209559935917166, "learning_rate": 1.2641926053261177e-05, "loss": 11.6175, "step": 30964 }, { "epoch": 1.6861680041929579, "grad_norm": 0.5519284255939755, "learning_rate": 1.2637634729659375e-05, "loss": 11.7419, "step": 30965 }, { "epoch": 1.6862224581895409, "grad_norm": 0.5532925618193903, "learning_rate": 1.2633344085401955e-05, "loss": 11.776, "step": 30966 }, { "epoch": 1.6862769121861239, "grad_norm": 0.5466818222668209, "learning_rate": 1.262905412052232e-05, "loss": 11.7007, "step": 30967 }, { "epoch": 1.6863313661827068, "grad_norm": 0.5760458209075585, "learning_rate": 1.2624764835053804e-05, "loss": 11.7261, "step": 30968 }, { "epoch": 1.6863858201792898, "grad_norm": 0.5607843698283759, "learning_rate": 1.2620476229029753e-05, "loss": 11.7256, "step": 30969 }, { "epoch": 1.6864402741758728, "grad_norm": 0.5876410562875248, "learning_rate": 1.2616188302483567e-05, "loss": 11.7445, "step": 30970 }, { "epoch": 1.6864947281724558, "grad_norm": 0.5333042910383061, "learning_rate": 1.2611901055448528e-05, "loss": 11.7903, "step": 30971 }, { "epoch": 1.6865491821690388, "grad_norm": 0.5375697937719466, "learning_rate": 1.260761448795802e-05, "loss": 11.795, "step": 30972 }, { "epoch": 1.6866036361656218, "grad_norm": 0.5320807055716856, "learning_rate": 1.2603328600045338e-05, "loss": 11.7174, "step": 30973 }, { "epoch": 1.6866580901622048, "grad_norm": 0.5754043768594431, "learning_rate": 1.259904339174386e-05, "loss": 11.7903, "step": 30974 }, { "epoch": 1.6867125441587878, "grad_norm": 0.5422487035895022, "learning_rate": 1.2594758863086865e-05, "loss": 11.8148, "step": 30975 }, { "epoch": 1.6867669981553708, "grad_norm": 0.5434807244508277, "learning_rate": 1.2590475014107661e-05, "loss": 11.8619, "step": 30976 }, { "epoch": 1.6868214521519538, "grad_norm": 0.5788963942280565, "learning_rate": 1.2586191844839612e-05, "loss": 11.8867, "step": 30977 }, { "epoch": 1.6868759061485368, "grad_norm": 0.6179581524250197, "learning_rate": 1.2581909355315968e-05, "loss": 11.7265, "step": 30978 }, { "epoch": 1.6869303601451198, "grad_norm": 0.6156278283170895, "learning_rate": 1.2577627545570059e-05, "loss": 11.7749, "step": 30979 }, { "epoch": 1.6869848141417028, "grad_norm": 0.5364324405623805, "learning_rate": 1.2573346415635201e-05, "loss": 11.7738, "step": 30980 }, { "epoch": 1.687039268138286, "grad_norm": 0.6067647389487533, "learning_rate": 1.2569065965544636e-05, "loss": 11.8974, "step": 30981 }, { "epoch": 1.687093722134869, "grad_norm": 0.5466475366529785, "learning_rate": 1.2564786195331702e-05, "loss": 11.771, "step": 30982 }, { "epoch": 1.687148176131452, "grad_norm": 0.5538879810462987, "learning_rate": 1.256050710502964e-05, "loss": 11.8921, "step": 30983 }, { "epoch": 1.687202630128035, "grad_norm": 0.7236317356514967, "learning_rate": 1.2556228694671746e-05, "loss": 11.8492, "step": 30984 }, { "epoch": 1.687257084124618, "grad_norm": 0.5389627890915959, "learning_rate": 1.2551950964291292e-05, "loss": 11.7868, "step": 30985 }, { "epoch": 1.687311538121201, "grad_norm": 0.5477972562887624, "learning_rate": 1.2547673913921499e-05, "loss": 11.661, "step": 30986 }, { "epoch": 1.6873659921177842, "grad_norm": 0.5549818238153731, "learning_rate": 1.2543397543595692e-05, "loss": 11.9308, "step": 30987 }, { "epoch": 1.6874204461143671, "grad_norm": 0.5392778514406316, "learning_rate": 1.2539121853347069e-05, "loss": 11.6602, "step": 30988 }, { "epoch": 1.6874749001109501, "grad_norm": 0.5639723548899724, "learning_rate": 1.2534846843208925e-05, "loss": 11.7974, "step": 30989 }, { "epoch": 1.6875293541075331, "grad_norm": 0.5938283471144208, "learning_rate": 1.2530572513214446e-05, "loss": 11.8409, "step": 30990 }, { "epoch": 1.6875838081041161, "grad_norm": 0.5400408892509663, "learning_rate": 1.2526298863396912e-05, "loss": 11.7953, "step": 30991 }, { "epoch": 1.6876382621006991, "grad_norm": 0.6553367130312735, "learning_rate": 1.2522025893789569e-05, "loss": 11.8586, "step": 30992 }, { "epoch": 1.687692716097282, "grad_norm": 0.5834404463377707, "learning_rate": 1.2517753604425608e-05, "loss": 11.7478, "step": 30993 }, { "epoch": 1.687747170093865, "grad_norm": 0.57803278579205, "learning_rate": 1.2513481995338284e-05, "loss": 11.8328, "step": 30994 }, { "epoch": 1.687801624090448, "grad_norm": 0.6286824435063781, "learning_rate": 1.2509211066560788e-05, "loss": 11.813, "step": 30995 }, { "epoch": 1.687856078087031, "grad_norm": 0.5277799842411208, "learning_rate": 1.250494081812632e-05, "loss": 11.7461, "step": 30996 }, { "epoch": 1.687910532083614, "grad_norm": 0.528310544407678, "learning_rate": 1.250067125006813e-05, "loss": 11.6591, "step": 30997 }, { "epoch": 1.687964986080197, "grad_norm": 0.5240383164639838, "learning_rate": 1.2496402362419369e-05, "loss": 11.8203, "step": 30998 }, { "epoch": 1.68801944007678, "grad_norm": 0.5499002892758581, "learning_rate": 1.2492134155213275e-05, "loss": 11.6102, "step": 30999 }, { "epoch": 1.688073894073363, "grad_norm": 0.5024318789840799, "learning_rate": 1.2487866628483014e-05, "loss": 11.7725, "step": 31000 }, { "epoch": 1.688128348069946, "grad_norm": 0.5838408608040243, "learning_rate": 1.2483599782261768e-05, "loss": 11.7318, "step": 31001 }, { "epoch": 1.688182802066529, "grad_norm": 0.6098789562032324, "learning_rate": 1.2479333616582744e-05, "loss": 11.8629, "step": 31002 }, { "epoch": 1.688237256063112, "grad_norm": 0.5656585730421902, "learning_rate": 1.2475068131479084e-05, "loss": 11.792, "step": 31003 }, { "epoch": 1.688291710059695, "grad_norm": 0.5392994469720828, "learning_rate": 1.2470803326984016e-05, "loss": 11.7874, "step": 31004 }, { "epoch": 1.6883461640562782, "grad_norm": 0.5252149296259485, "learning_rate": 1.2466539203130612e-05, "loss": 11.784, "step": 31005 }, { "epoch": 1.6884006180528612, "grad_norm": 0.507835131305577, "learning_rate": 1.246227575995208e-05, "loss": 11.7961, "step": 31006 }, { "epoch": 1.6884550720494442, "grad_norm": 0.5172020169125301, "learning_rate": 1.2458012997481594e-05, "loss": 11.8493, "step": 31007 }, { "epoch": 1.6885095260460272, "grad_norm": 0.580931403383934, "learning_rate": 1.245375091575226e-05, "loss": 11.8756, "step": 31008 }, { "epoch": 1.6885639800426102, "grad_norm": 0.5486886961906027, "learning_rate": 1.2449489514797264e-05, "loss": 11.7934, "step": 31009 }, { "epoch": 1.6886184340391934, "grad_norm": 0.47751340387962543, "learning_rate": 1.24452287946497e-05, "loss": 11.7613, "step": 31010 }, { "epoch": 1.6886728880357764, "grad_norm": 0.6112902242248104, "learning_rate": 1.244096875534273e-05, "loss": 11.7245, "step": 31011 }, { "epoch": 1.6887273420323594, "grad_norm": 0.5524698751702133, "learning_rate": 1.2436709396909451e-05, "loss": 11.817, "step": 31012 }, { "epoch": 1.6887817960289424, "grad_norm": 0.5503354050461463, "learning_rate": 1.2432450719383015e-05, "loss": 11.8147, "step": 31013 }, { "epoch": 1.6888362500255254, "grad_norm": 0.5508965272505504, "learning_rate": 1.242819272279656e-05, "loss": 11.6423, "step": 31014 }, { "epoch": 1.6888907040221084, "grad_norm": 0.5367811120380197, "learning_rate": 1.242393540718313e-05, "loss": 11.8595, "step": 31015 }, { "epoch": 1.6889451580186914, "grad_norm": 0.5561738870650693, "learning_rate": 1.2419678772575882e-05, "loss": 11.8342, "step": 31016 }, { "epoch": 1.6889996120152744, "grad_norm": 0.5301874112024781, "learning_rate": 1.2415422819007871e-05, "loss": 11.8866, "step": 31017 }, { "epoch": 1.6890540660118574, "grad_norm": 0.6467900675327487, "learning_rate": 1.2411167546512226e-05, "loss": 11.9131, "step": 31018 }, { "epoch": 1.6891085200084404, "grad_norm": 0.5411147604998521, "learning_rate": 1.2406912955122052e-05, "loss": 11.9355, "step": 31019 }, { "epoch": 1.6891629740050234, "grad_norm": 0.47049751414523905, "learning_rate": 1.240265904487039e-05, "loss": 11.7164, "step": 31020 }, { "epoch": 1.6892174280016063, "grad_norm": 0.5459693348498853, "learning_rate": 1.2398405815790371e-05, "loss": 11.8325, "step": 31021 }, { "epoch": 1.6892718819981893, "grad_norm": 0.5441759555883625, "learning_rate": 1.2394153267915009e-05, "loss": 11.8041, "step": 31022 }, { "epoch": 1.6893263359947723, "grad_norm": 0.5060146303218898, "learning_rate": 1.2389901401277426e-05, "loss": 11.7788, "step": 31023 }, { "epoch": 1.6893807899913553, "grad_norm": 0.6361849047081553, "learning_rate": 1.238565021591066e-05, "loss": 11.8789, "step": 31024 }, { "epoch": 1.6894352439879383, "grad_norm": 0.5062576448789375, "learning_rate": 1.2381399711847751e-05, "loss": 11.7049, "step": 31025 }, { "epoch": 1.6894896979845213, "grad_norm": 0.554103616111281, "learning_rate": 1.2377149889121797e-05, "loss": 11.7653, "step": 31026 }, { "epoch": 1.6895441519811043, "grad_norm": 0.4929483825124844, "learning_rate": 1.2372900747765791e-05, "loss": 11.6667, "step": 31027 }, { "epoch": 1.6895986059776875, "grad_norm": 0.5440798486434106, "learning_rate": 1.236865228781281e-05, "loss": 11.8324, "step": 31028 }, { "epoch": 1.6896530599742705, "grad_norm": 0.565908417010825, "learning_rate": 1.2364404509295902e-05, "loss": 11.7977, "step": 31029 }, { "epoch": 1.6897075139708535, "grad_norm": 0.5461142968907308, "learning_rate": 1.2360157412248064e-05, "loss": 11.7253, "step": 31030 }, { "epoch": 1.6897619679674365, "grad_norm": 0.5625146515850067, "learning_rate": 1.2355910996702358e-05, "loss": 11.9029, "step": 31031 }, { "epoch": 1.6898164219640195, "grad_norm": 0.4987479890051444, "learning_rate": 1.2351665262691758e-05, "loss": 11.644, "step": 31032 }, { "epoch": 1.6898708759606025, "grad_norm": 0.5498233010714532, "learning_rate": 1.2347420210249339e-05, "loss": 11.7239, "step": 31033 }, { "epoch": 1.6899253299571857, "grad_norm": 0.579645915344141, "learning_rate": 1.2343175839408072e-05, "loss": 11.8525, "step": 31034 }, { "epoch": 1.6899797839537687, "grad_norm": 0.5109973452372204, "learning_rate": 1.2338932150200955e-05, "loss": 11.7003, "step": 31035 }, { "epoch": 1.6900342379503517, "grad_norm": 0.5250080212971248, "learning_rate": 1.2334689142661015e-05, "loss": 11.7213, "step": 31036 }, { "epoch": 1.6900886919469347, "grad_norm": 0.5140151033662399, "learning_rate": 1.2330446816821217e-05, "loss": 11.7804, "step": 31037 }, { "epoch": 1.6901431459435177, "grad_norm": 0.5814524810375874, "learning_rate": 1.232620517271459e-05, "loss": 11.7545, "step": 31038 }, { "epoch": 1.6901975999401007, "grad_norm": 0.5844885144223078, "learning_rate": 1.2321964210374081e-05, "loss": 11.8049, "step": 31039 }, { "epoch": 1.6902520539366837, "grad_norm": 0.5273357932651959, "learning_rate": 1.231772392983267e-05, "loss": 11.825, "step": 31040 }, { "epoch": 1.6903065079332666, "grad_norm": 0.5230934720052108, "learning_rate": 1.2313484331123371e-05, "loss": 11.7785, "step": 31041 }, { "epoch": 1.6903609619298496, "grad_norm": 0.5066650193755428, "learning_rate": 1.2309245414279092e-05, "loss": 11.6719, "step": 31042 }, { "epoch": 1.6904154159264326, "grad_norm": 0.48386011572223003, "learning_rate": 1.230500717933285e-05, "loss": 11.7318, "step": 31043 }, { "epoch": 1.6904698699230156, "grad_norm": 0.5816903007676951, "learning_rate": 1.2300769626317587e-05, "loss": 11.7068, "step": 31044 }, { "epoch": 1.6905243239195986, "grad_norm": 0.5322336672555599, "learning_rate": 1.2296532755266222e-05, "loss": 11.8342, "step": 31045 }, { "epoch": 1.6905787779161816, "grad_norm": 0.5174156072071366, "learning_rate": 1.2292296566211737e-05, "loss": 11.7864, "step": 31046 }, { "epoch": 1.6906332319127646, "grad_norm": 0.6314504146664143, "learning_rate": 1.228806105918705e-05, "loss": 11.8131, "step": 31047 }, { "epoch": 1.6906876859093476, "grad_norm": 0.5293592883679874, "learning_rate": 1.2283826234225137e-05, "loss": 11.7786, "step": 31048 }, { "epoch": 1.6907421399059306, "grad_norm": 0.5495173292261814, "learning_rate": 1.2279592091358871e-05, "loss": 11.795, "step": 31049 }, { "epoch": 1.6907965939025136, "grad_norm": 0.5284385447491805, "learning_rate": 1.2275358630621214e-05, "loss": 11.7026, "step": 31050 }, { "epoch": 1.6908510478990968, "grad_norm": 0.5072889494758992, "learning_rate": 1.2271125852045095e-05, "loss": 11.8435, "step": 31051 }, { "epoch": 1.6909055018956798, "grad_norm": 0.541842245256738, "learning_rate": 1.2266893755663388e-05, "loss": 11.7366, "step": 31052 }, { "epoch": 1.6909599558922628, "grad_norm": 0.5077504448857384, "learning_rate": 1.2262662341509057e-05, "loss": 11.8134, "step": 31053 }, { "epoch": 1.6910144098888458, "grad_norm": 0.5042125936553743, "learning_rate": 1.2258431609614973e-05, "loss": 11.6919, "step": 31054 }, { "epoch": 1.6910688638854288, "grad_norm": 0.5543434981239634, "learning_rate": 1.2254201560014022e-05, "loss": 11.7729, "step": 31055 }, { "epoch": 1.6911233178820118, "grad_norm": 0.5241217795547016, "learning_rate": 1.2249972192739135e-05, "loss": 11.7454, "step": 31056 }, { "epoch": 1.691177771878595, "grad_norm": 0.5353220571388403, "learning_rate": 1.224574350782316e-05, "loss": 11.8802, "step": 31057 }, { "epoch": 1.691232225875178, "grad_norm": 0.5159713338830011, "learning_rate": 1.2241515505299016e-05, "loss": 11.7998, "step": 31058 }, { "epoch": 1.691286679871761, "grad_norm": 0.5840084513881622, "learning_rate": 1.2237288185199547e-05, "loss": 11.8563, "step": 31059 }, { "epoch": 1.691341133868344, "grad_norm": 0.5622320436290875, "learning_rate": 1.2233061547557667e-05, "loss": 11.7928, "step": 31060 }, { "epoch": 1.691395587864927, "grad_norm": 0.5405334827035437, "learning_rate": 1.2228835592406196e-05, "loss": 11.7847, "step": 31061 }, { "epoch": 1.69145004186151, "grad_norm": 0.5921790358345382, "learning_rate": 1.2224610319778018e-05, "loss": 11.7002, "step": 31062 }, { "epoch": 1.691504495858093, "grad_norm": 0.5404621715199007, "learning_rate": 1.2220385729706007e-05, "loss": 11.8761, "step": 31063 }, { "epoch": 1.691558949854676, "grad_norm": 0.6224685543905782, "learning_rate": 1.2216161822222993e-05, "loss": 11.8591, "step": 31064 }, { "epoch": 1.691613403851259, "grad_norm": 0.5297868569126959, "learning_rate": 1.2211938597361839e-05, "loss": 11.4864, "step": 31065 }, { "epoch": 1.691667857847842, "grad_norm": 0.7639102393028403, "learning_rate": 1.2207716055155349e-05, "loss": 11.7668, "step": 31066 }, { "epoch": 1.691722311844425, "grad_norm": 0.5075137608088227, "learning_rate": 1.2203494195636379e-05, "loss": 11.8143, "step": 31067 }, { "epoch": 1.691776765841008, "grad_norm": 0.5915385553745713, "learning_rate": 1.2199273018837775e-05, "loss": 11.8833, "step": 31068 }, { "epoch": 1.6918312198375909, "grad_norm": 0.5144594337393157, "learning_rate": 1.2195052524792339e-05, "loss": 11.7613, "step": 31069 }, { "epoch": 1.6918856738341739, "grad_norm": 0.4984695264655388, "learning_rate": 1.2190832713532917e-05, "loss": 11.7809, "step": 31070 }, { "epoch": 1.6919401278307569, "grad_norm": 0.5117894029324008, "learning_rate": 1.2186613585092277e-05, "loss": 11.7135, "step": 31071 }, { "epoch": 1.6919945818273399, "grad_norm": 0.5383639911433917, "learning_rate": 1.2182395139503289e-05, "loss": 11.8268, "step": 31072 }, { "epoch": 1.6920490358239229, "grad_norm": 0.5174906912576328, "learning_rate": 1.2178177376798693e-05, "loss": 11.8227, "step": 31073 }, { "epoch": 1.692103489820506, "grad_norm": 0.575912544748499, "learning_rate": 1.2173960297011344e-05, "loss": 11.6925, "step": 31074 }, { "epoch": 1.692157943817089, "grad_norm": 0.5235764455622189, "learning_rate": 1.2169743900174003e-05, "loss": 11.6668, "step": 31075 }, { "epoch": 1.692212397813672, "grad_norm": 0.5566910111289741, "learning_rate": 1.2165528186319453e-05, "loss": 11.8516, "step": 31076 }, { "epoch": 1.692266851810255, "grad_norm": 0.535604012565439, "learning_rate": 1.2161313155480469e-05, "loss": 11.8037, "step": 31077 }, { "epoch": 1.692321305806838, "grad_norm": 0.5247715048292585, "learning_rate": 1.2157098807689882e-05, "loss": 11.8749, "step": 31078 }, { "epoch": 1.692375759803421, "grad_norm": 0.568802881873654, "learning_rate": 1.2152885142980397e-05, "loss": 11.8276, "step": 31079 }, { "epoch": 1.6924302138000042, "grad_norm": 0.5621694856576721, "learning_rate": 1.2148672161384823e-05, "loss": 11.8172, "step": 31080 }, { "epoch": 1.6924846677965872, "grad_norm": 0.5564010752224365, "learning_rate": 1.2144459862935898e-05, "loss": 11.8701, "step": 31081 }, { "epoch": 1.6925391217931702, "grad_norm": 0.5462549488719423, "learning_rate": 1.2140248247666397e-05, "loss": 11.8552, "step": 31082 }, { "epoch": 1.6925935757897532, "grad_norm": 0.5615880949210886, "learning_rate": 1.213603731560905e-05, "loss": 11.7695, "step": 31083 }, { "epoch": 1.6926480297863362, "grad_norm": 0.5923126679584949, "learning_rate": 1.2131827066796608e-05, "loss": 11.7957, "step": 31084 }, { "epoch": 1.6927024837829192, "grad_norm": 0.5783788676073419, "learning_rate": 1.2127617501261845e-05, "loss": 11.691, "step": 31085 }, { "epoch": 1.6927569377795022, "grad_norm": 0.5420336454809602, "learning_rate": 1.2123408619037434e-05, "loss": 11.7871, "step": 31086 }, { "epoch": 1.6928113917760852, "grad_norm": 0.5781991952400373, "learning_rate": 1.211920042015615e-05, "loss": 11.773, "step": 31087 }, { "epoch": 1.6928658457726682, "grad_norm": 0.5485901242604088, "learning_rate": 1.2114992904650691e-05, "loss": 11.8089, "step": 31088 }, { "epoch": 1.6929202997692512, "grad_norm": 0.5246036213674602, "learning_rate": 1.211078607255377e-05, "loss": 11.8005, "step": 31089 }, { "epoch": 1.6929747537658342, "grad_norm": 0.651142675260831, "learning_rate": 1.2106579923898154e-05, "loss": 11.7684, "step": 31090 }, { "epoch": 1.6930292077624172, "grad_norm": 0.5184941021250798, "learning_rate": 1.2102374458716481e-05, "loss": 11.8613, "step": 31091 }, { "epoch": 1.6930836617590002, "grad_norm": 0.5544019442943551, "learning_rate": 1.2098169677041504e-05, "loss": 11.7862, "step": 31092 }, { "epoch": 1.6931381157555832, "grad_norm": 0.6027250340795689, "learning_rate": 1.2093965578905875e-05, "loss": 11.855, "step": 31093 }, { "epoch": 1.6931925697521661, "grad_norm": 0.6490606591017354, "learning_rate": 1.2089762164342344e-05, "loss": 11.853, "step": 31094 }, { "epoch": 1.6932470237487491, "grad_norm": 0.6449716678820732, "learning_rate": 1.2085559433383565e-05, "loss": 11.7931, "step": 31095 }, { "epoch": 1.6933014777453321, "grad_norm": 0.5988995642660392, "learning_rate": 1.2081357386062186e-05, "loss": 11.8962, "step": 31096 }, { "epoch": 1.6933559317419151, "grad_norm": 0.5489172557999267, "learning_rate": 1.2077156022410952e-05, "loss": 11.7662, "step": 31097 }, { "epoch": 1.6934103857384983, "grad_norm": 0.5388743072112442, "learning_rate": 1.2072955342462466e-05, "loss": 11.737, "step": 31098 }, { "epoch": 1.6934648397350813, "grad_norm": 0.5502970655509305, "learning_rate": 1.2068755346249427e-05, "loss": 11.8435, "step": 31099 }, { "epoch": 1.6935192937316643, "grad_norm": 0.5266090068269315, "learning_rate": 1.2064556033804508e-05, "loss": 11.656, "step": 31100 }, { "epoch": 1.6935737477282473, "grad_norm": 0.5333254402942086, "learning_rate": 1.206035740516034e-05, "loss": 11.8281, "step": 31101 }, { "epoch": 1.6936282017248303, "grad_norm": 0.5587803731499055, "learning_rate": 1.2056159460349592e-05, "loss": 11.842, "step": 31102 }, { "epoch": 1.6936826557214133, "grad_norm": 0.5887906236010659, "learning_rate": 1.2051962199404876e-05, "loss": 11.79, "step": 31103 }, { "epoch": 1.6937371097179965, "grad_norm": 0.5052092030785456, "learning_rate": 1.2047765622358875e-05, "loss": 11.7987, "step": 31104 }, { "epoch": 1.6937915637145795, "grad_norm": 0.5189554623571955, "learning_rate": 1.2043569729244198e-05, "loss": 11.7302, "step": 31105 }, { "epoch": 1.6938460177111625, "grad_norm": 0.536995774187871, "learning_rate": 1.203937452009345e-05, "loss": 11.9114, "step": 31106 }, { "epoch": 1.6939004717077455, "grad_norm": 0.5514921998254492, "learning_rate": 1.2035179994939295e-05, "loss": 11.733, "step": 31107 }, { "epoch": 1.6939549257043285, "grad_norm": 0.5376014961174918, "learning_rate": 1.2030986153814316e-05, "loss": 11.649, "step": 31108 }, { "epoch": 1.6940093797009115, "grad_norm": 0.6520060717512525, "learning_rate": 1.2026792996751157e-05, "loss": 11.9269, "step": 31109 }, { "epoch": 1.6940638336974945, "grad_norm": 0.5537771651695272, "learning_rate": 1.202260052378239e-05, "loss": 11.8471, "step": 31110 }, { "epoch": 1.6941182876940775, "grad_norm": 0.5484192303934142, "learning_rate": 1.2018408734940644e-05, "loss": 11.7694, "step": 31111 }, { "epoch": 1.6941727416906605, "grad_norm": 0.5503157610454154, "learning_rate": 1.2014217630258518e-05, "loss": 11.8576, "step": 31112 }, { "epoch": 1.6942271956872434, "grad_norm": 0.6120296218848519, "learning_rate": 1.201002720976857e-05, "loss": 11.7632, "step": 31113 }, { "epoch": 1.6942816496838264, "grad_norm": 0.5567956856022628, "learning_rate": 1.2005837473503434e-05, "loss": 11.9131, "step": 31114 }, { "epoch": 1.6943361036804094, "grad_norm": 0.5460710209147294, "learning_rate": 1.2001648421495648e-05, "loss": 11.6285, "step": 31115 }, { "epoch": 1.6943905576769924, "grad_norm": 0.48387214905013837, "learning_rate": 1.1997460053777799e-05, "loss": 11.6831, "step": 31116 }, { "epoch": 1.6944450116735754, "grad_norm": 0.6240826432684845, "learning_rate": 1.1993272370382469e-05, "loss": 11.9838, "step": 31117 }, { "epoch": 1.6944994656701584, "grad_norm": 0.5618401324838624, "learning_rate": 1.198908537134219e-05, "loss": 11.8195, "step": 31118 }, { "epoch": 1.6945539196667414, "grad_norm": 0.49165678973523896, "learning_rate": 1.1984899056689569e-05, "loss": 11.692, "step": 31119 }, { "epoch": 1.6946083736633244, "grad_norm": 0.5751672989876923, "learning_rate": 1.1980713426457102e-05, "loss": 11.9483, "step": 31120 }, { "epoch": 1.6946628276599076, "grad_norm": 0.5234085505286293, "learning_rate": 1.1976528480677396e-05, "loss": 11.6454, "step": 31121 }, { "epoch": 1.6947172816564906, "grad_norm": 0.5742276405421017, "learning_rate": 1.1972344219382947e-05, "loss": 11.7931, "step": 31122 }, { "epoch": 1.6947717356530736, "grad_norm": 0.5736738705491817, "learning_rate": 1.1968160642606308e-05, "loss": 11.8693, "step": 31123 }, { "epoch": 1.6948261896496566, "grad_norm": 0.5227934599039821, "learning_rate": 1.196397775038004e-05, "loss": 11.7932, "step": 31124 }, { "epoch": 1.6948806436462396, "grad_norm": 0.5585268892314317, "learning_rate": 1.195979554273664e-05, "loss": 11.8074, "step": 31125 }, { "epoch": 1.6949350976428226, "grad_norm": 0.5272249447494851, "learning_rate": 1.1955614019708616e-05, "loss": 11.8543, "step": 31126 }, { "epoch": 1.6949895516394058, "grad_norm": 0.6150872977246788, "learning_rate": 1.195143318132852e-05, "loss": 11.8697, "step": 31127 }, { "epoch": 1.6950440056359888, "grad_norm": 0.523893250614035, "learning_rate": 1.1947253027628824e-05, "loss": 11.7275, "step": 31128 }, { "epoch": 1.6950984596325718, "grad_norm": 0.5466708874336663, "learning_rate": 1.1943073558642082e-05, "loss": 11.6989, "step": 31129 }, { "epoch": 1.6951529136291548, "grad_norm": 0.5297247842485884, "learning_rate": 1.1938894774400743e-05, "loss": 11.7921, "step": 31130 }, { "epoch": 1.6952073676257378, "grad_norm": 0.668330003281475, "learning_rate": 1.1934716674937352e-05, "loss": 11.7513, "step": 31131 }, { "epoch": 1.6952618216223208, "grad_norm": 0.5890816140468341, "learning_rate": 1.1930539260284346e-05, "loss": 11.9074, "step": 31132 }, { "epoch": 1.6953162756189037, "grad_norm": 0.5921423566789797, "learning_rate": 1.1926362530474234e-05, "loss": 11.7829, "step": 31133 }, { "epoch": 1.6953707296154867, "grad_norm": 0.5604002994266877, "learning_rate": 1.1922186485539522e-05, "loss": 11.7451, "step": 31134 }, { "epoch": 1.6954251836120697, "grad_norm": 0.5386171188268375, "learning_rate": 1.1918011125512651e-05, "loss": 11.6358, "step": 31135 }, { "epoch": 1.6954796376086527, "grad_norm": 0.6047722369859541, "learning_rate": 1.1913836450426108e-05, "loss": 11.7773, "step": 31136 }, { "epoch": 1.6955340916052357, "grad_norm": 0.5636338677013298, "learning_rate": 1.190966246031232e-05, "loss": 11.8731, "step": 31137 }, { "epoch": 1.6955885456018187, "grad_norm": 0.5289540230140203, "learning_rate": 1.1905489155203764e-05, "loss": 11.6416, "step": 31138 }, { "epoch": 1.6956429995984017, "grad_norm": 0.5610172695635461, "learning_rate": 1.1901316535132922e-05, "loss": 11.8599, "step": 31139 }, { "epoch": 1.6956974535949847, "grad_norm": 0.5620621465242817, "learning_rate": 1.1897144600132203e-05, "loss": 11.8455, "step": 31140 }, { "epoch": 1.6957519075915677, "grad_norm": 0.5286016597676234, "learning_rate": 1.1892973350234072e-05, "loss": 11.7321, "step": 31141 }, { "epoch": 1.6958063615881507, "grad_norm": 0.5538988834899655, "learning_rate": 1.1888802785470943e-05, "loss": 11.7554, "step": 31142 }, { "epoch": 1.6958608155847337, "grad_norm": 0.5220134673972876, "learning_rate": 1.1884632905875281e-05, "loss": 11.6936, "step": 31143 }, { "epoch": 1.6959152695813169, "grad_norm": 0.5659510239305034, "learning_rate": 1.188046371147946e-05, "loss": 11.8246, "step": 31144 }, { "epoch": 1.6959697235778999, "grad_norm": 0.5261749517964932, "learning_rate": 1.1876295202315957e-05, "loss": 11.7013, "step": 31145 }, { "epoch": 1.6960241775744829, "grad_norm": 0.555824132148923, "learning_rate": 1.1872127378417163e-05, "loss": 11.9716, "step": 31146 }, { "epoch": 1.6960786315710659, "grad_norm": 0.5117819884497045, "learning_rate": 1.1867960239815467e-05, "loss": 11.8115, "step": 31147 }, { "epoch": 1.6961330855676489, "grad_norm": 0.5933197441541157, "learning_rate": 1.1863793786543288e-05, "loss": 11.7341, "step": 31148 }, { "epoch": 1.6961875395642318, "grad_norm": 0.5199894150538277, "learning_rate": 1.1859628018633052e-05, "loss": 11.7156, "step": 31149 }, { "epoch": 1.696241993560815, "grad_norm": 0.5452589369395606, "learning_rate": 1.1855462936117101e-05, "loss": 11.7479, "step": 31150 }, { "epoch": 1.696296447557398, "grad_norm": 0.5794796909162606, "learning_rate": 1.1851298539027889e-05, "loss": 11.7396, "step": 31151 }, { "epoch": 1.696350901553981, "grad_norm": 0.6460472069428692, "learning_rate": 1.184713482739772e-05, "loss": 11.6926, "step": 31152 }, { "epoch": 1.696405355550564, "grad_norm": 0.6811738661338196, "learning_rate": 1.184297180125905e-05, "loss": 11.8275, "step": 31153 }, { "epoch": 1.696459809547147, "grad_norm": 0.597358354833702, "learning_rate": 1.1838809460644195e-05, "loss": 11.8919, "step": 31154 }, { "epoch": 1.69651426354373, "grad_norm": 0.5613360676396864, "learning_rate": 1.1834647805585552e-05, "loss": 11.7637, "step": 31155 }, { "epoch": 1.696568717540313, "grad_norm": 0.5514983697671271, "learning_rate": 1.1830486836115483e-05, "loss": 11.7227, "step": 31156 }, { "epoch": 1.696623171536896, "grad_norm": 0.5435911315202359, "learning_rate": 1.1826326552266308e-05, "loss": 11.8893, "step": 31157 }, { "epoch": 1.696677625533479, "grad_norm": 0.5338506575659464, "learning_rate": 1.1822166954070434e-05, "loss": 11.6075, "step": 31158 }, { "epoch": 1.696732079530062, "grad_norm": 0.4986525568452677, "learning_rate": 1.1818008041560147e-05, "loss": 11.8006, "step": 31159 }, { "epoch": 1.696786533526645, "grad_norm": 0.5745697895053549, "learning_rate": 1.1813849814767818e-05, "loss": 11.898, "step": 31160 }, { "epoch": 1.696840987523228, "grad_norm": 0.5029913364089251, "learning_rate": 1.1809692273725803e-05, "loss": 11.7185, "step": 31161 }, { "epoch": 1.696895441519811, "grad_norm": 0.5740095031314133, "learning_rate": 1.1805535418466395e-05, "loss": 11.9046, "step": 31162 }, { "epoch": 1.696949895516394, "grad_norm": 0.5590567368795529, "learning_rate": 1.1801379249021948e-05, "loss": 11.7892, "step": 31163 }, { "epoch": 1.697004349512977, "grad_norm": 0.4763955732081574, "learning_rate": 1.1797223765424747e-05, "loss": 11.8285, "step": 31164 }, { "epoch": 1.69705880350956, "grad_norm": 0.5061884999654044, "learning_rate": 1.1793068967707154e-05, "loss": 11.7972, "step": 31165 }, { "epoch": 1.697113257506143, "grad_norm": 0.5523447234983317, "learning_rate": 1.1788914855901445e-05, "loss": 11.7633, "step": 31166 }, { "epoch": 1.697167711502726, "grad_norm": 0.5401459164104537, "learning_rate": 1.1784761430039904e-05, "loss": 11.7394, "step": 31167 }, { "epoch": 1.6972221654993092, "grad_norm": 0.5376616400063358, "learning_rate": 1.178060869015487e-05, "loss": 11.766, "step": 31168 }, { "epoch": 1.6972766194958921, "grad_norm": 0.5910258722714671, "learning_rate": 1.1776456636278598e-05, "loss": 11.7837, "step": 31169 }, { "epoch": 1.6973310734924751, "grad_norm": 0.5302378668243817, "learning_rate": 1.1772305268443417e-05, "loss": 11.8017, "step": 31170 }, { "epoch": 1.6973855274890581, "grad_norm": 0.6109088340488431, "learning_rate": 1.1768154586681568e-05, "loss": 11.8214, "step": 31171 }, { "epoch": 1.6974399814856411, "grad_norm": 0.5527344866619177, "learning_rate": 1.1764004591025346e-05, "loss": 11.6997, "step": 31172 }, { "epoch": 1.6974944354822243, "grad_norm": 0.625763945079985, "learning_rate": 1.1759855281507037e-05, "loss": 11.9015, "step": 31173 }, { "epoch": 1.6975488894788073, "grad_norm": 0.4966233521673928, "learning_rate": 1.1755706658158872e-05, "loss": 11.7, "step": 31174 }, { "epoch": 1.6976033434753903, "grad_norm": 0.5206706851319974, "learning_rate": 1.1751558721013157e-05, "loss": 11.7349, "step": 31175 }, { "epoch": 1.6976577974719733, "grad_norm": 0.5466083767639491, "learning_rate": 1.174741147010211e-05, "loss": 11.7965, "step": 31176 }, { "epoch": 1.6977122514685563, "grad_norm": 0.5555311654612501, "learning_rate": 1.1743264905457973e-05, "loss": 11.8305, "step": 31177 }, { "epoch": 1.6977667054651393, "grad_norm": 0.5827110532366812, "learning_rate": 1.1739119027113033e-05, "loss": 11.9338, "step": 31178 }, { "epoch": 1.6978211594617223, "grad_norm": 0.546713353054429, "learning_rate": 1.1734973835099483e-05, "loss": 11.8532, "step": 31179 }, { "epoch": 1.6978756134583053, "grad_norm": 0.5617743954800166, "learning_rate": 1.17308293294496e-05, "loss": 12.0388, "step": 31180 }, { "epoch": 1.6979300674548883, "grad_norm": 0.5302951201507142, "learning_rate": 1.1726685510195567e-05, "loss": 11.8557, "step": 31181 }, { "epoch": 1.6979845214514713, "grad_norm": 0.517914976928299, "learning_rate": 1.1722542377369639e-05, "loss": 11.8541, "step": 31182 }, { "epoch": 1.6980389754480543, "grad_norm": 0.5714738519216818, "learning_rate": 1.1718399931004043e-05, "loss": 11.8515, "step": 31183 }, { "epoch": 1.6980934294446373, "grad_norm": 0.6066301526252882, "learning_rate": 1.1714258171130954e-05, "loss": 11.713, "step": 31184 }, { "epoch": 1.6981478834412203, "grad_norm": 0.562355052368481, "learning_rate": 1.1710117097782635e-05, "loss": 11.8545, "step": 31185 }, { "epoch": 1.6982023374378032, "grad_norm": 0.5294204864785601, "learning_rate": 1.1705976710991206e-05, "loss": 11.7689, "step": 31186 }, { "epoch": 1.6982567914343862, "grad_norm": 0.5444777783774468, "learning_rate": 1.170183701078892e-05, "loss": 11.6812, "step": 31187 }, { "epoch": 1.6983112454309692, "grad_norm": 0.57113633311093, "learning_rate": 1.1697697997207979e-05, "loss": 11.7576, "step": 31188 }, { "epoch": 1.6983656994275522, "grad_norm": 0.5507576347384435, "learning_rate": 1.1693559670280518e-05, "loss": 11.5738, "step": 31189 }, { "epoch": 1.6984201534241352, "grad_norm": 0.5175553432450869, "learning_rate": 1.1689422030038765e-05, "loss": 11.7401, "step": 31190 }, { "epoch": 1.6984746074207184, "grad_norm": 0.5095825569788507, "learning_rate": 1.1685285076514863e-05, "loss": 11.6847, "step": 31191 }, { "epoch": 1.6985290614173014, "grad_norm": 0.5677070131155337, "learning_rate": 1.1681148809741016e-05, "loss": 11.8285, "step": 31192 }, { "epoch": 1.6985835154138844, "grad_norm": 0.5335949677262533, "learning_rate": 1.1677013229749334e-05, "loss": 11.7886, "step": 31193 }, { "epoch": 1.6986379694104674, "grad_norm": 0.560539418745713, "learning_rate": 1.1672878336572024e-05, "loss": 11.7421, "step": 31194 }, { "epoch": 1.6986924234070504, "grad_norm": 0.5705922297864177, "learning_rate": 1.166874413024126e-05, "loss": 11.8745, "step": 31195 }, { "epoch": 1.6987468774036334, "grad_norm": 0.554963106832216, "learning_rate": 1.1664610610789106e-05, "loss": 11.9591, "step": 31196 }, { "epoch": 1.6988013314002166, "grad_norm": 0.548341727094421, "learning_rate": 1.1660477778247758e-05, "loss": 11.7741, "step": 31197 }, { "epoch": 1.6988557853967996, "grad_norm": 0.5568708477835408, "learning_rate": 1.165634563264938e-05, "loss": 11.7128, "step": 31198 }, { "epoch": 1.6989102393933826, "grad_norm": 0.5605668718638176, "learning_rate": 1.1652214174026045e-05, "loss": 11.8779, "step": 31199 }, { "epoch": 1.6989646933899656, "grad_norm": 0.5776693095633945, "learning_rate": 1.1648083402409927e-05, "loss": 11.8465, "step": 31200 }, { "epoch": 1.6990191473865486, "grad_norm": 0.5691267610329024, "learning_rate": 1.1643953317833112e-05, "loss": 11.7833, "step": 31201 }, { "epoch": 1.6990736013831316, "grad_norm": 0.5488503787332141, "learning_rate": 1.1639823920327753e-05, "loss": 11.7771, "step": 31202 }, { "epoch": 1.6991280553797146, "grad_norm": 0.5433140690219086, "learning_rate": 1.1635695209925922e-05, "loss": 11.8082, "step": 31203 }, { "epoch": 1.6991825093762976, "grad_norm": 0.5317624685769947, "learning_rate": 1.1631567186659741e-05, "loss": 11.8112, "step": 31204 }, { "epoch": 1.6992369633728805, "grad_norm": 0.5585805141630426, "learning_rate": 1.1627439850561351e-05, "loss": 11.8755, "step": 31205 }, { "epoch": 1.6992914173694635, "grad_norm": 0.5323604788938309, "learning_rate": 1.1623313201662778e-05, "loss": 11.7716, "step": 31206 }, { "epoch": 1.6993458713660465, "grad_norm": 0.5593327894143041, "learning_rate": 1.1619187239996154e-05, "loss": 11.8379, "step": 31207 }, { "epoch": 1.6994003253626295, "grad_norm": 0.513432314602708, "learning_rate": 1.1615061965593533e-05, "loss": 11.8205, "step": 31208 }, { "epoch": 1.6994547793592125, "grad_norm": 0.5476954780496204, "learning_rate": 1.1610937378487008e-05, "loss": 11.826, "step": 31209 }, { "epoch": 1.6995092333557955, "grad_norm": 0.6067927322440799, "learning_rate": 1.1606813478708689e-05, "loss": 11.8313, "step": 31210 }, { "epoch": 1.6995636873523785, "grad_norm": 0.528739121928077, "learning_rate": 1.1602690266290583e-05, "loss": 11.7791, "step": 31211 }, { "epoch": 1.6996181413489615, "grad_norm": 0.4979464794477796, "learning_rate": 1.15985677412648e-05, "loss": 11.68, "step": 31212 }, { "epoch": 1.6996725953455445, "grad_norm": 0.5699731103190695, "learning_rate": 1.1594445903663365e-05, "loss": 11.783, "step": 31213 }, { "epoch": 1.6997270493421277, "grad_norm": 0.5240174059605114, "learning_rate": 1.1590324753518367e-05, "loss": 11.7972, "step": 31214 }, { "epoch": 1.6997815033387107, "grad_norm": 0.5348642485513088, "learning_rate": 1.1586204290861824e-05, "loss": 11.8107, "step": 31215 }, { "epoch": 1.6998359573352937, "grad_norm": 0.5532560565183934, "learning_rate": 1.1582084515725766e-05, "loss": 11.8337, "step": 31216 }, { "epoch": 1.6998904113318767, "grad_norm": 0.5466933677368346, "learning_rate": 1.1577965428142257e-05, "loss": 11.7897, "step": 31217 }, { "epoch": 1.6999448653284597, "grad_norm": 0.5572512944589181, "learning_rate": 1.1573847028143315e-05, "loss": 11.8394, "step": 31218 }, { "epoch": 1.6999993193250427, "grad_norm": 0.5551068691720326, "learning_rate": 1.1569729315760969e-05, "loss": 11.8603, "step": 31219 }, { "epoch": 1.7000537733216259, "grad_norm": 0.5692280362315412, "learning_rate": 1.1565612291027228e-05, "loss": 11.7198, "step": 31220 }, { "epoch": 1.7001082273182089, "grad_norm": 0.5044517711240781, "learning_rate": 1.1561495953974122e-05, "loss": 11.7405, "step": 31221 }, { "epoch": 1.7001626813147919, "grad_norm": 0.6662775759620126, "learning_rate": 1.1557380304633659e-05, "loss": 11.8044, "step": 31222 }, { "epoch": 1.7002171353113749, "grad_norm": 0.5468430216285007, "learning_rate": 1.1553265343037833e-05, "loss": 11.6875, "step": 31223 }, { "epoch": 1.7002715893079579, "grad_norm": 0.510436662117331, "learning_rate": 1.1549151069218655e-05, "loss": 11.7209, "step": 31224 }, { "epoch": 1.7003260433045408, "grad_norm": 0.571369443119801, "learning_rate": 1.1545037483208098e-05, "loss": 11.8318, "step": 31225 }, { "epoch": 1.7003804973011238, "grad_norm": 0.5187043516404863, "learning_rate": 1.1540924585038193e-05, "loss": 11.7688, "step": 31226 }, { "epoch": 1.7004349512977068, "grad_norm": 0.5264191141204417, "learning_rate": 1.153681237474088e-05, "loss": 11.8329, "step": 31227 }, { "epoch": 1.7004894052942898, "grad_norm": 0.5478201588307893, "learning_rate": 1.1532700852348132e-05, "loss": 11.8485, "step": 31228 }, { "epoch": 1.7005438592908728, "grad_norm": 0.5377739250236913, "learning_rate": 1.1528590017891961e-05, "loss": 11.6057, "step": 31229 }, { "epoch": 1.7005983132874558, "grad_norm": 0.5774197328841263, "learning_rate": 1.1524479871404293e-05, "loss": 11.7849, "step": 31230 }, { "epoch": 1.7006527672840388, "grad_norm": 0.5434830326628208, "learning_rate": 1.1520370412917103e-05, "loss": 11.7581, "step": 31231 }, { "epoch": 1.7007072212806218, "grad_norm": 0.5541688132906321, "learning_rate": 1.1516261642462366e-05, "loss": 11.781, "step": 31232 }, { "epoch": 1.7007616752772048, "grad_norm": 0.6036717618407496, "learning_rate": 1.1512153560072003e-05, "loss": 11.846, "step": 31233 }, { "epoch": 1.7008161292737878, "grad_norm": 0.5431214809691001, "learning_rate": 1.1508046165777997e-05, "loss": 11.79, "step": 31234 }, { "epoch": 1.7008705832703708, "grad_norm": 0.5744119753557512, "learning_rate": 1.1503939459612234e-05, "loss": 11.72, "step": 31235 }, { "epoch": 1.7009250372669538, "grad_norm": 0.6004856798370535, "learning_rate": 1.149983344160671e-05, "loss": 11.8934, "step": 31236 }, { "epoch": 1.7009794912635368, "grad_norm": 0.6745525890407313, "learning_rate": 1.1495728111793313e-05, "loss": 11.9227, "step": 31237 }, { "epoch": 1.70103394526012, "grad_norm": 0.5476790313785479, "learning_rate": 1.149162347020396e-05, "loss": 11.7708, "step": 31238 }, { "epoch": 1.701088399256703, "grad_norm": 0.5726140051205252, "learning_rate": 1.1487519516870604e-05, "loss": 11.7045, "step": 31239 }, { "epoch": 1.701142853253286, "grad_norm": 0.5206056900957082, "learning_rate": 1.148341625182512e-05, "loss": 11.7224, "step": 31240 }, { "epoch": 1.701197307249869, "grad_norm": 0.5515522092270686, "learning_rate": 1.1479313675099456e-05, "loss": 11.6924, "step": 31241 }, { "epoch": 1.701251761246452, "grad_norm": 0.5844488582553043, "learning_rate": 1.1475211786725482e-05, "loss": 11.8821, "step": 31242 }, { "epoch": 1.7013062152430352, "grad_norm": 0.5717821331759587, "learning_rate": 1.1471110586735101e-05, "loss": 11.7941, "step": 31243 }, { "epoch": 1.7013606692396182, "grad_norm": 0.5688372934165855, "learning_rate": 1.1467010075160223e-05, "loss": 11.965, "step": 31244 }, { "epoch": 1.7014151232362011, "grad_norm": 0.5840128309900645, "learning_rate": 1.1462910252032711e-05, "loss": 11.7605, "step": 31245 }, { "epoch": 1.7014695772327841, "grad_norm": 0.5091680797575212, "learning_rate": 1.1458811117384472e-05, "loss": 11.8584, "step": 31246 }, { "epoch": 1.7015240312293671, "grad_norm": 0.4746440996748854, "learning_rate": 1.145471267124737e-05, "loss": 11.6595, "step": 31247 }, { "epoch": 1.7015784852259501, "grad_norm": 0.4875825178627739, "learning_rate": 1.1450614913653246e-05, "loss": 11.6376, "step": 31248 }, { "epoch": 1.7016329392225331, "grad_norm": 0.5855410815440408, "learning_rate": 1.1446517844634009e-05, "loss": 11.8022, "step": 31249 }, { "epoch": 1.701687393219116, "grad_norm": 0.6982611521644404, "learning_rate": 1.1442421464221487e-05, "loss": 11.8648, "step": 31250 }, { "epoch": 1.701741847215699, "grad_norm": 0.578953360743314, "learning_rate": 1.1438325772447566e-05, "loss": 11.7539, "step": 31251 }, { "epoch": 1.701796301212282, "grad_norm": 0.5357224343657475, "learning_rate": 1.1434230769344046e-05, "loss": 11.9149, "step": 31252 }, { "epoch": 1.701850755208865, "grad_norm": 0.5423134189518319, "learning_rate": 1.1430136454942808e-05, "loss": 11.6547, "step": 31253 }, { "epoch": 1.701905209205448, "grad_norm": 0.5542201498977888, "learning_rate": 1.1426042829275706e-05, "loss": 11.7096, "step": 31254 }, { "epoch": 1.701959663202031, "grad_norm": 0.5109266878922101, "learning_rate": 1.1421949892374516e-05, "loss": 11.7829, "step": 31255 }, { "epoch": 1.702014117198614, "grad_norm": 0.5398111518041445, "learning_rate": 1.1417857644271158e-05, "loss": 11.7906, "step": 31256 }, { "epoch": 1.702068571195197, "grad_norm": 0.5654600725757558, "learning_rate": 1.1413766084997335e-05, "loss": 11.88, "step": 31257 }, { "epoch": 1.70212302519178, "grad_norm": 0.6032763644933257, "learning_rate": 1.1409675214584925e-05, "loss": 11.8079, "step": 31258 }, { "epoch": 1.702177479188363, "grad_norm": 0.5796129509260738, "learning_rate": 1.140558503306577e-05, "loss": 11.7964, "step": 31259 }, { "epoch": 1.702231933184946, "grad_norm": 0.5547492772467061, "learning_rate": 1.1401495540471607e-05, "loss": 11.7957, "step": 31260 }, { "epoch": 1.7022863871815292, "grad_norm": 0.5234253960011268, "learning_rate": 1.1397406736834305e-05, "loss": 11.7468, "step": 31261 }, { "epoch": 1.7023408411781122, "grad_norm": 0.5560471414455337, "learning_rate": 1.13933186221856e-05, "loss": 11.8825, "step": 31262 }, { "epoch": 1.7023952951746952, "grad_norm": 0.556439278089486, "learning_rate": 1.1389231196557337e-05, "loss": 11.8036, "step": 31263 }, { "epoch": 1.7024497491712782, "grad_norm": 0.5913091902537242, "learning_rate": 1.1385144459981245e-05, "loss": 11.8411, "step": 31264 }, { "epoch": 1.7025042031678612, "grad_norm": 0.5001200998457481, "learning_rate": 1.1381058412489132e-05, "loss": 11.801, "step": 31265 }, { "epoch": 1.7025586571644442, "grad_norm": 0.5743565103017917, "learning_rate": 1.1376973054112816e-05, "loss": 11.8137, "step": 31266 }, { "epoch": 1.7026131111610274, "grad_norm": 0.5366117216556852, "learning_rate": 1.1372888384883973e-05, "loss": 11.7674, "step": 31267 }, { "epoch": 1.7026675651576104, "grad_norm": 0.5544635367823163, "learning_rate": 1.1368804404834431e-05, "loss": 11.7378, "step": 31268 }, { "epoch": 1.7027220191541934, "grad_norm": 0.5414820553650712, "learning_rate": 1.1364721113995914e-05, "loss": 11.7492, "step": 31269 }, { "epoch": 1.7027764731507764, "grad_norm": 0.5411382474896068, "learning_rate": 1.1360638512400179e-05, "loss": 11.7101, "step": 31270 }, { "epoch": 1.7028309271473594, "grad_norm": 0.5423226520013353, "learning_rate": 1.1356556600079016e-05, "loss": 11.8066, "step": 31271 }, { "epoch": 1.7028853811439424, "grad_norm": 0.5846380472738664, "learning_rate": 1.135247537706411e-05, "loss": 11.8038, "step": 31272 }, { "epoch": 1.7029398351405254, "grad_norm": 0.5268004901861915, "learning_rate": 1.1348394843387244e-05, "loss": 11.8386, "step": 31273 }, { "epoch": 1.7029942891371084, "grad_norm": 0.5801831134073427, "learning_rate": 1.1344314999080107e-05, "loss": 11.8378, "step": 31274 }, { "epoch": 1.7030487431336914, "grad_norm": 0.5959497144288662, "learning_rate": 1.1340235844174463e-05, "loss": 11.8477, "step": 31275 }, { "epoch": 1.7031031971302744, "grad_norm": 0.5313650368151611, "learning_rate": 1.1336157378702018e-05, "loss": 11.729, "step": 31276 }, { "epoch": 1.7031576511268574, "grad_norm": 0.5720033144718367, "learning_rate": 1.1332079602694446e-05, "loss": 11.7933, "step": 31277 }, { "epoch": 1.7032121051234403, "grad_norm": 0.5260727974708785, "learning_rate": 1.1328002516183522e-05, "loss": 11.7659, "step": 31278 }, { "epoch": 1.7032665591200233, "grad_norm": 0.530997427908885, "learning_rate": 1.1323926119200912e-05, "loss": 11.6772, "step": 31279 }, { "epoch": 1.7033210131166063, "grad_norm": 0.4760019346679841, "learning_rate": 1.131985041177831e-05, "loss": 11.6817, "step": 31280 }, { "epoch": 1.7033754671131893, "grad_norm": 0.5251093535938762, "learning_rate": 1.1315775393947447e-05, "loss": 11.7829, "step": 31281 }, { "epoch": 1.7034299211097723, "grad_norm": 0.6078293290441132, "learning_rate": 1.1311701065739965e-05, "loss": 11.7932, "step": 31282 }, { "epoch": 1.7034843751063553, "grad_norm": 0.5385094091574454, "learning_rate": 1.1307627427187595e-05, "loss": 11.7556, "step": 31283 }, { "epoch": 1.7035388291029385, "grad_norm": 0.5637869318010038, "learning_rate": 1.1303554478321954e-05, "loss": 11.8293, "step": 31284 }, { "epoch": 1.7035932830995215, "grad_norm": 0.5672741725345258, "learning_rate": 1.1299482219174784e-05, "loss": 11.8961, "step": 31285 }, { "epoch": 1.7036477370961045, "grad_norm": 0.5191819917234376, "learning_rate": 1.1295410649777704e-05, "loss": 11.7897, "step": 31286 }, { "epoch": 1.7037021910926875, "grad_norm": 0.5211313978753638, "learning_rate": 1.1291339770162368e-05, "loss": 11.8367, "step": 31287 }, { "epoch": 1.7037566450892705, "grad_norm": 0.6807269684400129, "learning_rate": 1.1287269580360471e-05, "loss": 11.9386, "step": 31288 }, { "epoch": 1.7038110990858535, "grad_norm": 0.5270099005856907, "learning_rate": 1.1283200080403632e-05, "loss": 11.7162, "step": 31289 }, { "epoch": 1.7038655530824367, "grad_norm": 0.5911195333290961, "learning_rate": 1.1279131270323518e-05, "loss": 11.8088, "step": 31290 }, { "epoch": 1.7039200070790197, "grad_norm": 0.5900182950918653, "learning_rate": 1.1275063150151744e-05, "loss": 11.8025, "step": 31291 }, { "epoch": 1.7039744610756027, "grad_norm": 0.5600283388219753, "learning_rate": 1.1270995719919952e-05, "loss": 11.9367, "step": 31292 }, { "epoch": 1.7040289150721857, "grad_norm": 0.5049451539233827, "learning_rate": 1.1266928979659796e-05, "loss": 11.7135, "step": 31293 }, { "epoch": 1.7040833690687687, "grad_norm": 0.6294900510590399, "learning_rate": 1.1262862929402862e-05, "loss": 11.8334, "step": 31294 }, { "epoch": 1.7041378230653517, "grad_norm": 0.5269043995110961, "learning_rate": 1.1258797569180812e-05, "loss": 11.6721, "step": 31295 }, { "epoch": 1.7041922770619347, "grad_norm": 0.5255994567283027, "learning_rate": 1.1254732899025222e-05, "loss": 11.8429, "step": 31296 }, { "epoch": 1.7042467310585176, "grad_norm": 0.5035362961618848, "learning_rate": 1.1250668918967711e-05, "loss": 11.742, "step": 31297 }, { "epoch": 1.7043011850551006, "grad_norm": 0.5254803888413105, "learning_rate": 1.1246605629039886e-05, "loss": 11.9251, "step": 31298 }, { "epoch": 1.7043556390516836, "grad_norm": 0.50652490422515, "learning_rate": 1.1242543029273322e-05, "loss": 11.7973, "step": 31299 }, { "epoch": 1.7044100930482666, "grad_norm": 0.5293692893456606, "learning_rate": 1.123848111969965e-05, "loss": 11.8231, "step": 31300 }, { "epoch": 1.7044645470448496, "grad_norm": 0.4859020321205245, "learning_rate": 1.1234419900350413e-05, "loss": 11.6815, "step": 31301 }, { "epoch": 1.7045190010414326, "grad_norm": 0.49308515674546355, "learning_rate": 1.1230359371257215e-05, "loss": 11.7344, "step": 31302 }, { "epoch": 1.7045734550380156, "grad_norm": 0.5485698988111734, "learning_rate": 1.1226299532451657e-05, "loss": 11.7352, "step": 31303 }, { "epoch": 1.7046279090345986, "grad_norm": 0.5742764621579853, "learning_rate": 1.1222240383965243e-05, "loss": 11.7522, "step": 31304 }, { "epoch": 1.7046823630311816, "grad_norm": 0.4777860565935414, "learning_rate": 1.1218181925829607e-05, "loss": 11.7434, "step": 31305 }, { "epoch": 1.7047368170277646, "grad_norm": 0.5040213787829656, "learning_rate": 1.1214124158076278e-05, "loss": 11.8565, "step": 31306 }, { "epoch": 1.7047912710243478, "grad_norm": 0.5030122128680107, "learning_rate": 1.1210067080736786e-05, "loss": 11.6438, "step": 31307 }, { "epoch": 1.7048457250209308, "grad_norm": 0.5981916364110439, "learning_rate": 1.1206010693842727e-05, "loss": 11.9581, "step": 31308 }, { "epoch": 1.7049001790175138, "grad_norm": 0.5167265446436821, "learning_rate": 1.120195499742559e-05, "loss": 11.8135, "step": 31309 }, { "epoch": 1.7049546330140968, "grad_norm": 0.5268983740408758, "learning_rate": 1.1197899991516968e-05, "loss": 11.7719, "step": 31310 }, { "epoch": 1.7050090870106798, "grad_norm": 0.5874469871352266, "learning_rate": 1.119384567614835e-05, "loss": 11.7836, "step": 31311 }, { "epoch": 1.7050635410072628, "grad_norm": 0.5373827277132782, "learning_rate": 1.11897920513513e-05, "loss": 11.7336, "step": 31312 }, { "epoch": 1.705117995003846, "grad_norm": 0.5108124772621323, "learning_rate": 1.1185739117157301e-05, "loss": 11.8133, "step": 31313 }, { "epoch": 1.705172449000429, "grad_norm": 0.5876733779798438, "learning_rate": 1.1181686873597886e-05, "loss": 11.9087, "step": 31314 }, { "epoch": 1.705226902997012, "grad_norm": 0.5517447147814664, "learning_rate": 1.1177635320704594e-05, "loss": 11.9218, "step": 31315 }, { "epoch": 1.705281356993595, "grad_norm": 0.6209837257303116, "learning_rate": 1.1173584458508901e-05, "loss": 11.9754, "step": 31316 }, { "epoch": 1.705335810990178, "grad_norm": 0.5512831780592493, "learning_rate": 1.1169534287042283e-05, "loss": 11.6591, "step": 31317 }, { "epoch": 1.705390264986761, "grad_norm": 0.5985583538413535, "learning_rate": 1.1165484806336302e-05, "loss": 11.7803, "step": 31318 }, { "epoch": 1.705444718983344, "grad_norm": 0.575165712108634, "learning_rate": 1.1161436016422378e-05, "loss": 11.8899, "step": 31319 }, { "epoch": 1.705499172979927, "grad_norm": 0.5819721823764681, "learning_rate": 1.115738791733204e-05, "loss": 11.7869, "step": 31320 }, { "epoch": 1.70555362697651, "grad_norm": 0.5609460168421133, "learning_rate": 1.1153340509096744e-05, "loss": 11.7868, "step": 31321 }, { "epoch": 1.705608080973093, "grad_norm": 0.5771190839784344, "learning_rate": 1.1149293791747984e-05, "loss": 11.841, "step": 31322 }, { "epoch": 1.705662534969676, "grad_norm": 0.5188743409995102, "learning_rate": 1.114524776531719e-05, "loss": 11.8366, "step": 31323 }, { "epoch": 1.705716988966259, "grad_norm": 0.5913309838941305, "learning_rate": 1.1141202429835885e-05, "loss": 11.7614, "step": 31324 }, { "epoch": 1.7057714429628419, "grad_norm": 0.5552822084419874, "learning_rate": 1.1137157785335462e-05, "loss": 11.8259, "step": 31325 }, { "epoch": 1.7058258969594249, "grad_norm": 0.5318717769413936, "learning_rate": 1.1133113831847431e-05, "loss": 11.7007, "step": 31326 }, { "epoch": 1.7058803509560079, "grad_norm": 0.5681071697112415, "learning_rate": 1.1129070569403199e-05, "loss": 11.896, "step": 31327 }, { "epoch": 1.7059348049525909, "grad_norm": 0.5089217670051894, "learning_rate": 1.112502799803421e-05, "loss": 11.7835, "step": 31328 }, { "epoch": 1.7059892589491739, "grad_norm": 0.6497385507303329, "learning_rate": 1.1120986117771892e-05, "loss": 11.7724, "step": 31329 }, { "epoch": 1.7060437129457569, "grad_norm": 0.5022658093666803, "learning_rate": 1.1116944928647722e-05, "loss": 11.7518, "step": 31330 }, { "epoch": 1.70609816694234, "grad_norm": 0.5996853030420862, "learning_rate": 1.1112904430693071e-05, "loss": 11.7625, "step": 31331 }, { "epoch": 1.706152620938923, "grad_norm": 0.5378810116332772, "learning_rate": 1.1108864623939408e-05, "loss": 11.7291, "step": 31332 }, { "epoch": 1.706207074935506, "grad_norm": 0.5509921075397711, "learning_rate": 1.1104825508418094e-05, "loss": 11.8393, "step": 31333 }, { "epoch": 1.706261528932089, "grad_norm": 0.5440243687930099, "learning_rate": 1.110078708416058e-05, "loss": 11.7172, "step": 31334 }, { "epoch": 1.706315982928672, "grad_norm": 0.5716647565322295, "learning_rate": 1.1096749351198243e-05, "loss": 11.7178, "step": 31335 }, { "epoch": 1.706370436925255, "grad_norm": 0.59309097214083, "learning_rate": 1.1092712309562503e-05, "loss": 11.7863, "step": 31336 }, { "epoch": 1.7064248909218382, "grad_norm": 0.5770068869056281, "learning_rate": 1.1088675959284756e-05, "loss": 11.7634, "step": 31337 }, { "epoch": 1.7064793449184212, "grad_norm": 0.5370075067079356, "learning_rate": 1.1084640300396343e-05, "loss": 11.6557, "step": 31338 }, { "epoch": 1.7065337989150042, "grad_norm": 0.5773533412198194, "learning_rate": 1.1080605332928706e-05, "loss": 11.8865, "step": 31339 }, { "epoch": 1.7065882529115872, "grad_norm": 0.6064211803671088, "learning_rate": 1.1076571056913166e-05, "loss": 11.9042, "step": 31340 }, { "epoch": 1.7066427069081702, "grad_norm": 0.5828648957159318, "learning_rate": 1.1072537472381127e-05, "loss": 11.7946, "step": 31341 }, { "epoch": 1.7066971609047532, "grad_norm": 0.5706479761074192, "learning_rate": 1.106850457936397e-05, "loss": 11.8108, "step": 31342 }, { "epoch": 1.7067516149013362, "grad_norm": 0.5552683982108595, "learning_rate": 1.106447237789301e-05, "loss": 11.8471, "step": 31343 }, { "epoch": 1.7068060688979192, "grad_norm": 0.5314013550341861, "learning_rate": 1.1060440867999655e-05, "loss": 11.7732, "step": 31344 }, { "epoch": 1.7068605228945022, "grad_norm": 0.5327937711703757, "learning_rate": 1.1056410049715193e-05, "loss": 11.7761, "step": 31345 }, { "epoch": 1.7069149768910852, "grad_norm": 0.5575654136767733, "learning_rate": 1.1052379923071033e-05, "loss": 11.7641, "step": 31346 }, { "epoch": 1.7069694308876682, "grad_norm": 0.6278233816868286, "learning_rate": 1.1048350488098491e-05, "loss": 11.9154, "step": 31347 }, { "epoch": 1.7070238848842512, "grad_norm": 0.5061958734375708, "learning_rate": 1.1044321744828857e-05, "loss": 11.7967, "step": 31348 }, { "epoch": 1.7070783388808342, "grad_norm": 0.5369718885477847, "learning_rate": 1.1040293693293524e-05, "loss": 11.7805, "step": 31349 }, { "epoch": 1.7071327928774171, "grad_norm": 0.5771905146793501, "learning_rate": 1.103626633352376e-05, "loss": 11.8721, "step": 31350 }, { "epoch": 1.7071872468740001, "grad_norm": 0.5150195865782898, "learning_rate": 1.1032239665550915e-05, "loss": 11.7464, "step": 31351 }, { "epoch": 1.7072417008705831, "grad_norm": 0.5898189938885433, "learning_rate": 1.1028213689406309e-05, "loss": 11.8358, "step": 31352 }, { "epoch": 1.7072961548671661, "grad_norm": 0.6005633282977264, "learning_rate": 1.1024188405121216e-05, "loss": 11.8762, "step": 31353 }, { "epoch": 1.7073506088637493, "grad_norm": 0.7128329843736801, "learning_rate": 1.102016381272698e-05, "loss": 11.8238, "step": 31354 }, { "epoch": 1.7074050628603323, "grad_norm": 0.587798147854613, "learning_rate": 1.1016139912254841e-05, "loss": 11.9425, "step": 31355 }, { "epoch": 1.7074595168569153, "grad_norm": 0.5373226019184096, "learning_rate": 1.1012116703736153e-05, "loss": 11.7459, "step": 31356 }, { "epoch": 1.7075139708534983, "grad_norm": 0.5513145666790776, "learning_rate": 1.1008094187202166e-05, "loss": 11.6954, "step": 31357 }, { "epoch": 1.7075684248500813, "grad_norm": 0.5214057416926633, "learning_rate": 1.1004072362684136e-05, "loss": 11.7938, "step": 31358 }, { "epoch": 1.7076228788466643, "grad_norm": 0.4945392891809306, "learning_rate": 1.1000051230213393e-05, "loss": 11.7933, "step": 31359 }, { "epoch": 1.7076773328432475, "grad_norm": 0.5116429250355039, "learning_rate": 1.0996030789821143e-05, "loss": 11.8087, "step": 31360 }, { "epoch": 1.7077317868398305, "grad_norm": 0.5063420571560326, "learning_rate": 1.0992011041538719e-05, "loss": 11.8815, "step": 31361 }, { "epoch": 1.7077862408364135, "grad_norm": 0.5517296998105604, "learning_rate": 1.0987991985397317e-05, "loss": 11.768, "step": 31362 }, { "epoch": 1.7078406948329965, "grad_norm": 0.541663286380688, "learning_rate": 1.0983973621428212e-05, "loss": 11.7052, "step": 31363 }, { "epoch": 1.7078951488295795, "grad_norm": 0.538583345271869, "learning_rate": 1.097995594966268e-05, "loss": 11.7137, "step": 31364 }, { "epoch": 1.7079496028261625, "grad_norm": 0.5956429163476145, "learning_rate": 1.0975938970131915e-05, "loss": 11.8822, "step": 31365 }, { "epoch": 1.7080040568227455, "grad_norm": 0.528189305513491, "learning_rate": 1.0971922682867209e-05, "loss": 11.9011, "step": 31366 }, { "epoch": 1.7080585108193285, "grad_norm": 0.5457842011110622, "learning_rate": 1.0967907087899754e-05, "loss": 11.9135, "step": 31367 }, { "epoch": 1.7081129648159115, "grad_norm": 0.5762829539871385, "learning_rate": 1.0963892185260771e-05, "loss": 11.8354, "step": 31368 }, { "epoch": 1.7081674188124945, "grad_norm": 0.5604912212589517, "learning_rate": 1.0959877974981503e-05, "loss": 11.7911, "step": 31369 }, { "epoch": 1.7082218728090774, "grad_norm": 0.5599812857831126, "learning_rate": 1.0955864457093145e-05, "loss": 11.807, "step": 31370 }, { "epoch": 1.7082763268056604, "grad_norm": 0.5705282747501176, "learning_rate": 1.0951851631626931e-05, "loss": 11.7675, "step": 31371 }, { "epoch": 1.7083307808022434, "grad_norm": 0.5512783051901707, "learning_rate": 1.0947839498614032e-05, "loss": 11.8431, "step": 31372 }, { "epoch": 1.7083852347988264, "grad_norm": 0.5521971898170139, "learning_rate": 1.0943828058085693e-05, "loss": 11.774, "step": 31373 }, { "epoch": 1.7084396887954094, "grad_norm": 0.49511653281167395, "learning_rate": 1.0939817310073064e-05, "loss": 11.7015, "step": 31374 }, { "epoch": 1.7084941427919924, "grad_norm": 0.5341521834294026, "learning_rate": 1.0935807254607344e-05, "loss": 11.737, "step": 31375 }, { "epoch": 1.7085485967885754, "grad_norm": 0.49326756019797263, "learning_rate": 1.0931797891719742e-05, "loss": 11.6405, "step": 31376 }, { "epoch": 1.7086030507851586, "grad_norm": 0.5888327481424543, "learning_rate": 1.092778922144142e-05, "loss": 11.7765, "step": 31377 }, { "epoch": 1.7086575047817416, "grad_norm": 0.5394675366004322, "learning_rate": 1.0923781243803533e-05, "loss": 11.7522, "step": 31378 }, { "epoch": 1.7087119587783246, "grad_norm": 0.6782495984309581, "learning_rate": 1.0919773958837266e-05, "loss": 11.8773, "step": 31379 }, { "epoch": 1.7087664127749076, "grad_norm": 0.5195367539642743, "learning_rate": 1.091576736657377e-05, "loss": 11.8558, "step": 31380 }, { "epoch": 1.7088208667714906, "grad_norm": 0.5677555429094424, "learning_rate": 1.0911761467044212e-05, "loss": 11.7823, "step": 31381 }, { "epoch": 1.7088753207680736, "grad_norm": 0.5161460961865567, "learning_rate": 1.0907756260279734e-05, "loss": 11.8444, "step": 31382 }, { "epoch": 1.7089297747646568, "grad_norm": 0.5625333123438224, "learning_rate": 1.0903751746311485e-05, "loss": 11.7917, "step": 31383 }, { "epoch": 1.7089842287612398, "grad_norm": 0.5187215304958623, "learning_rate": 1.08997479251706e-05, "loss": 11.8216, "step": 31384 }, { "epoch": 1.7090386827578228, "grad_norm": 0.5733631636985802, "learning_rate": 1.0895744796888207e-05, "loss": 11.7785, "step": 31385 }, { "epoch": 1.7090931367544058, "grad_norm": 0.5444131973315395, "learning_rate": 1.0891742361495472e-05, "loss": 11.7844, "step": 31386 }, { "epoch": 1.7091475907509888, "grad_norm": 0.5296870045464985, "learning_rate": 1.0887740619023489e-05, "loss": 11.8324, "step": 31387 }, { "epoch": 1.7092020447475718, "grad_norm": 0.5362580175654252, "learning_rate": 1.0883739569503382e-05, "loss": 11.8728, "step": 31388 }, { "epoch": 1.7092564987441548, "grad_norm": 0.5050825661817893, "learning_rate": 1.0879739212966233e-05, "loss": 11.6665, "step": 31389 }, { "epoch": 1.7093109527407377, "grad_norm": 0.5089122476644603, "learning_rate": 1.0875739549443186e-05, "loss": 11.6489, "step": 31390 }, { "epoch": 1.7093654067373207, "grad_norm": 0.5137015693380984, "learning_rate": 1.087174057896535e-05, "loss": 11.7657, "step": 31391 }, { "epoch": 1.7094198607339037, "grad_norm": 0.6671127362235856, "learning_rate": 1.0867742301563788e-05, "loss": 11.8941, "step": 31392 }, { "epoch": 1.7094743147304867, "grad_norm": 0.5283914427623861, "learning_rate": 1.086374471726963e-05, "loss": 11.7407, "step": 31393 }, { "epoch": 1.7095287687270697, "grad_norm": 0.5791147656313416, "learning_rate": 1.085974782611392e-05, "loss": 11.9786, "step": 31394 }, { "epoch": 1.7095832227236527, "grad_norm": 0.5413001101306327, "learning_rate": 1.0855751628127775e-05, "loss": 11.7702, "step": 31395 }, { "epoch": 1.7096376767202357, "grad_norm": 0.5410949087423563, "learning_rate": 1.0851756123342239e-05, "loss": 11.8466, "step": 31396 }, { "epoch": 1.7096921307168187, "grad_norm": 0.5234902815370354, "learning_rate": 1.0847761311788418e-05, "loss": 11.8398, "step": 31397 }, { "epoch": 1.7097465847134017, "grad_norm": 0.5276060194192779, "learning_rate": 1.0843767193497356e-05, "loss": 11.8515, "step": 31398 }, { "epoch": 1.7098010387099847, "grad_norm": 0.5028690735956702, "learning_rate": 1.0839773768500095e-05, "loss": 11.8355, "step": 31399 }, { "epoch": 1.7098554927065677, "grad_norm": 0.5399868910470234, "learning_rate": 1.0835781036827697e-05, "loss": 11.8094, "step": 31400 }, { "epoch": 1.7099099467031509, "grad_norm": 0.6186854069007485, "learning_rate": 1.0831788998511238e-05, "loss": 11.8729, "step": 31401 }, { "epoch": 1.7099644006997339, "grad_norm": 0.5169071866840222, "learning_rate": 1.0827797653581728e-05, "loss": 11.7763, "step": 31402 }, { "epoch": 1.7100188546963169, "grad_norm": 0.5749566765483729, "learning_rate": 1.082380700207024e-05, "loss": 11.8186, "step": 31403 }, { "epoch": 1.7100733086928999, "grad_norm": 0.5346734206873114, "learning_rate": 1.0819817044007764e-05, "loss": 11.8243, "step": 31404 }, { "epoch": 1.7101277626894829, "grad_norm": 0.5721397827847031, "learning_rate": 1.0815827779425359e-05, "loss": 11.702, "step": 31405 }, { "epoch": 1.7101822166860658, "grad_norm": 0.5159035606249273, "learning_rate": 1.0811839208354014e-05, "loss": 11.8388, "step": 31406 }, { "epoch": 1.710236670682649, "grad_norm": 0.6457503850999484, "learning_rate": 1.0807851330824792e-05, "loss": 11.7218, "step": 31407 }, { "epoch": 1.710291124679232, "grad_norm": 0.5418568786335738, "learning_rate": 1.0803864146868669e-05, "loss": 11.9103, "step": 31408 }, { "epoch": 1.710345578675815, "grad_norm": 0.5107847895235661, "learning_rate": 1.0799877656516633e-05, "loss": 11.8648, "step": 31409 }, { "epoch": 1.710400032672398, "grad_norm": 0.5459476998412349, "learning_rate": 1.0795891859799734e-05, "loss": 11.758, "step": 31410 }, { "epoch": 1.710454486668981, "grad_norm": 0.6011175290544278, "learning_rate": 1.0791906756748916e-05, "loss": 11.8125, "step": 31411 }, { "epoch": 1.710508940665564, "grad_norm": 0.5648296862396707, "learning_rate": 1.0787922347395195e-05, "loss": 11.8706, "step": 31412 }, { "epoch": 1.710563394662147, "grad_norm": 0.6056109388881363, "learning_rate": 1.0783938631769564e-05, "loss": 11.8285, "step": 31413 }, { "epoch": 1.71061784865873, "grad_norm": 0.5171633534586365, "learning_rate": 1.077995560990297e-05, "loss": 11.8017, "step": 31414 }, { "epoch": 1.710672302655313, "grad_norm": 0.5490682626692884, "learning_rate": 1.0775973281826424e-05, "loss": 11.8877, "step": 31415 }, { "epoch": 1.710726756651896, "grad_norm": 0.5379869833675414, "learning_rate": 1.0771991647570856e-05, "loss": 11.7853, "step": 31416 }, { "epoch": 1.710781210648479, "grad_norm": 0.6122121634699095, "learning_rate": 1.0768010707167264e-05, "loss": 11.8365, "step": 31417 }, { "epoch": 1.710835664645062, "grad_norm": 0.6593521026631303, "learning_rate": 1.0764030460646579e-05, "loss": 11.9166, "step": 31418 }, { "epoch": 1.710890118641645, "grad_norm": 0.5613742894427809, "learning_rate": 1.0760050908039742e-05, "loss": 11.8424, "step": 31419 }, { "epoch": 1.710944572638228, "grad_norm": 0.5385458958570473, "learning_rate": 1.0756072049377742e-05, "loss": 11.6688, "step": 31420 }, { "epoch": 1.710999026634811, "grad_norm": 0.5693692277686349, "learning_rate": 1.075209388469146e-05, "loss": 11.8753, "step": 31421 }, { "epoch": 1.711053480631394, "grad_norm": 0.5054031846875073, "learning_rate": 1.0748116414011888e-05, "loss": 11.7141, "step": 31422 }, { "epoch": 1.711107934627977, "grad_norm": 0.5666660153708234, "learning_rate": 1.074413963736991e-05, "loss": 11.884, "step": 31423 }, { "epoch": 1.7111623886245602, "grad_norm": 0.5416718000326497, "learning_rate": 1.0740163554796478e-05, "loss": 11.8489, "step": 31424 }, { "epoch": 1.7112168426211432, "grad_norm": 0.5800941045494119, "learning_rate": 1.0736188166322513e-05, "loss": 11.8722, "step": 31425 }, { "epoch": 1.7112712966177261, "grad_norm": 0.5321302466771534, "learning_rate": 1.0732213471978902e-05, "loss": 11.7248, "step": 31426 }, { "epoch": 1.7113257506143091, "grad_norm": 0.6105716607849028, "learning_rate": 1.0728239471796586e-05, "loss": 11.9043, "step": 31427 }, { "epoch": 1.7113802046108921, "grad_norm": 0.512140905505257, "learning_rate": 1.072426616580645e-05, "loss": 11.7281, "step": 31428 }, { "epoch": 1.7114346586074751, "grad_norm": 0.5692570433891122, "learning_rate": 1.0720293554039374e-05, "loss": 11.8318, "step": 31429 }, { "epoch": 1.7114891126040583, "grad_norm": 0.5329991484708254, "learning_rate": 1.0716321636526295e-05, "loss": 11.7278, "step": 31430 }, { "epoch": 1.7115435666006413, "grad_norm": 0.5407219564206193, "learning_rate": 1.0712350413298045e-05, "loss": 11.8869, "step": 31431 }, { "epoch": 1.7115980205972243, "grad_norm": 0.5466139944200886, "learning_rate": 1.0708379884385545e-05, "loss": 11.8161, "step": 31432 }, { "epoch": 1.7116524745938073, "grad_norm": 0.5260275128237987, "learning_rate": 1.0704410049819647e-05, "loss": 11.7968, "step": 31433 }, { "epoch": 1.7117069285903903, "grad_norm": 0.5113930171897116, "learning_rate": 1.0700440909631226e-05, "loss": 11.7194, "step": 31434 }, { "epoch": 1.7117613825869733, "grad_norm": 0.5484905628324327, "learning_rate": 1.0696472463851181e-05, "loss": 11.7503, "step": 31435 }, { "epoch": 1.7118158365835563, "grad_norm": 0.6031907273961457, "learning_rate": 1.069250471251031e-05, "loss": 11.7069, "step": 31436 }, { "epoch": 1.7118702905801393, "grad_norm": 0.5901844988234785, "learning_rate": 1.0688537655639552e-05, "loss": 11.76, "step": 31437 }, { "epoch": 1.7119247445767223, "grad_norm": 0.5501451352658523, "learning_rate": 1.0684571293269652e-05, "loss": 11.7001, "step": 31438 }, { "epoch": 1.7119791985733053, "grad_norm": 0.5508893112889037, "learning_rate": 1.0680605625431506e-05, "loss": 11.7408, "step": 31439 }, { "epoch": 1.7120336525698883, "grad_norm": 0.5197146997984214, "learning_rate": 1.0676640652155979e-05, "loss": 11.7606, "step": 31440 }, { "epoch": 1.7120881065664713, "grad_norm": 0.6284286242143241, "learning_rate": 1.0672676373473845e-05, "loss": 11.8299, "step": 31441 }, { "epoch": 1.7121425605630542, "grad_norm": 0.5178726043955564, "learning_rate": 1.066871278941598e-05, "loss": 11.7827, "step": 31442 }, { "epoch": 1.7121970145596372, "grad_norm": 0.5233572969675926, "learning_rate": 1.066474990001316e-05, "loss": 11.7396, "step": 31443 }, { "epoch": 1.7122514685562202, "grad_norm": 0.5556031437831046, "learning_rate": 1.066078770529626e-05, "loss": 11.7558, "step": 31444 }, { "epoch": 1.7123059225528032, "grad_norm": 0.5105555233505923, "learning_rate": 1.0656826205296021e-05, "loss": 11.6405, "step": 31445 }, { "epoch": 1.7123603765493862, "grad_norm": 0.5691749437304826, "learning_rate": 1.0652865400043299e-05, "loss": 11.6957, "step": 31446 }, { "epoch": 1.7124148305459694, "grad_norm": 0.592482491596476, "learning_rate": 1.0648905289568912e-05, "loss": 11.9148, "step": 31447 }, { "epoch": 1.7124692845425524, "grad_norm": 0.5470973254217619, "learning_rate": 1.0644945873903579e-05, "loss": 11.8666, "step": 31448 }, { "epoch": 1.7125237385391354, "grad_norm": 0.5457120239878783, "learning_rate": 1.064098715307813e-05, "loss": 11.8624, "step": 31449 }, { "epoch": 1.7125781925357184, "grad_norm": 0.5779835299810903, "learning_rate": 1.0637029127123377e-05, "loss": 11.8025, "step": 31450 }, { "epoch": 1.7126326465323014, "grad_norm": 0.5365774391121249, "learning_rate": 1.063307179607006e-05, "loss": 11.7081, "step": 31451 }, { "epoch": 1.7126871005288844, "grad_norm": 0.5367586795865453, "learning_rate": 1.0629115159948966e-05, "loss": 11.7081, "step": 31452 }, { "epoch": 1.7127415545254676, "grad_norm": 0.5022251677931849, "learning_rate": 1.0625159218790847e-05, "loss": 11.6737, "step": 31453 }, { "epoch": 1.7127960085220506, "grad_norm": 0.5299995102224879, "learning_rate": 1.0621203972626504e-05, "loss": 11.7842, "step": 31454 }, { "epoch": 1.7128504625186336, "grad_norm": 0.5703988649620033, "learning_rate": 1.0617249421486642e-05, "loss": 11.8321, "step": 31455 }, { "epoch": 1.7129049165152166, "grad_norm": 0.5735867008308759, "learning_rate": 1.0613295565402038e-05, "loss": 11.8049, "step": 31456 }, { "epoch": 1.7129593705117996, "grad_norm": 0.5642239560494303, "learning_rate": 1.0609342404403477e-05, "loss": 11.7006, "step": 31457 }, { "epoch": 1.7130138245083826, "grad_norm": 0.5533283490365261, "learning_rate": 1.0605389938521626e-05, "loss": 11.826, "step": 31458 }, { "epoch": 1.7130682785049656, "grad_norm": 0.5759973187992862, "learning_rate": 1.060143816778727e-05, "loss": 11.9006, "step": 31459 }, { "epoch": 1.7131227325015486, "grad_norm": 0.5610058868292839, "learning_rate": 1.0597487092231096e-05, "loss": 11.8556, "step": 31460 }, { "epoch": 1.7131771864981316, "grad_norm": 0.6075491934975943, "learning_rate": 1.0593536711883866e-05, "loss": 11.8305, "step": 31461 }, { "epoch": 1.7132316404947145, "grad_norm": 0.5660814834145764, "learning_rate": 1.0589587026776304e-05, "loss": 11.6515, "step": 31462 }, { "epoch": 1.7132860944912975, "grad_norm": 0.5660420940437678, "learning_rate": 1.0585638036939083e-05, "loss": 11.661, "step": 31463 }, { "epoch": 1.7133405484878805, "grad_norm": 0.5647383866968578, "learning_rate": 1.0581689742402968e-05, "loss": 11.7554, "step": 31464 }, { "epoch": 1.7133950024844635, "grad_norm": 0.5337713886613089, "learning_rate": 1.05777421431986e-05, "loss": 11.8057, "step": 31465 }, { "epoch": 1.7134494564810465, "grad_norm": 0.6145542922416759, "learning_rate": 1.057379523935672e-05, "loss": 11.6535, "step": 31466 }, { "epoch": 1.7135039104776295, "grad_norm": 0.5585886103670066, "learning_rate": 1.056984903090802e-05, "loss": 11.8565, "step": 31467 }, { "epoch": 1.7135583644742125, "grad_norm": 0.5203912155534972, "learning_rate": 1.0565903517883135e-05, "loss": 11.7955, "step": 31468 }, { "epoch": 1.7136128184707955, "grad_norm": 0.5443404081948386, "learning_rate": 1.0561958700312812e-05, "loss": 11.8629, "step": 31469 }, { "epoch": 1.7136672724673785, "grad_norm": 0.5563624426346335, "learning_rate": 1.0558014578227671e-05, "loss": 11.7182, "step": 31470 }, { "epoch": 1.7137217264639617, "grad_norm": 0.5407574650049557, "learning_rate": 1.055407115165844e-05, "loss": 11.7053, "step": 31471 }, { "epoch": 1.7137761804605447, "grad_norm": 0.6411292787717409, "learning_rate": 1.0550128420635719e-05, "loss": 11.8317, "step": 31472 }, { "epoch": 1.7138306344571277, "grad_norm": 0.5362348810926708, "learning_rate": 1.0546186385190204e-05, "loss": 11.8486, "step": 31473 }, { "epoch": 1.7138850884537107, "grad_norm": 0.5584341649374477, "learning_rate": 1.0542245045352572e-05, "loss": 11.7925, "step": 31474 }, { "epoch": 1.7139395424502937, "grad_norm": 0.5238947057269379, "learning_rate": 1.053830440115341e-05, "loss": 11.8164, "step": 31475 }, { "epoch": 1.7139939964468769, "grad_norm": 0.627934181047507, "learning_rate": 1.0534364452623424e-05, "loss": 11.9633, "step": 31476 }, { "epoch": 1.7140484504434599, "grad_norm": 0.5471972408288592, "learning_rate": 1.0530425199793226e-05, "loss": 11.8643, "step": 31477 }, { "epoch": 1.7141029044400429, "grad_norm": 0.5573526478679477, "learning_rate": 1.0526486642693423e-05, "loss": 11.7174, "step": 31478 }, { "epoch": 1.7141573584366259, "grad_norm": 0.5196324123573932, "learning_rate": 1.0522548781354691e-05, "loss": 11.711, "step": 31479 }, { "epoch": 1.7142118124332089, "grad_norm": 0.5325676929214292, "learning_rate": 1.0518611615807594e-05, "loss": 11.7579, "step": 31480 }, { "epoch": 1.7142662664297919, "grad_norm": 0.574989754468607, "learning_rate": 1.0514675146082808e-05, "loss": 11.7042, "step": 31481 }, { "epoch": 1.7143207204263748, "grad_norm": 0.5475284050653473, "learning_rate": 1.0510739372210888e-05, "loss": 11.7333, "step": 31482 }, { "epoch": 1.7143751744229578, "grad_norm": 0.5267157853026788, "learning_rate": 1.0506804294222473e-05, "loss": 11.8675, "step": 31483 }, { "epoch": 1.7144296284195408, "grad_norm": 0.5417543576027039, "learning_rate": 1.0502869912148172e-05, "loss": 11.777, "step": 31484 }, { "epoch": 1.7144840824161238, "grad_norm": 0.5439329150693312, "learning_rate": 1.049893622601854e-05, "loss": 11.803, "step": 31485 }, { "epoch": 1.7145385364127068, "grad_norm": 0.6262666704501959, "learning_rate": 1.0495003235864209e-05, "loss": 11.8466, "step": 31486 }, { "epoch": 1.7145929904092898, "grad_norm": 0.5329814841722585, "learning_rate": 1.0491070941715752e-05, "loss": 11.748, "step": 31487 }, { "epoch": 1.7146474444058728, "grad_norm": 0.5956165880943686, "learning_rate": 1.0487139343603702e-05, "loss": 11.7892, "step": 31488 }, { "epoch": 1.7147018984024558, "grad_norm": 0.5532086445924472, "learning_rate": 1.04832084415587e-05, "loss": 11.7403, "step": 31489 }, { "epoch": 1.7147563523990388, "grad_norm": 0.5494991855615453, "learning_rate": 1.0479278235611267e-05, "loss": 11.7501, "step": 31490 }, { "epoch": 1.7148108063956218, "grad_norm": 0.5586482148519528, "learning_rate": 1.047534872579199e-05, "loss": 11.7576, "step": 31491 }, { "epoch": 1.7148652603922048, "grad_norm": 0.6106049331093408, "learning_rate": 1.0471419912131396e-05, "loss": 11.7508, "step": 31492 }, { "epoch": 1.7149197143887878, "grad_norm": 0.5640251305216247, "learning_rate": 1.0467491794660066e-05, "loss": 11.7891, "step": 31493 }, { "epoch": 1.714974168385371, "grad_norm": 0.5420770331907155, "learning_rate": 1.046356437340853e-05, "loss": 11.8908, "step": 31494 }, { "epoch": 1.715028622381954, "grad_norm": 0.5660027284710281, "learning_rate": 1.0459637648407328e-05, "loss": 11.8144, "step": 31495 }, { "epoch": 1.715083076378537, "grad_norm": 0.580241096977362, "learning_rate": 1.0455711619687014e-05, "loss": 11.8834, "step": 31496 }, { "epoch": 1.71513753037512, "grad_norm": 0.5168530731885738, "learning_rate": 1.0451786287278097e-05, "loss": 11.8216, "step": 31497 }, { "epoch": 1.715191984371703, "grad_norm": 0.6908020811749591, "learning_rate": 1.0447861651211099e-05, "loss": 11.9295, "step": 31498 }, { "epoch": 1.715246438368286, "grad_norm": 0.523079280413465, "learning_rate": 1.0443937711516571e-05, "loss": 11.7284, "step": 31499 }, { "epoch": 1.7153008923648692, "grad_norm": 0.6298982791860973, "learning_rate": 1.0440014468224968e-05, "loss": 11.745, "step": 31500 }, { "epoch": 1.7153553463614521, "grad_norm": 0.5342697318723727, "learning_rate": 1.0436091921366863e-05, "loss": 11.8669, "step": 31501 }, { "epoch": 1.7154098003580351, "grad_norm": 0.6044304543034712, "learning_rate": 1.0432170070972702e-05, "loss": 11.8372, "step": 31502 }, { "epoch": 1.7154642543546181, "grad_norm": 0.5340018605607262, "learning_rate": 1.0428248917073025e-05, "loss": 11.7431, "step": 31503 }, { "epoch": 1.7155187083512011, "grad_norm": 0.6007453410235755, "learning_rate": 1.0424328459698285e-05, "loss": 11.8048, "step": 31504 }, { "epoch": 1.7155731623477841, "grad_norm": 0.5575050090038742, "learning_rate": 1.0420408698878992e-05, "loss": 11.8508, "step": 31505 }, { "epoch": 1.7156276163443671, "grad_norm": 0.5591948108676068, "learning_rate": 1.0416489634645644e-05, "loss": 11.813, "step": 31506 }, { "epoch": 1.71568207034095, "grad_norm": 0.5707980019885782, "learning_rate": 1.0412571267028704e-05, "loss": 11.8364, "step": 31507 }, { "epoch": 1.715736524337533, "grad_norm": 0.5400907530965886, "learning_rate": 1.0408653596058615e-05, "loss": 11.7201, "step": 31508 }, { "epoch": 1.715790978334116, "grad_norm": 0.5476077115514427, "learning_rate": 1.0404736621765854e-05, "loss": 11.7321, "step": 31509 }, { "epoch": 1.715845432330699, "grad_norm": 0.5132813267877665, "learning_rate": 1.0400820344180884e-05, "loss": 11.8285, "step": 31510 }, { "epoch": 1.715899886327282, "grad_norm": 0.5453485699056855, "learning_rate": 1.039690476333418e-05, "loss": 11.8487, "step": 31511 }, { "epoch": 1.715954340323865, "grad_norm": 0.5177560533775238, "learning_rate": 1.0392989879256142e-05, "loss": 11.6971, "step": 31512 }, { "epoch": 1.716008794320448, "grad_norm": 0.5625019384813693, "learning_rate": 1.038907569197728e-05, "loss": 11.7111, "step": 31513 }, { "epoch": 1.716063248317031, "grad_norm": 0.5604026690973885, "learning_rate": 1.0385162201527954e-05, "loss": 11.8006, "step": 31514 }, { "epoch": 1.716117702313614, "grad_norm": 0.6216431233268295, "learning_rate": 1.0381249407938664e-05, "loss": 11.7218, "step": 31515 }, { "epoch": 1.716172156310197, "grad_norm": 0.5785475638762526, "learning_rate": 1.0377337311239787e-05, "loss": 11.8406, "step": 31516 }, { "epoch": 1.7162266103067803, "grad_norm": 0.6282864620618059, "learning_rate": 1.0373425911461764e-05, "loss": 11.9044, "step": 31517 }, { "epoch": 1.7162810643033632, "grad_norm": 0.5517508946106106, "learning_rate": 1.0369515208635061e-05, "loss": 11.6555, "step": 31518 }, { "epoch": 1.7163355182999462, "grad_norm": 0.5468075888658445, "learning_rate": 1.0365605202789986e-05, "loss": 11.6919, "step": 31519 }, { "epoch": 1.7163899722965292, "grad_norm": 0.5434999254704422, "learning_rate": 1.0361695893957001e-05, "loss": 11.801, "step": 31520 }, { "epoch": 1.7164444262931122, "grad_norm": 0.49435438127943954, "learning_rate": 1.035778728216652e-05, "loss": 11.8131, "step": 31521 }, { "epoch": 1.7164988802896952, "grad_norm": 0.5170349555973794, "learning_rate": 1.0353879367448905e-05, "loss": 11.6788, "step": 31522 }, { "epoch": 1.7165533342862784, "grad_norm": 0.5494413014674582, "learning_rate": 1.0349972149834574e-05, "loss": 11.8655, "step": 31523 }, { "epoch": 1.7166077882828614, "grad_norm": 0.532723027958204, "learning_rate": 1.0346065629353874e-05, "loss": 11.7205, "step": 31524 }, { "epoch": 1.7166622422794444, "grad_norm": 0.5396919734135337, "learning_rate": 1.034215980603721e-05, "loss": 11.6964, "step": 31525 }, { "epoch": 1.7167166962760274, "grad_norm": 0.5429105082128733, "learning_rate": 1.0338254679914939e-05, "loss": 11.8505, "step": 31526 }, { "epoch": 1.7167711502726104, "grad_norm": 0.5963999813032983, "learning_rate": 1.0334350251017455e-05, "loss": 11.7538, "step": 31527 }, { "epoch": 1.7168256042691934, "grad_norm": 0.6194946283530616, "learning_rate": 1.0330446519375104e-05, "loss": 11.8054, "step": 31528 }, { "epoch": 1.7168800582657764, "grad_norm": 0.4968158802263791, "learning_rate": 1.0326543485018214e-05, "loss": 11.7892, "step": 31529 }, { "epoch": 1.7169345122623594, "grad_norm": 0.5616479182415575, "learning_rate": 1.0322641147977185e-05, "loss": 11.7664, "step": 31530 }, { "epoch": 1.7169889662589424, "grad_norm": 0.5527259225655293, "learning_rate": 1.0318739508282305e-05, "loss": 11.869, "step": 31531 }, { "epoch": 1.7170434202555254, "grad_norm": 0.5166681923881729, "learning_rate": 1.0314838565963958e-05, "loss": 11.9058, "step": 31532 }, { "epoch": 1.7170978742521084, "grad_norm": 0.5237575025103077, "learning_rate": 1.0310938321052477e-05, "loss": 11.8324, "step": 31533 }, { "epoch": 1.7171523282486914, "grad_norm": 0.5178726626154517, "learning_rate": 1.030703877357817e-05, "loss": 11.8695, "step": 31534 }, { "epoch": 1.7172067822452743, "grad_norm": 0.5688868199917441, "learning_rate": 1.0303139923571393e-05, "loss": 11.833, "step": 31535 }, { "epoch": 1.7172612362418573, "grad_norm": 0.5133095572706747, "learning_rate": 1.0299241771062406e-05, "loss": 11.7907, "step": 31536 }, { "epoch": 1.7173156902384403, "grad_norm": 0.5517265001372719, "learning_rate": 1.029534431608159e-05, "loss": 11.8537, "step": 31537 }, { "epoch": 1.7173701442350233, "grad_norm": 0.6458168601278518, "learning_rate": 1.0291447558659218e-05, "loss": 11.8367, "step": 31538 }, { "epoch": 1.7174245982316063, "grad_norm": 0.6796053357856466, "learning_rate": 1.0287551498825575e-05, "loss": 11.7266, "step": 31539 }, { "epoch": 1.7174790522281893, "grad_norm": 0.5672932565031154, "learning_rate": 1.0283656136610997e-05, "loss": 11.7974, "step": 31540 }, { "epoch": 1.7175335062247725, "grad_norm": 0.5717451313671872, "learning_rate": 1.0279761472045735e-05, "loss": 11.744, "step": 31541 }, { "epoch": 1.7175879602213555, "grad_norm": 0.5334852185932999, "learning_rate": 1.0275867505160108e-05, "loss": 11.8491, "step": 31542 }, { "epoch": 1.7176424142179385, "grad_norm": 0.590529124136015, "learning_rate": 1.0271974235984372e-05, "loss": 11.8387, "step": 31543 }, { "epoch": 1.7176968682145215, "grad_norm": 0.5984197892987017, "learning_rate": 1.0268081664548802e-05, "loss": 11.8591, "step": 31544 }, { "epoch": 1.7177513222111045, "grad_norm": 0.5299065613184486, "learning_rate": 1.0264189790883693e-05, "loss": 11.5362, "step": 31545 }, { "epoch": 1.7178057762076877, "grad_norm": 0.5150687628781443, "learning_rate": 1.0260298615019281e-05, "loss": 11.6615, "step": 31546 }, { "epoch": 1.7178602302042707, "grad_norm": 0.575100554428203, "learning_rate": 1.0256408136985862e-05, "loss": 11.8952, "step": 31547 }, { "epoch": 1.7179146842008537, "grad_norm": 0.5113283756035647, "learning_rate": 1.0252518356813657e-05, "loss": 11.7354, "step": 31548 }, { "epoch": 1.7179691381974367, "grad_norm": 0.5525406438555145, "learning_rate": 1.0248629274532895e-05, "loss": 11.8523, "step": 31549 }, { "epoch": 1.7180235921940197, "grad_norm": 0.5285956182745611, "learning_rate": 1.0244740890173865e-05, "loss": 11.6948, "step": 31550 }, { "epoch": 1.7180780461906027, "grad_norm": 0.5017862131373222, "learning_rate": 1.0240853203766764e-05, "loss": 11.7097, "step": 31551 }, { "epoch": 1.7181325001871857, "grad_norm": 0.6261660800804799, "learning_rate": 1.023696621534187e-05, "loss": 11.8221, "step": 31552 }, { "epoch": 1.7181869541837687, "grad_norm": 0.5545612695863086, "learning_rate": 1.0233079924929346e-05, "loss": 11.922, "step": 31553 }, { "epoch": 1.7182414081803516, "grad_norm": 0.5104973443489229, "learning_rate": 1.0229194332559456e-05, "loss": 11.9069, "step": 31554 }, { "epoch": 1.7182958621769346, "grad_norm": 0.5420941901474664, "learning_rate": 1.0225309438262421e-05, "loss": 11.7832, "step": 31555 }, { "epoch": 1.7183503161735176, "grad_norm": 0.553564900845983, "learning_rate": 1.0221425242068417e-05, "loss": 11.9343, "step": 31556 }, { "epoch": 1.7184047701701006, "grad_norm": 0.5617533403323572, "learning_rate": 1.0217541744007687e-05, "loss": 11.7281, "step": 31557 }, { "epoch": 1.7184592241666836, "grad_norm": 0.5345195734226562, "learning_rate": 1.0213658944110404e-05, "loss": 11.7387, "step": 31558 }, { "epoch": 1.7185136781632666, "grad_norm": 0.5405528268589259, "learning_rate": 1.020977684240675e-05, "loss": 11.7239, "step": 31559 }, { "epoch": 1.7185681321598496, "grad_norm": 0.5258780473731511, "learning_rate": 1.0205895438926949e-05, "loss": 11.6759, "step": 31560 }, { "epoch": 1.7186225861564326, "grad_norm": 0.5641976631201479, "learning_rate": 1.0202014733701138e-05, "loss": 11.7516, "step": 31561 }, { "epoch": 1.7186770401530156, "grad_norm": 0.5717027721752065, "learning_rate": 1.0198134726759545e-05, "loss": 11.7368, "step": 31562 }, { "epoch": 1.7187314941495986, "grad_norm": 0.6088941775585593, "learning_rate": 1.0194255418132292e-05, "loss": 11.9148, "step": 31563 }, { "epoch": 1.7187859481461818, "grad_norm": 0.5165406297448353, "learning_rate": 1.01903768078496e-05, "loss": 11.7316, "step": 31564 }, { "epoch": 1.7188404021427648, "grad_norm": 0.5166590946357978, "learning_rate": 1.0186498895941566e-05, "loss": 11.7439, "step": 31565 }, { "epoch": 1.7188948561393478, "grad_norm": 0.5265213241462661, "learning_rate": 1.0182621682438386e-05, "loss": 11.8408, "step": 31566 }, { "epoch": 1.7189493101359308, "grad_norm": 0.5631334509084922, "learning_rate": 1.0178745167370218e-05, "loss": 11.8852, "step": 31567 }, { "epoch": 1.7190037641325138, "grad_norm": 0.579931971715743, "learning_rate": 1.017486935076719e-05, "loss": 11.8404, "step": 31568 }, { "epoch": 1.7190582181290968, "grad_norm": 0.6461344153584181, "learning_rate": 1.0170994232659425e-05, "loss": 11.8433, "step": 31569 }, { "epoch": 1.71911267212568, "grad_norm": 0.5633979225841509, "learning_rate": 1.0167119813077097e-05, "loss": 11.7002, "step": 31570 }, { "epoch": 1.719167126122263, "grad_norm": 0.5530773534330833, "learning_rate": 1.0163246092050283e-05, "loss": 11.7217, "step": 31571 }, { "epoch": 1.719221580118846, "grad_norm": 0.5637321739375956, "learning_rate": 1.0159373069609157e-05, "loss": 11.7768, "step": 31572 }, { "epoch": 1.719276034115429, "grad_norm": 0.5270648099352024, "learning_rate": 1.0155500745783797e-05, "loss": 11.725, "step": 31573 }, { "epoch": 1.719330488112012, "grad_norm": 0.5248390565762538, "learning_rate": 1.0151629120604345e-05, "loss": 11.8033, "step": 31574 }, { "epoch": 1.719384942108595, "grad_norm": 0.53525601514702, "learning_rate": 1.0147758194100864e-05, "loss": 11.7964, "step": 31575 }, { "epoch": 1.719439396105178, "grad_norm": 0.564528381781861, "learning_rate": 1.0143887966303512e-05, "loss": 11.7376, "step": 31576 }, { "epoch": 1.719493850101761, "grad_norm": 0.5347051331277457, "learning_rate": 1.0140018437242338e-05, "loss": 11.7006, "step": 31577 }, { "epoch": 1.719548304098344, "grad_norm": 0.5297048389539417, "learning_rate": 1.0136149606947466e-05, "loss": 11.7363, "step": 31578 }, { "epoch": 1.719602758094927, "grad_norm": 0.5562468619462452, "learning_rate": 1.0132281475448967e-05, "loss": 11.7608, "step": 31579 }, { "epoch": 1.71965721209151, "grad_norm": 0.5929683492025817, "learning_rate": 1.0128414042776901e-05, "loss": 11.92, "step": 31580 }, { "epoch": 1.719711666088093, "grad_norm": 0.5721271283055273, "learning_rate": 1.0124547308961352e-05, "loss": 11.6951, "step": 31581 }, { "epoch": 1.7197661200846759, "grad_norm": 0.5579167792201388, "learning_rate": 1.0120681274032417e-05, "loss": 11.7907, "step": 31582 }, { "epoch": 1.7198205740812589, "grad_norm": 0.497027615107683, "learning_rate": 1.0116815938020119e-05, "loss": 11.7754, "step": 31583 }, { "epoch": 1.7198750280778419, "grad_norm": 0.5914398622635827, "learning_rate": 1.0112951300954554e-05, "loss": 11.9825, "step": 31584 }, { "epoch": 1.7199294820744249, "grad_norm": 0.4773284932550976, "learning_rate": 1.0109087362865732e-05, "loss": 11.7669, "step": 31585 }, { "epoch": 1.7199839360710079, "grad_norm": 0.5543281504887684, "learning_rate": 1.0105224123783742e-05, "loss": 11.7636, "step": 31586 }, { "epoch": 1.720038390067591, "grad_norm": 0.5437165446321306, "learning_rate": 1.010136158373859e-05, "loss": 11.7309, "step": 31587 }, { "epoch": 1.720092844064174, "grad_norm": 0.5741886097642697, "learning_rate": 1.0097499742760342e-05, "loss": 11.8462, "step": 31588 }, { "epoch": 1.720147298060757, "grad_norm": 0.5216243349159138, "learning_rate": 1.0093638600879008e-05, "loss": 11.6872, "step": 31589 }, { "epoch": 1.72020175205734, "grad_norm": 0.5493658476374641, "learning_rate": 1.0089778158124596e-05, "loss": 11.7145, "step": 31590 }, { "epoch": 1.720256206053923, "grad_norm": 0.4942337410589276, "learning_rate": 1.0085918414527174e-05, "loss": 11.5876, "step": 31591 }, { "epoch": 1.720310660050506, "grad_norm": 0.49565025243563043, "learning_rate": 1.0082059370116714e-05, "loss": 11.7896, "step": 31592 }, { "epoch": 1.7203651140470892, "grad_norm": 0.5346386801223755, "learning_rate": 1.0078201024923228e-05, "loss": 11.7027, "step": 31593 }, { "epoch": 1.7204195680436722, "grad_norm": 0.5325015247587378, "learning_rate": 1.0074343378976758e-05, "loss": 11.767, "step": 31594 }, { "epoch": 1.7204740220402552, "grad_norm": 0.5011686389998581, "learning_rate": 1.0070486432307257e-05, "loss": 11.7314, "step": 31595 }, { "epoch": 1.7205284760368382, "grad_norm": 0.5931487508652592, "learning_rate": 1.0066630184944748e-05, "loss": 11.7359, "step": 31596 }, { "epoch": 1.7205829300334212, "grad_norm": 0.5578300443420816, "learning_rate": 1.0062774636919181e-05, "loss": 11.7218, "step": 31597 }, { "epoch": 1.7206373840300042, "grad_norm": 0.4853604650187636, "learning_rate": 1.005891978826059e-05, "loss": 11.648, "step": 31598 }, { "epoch": 1.7206918380265872, "grad_norm": 0.5662665237519126, "learning_rate": 1.0055065638998917e-05, "loss": 11.785, "step": 31599 }, { "epoch": 1.7207462920231702, "grad_norm": 0.583786061851586, "learning_rate": 1.0051212189164117e-05, "loss": 11.8159, "step": 31600 }, { "epoch": 1.7208007460197532, "grad_norm": 0.5268813172833314, "learning_rate": 1.0047359438786197e-05, "loss": 11.766, "step": 31601 }, { "epoch": 1.7208552000163362, "grad_norm": 0.5665939415294465, "learning_rate": 1.004350738789508e-05, "loss": 11.8127, "step": 31602 }, { "epoch": 1.7209096540129192, "grad_norm": 0.5773183052607285, "learning_rate": 1.0039656036520728e-05, "loss": 11.7662, "step": 31603 }, { "epoch": 1.7209641080095022, "grad_norm": 0.5510697257439507, "learning_rate": 1.003580538469312e-05, "loss": 11.8002, "step": 31604 }, { "epoch": 1.7210185620060852, "grad_norm": 0.580086336691622, "learning_rate": 1.0031955432442153e-05, "loss": 11.912, "step": 31605 }, { "epoch": 1.7210730160026682, "grad_norm": 0.5022698051681344, "learning_rate": 1.0028106179797813e-05, "loss": 11.6991, "step": 31606 }, { "epoch": 1.7211274699992511, "grad_norm": 0.5138518962521057, "learning_rate": 1.002425762678999e-05, "loss": 11.7953, "step": 31607 }, { "epoch": 1.7211819239958341, "grad_norm": 0.5711216960217186, "learning_rate": 1.0020409773448637e-05, "loss": 11.7749, "step": 31608 }, { "epoch": 1.7212363779924171, "grad_norm": 0.5005861228314973, "learning_rate": 1.0016562619803682e-05, "loss": 11.5601, "step": 31609 }, { "epoch": 1.7212908319890003, "grad_norm": 0.5337994292374386, "learning_rate": 1.0012716165884994e-05, "loss": 11.8073, "step": 31610 }, { "epoch": 1.7213452859855833, "grad_norm": 0.6007049351191095, "learning_rate": 1.0008870411722537e-05, "loss": 11.7589, "step": 31611 }, { "epoch": 1.7213997399821663, "grad_norm": 0.5666830835561676, "learning_rate": 1.0005025357346187e-05, "loss": 11.8024, "step": 31612 }, { "epoch": 1.7214541939787493, "grad_norm": 0.6016147626228205, "learning_rate": 1.0001181002785864e-05, "loss": 11.9061, "step": 31613 }, { "epoch": 1.7215086479753323, "grad_norm": 0.5700946700385966, "learning_rate": 9.997337348071423e-06, "loss": 11.7835, "step": 31614 }, { "epoch": 1.7215631019719153, "grad_norm": 0.6276406480298716, "learning_rate": 9.993494393232795e-06, "loss": 11.8186, "step": 31615 }, { "epoch": 1.7216175559684985, "grad_norm": 0.5382441499206801, "learning_rate": 9.989652138299854e-06, "loss": 11.6891, "step": 31616 }, { "epoch": 1.7216720099650815, "grad_norm": 0.5526851264588184, "learning_rate": 9.985810583302457e-06, "loss": 11.7511, "step": 31617 }, { "epoch": 1.7217264639616645, "grad_norm": 0.5077383918843621, "learning_rate": 9.981969728270524e-06, "loss": 11.8187, "step": 31618 }, { "epoch": 1.7217809179582475, "grad_norm": 0.559844989293298, "learning_rate": 9.978129573233875e-06, "loss": 11.7314, "step": 31619 }, { "epoch": 1.7218353719548305, "grad_norm": 0.5416457035722602, "learning_rate": 9.974290118222374e-06, "loss": 11.8057, "step": 31620 }, { "epoch": 1.7218898259514135, "grad_norm": 0.49255162000816144, "learning_rate": 9.970451363265909e-06, "loss": 11.821, "step": 31621 }, { "epoch": 1.7219442799479965, "grad_norm": 0.5756449643012322, "learning_rate": 9.9666133083943e-06, "loss": 11.7517, "step": 31622 }, { "epoch": 1.7219987339445795, "grad_norm": 0.554707897770957, "learning_rate": 9.962775953637416e-06, "loss": 11.7847, "step": 31623 }, { "epoch": 1.7220531879411625, "grad_norm": 0.5579892266497304, "learning_rate": 9.95893929902506e-06, "loss": 11.7983, "step": 31624 }, { "epoch": 1.7221076419377455, "grad_norm": 0.5721832657330594, "learning_rate": 9.955103344587125e-06, "loss": 11.78, "step": 31625 }, { "epoch": 1.7221620959343285, "grad_norm": 0.5286547776030073, "learning_rate": 9.951268090353382e-06, "loss": 11.8571, "step": 31626 }, { "epoch": 1.7222165499309114, "grad_norm": 0.6085563065468181, "learning_rate": 9.947433536353679e-06, "loss": 11.9021, "step": 31627 }, { "epoch": 1.7222710039274944, "grad_norm": 0.49485171895229996, "learning_rate": 9.943599682617865e-06, "loss": 11.7653, "step": 31628 }, { "epoch": 1.7223254579240774, "grad_norm": 0.5236511628822097, "learning_rate": 9.939766529175698e-06, "loss": 11.6727, "step": 31629 }, { "epoch": 1.7223799119206604, "grad_norm": 0.5440952775692672, "learning_rate": 9.935934076057008e-06, "loss": 11.8226, "step": 31630 }, { "epoch": 1.7224343659172434, "grad_norm": 0.5611291784323829, "learning_rate": 9.932102323291603e-06, "loss": 11.8492, "step": 31631 }, { "epoch": 1.7224888199138264, "grad_norm": 0.5407081392247196, "learning_rate": 9.928271270909273e-06, "loss": 11.7461, "step": 31632 }, { "epoch": 1.7225432739104094, "grad_norm": 0.5431104522580013, "learning_rate": 9.924440918939814e-06, "loss": 11.8053, "step": 31633 }, { "epoch": 1.7225977279069926, "grad_norm": 0.5648261138440481, "learning_rate": 9.920611267413005e-06, "loss": 11.7403, "step": 31634 }, { "epoch": 1.7226521819035756, "grad_norm": 0.6562552367201177, "learning_rate": 9.91678231635864e-06, "loss": 11.8344, "step": 31635 }, { "epoch": 1.7227066359001586, "grad_norm": 0.6069501439000724, "learning_rate": 9.912954065806468e-06, "loss": 11.7119, "step": 31636 }, { "epoch": 1.7227610898967416, "grad_norm": 0.5926715108090882, "learning_rate": 9.90912651578626e-06, "loss": 11.8739, "step": 31637 }, { "epoch": 1.7228155438933246, "grad_norm": 0.57333652997764, "learning_rate": 9.905299666327838e-06, "loss": 11.9253, "step": 31638 }, { "epoch": 1.7228699978899076, "grad_norm": 0.5179009446000575, "learning_rate": 9.90147351746088e-06, "loss": 11.7079, "step": 31639 }, { "epoch": 1.7229244518864908, "grad_norm": 0.5927678052247316, "learning_rate": 9.897648069215193e-06, "loss": 11.7076, "step": 31640 }, { "epoch": 1.7229789058830738, "grad_norm": 0.5420398795968947, "learning_rate": 9.893823321620488e-06, "loss": 11.8555, "step": 31641 }, { "epoch": 1.7230333598796568, "grad_norm": 0.7032982705210287, "learning_rate": 9.889999274706518e-06, "loss": 11.8216, "step": 31642 }, { "epoch": 1.7230878138762398, "grad_norm": 0.5456092851715043, "learning_rate": 9.886175928503038e-06, "loss": 11.8308, "step": 31643 }, { "epoch": 1.7231422678728228, "grad_norm": 0.5144023790602227, "learning_rate": 9.882353283039758e-06, "loss": 11.8272, "step": 31644 }, { "epoch": 1.7231967218694058, "grad_norm": 0.5219399078420015, "learning_rate": 9.87853133834643e-06, "loss": 11.7801, "step": 31645 }, { "epoch": 1.7232511758659887, "grad_norm": 0.5264655189569761, "learning_rate": 9.874710094452733e-06, "loss": 11.7471, "step": 31646 }, { "epoch": 1.7233056298625717, "grad_norm": 0.5354346228133944, "learning_rate": 9.870889551388419e-06, "loss": 11.7905, "step": 31647 }, { "epoch": 1.7233600838591547, "grad_norm": 0.5272933071145303, "learning_rate": 9.867069709183186e-06, "loss": 11.6881, "step": 31648 }, { "epoch": 1.7234145378557377, "grad_norm": 0.5897080081996334, "learning_rate": 9.863250567866721e-06, "loss": 11.7322, "step": 31649 }, { "epoch": 1.7234689918523207, "grad_norm": 0.5078046384524149, "learning_rate": 9.859432127468748e-06, "loss": 11.8635, "step": 31650 }, { "epoch": 1.7235234458489037, "grad_norm": 0.637272507312113, "learning_rate": 9.85561438801893e-06, "loss": 11.7572, "step": 31651 }, { "epoch": 1.7235778998454867, "grad_norm": 0.560055228726501, "learning_rate": 9.851797349546976e-06, "loss": 11.7605, "step": 31652 }, { "epoch": 1.7236323538420697, "grad_norm": 0.5062532132317071, "learning_rate": 9.847981012082574e-06, "loss": 11.6805, "step": 31653 }, { "epoch": 1.7236868078386527, "grad_norm": 0.5877989342666577, "learning_rate": 9.844165375655379e-06, "loss": 11.7646, "step": 31654 }, { "epoch": 1.7237412618352357, "grad_norm": 0.5330469216308327, "learning_rate": 9.840350440295088e-06, "loss": 11.6925, "step": 31655 }, { "epoch": 1.7237957158318187, "grad_norm": 0.5621134833465058, "learning_rate": 9.836536206031333e-06, "loss": 11.8281, "step": 31656 }, { "epoch": 1.723850169828402, "grad_norm": 0.5405881843850385, "learning_rate": 9.832722672893812e-06, "loss": 11.6365, "step": 31657 }, { "epoch": 1.7239046238249849, "grad_norm": 0.5139143593961488, "learning_rate": 9.82890984091216e-06, "loss": 11.7755, "step": 31658 }, { "epoch": 1.7239590778215679, "grad_norm": 0.5470150575024245, "learning_rate": 9.825097710116016e-06, "loss": 11.8374, "step": 31659 }, { "epoch": 1.7240135318181509, "grad_norm": 0.5799642034690877, "learning_rate": 9.821286280535048e-06, "loss": 11.8594, "step": 31660 }, { "epoch": 1.7240679858147339, "grad_norm": 0.5471452229170947, "learning_rate": 9.817475552198851e-06, "loss": 11.9307, "step": 31661 }, { "epoch": 1.7241224398113169, "grad_norm": 0.5604782843538757, "learning_rate": 9.813665525137117e-06, "loss": 11.8213, "step": 31662 }, { "epoch": 1.7241768938079, "grad_norm": 0.5269929859812372, "learning_rate": 9.80985619937943e-06, "loss": 11.797, "step": 31663 }, { "epoch": 1.724231347804483, "grad_norm": 0.5361352117910505, "learning_rate": 9.806047574955413e-06, "loss": 11.818, "step": 31664 }, { "epoch": 1.724285801801066, "grad_norm": 0.5594370648934125, "learning_rate": 9.80223965189473e-06, "loss": 11.7579, "step": 31665 }, { "epoch": 1.724340255797649, "grad_norm": 0.5921846916923454, "learning_rate": 9.798432430226923e-06, "loss": 11.9223, "step": 31666 }, { "epoch": 1.724394709794232, "grad_norm": 0.5474470645296436, "learning_rate": 9.794625909981659e-06, "loss": 11.7333, "step": 31667 }, { "epoch": 1.724449163790815, "grad_norm": 0.5149711932014519, "learning_rate": 9.790820091188502e-06, "loss": 11.8104, "step": 31668 }, { "epoch": 1.724503617787398, "grad_norm": 0.5614818687022267, "learning_rate": 9.787014973877062e-06, "loss": 11.8931, "step": 31669 }, { "epoch": 1.724558071783981, "grad_norm": 0.5239461447515954, "learning_rate": 9.783210558076928e-06, "loss": 11.7741, "step": 31670 }, { "epoch": 1.724612525780564, "grad_norm": 0.5675347104846662, "learning_rate": 9.77940684381765e-06, "loss": 11.7387, "step": 31671 }, { "epoch": 1.724666979777147, "grad_norm": 0.5974951287968702, "learning_rate": 9.775603831128865e-06, "loss": 11.8754, "step": 31672 }, { "epoch": 1.72472143377373, "grad_norm": 0.5715581040755615, "learning_rate": 9.77180152004009e-06, "loss": 11.872, "step": 31673 }, { "epoch": 1.724775887770313, "grad_norm": 0.5238339741704982, "learning_rate": 9.767999910580916e-06, "loss": 11.867, "step": 31674 }, { "epoch": 1.724830341766896, "grad_norm": 0.6017754906156638, "learning_rate": 9.764199002780927e-06, "loss": 11.8095, "step": 31675 }, { "epoch": 1.724884795763479, "grad_norm": 0.5378820399597718, "learning_rate": 9.760398796669646e-06, "loss": 11.8627, "step": 31676 }, { "epoch": 1.724939249760062, "grad_norm": 0.5688165944084097, "learning_rate": 9.756599292276646e-06, "loss": 11.9054, "step": 31677 }, { "epoch": 1.724993703756645, "grad_norm": 0.6024619360754615, "learning_rate": 9.752800489631453e-06, "loss": 11.9786, "step": 31678 }, { "epoch": 1.725048157753228, "grad_norm": 0.5228581285698074, "learning_rate": 9.74900238876364e-06, "loss": 11.7096, "step": 31679 }, { "epoch": 1.7251026117498112, "grad_norm": 0.5032368347871917, "learning_rate": 9.745204989702705e-06, "loss": 11.7223, "step": 31680 }, { "epoch": 1.7251570657463942, "grad_norm": 0.528420938205925, "learning_rate": 9.741408292478183e-06, "loss": 11.8796, "step": 31681 }, { "epoch": 1.7252115197429772, "grad_norm": 0.5160502954828745, "learning_rate": 9.737612297119625e-06, "loss": 11.7338, "step": 31682 }, { "epoch": 1.7252659737395601, "grad_norm": 0.5349271245456051, "learning_rate": 9.733817003656509e-06, "loss": 11.7757, "step": 31683 }, { "epoch": 1.7253204277361431, "grad_norm": 0.6173101001891643, "learning_rate": 9.7300224121184e-06, "loss": 11.8106, "step": 31684 }, { "epoch": 1.7253748817327261, "grad_norm": 0.5553358185095433, "learning_rate": 9.726228522534742e-06, "loss": 11.8653, "step": 31685 }, { "epoch": 1.7254293357293093, "grad_norm": 0.5359553888537437, "learning_rate": 9.722435334935077e-06, "loss": 11.7207, "step": 31686 }, { "epoch": 1.7254837897258923, "grad_norm": 0.5705403304738097, "learning_rate": 9.718642849348902e-06, "loss": 11.9162, "step": 31687 }, { "epoch": 1.7255382437224753, "grad_norm": 0.47644253008702425, "learning_rate": 9.714851065805697e-06, "loss": 11.6373, "step": 31688 }, { "epoch": 1.7255926977190583, "grad_norm": 0.5633304336926984, "learning_rate": 9.711059984334981e-06, "loss": 11.7309, "step": 31689 }, { "epoch": 1.7256471517156413, "grad_norm": 0.5367003899122796, "learning_rate": 9.707269604966162e-06, "loss": 11.7679, "step": 31690 }, { "epoch": 1.7257016057122243, "grad_norm": 0.5448976654034257, "learning_rate": 9.703479927728765e-06, "loss": 11.7898, "step": 31691 }, { "epoch": 1.7257560597088073, "grad_norm": 0.5791685649235563, "learning_rate": 9.699690952652275e-06, "loss": 11.8029, "step": 31692 }, { "epoch": 1.7258105137053903, "grad_norm": 0.5271671684126381, "learning_rate": 9.6959026797661e-06, "loss": 11.7772, "step": 31693 }, { "epoch": 1.7258649677019733, "grad_norm": 0.5772368975839575, "learning_rate": 9.692115109099754e-06, "loss": 11.8192, "step": 31694 }, { "epoch": 1.7259194216985563, "grad_norm": 0.5436844798966903, "learning_rate": 9.688328240682643e-06, "loss": 11.8391, "step": 31695 }, { "epoch": 1.7259738756951393, "grad_norm": 0.5332655215303967, "learning_rate": 9.684542074544256e-06, "loss": 11.8615, "step": 31696 }, { "epoch": 1.7260283296917223, "grad_norm": 0.5085579615833358, "learning_rate": 9.680756610714003e-06, "loss": 11.8357, "step": 31697 }, { "epoch": 1.7260827836883053, "grad_norm": 0.530327560360123, "learning_rate": 9.676971849221328e-06, "loss": 11.8331, "step": 31698 }, { "epoch": 1.7261372376848882, "grad_norm": 0.5679448804061265, "learning_rate": 9.673187790095706e-06, "loss": 11.8893, "step": 31699 }, { "epoch": 1.7261916916814712, "grad_norm": 0.5371532607973594, "learning_rate": 9.66940443336648e-06, "loss": 11.8694, "step": 31700 }, { "epoch": 1.7262461456780542, "grad_norm": 0.5364368743944987, "learning_rate": 9.665621779063127e-06, "loss": 11.811, "step": 31701 }, { "epoch": 1.7263005996746372, "grad_norm": 0.6120074762318003, "learning_rate": 9.661839827215058e-06, "loss": 11.9309, "step": 31702 }, { "epoch": 1.7263550536712202, "grad_norm": 0.5521704409754, "learning_rate": 9.658058577851658e-06, "loss": 11.7711, "step": 31703 }, { "epoch": 1.7264095076678034, "grad_norm": 0.49477285511138874, "learning_rate": 9.654278031002361e-06, "loss": 11.8737, "step": 31704 }, { "epoch": 1.7264639616643864, "grad_norm": 0.5873796201042932, "learning_rate": 9.650498186696522e-06, "loss": 11.9124, "step": 31705 }, { "epoch": 1.7265184156609694, "grad_norm": 0.5743880791251625, "learning_rate": 9.646719044963593e-06, "loss": 11.7838, "step": 31706 }, { "epoch": 1.7265728696575524, "grad_norm": 0.5289910373412918, "learning_rate": 9.642940605832906e-06, "loss": 11.7521, "step": 31707 }, { "epoch": 1.7266273236541354, "grad_norm": 0.5643291751214005, "learning_rate": 9.639162869333861e-06, "loss": 11.7548, "step": 31708 }, { "epoch": 1.7266817776507186, "grad_norm": 0.5648407876761691, "learning_rate": 9.63538583549588e-06, "loss": 11.8659, "step": 31709 }, { "epoch": 1.7267362316473016, "grad_norm": 0.6448956523557557, "learning_rate": 9.631609504348249e-06, "loss": 11.8444, "step": 31710 }, { "epoch": 1.7267906856438846, "grad_norm": 0.5195585514260834, "learning_rate": 9.627833875920411e-06, "loss": 11.7816, "step": 31711 }, { "epoch": 1.7268451396404676, "grad_norm": 0.5592209932938418, "learning_rate": 9.624058950241666e-06, "loss": 11.9012, "step": 31712 }, { "epoch": 1.7268995936370506, "grad_norm": 0.5576575172917734, "learning_rate": 9.62028472734139e-06, "loss": 12.0067, "step": 31713 }, { "epoch": 1.7269540476336336, "grad_norm": 0.577147121452383, "learning_rate": 9.616511207248957e-06, "loss": 11.7964, "step": 31714 }, { "epoch": 1.7270085016302166, "grad_norm": 0.5705233257587317, "learning_rate": 9.61273838999367e-06, "loss": 11.8475, "step": 31715 }, { "epoch": 1.7270629556267996, "grad_norm": 0.6145207127089715, "learning_rate": 9.608966275604913e-06, "loss": 11.8718, "step": 31716 }, { "epoch": 1.7271174096233826, "grad_norm": 0.5457264230962521, "learning_rate": 9.605194864111967e-06, "loss": 11.8942, "step": 31717 }, { "epoch": 1.7271718636199656, "grad_norm": 0.5731032537776786, "learning_rate": 9.601424155544214e-06, "loss": 11.7933, "step": 31718 }, { "epoch": 1.7272263176165485, "grad_norm": 0.5175332225203725, "learning_rate": 9.597654149930934e-06, "loss": 11.7578, "step": 31719 }, { "epoch": 1.7272807716131315, "grad_norm": 0.6721303858954103, "learning_rate": 9.593884847301437e-06, "loss": 12.0066, "step": 31720 }, { "epoch": 1.7273352256097145, "grad_norm": 0.6322488225549545, "learning_rate": 9.590116247685089e-06, "loss": 11.7774, "step": 31721 }, { "epoch": 1.7273896796062975, "grad_norm": 0.5490412921685415, "learning_rate": 9.586348351111118e-06, "loss": 11.6418, "step": 31722 }, { "epoch": 1.7274441336028805, "grad_norm": 0.5069457744941861, "learning_rate": 9.582581157608883e-06, "loss": 11.6878, "step": 31723 }, { "epoch": 1.7274985875994635, "grad_norm": 0.510792647693245, "learning_rate": 9.57881466720767e-06, "loss": 11.7458, "step": 31724 }, { "epoch": 1.7275530415960465, "grad_norm": 0.5589741024172136, "learning_rate": 9.575048879936732e-06, "loss": 11.8988, "step": 31725 }, { "epoch": 1.7276074955926295, "grad_norm": 0.5440348207592137, "learning_rate": 9.571283795825404e-06, "loss": 11.7806, "step": 31726 }, { "epoch": 1.7276619495892127, "grad_norm": 0.5228054075329174, "learning_rate": 9.567519414902926e-06, "loss": 11.7824, "step": 31727 }, { "epoch": 1.7277164035857957, "grad_norm": 0.5812598052893078, "learning_rate": 9.563755737198588e-06, "loss": 11.8072, "step": 31728 }, { "epoch": 1.7277708575823787, "grad_norm": 0.5124134013005092, "learning_rate": 9.559992762741666e-06, "loss": 11.7268, "step": 31729 }, { "epoch": 1.7278253115789617, "grad_norm": 0.5835164472508388, "learning_rate": 9.55623049156138e-06, "loss": 11.7904, "step": 31730 }, { "epoch": 1.7278797655755447, "grad_norm": 0.5085767572132961, "learning_rate": 9.55246892368703e-06, "loss": 11.8523, "step": 31731 }, { "epoch": 1.7279342195721277, "grad_norm": 0.576927234951472, "learning_rate": 9.548708059147827e-06, "loss": 11.7906, "step": 31732 }, { "epoch": 1.7279886735687109, "grad_norm": 0.597292849935191, "learning_rate": 9.544947897973066e-06, "loss": 11.7756, "step": 31733 }, { "epoch": 1.7280431275652939, "grad_norm": 0.5796608200635165, "learning_rate": 9.54118844019194e-06, "loss": 11.8446, "step": 31734 }, { "epoch": 1.7280975815618769, "grad_norm": 0.5473601123137064, "learning_rate": 9.5374296858337e-06, "loss": 11.9058, "step": 31735 }, { "epoch": 1.7281520355584599, "grad_norm": 0.6127335719945055, "learning_rate": 9.533671634927599e-06, "loss": 11.8116, "step": 31736 }, { "epoch": 1.7282064895550429, "grad_norm": 0.532967238794352, "learning_rate": 9.529914287502816e-06, "loss": 11.7317, "step": 31737 }, { "epoch": 1.7282609435516259, "grad_norm": 0.6523433605397009, "learning_rate": 9.526157643588618e-06, "loss": 11.8911, "step": 31738 }, { "epoch": 1.7283153975482088, "grad_norm": 0.5082996924963534, "learning_rate": 9.52240170321418e-06, "loss": 11.7689, "step": 31739 }, { "epoch": 1.7283698515447918, "grad_norm": 0.5489672555722254, "learning_rate": 9.518646466408709e-06, "loss": 11.8655, "step": 31740 }, { "epoch": 1.7284243055413748, "grad_norm": 0.47865030614903176, "learning_rate": 9.51489193320143e-06, "loss": 11.7239, "step": 31741 }, { "epoch": 1.7284787595379578, "grad_norm": 0.5008943163819279, "learning_rate": 9.511138103621508e-06, "loss": 11.6818, "step": 31742 }, { "epoch": 1.7285332135345408, "grad_norm": 0.5616135003176408, "learning_rate": 9.507384977698175e-06, "loss": 11.8098, "step": 31743 }, { "epoch": 1.7285876675311238, "grad_norm": 0.6008478960011499, "learning_rate": 9.503632555460574e-06, "loss": 11.8165, "step": 31744 }, { "epoch": 1.7286421215277068, "grad_norm": 0.5864006875525177, "learning_rate": 9.499880836937913e-06, "loss": 11.9344, "step": 31745 }, { "epoch": 1.7286965755242898, "grad_norm": 0.6029267443455991, "learning_rate": 9.496129822159338e-06, "loss": 11.7735, "step": 31746 }, { "epoch": 1.7287510295208728, "grad_norm": 0.5100562454781388, "learning_rate": 9.492379511154036e-06, "loss": 11.7855, "step": 31747 }, { "epoch": 1.7288054835174558, "grad_norm": 0.5145890233843773, "learning_rate": 9.488629903951197e-06, "loss": 11.7564, "step": 31748 }, { "epoch": 1.7288599375140388, "grad_norm": 0.5558111178106622, "learning_rate": 9.484881000579937e-06, "loss": 11.8101, "step": 31749 }, { "epoch": 1.728914391510622, "grad_norm": 0.5288825719725364, "learning_rate": 9.481132801069403e-06, "loss": 11.7869, "step": 31750 }, { "epoch": 1.728968845507205, "grad_norm": 0.5379104324708626, "learning_rate": 9.477385305448794e-06, "loss": 11.8337, "step": 31751 }, { "epoch": 1.729023299503788, "grad_norm": 0.6405719500892642, "learning_rate": 9.473638513747184e-06, "loss": 11.9674, "step": 31752 }, { "epoch": 1.729077753500371, "grad_norm": 0.614330814369286, "learning_rate": 9.469892425993764e-06, "loss": 11.7954, "step": 31753 }, { "epoch": 1.729132207496954, "grad_norm": 0.5079721712198427, "learning_rate": 9.466147042217632e-06, "loss": 11.652, "step": 31754 }, { "epoch": 1.729186661493537, "grad_norm": 0.5782492986472985, "learning_rate": 9.46240236244793e-06, "loss": 11.77, "step": 31755 }, { "epoch": 1.7292411154901202, "grad_norm": 0.5521552723003574, "learning_rate": 9.45865838671376e-06, "loss": 11.839, "step": 31756 }, { "epoch": 1.7292955694867032, "grad_norm": 0.5549373008724034, "learning_rate": 9.45491511504425e-06, "loss": 11.8042, "step": 31757 }, { "epoch": 1.7293500234832861, "grad_norm": 0.5215075361656176, "learning_rate": 9.451172547468512e-06, "loss": 11.6191, "step": 31758 }, { "epoch": 1.7294044774798691, "grad_norm": 0.5692800819500581, "learning_rate": 9.447430684015645e-06, "loss": 11.7545, "step": 31759 }, { "epoch": 1.7294589314764521, "grad_norm": 0.5437205367979392, "learning_rate": 9.44368952471475e-06, "loss": 11.8703, "step": 31760 }, { "epoch": 1.7295133854730351, "grad_norm": 0.5414735539849145, "learning_rate": 9.439949069594888e-06, "loss": 11.8, "step": 31761 }, { "epoch": 1.7295678394696181, "grad_norm": 0.5091608588791786, "learning_rate": 9.436209318685163e-06, "loss": 11.7333, "step": 31762 }, { "epoch": 1.729622293466201, "grad_norm": 0.5716410024395188, "learning_rate": 9.432470272014681e-06, "loss": 11.8264, "step": 31763 }, { "epoch": 1.729676747462784, "grad_norm": 0.5155399898054873, "learning_rate": 9.428731929612488e-06, "loss": 11.8019, "step": 31764 }, { "epoch": 1.729731201459367, "grad_norm": 0.5338373149824597, "learning_rate": 9.424994291507682e-06, "loss": 11.8044, "step": 31765 }, { "epoch": 1.72978565545595, "grad_norm": 0.5280839117958512, "learning_rate": 9.421257357729284e-06, "loss": 11.7311, "step": 31766 }, { "epoch": 1.729840109452533, "grad_norm": 0.5590267339098637, "learning_rate": 9.417521128306406e-06, "loss": 11.9218, "step": 31767 }, { "epoch": 1.729894563449116, "grad_norm": 0.5411376474594606, "learning_rate": 9.413785603268055e-06, "loss": 11.7603, "step": 31768 }, { "epoch": 1.729949017445699, "grad_norm": 0.5774859340004413, "learning_rate": 9.41005078264331e-06, "loss": 11.7246, "step": 31769 }, { "epoch": 1.730003471442282, "grad_norm": 0.5115390727274177, "learning_rate": 9.406316666461202e-06, "loss": 11.7678, "step": 31770 }, { "epoch": 1.730057925438865, "grad_norm": 0.5870685307456822, "learning_rate": 9.402583254750752e-06, "loss": 11.8025, "step": 31771 }, { "epoch": 1.730112379435448, "grad_norm": 0.5992386089876499, "learning_rate": 9.398850547541015e-06, "loss": 11.8757, "step": 31772 }, { "epoch": 1.730166833432031, "grad_norm": 0.5789352698885607, "learning_rate": 9.395118544861026e-06, "loss": 11.7844, "step": 31773 }, { "epoch": 1.7302212874286143, "grad_norm": 0.5334465536363548, "learning_rate": 9.391387246739758e-06, "loss": 11.6402, "step": 31774 }, { "epoch": 1.7302757414251972, "grad_norm": 0.5767553661427227, "learning_rate": 9.387656653206289e-06, "loss": 11.8415, "step": 31775 }, { "epoch": 1.7303301954217802, "grad_norm": 0.5284614688540364, "learning_rate": 9.383926764289574e-06, "loss": 11.7814, "step": 31776 }, { "epoch": 1.7303846494183632, "grad_norm": 0.5561196040121275, "learning_rate": 9.380197580018667e-06, "loss": 11.7798, "step": 31777 }, { "epoch": 1.7304391034149462, "grad_norm": 0.5335937976136347, "learning_rate": 9.376469100422513e-06, "loss": 11.4178, "step": 31778 }, { "epoch": 1.7304935574115294, "grad_norm": 0.5712066405402608, "learning_rate": 9.372741325530154e-06, "loss": 11.875, "step": 31779 }, { "epoch": 1.7305480114081124, "grad_norm": 0.5862121634867449, "learning_rate": 9.369014255370557e-06, "loss": 11.9227, "step": 31780 }, { "epoch": 1.7306024654046954, "grad_norm": 0.5074207143718371, "learning_rate": 9.365287889972686e-06, "loss": 11.6428, "step": 31781 }, { "epoch": 1.7306569194012784, "grad_norm": 0.5267269417926154, "learning_rate": 9.361562229365561e-06, "loss": 11.9004, "step": 31782 }, { "epoch": 1.7307113733978614, "grad_norm": 0.5822986060977571, "learning_rate": 9.357837273578096e-06, "loss": 11.7974, "step": 31783 }, { "epoch": 1.7307658273944444, "grad_norm": 0.5779219084655686, "learning_rate": 9.3541130226393e-06, "loss": 11.8296, "step": 31784 }, { "epoch": 1.7308202813910274, "grad_norm": 0.5272081635596604, "learning_rate": 9.350389476578137e-06, "loss": 11.6917, "step": 31785 }, { "epoch": 1.7308747353876104, "grad_norm": 0.5631118552644984, "learning_rate": 9.34666663542353e-06, "loss": 11.7668, "step": 31786 }, { "epoch": 1.7309291893841934, "grad_norm": 0.5549072649547676, "learning_rate": 9.342944499204465e-06, "loss": 11.6803, "step": 31787 }, { "epoch": 1.7309836433807764, "grad_norm": 0.7167830541880525, "learning_rate": 9.339223067949843e-06, "loss": 11.804, "step": 31788 }, { "epoch": 1.7310380973773594, "grad_norm": 0.5556018355130229, "learning_rate": 9.335502341688652e-06, "loss": 11.6957, "step": 31789 }, { "epoch": 1.7310925513739424, "grad_norm": 0.5446655057382435, "learning_rate": 9.33178232044979e-06, "loss": 11.8085, "step": 31790 }, { "epoch": 1.7311470053705253, "grad_norm": 0.5432132415105937, "learning_rate": 9.328063004262177e-06, "loss": 11.8292, "step": 31791 }, { "epoch": 1.7312014593671083, "grad_norm": 0.5688969977007066, "learning_rate": 9.324344393154783e-06, "loss": 11.8716, "step": 31792 }, { "epoch": 1.7312559133636913, "grad_norm": 0.5212004715882294, "learning_rate": 9.320626487156459e-06, "loss": 11.7897, "step": 31793 }, { "epoch": 1.7313103673602743, "grad_norm": 0.516186582647028, "learning_rate": 9.316909286296183e-06, "loss": 11.873, "step": 31794 }, { "epoch": 1.7313648213568573, "grad_norm": 0.5027105438080042, "learning_rate": 9.313192790602798e-06, "loss": 11.7418, "step": 31795 }, { "epoch": 1.7314192753534403, "grad_norm": 0.5349807289495488, "learning_rate": 9.309477000105237e-06, "loss": 11.6966, "step": 31796 }, { "epoch": 1.7314737293500235, "grad_norm": 0.5307829903497532, "learning_rate": 9.305761914832412e-06, "loss": 11.8554, "step": 31797 }, { "epoch": 1.7315281833466065, "grad_norm": 0.4943187880420814, "learning_rate": 9.302047534813174e-06, "loss": 11.7604, "step": 31798 }, { "epoch": 1.7315826373431895, "grad_norm": 0.537687020744414, "learning_rate": 9.298333860076435e-06, "loss": 11.8353, "step": 31799 }, { "epoch": 1.7316370913397725, "grad_norm": 0.5634131204889544, "learning_rate": 9.294620890651074e-06, "loss": 11.7512, "step": 31800 }, { "epoch": 1.7316915453363555, "grad_norm": 0.5880410043981958, "learning_rate": 9.29090862656593e-06, "loss": 11.9291, "step": 31801 }, { "epoch": 1.7317459993329385, "grad_norm": 0.5863121013597389, "learning_rate": 9.287197067849907e-06, "loss": 11.7841, "step": 31802 }, { "epoch": 1.7318004533295217, "grad_norm": 0.5473313432392998, "learning_rate": 9.283486214531833e-06, "loss": 11.8744, "step": 31803 }, { "epoch": 1.7318549073261047, "grad_norm": 0.535849865589238, "learning_rate": 9.27977606664061e-06, "loss": 11.8094, "step": 31804 }, { "epoch": 1.7319093613226877, "grad_norm": 0.5661076997331189, "learning_rate": 9.276066624205038e-06, "loss": 11.8197, "step": 31805 }, { "epoch": 1.7319638153192707, "grad_norm": 0.5627804408755753, "learning_rate": 9.27235788725399e-06, "loss": 11.7295, "step": 31806 }, { "epoch": 1.7320182693158537, "grad_norm": 0.5157923365260186, "learning_rate": 9.268649855816313e-06, "loss": 11.8166, "step": 31807 }, { "epoch": 1.7320727233124367, "grad_norm": 0.5736871896204039, "learning_rate": 9.264942529920817e-06, "loss": 11.8427, "step": 31808 }, { "epoch": 1.7321271773090197, "grad_norm": 0.5814916522615083, "learning_rate": 9.261235909596367e-06, "loss": 11.8641, "step": 31809 }, { "epoch": 1.7321816313056027, "grad_norm": 0.5461754129138099, "learning_rate": 9.25752999487176e-06, "loss": 11.8515, "step": 31810 }, { "epoch": 1.7322360853021856, "grad_norm": 0.5126673278482509, "learning_rate": 9.253824785775799e-06, "loss": 11.7771, "step": 31811 }, { "epoch": 1.7322905392987686, "grad_norm": 0.5560593970273288, "learning_rate": 9.250120282337326e-06, "loss": 11.7364, "step": 31812 }, { "epoch": 1.7323449932953516, "grad_norm": 0.5395423087362559, "learning_rate": 9.24641648458513e-06, "loss": 11.8862, "step": 31813 }, { "epoch": 1.7323994472919346, "grad_norm": 0.49090752907695995, "learning_rate": 9.24271339254803e-06, "loss": 11.7974, "step": 31814 }, { "epoch": 1.7324539012885176, "grad_norm": 0.4910345344878293, "learning_rate": 9.239011006254794e-06, "loss": 11.7537, "step": 31815 }, { "epoch": 1.7325083552851006, "grad_norm": 0.5468577576444289, "learning_rate": 9.235309325734242e-06, "loss": 11.8825, "step": 31816 }, { "epoch": 1.7325628092816836, "grad_norm": 0.541162242652814, "learning_rate": 9.23160835101513e-06, "loss": 11.8388, "step": 31817 }, { "epoch": 1.7326172632782666, "grad_norm": 0.6230660625624617, "learning_rate": 9.227908082126258e-06, "loss": 11.9004, "step": 31818 }, { "epoch": 1.7326717172748496, "grad_norm": 0.5382764596129828, "learning_rate": 9.22420851909641e-06, "loss": 11.9146, "step": 31819 }, { "epoch": 1.7327261712714328, "grad_norm": 0.5263115047747339, "learning_rate": 9.220509661954346e-06, "loss": 11.6744, "step": 31820 }, { "epoch": 1.7327806252680158, "grad_norm": 0.49533064461987614, "learning_rate": 9.216811510728795e-06, "loss": 11.765, "step": 31821 }, { "epoch": 1.7328350792645988, "grad_norm": 0.5375916987433108, "learning_rate": 9.213114065448559e-06, "loss": 11.7109, "step": 31822 }, { "epoch": 1.7328895332611818, "grad_norm": 0.5771549502666888, "learning_rate": 9.209417326142367e-06, "loss": 11.9013, "step": 31823 }, { "epoch": 1.7329439872577648, "grad_norm": 0.5788564287638096, "learning_rate": 9.205721292838976e-06, "loss": 11.7073, "step": 31824 }, { "epoch": 1.7329984412543478, "grad_norm": 0.5620934116215915, "learning_rate": 9.202025965567118e-06, "loss": 11.9465, "step": 31825 }, { "epoch": 1.733052895250931, "grad_norm": 0.5288264080461414, "learning_rate": 9.198331344355537e-06, "loss": 11.7418, "step": 31826 }, { "epoch": 1.733107349247514, "grad_norm": 0.5991099826495264, "learning_rate": 9.194637429232955e-06, "loss": 11.8112, "step": 31827 }, { "epoch": 1.733161803244097, "grad_norm": 0.49794012698116397, "learning_rate": 9.190944220228093e-06, "loss": 11.7288, "step": 31828 }, { "epoch": 1.73321625724068, "grad_norm": 0.5295308583277801, "learning_rate": 9.187251717369695e-06, "loss": 11.8134, "step": 31829 }, { "epoch": 1.733270711237263, "grad_norm": 0.5052311824340352, "learning_rate": 9.183559920686457e-06, "loss": 11.8452, "step": 31830 }, { "epoch": 1.733325165233846, "grad_norm": 0.524407725961568, "learning_rate": 9.179868830207084e-06, "loss": 11.6978, "step": 31831 }, { "epoch": 1.733379619230429, "grad_norm": 0.5772553659785641, "learning_rate": 9.17617844596027e-06, "loss": 11.7124, "step": 31832 }, { "epoch": 1.733434073227012, "grad_norm": 0.5215331440394445, "learning_rate": 9.172488767974718e-06, "loss": 11.688, "step": 31833 }, { "epoch": 1.733488527223595, "grad_norm": 0.5672522888550658, "learning_rate": 9.168799796279148e-06, "loss": 11.7624, "step": 31834 }, { "epoch": 1.733542981220178, "grad_norm": 0.5529480895985476, "learning_rate": 9.165111530902204e-06, "loss": 11.7447, "step": 31835 }, { "epoch": 1.733597435216761, "grad_norm": 0.5738203143307091, "learning_rate": 9.161423971872606e-06, "loss": 11.8681, "step": 31836 }, { "epoch": 1.733651889213344, "grad_norm": 0.48516725773577646, "learning_rate": 9.15773711921898e-06, "loss": 11.8038, "step": 31837 }, { "epoch": 1.733706343209927, "grad_norm": 0.559581261836742, "learning_rate": 9.154050972970052e-06, "loss": 11.8667, "step": 31838 }, { "epoch": 1.7337607972065099, "grad_norm": 0.5619177232601096, "learning_rate": 9.150365533154437e-06, "loss": 11.7478, "step": 31839 }, { "epoch": 1.7338152512030929, "grad_norm": 0.5469247544641608, "learning_rate": 9.146680799800834e-06, "loss": 11.8148, "step": 31840 }, { "epoch": 1.7338697051996759, "grad_norm": 0.5634893522774611, "learning_rate": 9.142996772937884e-06, "loss": 11.8232, "step": 31841 }, { "epoch": 1.7339241591962589, "grad_norm": 0.60737512667563, "learning_rate": 9.1393134525942e-06, "loss": 11.7279, "step": 31842 }, { "epoch": 1.733978613192842, "grad_norm": 0.5891923500901595, "learning_rate": 9.13563083879847e-06, "loss": 11.8399, "step": 31843 }, { "epoch": 1.734033067189425, "grad_norm": 0.549736623254649, "learning_rate": 9.131948931579303e-06, "loss": 11.6969, "step": 31844 }, { "epoch": 1.734087521186008, "grad_norm": 0.5923333227224441, "learning_rate": 9.128267730965334e-06, "loss": 11.9202, "step": 31845 }, { "epoch": 1.734141975182591, "grad_norm": 0.5112563342606398, "learning_rate": 9.124587236985216e-06, "loss": 11.8206, "step": 31846 }, { "epoch": 1.734196429179174, "grad_norm": 0.5577007296796052, "learning_rate": 9.120907449667527e-06, "loss": 11.7966, "step": 31847 }, { "epoch": 1.734250883175757, "grad_norm": 0.5229001121047716, "learning_rate": 9.11722836904092e-06, "loss": 11.7849, "step": 31848 }, { "epoch": 1.7343053371723403, "grad_norm": 0.5792622385692741, "learning_rate": 9.113549995133964e-06, "loss": 11.7612, "step": 31849 }, { "epoch": 1.7343597911689232, "grad_norm": 0.5338103589125868, "learning_rate": 9.10987232797531e-06, "loss": 11.7377, "step": 31850 }, { "epoch": 1.7344142451655062, "grad_norm": 0.5622271457193666, "learning_rate": 9.106195367593528e-06, "loss": 11.7544, "step": 31851 }, { "epoch": 1.7344686991620892, "grad_norm": 0.5177335513682215, "learning_rate": 9.102519114017194e-06, "loss": 11.7878, "step": 31852 }, { "epoch": 1.7345231531586722, "grad_norm": 0.5389755869777442, "learning_rate": 9.09884356727494e-06, "loss": 11.6675, "step": 31853 }, { "epoch": 1.7345776071552552, "grad_norm": 0.5404158838421443, "learning_rate": 9.095168727395298e-06, "loss": 11.8193, "step": 31854 }, { "epoch": 1.7346320611518382, "grad_norm": 0.5943710794913865, "learning_rate": 9.091494594406868e-06, "loss": 11.7008, "step": 31855 }, { "epoch": 1.7346865151484212, "grad_norm": 0.5339501381814628, "learning_rate": 9.087821168338239e-06, "loss": 11.6924, "step": 31856 }, { "epoch": 1.7347409691450042, "grad_norm": 0.5678115108341879, "learning_rate": 9.084148449217945e-06, "loss": 11.7249, "step": 31857 }, { "epoch": 1.7347954231415872, "grad_norm": 0.5668293287245962, "learning_rate": 9.080476437074569e-06, "loss": 11.8283, "step": 31858 }, { "epoch": 1.7348498771381702, "grad_norm": 0.5547633744492095, "learning_rate": 9.07680513193665e-06, "loss": 11.7417, "step": 31859 }, { "epoch": 1.7349043311347532, "grad_norm": 0.5436193098513749, "learning_rate": 9.07313453383275e-06, "loss": 11.6788, "step": 31860 }, { "epoch": 1.7349587851313362, "grad_norm": 0.5690684965834453, "learning_rate": 9.069464642791403e-06, "loss": 11.8594, "step": 31861 }, { "epoch": 1.7350132391279192, "grad_norm": 0.5921293204556438, "learning_rate": 9.065795458841143e-06, "loss": 11.8938, "step": 31862 }, { "epoch": 1.7350676931245022, "grad_norm": 0.5523228562514196, "learning_rate": 9.06212698201051e-06, "loss": 11.8822, "step": 31863 }, { "epoch": 1.7351221471210851, "grad_norm": 0.5686848873771777, "learning_rate": 9.058459212328018e-06, "loss": 11.8903, "step": 31864 }, { "epoch": 1.7351766011176681, "grad_norm": 0.5264078035947367, "learning_rate": 9.054792149822222e-06, "loss": 11.8165, "step": 31865 }, { "epoch": 1.7352310551142511, "grad_norm": 0.5488165106359091, "learning_rate": 9.051125794521587e-06, "loss": 11.7877, "step": 31866 }, { "epoch": 1.7352855091108343, "grad_norm": 0.5730019632075116, "learning_rate": 9.047460146454644e-06, "loss": 11.8723, "step": 31867 }, { "epoch": 1.7353399631074173, "grad_norm": 0.6138328700026215, "learning_rate": 9.04379520564993e-06, "loss": 11.7798, "step": 31868 }, { "epoch": 1.7353944171040003, "grad_norm": 0.6531888711113734, "learning_rate": 9.040130972135907e-06, "loss": 11.7869, "step": 31869 }, { "epoch": 1.7354488711005833, "grad_norm": 0.575646539004118, "learning_rate": 9.036467445941089e-06, "loss": 11.7694, "step": 31870 }, { "epoch": 1.7355033250971663, "grad_norm": 0.5919880430843911, "learning_rate": 9.03280462709395e-06, "loss": 11.8656, "step": 31871 }, { "epoch": 1.7355577790937493, "grad_norm": 0.524589233508476, "learning_rate": 9.029142515622968e-06, "loss": 11.7044, "step": 31872 }, { "epoch": 1.7356122330903325, "grad_norm": 0.5689243356154033, "learning_rate": 9.025481111556645e-06, "loss": 11.8084, "step": 31873 }, { "epoch": 1.7356666870869155, "grad_norm": 0.5010574702545381, "learning_rate": 9.021820414923421e-06, "loss": 11.6926, "step": 31874 }, { "epoch": 1.7357211410834985, "grad_norm": 0.5140298930794341, "learning_rate": 9.018160425751787e-06, "loss": 11.7479, "step": 31875 }, { "epoch": 1.7357755950800815, "grad_norm": 0.5598431460374912, "learning_rate": 9.014501144070187e-06, "loss": 11.8489, "step": 31876 }, { "epoch": 1.7358300490766645, "grad_norm": 0.5498528068954908, "learning_rate": 9.010842569907086e-06, "loss": 11.7145, "step": 31877 }, { "epoch": 1.7358845030732475, "grad_norm": 0.5128889158274546, "learning_rate": 9.00718470329095e-06, "loss": 11.8008, "step": 31878 }, { "epoch": 1.7359389570698305, "grad_norm": 0.5558278832011802, "learning_rate": 9.003527544250178e-06, "loss": 11.8823, "step": 31879 }, { "epoch": 1.7359934110664135, "grad_norm": 0.5685858614518955, "learning_rate": 8.999871092813272e-06, "loss": 11.8429, "step": 31880 }, { "epoch": 1.7360478650629965, "grad_norm": 0.5850625674664518, "learning_rate": 8.996215349008608e-06, "loss": 11.8085, "step": 31881 }, { "epoch": 1.7361023190595795, "grad_norm": 0.5071773126399107, "learning_rate": 8.992560312864617e-06, "loss": 11.804, "step": 31882 }, { "epoch": 1.7361567730561625, "grad_norm": 0.5433957365845743, "learning_rate": 8.988905984409768e-06, "loss": 11.7221, "step": 31883 }, { "epoch": 1.7362112270527454, "grad_norm": 0.501478910362739, "learning_rate": 8.985252363672426e-06, "loss": 11.7778, "step": 31884 }, { "epoch": 1.7362656810493284, "grad_norm": 0.5460184950510293, "learning_rate": 8.981599450681043e-06, "loss": 11.6809, "step": 31885 }, { "epoch": 1.7363201350459114, "grad_norm": 0.5794270108992531, "learning_rate": 8.977947245463991e-06, "loss": 11.7472, "step": 31886 }, { "epoch": 1.7363745890424944, "grad_norm": 0.513443854355982, "learning_rate": 8.97429574804971e-06, "loss": 11.6999, "step": 31887 }, { "epoch": 1.7364290430390774, "grad_norm": 0.5719591196016566, "learning_rate": 8.970644958466534e-06, "loss": 11.948, "step": 31888 }, { "epoch": 1.7364834970356604, "grad_norm": 0.5466542800558982, "learning_rate": 8.966994876742907e-06, "loss": 11.6543, "step": 31889 }, { "epoch": 1.7365379510322436, "grad_norm": 0.5855541183710491, "learning_rate": 8.963345502907216e-06, "loss": 11.8503, "step": 31890 }, { "epoch": 1.7365924050288266, "grad_norm": 0.4972010693033175, "learning_rate": 8.959696836987796e-06, "loss": 11.739, "step": 31891 }, { "epoch": 1.7366468590254096, "grad_norm": 0.6368720737829793, "learning_rate": 8.956048879013045e-06, "loss": 11.7997, "step": 31892 }, { "epoch": 1.7367013130219926, "grad_norm": 0.6061320673296801, "learning_rate": 8.95240162901132e-06, "loss": 11.8223, "step": 31893 }, { "epoch": 1.7367557670185756, "grad_norm": 0.5468653568716522, "learning_rate": 8.948755087010973e-06, "loss": 11.6784, "step": 31894 }, { "epoch": 1.7368102210151586, "grad_norm": 0.6787007297548442, "learning_rate": 8.945109253040407e-06, "loss": 11.9298, "step": 31895 }, { "epoch": 1.7368646750117418, "grad_norm": 0.4826222858430997, "learning_rate": 8.941464127127918e-06, "loss": 11.7985, "step": 31896 }, { "epoch": 1.7369191290083248, "grad_norm": 0.5567254485433407, "learning_rate": 8.937819709301898e-06, "loss": 11.7621, "step": 31897 }, { "epoch": 1.7369735830049078, "grad_norm": 0.5272206331795003, "learning_rate": 8.934175999590633e-06, "loss": 11.7968, "step": 31898 }, { "epoch": 1.7370280370014908, "grad_norm": 0.5289801539308546, "learning_rate": 8.930532998022512e-06, "loss": 11.7091, "step": 31899 }, { "epoch": 1.7370824909980738, "grad_norm": 0.5294484475452663, "learning_rate": 8.926890704625845e-06, "loss": 11.7452, "step": 31900 }, { "epoch": 1.7371369449946568, "grad_norm": 0.5283246401411331, "learning_rate": 8.923249119428922e-06, "loss": 11.758, "step": 31901 }, { "epoch": 1.7371913989912398, "grad_norm": 0.6418452457116163, "learning_rate": 8.919608242460108e-06, "loss": 11.8581, "step": 31902 }, { "epoch": 1.7372458529878227, "grad_norm": 0.5248087601600387, "learning_rate": 8.915968073747682e-06, "loss": 11.8406, "step": 31903 }, { "epoch": 1.7373003069844057, "grad_norm": 0.5388132701630212, "learning_rate": 8.912328613319953e-06, "loss": 11.8169, "step": 31904 }, { "epoch": 1.7373547609809887, "grad_norm": 0.5051992040933106, "learning_rate": 8.908689861205255e-06, "loss": 11.7077, "step": 31905 }, { "epoch": 1.7374092149775717, "grad_norm": 0.5759347931673632, "learning_rate": 8.905051817431853e-06, "loss": 11.8287, "step": 31906 }, { "epoch": 1.7374636689741547, "grad_norm": 0.4963267385136898, "learning_rate": 8.901414482028047e-06, "loss": 11.8085, "step": 31907 }, { "epoch": 1.7375181229707377, "grad_norm": 0.5020050777137453, "learning_rate": 8.897777855022105e-06, "loss": 11.7405, "step": 31908 }, { "epoch": 1.7375725769673207, "grad_norm": 0.5810868966399029, "learning_rate": 8.894141936442346e-06, "loss": 11.7657, "step": 31909 }, { "epoch": 1.7376270309639037, "grad_norm": 0.5735157174973503, "learning_rate": 8.890506726317005e-06, "loss": 11.7871, "step": 31910 }, { "epoch": 1.7376814849604867, "grad_norm": 0.5594504910573439, "learning_rate": 8.886872224674359e-06, "loss": 11.6382, "step": 31911 }, { "epoch": 1.7377359389570697, "grad_norm": 0.5037711618953534, "learning_rate": 8.883238431542684e-06, "loss": 11.791, "step": 31912 }, { "epoch": 1.737790392953653, "grad_norm": 0.5332720651748264, "learning_rate": 8.879605346950203e-06, "loss": 11.7786, "step": 31913 }, { "epoch": 1.7378448469502359, "grad_norm": 0.5461210602637525, "learning_rate": 8.875972970925229e-06, "loss": 11.8422, "step": 31914 }, { "epoch": 1.7378993009468189, "grad_norm": 0.5556292310379968, "learning_rate": 8.872341303495935e-06, "loss": 11.7714, "step": 31915 }, { "epoch": 1.7379537549434019, "grad_norm": 0.554320969484622, "learning_rate": 8.868710344690601e-06, "loss": 11.7083, "step": 31916 }, { "epoch": 1.7380082089399849, "grad_norm": 0.6041289850317021, "learning_rate": 8.86508009453748e-06, "loss": 11.7377, "step": 31917 }, { "epoch": 1.7380626629365679, "grad_norm": 0.5841586540888242, "learning_rate": 8.861450553064765e-06, "loss": 11.7625, "step": 31918 }, { "epoch": 1.738117116933151, "grad_norm": 0.5567303583460421, "learning_rate": 8.857821720300697e-06, "loss": 11.6293, "step": 31919 }, { "epoch": 1.738171570929734, "grad_norm": 0.5983747172622275, "learning_rate": 8.854193596273509e-06, "loss": 11.8934, "step": 31920 }, { "epoch": 1.738226024926317, "grad_norm": 0.7066976709933921, "learning_rate": 8.850566181011366e-06, "loss": 11.9972, "step": 31921 }, { "epoch": 1.7382804789229, "grad_norm": 0.5893838894800378, "learning_rate": 8.846939474542538e-06, "loss": 11.7745, "step": 31922 }, { "epoch": 1.738334932919483, "grad_norm": 0.5110780693151147, "learning_rate": 8.843313476895165e-06, "loss": 11.7757, "step": 31923 }, { "epoch": 1.738389386916066, "grad_norm": 0.6209174833375607, "learning_rate": 8.839688188097495e-06, "loss": 11.822, "step": 31924 }, { "epoch": 1.738443840912649, "grad_norm": 0.5819825506408214, "learning_rate": 8.83606360817768e-06, "loss": 11.7911, "step": 31925 }, { "epoch": 1.738498294909232, "grad_norm": 0.6126769758037125, "learning_rate": 8.832439737163923e-06, "loss": 11.7689, "step": 31926 }, { "epoch": 1.738552748905815, "grad_norm": 0.5648979642562577, "learning_rate": 8.82881657508442e-06, "loss": 11.8178, "step": 31927 }, { "epoch": 1.738607202902398, "grad_norm": 0.5358949345609733, "learning_rate": 8.825194121967307e-06, "loss": 11.7572, "step": 31928 }, { "epoch": 1.738661656898981, "grad_norm": 0.5717128670773614, "learning_rate": 8.821572377840803e-06, "loss": 11.8206, "step": 31929 }, { "epoch": 1.738716110895564, "grad_norm": 0.5853436900609554, "learning_rate": 8.817951342733032e-06, "loss": 11.8308, "step": 31930 }, { "epoch": 1.738770564892147, "grad_norm": 0.5471095256239284, "learning_rate": 8.81433101667215e-06, "loss": 11.7714, "step": 31931 }, { "epoch": 1.73882501888873, "grad_norm": 0.546559421596322, "learning_rate": 8.810711399686334e-06, "loss": 11.8059, "step": 31932 }, { "epoch": 1.738879472885313, "grad_norm": 0.5367329275204307, "learning_rate": 8.807092491803715e-06, "loss": 11.7581, "step": 31933 }, { "epoch": 1.738933926881896, "grad_norm": 0.5698202345816502, "learning_rate": 8.803474293052438e-06, "loss": 11.9099, "step": 31934 }, { "epoch": 1.738988380878479, "grad_norm": 0.5262262135844425, "learning_rate": 8.799856803460627e-06, "loss": 11.6944, "step": 31935 }, { "epoch": 1.739042834875062, "grad_norm": 0.5930909117868646, "learning_rate": 8.796240023056445e-06, "loss": 11.8409, "step": 31936 }, { "epoch": 1.7390972888716452, "grad_norm": 0.5720385926628261, "learning_rate": 8.792623951867985e-06, "loss": 11.9001, "step": 31937 }, { "epoch": 1.7391517428682282, "grad_norm": 0.587627436208008, "learning_rate": 8.789008589923364e-06, "loss": 11.8586, "step": 31938 }, { "epoch": 1.7392061968648111, "grad_norm": 0.5999198699587912, "learning_rate": 8.78539393725073e-06, "loss": 11.8785, "step": 31939 }, { "epoch": 1.7392606508613941, "grad_norm": 0.5202491532771909, "learning_rate": 8.781779993878169e-06, "loss": 11.5571, "step": 31940 }, { "epoch": 1.7393151048579771, "grad_norm": 0.6831119204493081, "learning_rate": 8.778166759833784e-06, "loss": 11.8553, "step": 31941 }, { "epoch": 1.7393695588545601, "grad_norm": 0.5254518224668623, "learning_rate": 8.774554235145648e-06, "loss": 11.8292, "step": 31942 }, { "epoch": 1.7394240128511433, "grad_norm": 0.5153392436022605, "learning_rate": 8.770942419841888e-06, "loss": 11.7068, "step": 31943 }, { "epoch": 1.7394784668477263, "grad_norm": 0.5577738292986892, "learning_rate": 8.767331313950588e-06, "loss": 11.6506, "step": 31944 }, { "epoch": 1.7395329208443093, "grad_norm": 0.5793486381665118, "learning_rate": 8.763720917499807e-06, "loss": 11.6731, "step": 31945 }, { "epoch": 1.7395873748408923, "grad_norm": 0.5667946436974896, "learning_rate": 8.760111230517653e-06, "loss": 11.8109, "step": 31946 }, { "epoch": 1.7396418288374753, "grad_norm": 0.5475771598830302, "learning_rate": 8.75650225303215e-06, "loss": 11.7654, "step": 31947 }, { "epoch": 1.7396962828340583, "grad_norm": 0.5202563000046072, "learning_rate": 8.75289398507141e-06, "loss": 11.8181, "step": 31948 }, { "epoch": 1.7397507368306413, "grad_norm": 0.5646446723735778, "learning_rate": 8.74928642666345e-06, "loss": 11.762, "step": 31949 }, { "epoch": 1.7398051908272243, "grad_norm": 0.6338854791429734, "learning_rate": 8.745679577836342e-06, "loss": 11.8404, "step": 31950 }, { "epoch": 1.7398596448238073, "grad_norm": 0.627681073375901, "learning_rate": 8.74207343861817e-06, "loss": 11.7785, "step": 31951 }, { "epoch": 1.7399140988203903, "grad_norm": 0.5419673933021651, "learning_rate": 8.738468009036893e-06, "loss": 11.724, "step": 31952 }, { "epoch": 1.7399685528169733, "grad_norm": 0.5212431761118159, "learning_rate": 8.73486328912061e-06, "loss": 11.6219, "step": 31953 }, { "epoch": 1.7400230068135563, "grad_norm": 0.60439168942001, "learning_rate": 8.731259278897341e-06, "loss": 11.8018, "step": 31954 }, { "epoch": 1.7400774608101393, "grad_norm": 0.5415075829565839, "learning_rate": 8.727655978395089e-06, "loss": 11.7804, "step": 31955 }, { "epoch": 1.7401319148067222, "grad_norm": 0.5351938086840057, "learning_rate": 8.724053387641906e-06, "loss": 11.5577, "step": 31956 }, { "epoch": 1.7401863688033052, "grad_norm": 0.621849052620925, "learning_rate": 8.720451506665783e-06, "loss": 11.7246, "step": 31957 }, { "epoch": 1.7402408227998882, "grad_norm": 0.5492982585708485, "learning_rate": 8.716850335494742e-06, "loss": 11.7424, "step": 31958 }, { "epoch": 1.7402952767964712, "grad_norm": 0.5920599598120099, "learning_rate": 8.71324987415677e-06, "loss": 11.8464, "step": 31959 }, { "epoch": 1.7403497307930544, "grad_norm": 0.4947891504692365, "learning_rate": 8.70965012267988e-06, "loss": 11.7205, "step": 31960 }, { "epoch": 1.7404041847896374, "grad_norm": 0.5455025699705034, "learning_rate": 8.706051081092092e-06, "loss": 11.6804, "step": 31961 }, { "epoch": 1.7404586387862204, "grad_norm": 0.557191036658413, "learning_rate": 8.70245274942132e-06, "loss": 11.6383, "step": 31962 }, { "epoch": 1.7405130927828034, "grad_norm": 0.5340196800062514, "learning_rate": 8.698855127695605e-06, "loss": 11.7639, "step": 31963 }, { "epoch": 1.7405675467793864, "grad_norm": 0.5767153667836261, "learning_rate": 8.695258215942893e-06, "loss": 11.9129, "step": 31964 }, { "epoch": 1.7406220007759694, "grad_norm": 0.5694295553676907, "learning_rate": 8.691662014191159e-06, "loss": 11.8254, "step": 31965 }, { "epoch": 1.7406764547725526, "grad_norm": 0.6535280581606852, "learning_rate": 8.688066522468397e-06, "loss": 11.91, "step": 31966 }, { "epoch": 1.7407309087691356, "grad_norm": 0.5542515877471047, "learning_rate": 8.684471740802514e-06, "loss": 11.7171, "step": 31967 }, { "epoch": 1.7407853627657186, "grad_norm": 0.5526873203872726, "learning_rate": 8.680877669221522e-06, "loss": 11.912, "step": 31968 }, { "epoch": 1.7408398167623016, "grad_norm": 0.6768706919925701, "learning_rate": 8.67728430775332e-06, "loss": 11.9056, "step": 31969 }, { "epoch": 1.7408942707588846, "grad_norm": 0.5126020406449916, "learning_rate": 8.673691656425885e-06, "loss": 11.8263, "step": 31970 }, { "epoch": 1.7409487247554676, "grad_norm": 0.5439267010646247, "learning_rate": 8.670099715267132e-06, "loss": 11.7099, "step": 31971 }, { "epoch": 1.7410031787520506, "grad_norm": 0.4976696273610106, "learning_rate": 8.666508484304992e-06, "loss": 11.7321, "step": 31972 }, { "epoch": 1.7410576327486336, "grad_norm": 0.5508934392557755, "learning_rate": 8.662917963567418e-06, "loss": 11.8543, "step": 31973 }, { "epoch": 1.7411120867452166, "grad_norm": 0.5740364595544567, "learning_rate": 8.65932815308228e-06, "loss": 11.842, "step": 31974 }, { "epoch": 1.7411665407417996, "grad_norm": 0.5215353855019623, "learning_rate": 8.655739052877532e-06, "loss": 11.7305, "step": 31975 }, { "epoch": 1.7412209947383825, "grad_norm": 0.5614963324268551, "learning_rate": 8.652150662981095e-06, "loss": 11.8178, "step": 31976 }, { "epoch": 1.7412754487349655, "grad_norm": 0.5653651352357203, "learning_rate": 8.648562983420839e-06, "loss": 11.9051, "step": 31977 }, { "epoch": 1.7413299027315485, "grad_norm": 0.5334463137538656, "learning_rate": 8.644976014224692e-06, "loss": 11.7374, "step": 31978 }, { "epoch": 1.7413843567281315, "grad_norm": 0.576985678978604, "learning_rate": 8.641389755420515e-06, "loss": 11.729, "step": 31979 }, { "epoch": 1.7414388107247145, "grad_norm": 0.5234658387895171, "learning_rate": 8.637804207036226e-06, "loss": 11.7497, "step": 31980 }, { "epoch": 1.7414932647212975, "grad_norm": 0.6089813444056918, "learning_rate": 8.634219369099694e-06, "loss": 11.8424, "step": 31981 }, { "epoch": 1.7415477187178805, "grad_norm": 0.5935612717936091, "learning_rate": 8.630635241638773e-06, "loss": 11.8468, "step": 31982 }, { "epoch": 1.7416021727144637, "grad_norm": 0.5886032923980896, "learning_rate": 8.627051824681376e-06, "loss": 11.8731, "step": 31983 }, { "epoch": 1.7416566267110467, "grad_norm": 0.5198412867645322, "learning_rate": 8.623469118255334e-06, "loss": 11.8138, "step": 31984 }, { "epoch": 1.7417110807076297, "grad_norm": 0.49262438340279746, "learning_rate": 8.619887122388525e-06, "loss": 11.775, "step": 31985 }, { "epoch": 1.7417655347042127, "grad_norm": 0.5374674231944098, "learning_rate": 8.616305837108795e-06, "loss": 11.7372, "step": 31986 }, { "epoch": 1.7418199887007957, "grad_norm": 0.5033764033578693, "learning_rate": 8.612725262443989e-06, "loss": 11.7487, "step": 31987 }, { "epoch": 1.7418744426973787, "grad_norm": 0.6043909876110837, "learning_rate": 8.60914539842198e-06, "loss": 11.8433, "step": 31988 }, { "epoch": 1.741928896693962, "grad_norm": 0.5455036964850147, "learning_rate": 8.605566245070552e-06, "loss": 11.7161, "step": 31989 }, { "epoch": 1.7419833506905449, "grad_norm": 0.5409583913962226, "learning_rate": 8.601987802417599e-06, "loss": 11.7858, "step": 31990 }, { "epoch": 1.7420378046871279, "grad_norm": 0.5815138530271966, "learning_rate": 8.598410070490915e-06, "loss": 11.7551, "step": 31991 }, { "epoch": 1.7420922586837109, "grad_norm": 0.5446835716470613, "learning_rate": 8.594833049318297e-06, "loss": 11.7497, "step": 31992 }, { "epoch": 1.7421467126802939, "grad_norm": 0.5786796657330281, "learning_rate": 8.591256738927611e-06, "loss": 11.717, "step": 31993 }, { "epoch": 1.7422011666768769, "grad_norm": 0.5645904802426776, "learning_rate": 8.587681139346615e-06, "loss": 11.7751, "step": 31994 }, { "epoch": 1.7422556206734598, "grad_norm": 0.5709068463833342, "learning_rate": 8.584106250603164e-06, "loss": 11.8462, "step": 31995 }, { "epoch": 1.7423100746700428, "grad_norm": 0.5340152565702709, "learning_rate": 8.580532072725012e-06, "loss": 11.7117, "step": 31996 }, { "epoch": 1.7423645286666258, "grad_norm": 0.5387086854060601, "learning_rate": 8.576958605740004e-06, "loss": 11.817, "step": 31997 }, { "epoch": 1.7424189826632088, "grad_norm": 0.5140277553920145, "learning_rate": 8.573385849675863e-06, "loss": 11.7447, "step": 31998 }, { "epoch": 1.7424734366597918, "grad_norm": 0.5629323046688717, "learning_rate": 8.56981380456041e-06, "loss": 11.8892, "step": 31999 }, { "epoch": 1.7425278906563748, "grad_norm": 0.5431528813466309, "learning_rate": 8.566242470421448e-06, "loss": 11.7875, "step": 32000 }, { "epoch": 1.7425823446529578, "grad_norm": 0.5154491026682231, "learning_rate": 8.562671847286707e-06, "loss": 11.6852, "step": 32001 }, { "epoch": 1.7426367986495408, "grad_norm": 0.5892238812937414, "learning_rate": 8.559101935183944e-06, "loss": 11.7823, "step": 32002 }, { "epoch": 1.7426912526461238, "grad_norm": 0.5680434408571875, "learning_rate": 8.555532734140959e-06, "loss": 11.7483, "step": 32003 }, { "epoch": 1.7427457066427068, "grad_norm": 0.5022921114212071, "learning_rate": 8.551964244185474e-06, "loss": 11.7121, "step": 32004 }, { "epoch": 1.7428001606392898, "grad_norm": 0.5543481251024199, "learning_rate": 8.548396465345265e-06, "loss": 11.7405, "step": 32005 }, { "epoch": 1.7428546146358728, "grad_norm": 0.5417179749654835, "learning_rate": 8.544829397648046e-06, "loss": 11.7747, "step": 32006 }, { "epoch": 1.742909068632456, "grad_norm": 0.5116342225880156, "learning_rate": 8.541263041121584e-06, "loss": 11.7312, "step": 32007 }, { "epoch": 1.742963522629039, "grad_norm": 0.512291100772166, "learning_rate": 8.537697395793586e-06, "loss": 11.7384, "step": 32008 }, { "epoch": 1.743017976625622, "grad_norm": 0.6432909811509387, "learning_rate": 8.534132461691779e-06, "loss": 11.7537, "step": 32009 }, { "epoch": 1.743072430622205, "grad_norm": 0.5673547760782457, "learning_rate": 8.530568238843928e-06, "loss": 11.782, "step": 32010 }, { "epoch": 1.743126884618788, "grad_norm": 0.5607694313388282, "learning_rate": 8.52700472727771e-06, "loss": 11.6333, "step": 32011 }, { "epoch": 1.7431813386153712, "grad_norm": 0.4913740142745817, "learning_rate": 8.523441927020848e-06, "loss": 11.7803, "step": 32012 }, { "epoch": 1.7432357926119542, "grad_norm": 0.5694761407372012, "learning_rate": 8.519879838101031e-06, "loss": 11.7153, "step": 32013 }, { "epoch": 1.7432902466085372, "grad_norm": 0.6028894497907186, "learning_rate": 8.516318460545958e-06, "loss": 11.9751, "step": 32014 }, { "epoch": 1.7433447006051201, "grad_norm": 0.5507114254422816, "learning_rate": 8.512757794383353e-06, "loss": 11.8017, "step": 32015 }, { "epoch": 1.7433991546017031, "grad_norm": 0.5972184955355894, "learning_rate": 8.50919783964087e-06, "loss": 11.739, "step": 32016 }, { "epoch": 1.7434536085982861, "grad_norm": 0.5857228924141316, "learning_rate": 8.505638596346233e-06, "loss": 11.6848, "step": 32017 }, { "epoch": 1.7435080625948691, "grad_norm": 0.562031072838741, "learning_rate": 8.502080064527063e-06, "loss": 11.7777, "step": 32018 }, { "epoch": 1.7435625165914521, "grad_norm": 0.7034782977047336, "learning_rate": 8.498522244211093e-06, "loss": 11.835, "step": 32019 }, { "epoch": 1.743616970588035, "grad_norm": 0.5649368271146247, "learning_rate": 8.494965135425937e-06, "loss": 11.8731, "step": 32020 }, { "epoch": 1.743671424584618, "grad_norm": 0.5436977892164904, "learning_rate": 8.491408738199291e-06, "loss": 11.8892, "step": 32021 }, { "epoch": 1.743725878581201, "grad_norm": 0.5559043028084236, "learning_rate": 8.487853052558791e-06, "loss": 11.9061, "step": 32022 }, { "epoch": 1.743780332577784, "grad_norm": 0.5133931781143667, "learning_rate": 8.484298078532083e-06, "loss": 11.7108, "step": 32023 }, { "epoch": 1.743834786574367, "grad_norm": 0.5480535266969013, "learning_rate": 8.480743816146818e-06, "loss": 11.7956, "step": 32024 }, { "epoch": 1.74388924057095, "grad_norm": 0.5323917940609546, "learning_rate": 8.477190265430668e-06, "loss": 11.8297, "step": 32025 }, { "epoch": 1.743943694567533, "grad_norm": 0.5391719565254227, "learning_rate": 8.473637426411196e-06, "loss": 11.6837, "step": 32026 }, { "epoch": 1.743998148564116, "grad_norm": 0.5212312196844444, "learning_rate": 8.470085299116103e-06, "loss": 11.7863, "step": 32027 }, { "epoch": 1.744052602560699, "grad_norm": 0.5708364673038417, "learning_rate": 8.466533883572947e-06, "loss": 11.8323, "step": 32028 }, { "epoch": 1.744107056557282, "grad_norm": 0.5751547927852145, "learning_rate": 8.462983179809391e-06, "loss": 11.8152, "step": 32029 }, { "epoch": 1.7441615105538653, "grad_norm": 0.48722597878671126, "learning_rate": 8.459433187853016e-06, "loss": 11.8481, "step": 32030 }, { "epoch": 1.7442159645504483, "grad_norm": 0.5742185937981873, "learning_rate": 8.455883907731465e-06, "loss": 11.7642, "step": 32031 }, { "epoch": 1.7442704185470312, "grad_norm": 0.6257192235986521, "learning_rate": 8.452335339472305e-06, "loss": 11.8823, "step": 32032 }, { "epoch": 1.7443248725436142, "grad_norm": 0.527745318790352, "learning_rate": 8.448787483103116e-06, "loss": 11.7927, "step": 32033 }, { "epoch": 1.7443793265401972, "grad_norm": 0.6222700022992099, "learning_rate": 8.445240338651527e-06, "loss": 11.871, "step": 32034 }, { "epoch": 1.7444337805367802, "grad_norm": 0.5487261637021156, "learning_rate": 8.441693906145088e-06, "loss": 11.5871, "step": 32035 }, { "epoch": 1.7444882345333634, "grad_norm": 0.5047141327693643, "learning_rate": 8.438148185611395e-06, "loss": 11.6439, "step": 32036 }, { "epoch": 1.7445426885299464, "grad_norm": 0.5784583151736736, "learning_rate": 8.434603177078027e-06, "loss": 11.7275, "step": 32037 }, { "epoch": 1.7445971425265294, "grad_norm": 0.5229644104779019, "learning_rate": 8.43105888057253e-06, "loss": 11.6475, "step": 32038 }, { "epoch": 1.7446515965231124, "grad_norm": 0.5699041812977644, "learning_rate": 8.427515296122491e-06, "loss": 11.7752, "step": 32039 }, { "epoch": 1.7447060505196954, "grad_norm": 0.5957268317513674, "learning_rate": 8.423972423755433e-06, "loss": 11.9105, "step": 32040 }, { "epoch": 1.7447605045162784, "grad_norm": 0.539938225679523, "learning_rate": 8.420430263498935e-06, "loss": 11.7252, "step": 32041 }, { "epoch": 1.7448149585128614, "grad_norm": 0.5709212899677252, "learning_rate": 8.41688881538053e-06, "loss": 11.739, "step": 32042 }, { "epoch": 1.7448694125094444, "grad_norm": 0.5266446459566502, "learning_rate": 8.41334807942774e-06, "loss": 11.7513, "step": 32043 }, { "epoch": 1.7449238665060274, "grad_norm": 0.5677609274402388, "learning_rate": 8.409808055668134e-06, "loss": 11.8223, "step": 32044 }, { "epoch": 1.7449783205026104, "grad_norm": 0.5532136403402126, "learning_rate": 8.406268744129209e-06, "loss": 11.8705, "step": 32045 }, { "epoch": 1.7450327744991934, "grad_norm": 0.5151713012528072, "learning_rate": 8.4027301448385e-06, "loss": 11.7093, "step": 32046 }, { "epoch": 1.7450872284957764, "grad_norm": 0.6215215255628963, "learning_rate": 8.399192257823518e-06, "loss": 11.7937, "step": 32047 }, { "epoch": 1.7451416824923593, "grad_norm": 0.6201131430276449, "learning_rate": 8.395655083111776e-06, "loss": 11.8427, "step": 32048 }, { "epoch": 1.7451961364889423, "grad_norm": 0.5209650258702061, "learning_rate": 8.392118620730794e-06, "loss": 11.7063, "step": 32049 }, { "epoch": 1.7452505904855253, "grad_norm": 0.5612328279856644, "learning_rate": 8.38858287070805e-06, "loss": 11.9087, "step": 32050 }, { "epoch": 1.7453050444821083, "grad_norm": 0.564227927612779, "learning_rate": 8.385047833071058e-06, "loss": 11.6975, "step": 32051 }, { "epoch": 1.7453594984786913, "grad_norm": 0.567497662223573, "learning_rate": 8.381513507847306e-06, "loss": 11.8284, "step": 32052 }, { "epoch": 1.7454139524752745, "grad_norm": 0.5363259550569481, "learning_rate": 8.377979895064248e-06, "loss": 11.7159, "step": 32053 }, { "epoch": 1.7454684064718575, "grad_norm": 0.590642468655495, "learning_rate": 8.374446994749396e-06, "loss": 11.7312, "step": 32054 }, { "epoch": 1.7455228604684405, "grad_norm": 0.5465002820785123, "learning_rate": 8.370914806930198e-06, "loss": 11.6307, "step": 32055 }, { "epoch": 1.7455773144650235, "grad_norm": 0.5888990470494139, "learning_rate": 8.367383331634148e-06, "loss": 11.8342, "step": 32056 }, { "epoch": 1.7456317684616065, "grad_norm": 0.5746100887956731, "learning_rate": 8.363852568888686e-06, "loss": 11.6764, "step": 32057 }, { "epoch": 1.7456862224581895, "grad_norm": 0.5887223194598696, "learning_rate": 8.360322518721265e-06, "loss": 11.8901, "step": 32058 }, { "epoch": 1.7457406764547727, "grad_norm": 0.5108734790458672, "learning_rate": 8.35679318115935e-06, "loss": 11.7426, "step": 32059 }, { "epoch": 1.7457951304513557, "grad_norm": 0.5690170304757952, "learning_rate": 8.353264556230378e-06, "loss": 11.6658, "step": 32060 }, { "epoch": 1.7458495844479387, "grad_norm": 0.5176612902090224, "learning_rate": 8.349736643961813e-06, "loss": 11.6612, "step": 32061 }, { "epoch": 1.7459040384445217, "grad_norm": 0.5799500284494544, "learning_rate": 8.346209444381048e-06, "loss": 11.8357, "step": 32062 }, { "epoch": 1.7459584924411047, "grad_norm": 0.5396245797154937, "learning_rate": 8.342682957515513e-06, "loss": 11.8793, "step": 32063 }, { "epoch": 1.7460129464376877, "grad_norm": 0.5222696040881784, "learning_rate": 8.339157183392666e-06, "loss": 11.7805, "step": 32064 }, { "epoch": 1.7460674004342707, "grad_norm": 0.550740507352698, "learning_rate": 8.335632122039893e-06, "loss": 11.7638, "step": 32065 }, { "epoch": 1.7461218544308537, "grad_norm": 0.5574886696018079, "learning_rate": 8.332107773484633e-06, "loss": 11.7661, "step": 32066 }, { "epoch": 1.7461763084274367, "grad_norm": 0.6567091073380188, "learning_rate": 8.328584137754259e-06, "loss": 11.9116, "step": 32067 }, { "epoch": 1.7462307624240196, "grad_norm": 0.5897784852784564, "learning_rate": 8.325061214876195e-06, "loss": 11.6664, "step": 32068 }, { "epoch": 1.7462852164206026, "grad_norm": 0.5467779883960694, "learning_rate": 8.32153900487782e-06, "loss": 11.6983, "step": 32069 }, { "epoch": 1.7463396704171856, "grad_norm": 0.5084861598687963, "learning_rate": 8.318017507786535e-06, "loss": 11.6828, "step": 32070 }, { "epoch": 1.7463941244137686, "grad_norm": 0.5764218627558026, "learning_rate": 8.31449672362975e-06, "loss": 11.6875, "step": 32071 }, { "epoch": 1.7464485784103516, "grad_norm": 0.5343029023386447, "learning_rate": 8.310976652434776e-06, "loss": 11.8479, "step": 32072 }, { "epoch": 1.7465030324069346, "grad_norm": 0.5455757670649288, "learning_rate": 8.307457294229038e-06, "loss": 11.7445, "step": 32073 }, { "epoch": 1.7465574864035176, "grad_norm": 0.5637366874826736, "learning_rate": 8.303938649039888e-06, "loss": 11.5759, "step": 32074 }, { "epoch": 1.7466119404001006, "grad_norm": 0.541970138016841, "learning_rate": 8.300420716894686e-06, "loss": 11.8475, "step": 32075 }, { "epoch": 1.7466663943966836, "grad_norm": 0.568620399398183, "learning_rate": 8.296903497820808e-06, "loss": 11.8766, "step": 32076 }, { "epoch": 1.7467208483932668, "grad_norm": 0.5413186586308619, "learning_rate": 8.293386991845553e-06, "loss": 11.8918, "step": 32077 }, { "epoch": 1.7467753023898498, "grad_norm": 0.5654869300695888, "learning_rate": 8.289871198996334e-06, "loss": 11.8282, "step": 32078 }, { "epoch": 1.7468297563864328, "grad_norm": 0.599210642396545, "learning_rate": 8.286356119300432e-06, "loss": 11.8196, "step": 32079 }, { "epoch": 1.7468842103830158, "grad_norm": 0.5681529131139145, "learning_rate": 8.28284175278521e-06, "loss": 11.7322, "step": 32080 }, { "epoch": 1.7469386643795988, "grad_norm": 0.5764930737237418, "learning_rate": 8.279328099478023e-06, "loss": 11.8125, "step": 32081 }, { "epoch": 1.746993118376182, "grad_norm": 0.6045864425024923, "learning_rate": 8.27581515940612e-06, "loss": 11.7184, "step": 32082 }, { "epoch": 1.747047572372765, "grad_norm": 0.5662377367856796, "learning_rate": 8.272302932596888e-06, "loss": 11.9217, "step": 32083 }, { "epoch": 1.747102026369348, "grad_norm": 0.6057743589010771, "learning_rate": 8.268791419077592e-06, "loss": 11.8011, "step": 32084 }, { "epoch": 1.747156480365931, "grad_norm": 0.5613648064944241, "learning_rate": 8.265280618875559e-06, "loss": 11.8049, "step": 32085 }, { "epoch": 1.747210934362514, "grad_norm": 0.48816779325586207, "learning_rate": 8.261770532018098e-06, "loss": 11.6951, "step": 32086 }, { "epoch": 1.747265388359097, "grad_norm": 0.6015002130304541, "learning_rate": 8.258261158532487e-06, "loss": 11.8388, "step": 32087 }, { "epoch": 1.74731984235568, "grad_norm": 0.5379049160926599, "learning_rate": 8.254752498446028e-06, "loss": 11.8134, "step": 32088 }, { "epoch": 1.747374296352263, "grad_norm": 0.536591873539549, "learning_rate": 8.251244551785987e-06, "loss": 11.7769, "step": 32089 }, { "epoch": 1.747428750348846, "grad_norm": 0.5370086668121891, "learning_rate": 8.247737318579673e-06, "loss": 11.8478, "step": 32090 }, { "epoch": 1.747483204345429, "grad_norm": 0.5393090808096153, "learning_rate": 8.244230798854347e-06, "loss": 11.6601, "step": 32091 }, { "epoch": 1.747537658342012, "grad_norm": 0.6406910306153049, "learning_rate": 8.24072499263724e-06, "loss": 11.8621, "step": 32092 }, { "epoch": 1.747592112338595, "grad_norm": 0.5317494391056862, "learning_rate": 8.237219899955662e-06, "loss": 11.7992, "step": 32093 }, { "epoch": 1.747646566335178, "grad_norm": 0.5756798275330044, "learning_rate": 8.233715520836837e-06, "loss": 11.7773, "step": 32094 }, { "epoch": 1.747701020331761, "grad_norm": 0.5776534959828046, "learning_rate": 8.230211855308057e-06, "loss": 11.8673, "step": 32095 }, { "epoch": 1.7477554743283439, "grad_norm": 0.551942844959109, "learning_rate": 8.226708903396507e-06, "loss": 11.7723, "step": 32096 }, { "epoch": 1.7478099283249269, "grad_norm": 0.5698823976918336, "learning_rate": 8.223206665129468e-06, "loss": 11.649, "step": 32097 }, { "epoch": 1.7478643823215099, "grad_norm": 0.5585603105322928, "learning_rate": 8.219705140534173e-06, "loss": 11.837, "step": 32098 }, { "epoch": 1.7479188363180929, "grad_norm": 0.5582876530335303, "learning_rate": 8.216204329637834e-06, "loss": 11.7641, "step": 32099 }, { "epoch": 1.747973290314676, "grad_norm": 0.6105735570584604, "learning_rate": 8.212704232467694e-06, "loss": 11.7557, "step": 32100 }, { "epoch": 1.748027744311259, "grad_norm": 0.5067331004793603, "learning_rate": 8.209204849050944e-06, "loss": 11.7947, "step": 32101 }, { "epoch": 1.748082198307842, "grad_norm": 0.5115845141990338, "learning_rate": 8.205706179414829e-06, "loss": 11.7104, "step": 32102 }, { "epoch": 1.748136652304425, "grad_norm": 0.5998195876428078, "learning_rate": 8.202208223586538e-06, "loss": 11.8674, "step": 32103 }, { "epoch": 1.748191106301008, "grad_norm": 0.5431466281900563, "learning_rate": 8.198710981593249e-06, "loss": 11.8214, "step": 32104 }, { "epoch": 1.748245560297591, "grad_norm": 0.5681776478049104, "learning_rate": 8.195214453462196e-06, "loss": 11.7089, "step": 32105 }, { "epoch": 1.7483000142941743, "grad_norm": 0.5113870119080456, "learning_rate": 8.191718639220536e-06, "loss": 11.7082, "step": 32106 }, { "epoch": 1.7483544682907572, "grad_norm": 0.5602182821619474, "learning_rate": 8.188223538895456e-06, "loss": 11.7769, "step": 32107 }, { "epoch": 1.7484089222873402, "grad_norm": 0.5190456682225881, "learning_rate": 8.184729152514182e-06, "loss": 11.6851, "step": 32108 }, { "epoch": 1.7484633762839232, "grad_norm": 0.5102597510300653, "learning_rate": 8.181235480103822e-06, "loss": 11.7892, "step": 32109 }, { "epoch": 1.7485178302805062, "grad_norm": 0.5306065048933504, "learning_rate": 8.1777425216916e-06, "loss": 11.8607, "step": 32110 }, { "epoch": 1.7485722842770892, "grad_norm": 0.5225156835424806, "learning_rate": 8.174250277304628e-06, "loss": 11.7497, "step": 32111 }, { "epoch": 1.7486267382736722, "grad_norm": 0.5087099348069014, "learning_rate": 8.170758746970097e-06, "loss": 11.7138, "step": 32112 }, { "epoch": 1.7486811922702552, "grad_norm": 0.5325545484352462, "learning_rate": 8.167267930715161e-06, "loss": 11.6696, "step": 32113 }, { "epoch": 1.7487356462668382, "grad_norm": 0.5834730509714643, "learning_rate": 8.163777828566921e-06, "loss": 11.8353, "step": 32114 }, { "epoch": 1.7487901002634212, "grad_norm": 0.5603422351374289, "learning_rate": 8.160288440552565e-06, "loss": 11.6679, "step": 32115 }, { "epoch": 1.7488445542600042, "grad_norm": 0.509828969330385, "learning_rate": 8.156799766699186e-06, "loss": 11.6698, "step": 32116 }, { "epoch": 1.7488990082565872, "grad_norm": 0.5374525294611626, "learning_rate": 8.153311807033958e-06, "loss": 11.6937, "step": 32117 }, { "epoch": 1.7489534622531702, "grad_norm": 0.554043448104786, "learning_rate": 8.149824561583962e-06, "loss": 11.734, "step": 32118 }, { "epoch": 1.7490079162497532, "grad_norm": 0.5397036647979724, "learning_rate": 8.146338030376332e-06, "loss": 11.7771, "step": 32119 }, { "epoch": 1.7490623702463362, "grad_norm": 0.5295951749240384, "learning_rate": 8.14285221343819e-06, "loss": 11.7543, "step": 32120 }, { "epoch": 1.7491168242429191, "grad_norm": 0.5525421236400031, "learning_rate": 8.139367110796626e-06, "loss": 11.6253, "step": 32121 }, { "epoch": 1.7491712782395021, "grad_norm": 0.5072333533669457, "learning_rate": 8.135882722478772e-06, "loss": 11.7333, "step": 32122 }, { "epoch": 1.7492257322360854, "grad_norm": 0.4910920049852444, "learning_rate": 8.132399048511685e-06, "loss": 11.7351, "step": 32123 }, { "epoch": 1.7492801862326683, "grad_norm": 0.5801308156761865, "learning_rate": 8.128916088922467e-06, "loss": 11.6644, "step": 32124 }, { "epoch": 1.7493346402292513, "grad_norm": 0.6177879020362912, "learning_rate": 8.125433843738206e-06, "loss": 11.7532, "step": 32125 }, { "epoch": 1.7493890942258343, "grad_norm": 0.5496992994543061, "learning_rate": 8.12195231298597e-06, "loss": 11.8126, "step": 32126 }, { "epoch": 1.7494435482224173, "grad_norm": 0.5578115138425902, "learning_rate": 8.118471496692859e-06, "loss": 11.8202, "step": 32127 }, { "epoch": 1.7494980022190003, "grad_norm": 0.5027642151831847, "learning_rate": 8.114991394885908e-06, "loss": 11.6968, "step": 32128 }, { "epoch": 1.7495524562155835, "grad_norm": 0.51993964938592, "learning_rate": 8.111512007592181e-06, "loss": 11.7713, "step": 32129 }, { "epoch": 1.7496069102121665, "grad_norm": 0.5210930704701255, "learning_rate": 8.108033334838771e-06, "loss": 11.7823, "step": 32130 }, { "epoch": 1.7496613642087495, "grad_norm": 0.532312018316325, "learning_rate": 8.104555376652689e-06, "loss": 11.5907, "step": 32131 }, { "epoch": 1.7497158182053325, "grad_norm": 0.5969973555377257, "learning_rate": 8.101078133061025e-06, "loss": 11.8562, "step": 32132 }, { "epoch": 1.7497702722019155, "grad_norm": 0.5720486340891017, "learning_rate": 8.097601604090765e-06, "loss": 11.6376, "step": 32133 }, { "epoch": 1.7498247261984985, "grad_norm": 0.5672396675148712, "learning_rate": 8.094125789768959e-06, "loss": 11.8103, "step": 32134 }, { "epoch": 1.7498791801950815, "grad_norm": 0.5272982053436494, "learning_rate": 8.090650690122659e-06, "loss": 11.6549, "step": 32135 }, { "epoch": 1.7499336341916645, "grad_norm": 0.5139232150430204, "learning_rate": 8.087176305178856e-06, "loss": 11.7585, "step": 32136 }, { "epoch": 1.7499880881882475, "grad_norm": 0.5509708901767132, "learning_rate": 8.083702634964595e-06, "loss": 11.7857, "step": 32137 }, { "epoch": 1.7500425421848305, "grad_norm": 0.5744520180512087, "learning_rate": 8.080229679506868e-06, "loss": 11.7313, "step": 32138 }, { "epoch": 1.7500969961814135, "grad_norm": 0.5655000383945293, "learning_rate": 8.076757438832706e-06, "loss": 11.9291, "step": 32139 }, { "epoch": 1.7501514501779964, "grad_norm": 0.5862390754138455, "learning_rate": 8.073285912969075e-06, "loss": 11.8236, "step": 32140 }, { "epoch": 1.7502059041745794, "grad_norm": 0.5722237955712688, "learning_rate": 8.06981510194299e-06, "loss": 11.9508, "step": 32141 }, { "epoch": 1.7502603581711624, "grad_norm": 0.5175385689487015, "learning_rate": 8.066345005781473e-06, "loss": 11.7662, "step": 32142 }, { "epoch": 1.7503148121677454, "grad_norm": 0.5907442384318156, "learning_rate": 8.062875624511446e-06, "loss": 11.767, "step": 32143 }, { "epoch": 1.7503692661643284, "grad_norm": 0.5211366070284962, "learning_rate": 8.05940695815992e-06, "loss": 11.8284, "step": 32144 }, { "epoch": 1.7504237201609114, "grad_norm": 0.5708486403797793, "learning_rate": 8.055939006753855e-06, "loss": 11.8896, "step": 32145 }, { "epoch": 1.7504781741574946, "grad_norm": 0.5930456298382185, "learning_rate": 8.052471770320236e-06, "loss": 11.7559, "step": 32146 }, { "epoch": 1.7505326281540776, "grad_norm": 0.5351082534180633, "learning_rate": 8.049005248886021e-06, "loss": 11.7867, "step": 32147 }, { "epoch": 1.7505870821506606, "grad_norm": 0.5900071507189579, "learning_rate": 8.045539442478144e-06, "loss": 11.8933, "step": 32148 }, { "epoch": 1.7506415361472436, "grad_norm": 0.5572461041020155, "learning_rate": 8.042074351123596e-06, "loss": 11.9106, "step": 32149 }, { "epoch": 1.7506959901438266, "grad_norm": 0.6163914801817144, "learning_rate": 8.038609974849276e-06, "loss": 11.9965, "step": 32150 }, { "epoch": 1.7507504441404096, "grad_norm": 0.7564647393300263, "learning_rate": 8.035146313682173e-06, "loss": 11.8039, "step": 32151 }, { "epoch": 1.7508048981369928, "grad_norm": 0.48232305983174045, "learning_rate": 8.03168336764919e-06, "loss": 11.7294, "step": 32152 }, { "epoch": 1.7508593521335758, "grad_norm": 0.4935820149526705, "learning_rate": 8.028221136777237e-06, "loss": 11.8181, "step": 32153 }, { "epoch": 1.7509138061301588, "grad_norm": 0.5416017889021689, "learning_rate": 8.024759621093281e-06, "loss": 11.8525, "step": 32154 }, { "epoch": 1.7509682601267418, "grad_norm": 0.5129958786548087, "learning_rate": 8.021298820624212e-06, "loss": 11.7635, "step": 32155 }, { "epoch": 1.7510227141233248, "grad_norm": 0.5283839041939262, "learning_rate": 8.017838735396932e-06, "loss": 11.7942, "step": 32156 }, { "epoch": 1.7510771681199078, "grad_norm": 0.5214212576874, "learning_rate": 8.014379365438396e-06, "loss": 11.8274, "step": 32157 }, { "epoch": 1.7511316221164908, "grad_norm": 0.5444012761202118, "learning_rate": 8.010920710775437e-06, "loss": 11.7991, "step": 32158 }, { "epoch": 1.7511860761130738, "grad_norm": 0.5768476318510125, "learning_rate": 8.007462771435015e-06, "loss": 11.79, "step": 32159 }, { "epoch": 1.7512405301096567, "grad_norm": 0.5243840732132846, "learning_rate": 8.00400554744397e-06, "loss": 11.6581, "step": 32160 }, { "epoch": 1.7512949841062397, "grad_norm": 0.5432265632365408, "learning_rate": 8.00054903882922e-06, "loss": 11.8268, "step": 32161 }, { "epoch": 1.7513494381028227, "grad_norm": 0.49905626161452704, "learning_rate": 7.997093245617638e-06, "loss": 11.8295, "step": 32162 }, { "epoch": 1.7514038920994057, "grad_norm": 0.5509428819257189, "learning_rate": 7.99363816783606e-06, "loss": 11.8169, "step": 32163 }, { "epoch": 1.7514583460959887, "grad_norm": 0.5070706101941347, "learning_rate": 7.990183805511398e-06, "loss": 11.6974, "step": 32164 }, { "epoch": 1.7515128000925717, "grad_norm": 0.5655243994198613, "learning_rate": 7.986730158670485e-06, "loss": 11.8078, "step": 32165 }, { "epoch": 1.7515672540891547, "grad_norm": 0.6123305922211647, "learning_rate": 7.983277227340203e-06, "loss": 11.9129, "step": 32166 }, { "epoch": 1.7516217080857377, "grad_norm": 0.538501649298484, "learning_rate": 7.979825011547381e-06, "loss": 11.8663, "step": 32167 }, { "epoch": 1.7516761620823207, "grad_norm": 0.5830568664258794, "learning_rate": 7.976373511318857e-06, "loss": 11.7276, "step": 32168 }, { "epoch": 1.7517306160789037, "grad_norm": 0.5367117301081419, "learning_rate": 7.972922726681508e-06, "loss": 11.7983, "step": 32169 }, { "epoch": 1.751785070075487, "grad_norm": 0.528729286250279, "learning_rate": 7.969472657662136e-06, "loss": 11.6916, "step": 32170 }, { "epoch": 1.7518395240720699, "grad_norm": 0.48942031309086037, "learning_rate": 7.966023304287585e-06, "loss": 11.7504, "step": 32171 }, { "epoch": 1.7518939780686529, "grad_norm": 0.5260973860763183, "learning_rate": 7.962574666584676e-06, "loss": 11.8194, "step": 32172 }, { "epoch": 1.7519484320652359, "grad_norm": 0.6013993876569956, "learning_rate": 7.959126744580203e-06, "loss": 11.9182, "step": 32173 }, { "epoch": 1.7520028860618189, "grad_norm": 0.5291298509488411, "learning_rate": 7.955679538301008e-06, "loss": 11.6776, "step": 32174 }, { "epoch": 1.7520573400584019, "grad_norm": 0.5724929330919186, "learning_rate": 7.952233047773871e-06, "loss": 11.6852, "step": 32175 }, { "epoch": 1.752111794054985, "grad_norm": 0.5458027967153894, "learning_rate": 7.948787273025626e-06, "loss": 11.8398, "step": 32176 }, { "epoch": 1.752166248051568, "grad_norm": 0.5948731986463153, "learning_rate": 7.945342214083029e-06, "loss": 11.8019, "step": 32177 }, { "epoch": 1.752220702048151, "grad_norm": 0.5175419612460501, "learning_rate": 7.941897870972881e-06, "loss": 11.8234, "step": 32178 }, { "epoch": 1.752275156044734, "grad_norm": 0.5192168255454845, "learning_rate": 7.938454243722004e-06, "loss": 11.7962, "step": 32179 }, { "epoch": 1.752329610041317, "grad_norm": 0.5530401944175701, "learning_rate": 7.935011332357112e-06, "loss": 11.8509, "step": 32180 }, { "epoch": 1.7523840640379, "grad_norm": 0.5483546100052389, "learning_rate": 7.931569136905048e-06, "loss": 11.8307, "step": 32181 }, { "epoch": 1.752438518034483, "grad_norm": 0.5308029554244464, "learning_rate": 7.928127657392526e-06, "loss": 11.7503, "step": 32182 }, { "epoch": 1.752492972031066, "grad_norm": 0.6463200199516159, "learning_rate": 7.92468689384631e-06, "loss": 11.9085, "step": 32183 }, { "epoch": 1.752547426027649, "grad_norm": 0.6052874888046295, "learning_rate": 7.921246846293195e-06, "loss": 11.8176, "step": 32184 }, { "epoch": 1.752601880024232, "grad_norm": 0.5277665412074546, "learning_rate": 7.917807514759879e-06, "loss": 11.7246, "step": 32185 }, { "epoch": 1.752656334020815, "grad_norm": 0.5577879226699427, "learning_rate": 7.914368899273161e-06, "loss": 11.8003, "step": 32186 }, { "epoch": 1.752710788017398, "grad_norm": 0.5877162949416089, "learning_rate": 7.910930999859734e-06, "loss": 11.836, "step": 32187 }, { "epoch": 1.752765242013981, "grad_norm": 0.5768428727662831, "learning_rate": 7.907493816546362e-06, "loss": 11.7091, "step": 32188 }, { "epoch": 1.752819696010564, "grad_norm": 0.5081975865111498, "learning_rate": 7.90405734935975e-06, "loss": 11.7753, "step": 32189 }, { "epoch": 1.752874150007147, "grad_norm": 0.5409616280016912, "learning_rate": 7.90062159832663e-06, "loss": 11.8314, "step": 32190 }, { "epoch": 1.75292860400373, "grad_norm": 0.6280703923892754, "learning_rate": 7.897186563473735e-06, "loss": 11.8578, "step": 32191 }, { "epoch": 1.752983058000313, "grad_norm": 0.5890986189124914, "learning_rate": 7.893752244827768e-06, "loss": 11.787, "step": 32192 }, { "epoch": 1.7530375119968962, "grad_norm": 0.5070755262227107, "learning_rate": 7.89031864241543e-06, "loss": 11.7177, "step": 32193 }, { "epoch": 1.7530919659934792, "grad_norm": 0.49700882147831205, "learning_rate": 7.886885756263407e-06, "loss": 11.8008, "step": 32194 }, { "epoch": 1.7531464199900622, "grad_norm": 0.5594109306667665, "learning_rate": 7.883453586398404e-06, "loss": 11.7856, "step": 32195 }, { "epoch": 1.7532008739866451, "grad_norm": 0.5707705400748605, "learning_rate": 7.88002213284712e-06, "loss": 11.7651, "step": 32196 }, { "epoch": 1.7532553279832281, "grad_norm": 0.52791498707753, "learning_rate": 7.876591395636234e-06, "loss": 11.799, "step": 32197 }, { "epoch": 1.7533097819798111, "grad_norm": 0.5885877943751425, "learning_rate": 7.873161374792426e-06, "loss": 11.6264, "step": 32198 }, { "epoch": 1.7533642359763943, "grad_norm": 0.5577006781823926, "learning_rate": 7.86973207034235e-06, "loss": 11.71, "step": 32199 }, { "epoch": 1.7534186899729773, "grad_norm": 0.5008494017413169, "learning_rate": 7.86630348231271e-06, "loss": 11.7965, "step": 32200 }, { "epoch": 1.7534731439695603, "grad_norm": 0.5661881923445657, "learning_rate": 7.862875610730125e-06, "loss": 11.884, "step": 32201 }, { "epoch": 1.7535275979661433, "grad_norm": 0.6334994442026655, "learning_rate": 7.859448455621288e-06, "loss": 11.8528, "step": 32202 }, { "epoch": 1.7535820519627263, "grad_norm": 0.5008431638330966, "learning_rate": 7.856022017012832e-06, "loss": 11.7394, "step": 32203 }, { "epoch": 1.7536365059593093, "grad_norm": 0.5261220463483217, "learning_rate": 7.852596294931391e-06, "loss": 11.7669, "step": 32204 }, { "epoch": 1.7536909599558923, "grad_norm": 0.5660318987309142, "learning_rate": 7.8491712894036e-06, "loss": 11.7858, "step": 32205 }, { "epoch": 1.7537454139524753, "grad_norm": 0.5383905058425613, "learning_rate": 7.845747000456138e-06, "loss": 11.7916, "step": 32206 }, { "epoch": 1.7537998679490583, "grad_norm": 0.5486025497378929, "learning_rate": 7.842323428115572e-06, "loss": 11.8162, "step": 32207 }, { "epoch": 1.7538543219456413, "grad_norm": 0.5545640399730635, "learning_rate": 7.838900572408581e-06, "loss": 11.812, "step": 32208 }, { "epoch": 1.7539087759422243, "grad_norm": 0.5135894571875912, "learning_rate": 7.835478433361732e-06, "loss": 11.7503, "step": 32209 }, { "epoch": 1.7539632299388073, "grad_norm": 0.5727064257373675, "learning_rate": 7.83205701100168e-06, "loss": 11.7242, "step": 32210 }, { "epoch": 1.7540176839353903, "grad_norm": 0.48632272167826107, "learning_rate": 7.828636305354986e-06, "loss": 11.8623, "step": 32211 }, { "epoch": 1.7540721379319733, "grad_norm": 0.5449425851250413, "learning_rate": 7.825216316448292e-06, "loss": 11.7668, "step": 32212 }, { "epoch": 1.7541265919285562, "grad_norm": 0.5409883419744331, "learning_rate": 7.821797044308177e-06, "loss": 11.8818, "step": 32213 }, { "epoch": 1.7541810459251392, "grad_norm": 0.5330606175454291, "learning_rate": 7.818378488961208e-06, "loss": 11.8365, "step": 32214 }, { "epoch": 1.7542354999217222, "grad_norm": 0.5497375834604316, "learning_rate": 7.814960650434011e-06, "loss": 11.8318, "step": 32215 }, { "epoch": 1.7542899539183054, "grad_norm": 0.6446595210524717, "learning_rate": 7.811543528753106e-06, "loss": 11.8336, "step": 32216 }, { "epoch": 1.7543444079148884, "grad_norm": 0.5698699944627266, "learning_rate": 7.808127123945108e-06, "loss": 11.6559, "step": 32217 }, { "epoch": 1.7543988619114714, "grad_norm": 0.522611867715547, "learning_rate": 7.804711436036593e-06, "loss": 11.7201, "step": 32218 }, { "epoch": 1.7544533159080544, "grad_norm": 0.5714231990359632, "learning_rate": 7.801296465054087e-06, "loss": 11.8032, "step": 32219 }, { "epoch": 1.7545077699046374, "grad_norm": 0.540221801484937, "learning_rate": 7.797882211024177e-06, "loss": 11.8662, "step": 32220 }, { "epoch": 1.7545622239012204, "grad_norm": 0.5585542781002193, "learning_rate": 7.794468673973376e-06, "loss": 11.6747, "step": 32221 }, { "epoch": 1.7546166778978036, "grad_norm": 0.5320363297074058, "learning_rate": 7.791055853928264e-06, "loss": 11.7608, "step": 32222 }, { "epoch": 1.7546711318943866, "grad_norm": 0.5462074931922277, "learning_rate": 7.787643750915374e-06, "loss": 11.559, "step": 32223 }, { "epoch": 1.7547255858909696, "grad_norm": 0.535820239283221, "learning_rate": 7.784232364961208e-06, "loss": 11.7968, "step": 32224 }, { "epoch": 1.7547800398875526, "grad_norm": 0.5547533369282873, "learning_rate": 7.780821696092333e-06, "loss": 11.877, "step": 32225 }, { "epoch": 1.7548344938841356, "grad_norm": 0.6763425151967221, "learning_rate": 7.777411744335238e-06, "loss": 11.8638, "step": 32226 }, { "epoch": 1.7548889478807186, "grad_norm": 0.5207995028623225, "learning_rate": 7.77400250971645e-06, "loss": 11.7608, "step": 32227 }, { "epoch": 1.7549434018773016, "grad_norm": 0.5806136414228242, "learning_rate": 7.77059399226251e-06, "loss": 11.7124, "step": 32228 }, { "epoch": 1.7549978558738846, "grad_norm": 0.5977322860435186, "learning_rate": 7.767186191999876e-06, "loss": 11.6959, "step": 32229 }, { "epoch": 1.7550523098704676, "grad_norm": 0.5416504793801568, "learning_rate": 7.763779108955094e-06, "loss": 11.7832, "step": 32230 }, { "epoch": 1.7551067638670506, "grad_norm": 0.567089416905431, "learning_rate": 7.76037274315461e-06, "loss": 11.8185, "step": 32231 }, { "epoch": 1.7551612178636335, "grad_norm": 0.5591491809986737, "learning_rate": 7.756967094624946e-06, "loss": 11.7368, "step": 32232 }, { "epoch": 1.7552156718602165, "grad_norm": 0.5325861996181607, "learning_rate": 7.753562163392592e-06, "loss": 11.7578, "step": 32233 }, { "epoch": 1.7552701258567995, "grad_norm": 0.5506910665304571, "learning_rate": 7.750157949483983e-06, "loss": 11.8254, "step": 32234 }, { "epoch": 1.7553245798533825, "grad_norm": 0.5056868603709823, "learning_rate": 7.746754452925631e-06, "loss": 11.7153, "step": 32235 }, { "epoch": 1.7553790338499655, "grad_norm": 0.5344924082242175, "learning_rate": 7.74335167374397e-06, "loss": 11.7105, "step": 32236 }, { "epoch": 1.7554334878465485, "grad_norm": 0.5599740681400854, "learning_rate": 7.739949611965491e-06, "loss": 11.8362, "step": 32237 }, { "epoch": 1.7554879418431315, "grad_norm": 0.5271103339849061, "learning_rate": 7.736548267616628e-06, "loss": 11.5224, "step": 32238 }, { "epoch": 1.7555423958397145, "grad_norm": 0.5685243285199002, "learning_rate": 7.733147640723837e-06, "loss": 11.946, "step": 32239 }, { "epoch": 1.7555968498362977, "grad_norm": 0.5300780559426779, "learning_rate": 7.729747731313574e-06, "loss": 11.7598, "step": 32240 }, { "epoch": 1.7556513038328807, "grad_norm": 0.5029465535898692, "learning_rate": 7.726348539412254e-06, "loss": 11.7824, "step": 32241 }, { "epoch": 1.7557057578294637, "grad_norm": 0.5929412511542949, "learning_rate": 7.72295006504633e-06, "loss": 11.836, "step": 32242 }, { "epoch": 1.7557602118260467, "grad_norm": 0.565940440276524, "learning_rate": 7.719552308242239e-06, "loss": 11.7975, "step": 32243 }, { "epoch": 1.7558146658226297, "grad_norm": 0.5574182071318897, "learning_rate": 7.716155269026349e-06, "loss": 11.9, "step": 32244 }, { "epoch": 1.7558691198192127, "grad_norm": 0.49909450072414313, "learning_rate": 7.712758947425147e-06, "loss": 11.7809, "step": 32245 }, { "epoch": 1.755923573815796, "grad_norm": 0.5235263892340182, "learning_rate": 7.709363343464982e-06, "loss": 11.7689, "step": 32246 }, { "epoch": 1.7559780278123789, "grad_norm": 0.5664352797917843, "learning_rate": 7.705968457172297e-06, "loss": 11.7751, "step": 32247 }, { "epoch": 1.7560324818089619, "grad_norm": 0.5830060694994326, "learning_rate": 7.702574288573461e-06, "loss": 11.8712, "step": 32248 }, { "epoch": 1.7560869358055449, "grad_norm": 0.5566821559668119, "learning_rate": 7.699180837694908e-06, "loss": 11.8874, "step": 32249 }, { "epoch": 1.7561413898021279, "grad_norm": 0.5491128300321498, "learning_rate": 7.695788104562984e-06, "loss": 11.9275, "step": 32250 }, { "epoch": 1.7561958437987109, "grad_norm": 0.5680822424895551, "learning_rate": 7.692396089204101e-06, "loss": 11.8738, "step": 32251 }, { "epoch": 1.7562502977952938, "grad_norm": 0.6692234037475192, "learning_rate": 7.68900479164464e-06, "loss": 11.7829, "step": 32252 }, { "epoch": 1.7563047517918768, "grad_norm": 0.5513177088985287, "learning_rate": 7.685614211910951e-06, "loss": 11.7933, "step": 32253 }, { "epoch": 1.7563592057884598, "grad_norm": 0.6010922202352793, "learning_rate": 7.682224350029387e-06, "loss": 11.7873, "step": 32254 }, { "epoch": 1.7564136597850428, "grad_norm": 0.5554814044252768, "learning_rate": 7.678835206026357e-06, "loss": 11.7286, "step": 32255 }, { "epoch": 1.7564681137816258, "grad_norm": 0.5534693096387924, "learning_rate": 7.675446779928163e-06, "loss": 11.8511, "step": 32256 }, { "epoch": 1.7565225677782088, "grad_norm": 0.5657564163925257, "learning_rate": 7.672059071761196e-06, "loss": 11.8573, "step": 32257 }, { "epoch": 1.7565770217747918, "grad_norm": 0.5257295790399134, "learning_rate": 7.668672081551765e-06, "loss": 11.8729, "step": 32258 }, { "epoch": 1.7566314757713748, "grad_norm": 0.5803789243855803, "learning_rate": 7.665285809326239e-06, "loss": 11.8916, "step": 32259 }, { "epoch": 1.7566859297679578, "grad_norm": 0.6112079479631757, "learning_rate": 7.66190025511091e-06, "loss": 11.8393, "step": 32260 }, { "epoch": 1.7567403837645408, "grad_norm": 0.5415121407551673, "learning_rate": 7.658515418932144e-06, "loss": 11.868, "step": 32261 }, { "epoch": 1.7567948377611238, "grad_norm": 0.5504172781674589, "learning_rate": 7.655131300816254e-06, "loss": 11.7321, "step": 32262 }, { "epoch": 1.756849291757707, "grad_norm": 0.5419586328626931, "learning_rate": 7.651747900789551e-06, "loss": 11.7968, "step": 32263 }, { "epoch": 1.75690374575429, "grad_norm": 0.49878985276194937, "learning_rate": 7.64836521887835e-06, "loss": 11.8255, "step": 32264 }, { "epoch": 1.756958199750873, "grad_norm": 0.5384417709443856, "learning_rate": 7.644983255108928e-06, "loss": 11.6968, "step": 32265 }, { "epoch": 1.757012653747456, "grad_norm": 0.5099488674337725, "learning_rate": 7.64160200950761e-06, "loss": 11.6918, "step": 32266 }, { "epoch": 1.757067107744039, "grad_norm": 0.5744231889499453, "learning_rate": 7.638221482100694e-06, "loss": 11.7467, "step": 32267 }, { "epoch": 1.757121561740622, "grad_norm": 0.6547415147829072, "learning_rate": 7.63484167291444e-06, "loss": 11.7437, "step": 32268 }, { "epoch": 1.7571760157372052, "grad_norm": 0.5257627177962527, "learning_rate": 7.631462581975158e-06, "loss": 11.6708, "step": 32269 }, { "epoch": 1.7572304697337882, "grad_norm": 0.5846229757023956, "learning_rate": 7.628084209309106e-06, "loss": 11.8139, "step": 32270 }, { "epoch": 1.7572849237303712, "grad_norm": 0.5961961171614157, "learning_rate": 7.624706554942573e-06, "loss": 11.8151, "step": 32271 }, { "epoch": 1.7573393777269541, "grad_norm": 0.5271634897816913, "learning_rate": 7.621329618901796e-06, "loss": 11.8484, "step": 32272 }, { "epoch": 1.7573938317235371, "grad_norm": 0.490433808307365, "learning_rate": 7.617953401213074e-06, "loss": 11.7982, "step": 32273 }, { "epoch": 1.7574482857201201, "grad_norm": 0.5442882276878024, "learning_rate": 7.614577901902631e-06, "loss": 11.7319, "step": 32274 }, { "epoch": 1.7575027397167031, "grad_norm": 0.5778531847508808, "learning_rate": 7.6112031209967125e-06, "loss": 11.8007, "step": 32275 }, { "epoch": 1.7575571937132861, "grad_norm": 0.494578925015559, "learning_rate": 7.607829058521576e-06, "loss": 11.7508, "step": 32276 }, { "epoch": 1.757611647709869, "grad_norm": 0.531911092636844, "learning_rate": 7.604455714503467e-06, "loss": 11.7154, "step": 32277 }, { "epoch": 1.757666101706452, "grad_norm": 0.5440454427937312, "learning_rate": 7.601083088968586e-06, "loss": 11.8183, "step": 32278 }, { "epoch": 1.757720555703035, "grad_norm": 0.4721846534624302, "learning_rate": 7.59771118194319e-06, "loss": 11.5942, "step": 32279 }, { "epoch": 1.757775009699618, "grad_norm": 0.5711021111911527, "learning_rate": 7.594339993453481e-06, "loss": 11.7782, "step": 32280 }, { "epoch": 1.757829463696201, "grad_norm": 0.5456321476474515, "learning_rate": 7.590969523525693e-06, "loss": 11.8084, "step": 32281 }, { "epoch": 1.757883917692784, "grad_norm": 0.6153892555574109, "learning_rate": 7.587599772186005e-06, "loss": 11.7252, "step": 32282 }, { "epoch": 1.757938371689367, "grad_norm": 0.5864348175280548, "learning_rate": 7.584230739460663e-06, "loss": 11.8638, "step": 32283 }, { "epoch": 1.75799282568595, "grad_norm": 0.5530913718208338, "learning_rate": 7.5808624253758345e-06, "loss": 11.7893, "step": 32284 }, { "epoch": 1.758047279682533, "grad_norm": 0.5258033004590266, "learning_rate": 7.577494829957698e-06, "loss": 11.8255, "step": 32285 }, { "epoch": 1.7581017336791163, "grad_norm": 0.5209343895028691, "learning_rate": 7.57412795323249e-06, "loss": 11.7554, "step": 32286 }, { "epoch": 1.7581561876756993, "grad_norm": 0.6293390912174309, "learning_rate": 7.570761795226333e-06, "loss": 11.704, "step": 32287 }, { "epoch": 1.7582106416722822, "grad_norm": 0.5319007120541434, "learning_rate": 7.567396355965439e-06, "loss": 11.8324, "step": 32288 }, { "epoch": 1.7582650956688652, "grad_norm": 0.580508535233131, "learning_rate": 7.564031635475988e-06, "loss": 11.9045, "step": 32289 }, { "epoch": 1.7583195496654482, "grad_norm": 0.5068231410167017, "learning_rate": 7.560667633784113e-06, "loss": 11.7702, "step": 32290 }, { "epoch": 1.7583740036620312, "grad_norm": 0.5250683827358754, "learning_rate": 7.5573043509160055e-06, "loss": 11.7798, "step": 32291 }, { "epoch": 1.7584284576586144, "grad_norm": 0.5100140078716555, "learning_rate": 7.553941786897778e-06, "loss": 11.7662, "step": 32292 }, { "epoch": 1.7584829116551974, "grad_norm": 0.5553137085994898, "learning_rate": 7.5505799417556315e-06, "loss": 11.6263, "step": 32293 }, { "epoch": 1.7585373656517804, "grad_norm": 0.49266713690707487, "learning_rate": 7.5472188155156666e-06, "loss": 11.682, "step": 32294 }, { "epoch": 1.7585918196483634, "grad_norm": 0.6093620570986684, "learning_rate": 7.54385840820403e-06, "loss": 11.726, "step": 32295 }, { "epoch": 1.7586462736449464, "grad_norm": 0.5638044436141385, "learning_rate": 7.540498719846856e-06, "loss": 11.7721, "step": 32296 }, { "epoch": 1.7587007276415294, "grad_norm": 0.5268638154805048, "learning_rate": 7.537139750470268e-06, "loss": 11.7714, "step": 32297 }, { "epoch": 1.7587551816381124, "grad_norm": 0.5547729103595103, "learning_rate": 7.5337815001003895e-06, "loss": 11.6223, "step": 32298 }, { "epoch": 1.7588096356346954, "grad_norm": 0.5643332879019087, "learning_rate": 7.530423968763323e-06, "loss": 11.7611, "step": 32299 }, { "epoch": 1.7588640896312784, "grad_norm": 0.5092644379332668, "learning_rate": 7.52706715648519e-06, "loss": 11.7484, "step": 32300 }, { "epoch": 1.7589185436278614, "grad_norm": 0.6123002738172678, "learning_rate": 7.523711063292105e-06, "loss": 12.0026, "step": 32301 }, { "epoch": 1.7589729976244444, "grad_norm": 0.5364123042148184, "learning_rate": 7.520355689210134e-06, "loss": 11.615, "step": 32302 }, { "epoch": 1.7590274516210274, "grad_norm": 0.5853044926867247, "learning_rate": 7.517001034265403e-06, "loss": 11.8361, "step": 32303 }, { "epoch": 1.7590819056176104, "grad_norm": 0.5177055192904207, "learning_rate": 7.513647098483978e-06, "loss": 11.8876, "step": 32304 }, { "epoch": 1.7591363596141933, "grad_norm": 0.709516664642074, "learning_rate": 7.5102938818919274e-06, "loss": 11.888, "step": 32305 }, { "epoch": 1.7591908136107763, "grad_norm": 0.5587832619399481, "learning_rate": 7.506941384515365e-06, "loss": 11.7161, "step": 32306 }, { "epoch": 1.7592452676073593, "grad_norm": 0.5509328295572602, "learning_rate": 7.503589606380312e-06, "loss": 11.8065, "step": 32307 }, { "epoch": 1.7592997216039423, "grad_norm": 0.5763956979013817, "learning_rate": 7.5002385475128835e-06, "loss": 11.7853, "step": 32308 }, { "epoch": 1.7593541756005253, "grad_norm": 0.5897447799682455, "learning_rate": 7.4968882079390905e-06, "loss": 11.8323, "step": 32309 }, { "epoch": 1.7594086295971085, "grad_norm": 0.5614212276382561, "learning_rate": 7.4935385876850114e-06, "loss": 11.8205, "step": 32310 }, { "epoch": 1.7594630835936915, "grad_norm": 0.5580453990328673, "learning_rate": 7.4901896867766944e-06, "loss": 11.8253, "step": 32311 }, { "epoch": 1.7595175375902745, "grad_norm": 0.5768845641338056, "learning_rate": 7.486841505240172e-06, "loss": 11.8223, "step": 32312 }, { "epoch": 1.7595719915868575, "grad_norm": 0.5870514910026379, "learning_rate": 7.483494043101514e-06, "loss": 11.848, "step": 32313 }, { "epoch": 1.7596264455834405, "grad_norm": 0.5314168646702158, "learning_rate": 7.4801473003866864e-06, "loss": 11.86, "step": 32314 }, { "epoch": 1.7596808995800237, "grad_norm": 0.620104276143737, "learning_rate": 7.476801277121748e-06, "loss": 11.8212, "step": 32315 }, { "epoch": 1.7597353535766067, "grad_norm": 0.5435303021810113, "learning_rate": 7.473455973332743e-06, "loss": 11.7835, "step": 32316 }, { "epoch": 1.7597898075731897, "grad_norm": 0.5963718671984589, "learning_rate": 7.470111389045642e-06, "loss": 11.7939, "step": 32317 }, { "epoch": 1.7598442615697727, "grad_norm": 0.5304119456924522, "learning_rate": 7.466767524286477e-06, "loss": 11.8243, "step": 32318 }, { "epoch": 1.7598987155663557, "grad_norm": 0.553422404781632, "learning_rate": 7.463424379081241e-06, "loss": 11.9248, "step": 32319 }, { "epoch": 1.7599531695629387, "grad_norm": 0.5249786498460067, "learning_rate": 7.460081953455955e-06, "loss": 11.8219, "step": 32320 }, { "epoch": 1.7600076235595217, "grad_norm": 0.5083800382337074, "learning_rate": 7.456740247436567e-06, "loss": 11.8487, "step": 32321 }, { "epoch": 1.7600620775561047, "grad_norm": 0.5440220910278729, "learning_rate": 7.4533992610490875e-06, "loss": 11.7141, "step": 32322 }, { "epoch": 1.7601165315526877, "grad_norm": 0.48060071553130085, "learning_rate": 7.450058994319531e-06, "loss": 11.7604, "step": 32323 }, { "epoch": 1.7601709855492707, "grad_norm": 0.5719979749730558, "learning_rate": 7.446719447273798e-06, "loss": 11.8195, "step": 32324 }, { "epoch": 1.7602254395458536, "grad_norm": 0.535382825884724, "learning_rate": 7.443380619937901e-06, "loss": 11.7395, "step": 32325 }, { "epoch": 1.7602798935424366, "grad_norm": 0.6219871745107087, "learning_rate": 7.440042512337808e-06, "loss": 11.9031, "step": 32326 }, { "epoch": 1.7603343475390196, "grad_norm": 0.5571690019897455, "learning_rate": 7.4367051244994545e-06, "loss": 11.8527, "step": 32327 }, { "epoch": 1.7603888015356026, "grad_norm": 0.5644400894009494, "learning_rate": 7.43336845644882e-06, "loss": 11.9148, "step": 32328 }, { "epoch": 1.7604432555321856, "grad_norm": 0.5039294177055477, "learning_rate": 7.430032508211826e-06, "loss": 11.8055, "step": 32329 }, { "epoch": 1.7604977095287686, "grad_norm": 0.5505615147334901, "learning_rate": 7.426697279814432e-06, "loss": 11.7359, "step": 32330 }, { "epoch": 1.7605521635253516, "grad_norm": 0.5532087565581637, "learning_rate": 7.4233627712825495e-06, "loss": 11.7698, "step": 32331 }, { "epoch": 1.7606066175219346, "grad_norm": 0.5392862202807782, "learning_rate": 7.420028982642124e-06, "loss": 11.7708, "step": 32332 }, { "epoch": 1.7606610715185178, "grad_norm": 0.5398926937844774, "learning_rate": 7.416695913919114e-06, "loss": 11.8584, "step": 32333 }, { "epoch": 1.7607155255151008, "grad_norm": 0.5723571536725904, "learning_rate": 7.413363565139364e-06, "loss": 11.8024, "step": 32334 }, { "epoch": 1.7607699795116838, "grad_norm": 0.5295461305273266, "learning_rate": 7.410031936328854e-06, "loss": 11.6689, "step": 32335 }, { "epoch": 1.7608244335082668, "grad_norm": 0.5634649371573618, "learning_rate": 7.40670102751343e-06, "loss": 11.6344, "step": 32336 }, { "epoch": 1.7608788875048498, "grad_norm": 0.5427299651954265, "learning_rate": 7.403370838719037e-06, "loss": 11.6804, "step": 32337 }, { "epoch": 1.7609333415014328, "grad_norm": 0.5157469887731284, "learning_rate": 7.400041369971577e-06, "loss": 11.682, "step": 32338 }, { "epoch": 1.760987795498016, "grad_norm": 0.6181380236442666, "learning_rate": 7.3967126212969085e-06, "loss": 11.8238, "step": 32339 }, { "epoch": 1.761042249494599, "grad_norm": 0.5783597891575023, "learning_rate": 7.393384592720942e-06, "loss": 11.7492, "step": 32340 }, { "epoch": 1.761096703491182, "grad_norm": 0.5322414404189337, "learning_rate": 7.390057284269536e-06, "loss": 11.8629, "step": 32341 }, { "epoch": 1.761151157487765, "grad_norm": 0.5611333027155997, "learning_rate": 7.3867306959685915e-06, "loss": 11.8159, "step": 32342 }, { "epoch": 1.761205611484348, "grad_norm": 0.5706571472987123, "learning_rate": 7.383404827843965e-06, "loss": 11.838, "step": 32343 }, { "epoch": 1.761260065480931, "grad_norm": 0.6170261038711772, "learning_rate": 7.380079679921493e-06, "loss": 11.876, "step": 32344 }, { "epoch": 1.761314519477514, "grad_norm": 0.5252955293287406, "learning_rate": 7.376755252227063e-06, "loss": 11.843, "step": 32345 }, { "epoch": 1.761368973474097, "grad_norm": 0.5390535813073829, "learning_rate": 7.373431544786513e-06, "loss": 11.7998, "step": 32346 }, { "epoch": 1.76142342747068, "grad_norm": 0.537408796287597, "learning_rate": 7.3701085576256986e-06, "loss": 11.8537, "step": 32347 }, { "epoch": 1.761477881467263, "grad_norm": 0.5563103088766296, "learning_rate": 7.366786290770445e-06, "loss": 11.7829, "step": 32348 }, { "epoch": 1.761532335463846, "grad_norm": 0.5364377866093762, "learning_rate": 7.363464744246596e-06, "loss": 11.8085, "step": 32349 }, { "epoch": 1.761586789460429, "grad_norm": 0.5351507697297039, "learning_rate": 7.360143918079987e-06, "loss": 11.8374, "step": 32350 }, { "epoch": 1.761641243457012, "grad_norm": 0.5204197436056024, "learning_rate": 7.356823812296432e-06, "loss": 11.8803, "step": 32351 }, { "epoch": 1.7616956974535949, "grad_norm": 0.5480532340536863, "learning_rate": 7.353504426921754e-06, "loss": 11.7533, "step": 32352 }, { "epoch": 1.7617501514501779, "grad_norm": 0.5295869301956416, "learning_rate": 7.350185761981776e-06, "loss": 11.8076, "step": 32353 }, { "epoch": 1.7618046054467609, "grad_norm": 0.4870461952741857, "learning_rate": 7.346867817502267e-06, "loss": 11.8195, "step": 32354 }, { "epoch": 1.7618590594433439, "grad_norm": 0.5677620771240898, "learning_rate": 7.3435505935090635e-06, "loss": 11.7581, "step": 32355 }, { "epoch": 1.761913513439927, "grad_norm": 0.5574819696135239, "learning_rate": 7.340234090027942e-06, "loss": 11.6624, "step": 32356 }, { "epoch": 1.76196796743651, "grad_norm": 0.590936554342043, "learning_rate": 7.336918307084717e-06, "loss": 11.7542, "step": 32357 }, { "epoch": 1.762022421433093, "grad_norm": 0.5000063304337666, "learning_rate": 7.333603244705134e-06, "loss": 11.8136, "step": 32358 }, { "epoch": 1.762076875429676, "grad_norm": 0.5676488735060264, "learning_rate": 7.330288902914983e-06, "loss": 11.8305, "step": 32359 }, { "epoch": 1.762131329426259, "grad_norm": 0.6944711710574692, "learning_rate": 7.326975281740078e-06, "loss": 11.6905, "step": 32360 }, { "epoch": 1.762185783422842, "grad_norm": 0.5067506588848002, "learning_rate": 7.323662381206131e-06, "loss": 11.891, "step": 32361 }, { "epoch": 1.7622402374194253, "grad_norm": 0.5903404600956154, "learning_rate": 7.320350201338944e-06, "loss": 11.7509, "step": 32362 }, { "epoch": 1.7622946914160083, "grad_norm": 0.6223132012732371, "learning_rate": 7.31703874216425e-06, "loss": 11.8712, "step": 32363 }, { "epoch": 1.7623491454125912, "grad_norm": 0.5699465982485366, "learning_rate": 7.3137280037077985e-06, "loss": 11.7497, "step": 32364 }, { "epoch": 1.7624035994091742, "grad_norm": 0.5382094454685953, "learning_rate": 7.310417985995355e-06, "loss": 11.7891, "step": 32365 }, { "epoch": 1.7624580534057572, "grad_norm": 0.9520201525640292, "learning_rate": 7.307108689052633e-06, "loss": 11.7321, "step": 32366 }, { "epoch": 1.7625125074023402, "grad_norm": 0.6391005015733988, "learning_rate": 7.30380011290539e-06, "loss": 11.9134, "step": 32367 }, { "epoch": 1.7625669613989232, "grad_norm": 0.581569098823044, "learning_rate": 7.300492257579327e-06, "loss": 11.9759, "step": 32368 }, { "epoch": 1.7626214153955062, "grad_norm": 0.5519937761675575, "learning_rate": 7.297185123100192e-06, "loss": 11.9339, "step": 32369 }, { "epoch": 1.7626758693920892, "grad_norm": 0.5469487552409232, "learning_rate": 7.293878709493684e-06, "loss": 11.7986, "step": 32370 }, { "epoch": 1.7627303233886722, "grad_norm": 0.5260441176529977, "learning_rate": 7.290573016785518e-06, "loss": 11.6944, "step": 32371 }, { "epoch": 1.7627847773852552, "grad_norm": 0.5805763485213549, "learning_rate": 7.287268045001428e-06, "loss": 11.8504, "step": 32372 }, { "epoch": 1.7628392313818382, "grad_norm": 0.5655698371088128, "learning_rate": 7.283963794167081e-06, "loss": 11.7975, "step": 32373 }, { "epoch": 1.7628936853784212, "grad_norm": 0.5660394550762327, "learning_rate": 7.28066026430817e-06, "loss": 11.7062, "step": 32374 }, { "epoch": 1.7629481393750042, "grad_norm": 0.4911911745799803, "learning_rate": 7.277357455450407e-06, "loss": 11.85, "step": 32375 }, { "epoch": 1.7630025933715872, "grad_norm": 0.5423764304606848, "learning_rate": 7.274055367619437e-06, "loss": 11.7692, "step": 32376 }, { "epoch": 1.7630570473681701, "grad_norm": 0.5731570304029355, "learning_rate": 7.270754000840985e-06, "loss": 11.6807, "step": 32377 }, { "epoch": 1.7631115013647531, "grad_norm": 0.6037863315480049, "learning_rate": 7.267453355140685e-06, "loss": 11.5989, "step": 32378 }, { "epoch": 1.7631659553613361, "grad_norm": 0.536057241950262, "learning_rate": 7.26415343054424e-06, "loss": 11.7712, "step": 32379 }, { "epoch": 1.7632204093579193, "grad_norm": 0.5426477716803638, "learning_rate": 7.260854227077274e-06, "loss": 11.7726, "step": 32380 }, { "epoch": 1.7632748633545023, "grad_norm": 0.48845889683067467, "learning_rate": 7.257555744765454e-06, "loss": 11.7433, "step": 32381 }, { "epoch": 1.7633293173510853, "grad_norm": 0.9140697807030868, "learning_rate": 7.25425798363445e-06, "loss": 11.7978, "step": 32382 }, { "epoch": 1.7633837713476683, "grad_norm": 0.4784156197793373, "learning_rate": 7.250960943709884e-06, "loss": 11.7587, "step": 32383 }, { "epoch": 1.7634382253442513, "grad_norm": 0.6121897958137692, "learning_rate": 7.247664625017403e-06, "loss": 11.7678, "step": 32384 }, { "epoch": 1.7634926793408345, "grad_norm": 0.5203835716886616, "learning_rate": 7.244369027582621e-06, "loss": 11.8362, "step": 32385 }, { "epoch": 1.7635471333374175, "grad_norm": 0.7471726687272454, "learning_rate": 7.241074151431182e-06, "loss": 11.7367, "step": 32386 }, { "epoch": 1.7636015873340005, "grad_norm": 0.5778906960302164, "learning_rate": 7.237779996588723e-06, "loss": 11.8412, "step": 32387 }, { "epoch": 1.7636560413305835, "grad_norm": 0.6550759964786931, "learning_rate": 7.234486563080823e-06, "loss": 11.8367, "step": 32388 }, { "epoch": 1.7637104953271665, "grad_norm": 0.5475345367993918, "learning_rate": 7.231193850933127e-06, "loss": 11.8007, "step": 32389 }, { "epoch": 1.7637649493237495, "grad_norm": 0.5899456756389151, "learning_rate": 7.227901860171215e-06, "loss": 11.8182, "step": 32390 }, { "epoch": 1.7638194033203325, "grad_norm": 0.5973111699437943, "learning_rate": 7.224610590820713e-06, "loss": 11.8454, "step": 32391 }, { "epoch": 1.7638738573169155, "grad_norm": 0.5544637859212342, "learning_rate": 7.221320042907176e-06, "loss": 11.7157, "step": 32392 }, { "epoch": 1.7639283113134985, "grad_norm": 0.5580414332870065, "learning_rate": 7.2180302164562175e-06, "loss": 11.7504, "step": 32393 }, { "epoch": 1.7639827653100815, "grad_norm": 0.5193774247888508, "learning_rate": 7.214741111493439e-06, "loss": 11.6611, "step": 32394 }, { "epoch": 1.7640372193066645, "grad_norm": 0.5531527796970839, "learning_rate": 7.211452728044377e-06, "loss": 11.7538, "step": 32395 }, { "epoch": 1.7640916733032475, "grad_norm": 0.5379305994824084, "learning_rate": 7.20816506613462e-06, "loss": 11.7033, "step": 32396 }, { "epoch": 1.7641461272998304, "grad_norm": 0.5246291274011378, "learning_rate": 7.2048781257897265e-06, "loss": 11.8177, "step": 32397 }, { "epoch": 1.7642005812964134, "grad_norm": 0.5485662663006204, "learning_rate": 7.2015919070352545e-06, "loss": 11.7589, "step": 32398 }, { "epoch": 1.7642550352929964, "grad_norm": 0.5439113601867139, "learning_rate": 7.198306409896794e-06, "loss": 11.8224, "step": 32399 }, { "epoch": 1.7643094892895794, "grad_norm": 0.5510783818630199, "learning_rate": 7.195021634399834e-06, "loss": 11.9455, "step": 32400 }, { "epoch": 1.7643639432861624, "grad_norm": 0.5078001366956315, "learning_rate": 7.191737580569979e-06, "loss": 11.7833, "step": 32401 }, { "epoch": 1.7644183972827454, "grad_norm": 0.5463870607280911, "learning_rate": 7.1884542484327076e-06, "loss": 11.6649, "step": 32402 }, { "epoch": 1.7644728512793286, "grad_norm": 0.5293179509605374, "learning_rate": 7.18517163801361e-06, "loss": 11.7558, "step": 32403 }, { "epoch": 1.7645273052759116, "grad_norm": 0.5661948285546391, "learning_rate": 7.181889749338178e-06, "loss": 11.8284, "step": 32404 }, { "epoch": 1.7645817592724946, "grad_norm": 0.4774295334887779, "learning_rate": 7.178608582431912e-06, "loss": 11.7392, "step": 32405 }, { "epoch": 1.7646362132690776, "grad_norm": 0.5569778331117314, "learning_rate": 7.1753281373203805e-06, "loss": 11.7874, "step": 32406 }, { "epoch": 1.7646906672656606, "grad_norm": 0.49605450321583283, "learning_rate": 7.172048414029042e-06, "loss": 11.7344, "step": 32407 }, { "epoch": 1.7647451212622436, "grad_norm": 0.5286431509239733, "learning_rate": 7.1687694125834205e-06, "loss": 11.8066, "step": 32408 }, { "epoch": 1.7647995752588268, "grad_norm": 0.6143050511893186, "learning_rate": 7.165491133009039e-06, "loss": 11.8055, "step": 32409 }, { "epoch": 1.7648540292554098, "grad_norm": 0.5104935075402033, "learning_rate": 7.1622135753313445e-06, "loss": 11.7688, "step": 32410 }, { "epoch": 1.7649084832519928, "grad_norm": 0.5085736202065824, "learning_rate": 7.158936739575861e-06, "loss": 11.7869, "step": 32411 }, { "epoch": 1.7649629372485758, "grad_norm": 0.5548308218267389, "learning_rate": 7.155660625768046e-06, "loss": 11.647, "step": 32412 }, { "epoch": 1.7650173912451588, "grad_norm": 0.5182332690613815, "learning_rate": 7.152385233933389e-06, "loss": 11.8057, "step": 32413 }, { "epoch": 1.7650718452417418, "grad_norm": 0.5407855924686837, "learning_rate": 7.149110564097372e-06, "loss": 11.7418, "step": 32414 }, { "epoch": 1.7651262992383248, "grad_norm": 0.6337329798454913, "learning_rate": 7.145836616285406e-06, "loss": 11.7943, "step": 32415 }, { "epoch": 1.7651807532349078, "grad_norm": 0.48449606329339867, "learning_rate": 7.142563390523016e-06, "loss": 11.5754, "step": 32416 }, { "epoch": 1.7652352072314907, "grad_norm": 0.5302133558003101, "learning_rate": 7.139290886835604e-06, "loss": 11.8318, "step": 32417 }, { "epoch": 1.7652896612280737, "grad_norm": 0.5462321802071552, "learning_rate": 7.136019105248659e-06, "loss": 11.7628, "step": 32418 }, { "epoch": 1.7653441152246567, "grad_norm": 0.593250827650166, "learning_rate": 7.132748045787585e-06, "loss": 11.936, "step": 32419 }, { "epoch": 1.7653985692212397, "grad_norm": 0.534377515132765, "learning_rate": 7.1294777084778385e-06, "loss": 11.8307, "step": 32420 }, { "epoch": 1.7654530232178227, "grad_norm": 0.5957141896429868, "learning_rate": 7.126208093344855e-06, "loss": 11.8671, "step": 32421 }, { "epoch": 1.7655074772144057, "grad_norm": 0.5253310196150951, "learning_rate": 7.122939200414047e-06, "loss": 11.5939, "step": 32422 }, { "epoch": 1.7655619312109887, "grad_norm": 0.5727741432569967, "learning_rate": 7.119671029710851e-06, "loss": 11.8215, "step": 32423 }, { "epoch": 1.7656163852075717, "grad_norm": 0.5645552107529307, "learning_rate": 7.116403581260666e-06, "loss": 11.5887, "step": 32424 }, { "epoch": 1.7656708392041547, "grad_norm": 0.5728926466432189, "learning_rate": 7.113136855088887e-06, "loss": 11.6244, "step": 32425 }, { "epoch": 1.765725293200738, "grad_norm": 0.5505402048947162, "learning_rate": 7.109870851220946e-06, "loss": 11.8847, "step": 32426 }, { "epoch": 1.765779747197321, "grad_norm": 0.48449133413234013, "learning_rate": 7.106605569682223e-06, "loss": 11.8173, "step": 32427 }, { "epoch": 1.7658342011939039, "grad_norm": 0.6641475685607001, "learning_rate": 7.103341010498121e-06, "loss": 11.8476, "step": 32428 }, { "epoch": 1.7658886551904869, "grad_norm": 0.5200225627804707, "learning_rate": 7.100077173693998e-06, "loss": 11.7741, "step": 32429 }, { "epoch": 1.7659431091870699, "grad_norm": 0.6478267440136369, "learning_rate": 7.0968140592952645e-06, "loss": 11.7211, "step": 32430 }, { "epoch": 1.7659975631836529, "grad_norm": 0.526273367586206, "learning_rate": 7.093551667327292e-06, "loss": 11.7279, "step": 32431 }, { "epoch": 1.766052017180236, "grad_norm": 0.5293466186121332, "learning_rate": 7.090289997815436e-06, "loss": 11.6945, "step": 32432 }, { "epoch": 1.766106471176819, "grad_norm": 0.5234387758570058, "learning_rate": 7.087029050785077e-06, "loss": 11.785, "step": 32433 }, { "epoch": 1.766160925173402, "grad_norm": 0.5608956079550506, "learning_rate": 7.083768826261561e-06, "loss": 11.7521, "step": 32434 }, { "epoch": 1.766215379169985, "grad_norm": 0.6132855594715134, "learning_rate": 7.080509324270224e-06, "loss": 11.8305, "step": 32435 }, { "epoch": 1.766269833166568, "grad_norm": 0.579739800056128, "learning_rate": 7.077250544836444e-06, "loss": 11.8372, "step": 32436 }, { "epoch": 1.766324287163151, "grad_norm": 0.4848196679170275, "learning_rate": 7.073992487985537e-06, "loss": 11.6512, "step": 32437 }, { "epoch": 1.766378741159734, "grad_norm": 0.5769411461503011, "learning_rate": 7.070735153742858e-06, "loss": 11.7398, "step": 32438 }, { "epoch": 1.766433195156317, "grad_norm": 0.5400025851720152, "learning_rate": 7.06747854213371e-06, "loss": 11.7714, "step": 32439 }, { "epoch": 1.7664876491529, "grad_norm": 0.5430651435837082, "learning_rate": 7.0642226531834495e-06, "loss": 11.7308, "step": 32440 }, { "epoch": 1.766542103149483, "grad_norm": 0.5546201747900835, "learning_rate": 7.0609674869173695e-06, "loss": 11.8682, "step": 32441 }, { "epoch": 1.766596557146066, "grad_norm": 0.4697497270887615, "learning_rate": 7.057713043360781e-06, "loss": 11.7482, "step": 32442 }, { "epoch": 1.766651011142649, "grad_norm": 0.5527719939885997, "learning_rate": 7.054459322539031e-06, "loss": 11.689, "step": 32443 }, { "epoch": 1.766705465139232, "grad_norm": 0.5539698824192684, "learning_rate": 7.051206324477389e-06, "loss": 11.8499, "step": 32444 }, { "epoch": 1.766759919135815, "grad_norm": 0.5806791927372109, "learning_rate": 7.047954049201144e-06, "loss": 11.8532, "step": 32445 }, { "epoch": 1.766814373132398, "grad_norm": 0.5164966670585809, "learning_rate": 7.044702496735589e-06, "loss": 11.7236, "step": 32446 }, { "epoch": 1.766868827128981, "grad_norm": 0.5388727806756906, "learning_rate": 7.041451667106014e-06, "loss": 11.8655, "step": 32447 }, { "epoch": 1.766923281125564, "grad_norm": 0.5258345518559903, "learning_rate": 7.038201560337721e-06, "loss": 11.8075, "step": 32448 }, { "epoch": 1.7669777351221472, "grad_norm": 0.5586393142600838, "learning_rate": 7.034952176455945e-06, "loss": 11.7938, "step": 32449 }, { "epoch": 1.7670321891187302, "grad_norm": 0.6787701241470055, "learning_rate": 7.031703515485988e-06, "loss": 11.8707, "step": 32450 }, { "epoch": 1.7670866431153132, "grad_norm": 0.5443562415419549, "learning_rate": 7.028455577453074e-06, "loss": 11.8743, "step": 32451 }, { "epoch": 1.7671410971118962, "grad_norm": 0.548410018421957, "learning_rate": 7.025208362382496e-06, "loss": 11.7629, "step": 32452 }, { "epoch": 1.7671955511084791, "grad_norm": 0.5662985835500658, "learning_rate": 7.021961870299476e-06, "loss": 11.868, "step": 32453 }, { "epoch": 1.7672500051050621, "grad_norm": 0.5611080212631334, "learning_rate": 7.0187161012292945e-06, "loss": 11.789, "step": 32454 }, { "epoch": 1.7673044591016454, "grad_norm": 0.5441373707393591, "learning_rate": 7.015471055197164e-06, "loss": 11.756, "step": 32455 }, { "epoch": 1.7673589130982283, "grad_norm": 0.6761327254056887, "learning_rate": 7.0122267322282995e-06, "loss": 11.894, "step": 32456 }, { "epoch": 1.7674133670948113, "grad_norm": 0.563502754538389, "learning_rate": 7.008983132347968e-06, "loss": 11.7751, "step": 32457 }, { "epoch": 1.7674678210913943, "grad_norm": 0.5391698601908398, "learning_rate": 7.005740255581395e-06, "loss": 11.8214, "step": 32458 }, { "epoch": 1.7675222750879773, "grad_norm": 0.5756762889962632, "learning_rate": 7.002498101953758e-06, "loss": 11.7541, "step": 32459 }, { "epoch": 1.7675767290845603, "grad_norm": 0.5510747624109077, "learning_rate": 6.999256671490306e-06, "loss": 11.9001, "step": 32460 }, { "epoch": 1.7676311830811433, "grad_norm": 0.5600344165461107, "learning_rate": 6.996015964216218e-06, "loss": 11.6302, "step": 32461 }, { "epoch": 1.7676856370777263, "grad_norm": 0.5105209825942861, "learning_rate": 6.992775980156718e-06, "loss": 11.6263, "step": 32462 }, { "epoch": 1.7677400910743093, "grad_norm": 0.5287327792460506, "learning_rate": 6.989536719336976e-06, "loss": 11.7988, "step": 32463 }, { "epoch": 1.7677945450708923, "grad_norm": 0.5235574865193675, "learning_rate": 6.986298181782213e-06, "loss": 11.7791, "step": 32464 }, { "epoch": 1.7678489990674753, "grad_norm": 0.5248584359930557, "learning_rate": 6.983060367517591e-06, "loss": 11.5514, "step": 32465 }, { "epoch": 1.7679034530640583, "grad_norm": 0.53106990569008, "learning_rate": 6.979823276568276e-06, "loss": 11.73, "step": 32466 }, { "epoch": 1.7679579070606413, "grad_norm": 0.5624229679426564, "learning_rate": 6.97658690895947e-06, "loss": 11.788, "step": 32467 }, { "epoch": 1.7680123610572243, "grad_norm": 0.5983849167658366, "learning_rate": 6.97335126471631e-06, "loss": 11.8528, "step": 32468 }, { "epoch": 1.7680668150538073, "grad_norm": 0.49099023311629675, "learning_rate": 6.9701163438639635e-06, "loss": 11.8061, "step": 32469 }, { "epoch": 1.7681212690503902, "grad_norm": 0.5159192516763268, "learning_rate": 6.9668821464276224e-06, "loss": 11.8415, "step": 32470 }, { "epoch": 1.7681757230469732, "grad_norm": 0.5505523919006803, "learning_rate": 6.9636486724323765e-06, "loss": 11.741, "step": 32471 }, { "epoch": 1.7682301770435562, "grad_norm": 0.5282648311414133, "learning_rate": 6.960415921903429e-06, "loss": 11.7232, "step": 32472 }, { "epoch": 1.7682846310401394, "grad_norm": 0.5803095678248418, "learning_rate": 6.95718389486587e-06, "loss": 11.7988, "step": 32473 }, { "epoch": 1.7683390850367224, "grad_norm": 0.5393963369431248, "learning_rate": 6.95395259134487e-06, "loss": 11.7687, "step": 32474 }, { "epoch": 1.7683935390333054, "grad_norm": 0.5067067948482463, "learning_rate": 6.95072201136554e-06, "loss": 11.7722, "step": 32475 }, { "epoch": 1.7684479930298884, "grad_norm": 0.5390073898022799, "learning_rate": 6.947492154952972e-06, "loss": 11.7714, "step": 32476 }, { "epoch": 1.7685024470264714, "grad_norm": 0.5290798418720969, "learning_rate": 6.944263022132336e-06, "loss": 11.8138, "step": 32477 }, { "epoch": 1.7685569010230544, "grad_norm": 0.47873354783519273, "learning_rate": 6.941034612928699e-06, "loss": 11.7539, "step": 32478 }, { "epoch": 1.7686113550196376, "grad_norm": 0.551475868516879, "learning_rate": 6.9378069273671855e-06, "loss": 11.7834, "step": 32479 }, { "epoch": 1.7686658090162206, "grad_norm": 0.5259224440001655, "learning_rate": 6.934579965472898e-06, "loss": 11.6581, "step": 32480 }, { "epoch": 1.7687202630128036, "grad_norm": 0.5675922206340825, "learning_rate": 6.931353727270917e-06, "loss": 11.7332, "step": 32481 }, { "epoch": 1.7687747170093866, "grad_norm": 0.4966813392031055, "learning_rate": 6.928128212786356e-06, "loss": 11.7096, "step": 32482 }, { "epoch": 1.7688291710059696, "grad_norm": 0.5851621807802333, "learning_rate": 6.92490342204426e-06, "loss": 11.7999, "step": 32483 }, { "epoch": 1.7688836250025526, "grad_norm": 0.5693229515501992, "learning_rate": 6.921679355069732e-06, "loss": 11.8676, "step": 32484 }, { "epoch": 1.7689380789991356, "grad_norm": 0.5154227529447211, "learning_rate": 6.918456011887842e-06, "loss": 11.7555, "step": 32485 }, { "epoch": 1.7689925329957186, "grad_norm": 0.5550616391724824, "learning_rate": 6.915233392523623e-06, "loss": 11.8038, "step": 32486 }, { "epoch": 1.7690469869923016, "grad_norm": 0.5692104693139884, "learning_rate": 6.91201149700218e-06, "loss": 11.762, "step": 32487 }, { "epoch": 1.7691014409888846, "grad_norm": 0.5375555291794875, "learning_rate": 6.908790325348536e-06, "loss": 11.7449, "step": 32488 }, { "epoch": 1.7691558949854675, "grad_norm": 0.5655107579397572, "learning_rate": 6.905569877587759e-06, "loss": 11.812, "step": 32489 }, { "epoch": 1.7692103489820505, "grad_norm": 0.5203234674230789, "learning_rate": 6.902350153744874e-06, "loss": 11.7405, "step": 32490 }, { "epoch": 1.7692648029786335, "grad_norm": 0.5442782451858077, "learning_rate": 6.8991311538449175e-06, "loss": 11.8278, "step": 32491 }, { "epoch": 1.7693192569752165, "grad_norm": 0.5938194500026797, "learning_rate": 6.895912877912958e-06, "loss": 11.8093, "step": 32492 }, { "epoch": 1.7693737109717995, "grad_norm": 0.49089936157676045, "learning_rate": 6.892695325973963e-06, "loss": 11.7251, "step": 32493 }, { "epoch": 1.7694281649683825, "grad_norm": 0.5462910053179499, "learning_rate": 6.8894784980530255e-06, "loss": 11.7654, "step": 32494 }, { "epoch": 1.7694826189649655, "grad_norm": 0.5416389054162575, "learning_rate": 6.88626239417508e-06, "loss": 11.8812, "step": 32495 }, { "epoch": 1.7695370729615487, "grad_norm": 0.5772196206579886, "learning_rate": 6.883047014365173e-06, "loss": 11.8, "step": 32496 }, { "epoch": 1.7695915269581317, "grad_norm": 0.5731945478590326, "learning_rate": 6.8798323586483305e-06, "loss": 11.8409, "step": 32497 }, { "epoch": 1.7696459809547147, "grad_norm": 0.5237249111482807, "learning_rate": 6.876618427049509e-06, "loss": 11.6249, "step": 32498 }, { "epoch": 1.7697004349512977, "grad_norm": 0.6041684509377229, "learning_rate": 6.873405219593732e-06, "loss": 11.8516, "step": 32499 }, { "epoch": 1.7697548889478807, "grad_norm": 0.5715753407255886, "learning_rate": 6.870192736305958e-06, "loss": 11.8486, "step": 32500 }, { "epoch": 1.7698093429444637, "grad_norm": 0.6189144514321696, "learning_rate": 6.866980977211201e-06, "loss": 11.8001, "step": 32501 }, { "epoch": 1.769863796941047, "grad_norm": 0.5777239824397306, "learning_rate": 6.8637699423344085e-06, "loss": 11.8568, "step": 32502 }, { "epoch": 1.7699182509376299, "grad_norm": 0.8850594492937024, "learning_rate": 6.860559631700558e-06, "loss": 11.7279, "step": 32503 }, { "epoch": 1.7699727049342129, "grad_norm": 0.5851050335484635, "learning_rate": 6.857350045334643e-06, "loss": 11.8075, "step": 32504 }, { "epoch": 1.7700271589307959, "grad_norm": 0.6013720013585722, "learning_rate": 6.854141183261564e-06, "loss": 11.9136, "step": 32505 }, { "epoch": 1.7700816129273789, "grad_norm": 0.509443011363841, "learning_rate": 6.850933045506302e-06, "loss": 11.6482, "step": 32506 }, { "epoch": 1.7701360669239619, "grad_norm": 0.5679414901449624, "learning_rate": 6.847725632093815e-06, "loss": 11.5742, "step": 32507 }, { "epoch": 1.7701905209205449, "grad_norm": 0.5392368385181602, "learning_rate": 6.844518943049027e-06, "loss": 11.8295, "step": 32508 }, { "epoch": 1.7702449749171278, "grad_norm": 0.5751937584515109, "learning_rate": 6.841312978396896e-06, "loss": 11.8178, "step": 32509 }, { "epoch": 1.7702994289137108, "grad_norm": 0.5864386990441026, "learning_rate": 6.838107738162325e-06, "loss": 11.8829, "step": 32510 }, { "epoch": 1.7703538829102938, "grad_norm": 0.5505025726782754, "learning_rate": 6.83490322237027e-06, "loss": 11.8084, "step": 32511 }, { "epoch": 1.7704083369068768, "grad_norm": 0.5154488770079803, "learning_rate": 6.831699431045602e-06, "loss": 11.8203, "step": 32512 }, { "epoch": 1.7704627909034598, "grad_norm": 0.5465051550207456, "learning_rate": 6.828496364213277e-06, "loss": 11.7996, "step": 32513 }, { "epoch": 1.7705172449000428, "grad_norm": 0.5741377442620422, "learning_rate": 6.825294021898221e-06, "loss": 11.6627, "step": 32514 }, { "epoch": 1.7705716988966258, "grad_norm": 0.5494894114487702, "learning_rate": 6.822092404125258e-06, "loss": 11.8403, "step": 32515 }, { "epoch": 1.7706261528932088, "grad_norm": 0.5090681600776872, "learning_rate": 6.818891510919356e-06, "loss": 11.7853, "step": 32516 }, { "epoch": 1.7706806068897918, "grad_norm": 0.5263071241968802, "learning_rate": 6.815691342305364e-06, "loss": 11.7875, "step": 32517 }, { "epoch": 1.7707350608863748, "grad_norm": 0.549212220543552, "learning_rate": 6.812491898308193e-06, "loss": 11.7381, "step": 32518 }, { "epoch": 1.770789514882958, "grad_norm": 0.5314527347005255, "learning_rate": 6.809293178952714e-06, "loss": 11.6578, "step": 32519 }, { "epoch": 1.770843968879541, "grad_norm": 0.5252082284132608, "learning_rate": 6.806095184263783e-06, "loss": 11.8465, "step": 32520 }, { "epoch": 1.770898422876124, "grad_norm": 0.49797130645623167, "learning_rate": 6.802897914266315e-06, "loss": 11.8213, "step": 32521 }, { "epoch": 1.770952876872707, "grad_norm": 0.5635534863376405, "learning_rate": 6.799701368985112e-06, "loss": 11.9435, "step": 32522 }, { "epoch": 1.77100733086929, "grad_norm": 0.5629515308452587, "learning_rate": 6.7965055484450865e-06, "loss": 11.8811, "step": 32523 }, { "epoch": 1.771061784865873, "grad_norm": 0.5315500683196582, "learning_rate": 6.7933104526710534e-06, "loss": 11.709, "step": 32524 }, { "epoch": 1.7711162388624562, "grad_norm": 0.5764388303605275, "learning_rate": 6.790116081687858e-06, "loss": 11.8658, "step": 32525 }, { "epoch": 1.7711706928590392, "grad_norm": 0.6623134247580197, "learning_rate": 6.786922435520371e-06, "loss": 11.9887, "step": 32526 }, { "epoch": 1.7712251468556222, "grad_norm": 0.5157210021703791, "learning_rate": 6.783729514193382e-06, "loss": 11.7958, "step": 32527 }, { "epoch": 1.7712796008522051, "grad_norm": 0.5376612123642904, "learning_rate": 6.78053731773175e-06, "loss": 11.816, "step": 32528 }, { "epoch": 1.7713340548487881, "grad_norm": 0.5261152942844866, "learning_rate": 6.77734584616031e-06, "loss": 11.7756, "step": 32529 }, { "epoch": 1.7713885088453711, "grad_norm": 0.5143438841387615, "learning_rate": 6.7741550995038535e-06, "loss": 11.7241, "step": 32530 }, { "epoch": 1.7714429628419541, "grad_norm": 0.6092866657952518, "learning_rate": 6.770965077787206e-06, "loss": 11.7917, "step": 32531 }, { "epoch": 1.7714974168385371, "grad_norm": 0.5320070727590585, "learning_rate": 6.767775781035157e-06, "loss": 11.7141, "step": 32532 }, { "epoch": 1.7715518708351201, "grad_norm": 0.5772470246873229, "learning_rate": 6.764587209272533e-06, "loss": 11.7088, "step": 32533 }, { "epoch": 1.771606324831703, "grad_norm": 0.5277213074435889, "learning_rate": 6.76139936252409e-06, "loss": 11.6816, "step": 32534 }, { "epoch": 1.771660778828286, "grad_norm": 0.5219917950417122, "learning_rate": 6.7582122408146656e-06, "loss": 11.7806, "step": 32535 }, { "epoch": 1.771715232824869, "grad_norm": 0.5715357613118955, "learning_rate": 6.755025844169027e-06, "loss": 11.6965, "step": 32536 }, { "epoch": 1.771769686821452, "grad_norm": 0.5850917935202838, "learning_rate": 6.751840172611923e-06, "loss": 11.5886, "step": 32537 }, { "epoch": 1.771824140818035, "grad_norm": 0.6101323911637545, "learning_rate": 6.748655226168155e-06, "loss": 11.836, "step": 32538 }, { "epoch": 1.771878594814618, "grad_norm": 0.5412089774253621, "learning_rate": 6.745471004862481e-06, "loss": 11.7079, "step": 32539 }, { "epoch": 1.771933048811201, "grad_norm": 0.5299501793833984, "learning_rate": 6.742287508719647e-06, "loss": 11.7622, "step": 32540 }, { "epoch": 1.771987502807784, "grad_norm": 0.5232663481832034, "learning_rate": 6.739104737764457e-06, "loss": 11.6454, "step": 32541 }, { "epoch": 1.772041956804367, "grad_norm": 0.5395042908687733, "learning_rate": 6.735922692021601e-06, "loss": 11.8339, "step": 32542 }, { "epoch": 1.7720964108009503, "grad_norm": 0.5607707941876608, "learning_rate": 6.732741371515871e-06, "loss": 11.7649, "step": 32543 }, { "epoch": 1.7721508647975333, "grad_norm": 0.50940726724126, "learning_rate": 6.72956077627197e-06, "loss": 11.5963, "step": 32544 }, { "epoch": 1.7722053187941162, "grad_norm": 0.5687962796539477, "learning_rate": 6.7263809063146554e-06, "loss": 11.8172, "step": 32545 }, { "epoch": 1.7722597727906992, "grad_norm": 0.5552807747158219, "learning_rate": 6.723201761668651e-06, "loss": 11.7434, "step": 32546 }, { "epoch": 1.7723142267872822, "grad_norm": 0.6312128068106516, "learning_rate": 6.72002334235865e-06, "loss": 11.7108, "step": 32547 }, { "epoch": 1.7723686807838654, "grad_norm": 0.5364946785921043, "learning_rate": 6.71684564840942e-06, "loss": 11.7466, "step": 32548 }, { "epoch": 1.7724231347804484, "grad_norm": 0.4954720453777132, "learning_rate": 6.7136686798456084e-06, "loss": 11.7325, "step": 32549 }, { "epoch": 1.7724775887770314, "grad_norm": 0.5507651694484306, "learning_rate": 6.710492436691984e-06, "loss": 11.8744, "step": 32550 }, { "epoch": 1.7725320427736144, "grad_norm": 0.5603768587860525, "learning_rate": 6.707316918973194e-06, "loss": 11.8327, "step": 32551 }, { "epoch": 1.7725864967701974, "grad_norm": 0.5773301010353931, "learning_rate": 6.704142126713953e-06, "loss": 11.7658, "step": 32552 }, { "epoch": 1.7726409507667804, "grad_norm": 0.5225682569384548, "learning_rate": 6.700968059938962e-06, "loss": 11.8519, "step": 32553 }, { "epoch": 1.7726954047633634, "grad_norm": 0.5220744324181364, "learning_rate": 6.697794718672878e-06, "loss": 11.7751, "step": 32554 }, { "epoch": 1.7727498587599464, "grad_norm": 0.5522113729084689, "learning_rate": 6.694622102940396e-06, "loss": 11.8958, "step": 32555 }, { "epoch": 1.7728043127565294, "grad_norm": 0.5953742583947584, "learning_rate": 6.691450212766193e-06, "loss": 11.8135, "step": 32556 }, { "epoch": 1.7728587667531124, "grad_norm": 0.582818102037976, "learning_rate": 6.688279048174895e-06, "loss": 11.8136, "step": 32557 }, { "epoch": 1.7729132207496954, "grad_norm": 0.5999572791971642, "learning_rate": 6.685108609191204e-06, "loss": 11.8104, "step": 32558 }, { "epoch": 1.7729676747462784, "grad_norm": 0.510340518078583, "learning_rate": 6.681938895839746e-06, "loss": 11.7312, "step": 32559 }, { "epoch": 1.7730221287428614, "grad_norm": 0.7048680747222791, "learning_rate": 6.678769908145188e-06, "loss": 11.8127, "step": 32560 }, { "epoch": 1.7730765827394444, "grad_norm": 0.5453801199230138, "learning_rate": 6.675601646132157e-06, "loss": 11.8925, "step": 32561 }, { "epoch": 1.7731310367360273, "grad_norm": 0.5304706086726125, "learning_rate": 6.672434109825299e-06, "loss": 11.8602, "step": 32562 }, { "epoch": 1.7731854907326103, "grad_norm": 0.6165774646844658, "learning_rate": 6.66926729924926e-06, "loss": 11.7511, "step": 32563 }, { "epoch": 1.7732399447291933, "grad_norm": 0.5789662439924932, "learning_rate": 6.6661012144286325e-06, "loss": 11.8659, "step": 32564 }, { "epoch": 1.7732943987257763, "grad_norm": 0.590542330161939, "learning_rate": 6.662935855388086e-06, "loss": 11.7068, "step": 32565 }, { "epoch": 1.7733488527223595, "grad_norm": 0.5549403780924586, "learning_rate": 6.659771222152167e-06, "loss": 11.6931, "step": 32566 }, { "epoch": 1.7734033067189425, "grad_norm": 0.6011244737428332, "learning_rate": 6.656607314745522e-06, "loss": 11.8375, "step": 32567 }, { "epoch": 1.7734577607155255, "grad_norm": 0.5470194606503198, "learning_rate": 6.653444133192777e-06, "loss": 11.8163, "step": 32568 }, { "epoch": 1.7735122147121085, "grad_norm": 0.5263834449077107, "learning_rate": 6.650281677518477e-06, "loss": 11.7583, "step": 32569 }, { "epoch": 1.7735666687086915, "grad_norm": 0.6066447590285262, "learning_rate": 6.64711994774726e-06, "loss": 11.8415, "step": 32570 }, { "epoch": 1.7736211227052745, "grad_norm": 0.564971141513668, "learning_rate": 6.6439589439036834e-06, "loss": 11.7612, "step": 32571 }, { "epoch": 1.7736755767018577, "grad_norm": 0.5426395113191236, "learning_rate": 6.640798666012349e-06, "loss": 11.8211, "step": 32572 }, { "epoch": 1.7737300306984407, "grad_norm": 0.5355788596137272, "learning_rate": 6.637639114097804e-06, "loss": 11.7272, "step": 32573 }, { "epoch": 1.7737844846950237, "grad_norm": 0.5417248876945642, "learning_rate": 6.6344802881846416e-06, "loss": 11.7728, "step": 32574 }, { "epoch": 1.7738389386916067, "grad_norm": 0.5176878213694106, "learning_rate": 6.63132218829744e-06, "loss": 11.5263, "step": 32575 }, { "epoch": 1.7738933926881897, "grad_norm": 0.538084768549536, "learning_rate": 6.628164814460702e-06, "loss": 11.8603, "step": 32576 }, { "epoch": 1.7739478466847727, "grad_norm": 0.5634966133398733, "learning_rate": 6.62500816669902e-06, "loss": 11.8694, "step": 32577 }, { "epoch": 1.7740023006813557, "grad_norm": 0.5087286585919892, "learning_rate": 6.621852245036952e-06, "loss": 11.6815, "step": 32578 }, { "epoch": 1.7740567546779387, "grad_norm": 0.5553469008752997, "learning_rate": 6.618697049499001e-06, "loss": 11.7431, "step": 32579 }, { "epoch": 1.7741112086745217, "grad_norm": 0.5456400676770684, "learning_rate": 6.615542580109735e-06, "loss": 11.762, "step": 32580 }, { "epoch": 1.7741656626711046, "grad_norm": 0.536257816136381, "learning_rate": 6.612388836893657e-06, "loss": 11.523, "step": 32581 }, { "epoch": 1.7742201166676876, "grad_norm": 0.5835429825597319, "learning_rate": 6.609235819875314e-06, "loss": 11.7725, "step": 32582 }, { "epoch": 1.7742745706642706, "grad_norm": 0.508417587120594, "learning_rate": 6.60608352907921e-06, "loss": 11.8408, "step": 32583 }, { "epoch": 1.7743290246608536, "grad_norm": 0.5429689687452576, "learning_rate": 6.6029319645298575e-06, "loss": 11.8121, "step": 32584 }, { "epoch": 1.7743834786574366, "grad_norm": 0.5955028609000584, "learning_rate": 6.599781126251792e-06, "loss": 11.7714, "step": 32585 }, { "epoch": 1.7744379326540196, "grad_norm": 0.6138863765080108, "learning_rate": 6.596631014269472e-06, "loss": 11.8127, "step": 32586 }, { "epoch": 1.7744923866506026, "grad_norm": 0.5274485178907716, "learning_rate": 6.593481628607423e-06, "loss": 11.7936, "step": 32587 }, { "epoch": 1.7745468406471856, "grad_norm": 0.6001581529946779, "learning_rate": 6.590332969290114e-06, "loss": 11.8609, "step": 32588 }, { "epoch": 1.7746012946437688, "grad_norm": 0.5261086031578917, "learning_rate": 6.587185036342036e-06, "loss": 11.8238, "step": 32589 }, { "epoch": 1.7746557486403518, "grad_norm": 0.5738828339982922, "learning_rate": 6.584037829787692e-06, "loss": 11.8126, "step": 32590 }, { "epoch": 1.7747102026369348, "grad_norm": 0.5554205995944045, "learning_rate": 6.580891349651519e-06, "loss": 11.8371, "step": 32591 }, { "epoch": 1.7747646566335178, "grad_norm": 0.5183023933811208, "learning_rate": 6.577745595958018e-06, "loss": 11.7964, "step": 32592 }, { "epoch": 1.7748191106301008, "grad_norm": 0.5084647466055651, "learning_rate": 6.574600568731615e-06, "loss": 11.7346, "step": 32593 }, { "epoch": 1.7748735646266838, "grad_norm": 0.5481947987741224, "learning_rate": 6.571456267996801e-06, "loss": 11.7844, "step": 32594 }, { "epoch": 1.774928018623267, "grad_norm": 0.5891789659361069, "learning_rate": 6.568312693778011e-06, "loss": 11.7306, "step": 32595 }, { "epoch": 1.77498247261985, "grad_norm": 0.5276265723583782, "learning_rate": 6.5651698460996834e-06, "loss": 11.7326, "step": 32596 }, { "epoch": 1.775036926616433, "grad_norm": 0.5704125501566568, "learning_rate": 6.562027724986264e-06, "loss": 11.6956, "step": 32597 }, { "epoch": 1.775091380613016, "grad_norm": 0.562789157110727, "learning_rate": 6.558886330462189e-06, "loss": 11.789, "step": 32598 }, { "epoch": 1.775145834609599, "grad_norm": 0.5928072781857274, "learning_rate": 6.555745662551882e-06, "loss": 11.6489, "step": 32599 }, { "epoch": 1.775200288606182, "grad_norm": 0.5326225760551547, "learning_rate": 6.5526057212797585e-06, "loss": 11.7364, "step": 32600 }, { "epoch": 1.775254742602765, "grad_norm": 0.5278390274475373, "learning_rate": 6.549466506670244e-06, "loss": 11.7725, "step": 32601 }, { "epoch": 1.775309196599348, "grad_norm": 0.5295545231723466, "learning_rate": 6.546328018747772e-06, "loss": 11.7202, "step": 32602 }, { "epoch": 1.775363650595931, "grad_norm": 0.4988386655783545, "learning_rate": 6.543190257536702e-06, "loss": 11.8298, "step": 32603 }, { "epoch": 1.775418104592514, "grad_norm": 0.5897230884922778, "learning_rate": 6.5400532230614706e-06, "loss": 11.8236, "step": 32604 }, { "epoch": 1.775472558589097, "grad_norm": 0.5374701214690483, "learning_rate": 6.536916915346469e-06, "loss": 11.7702, "step": 32605 }, { "epoch": 1.77552701258568, "grad_norm": 0.5796462809143641, "learning_rate": 6.533781334416056e-06, "loss": 11.8336, "step": 32606 }, { "epoch": 1.775581466582263, "grad_norm": 0.5378564911576558, "learning_rate": 6.5306464802946445e-06, "loss": 11.6341, "step": 32607 }, { "epoch": 1.775635920578846, "grad_norm": 0.5606812729941905, "learning_rate": 6.527512353006593e-06, "loss": 11.9215, "step": 32608 }, { "epoch": 1.7756903745754289, "grad_norm": 0.5763508853752508, "learning_rate": 6.524378952576282e-06, "loss": 11.539, "step": 32609 }, { "epoch": 1.7757448285720119, "grad_norm": 0.6068411877977646, "learning_rate": 6.52124627902807e-06, "loss": 11.7649, "step": 32610 }, { "epoch": 1.7757992825685949, "grad_norm": 0.6007422795674634, "learning_rate": 6.518114332386327e-06, "loss": 11.8686, "step": 32611 }, { "epoch": 1.7758537365651779, "grad_norm": 0.5722685691181251, "learning_rate": 6.5149831126754105e-06, "loss": 11.9874, "step": 32612 }, { "epoch": 1.775908190561761, "grad_norm": 0.551065592349112, "learning_rate": 6.511852619919656e-06, "loss": 11.6544, "step": 32613 }, { "epoch": 1.775962644558344, "grad_norm": 0.5352143631889089, "learning_rate": 6.508722854143423e-06, "loss": 11.653, "step": 32614 }, { "epoch": 1.776017098554927, "grad_norm": 0.5635059400041645, "learning_rate": 6.505593815371047e-06, "loss": 11.8178, "step": 32615 }, { "epoch": 1.77607155255151, "grad_norm": 0.5749195072114188, "learning_rate": 6.502465503626831e-06, "loss": 11.843, "step": 32616 }, { "epoch": 1.776126006548093, "grad_norm": 0.5009668994357556, "learning_rate": 6.499337918935133e-06, "loss": 11.7927, "step": 32617 }, { "epoch": 1.7761804605446763, "grad_norm": 0.504484548633708, "learning_rate": 6.496211061320256e-06, "loss": 11.7142, "step": 32618 }, { "epoch": 1.7762349145412593, "grad_norm": 0.5485633307366743, "learning_rate": 6.493084930806537e-06, "loss": 11.7661, "step": 32619 }, { "epoch": 1.7762893685378423, "grad_norm": 0.4947471269227897, "learning_rate": 6.489959527418255e-06, "loss": 11.7114, "step": 32620 }, { "epoch": 1.7763438225344252, "grad_norm": 0.5299280750765318, "learning_rate": 6.486834851179746e-06, "loss": 11.7021, "step": 32621 }, { "epoch": 1.7763982765310082, "grad_norm": 0.6046416560381348, "learning_rate": 6.4837109021152696e-06, "loss": 11.8864, "step": 32622 }, { "epoch": 1.7764527305275912, "grad_norm": 0.5971586014993002, "learning_rate": 6.48058768024915e-06, "loss": 11.8263, "step": 32623 }, { "epoch": 1.7765071845241742, "grad_norm": 0.5535618459704909, "learning_rate": 6.477465185605669e-06, "loss": 11.8389, "step": 32624 }, { "epoch": 1.7765616385207572, "grad_norm": 0.5770403531098282, "learning_rate": 6.474343418209106e-06, "loss": 11.7517, "step": 32625 }, { "epoch": 1.7766160925173402, "grad_norm": 0.5941900106701521, "learning_rate": 6.471222378083719e-06, "loss": 11.784, "step": 32626 }, { "epoch": 1.7766705465139232, "grad_norm": 0.5260095947151255, "learning_rate": 6.468102065253812e-06, "loss": 11.7068, "step": 32627 }, { "epoch": 1.7767250005105062, "grad_norm": 0.4935738656016135, "learning_rate": 6.464982479743598e-06, "loss": 11.7772, "step": 32628 }, { "epoch": 1.7767794545070892, "grad_norm": 0.5438689668500842, "learning_rate": 6.461863621577391e-06, "loss": 11.6524, "step": 32629 }, { "epoch": 1.7768339085036722, "grad_norm": 0.5956595402759075, "learning_rate": 6.458745490779405e-06, "loss": 11.7141, "step": 32630 }, { "epoch": 1.7768883625002552, "grad_norm": 0.530747206389311, "learning_rate": 6.4556280873739214e-06, "loss": 11.8797, "step": 32631 }, { "epoch": 1.7769428164968382, "grad_norm": 0.5550826770474203, "learning_rate": 6.452511411385131e-06, "loss": 11.8071, "step": 32632 }, { "epoch": 1.7769972704934212, "grad_norm": 0.51211618541263, "learning_rate": 6.449395462837315e-06, "loss": 11.6899, "step": 32633 }, { "epoch": 1.7770517244900041, "grad_norm": 0.5216564382793045, "learning_rate": 6.446280241754687e-06, "loss": 11.7083, "step": 32634 }, { "epoch": 1.7771061784865871, "grad_norm": 0.5227773404881256, "learning_rate": 6.443165748161484e-06, "loss": 11.791, "step": 32635 }, { "epoch": 1.7771606324831704, "grad_norm": 0.5125099603910652, "learning_rate": 6.440051982081918e-06, "loss": 11.7994, "step": 32636 }, { "epoch": 1.7772150864797533, "grad_norm": 0.5557595719417294, "learning_rate": 6.436938943540172e-06, "loss": 11.596, "step": 32637 }, { "epoch": 1.7772695404763363, "grad_norm": 0.5539884593410619, "learning_rate": 6.433826632560491e-06, "loss": 11.7835, "step": 32638 }, { "epoch": 1.7773239944729193, "grad_norm": 0.5064855042708822, "learning_rate": 6.430715049167069e-06, "loss": 11.7443, "step": 32639 }, { "epoch": 1.7773784484695023, "grad_norm": 0.5309002760722017, "learning_rate": 6.427604193384085e-06, "loss": 11.9209, "step": 32640 }, { "epoch": 1.7774329024660853, "grad_norm": 0.5919438412692817, "learning_rate": 6.424494065235764e-06, "loss": 11.6732, "step": 32641 }, { "epoch": 1.7774873564626685, "grad_norm": 0.5355567293841104, "learning_rate": 6.421384664746244e-06, "loss": 11.7462, "step": 32642 }, { "epoch": 1.7775418104592515, "grad_norm": 0.5255919046413203, "learning_rate": 6.418275991939759e-06, "loss": 11.8555, "step": 32643 }, { "epoch": 1.7775962644558345, "grad_norm": 0.5583327883722515, "learning_rate": 6.415168046840425e-06, "loss": 11.7699, "step": 32644 }, { "epoch": 1.7776507184524175, "grad_norm": 0.5817085883094335, "learning_rate": 6.412060829472466e-06, "loss": 11.7111, "step": 32645 }, { "epoch": 1.7777051724490005, "grad_norm": 0.5088954993976289, "learning_rate": 6.408954339860007e-06, "loss": 11.8435, "step": 32646 }, { "epoch": 1.7777596264455835, "grad_norm": 0.5786548787336618, "learning_rate": 6.405848578027207e-06, "loss": 11.7142, "step": 32647 }, { "epoch": 1.7778140804421665, "grad_norm": 0.5514046851765381, "learning_rate": 6.402743543998235e-06, "loss": 11.6604, "step": 32648 }, { "epoch": 1.7778685344387495, "grad_norm": 0.5248882071075454, "learning_rate": 6.399639237797206e-06, "loss": 11.727, "step": 32649 }, { "epoch": 1.7779229884353325, "grad_norm": 0.5662896027745242, "learning_rate": 6.396535659448288e-06, "loss": 11.8725, "step": 32650 }, { "epoch": 1.7779774424319155, "grad_norm": 0.5455941611714284, "learning_rate": 6.393432808975619e-06, "loss": 11.8148, "step": 32651 }, { "epoch": 1.7780318964284985, "grad_norm": 0.5795116195227646, "learning_rate": 6.3903306864032895e-06, "loss": 11.7415, "step": 32652 }, { "epoch": 1.7780863504250815, "grad_norm": 0.5435402068006697, "learning_rate": 6.38722929175547e-06, "loss": 11.7942, "step": 32653 }, { "epoch": 1.7781408044216644, "grad_norm": 0.4898854798438047, "learning_rate": 6.384128625056241e-06, "loss": 11.6952, "step": 32654 }, { "epoch": 1.7781952584182474, "grad_norm": 0.5502141616198264, "learning_rate": 6.381028686329738e-06, "loss": 11.687, "step": 32655 }, { "epoch": 1.7782497124148304, "grad_norm": 0.5715235364798035, "learning_rate": 6.377929475600064e-06, "loss": 11.6627, "step": 32656 }, { "epoch": 1.7783041664114134, "grad_norm": 0.5565606715258251, "learning_rate": 6.37483099289129e-06, "loss": 11.7002, "step": 32657 }, { "epoch": 1.7783586204079964, "grad_norm": 0.559697312513193, "learning_rate": 6.371733238227539e-06, "loss": 11.8468, "step": 32658 }, { "epoch": 1.7784130744045796, "grad_norm": 0.5666534709032173, "learning_rate": 6.368636211632883e-06, "loss": 11.7649, "step": 32659 }, { "epoch": 1.7784675284011626, "grad_norm": 0.5574114448826537, "learning_rate": 6.3655399131314245e-06, "loss": 11.7239, "step": 32660 }, { "epoch": 1.7785219823977456, "grad_norm": 0.5925011940755665, "learning_rate": 6.362444342747232e-06, "loss": 11.8493, "step": 32661 }, { "epoch": 1.7785764363943286, "grad_norm": 0.5269764138423191, "learning_rate": 6.359349500504364e-06, "loss": 11.7507, "step": 32662 }, { "epoch": 1.7786308903909116, "grad_norm": 0.5403476584558234, "learning_rate": 6.356255386426912e-06, "loss": 11.8175, "step": 32663 }, { "epoch": 1.7786853443874946, "grad_norm": 0.61565991339433, "learning_rate": 6.353162000538903e-06, "loss": 12.0279, "step": 32664 }, { "epoch": 1.7787397983840778, "grad_norm": 0.5591293243317734, "learning_rate": 6.350069342864429e-06, "loss": 11.7781, "step": 32665 }, { "epoch": 1.7787942523806608, "grad_norm": 0.5662041809359237, "learning_rate": 6.346977413427524e-06, "loss": 11.9033, "step": 32666 }, { "epoch": 1.7788487063772438, "grad_norm": 0.5844082084590769, "learning_rate": 6.343886212252215e-06, "loss": 11.8189, "step": 32667 }, { "epoch": 1.7789031603738268, "grad_norm": 0.7302541912977605, "learning_rate": 6.340795739362559e-06, "loss": 11.8814, "step": 32668 }, { "epoch": 1.7789576143704098, "grad_norm": 0.5493051291355272, "learning_rate": 6.337705994782573e-06, "loss": 11.7421, "step": 32669 }, { "epoch": 1.7790120683669928, "grad_norm": 0.5271900879138697, "learning_rate": 6.3346169785363115e-06, "loss": 11.7093, "step": 32670 }, { "epoch": 1.7790665223635758, "grad_norm": 0.5556375173986846, "learning_rate": 6.331528690647759e-06, "loss": 11.7206, "step": 32671 }, { "epoch": 1.7791209763601588, "grad_norm": 0.5979757578752126, "learning_rate": 6.328441131140938e-06, "loss": 11.7581, "step": 32672 }, { "epoch": 1.7791754303567417, "grad_norm": 0.5278252118688105, "learning_rate": 6.325354300039887e-06, "loss": 11.8388, "step": 32673 }, { "epoch": 1.7792298843533247, "grad_norm": 0.5879647844127346, "learning_rate": 6.322268197368586e-06, "loss": 11.7815, "step": 32674 }, { "epoch": 1.7792843383499077, "grad_norm": 0.5437098437859124, "learning_rate": 6.3191828231510375e-06, "loss": 11.7574, "step": 32675 }, { "epoch": 1.7793387923464907, "grad_norm": 0.5610889898473906, "learning_rate": 6.316098177411245e-06, "loss": 11.7519, "step": 32676 }, { "epoch": 1.7793932463430737, "grad_norm": 0.5288993721382848, "learning_rate": 6.313014260173167e-06, "loss": 11.641, "step": 32677 }, { "epoch": 1.7794477003396567, "grad_norm": 0.5735405654787178, "learning_rate": 6.309931071460806e-06, "loss": 11.7809, "step": 32678 }, { "epoch": 1.7795021543362397, "grad_norm": 0.550847871540742, "learning_rate": 6.3068486112981325e-06, "loss": 11.7952, "step": 32679 }, { "epoch": 1.7795566083328227, "grad_norm": 0.6224556952124183, "learning_rate": 6.303766879709117e-06, "loss": 11.8511, "step": 32680 }, { "epoch": 1.7796110623294057, "grad_norm": 0.49699209659575416, "learning_rate": 6.300685876717716e-06, "loss": 11.7905, "step": 32681 }, { "epoch": 1.779665516325989, "grad_norm": 0.5197056696772931, "learning_rate": 6.2976056023479e-06, "loss": 11.7658, "step": 32682 }, { "epoch": 1.779719970322572, "grad_norm": 0.5299205418333491, "learning_rate": 6.294526056623617e-06, "loss": 11.7874, "step": 32683 }, { "epoch": 1.779774424319155, "grad_norm": 0.5006191990495225, "learning_rate": 6.291447239568815e-06, "loss": 11.6852, "step": 32684 }, { "epoch": 1.7798288783157379, "grad_norm": 0.5241928736389975, "learning_rate": 6.28836915120744e-06, "loss": 11.7525, "step": 32685 }, { "epoch": 1.7798833323123209, "grad_norm": 0.5512105472519098, "learning_rate": 6.28529179156343e-06, "loss": 11.8235, "step": 32686 }, { "epoch": 1.7799377863089039, "grad_norm": 0.5758030156783916, "learning_rate": 6.282215160660676e-06, "loss": 11.8157, "step": 32687 }, { "epoch": 1.779992240305487, "grad_norm": 0.5578829898472907, "learning_rate": 6.279139258523159e-06, "loss": 11.6964, "step": 32688 }, { "epoch": 1.78004669430207, "grad_norm": 0.5467188643625759, "learning_rate": 6.27606408517476e-06, "loss": 11.7781, "step": 32689 }, { "epoch": 1.780101148298653, "grad_norm": 0.6195338947234599, "learning_rate": 6.2729896406394155e-06, "loss": 11.8318, "step": 32690 }, { "epoch": 1.780155602295236, "grad_norm": 0.555715689687519, "learning_rate": 6.269915924941006e-06, "loss": 11.7176, "step": 32691 }, { "epoch": 1.780210056291819, "grad_norm": 0.5294021269516758, "learning_rate": 6.266842938103468e-06, "loss": 11.8282, "step": 32692 }, { "epoch": 1.780264510288402, "grad_norm": 0.5760740376196016, "learning_rate": 6.2637706801506605e-06, "loss": 11.7588, "step": 32693 }, { "epoch": 1.780318964284985, "grad_norm": 0.5384086038782483, "learning_rate": 6.2606991511064865e-06, "loss": 11.783, "step": 32694 }, { "epoch": 1.780373418281568, "grad_norm": 0.5761383915638604, "learning_rate": 6.257628350994859e-06, "loss": 11.8015, "step": 32695 }, { "epoch": 1.780427872278151, "grad_norm": 0.546854857346783, "learning_rate": 6.254558279839628e-06, "loss": 11.8423, "step": 32696 }, { "epoch": 1.780482326274734, "grad_norm": 0.5394379748626792, "learning_rate": 6.251488937664674e-06, "loss": 11.6775, "step": 32697 }, { "epoch": 1.780536780271317, "grad_norm": 0.5814153801090587, "learning_rate": 6.248420324493853e-06, "loss": 11.7156, "step": 32698 }, { "epoch": 1.7805912342679, "grad_norm": 0.5081416254563221, "learning_rate": 6.245352440351037e-06, "loss": 11.8127, "step": 32699 }, { "epoch": 1.780645688264483, "grad_norm": 0.5885158572104521, "learning_rate": 6.242285285260097e-06, "loss": 11.7697, "step": 32700 }, { "epoch": 1.780700142261066, "grad_norm": 0.5903081283855978, "learning_rate": 6.2392188592448554e-06, "loss": 11.8192, "step": 32701 }, { "epoch": 1.780754596257649, "grad_norm": 0.5813850653756711, "learning_rate": 6.236153162329184e-06, "loss": 11.6511, "step": 32702 }, { "epoch": 1.780809050254232, "grad_norm": 0.6015448142938231, "learning_rate": 6.233088194536896e-06, "loss": 11.8689, "step": 32703 }, { "epoch": 1.780863504250815, "grad_norm": 0.5436128411710512, "learning_rate": 6.2300239558918506e-06, "loss": 11.7093, "step": 32704 }, { "epoch": 1.780917958247398, "grad_norm": 0.601270612565341, "learning_rate": 6.226960446417851e-06, "loss": 11.7905, "step": 32705 }, { "epoch": 1.7809724122439812, "grad_norm": 0.5890192276838048, "learning_rate": 6.223897666138756e-06, "loss": 11.9191, "step": 32706 }, { "epoch": 1.7810268662405642, "grad_norm": 0.5455178899138761, "learning_rate": 6.220835615078346e-06, "loss": 11.7916, "step": 32707 }, { "epoch": 1.7810813202371472, "grad_norm": 0.552575852189147, "learning_rate": 6.217774293260436e-06, "loss": 11.7924, "step": 32708 }, { "epoch": 1.7811357742337302, "grad_norm": 0.526128252261358, "learning_rate": 6.2147137007088405e-06, "loss": 11.794, "step": 32709 }, { "epoch": 1.7811902282303131, "grad_norm": 0.5799430183200035, "learning_rate": 6.2116538374473735e-06, "loss": 11.7877, "step": 32710 }, { "epoch": 1.7812446822268961, "grad_norm": 0.5552499412949681, "learning_rate": 6.208594703499804e-06, "loss": 11.8429, "step": 32711 }, { "epoch": 1.7812991362234794, "grad_norm": 0.5965677348838014, "learning_rate": 6.2055362988899355e-06, "loss": 11.8804, "step": 32712 }, { "epoch": 1.7813535902200623, "grad_norm": 0.5188102393525604, "learning_rate": 6.202478623641539e-06, "loss": 11.7638, "step": 32713 }, { "epoch": 1.7814080442166453, "grad_norm": 0.5312133620617769, "learning_rate": 6.1994216777784166e-06, "loss": 11.6473, "step": 32714 }, { "epoch": 1.7814624982132283, "grad_norm": 0.5897830592166103, "learning_rate": 6.196365461324305e-06, "loss": 11.805, "step": 32715 }, { "epoch": 1.7815169522098113, "grad_norm": 0.546088818681991, "learning_rate": 6.193309974302997e-06, "loss": 11.8009, "step": 32716 }, { "epoch": 1.7815714062063943, "grad_norm": 0.5243351059626951, "learning_rate": 6.190255216738239e-06, "loss": 11.7343, "step": 32717 }, { "epoch": 1.7816258602029773, "grad_norm": 0.5256419405680276, "learning_rate": 6.187201188653779e-06, "loss": 11.7348, "step": 32718 }, { "epoch": 1.7816803141995603, "grad_norm": 0.5594804599033242, "learning_rate": 6.184147890073388e-06, "loss": 11.692, "step": 32719 }, { "epoch": 1.7817347681961433, "grad_norm": 0.5321778004033472, "learning_rate": 6.18109532102078e-06, "loss": 11.7293, "step": 32720 }, { "epoch": 1.7817892221927263, "grad_norm": 0.5011377673136107, "learning_rate": 6.178043481519713e-06, "loss": 11.7447, "step": 32721 }, { "epoch": 1.7818436761893093, "grad_norm": 0.5403480612341243, "learning_rate": 6.174992371593924e-06, "loss": 11.8208, "step": 32722 }, { "epoch": 1.7818981301858923, "grad_norm": 0.5525945969178169, "learning_rate": 6.171941991267116e-06, "loss": 11.8392, "step": 32723 }, { "epoch": 1.7819525841824753, "grad_norm": 0.5598211056078931, "learning_rate": 6.168892340563037e-06, "loss": 11.7512, "step": 32724 }, { "epoch": 1.7820070381790583, "grad_norm": 0.5632965586119917, "learning_rate": 6.165843419505379e-06, "loss": 11.696, "step": 32725 }, { "epoch": 1.7820614921756412, "grad_norm": 0.5739009722718315, "learning_rate": 6.162795228117868e-06, "loss": 11.8494, "step": 32726 }, { "epoch": 1.7821159461722242, "grad_norm": 0.5474179586942003, "learning_rate": 6.159747766424195e-06, "loss": 11.8151, "step": 32727 }, { "epoch": 1.7821704001688072, "grad_norm": 0.5117156862463175, "learning_rate": 6.156701034448042e-06, "loss": 11.7808, "step": 32728 }, { "epoch": 1.7822248541653904, "grad_norm": 0.5650488556079817, "learning_rate": 6.153655032213135e-06, "loss": 11.7281, "step": 32729 }, { "epoch": 1.7822793081619734, "grad_norm": 0.584714363894475, "learning_rate": 6.150609759743131e-06, "loss": 11.7672, "step": 32730 }, { "epoch": 1.7823337621585564, "grad_norm": 0.5345534779849421, "learning_rate": 6.147565217061735e-06, "loss": 11.8112, "step": 32731 }, { "epoch": 1.7823882161551394, "grad_norm": 0.5670964215084656, "learning_rate": 6.144521404192616e-06, "loss": 11.8892, "step": 32732 }, { "epoch": 1.7824426701517224, "grad_norm": 0.6037630502503243, "learning_rate": 6.141478321159422e-06, "loss": 12.0556, "step": 32733 }, { "epoch": 1.7824971241483054, "grad_norm": 0.5704912551789066, "learning_rate": 6.138435967985845e-06, "loss": 11.7784, "step": 32734 }, { "epoch": 1.7825515781448886, "grad_norm": 0.4837017365684751, "learning_rate": 6.135394344695522e-06, "loss": 11.7829, "step": 32735 }, { "epoch": 1.7826060321414716, "grad_norm": 0.5633150706322234, "learning_rate": 6.132353451312134e-06, "loss": 11.7697, "step": 32736 }, { "epoch": 1.7826604861380546, "grad_norm": 0.5994231024040529, "learning_rate": 6.129313287859295e-06, "loss": 11.8579, "step": 32737 }, { "epoch": 1.7827149401346376, "grad_norm": 0.5598219524080285, "learning_rate": 6.126273854360653e-06, "loss": 11.8211, "step": 32738 }, { "epoch": 1.7827693941312206, "grad_norm": 0.5437292911132128, "learning_rate": 6.123235150839857e-06, "loss": 11.7411, "step": 32739 }, { "epoch": 1.7828238481278036, "grad_norm": 0.5355352760056432, "learning_rate": 6.120197177320508e-06, "loss": 11.8826, "step": 32740 }, { "epoch": 1.7828783021243866, "grad_norm": 0.5801272054397876, "learning_rate": 6.117159933826277e-06, "loss": 11.8695, "step": 32741 }, { "epoch": 1.7829327561209696, "grad_norm": 0.6026029119665858, "learning_rate": 6.114123420380724e-06, "loss": 11.7992, "step": 32742 }, { "epoch": 1.7829872101175526, "grad_norm": 0.5090448052521881, "learning_rate": 6.111087637007506e-06, "loss": 11.8298, "step": 32743 }, { "epoch": 1.7830416641141356, "grad_norm": 0.5809190171254183, "learning_rate": 6.1080525837302284e-06, "loss": 11.8446, "step": 32744 }, { "epoch": 1.7830961181107186, "grad_norm": 0.5084379390934818, "learning_rate": 6.105018260572459e-06, "loss": 11.6902, "step": 32745 }, { "epoch": 1.7831505721073015, "grad_norm": 0.5102844516896258, "learning_rate": 6.101984667557836e-06, "loss": 11.7845, "step": 32746 }, { "epoch": 1.7832050261038845, "grad_norm": 0.5393110489205621, "learning_rate": 6.098951804709918e-06, "loss": 11.7556, "step": 32747 }, { "epoch": 1.7832594801004675, "grad_norm": 0.5610236594888275, "learning_rate": 6.095919672052286e-06, "loss": 11.8582, "step": 32748 }, { "epoch": 1.7833139340970505, "grad_norm": 0.5295984301667501, "learning_rate": 6.0928882696085435e-06, "loss": 11.8565, "step": 32749 }, { "epoch": 1.7833683880936335, "grad_norm": 0.5455010219919253, "learning_rate": 6.0898575974022375e-06, "loss": 11.7454, "step": 32750 }, { "epoch": 1.7834228420902165, "grad_norm": 0.532496244299336, "learning_rate": 6.0868276554569725e-06, "loss": 11.7405, "step": 32751 }, { "epoch": 1.7834772960867997, "grad_norm": 0.5289465348200701, "learning_rate": 6.083798443796251e-06, "loss": 11.7201, "step": 32752 }, { "epoch": 1.7835317500833827, "grad_norm": 0.5679209360527092, "learning_rate": 6.080769962443689e-06, "loss": 11.7737, "step": 32753 }, { "epoch": 1.7835862040799657, "grad_norm": 0.521273573393573, "learning_rate": 6.077742211422799e-06, "loss": 11.6698, "step": 32754 }, { "epoch": 1.7836406580765487, "grad_norm": 0.5747886977133413, "learning_rate": 6.07471519075713e-06, "loss": 11.8493, "step": 32755 }, { "epoch": 1.7836951120731317, "grad_norm": 0.5486564983341604, "learning_rate": 6.071688900470251e-06, "loss": 11.5643, "step": 32756 }, { "epoch": 1.7837495660697147, "grad_norm": 0.5807463120178638, "learning_rate": 6.068663340585656e-06, "loss": 11.7643, "step": 32757 }, { "epoch": 1.783804020066298, "grad_norm": 0.6299894467839595, "learning_rate": 6.065638511126881e-06, "loss": 11.8122, "step": 32758 }, { "epoch": 1.783858474062881, "grad_norm": 0.5433739098086517, "learning_rate": 6.062614412117462e-06, "loss": 11.8107, "step": 32759 }, { "epoch": 1.7839129280594639, "grad_norm": 0.5579663249929967, "learning_rate": 6.059591043580892e-06, "loss": 11.7298, "step": 32760 }, { "epoch": 1.7839673820560469, "grad_norm": 0.5379817593054868, "learning_rate": 6.056568405540719e-06, "loss": 11.8564, "step": 32761 }, { "epoch": 1.7840218360526299, "grad_norm": 0.5656778190490366, "learning_rate": 6.053546498020401e-06, "loss": 11.7822, "step": 32762 }, { "epoch": 1.7840762900492129, "grad_norm": 0.517132446100167, "learning_rate": 6.050525321043487e-06, "loss": 11.677, "step": 32763 }, { "epoch": 1.7841307440457959, "grad_norm": 0.5526752874623392, "learning_rate": 6.047504874633414e-06, "loss": 11.683, "step": 32764 }, { "epoch": 1.7841851980423789, "grad_norm": 0.6635846424445987, "learning_rate": 6.0444851588137065e-06, "loss": 11.8886, "step": 32765 }, { "epoch": 1.7842396520389618, "grad_norm": 0.5028456486693429, "learning_rate": 6.041466173607868e-06, "loss": 11.7425, "step": 32766 }, { "epoch": 1.7842941060355448, "grad_norm": 0.5358399233934722, "learning_rate": 6.038447919039314e-06, "loss": 11.8024, "step": 32767 }, { "epoch": 1.7843485600321278, "grad_norm": 0.6545409848659228, "learning_rate": 6.03543039513157e-06, "loss": 11.932, "step": 32768 }, { "epoch": 1.7844030140287108, "grad_norm": 0.5409977093844731, "learning_rate": 6.032413601908049e-06, "loss": 11.7231, "step": 32769 }, { "epoch": 1.7844574680252938, "grad_norm": 0.5547535912506245, "learning_rate": 6.029397539392246e-06, "loss": 11.8589, "step": 32770 }, { "epoch": 1.7845119220218768, "grad_norm": 0.5452665682740018, "learning_rate": 6.026382207607617e-06, "loss": 11.8653, "step": 32771 }, { "epoch": 1.7845663760184598, "grad_norm": 0.5795592234860748, "learning_rate": 6.02336760657759e-06, "loss": 11.8091, "step": 32772 }, { "epoch": 1.7846208300150428, "grad_norm": 0.5758528998235982, "learning_rate": 6.020353736325635e-06, "loss": 11.7532, "step": 32773 }, { "epoch": 1.7846752840116258, "grad_norm": 0.5091401476597546, "learning_rate": 6.017340596875143e-06, "loss": 11.6873, "step": 32774 }, { "epoch": 1.7847297380082088, "grad_norm": 0.531788250732534, "learning_rate": 6.014328188249596e-06, "loss": 11.8192, "step": 32775 }, { "epoch": 1.784784192004792, "grad_norm": 0.5296134574860314, "learning_rate": 6.011316510472386e-06, "loss": 11.7253, "step": 32776 }, { "epoch": 1.784838646001375, "grad_norm": 0.527924853212081, "learning_rate": 6.00830556356693e-06, "loss": 11.8661, "step": 32777 }, { "epoch": 1.784893099997958, "grad_norm": 0.5520828888280412, "learning_rate": 6.005295347556672e-06, "loss": 11.7979, "step": 32778 }, { "epoch": 1.784947553994541, "grad_norm": 0.5351865975895744, "learning_rate": 6.0022858624649735e-06, "loss": 11.7371, "step": 32779 }, { "epoch": 1.785002007991124, "grad_norm": 0.537562345824475, "learning_rate": 5.999277108315271e-06, "loss": 11.749, "step": 32780 }, { "epoch": 1.785056461987707, "grad_norm": 0.6614506627680491, "learning_rate": 5.9962690851309675e-06, "loss": 11.9624, "step": 32781 }, { "epoch": 1.7851109159842902, "grad_norm": 0.532982334405565, "learning_rate": 5.993261792935423e-06, "loss": 11.8047, "step": 32782 }, { "epoch": 1.7851653699808732, "grad_norm": 0.565314211049647, "learning_rate": 5.990255231752062e-06, "loss": 11.8521, "step": 32783 }, { "epoch": 1.7852198239774562, "grad_norm": 0.559505079002129, "learning_rate": 5.9872494016042225e-06, "loss": 11.9165, "step": 32784 }, { "epoch": 1.7852742779740391, "grad_norm": 0.49870783123113654, "learning_rate": 5.984244302515307e-06, "loss": 11.6201, "step": 32785 }, { "epoch": 1.7853287319706221, "grad_norm": 0.5204790619077684, "learning_rate": 5.981239934508686e-06, "loss": 11.8264, "step": 32786 }, { "epoch": 1.7853831859672051, "grad_norm": 0.5996319298658219, "learning_rate": 5.9782362976076865e-06, "loss": 11.8328, "step": 32787 }, { "epoch": 1.7854376399637881, "grad_norm": 0.5637128176984325, "learning_rate": 5.97523339183571e-06, "loss": 11.8389, "step": 32788 }, { "epoch": 1.7854920939603711, "grad_norm": 0.5489482003779558, "learning_rate": 5.972231217216062e-06, "loss": 11.7052, "step": 32789 }, { "epoch": 1.785546547956954, "grad_norm": 0.6024242860078325, "learning_rate": 5.969229773772134e-06, "loss": 11.9011, "step": 32790 }, { "epoch": 1.785601001953537, "grad_norm": 0.5263928789986556, "learning_rate": 5.966229061527229e-06, "loss": 11.8157, "step": 32791 }, { "epoch": 1.78565545595012, "grad_norm": 0.5241021630678591, "learning_rate": 5.963229080504696e-06, "loss": 11.7763, "step": 32792 }, { "epoch": 1.785709909946703, "grad_norm": 0.5216179403005902, "learning_rate": 5.960229830727882e-06, "loss": 11.7932, "step": 32793 }, { "epoch": 1.785764363943286, "grad_norm": 0.5423555008380151, "learning_rate": 5.957231312220069e-06, "loss": 11.8024, "step": 32794 }, { "epoch": 1.785818817939869, "grad_norm": 0.5718442422207454, "learning_rate": 5.9542335250046155e-06, "loss": 11.5255, "step": 32795 }, { "epoch": 1.785873271936452, "grad_norm": 0.5243168265178102, "learning_rate": 5.951236469104815e-06, "loss": 11.7026, "step": 32796 }, { "epoch": 1.785927725933035, "grad_norm": 0.48359023598502987, "learning_rate": 5.9482401445439595e-06, "loss": 11.7673, "step": 32797 }, { "epoch": 1.785982179929618, "grad_norm": 0.5389200397432837, "learning_rate": 5.945244551345364e-06, "loss": 11.7954, "step": 32798 }, { "epoch": 1.7860366339262013, "grad_norm": 0.5026237159577307, "learning_rate": 5.94224968953232e-06, "loss": 11.6851, "step": 32799 }, { "epoch": 1.7860910879227843, "grad_norm": 0.5157097858037699, "learning_rate": 5.939255559128121e-06, "loss": 11.7758, "step": 32800 }, { "epoch": 1.7861455419193673, "grad_norm": 0.5293425019336567, "learning_rate": 5.936262160156025e-06, "loss": 11.7399, "step": 32801 }, { "epoch": 1.7861999959159502, "grad_norm": 0.4931659322533404, "learning_rate": 5.933269492639359e-06, "loss": 11.6566, "step": 32802 }, { "epoch": 1.7862544499125332, "grad_norm": 0.5246199651328718, "learning_rate": 5.930277556601338e-06, "loss": 11.7588, "step": 32803 }, { "epoch": 1.7863089039091162, "grad_norm": 0.545291086389174, "learning_rate": 5.927286352065253e-06, "loss": 11.7092, "step": 32804 }, { "epoch": 1.7863633579056994, "grad_norm": 0.6090685558430966, "learning_rate": 5.9242958790543865e-06, "loss": 11.859, "step": 32805 }, { "epoch": 1.7864178119022824, "grad_norm": 0.5063771339523607, "learning_rate": 5.921306137591975e-06, "loss": 11.637, "step": 32806 }, { "epoch": 1.7864722658988654, "grad_norm": 0.5353859595848508, "learning_rate": 5.918317127701245e-06, "loss": 11.8884, "step": 32807 }, { "epoch": 1.7865267198954484, "grad_norm": 0.5738267069233514, "learning_rate": 5.915328849405466e-06, "loss": 11.8586, "step": 32808 }, { "epoch": 1.7865811738920314, "grad_norm": 0.5902060104006173, "learning_rate": 5.912341302727864e-06, "loss": 11.7625, "step": 32809 }, { "epoch": 1.7866356278886144, "grad_norm": 0.5447056975591531, "learning_rate": 5.909354487691687e-06, "loss": 11.8551, "step": 32810 }, { "epoch": 1.7866900818851974, "grad_norm": 0.6176259087339802, "learning_rate": 5.9063684043201285e-06, "loss": 11.75, "step": 32811 }, { "epoch": 1.7867445358817804, "grad_norm": 0.5873460302544379, "learning_rate": 5.903383052636446e-06, "loss": 11.7537, "step": 32812 }, { "epoch": 1.7867989898783634, "grad_norm": 0.5253352394469801, "learning_rate": 5.900398432663823e-06, "loss": 11.84, "step": 32813 }, { "epoch": 1.7868534438749464, "grad_norm": 0.583531651090751, "learning_rate": 5.897414544425483e-06, "loss": 11.9039, "step": 32814 }, { "epoch": 1.7869078978715294, "grad_norm": 0.5496762027681773, "learning_rate": 5.894431387944644e-06, "loss": 11.6629, "step": 32815 }, { "epoch": 1.7869623518681124, "grad_norm": 0.5588490667752386, "learning_rate": 5.891448963244483e-06, "loss": 11.7954, "step": 32816 }, { "epoch": 1.7870168058646954, "grad_norm": 0.5534679792393347, "learning_rate": 5.888467270348208e-06, "loss": 11.7441, "step": 32817 }, { "epoch": 1.7870712598612783, "grad_norm": 0.5075673098860951, "learning_rate": 5.885486309278964e-06, "loss": 11.7683, "step": 32818 }, { "epoch": 1.7871257138578613, "grad_norm": 0.582522103117588, "learning_rate": 5.882506080059968e-06, "loss": 11.7187, "step": 32819 }, { "epoch": 1.7871801678544443, "grad_norm": 0.560677545645884, "learning_rate": 5.879526582714412e-06, "loss": 11.7653, "step": 32820 }, { "epoch": 1.7872346218510273, "grad_norm": 0.5288387157816162, "learning_rate": 5.876547817265421e-06, "loss": 11.7938, "step": 32821 }, { "epoch": 1.7872890758476105, "grad_norm": 0.5107213676622757, "learning_rate": 5.873569783736188e-06, "loss": 11.748, "step": 32822 }, { "epoch": 1.7873435298441935, "grad_norm": 0.5409498270161708, "learning_rate": 5.87059248214985e-06, "loss": 11.8192, "step": 32823 }, { "epoch": 1.7873979838407765, "grad_norm": 0.4982023751157804, "learning_rate": 5.867615912529589e-06, "loss": 11.7029, "step": 32824 }, { "epoch": 1.7874524378373595, "grad_norm": 0.5016788576366078, "learning_rate": 5.864640074898509e-06, "loss": 11.7278, "step": 32825 }, { "epoch": 1.7875068918339425, "grad_norm": 0.6166524688272519, "learning_rate": 5.861664969279779e-06, "loss": 11.8109, "step": 32826 }, { "epoch": 1.7875613458305255, "grad_norm": 0.6151538621448444, "learning_rate": 5.858690595696559e-06, "loss": 11.8117, "step": 32827 }, { "epoch": 1.7876157998271087, "grad_norm": 0.5488559382736814, "learning_rate": 5.855716954171919e-06, "loss": 11.7301, "step": 32828 }, { "epoch": 1.7876702538236917, "grad_norm": 0.5040495965532082, "learning_rate": 5.852744044729008e-06, "loss": 11.8371, "step": 32829 }, { "epoch": 1.7877247078202747, "grad_norm": 0.5197784734437446, "learning_rate": 5.849771867390974e-06, "loss": 11.7459, "step": 32830 }, { "epoch": 1.7877791618168577, "grad_norm": 0.5692484554854838, "learning_rate": 5.846800422180876e-06, "loss": 11.7582, "step": 32831 }, { "epoch": 1.7878336158134407, "grad_norm": 0.5860774792490339, "learning_rate": 5.843829709121862e-06, "loss": 11.8864, "step": 32832 }, { "epoch": 1.7878880698100237, "grad_norm": 0.5093837756154763, "learning_rate": 5.840859728237013e-06, "loss": 11.6715, "step": 32833 }, { "epoch": 1.7879425238066067, "grad_norm": 0.529759685057493, "learning_rate": 5.837890479549435e-06, "loss": 11.8237, "step": 32834 }, { "epoch": 1.7879969778031897, "grad_norm": 0.5358648716126443, "learning_rate": 5.834921963082207e-06, "loss": 11.7788, "step": 32835 }, { "epoch": 1.7880514317997727, "grad_norm": 0.49667954301496087, "learning_rate": 5.831954178858412e-06, "loss": 11.6285, "step": 32836 }, { "epoch": 1.7881058857963557, "grad_norm": 0.5044001812051533, "learning_rate": 5.828987126901164e-06, "loss": 11.6813, "step": 32837 }, { "epoch": 1.7881603397929386, "grad_norm": 0.49883837962208377, "learning_rate": 5.826020807233467e-06, "loss": 11.8703, "step": 32838 }, { "epoch": 1.7882147937895216, "grad_norm": 0.520880660530782, "learning_rate": 5.823055219878448e-06, "loss": 11.7585, "step": 32839 }, { "epoch": 1.7882692477861046, "grad_norm": 0.5862275949805851, "learning_rate": 5.820090364859132e-06, "loss": 11.7042, "step": 32840 }, { "epoch": 1.7883237017826876, "grad_norm": 0.6167287607529381, "learning_rate": 5.817126242198578e-06, "loss": 11.7581, "step": 32841 }, { "epoch": 1.7883781557792706, "grad_norm": 0.5315434795855483, "learning_rate": 5.814162851919869e-06, "loss": 11.567, "step": 32842 }, { "epoch": 1.7884326097758536, "grad_norm": 0.5537450764862275, "learning_rate": 5.811200194046007e-06, "loss": 11.7587, "step": 32843 }, { "epoch": 1.7884870637724366, "grad_norm": 0.5531575408292612, "learning_rate": 5.808238268600064e-06, "loss": 11.929, "step": 32844 }, { "epoch": 1.7885415177690196, "grad_norm": 0.5542420036282248, "learning_rate": 5.8052770756050315e-06, "loss": 11.7609, "step": 32845 }, { "epoch": 1.7885959717656028, "grad_norm": 0.5390362602659625, "learning_rate": 5.80231661508398e-06, "loss": 11.7488, "step": 32846 }, { "epoch": 1.7886504257621858, "grad_norm": 0.5517606227076052, "learning_rate": 5.799356887059915e-06, "loss": 11.844, "step": 32847 }, { "epoch": 1.7887048797587688, "grad_norm": 0.5451060092260289, "learning_rate": 5.7963978915558384e-06, "loss": 11.7586, "step": 32848 }, { "epoch": 1.7887593337553518, "grad_norm": 0.5593841263572491, "learning_rate": 5.793439628594777e-06, "loss": 11.7846, "step": 32849 }, { "epoch": 1.7888137877519348, "grad_norm": 0.5517176516321998, "learning_rate": 5.7904820981997125e-06, "loss": 11.7455, "step": 32850 }, { "epoch": 1.788868241748518, "grad_norm": 0.5971910759972061, "learning_rate": 5.787525300393681e-06, "loss": 11.8001, "step": 32851 }, { "epoch": 1.788922695745101, "grad_norm": 0.5457154284841771, "learning_rate": 5.784569235199633e-06, "loss": 11.7141, "step": 32852 }, { "epoch": 1.788977149741684, "grad_norm": 0.5036840400789974, "learning_rate": 5.78161390264057e-06, "loss": 11.8536, "step": 32853 }, { "epoch": 1.789031603738267, "grad_norm": 0.5905699640468144, "learning_rate": 5.778659302739497e-06, "loss": 11.6751, "step": 32854 }, { "epoch": 1.78908605773485, "grad_norm": 0.4858515758353979, "learning_rate": 5.775705435519351e-06, "loss": 11.6447, "step": 32855 }, { "epoch": 1.789140511731433, "grad_norm": 0.49867023838384755, "learning_rate": 5.772752301003148e-06, "loss": 11.6954, "step": 32856 }, { "epoch": 1.789194965728016, "grad_norm": 0.5234473695025423, "learning_rate": 5.769799899213812e-06, "loss": 11.8778, "step": 32857 }, { "epoch": 1.789249419724599, "grad_norm": 0.5285744211556234, "learning_rate": 5.766848230174304e-06, "loss": 11.8652, "step": 32858 }, { "epoch": 1.789303873721182, "grad_norm": 0.5694143238664334, "learning_rate": 5.763897293907605e-06, "loss": 11.6405, "step": 32859 }, { "epoch": 1.789358327717765, "grad_norm": 0.5514348461301628, "learning_rate": 5.760947090436619e-06, "loss": 11.7148, "step": 32860 }, { "epoch": 1.789412781714348, "grad_norm": 0.5305600285038201, "learning_rate": 5.757997619784339e-06, "loss": 11.7894, "step": 32861 }, { "epoch": 1.789467235710931, "grad_norm": 0.5262665324909412, "learning_rate": 5.755048881973657e-06, "loss": 11.8313, "step": 32862 }, { "epoch": 1.789521689707514, "grad_norm": 0.5583189119528796, "learning_rate": 5.752100877027511e-06, "loss": 11.8179, "step": 32863 }, { "epoch": 1.789576143704097, "grad_norm": 0.5139461141436407, "learning_rate": 5.74915360496886e-06, "loss": 11.6874, "step": 32864 }, { "epoch": 1.78963059770068, "grad_norm": 0.531811869166695, "learning_rate": 5.746207065820575e-06, "loss": 11.9065, "step": 32865 }, { "epoch": 1.7896850516972629, "grad_norm": 0.5573450083285999, "learning_rate": 5.743261259605603e-06, "loss": 11.7443, "step": 32866 }, { "epoch": 1.7897395056938459, "grad_norm": 0.5363820158103323, "learning_rate": 5.740316186346839e-06, "loss": 11.7429, "step": 32867 }, { "epoch": 1.7897939596904289, "grad_norm": 0.5264741953493456, "learning_rate": 5.737371846067174e-06, "loss": 11.5681, "step": 32868 }, { "epoch": 1.789848413687012, "grad_norm": 0.5184990745929842, "learning_rate": 5.734428238789524e-06, "loss": 11.8289, "step": 32869 }, { "epoch": 1.789902867683595, "grad_norm": 0.5524714354586424, "learning_rate": 5.73148536453676e-06, "loss": 11.6553, "step": 32870 }, { "epoch": 1.789957321680178, "grad_norm": 0.5484058375687554, "learning_rate": 5.728543223331784e-06, "loss": 11.8601, "step": 32871 }, { "epoch": 1.790011775676761, "grad_norm": 0.5695995821207973, "learning_rate": 5.725601815197445e-06, "loss": 11.7591, "step": 32872 }, { "epoch": 1.790066229673344, "grad_norm": 0.57874390247035, "learning_rate": 5.72266114015666e-06, "loss": 11.7745, "step": 32873 }, { "epoch": 1.790120683669927, "grad_norm": 0.531637325728104, "learning_rate": 5.719721198232253e-06, "loss": 11.7771, "step": 32874 }, { "epoch": 1.7901751376665103, "grad_norm": 0.5462136666414714, "learning_rate": 5.716781989447106e-06, "loss": 11.6699, "step": 32875 }, { "epoch": 1.7902295916630933, "grad_norm": 0.503406205559153, "learning_rate": 5.713843513824091e-06, "loss": 11.6755, "step": 32876 }, { "epoch": 1.7902840456596762, "grad_norm": 0.4759854353265222, "learning_rate": 5.710905771386043e-06, "loss": 11.6307, "step": 32877 }, { "epoch": 1.7903384996562592, "grad_norm": 0.5647717676050596, "learning_rate": 5.70796876215578e-06, "loss": 11.7854, "step": 32878 }, { "epoch": 1.7903929536528422, "grad_norm": 0.5050902283551958, "learning_rate": 5.705032486156181e-06, "loss": 11.5763, "step": 32879 }, { "epoch": 1.7904474076494252, "grad_norm": 0.5145242576764114, "learning_rate": 5.702096943410052e-06, "loss": 11.7488, "step": 32880 }, { "epoch": 1.7905018616460082, "grad_norm": 0.5428305930845646, "learning_rate": 5.69916213394025e-06, "loss": 11.7431, "step": 32881 }, { "epoch": 1.7905563156425912, "grad_norm": 0.5071416773790963, "learning_rate": 5.696228057769559e-06, "loss": 11.7854, "step": 32882 }, { "epoch": 1.7906107696391742, "grad_norm": 0.5550151678472932, "learning_rate": 5.693294714920816e-06, "loss": 11.8018, "step": 32883 }, { "epoch": 1.7906652236357572, "grad_norm": 0.5399487750150528, "learning_rate": 5.690362105416825e-06, "loss": 11.7867, "step": 32884 }, { "epoch": 1.7907196776323402, "grad_norm": 0.5485015365277324, "learning_rate": 5.6874302292803995e-06, "loss": 11.682, "step": 32885 }, { "epoch": 1.7907741316289232, "grad_norm": 0.7571456953560544, "learning_rate": 5.684499086534345e-06, "loss": 11.9283, "step": 32886 }, { "epoch": 1.7908285856255062, "grad_norm": 0.5273593563894615, "learning_rate": 5.681568677201432e-06, "loss": 11.7999, "step": 32887 }, { "epoch": 1.7908830396220892, "grad_norm": 0.626783140819843, "learning_rate": 5.678639001304464e-06, "loss": 11.9423, "step": 32888 }, { "epoch": 1.7909374936186722, "grad_norm": 0.5420035564587004, "learning_rate": 5.675710058866202e-06, "loss": 11.8615, "step": 32889 }, { "epoch": 1.7909919476152552, "grad_norm": 0.518446198273281, "learning_rate": 5.672781849909436e-06, "loss": 11.6747, "step": 32890 }, { "epoch": 1.7910464016118381, "grad_norm": 0.5310628896367843, "learning_rate": 5.669854374456962e-06, "loss": 11.7012, "step": 32891 }, { "epoch": 1.7911008556084214, "grad_norm": 0.5225573995709512, "learning_rate": 5.666927632531494e-06, "loss": 11.8082, "step": 32892 }, { "epoch": 1.7911553096050044, "grad_norm": 0.5447897525909504, "learning_rate": 5.664001624155835e-06, "loss": 11.8022, "step": 32893 }, { "epoch": 1.7912097636015873, "grad_norm": 0.5324193042929436, "learning_rate": 5.661076349352701e-06, "loss": 11.9058, "step": 32894 }, { "epoch": 1.7912642175981703, "grad_norm": 0.6308899564451923, "learning_rate": 5.658151808144874e-06, "loss": 11.6467, "step": 32895 }, { "epoch": 1.7913186715947533, "grad_norm": 0.5388995465943365, "learning_rate": 5.655228000555069e-06, "loss": 11.7834, "step": 32896 }, { "epoch": 1.7913731255913363, "grad_norm": 0.5674606228068833, "learning_rate": 5.652304926606045e-06, "loss": 11.7996, "step": 32897 }, { "epoch": 1.7914275795879195, "grad_norm": 0.5652502506867298, "learning_rate": 5.649382586320517e-06, "loss": 11.7887, "step": 32898 }, { "epoch": 1.7914820335845025, "grad_norm": 0.5457927536965536, "learning_rate": 5.646460979721202e-06, "loss": 11.6952, "step": 32899 }, { "epoch": 1.7915364875810855, "grad_norm": 0.6501923471868624, "learning_rate": 5.643540106830825e-06, "loss": 11.9492, "step": 32900 }, { "epoch": 1.7915909415776685, "grad_norm": 0.5184715250681738, "learning_rate": 5.640619967672123e-06, "loss": 11.789, "step": 32901 }, { "epoch": 1.7916453955742515, "grad_norm": 0.5407044796019927, "learning_rate": 5.637700562267767e-06, "loss": 11.8146, "step": 32902 }, { "epoch": 1.7916998495708345, "grad_norm": 0.5577398170497451, "learning_rate": 5.634781890640484e-06, "loss": 11.7791, "step": 32903 }, { "epoch": 1.7917543035674175, "grad_norm": 0.564337721593817, "learning_rate": 5.631863952812955e-06, "loss": 11.9121, "step": 32904 }, { "epoch": 1.7918087575640005, "grad_norm": 0.5491338197300244, "learning_rate": 5.628946748807895e-06, "loss": 11.7338, "step": 32905 }, { "epoch": 1.7918632115605835, "grad_norm": 0.529156402740374, "learning_rate": 5.626030278647954e-06, "loss": 11.7318, "step": 32906 }, { "epoch": 1.7919176655571665, "grad_norm": 0.6292734936144189, "learning_rate": 5.623114542355845e-06, "loss": 11.7865, "step": 32907 }, { "epoch": 1.7919721195537495, "grad_norm": 0.558882568823229, "learning_rate": 5.620199539954218e-06, "loss": 11.9001, "step": 32908 }, { "epoch": 1.7920265735503325, "grad_norm": 0.5287236011991581, "learning_rate": 5.6172852714657335e-06, "loss": 11.6772, "step": 32909 }, { "epoch": 1.7920810275469155, "grad_norm": 0.5153919322195507, "learning_rate": 5.614371736913082e-06, "loss": 11.6605, "step": 32910 }, { "epoch": 1.7921354815434984, "grad_norm": 0.49298910148421154, "learning_rate": 5.6114589363188915e-06, "loss": 11.7694, "step": 32911 }, { "epoch": 1.7921899355400814, "grad_norm": 0.5639422226384082, "learning_rate": 5.608546869705822e-06, "loss": 11.7764, "step": 32912 }, { "epoch": 1.7922443895366644, "grad_norm": 0.48621126242864204, "learning_rate": 5.605635537096543e-06, "loss": 11.6721, "step": 32913 }, { "epoch": 1.7922988435332474, "grad_norm": 0.5956402582652661, "learning_rate": 5.602724938513649e-06, "loss": 11.8766, "step": 32914 }, { "epoch": 1.7923532975298304, "grad_norm": 0.578792789996769, "learning_rate": 5.599815073979819e-06, "loss": 11.8459, "step": 32915 }, { "epoch": 1.7924077515264136, "grad_norm": 0.520250136008144, "learning_rate": 5.5969059435176386e-06, "loss": 11.8288, "step": 32916 }, { "epoch": 1.7924622055229966, "grad_norm": 0.6073175168381555, "learning_rate": 5.593997547149765e-06, "loss": 11.8408, "step": 32917 }, { "epoch": 1.7925166595195796, "grad_norm": 0.5155050623384161, "learning_rate": 5.5910898848987925e-06, "loss": 11.831, "step": 32918 }, { "epoch": 1.7925711135161626, "grad_norm": 0.5066378974759215, "learning_rate": 5.5881829567873355e-06, "loss": 11.7648, "step": 32919 }, { "epoch": 1.7926255675127456, "grad_norm": 0.530946138852541, "learning_rate": 5.585276762838009e-06, "loss": 11.7931, "step": 32920 }, { "epoch": 1.7926800215093288, "grad_norm": 0.5880682300761637, "learning_rate": 5.582371303073386e-06, "loss": 11.8818, "step": 32921 }, { "epoch": 1.7927344755059118, "grad_norm": 0.7158473795371252, "learning_rate": 5.57946657751609e-06, "loss": 11.8053, "step": 32922 }, { "epoch": 1.7927889295024948, "grad_norm": 0.5643935947936998, "learning_rate": 5.576562586188694e-06, "loss": 11.7546, "step": 32923 }, { "epoch": 1.7928433834990778, "grad_norm": 0.5462105170578148, "learning_rate": 5.573659329113767e-06, "loss": 11.7147, "step": 32924 }, { "epoch": 1.7928978374956608, "grad_norm": 0.5741835540156626, "learning_rate": 5.570756806313926e-06, "loss": 11.9257, "step": 32925 }, { "epoch": 1.7929522914922438, "grad_norm": 0.5454007195822214, "learning_rate": 5.567855017811696e-06, "loss": 11.7167, "step": 32926 }, { "epoch": 1.7930067454888268, "grad_norm": 0.565170862047559, "learning_rate": 5.564953963629671e-06, "loss": 11.7812, "step": 32927 }, { "epoch": 1.7930611994854098, "grad_norm": 0.5751964823867437, "learning_rate": 5.562053643790411e-06, "loss": 11.8629, "step": 32928 }, { "epoch": 1.7931156534819928, "grad_norm": 0.6041347164794797, "learning_rate": 5.5591540583164406e-06, "loss": 11.8195, "step": 32929 }, { "epoch": 1.7931701074785757, "grad_norm": 0.542418557889861, "learning_rate": 5.556255207230343e-06, "loss": 11.802, "step": 32930 }, { "epoch": 1.7932245614751587, "grad_norm": 0.5974718855396798, "learning_rate": 5.553357090554623e-06, "loss": 11.7662, "step": 32931 }, { "epoch": 1.7932790154717417, "grad_norm": 0.49823910135020305, "learning_rate": 5.550459708311862e-06, "loss": 11.7352, "step": 32932 }, { "epoch": 1.7933334694683247, "grad_norm": 0.5628189693596657, "learning_rate": 5.547563060524541e-06, "loss": 11.711, "step": 32933 }, { "epoch": 1.7933879234649077, "grad_norm": 0.5985189818794429, "learning_rate": 5.54466714721521e-06, "loss": 11.6791, "step": 32934 }, { "epoch": 1.7934423774614907, "grad_norm": 0.549031945653635, "learning_rate": 5.541771968406406e-06, "loss": 11.8105, "step": 32935 }, { "epoch": 1.7934968314580737, "grad_norm": 0.585048829188477, "learning_rate": 5.538877524120611e-06, "loss": 11.8103, "step": 32936 }, { "epoch": 1.7935512854546567, "grad_norm": 0.5897362453287507, "learning_rate": 5.5359838143803635e-06, "loss": 11.8408, "step": 32937 }, { "epoch": 1.7936057394512397, "grad_norm": 0.519749619448922, "learning_rate": 5.533090839208133e-06, "loss": 11.714, "step": 32938 }, { "epoch": 1.793660193447823, "grad_norm": 0.5693986335915681, "learning_rate": 5.5301985986264234e-06, "loss": 11.7528, "step": 32939 }, { "epoch": 1.793714647444406, "grad_norm": 0.5171493502925723, "learning_rate": 5.527307092657741e-06, "loss": 11.6295, "step": 32940 }, { "epoch": 1.793769101440989, "grad_norm": 0.5438200903069883, "learning_rate": 5.5244163213245545e-06, "loss": 11.8122, "step": 32941 }, { "epoch": 1.7938235554375719, "grad_norm": 0.6501297040214571, "learning_rate": 5.521526284649359e-06, "loss": 11.8396, "step": 32942 }, { "epoch": 1.7938780094341549, "grad_norm": 0.5484339027683268, "learning_rate": 5.518636982654612e-06, "loss": 11.7793, "step": 32943 }, { "epoch": 1.7939324634307379, "grad_norm": 0.549753147222448, "learning_rate": 5.515748415362798e-06, "loss": 11.893, "step": 32944 }, { "epoch": 1.793986917427321, "grad_norm": 0.5140662256940166, "learning_rate": 5.512860582796353e-06, "loss": 11.7629, "step": 32945 }, { "epoch": 1.794041371423904, "grad_norm": 0.5795490634534858, "learning_rate": 5.5099734849777595e-06, "loss": 11.7468, "step": 32946 }, { "epoch": 1.794095825420487, "grad_norm": 0.5368867896303627, "learning_rate": 5.5070871219294776e-06, "loss": 11.808, "step": 32947 }, { "epoch": 1.79415027941707, "grad_norm": 0.5882965945355642, "learning_rate": 5.50420149367391e-06, "loss": 11.8415, "step": 32948 }, { "epoch": 1.794204733413653, "grad_norm": 0.5701701114734583, "learning_rate": 5.501316600233508e-06, "loss": 11.7892, "step": 32949 }, { "epoch": 1.794259187410236, "grad_norm": 0.5466941626380739, "learning_rate": 5.4984324416307405e-06, "loss": 11.8151, "step": 32950 }, { "epoch": 1.794313641406819, "grad_norm": 0.5454520733639197, "learning_rate": 5.495549017887991e-06, "loss": 11.7103, "step": 32951 }, { "epoch": 1.794368095403402, "grad_norm": 0.5774645190721264, "learning_rate": 5.492666329027718e-06, "loss": 11.7959, "step": 32952 }, { "epoch": 1.794422549399985, "grad_norm": 0.5382498296973779, "learning_rate": 5.4897843750723045e-06, "loss": 11.7658, "step": 32953 }, { "epoch": 1.794477003396568, "grad_norm": 0.5640751589546652, "learning_rate": 5.486903156044187e-06, "loss": 11.8879, "step": 32954 }, { "epoch": 1.794531457393151, "grad_norm": 0.5619623001192191, "learning_rate": 5.48402267196575e-06, "loss": 11.9502, "step": 32955 }, { "epoch": 1.794585911389734, "grad_norm": 0.6720658821772526, "learning_rate": 5.481142922859428e-06, "loss": 11.8907, "step": 32956 }, { "epoch": 1.794640365386317, "grad_norm": 0.5309193801337244, "learning_rate": 5.4782639087475714e-06, "loss": 11.8927, "step": 32957 }, { "epoch": 1.7946948193829, "grad_norm": 0.5060859659602768, "learning_rate": 5.475385629652585e-06, "loss": 11.7418, "step": 32958 }, { "epoch": 1.794749273379483, "grad_norm": 0.5500296358704544, "learning_rate": 5.472508085596861e-06, "loss": 11.5995, "step": 32959 }, { "epoch": 1.794803727376066, "grad_norm": 0.5415131342335241, "learning_rate": 5.469631276602749e-06, "loss": 11.7777, "step": 32960 }, { "epoch": 1.794858181372649, "grad_norm": 0.4773191893379602, "learning_rate": 5.4667552026926415e-06, "loss": 11.7708, "step": 32961 }, { "epoch": 1.7949126353692322, "grad_norm": 0.6345859726488672, "learning_rate": 5.46387986388891e-06, "loss": 11.9852, "step": 32962 }, { "epoch": 1.7949670893658152, "grad_norm": 0.5511800044115259, "learning_rate": 5.461005260213892e-06, "loss": 11.807, "step": 32963 }, { "epoch": 1.7950215433623982, "grad_norm": 0.6016446725644914, "learning_rate": 5.458131391689958e-06, "loss": 11.8363, "step": 32964 }, { "epoch": 1.7950759973589812, "grad_norm": 0.5373914682548222, "learning_rate": 5.455258258339446e-06, "loss": 11.782, "step": 32965 }, { "epoch": 1.7951304513555641, "grad_norm": 0.49659643496513534, "learning_rate": 5.452385860184705e-06, "loss": 11.7047, "step": 32966 }, { "epoch": 1.7951849053521471, "grad_norm": 0.6386390216313624, "learning_rate": 5.449514197248051e-06, "loss": 11.6199, "step": 32967 }, { "epoch": 1.7952393593487304, "grad_norm": 0.6626104470852224, "learning_rate": 5.4466432695518545e-06, "loss": 11.8896, "step": 32968 }, { "epoch": 1.7952938133453133, "grad_norm": 0.5813216714212438, "learning_rate": 5.443773077118419e-06, "loss": 11.8182, "step": 32969 }, { "epoch": 1.7953482673418963, "grad_norm": 0.5478349669179279, "learning_rate": 5.4409036199700395e-06, "loss": 11.8171, "step": 32970 }, { "epoch": 1.7954027213384793, "grad_norm": 0.55359895922794, "learning_rate": 5.438034898129063e-06, "loss": 11.6894, "step": 32971 }, { "epoch": 1.7954571753350623, "grad_norm": 0.5285163082513964, "learning_rate": 5.4351669116177725e-06, "loss": 11.7452, "step": 32972 }, { "epoch": 1.7955116293316453, "grad_norm": 0.5531286289009327, "learning_rate": 5.432299660458484e-06, "loss": 11.7209, "step": 32973 }, { "epoch": 1.7955660833282283, "grad_norm": 0.533561977470842, "learning_rate": 5.429433144673512e-06, "loss": 11.7901, "step": 32974 }, { "epoch": 1.7956205373248113, "grad_norm": 0.5161039121828637, "learning_rate": 5.426567364285107e-06, "loss": 11.6132, "step": 32975 }, { "epoch": 1.7956749913213943, "grad_norm": 0.5068217370561073, "learning_rate": 5.423702319315593e-06, "loss": 11.7355, "step": 32976 }, { "epoch": 1.7957294453179773, "grad_norm": 0.6415120804812983, "learning_rate": 5.42083800978721e-06, "loss": 11.9336, "step": 32977 }, { "epoch": 1.7957838993145603, "grad_norm": 0.526911813800498, "learning_rate": 5.417974435722273e-06, "loss": 11.7279, "step": 32978 }, { "epoch": 1.7958383533111433, "grad_norm": 0.4895660059007519, "learning_rate": 5.415111597143019e-06, "loss": 11.8068, "step": 32979 }, { "epoch": 1.7958928073077263, "grad_norm": 0.5158891977356518, "learning_rate": 5.412249494071709e-06, "loss": 11.8079, "step": 32980 }, { "epoch": 1.7959472613043093, "grad_norm": 0.53320740878039, "learning_rate": 5.4093881265306235e-06, "loss": 11.7425, "step": 32981 }, { "epoch": 1.7960017153008923, "grad_norm": 0.5302737779417936, "learning_rate": 5.406527494541991e-06, "loss": 11.7888, "step": 32982 }, { "epoch": 1.7960561692974752, "grad_norm": 0.5772101317185414, "learning_rate": 5.4036675981280485e-06, "loss": 11.8371, "step": 32983 }, { "epoch": 1.7961106232940582, "grad_norm": 0.5354878640147748, "learning_rate": 5.400808437311078e-06, "loss": 11.9267, "step": 32984 }, { "epoch": 1.7961650772906415, "grad_norm": 0.5921124983555275, "learning_rate": 5.397950012113273e-06, "loss": 11.8837, "step": 32985 }, { "epoch": 1.7962195312872244, "grad_norm": 0.5211389425899624, "learning_rate": 5.395092322556883e-06, "loss": 11.8441, "step": 32986 }, { "epoch": 1.7962739852838074, "grad_norm": 0.5216618274321483, "learning_rate": 5.3922353686641e-06, "loss": 11.6793, "step": 32987 }, { "epoch": 1.7963284392803904, "grad_norm": 0.5172682682443953, "learning_rate": 5.389379150457186e-06, "loss": 11.7679, "step": 32988 }, { "epoch": 1.7963828932769734, "grad_norm": 0.4961430726988221, "learning_rate": 5.38652366795831e-06, "loss": 11.7149, "step": 32989 }, { "epoch": 1.7964373472735564, "grad_norm": 0.6063725149669045, "learning_rate": 5.383668921189689e-06, "loss": 11.8792, "step": 32990 }, { "epoch": 1.7964918012701396, "grad_norm": 0.5702487715606085, "learning_rate": 5.380814910173548e-06, "loss": 11.7312, "step": 32991 }, { "epoch": 1.7965462552667226, "grad_norm": 0.5801547243275779, "learning_rate": 5.377961634932027e-06, "loss": 11.7412, "step": 32992 }, { "epoch": 1.7966007092633056, "grad_norm": 0.5527701141612907, "learning_rate": 5.375109095487374e-06, "loss": 11.7354, "step": 32993 }, { "epoch": 1.7966551632598886, "grad_norm": 0.5735356711884958, "learning_rate": 5.372257291861715e-06, "loss": 11.8257, "step": 32994 }, { "epoch": 1.7967096172564716, "grad_norm": 0.5822056748790422, "learning_rate": 5.369406224077256e-06, "loss": 11.6295, "step": 32995 }, { "epoch": 1.7967640712530546, "grad_norm": 0.5841452217221534, "learning_rate": 5.366555892156178e-06, "loss": 11.8433, "step": 32996 }, { "epoch": 1.7968185252496376, "grad_norm": 0.5432889074077022, "learning_rate": 5.363706296120618e-06, "loss": 11.7912, "step": 32997 }, { "epoch": 1.7968729792462206, "grad_norm": 0.5221669384389692, "learning_rate": 5.360857435992772e-06, "loss": 11.8043, "step": 32998 }, { "epoch": 1.7969274332428036, "grad_norm": 0.5864699388637166, "learning_rate": 5.358009311794754e-06, "loss": 11.8788, "step": 32999 }, { "epoch": 1.7969818872393866, "grad_norm": 0.5059116582391517, "learning_rate": 5.355161923548724e-06, "loss": 11.686, "step": 33000 }, { "epoch": 1.7970363412359696, "grad_norm": 0.5752081796614689, "learning_rate": 5.352315271276831e-06, "loss": 11.7916, "step": 33001 }, { "epoch": 1.7970907952325526, "grad_norm": 0.5372043231782221, "learning_rate": 5.349469355001202e-06, "loss": 11.8713, "step": 33002 }, { "epoch": 1.7971452492291355, "grad_norm": 0.5803842771590753, "learning_rate": 5.346624174743986e-06, "loss": 11.9202, "step": 33003 }, { "epoch": 1.7971997032257185, "grad_norm": 0.5032991441482636, "learning_rate": 5.343779730527277e-06, "loss": 11.6306, "step": 33004 }, { "epoch": 1.7972541572223015, "grad_norm": 0.5803015960911285, "learning_rate": 5.340936022373222e-06, "loss": 11.8288, "step": 33005 }, { "epoch": 1.7973086112188845, "grad_norm": 0.5305174714243117, "learning_rate": 5.338093050303905e-06, "loss": 11.6913, "step": 33006 }, { "epoch": 1.7973630652154675, "grad_norm": 0.6319059823594511, "learning_rate": 5.335250814341464e-06, "loss": 12.0044, "step": 33007 }, { "epoch": 1.7974175192120505, "grad_norm": 0.5374725158357982, "learning_rate": 5.332409314508003e-06, "loss": 11.6781, "step": 33008 }, { "epoch": 1.7974719732086337, "grad_norm": 0.5578158580914994, "learning_rate": 5.329568550825581e-06, "loss": 11.8052, "step": 33009 }, { "epoch": 1.7975264272052167, "grad_norm": 0.5957287702480674, "learning_rate": 5.3267285233163045e-06, "loss": 11.8027, "step": 33010 }, { "epoch": 1.7975808812017997, "grad_norm": 0.4860268760649681, "learning_rate": 5.3238892320022886e-06, "loss": 11.8393, "step": 33011 }, { "epoch": 1.7976353351983827, "grad_norm": 0.5436182457278895, "learning_rate": 5.3210506769055705e-06, "loss": 11.7452, "step": 33012 }, { "epoch": 1.7976897891949657, "grad_norm": 0.5570223204020921, "learning_rate": 5.318212858048244e-06, "loss": 11.787, "step": 33013 }, { "epoch": 1.7977442431915487, "grad_norm": 0.5986976690922344, "learning_rate": 5.315375775452369e-06, "loss": 11.8338, "step": 33014 }, { "epoch": 1.797798697188132, "grad_norm": 0.551169818946494, "learning_rate": 5.312539429140018e-06, "loss": 11.8283, "step": 33015 }, { "epoch": 1.797853151184715, "grad_norm": 0.6176384875769115, "learning_rate": 5.309703819133238e-06, "loss": 11.869, "step": 33016 }, { "epoch": 1.7979076051812979, "grad_norm": 0.5242391249554645, "learning_rate": 5.306868945454068e-06, "loss": 11.8052, "step": 33017 }, { "epoch": 1.7979620591778809, "grad_norm": 0.5367222734114467, "learning_rate": 5.304034808124591e-06, "loss": 11.7946, "step": 33018 }, { "epoch": 1.7980165131744639, "grad_norm": 0.5272415705511996, "learning_rate": 5.3012014071668e-06, "loss": 11.882, "step": 33019 }, { "epoch": 1.7980709671710469, "grad_norm": 0.5580051244643481, "learning_rate": 5.298368742602766e-06, "loss": 11.6822, "step": 33020 }, { "epoch": 1.7981254211676299, "grad_norm": 0.6239567137405468, "learning_rate": 5.295536814454472e-06, "loss": 11.8083, "step": 33021 }, { "epoch": 1.7981798751642128, "grad_norm": 0.5463087922129438, "learning_rate": 5.292705622743977e-06, "loss": 11.5613, "step": 33022 }, { "epoch": 1.7982343291607958, "grad_norm": 0.5552737913614896, "learning_rate": 5.289875167493286e-06, "loss": 11.7769, "step": 33023 }, { "epoch": 1.7982887831573788, "grad_norm": 0.5147142660490316, "learning_rate": 5.287045448724404e-06, "loss": 11.8173, "step": 33024 }, { "epoch": 1.7983432371539618, "grad_norm": 0.5788489928901949, "learning_rate": 5.284216466459357e-06, "loss": 11.7764, "step": 33025 }, { "epoch": 1.7983976911505448, "grad_norm": 0.5810937995182615, "learning_rate": 5.281388220720107e-06, "loss": 11.8385, "step": 33026 }, { "epoch": 1.7984521451471278, "grad_norm": 0.541854898260314, "learning_rate": 5.27856071152868e-06, "loss": 11.7527, "step": 33027 }, { "epoch": 1.7985065991437108, "grad_norm": 0.4937860521646309, "learning_rate": 5.275733938907046e-06, "loss": 11.7261, "step": 33028 }, { "epoch": 1.7985610531402938, "grad_norm": 0.5547554882226353, "learning_rate": 5.272907902877189e-06, "loss": 11.7142, "step": 33029 }, { "epoch": 1.7986155071368768, "grad_norm": 0.544133630270464, "learning_rate": 5.27008260346109e-06, "loss": 11.8562, "step": 33030 }, { "epoch": 1.7986699611334598, "grad_norm": 0.5407408427436463, "learning_rate": 5.2672580406807e-06, "loss": 11.8199, "step": 33031 }, { "epoch": 1.798724415130043, "grad_norm": 0.5375801738999948, "learning_rate": 5.2644342145580005e-06, "loss": 11.7831, "step": 33032 }, { "epoch": 1.798778869126626, "grad_norm": 0.6789230656775038, "learning_rate": 5.261611125114963e-06, "loss": 11.8353, "step": 33033 }, { "epoch": 1.798833323123209, "grad_norm": 0.6411747950919314, "learning_rate": 5.258788772373513e-06, "loss": 11.7864, "step": 33034 }, { "epoch": 1.798887777119792, "grad_norm": 0.510044093749449, "learning_rate": 5.255967156355623e-06, "loss": 11.863, "step": 33035 }, { "epoch": 1.798942231116375, "grad_norm": 0.5360965215681892, "learning_rate": 5.253146277083199e-06, "loss": 11.6881, "step": 33036 }, { "epoch": 1.798996685112958, "grad_norm": 0.5428324202304564, "learning_rate": 5.250326134578221e-06, "loss": 11.6235, "step": 33037 }, { "epoch": 1.7990511391095412, "grad_norm": 0.49448376491651336, "learning_rate": 5.247506728862595e-06, "loss": 11.7862, "step": 33038 }, { "epoch": 1.7991055931061242, "grad_norm": 0.5633106550018803, "learning_rate": 5.244688059958225e-06, "loss": 11.8028, "step": 33039 }, { "epoch": 1.7991600471027072, "grad_norm": 0.6164474620025211, "learning_rate": 5.241870127887072e-06, "loss": 11.7571, "step": 33040 }, { "epoch": 1.7992145010992902, "grad_norm": 0.556047598832963, "learning_rate": 5.239052932671018e-06, "loss": 11.7722, "step": 33041 }, { "epoch": 1.7992689550958731, "grad_norm": 0.561233804075412, "learning_rate": 5.23623647433199e-06, "loss": 11.833, "step": 33042 }, { "epoch": 1.7993234090924561, "grad_norm": 0.5363343879344251, "learning_rate": 5.23342075289186e-06, "loss": 11.8106, "step": 33043 }, { "epoch": 1.7993778630890391, "grad_norm": 0.5180780725819338, "learning_rate": 5.230605768372554e-06, "loss": 11.7159, "step": 33044 }, { "epoch": 1.7994323170856221, "grad_norm": 0.5917444056953549, "learning_rate": 5.227791520795955e-06, "loss": 11.8704, "step": 33045 }, { "epoch": 1.7994867710822051, "grad_norm": 0.6058205122319649, "learning_rate": 5.2249780101839345e-06, "loss": 11.776, "step": 33046 }, { "epoch": 1.799541225078788, "grad_norm": 0.5910864790269835, "learning_rate": 5.222165236558385e-06, "loss": 11.8462, "step": 33047 }, { "epoch": 1.799595679075371, "grad_norm": 0.5409314574371767, "learning_rate": 5.2193531999411795e-06, "loss": 11.8283, "step": 33048 }, { "epoch": 1.799650133071954, "grad_norm": 0.703583447428384, "learning_rate": 5.2165419003541545e-06, "loss": 11.8901, "step": 33049 }, { "epoch": 1.799704587068537, "grad_norm": 0.5390945416622668, "learning_rate": 5.213731337819217e-06, "loss": 11.7955, "step": 33050 }, { "epoch": 1.79975904106512, "grad_norm": 0.49991864252572077, "learning_rate": 5.210921512358191e-06, "loss": 11.7977, "step": 33051 }, { "epoch": 1.799813495061703, "grad_norm": 0.5359739836329991, "learning_rate": 5.2081124239929395e-06, "loss": 11.7865, "step": 33052 }, { "epoch": 1.799867949058286, "grad_norm": 0.6323550774870728, "learning_rate": 5.205304072745299e-06, "loss": 11.9618, "step": 33053 }, { "epoch": 1.799922403054869, "grad_norm": 0.5009089060164308, "learning_rate": 5.202496458637118e-06, "loss": 11.7868, "step": 33054 }, { "epoch": 1.7999768570514523, "grad_norm": 0.5397962049504392, "learning_rate": 5.199689581690204e-06, "loss": 11.7871, "step": 33055 }, { "epoch": 1.8000313110480353, "grad_norm": 0.7182613204472554, "learning_rate": 5.196883441926415e-06, "loss": 11.7472, "step": 33056 }, { "epoch": 1.8000857650446183, "grad_norm": 0.5485250475107362, "learning_rate": 5.194078039367556e-06, "loss": 11.6487, "step": 33057 }, { "epoch": 1.8001402190412013, "grad_norm": 0.5951127871722497, "learning_rate": 5.191273374035455e-06, "loss": 11.9036, "step": 33058 }, { "epoch": 1.8001946730377842, "grad_norm": 0.5071896827292842, "learning_rate": 5.1884694459519045e-06, "loss": 11.6455, "step": 33059 }, { "epoch": 1.8002491270343672, "grad_norm": 0.5661822993466582, "learning_rate": 5.185666255138721e-06, "loss": 11.8508, "step": 33060 }, { "epoch": 1.8003035810309505, "grad_norm": 0.5037982708150142, "learning_rate": 5.182863801617677e-06, "loss": 11.8576, "step": 33061 }, { "epoch": 1.8003580350275334, "grad_norm": 0.5182435953116797, "learning_rate": 5.180062085410609e-06, "loss": 11.7623, "step": 33062 }, { "epoch": 1.8004124890241164, "grad_norm": 0.6543159068382637, "learning_rate": 5.177261106539255e-06, "loss": 11.9147, "step": 33063 }, { "epoch": 1.8004669430206994, "grad_norm": 0.573379508773424, "learning_rate": 5.174460865025443e-06, "loss": 11.6139, "step": 33064 }, { "epoch": 1.8005213970172824, "grad_norm": 0.5455433176042846, "learning_rate": 5.171661360890911e-06, "loss": 11.6488, "step": 33065 }, { "epoch": 1.8005758510138654, "grad_norm": 0.616186317030893, "learning_rate": 5.168862594157442e-06, "loss": 11.7815, "step": 33066 }, { "epoch": 1.8006303050104484, "grad_norm": 0.5576784896864911, "learning_rate": 5.166064564846818e-06, "loss": 11.7695, "step": 33067 }, { "epoch": 1.8006847590070314, "grad_norm": 0.558524115267277, "learning_rate": 5.163267272980776e-06, "loss": 11.9219, "step": 33068 }, { "epoch": 1.8007392130036144, "grad_norm": 0.5776225595460162, "learning_rate": 5.160470718581068e-06, "loss": 11.8872, "step": 33069 }, { "epoch": 1.8007936670001974, "grad_norm": 0.5695110058292474, "learning_rate": 5.157674901669441e-06, "loss": 11.6353, "step": 33070 }, { "epoch": 1.8008481209967804, "grad_norm": 0.68544646777409, "learning_rate": 5.154879822267633e-06, "loss": 11.8049, "step": 33071 }, { "epoch": 1.8009025749933634, "grad_norm": 0.7136796969335726, "learning_rate": 5.152085480397395e-06, "loss": 11.7655, "step": 33072 }, { "epoch": 1.8009570289899464, "grad_norm": 0.5283364656274697, "learning_rate": 5.149291876080431e-06, "loss": 11.7, "step": 33073 }, { "epoch": 1.8010114829865294, "grad_norm": 0.5487399564934956, "learning_rate": 5.146499009338501e-06, "loss": 11.7431, "step": 33074 }, { "epoch": 1.8010659369831123, "grad_norm": 0.573375986776589, "learning_rate": 5.143706880193289e-06, "loss": 11.6611, "step": 33075 }, { "epoch": 1.8011203909796953, "grad_norm": 0.5127462304835643, "learning_rate": 5.1409154886665315e-06, "loss": 11.7831, "step": 33076 }, { "epoch": 1.8011748449762783, "grad_norm": 0.5341124749966027, "learning_rate": 5.138124834779901e-06, "loss": 11.7424, "step": 33077 }, { "epoch": 1.8012292989728613, "grad_norm": 0.558461180509717, "learning_rate": 5.135334918555146e-06, "loss": 11.5907, "step": 33078 }, { "epoch": 1.8012837529694445, "grad_norm": 0.5695077286896717, "learning_rate": 5.132545740013928e-06, "loss": 11.812, "step": 33079 }, { "epoch": 1.8013382069660275, "grad_norm": 0.6236338186733865, "learning_rate": 5.129757299177928e-06, "loss": 11.8106, "step": 33080 }, { "epoch": 1.8013926609626105, "grad_norm": 0.6048148154238496, "learning_rate": 5.126969596068853e-06, "loss": 11.9336, "step": 33081 }, { "epoch": 1.8014471149591935, "grad_norm": 0.6021874516734276, "learning_rate": 5.124182630708385e-06, "loss": 11.8336, "step": 33082 }, { "epoch": 1.8015015689557765, "grad_norm": 0.5510384474859819, "learning_rate": 5.1213964031181615e-06, "loss": 11.8096, "step": 33083 }, { "epoch": 1.8015560229523597, "grad_norm": 0.5608582089557207, "learning_rate": 5.118610913319888e-06, "loss": 11.8161, "step": 33084 }, { "epoch": 1.8016104769489427, "grad_norm": 0.5780941252311154, "learning_rate": 5.115826161335202e-06, "loss": 11.7778, "step": 33085 }, { "epoch": 1.8016649309455257, "grad_norm": 0.5049369266089829, "learning_rate": 5.113042147185765e-06, "loss": 11.7887, "step": 33086 }, { "epoch": 1.8017193849421087, "grad_norm": 0.572217096407932, "learning_rate": 5.110258870893225e-06, "loss": 11.8803, "step": 33087 }, { "epoch": 1.8017738389386917, "grad_norm": 0.5332739464871372, "learning_rate": 5.1074763324792215e-06, "loss": 11.8783, "step": 33088 }, { "epoch": 1.8018282929352747, "grad_norm": 0.584070318484261, "learning_rate": 5.104694531965415e-06, "loss": 11.7572, "step": 33089 }, { "epoch": 1.8018827469318577, "grad_norm": 0.5883338984393851, "learning_rate": 5.101913469373387e-06, "loss": 11.7152, "step": 33090 }, { "epoch": 1.8019372009284407, "grad_norm": 0.5086451157941407, "learning_rate": 5.099133144724821e-06, "loss": 11.7435, "step": 33091 }, { "epoch": 1.8019916549250237, "grad_norm": 0.5571779434488101, "learning_rate": 5.096353558041289e-06, "loss": 11.8092, "step": 33092 }, { "epoch": 1.8020461089216067, "grad_norm": 0.5312922849609283, "learning_rate": 5.0935747093444285e-06, "loss": 11.6488, "step": 33093 }, { "epoch": 1.8021005629181897, "grad_norm": 0.5166841817620795, "learning_rate": 5.090796598655867e-06, "loss": 11.718, "step": 33094 }, { "epoch": 1.8021550169147726, "grad_norm": 0.5906951796143799, "learning_rate": 5.0880192259971645e-06, "loss": 11.7187, "step": 33095 }, { "epoch": 1.8022094709113556, "grad_norm": 0.4857076515708823, "learning_rate": 5.0852425913899605e-06, "loss": 11.7128, "step": 33096 }, { "epoch": 1.8022639249079386, "grad_norm": 0.548613000878737, "learning_rate": 5.082466694855803e-06, "loss": 11.8502, "step": 33097 }, { "epoch": 1.8023183789045216, "grad_norm": 0.5440445244085107, "learning_rate": 5.079691536416331e-06, "loss": 11.7426, "step": 33098 }, { "epoch": 1.8023728329011046, "grad_norm": 0.5765165755015134, "learning_rate": 5.0769171160930824e-06, "loss": 11.9014, "step": 33099 }, { "epoch": 1.8024272868976876, "grad_norm": 0.5193150501470704, "learning_rate": 5.074143433907641e-06, "loss": 11.7797, "step": 33100 }, { "epoch": 1.8024817408942706, "grad_norm": 0.6264881221743966, "learning_rate": 5.071370489881589e-06, "loss": 11.857, "step": 33101 }, { "epoch": 1.8025361948908538, "grad_norm": 0.5292029358386698, "learning_rate": 5.068598284036474e-06, "loss": 11.6965, "step": 33102 }, { "epoch": 1.8025906488874368, "grad_norm": 0.5577462168844834, "learning_rate": 5.065826816393848e-06, "loss": 11.718, "step": 33103 }, { "epoch": 1.8026451028840198, "grad_norm": 0.5124702474738054, "learning_rate": 5.063056086975293e-06, "loss": 11.7704, "step": 33104 }, { "epoch": 1.8026995568806028, "grad_norm": 0.5172760410745203, "learning_rate": 5.0602860958023136e-06, "loss": 11.7581, "step": 33105 }, { "epoch": 1.8027540108771858, "grad_norm": 0.5681498001389979, "learning_rate": 5.057516842896492e-06, "loss": 11.7469, "step": 33106 }, { "epoch": 1.8028084648737688, "grad_norm": 0.553499130275587, "learning_rate": 5.054748328279324e-06, "loss": 11.725, "step": 33107 }, { "epoch": 1.802862918870352, "grad_norm": 0.5267487531792995, "learning_rate": 5.051980551972369e-06, "loss": 11.8036, "step": 33108 }, { "epoch": 1.802917372866935, "grad_norm": 0.5835299047872015, "learning_rate": 5.049213513997142e-06, "loss": 11.7708, "step": 33109 }, { "epoch": 1.802971826863518, "grad_norm": 0.564457165664405, "learning_rate": 5.046447214375138e-06, "loss": 11.9303, "step": 33110 }, { "epoch": 1.803026280860101, "grad_norm": 0.5626347562366967, "learning_rate": 5.043681653127885e-06, "loss": 11.5876, "step": 33111 }, { "epoch": 1.803080734856684, "grad_norm": 0.5262466057129509, "learning_rate": 5.040916830276887e-06, "loss": 11.8207, "step": 33112 }, { "epoch": 1.803135188853267, "grad_norm": 0.5368449488581447, "learning_rate": 5.03815274584366e-06, "loss": 11.7659, "step": 33113 }, { "epoch": 1.80318964284985, "grad_norm": 0.6010632105142093, "learning_rate": 5.035389399849666e-06, "loss": 11.9371, "step": 33114 }, { "epoch": 1.803244096846433, "grad_norm": 0.5916678779008685, "learning_rate": 5.032626792316408e-06, "loss": 11.7108, "step": 33115 }, { "epoch": 1.803298550843016, "grad_norm": 0.5369634232869531, "learning_rate": 5.029864923265382e-06, "loss": 11.8294, "step": 33116 }, { "epoch": 1.803353004839599, "grad_norm": 0.5089388708774236, "learning_rate": 5.027103792718036e-06, "loss": 11.7105, "step": 33117 }, { "epoch": 1.803407458836182, "grad_norm": 0.5112987221801994, "learning_rate": 5.024343400695874e-06, "loss": 11.6755, "step": 33118 }, { "epoch": 1.803461912832765, "grad_norm": 0.5754809501452369, "learning_rate": 5.021583747220349e-06, "loss": 11.843, "step": 33119 }, { "epoch": 1.803516366829348, "grad_norm": 0.569141860179444, "learning_rate": 5.018824832312907e-06, "loss": 11.7935, "step": 33120 }, { "epoch": 1.803570820825931, "grad_norm": 0.5524580090942568, "learning_rate": 5.016066655995022e-06, "loss": 11.7522, "step": 33121 }, { "epoch": 1.803625274822514, "grad_norm": 0.5185444764335322, "learning_rate": 5.013309218288109e-06, "loss": 11.7835, "step": 33122 }, { "epoch": 1.8036797288190969, "grad_norm": 0.5046861956728936, "learning_rate": 5.0105525192136515e-06, "loss": 11.6846, "step": 33123 }, { "epoch": 1.8037341828156799, "grad_norm": 0.549262063097627, "learning_rate": 5.007796558793054e-06, "loss": 11.7959, "step": 33124 }, { "epoch": 1.803788636812263, "grad_norm": 0.5706250564929886, "learning_rate": 5.005041337047778e-06, "loss": 11.6666, "step": 33125 }, { "epoch": 1.803843090808846, "grad_norm": 0.5280845153105259, "learning_rate": 5.002286853999216e-06, "loss": 11.7222, "step": 33126 }, { "epoch": 1.803897544805429, "grad_norm": 0.6090800605198194, "learning_rate": 4.999533109668797e-06, "loss": 11.8456, "step": 33127 }, { "epoch": 1.803951998802012, "grad_norm": 0.5449261964947809, "learning_rate": 4.996780104077958e-06, "loss": 11.8051, "step": 33128 }, { "epoch": 1.804006452798595, "grad_norm": 0.5253676105361571, "learning_rate": 4.994027837248094e-06, "loss": 11.826, "step": 33129 }, { "epoch": 1.804060906795178, "grad_norm": 0.5381580407631475, "learning_rate": 4.991276309200588e-06, "loss": 11.8203, "step": 33130 }, { "epoch": 1.8041153607917613, "grad_norm": 0.5776018075832223, "learning_rate": 4.988525519956855e-06, "loss": 11.8983, "step": 33131 }, { "epoch": 1.8041698147883443, "grad_norm": 0.5193345683995854, "learning_rate": 4.985775469538268e-06, "loss": 11.7223, "step": 33132 }, { "epoch": 1.8042242687849273, "grad_norm": 0.5355267333610721, "learning_rate": 4.983026157966242e-06, "loss": 11.7857, "step": 33133 }, { "epoch": 1.8042787227815102, "grad_norm": 0.5441305285446537, "learning_rate": 4.980277585262128e-06, "loss": 11.8036, "step": 33134 }, { "epoch": 1.8043331767780932, "grad_norm": 0.557721790054812, "learning_rate": 4.977529751447318e-06, "loss": 11.7417, "step": 33135 }, { "epoch": 1.8043876307746762, "grad_norm": 0.5690619179987269, "learning_rate": 4.974782656543164e-06, "loss": 11.7234, "step": 33136 }, { "epoch": 1.8044420847712592, "grad_norm": 0.5062107400952486, "learning_rate": 4.9720363005710365e-06, "loss": 11.7422, "step": 33137 }, { "epoch": 1.8044965387678422, "grad_norm": 0.5485248627223632, "learning_rate": 4.969290683552297e-06, "loss": 11.6907, "step": 33138 }, { "epoch": 1.8045509927644252, "grad_norm": 0.5305898060488378, "learning_rate": 4.966545805508293e-06, "loss": 11.7244, "step": 33139 }, { "epoch": 1.8046054467610082, "grad_norm": 0.5429745541015821, "learning_rate": 4.963801666460377e-06, "loss": 11.7969, "step": 33140 }, { "epoch": 1.8046599007575912, "grad_norm": 0.592253568178117, "learning_rate": 4.961058266429852e-06, "loss": 11.6663, "step": 33141 }, { "epoch": 1.8047143547541742, "grad_norm": 0.5559710956921947, "learning_rate": 4.95831560543808e-06, "loss": 11.7606, "step": 33142 }, { "epoch": 1.8047688087507572, "grad_norm": 0.6063945203602211, "learning_rate": 4.955573683506387e-06, "loss": 11.7917, "step": 33143 }, { "epoch": 1.8048232627473402, "grad_norm": 0.5676914394759548, "learning_rate": 4.9528325006560905e-06, "loss": 11.7848, "step": 33144 }, { "epoch": 1.8048777167439232, "grad_norm": 0.5441310715019334, "learning_rate": 4.950092056908518e-06, "loss": 11.789, "step": 33145 }, { "epoch": 1.8049321707405062, "grad_norm": 0.562846309247268, "learning_rate": 4.947352352284962e-06, "loss": 11.8551, "step": 33146 }, { "epoch": 1.8049866247370892, "grad_norm": 0.5843194000520776, "learning_rate": 4.94461338680674e-06, "loss": 11.7459, "step": 33147 }, { "epoch": 1.8050410787336721, "grad_norm": 0.5762016328871316, "learning_rate": 4.941875160495135e-06, "loss": 11.8354, "step": 33148 }, { "epoch": 1.8050955327302554, "grad_norm": 0.5579528750347603, "learning_rate": 4.939137673371452e-06, "loss": 11.811, "step": 33149 }, { "epoch": 1.8051499867268384, "grad_norm": 0.5200221724236684, "learning_rate": 4.936400925456997e-06, "loss": 11.7344, "step": 33150 }, { "epoch": 1.8052044407234213, "grad_norm": 0.6106854710543946, "learning_rate": 4.933664916773007e-06, "loss": 11.8581, "step": 33151 }, { "epoch": 1.8052588947200043, "grad_norm": 0.554498844373309, "learning_rate": 4.930929647340776e-06, "loss": 11.8799, "step": 33152 }, { "epoch": 1.8053133487165873, "grad_norm": 0.5525246288148445, "learning_rate": 4.9281951171816e-06, "loss": 11.6564, "step": 33153 }, { "epoch": 1.8053678027131705, "grad_norm": 0.535704884413912, "learning_rate": 4.925461326316705e-06, "loss": 11.6345, "step": 33154 }, { "epoch": 1.8054222567097535, "grad_norm": 0.5484366647238916, "learning_rate": 4.922728274767374e-06, "loss": 11.9068, "step": 33155 }, { "epoch": 1.8054767107063365, "grad_norm": 0.5386424025094163, "learning_rate": 4.919995962554846e-06, "loss": 11.7773, "step": 33156 }, { "epoch": 1.8055311647029195, "grad_norm": 0.5393011097664493, "learning_rate": 4.9172643897003936e-06, "loss": 11.7834, "step": 33157 }, { "epoch": 1.8055856186995025, "grad_norm": 0.5000349905754421, "learning_rate": 4.9145335562252204e-06, "loss": 11.6803, "step": 33158 }, { "epoch": 1.8056400726960855, "grad_norm": 0.5346719670032192, "learning_rate": 4.911803462150588e-06, "loss": 11.715, "step": 33159 }, { "epoch": 1.8056945266926685, "grad_norm": 0.5394760021643212, "learning_rate": 4.9090741074977245e-06, "loss": 11.715, "step": 33160 }, { "epoch": 1.8057489806892515, "grad_norm": 0.5571987610832586, "learning_rate": 4.906345492287834e-06, "loss": 11.8169, "step": 33161 }, { "epoch": 1.8058034346858345, "grad_norm": 0.5534356781641624, "learning_rate": 4.903617616542156e-06, "loss": 11.6854, "step": 33162 }, { "epoch": 1.8058578886824175, "grad_norm": 0.5355276617833473, "learning_rate": 4.900890480281883e-06, "loss": 11.8339, "step": 33163 }, { "epoch": 1.8059123426790005, "grad_norm": 0.529139284611768, "learning_rate": 4.898164083528245e-06, "loss": 11.6885, "step": 33164 }, { "epoch": 1.8059667966755835, "grad_norm": 0.6012888361537658, "learning_rate": 4.895438426302435e-06, "loss": 11.7846, "step": 33165 }, { "epoch": 1.8060212506721665, "grad_norm": 0.5267517422127683, "learning_rate": 4.892713508625635e-06, "loss": 11.7775, "step": 33166 }, { "epoch": 1.8060757046687494, "grad_norm": 0.5625057355380759, "learning_rate": 4.8899893305190514e-06, "loss": 11.8971, "step": 33167 }, { "epoch": 1.8061301586653324, "grad_norm": 0.5216984893198304, "learning_rate": 4.887265892003856e-06, "loss": 11.816, "step": 33168 }, { "epoch": 1.8061846126619154, "grad_norm": 0.6246677665710533, "learning_rate": 4.884543193101232e-06, "loss": 11.8072, "step": 33169 }, { "epoch": 1.8062390666584984, "grad_norm": 0.5301392317448178, "learning_rate": 4.881821233832362e-06, "loss": 11.6522, "step": 33170 }, { "epoch": 1.8062935206550814, "grad_norm": 0.5864101392981528, "learning_rate": 4.879100014218385e-06, "loss": 11.7581, "step": 33171 }, { "epoch": 1.8063479746516646, "grad_norm": 0.6302461544781593, "learning_rate": 4.876379534280495e-06, "loss": 11.8697, "step": 33172 }, { "epoch": 1.8064024286482476, "grad_norm": 0.5126815062616424, "learning_rate": 4.873659794039809e-06, "loss": 11.8067, "step": 33173 }, { "epoch": 1.8064568826448306, "grad_norm": 0.5338329742520125, "learning_rate": 4.87094079351752e-06, "loss": 11.8254, "step": 33174 }, { "epoch": 1.8065113366414136, "grad_norm": 0.5558821238255813, "learning_rate": 4.868222532734734e-06, "loss": 11.8499, "step": 33175 }, { "epoch": 1.8065657906379966, "grad_norm": 0.5297772250345244, "learning_rate": 4.86550501171259e-06, "loss": 11.6535, "step": 33176 }, { "epoch": 1.8066202446345796, "grad_norm": 0.5301268819352307, "learning_rate": 4.862788230472259e-06, "loss": 11.5577, "step": 33177 }, { "epoch": 1.8066746986311628, "grad_norm": 0.5195226774698151, "learning_rate": 4.860072189034826e-06, "loss": 11.604, "step": 33178 }, { "epoch": 1.8067291526277458, "grad_norm": 0.5873862330329568, "learning_rate": 4.857356887421438e-06, "loss": 11.8314, "step": 33179 }, { "epoch": 1.8067836066243288, "grad_norm": 0.5355624033162096, "learning_rate": 4.854642325653202e-06, "loss": 11.5848, "step": 33180 }, { "epoch": 1.8068380606209118, "grad_norm": 0.5334199011435603, "learning_rate": 4.851928503751202e-06, "loss": 11.6847, "step": 33181 }, { "epoch": 1.8068925146174948, "grad_norm": 0.5424485365186009, "learning_rate": 4.849215421736586e-06, "loss": 11.7574, "step": 33182 }, { "epoch": 1.8069469686140778, "grad_norm": 0.5519686488107474, "learning_rate": 4.846503079630404e-06, "loss": 11.7667, "step": 33183 }, { "epoch": 1.8070014226106608, "grad_norm": 0.5372690316473078, "learning_rate": 4.843791477453785e-06, "loss": 11.7687, "step": 33184 }, { "epoch": 1.8070558766072438, "grad_norm": 0.5206480337445053, "learning_rate": 4.8410806152278e-06, "loss": 11.6573, "step": 33185 }, { "epoch": 1.8071103306038268, "grad_norm": 0.5600968336725373, "learning_rate": 4.838370492973521e-06, "loss": 11.7819, "step": 33186 }, { "epoch": 1.8071647846004097, "grad_norm": 0.5411745921831451, "learning_rate": 4.835661110712042e-06, "loss": 11.8349, "step": 33187 }, { "epoch": 1.8072192385969927, "grad_norm": 0.5557196344027634, "learning_rate": 4.832952468464413e-06, "loss": 11.7677, "step": 33188 }, { "epoch": 1.8072736925935757, "grad_norm": 0.5819464925228438, "learning_rate": 4.830244566251729e-06, "loss": 11.7541, "step": 33189 }, { "epoch": 1.8073281465901587, "grad_norm": 0.573373616125001, "learning_rate": 4.827537404095006e-06, "loss": 11.7272, "step": 33190 }, { "epoch": 1.8073826005867417, "grad_norm": 0.6061551048176003, "learning_rate": 4.824830982015305e-06, "loss": 11.8376, "step": 33191 }, { "epoch": 1.8074370545833247, "grad_norm": 0.5699583363847336, "learning_rate": 4.822125300033686e-06, "loss": 11.5983, "step": 33192 }, { "epoch": 1.8074915085799077, "grad_norm": 0.5608869196218322, "learning_rate": 4.819420358171178e-06, "loss": 11.8257, "step": 33193 }, { "epoch": 1.8075459625764907, "grad_norm": 0.5604810907891347, "learning_rate": 4.816716156448831e-06, "loss": 11.6896, "step": 33194 }, { "epoch": 1.807600416573074, "grad_norm": 0.5146847407936258, "learning_rate": 4.814012694887649e-06, "loss": 11.7387, "step": 33195 }, { "epoch": 1.807654870569657, "grad_norm": 0.506587606170563, "learning_rate": 4.811309973508682e-06, "loss": 11.7663, "step": 33196 }, { "epoch": 1.80770932456624, "grad_norm": 0.5397408178174148, "learning_rate": 4.808607992332914e-06, "loss": 11.8642, "step": 33197 }, { "epoch": 1.8077637785628229, "grad_norm": 0.541070921712911, "learning_rate": 4.805906751381373e-06, "loss": 11.7178, "step": 33198 }, { "epoch": 1.8078182325594059, "grad_norm": 0.5703390458843115, "learning_rate": 4.803206250675097e-06, "loss": 11.8087, "step": 33199 }, { "epoch": 1.8078726865559889, "grad_norm": 0.5797365627006066, "learning_rate": 4.800506490235013e-06, "loss": 11.7641, "step": 33200 }, { "epoch": 1.807927140552572, "grad_norm": 0.5177289704755285, "learning_rate": 4.797807470082172e-06, "loss": 11.7672, "step": 33201 }, { "epoch": 1.807981594549155, "grad_norm": 0.5838144505341726, "learning_rate": 4.795109190237557e-06, "loss": 11.8404, "step": 33202 }, { "epoch": 1.808036048545738, "grad_norm": 0.5445052167707678, "learning_rate": 4.792411650722117e-06, "loss": 11.8278, "step": 33203 }, { "epoch": 1.808090502542321, "grad_norm": 0.5908857135536393, "learning_rate": 4.78971485155687e-06, "loss": 11.8719, "step": 33204 }, { "epoch": 1.808144956538904, "grad_norm": 0.48831478538696144, "learning_rate": 4.787018792762743e-06, "loss": 11.7465, "step": 33205 }, { "epoch": 1.808199410535487, "grad_norm": 0.548509157319017, "learning_rate": 4.7843234743607525e-06, "loss": 11.6314, "step": 33206 }, { "epoch": 1.80825386453207, "grad_norm": 0.561310458372122, "learning_rate": 4.781628896371815e-06, "loss": 11.8118, "step": 33207 }, { "epoch": 1.808308318528653, "grad_norm": 0.5234777153094029, "learning_rate": 4.778935058816902e-06, "loss": 11.7851, "step": 33208 }, { "epoch": 1.808362772525236, "grad_norm": 0.6093699230494634, "learning_rate": 4.776241961716965e-06, "loss": 11.8338, "step": 33209 }, { "epoch": 1.808417226521819, "grad_norm": 0.5867070046528813, "learning_rate": 4.773549605092931e-06, "loss": 11.8577, "step": 33210 }, { "epoch": 1.808471680518402, "grad_norm": 0.5452169022255674, "learning_rate": 4.77085798896576e-06, "loss": 11.9047, "step": 33211 }, { "epoch": 1.808526134514985, "grad_norm": 0.5196413237297726, "learning_rate": 4.7681671133563476e-06, "loss": 11.5996, "step": 33212 }, { "epoch": 1.808580588511568, "grad_norm": 0.6086607823386528, "learning_rate": 4.765476978285633e-06, "loss": 11.8549, "step": 33213 }, { "epoch": 1.808635042508151, "grad_norm": 0.5089794525742093, "learning_rate": 4.762787583774564e-06, "loss": 11.678, "step": 33214 }, { "epoch": 1.808689496504734, "grad_norm": 0.574101671690218, "learning_rate": 4.760098929844003e-06, "loss": 11.6728, "step": 33215 }, { "epoch": 1.808743950501317, "grad_norm": 0.5626940699284111, "learning_rate": 4.757411016514912e-06, "loss": 11.8825, "step": 33216 }, { "epoch": 1.8087984044979, "grad_norm": 0.5615339223466838, "learning_rate": 4.754723843808151e-06, "loss": 11.8007, "step": 33217 }, { "epoch": 1.8088528584944832, "grad_norm": 0.5455314024715735, "learning_rate": 4.752037411744637e-06, "loss": 11.8554, "step": 33218 }, { "epoch": 1.8089073124910662, "grad_norm": 0.5402356367933193, "learning_rate": 4.749351720345252e-06, "loss": 11.8262, "step": 33219 }, { "epoch": 1.8089617664876492, "grad_norm": 0.5582377215152176, "learning_rate": 4.74666676963087e-06, "loss": 11.8435, "step": 33220 }, { "epoch": 1.8090162204842322, "grad_norm": 0.588828318117417, "learning_rate": 4.743982559622395e-06, "loss": 11.7713, "step": 33221 }, { "epoch": 1.8090706744808152, "grad_norm": 0.5557691878795636, "learning_rate": 4.741299090340678e-06, "loss": 11.8135, "step": 33222 }, { "epoch": 1.8091251284773981, "grad_norm": 0.6213764815908573, "learning_rate": 4.7386163618066026e-06, "loss": 11.8986, "step": 33223 }, { "epoch": 1.8091795824739814, "grad_norm": 0.5406688724320587, "learning_rate": 4.735934374041007e-06, "loss": 11.8455, "step": 33224 }, { "epoch": 1.8092340364705644, "grad_norm": 0.5716837753206888, "learning_rate": 4.733253127064763e-06, "loss": 11.7499, "step": 33225 }, { "epoch": 1.8092884904671473, "grad_norm": 0.5579331798827912, "learning_rate": 4.730572620898732e-06, "loss": 11.7505, "step": 33226 }, { "epoch": 1.8093429444637303, "grad_norm": 0.5274949203688987, "learning_rate": 4.727892855563731e-06, "loss": 11.5504, "step": 33227 }, { "epoch": 1.8093973984603133, "grad_norm": 0.5336049351114056, "learning_rate": 4.7252138310806324e-06, "loss": 11.8481, "step": 33228 }, { "epoch": 1.8094518524568963, "grad_norm": 0.6252957727507026, "learning_rate": 4.722535547470242e-06, "loss": 11.8298, "step": 33229 }, { "epoch": 1.8095063064534793, "grad_norm": 0.5727061752869124, "learning_rate": 4.719858004753375e-06, "loss": 11.8586, "step": 33230 }, { "epoch": 1.8095607604500623, "grad_norm": 0.5170299752091735, "learning_rate": 4.717181202950893e-06, "loss": 11.8325, "step": 33231 }, { "epoch": 1.8096152144466453, "grad_norm": 0.5299384660261783, "learning_rate": 4.71450514208357e-06, "loss": 11.7124, "step": 33232 }, { "epoch": 1.8096696684432283, "grad_norm": 0.5499786496689046, "learning_rate": 4.711829822172254e-06, "loss": 11.8386, "step": 33233 }, { "epoch": 1.8097241224398113, "grad_norm": 0.5593279097519885, "learning_rate": 4.709155243237706e-06, "loss": 11.8838, "step": 33234 }, { "epoch": 1.8097785764363943, "grad_norm": 0.5756440750572177, "learning_rate": 4.706481405300756e-06, "loss": 11.7977, "step": 33235 }, { "epoch": 1.8098330304329773, "grad_norm": 0.5310462041528016, "learning_rate": 4.703808308382196e-06, "loss": 11.7635, "step": 33236 }, { "epoch": 1.8098874844295603, "grad_norm": 0.5094785712825856, "learning_rate": 4.701135952502788e-06, "loss": 11.7599, "step": 33237 }, { "epoch": 1.8099419384261433, "grad_norm": 0.528188634335068, "learning_rate": 4.69846433768335e-06, "loss": 11.7453, "step": 33238 }, { "epoch": 1.8099963924227263, "grad_norm": 0.5171714685422232, "learning_rate": 4.695793463944631e-06, "loss": 11.6009, "step": 33239 }, { "epoch": 1.8100508464193092, "grad_norm": 0.5283440178362762, "learning_rate": 4.693123331307392e-06, "loss": 11.7741, "step": 33240 }, { "epoch": 1.8101053004158922, "grad_norm": 0.5691858803247282, "learning_rate": 4.690453939792427e-06, "loss": 11.7403, "step": 33241 }, { "epoch": 1.8101597544124755, "grad_norm": 0.575870998897387, "learning_rate": 4.687785289420454e-06, "loss": 11.7369, "step": 33242 }, { "epoch": 1.8102142084090584, "grad_norm": 0.583534904636657, "learning_rate": 4.6851173802122675e-06, "loss": 11.8699, "step": 33243 }, { "epoch": 1.8102686624056414, "grad_norm": 0.5322452685176067, "learning_rate": 4.6824502121885714e-06, "loss": 11.733, "step": 33244 }, { "epoch": 1.8103231164022244, "grad_norm": 0.5102198165469266, "learning_rate": 4.6797837853701394e-06, "loss": 11.7132, "step": 33245 }, { "epoch": 1.8103775703988074, "grad_norm": 0.5272270547549536, "learning_rate": 4.677118099777688e-06, "loss": 11.7703, "step": 33246 }, { "epoch": 1.8104320243953904, "grad_norm": 0.5301405200023217, "learning_rate": 4.674453155431946e-06, "loss": 11.7402, "step": 33247 }, { "epoch": 1.8104864783919736, "grad_norm": 0.5920261063214396, "learning_rate": 4.671788952353662e-06, "loss": 11.7978, "step": 33248 }, { "epoch": 1.8105409323885566, "grad_norm": 0.4839884667414224, "learning_rate": 4.669125490563531e-06, "loss": 11.7457, "step": 33249 }, { "epoch": 1.8105953863851396, "grad_norm": 0.6332621013155387, "learning_rate": 4.666462770082247e-06, "loss": 11.7544, "step": 33250 }, { "epoch": 1.8106498403817226, "grad_norm": 0.5678331737352504, "learning_rate": 4.663800790930561e-06, "loss": 11.7125, "step": 33251 }, { "epoch": 1.8107042943783056, "grad_norm": 0.5643788351233826, "learning_rate": 4.661139553129123e-06, "loss": 11.8097, "step": 33252 }, { "epoch": 1.8107587483748886, "grad_norm": 0.5241273300555964, "learning_rate": 4.658479056698672e-06, "loss": 11.6924, "step": 33253 }, { "epoch": 1.8108132023714716, "grad_norm": 0.521911469572689, "learning_rate": 4.655819301659869e-06, "loss": 11.7294, "step": 33254 }, { "epoch": 1.8108676563680546, "grad_norm": 0.5678487776401085, "learning_rate": 4.653160288033409e-06, "loss": 11.8871, "step": 33255 }, { "epoch": 1.8109221103646376, "grad_norm": 0.49279398776295474, "learning_rate": 4.650502015839953e-06, "loss": 11.7573, "step": 33256 }, { "epoch": 1.8109765643612206, "grad_norm": 0.49438942981577944, "learning_rate": 4.647844485100184e-06, "loss": 11.7171, "step": 33257 }, { "epoch": 1.8110310183578036, "grad_norm": 0.7163185688421985, "learning_rate": 4.645187695834774e-06, "loss": 11.7511, "step": 33258 }, { "epoch": 1.8110854723543865, "grad_norm": 0.5259278219870365, "learning_rate": 4.642531648064374e-06, "loss": 11.7328, "step": 33259 }, { "epoch": 1.8111399263509695, "grad_norm": 0.5057567915034931, "learning_rate": 4.639876341809657e-06, "loss": 11.7507, "step": 33260 }, { "epoch": 1.8111943803475525, "grad_norm": 0.5143063553239388, "learning_rate": 4.637221777091227e-06, "loss": 11.8169, "step": 33261 }, { "epoch": 1.8112488343441355, "grad_norm": 0.5147328992008211, "learning_rate": 4.634567953929758e-06, "loss": 11.813, "step": 33262 }, { "epoch": 1.8113032883407185, "grad_norm": 0.5632237736043315, "learning_rate": 4.6319148723459e-06, "loss": 11.6644, "step": 33263 }, { "epoch": 1.8113577423373015, "grad_norm": 0.580805442319458, "learning_rate": 4.6292625323602346e-06, "loss": 11.7563, "step": 33264 }, { "epoch": 1.8114121963338847, "grad_norm": 0.5617976680277361, "learning_rate": 4.626610933993447e-06, "loss": 11.7837, "step": 33265 }, { "epoch": 1.8114666503304677, "grad_norm": 0.649113014473685, "learning_rate": 4.62396007726611e-06, "loss": 11.8386, "step": 33266 }, { "epoch": 1.8115211043270507, "grad_norm": 0.5411429635084574, "learning_rate": 4.621309962198861e-06, "loss": 11.7343, "step": 33267 }, { "epoch": 1.8115755583236337, "grad_norm": 0.5173265291460204, "learning_rate": 4.618660588812284e-06, "loss": 11.7112, "step": 33268 }, { "epoch": 1.8116300123202167, "grad_norm": 0.6377719460421658, "learning_rate": 4.616011957127009e-06, "loss": 11.8151, "step": 33269 }, { "epoch": 1.8116844663167997, "grad_norm": 0.47039995371666304, "learning_rate": 4.613364067163639e-06, "loss": 11.8002, "step": 33270 }, { "epoch": 1.811738920313383, "grad_norm": 0.5676157980270421, "learning_rate": 4.610716918942726e-06, "loss": 11.788, "step": 33271 }, { "epoch": 1.811793374309966, "grad_norm": 0.5539329546890714, "learning_rate": 4.6080705124848854e-06, "loss": 11.7338, "step": 33272 }, { "epoch": 1.811847828306549, "grad_norm": 0.6242612650651835, "learning_rate": 4.60542484781068e-06, "loss": 11.7645, "step": 33273 }, { "epoch": 1.8119022823031319, "grad_norm": 0.5256584462171094, "learning_rate": 4.602779924940681e-06, "loss": 11.6497, "step": 33274 }, { "epoch": 1.8119567362997149, "grad_norm": 0.5091817870369126, "learning_rate": 4.600135743895473e-06, "loss": 11.6957, "step": 33275 }, { "epoch": 1.8120111902962979, "grad_norm": 0.6179767147898236, "learning_rate": 4.597492304695605e-06, "loss": 11.9228, "step": 33276 }, { "epoch": 1.8120656442928809, "grad_norm": 0.547809004844969, "learning_rate": 4.5948496073616395e-06, "loss": 11.8367, "step": 33277 }, { "epoch": 1.8121200982894639, "grad_norm": 0.5145807759496834, "learning_rate": 4.592207651914115e-06, "loss": 11.7819, "step": 33278 }, { "epoch": 1.8121745522860468, "grad_norm": 0.5736815125760386, "learning_rate": 4.589566438373594e-06, "loss": 11.7776, "step": 33279 }, { "epoch": 1.8122290062826298, "grad_norm": 0.6243906740683176, "learning_rate": 4.586925966760602e-06, "loss": 11.7867, "step": 33280 }, { "epoch": 1.8122834602792128, "grad_norm": 0.5354100915325727, "learning_rate": 4.584286237095669e-06, "loss": 11.7204, "step": 33281 }, { "epoch": 1.8123379142757958, "grad_norm": 0.5680149612435472, "learning_rate": 4.581647249399335e-06, "loss": 11.6994, "step": 33282 }, { "epoch": 1.8123923682723788, "grad_norm": 0.5192362144096057, "learning_rate": 4.579009003692103e-06, "loss": 11.7376, "step": 33283 }, { "epoch": 1.8124468222689618, "grad_norm": 0.5867927421074767, "learning_rate": 4.576371499994503e-06, "loss": 11.8128, "step": 33284 }, { "epoch": 1.8125012762655448, "grad_norm": 0.5088532240817195, "learning_rate": 4.573734738327052e-06, "loss": 11.7488, "step": 33285 }, { "epoch": 1.8125557302621278, "grad_norm": 0.5126047136330019, "learning_rate": 4.57109871871022e-06, "loss": 11.6943, "step": 33286 }, { "epoch": 1.8126101842587108, "grad_norm": 0.5445520444535854, "learning_rate": 4.56846344116455e-06, "loss": 11.7375, "step": 33287 }, { "epoch": 1.812664638255294, "grad_norm": 0.5235453073984584, "learning_rate": 4.565828905710501e-06, "loss": 11.7828, "step": 33288 }, { "epoch": 1.812719092251877, "grad_norm": 0.6082597676176608, "learning_rate": 4.563195112368579e-06, "loss": 11.7967, "step": 33289 }, { "epoch": 1.81277354624846, "grad_norm": 0.5184887605400904, "learning_rate": 4.560562061159257e-06, "loss": 11.853, "step": 33290 }, { "epoch": 1.812828000245043, "grad_norm": 0.5212808673359198, "learning_rate": 4.557929752102996e-06, "loss": 11.7158, "step": 33291 }, { "epoch": 1.812882454241626, "grad_norm": 0.5108478368583238, "learning_rate": 4.555298185220291e-06, "loss": 11.7638, "step": 33292 }, { "epoch": 1.812936908238209, "grad_norm": 0.5577709139267213, "learning_rate": 4.552667360531582e-06, "loss": 11.7567, "step": 33293 }, { "epoch": 1.8129913622347922, "grad_norm": 0.5426431823864006, "learning_rate": 4.5500372780573505e-06, "loss": 11.8768, "step": 33294 }, { "epoch": 1.8130458162313752, "grad_norm": 0.5837656771242806, "learning_rate": 4.5474079378180265e-06, "loss": 11.7182, "step": 33295 }, { "epoch": 1.8131002702279582, "grad_norm": 0.5553445735645262, "learning_rate": 4.54477933983406e-06, "loss": 11.6822, "step": 33296 }, { "epoch": 1.8131547242245412, "grad_norm": 0.5262483855216749, "learning_rate": 4.5421514841259115e-06, "loss": 11.7623, "step": 33297 }, { "epoch": 1.8132091782211242, "grad_norm": 0.5803607492229638, "learning_rate": 4.539524370713988e-06, "loss": 11.7194, "step": 33298 }, { "epoch": 1.8132636322177071, "grad_norm": 0.5781905249474677, "learning_rate": 4.53689799961875e-06, "loss": 11.9124, "step": 33299 }, { "epoch": 1.8133180862142901, "grad_norm": 0.5236121957406582, "learning_rate": 4.5342723708606036e-06, "loss": 11.81, "step": 33300 }, { "epoch": 1.8133725402108731, "grad_norm": 0.4992174506719999, "learning_rate": 4.531647484459945e-06, "loss": 11.7106, "step": 33301 }, { "epoch": 1.8134269942074561, "grad_norm": 0.5549655380744889, "learning_rate": 4.529023340437222e-06, "loss": 11.6963, "step": 33302 }, { "epoch": 1.8134814482040391, "grad_norm": 0.5603578588330804, "learning_rate": 4.5263999388128085e-06, "loss": 11.8157, "step": 33303 }, { "epoch": 1.813535902200622, "grad_norm": 0.5322873348611824, "learning_rate": 4.523777279607133e-06, "loss": 11.7214, "step": 33304 }, { "epoch": 1.813590356197205, "grad_norm": 0.5531494564247507, "learning_rate": 4.521155362840568e-06, "loss": 11.7389, "step": 33305 }, { "epoch": 1.813644810193788, "grad_norm": 0.6302713973539736, "learning_rate": 4.518534188533508e-06, "loss": 11.6865, "step": 33306 }, { "epoch": 1.813699264190371, "grad_norm": 0.49506686958018375, "learning_rate": 4.515913756706347e-06, "loss": 11.724, "step": 33307 }, { "epoch": 1.813753718186954, "grad_norm": 0.5194636773562015, "learning_rate": 4.513294067379448e-06, "loss": 11.7975, "step": 33308 }, { "epoch": 1.813808172183537, "grad_norm": 0.5697393268501594, "learning_rate": 4.510675120573204e-06, "loss": 11.8829, "step": 33309 }, { "epoch": 1.81386262618012, "grad_norm": 0.5224354794587687, "learning_rate": 4.508056916307957e-06, "loss": 11.7556, "step": 33310 }, { "epoch": 1.813917080176703, "grad_norm": 0.5405521840368618, "learning_rate": 4.505439454604054e-06, "loss": 11.7114, "step": 33311 }, { "epoch": 1.8139715341732863, "grad_norm": 0.5638519595378328, "learning_rate": 4.5028227354818935e-06, "loss": 11.8164, "step": 33312 }, { "epoch": 1.8140259881698693, "grad_norm": 0.568896695552425, "learning_rate": 4.500206758961778e-06, "loss": 11.7192, "step": 33313 }, { "epoch": 1.8140804421664523, "grad_norm": 0.5615458323351968, "learning_rate": 4.497591525064082e-06, "loss": 11.7677, "step": 33314 }, { "epoch": 1.8141348961630352, "grad_norm": 0.5293997293990172, "learning_rate": 4.4949770338091225e-06, "loss": 11.8222, "step": 33315 }, { "epoch": 1.8141893501596182, "grad_norm": 0.568010811307435, "learning_rate": 4.49236328521725e-06, "loss": 11.9389, "step": 33316 }, { "epoch": 1.8142438041562012, "grad_norm": 0.49981054646930256, "learning_rate": 4.489750279308757e-06, "loss": 11.8114, "step": 33317 }, { "epoch": 1.8142982581527844, "grad_norm": 0.5225249272932014, "learning_rate": 4.4871380161039865e-06, "loss": 11.7547, "step": 33318 }, { "epoch": 1.8143527121493674, "grad_norm": 0.5493646771922226, "learning_rate": 4.484526495623265e-06, "loss": 11.5598, "step": 33319 }, { "epoch": 1.8144071661459504, "grad_norm": 0.6062000021701177, "learning_rate": 4.481915717886886e-06, "loss": 11.743, "step": 33320 }, { "epoch": 1.8144616201425334, "grad_norm": 0.5753678436648219, "learning_rate": 4.479305682915136e-06, "loss": 11.9379, "step": 33321 }, { "epoch": 1.8145160741391164, "grad_norm": 0.6455252697951465, "learning_rate": 4.476696390728318e-06, "loss": 11.7879, "step": 33322 }, { "epoch": 1.8145705281356994, "grad_norm": 0.5407366107188332, "learning_rate": 4.47408784134673e-06, "loss": 11.8897, "step": 33323 }, { "epoch": 1.8146249821322824, "grad_norm": 0.5738336666237459, "learning_rate": 4.471480034790676e-06, "loss": 11.8631, "step": 33324 }, { "epoch": 1.8146794361288654, "grad_norm": 0.5893288333801068, "learning_rate": 4.468872971080384e-06, "loss": 11.8426, "step": 33325 }, { "epoch": 1.8147338901254484, "grad_norm": 0.5472307563950716, "learning_rate": 4.466266650236184e-06, "loss": 11.7547, "step": 33326 }, { "epoch": 1.8147883441220314, "grad_norm": 0.6296197773275323, "learning_rate": 4.463661072278291e-06, "loss": 11.8189, "step": 33327 }, { "epoch": 1.8148427981186144, "grad_norm": 0.5841100972519099, "learning_rate": 4.461056237227001e-06, "loss": 11.7946, "step": 33328 }, { "epoch": 1.8148972521151974, "grad_norm": 0.5151973204027636, "learning_rate": 4.458452145102554e-06, "loss": 11.7281, "step": 33329 }, { "epoch": 1.8149517061117804, "grad_norm": 0.5503912756346909, "learning_rate": 4.4558487959252215e-06, "loss": 11.8112, "step": 33330 }, { "epoch": 1.8150061601083634, "grad_norm": 0.5226465804257504, "learning_rate": 4.453246189715232e-06, "loss": 11.8009, "step": 33331 }, { "epoch": 1.8150606141049463, "grad_norm": 0.6119326188396066, "learning_rate": 4.450644326492803e-06, "loss": 11.8591, "step": 33332 }, { "epoch": 1.8151150681015293, "grad_norm": 0.5231959641530465, "learning_rate": 4.448043206278196e-06, "loss": 11.7045, "step": 33333 }, { "epoch": 1.8151695220981123, "grad_norm": 0.4931963916129974, "learning_rate": 4.445442829091629e-06, "loss": 11.7716, "step": 33334 }, { "epoch": 1.8152239760946955, "grad_norm": 0.5697086660320094, "learning_rate": 4.442843194953317e-06, "loss": 11.9071, "step": 33335 }, { "epoch": 1.8152784300912785, "grad_norm": 0.5087142488913186, "learning_rate": 4.440244303883501e-06, "loss": 11.7693, "step": 33336 }, { "epoch": 1.8153328840878615, "grad_norm": 0.5295033186600981, "learning_rate": 4.437646155902353e-06, "loss": 11.8257, "step": 33337 }, { "epoch": 1.8153873380844445, "grad_norm": 0.5766055544264328, "learning_rate": 4.435048751030102e-06, "loss": 11.8032, "step": 33338 }, { "epoch": 1.8154417920810275, "grad_norm": 0.5235030491962843, "learning_rate": 4.432452089286931e-06, "loss": 11.8001, "step": 33339 }, { "epoch": 1.8154962460776105, "grad_norm": 0.5517239769619563, "learning_rate": 4.429856170693047e-06, "loss": 11.7633, "step": 33340 }, { "epoch": 1.8155507000741937, "grad_norm": 0.5235495340085781, "learning_rate": 4.427260995268634e-06, "loss": 11.7956, "step": 33341 }, { "epoch": 1.8156051540707767, "grad_norm": 0.47897288269388455, "learning_rate": 4.424666563033853e-06, "loss": 11.6569, "step": 33342 }, { "epoch": 1.8156596080673597, "grad_norm": 0.5624732894903519, "learning_rate": 4.422072874008909e-06, "loss": 11.9135, "step": 33343 }, { "epoch": 1.8157140620639427, "grad_norm": 0.5635515144033085, "learning_rate": 4.419479928213932e-06, "loss": 11.6486, "step": 33344 }, { "epoch": 1.8157685160605257, "grad_norm": 0.5859999162358663, "learning_rate": 4.4168877256691165e-06, "loss": 11.7451, "step": 33345 }, { "epoch": 1.8158229700571087, "grad_norm": 0.5564169979815358, "learning_rate": 4.414296266394624e-06, "loss": 11.8639, "step": 33346 }, { "epoch": 1.8158774240536917, "grad_norm": 0.5819972700378305, "learning_rate": 4.411705550410572e-06, "loss": 11.7479, "step": 33347 }, { "epoch": 1.8159318780502747, "grad_norm": 0.5720018007402299, "learning_rate": 4.409115577737155e-06, "loss": 11.8476, "step": 33348 }, { "epoch": 1.8159863320468577, "grad_norm": 0.5472262518933869, "learning_rate": 4.406526348394458e-06, "loss": 11.8056, "step": 33349 }, { "epoch": 1.8160407860434407, "grad_norm": 0.5890280566237442, "learning_rate": 4.403937862402663e-06, "loss": 11.7987, "step": 33350 }, { "epoch": 1.8160952400400237, "grad_norm": 0.5684000945037254, "learning_rate": 4.401350119781877e-06, "loss": 11.8233, "step": 33351 }, { "epoch": 1.8161496940366066, "grad_norm": 0.5102991266086635, "learning_rate": 4.398763120552218e-06, "loss": 11.6719, "step": 33352 }, { "epoch": 1.8162041480331896, "grad_norm": 0.5504354368638585, "learning_rate": 4.396176864733815e-06, "loss": 11.8069, "step": 33353 }, { "epoch": 1.8162586020297726, "grad_norm": 0.608197532885495, "learning_rate": 4.39359135234676e-06, "loss": 11.7731, "step": 33354 }, { "epoch": 1.8163130560263556, "grad_norm": 0.6193116912963961, "learning_rate": 4.3910065834111725e-06, "loss": 11.8017, "step": 33355 }, { "epoch": 1.8163675100229386, "grad_norm": 0.5551117452595495, "learning_rate": 4.38842255794717e-06, "loss": 11.8228, "step": 33356 }, { "epoch": 1.8164219640195216, "grad_norm": 0.5526334834554533, "learning_rate": 4.385839275974812e-06, "loss": 11.7001, "step": 33357 }, { "epoch": 1.8164764180161048, "grad_norm": 0.5410470492028854, "learning_rate": 4.3832567375142186e-06, "loss": 11.769, "step": 33358 }, { "epoch": 1.8165308720126878, "grad_norm": 0.4824851732505219, "learning_rate": 4.380674942585428e-06, "loss": 11.7198, "step": 33359 }, { "epoch": 1.8165853260092708, "grad_norm": 0.5371286258570362, "learning_rate": 4.378093891208568e-06, "loss": 11.7387, "step": 33360 }, { "epoch": 1.8166397800058538, "grad_norm": 0.5261341840184692, "learning_rate": 4.37551358340369e-06, "loss": 11.6994, "step": 33361 }, { "epoch": 1.8166942340024368, "grad_norm": 0.506506395276939, "learning_rate": 4.372934019190833e-06, "loss": 11.7797, "step": 33362 }, { "epoch": 1.8167486879990198, "grad_norm": 0.5681386074969045, "learning_rate": 4.370355198590103e-06, "loss": 11.7945, "step": 33363 }, { "epoch": 1.816803141995603, "grad_norm": 0.5927926920329334, "learning_rate": 4.367777121621508e-06, "loss": 11.9358, "step": 33364 }, { "epoch": 1.816857595992186, "grad_norm": 0.5492525892142982, "learning_rate": 4.365199788305119e-06, "loss": 11.8749, "step": 33365 }, { "epoch": 1.816912049988769, "grad_norm": 0.5868734938464674, "learning_rate": 4.362623198660975e-06, "loss": 11.8656, "step": 33366 }, { "epoch": 1.816966503985352, "grad_norm": 0.5737197849021911, "learning_rate": 4.360047352709107e-06, "loss": 11.6498, "step": 33367 }, { "epoch": 1.817020957981935, "grad_norm": 0.5010726442825639, "learning_rate": 4.357472250469563e-06, "loss": 11.7842, "step": 33368 }, { "epoch": 1.817075411978518, "grad_norm": 0.5260934326391239, "learning_rate": 4.354897891962339e-06, "loss": 11.763, "step": 33369 }, { "epoch": 1.817129865975101, "grad_norm": 0.5762939373781866, "learning_rate": 4.352324277207498e-06, "loss": 11.8411, "step": 33370 }, { "epoch": 1.817184319971684, "grad_norm": 0.46623385380449567, "learning_rate": 4.349751406224989e-06, "loss": 11.7587, "step": 33371 }, { "epoch": 1.817238773968267, "grad_norm": 0.5648296635165347, "learning_rate": 4.3471792790348634e-06, "loss": 11.7265, "step": 33372 }, { "epoch": 1.81729322796485, "grad_norm": 0.5351545931153535, "learning_rate": 4.344607895657127e-06, "loss": 11.7363, "step": 33373 }, { "epoch": 1.817347681961433, "grad_norm": 0.5870113109642406, "learning_rate": 4.342037256111742e-06, "loss": 11.701, "step": 33374 }, { "epoch": 1.817402135958016, "grad_norm": 0.538544264144728, "learning_rate": 4.3394673604187255e-06, "loss": 11.7449, "step": 33375 }, { "epoch": 1.817456589954599, "grad_norm": 0.5717901168737194, "learning_rate": 4.33689820859805e-06, "loss": 11.8415, "step": 33376 }, { "epoch": 1.817511043951182, "grad_norm": 0.6177129381709924, "learning_rate": 4.3343298006697005e-06, "loss": 11.6388, "step": 33377 }, { "epoch": 1.817565497947765, "grad_norm": 0.5550373898347775, "learning_rate": 4.331762136653639e-06, "loss": 11.7749, "step": 33378 }, { "epoch": 1.817619951944348, "grad_norm": 0.4833398613095589, "learning_rate": 4.329195216569837e-06, "loss": 11.5995, "step": 33379 }, { "epoch": 1.8176744059409309, "grad_norm": 0.5039485652407718, "learning_rate": 4.32662904043829e-06, "loss": 11.8422, "step": 33380 }, { "epoch": 1.8177288599375139, "grad_norm": 0.5287346972619532, "learning_rate": 4.324063608278883e-06, "loss": 11.8981, "step": 33381 }, { "epoch": 1.817783313934097, "grad_norm": 0.5433745401187149, "learning_rate": 4.3214989201116105e-06, "loss": 11.7002, "step": 33382 }, { "epoch": 1.81783776793068, "grad_norm": 0.5623893756865836, "learning_rate": 4.318934975956413e-06, "loss": 11.7466, "step": 33383 }, { "epoch": 1.817892221927263, "grad_norm": 0.5229503510293498, "learning_rate": 4.316371775833217e-06, "loss": 11.7498, "step": 33384 }, { "epoch": 1.817946675923846, "grad_norm": 0.5196069408964588, "learning_rate": 4.3138093197619634e-06, "loss": 11.6792, "step": 33385 }, { "epoch": 1.818001129920429, "grad_norm": 0.5270375830935387, "learning_rate": 4.311247607762569e-06, "loss": 11.7954, "step": 33386 }, { "epoch": 1.8180555839170123, "grad_norm": 0.523893051620918, "learning_rate": 4.308686639854975e-06, "loss": 11.7321, "step": 33387 }, { "epoch": 1.8181100379135953, "grad_norm": 0.5614365727548793, "learning_rate": 4.306126416059064e-06, "loss": 11.7834, "step": 33388 }, { "epoch": 1.8181644919101783, "grad_norm": 0.513632033751421, "learning_rate": 4.303566936394765e-06, "loss": 11.7146, "step": 33389 }, { "epoch": 1.8182189459067613, "grad_norm": 0.5477296502040427, "learning_rate": 4.301008200882006e-06, "loss": 11.7553, "step": 33390 }, { "epoch": 1.8182733999033442, "grad_norm": 0.5785223420716445, "learning_rate": 4.298450209540628e-06, "loss": 11.7638, "step": 33391 }, { "epoch": 1.8183278538999272, "grad_norm": 0.5166285606492488, "learning_rate": 4.295892962390558e-06, "loss": 11.7441, "step": 33392 }, { "epoch": 1.8183823078965102, "grad_norm": 0.6215707218826039, "learning_rate": 4.293336459451669e-06, "loss": 11.8865, "step": 33393 }, { "epoch": 1.8184367618930932, "grad_norm": 0.5193871759628252, "learning_rate": 4.290780700743846e-06, "loss": 11.753, "step": 33394 }, { "epoch": 1.8184912158896762, "grad_norm": 0.5457247022914097, "learning_rate": 4.288225686286962e-06, "loss": 11.8888, "step": 33395 }, { "epoch": 1.8185456698862592, "grad_norm": 0.6053692002563366, "learning_rate": 4.285671416100889e-06, "loss": 11.7425, "step": 33396 }, { "epoch": 1.8186001238828422, "grad_norm": 0.5378080864852702, "learning_rate": 4.28311789020549e-06, "loss": 11.7059, "step": 33397 }, { "epoch": 1.8186545778794252, "grad_norm": 0.5961398522834784, "learning_rate": 4.280565108620604e-06, "loss": 11.8281, "step": 33398 }, { "epoch": 1.8187090318760082, "grad_norm": 0.616068474741855, "learning_rate": 4.278013071366116e-06, "loss": 11.9312, "step": 33399 }, { "epoch": 1.8187634858725912, "grad_norm": 0.5498839603826776, "learning_rate": 4.275461778461831e-06, "loss": 11.7726, "step": 33400 }, { "epoch": 1.8188179398691742, "grad_norm": 0.5951827338815259, "learning_rate": 4.272911229927634e-06, "loss": 11.759, "step": 33401 }, { "epoch": 1.8188723938657572, "grad_norm": 0.5330808372606821, "learning_rate": 4.27036142578332e-06, "loss": 11.8161, "step": 33402 }, { "epoch": 1.8189268478623402, "grad_norm": 0.5826470579002819, "learning_rate": 4.267812366048718e-06, "loss": 11.8479, "step": 33403 }, { "epoch": 1.8189813018589231, "grad_norm": 0.510313617563817, "learning_rate": 4.265264050743667e-06, "loss": 11.7532, "step": 33404 }, { "epoch": 1.8190357558555064, "grad_norm": 0.5416770344837316, "learning_rate": 4.2627164798879845e-06, "loss": 11.8805, "step": 33405 }, { "epoch": 1.8190902098520894, "grad_norm": 0.5713216715113655, "learning_rate": 4.260169653501467e-06, "loss": 11.8938, "step": 33406 }, { "epoch": 1.8191446638486723, "grad_norm": 0.5185567635271205, "learning_rate": 4.25762357160393e-06, "loss": 11.7382, "step": 33407 }, { "epoch": 1.8191991178452553, "grad_norm": 0.5088800254469613, "learning_rate": 4.25507823421516e-06, "loss": 11.7215, "step": 33408 }, { "epoch": 1.8192535718418383, "grad_norm": 0.5321922930209237, "learning_rate": 4.252533641354972e-06, "loss": 11.7429, "step": 33409 }, { "epoch": 1.8193080258384213, "grad_norm": 0.5342018890374852, "learning_rate": 4.249989793043119e-06, "loss": 11.6897, "step": 33410 }, { "epoch": 1.8193624798350045, "grad_norm": 0.523277589494354, "learning_rate": 4.247446689299428e-06, "loss": 11.7073, "step": 33411 }, { "epoch": 1.8194169338315875, "grad_norm": 0.5845380724600674, "learning_rate": 4.24490433014364e-06, "loss": 11.7868, "step": 33412 }, { "epoch": 1.8194713878281705, "grad_norm": 0.5011164408601148, "learning_rate": 4.242362715595527e-06, "loss": 11.8852, "step": 33413 }, { "epoch": 1.8195258418247535, "grad_norm": 0.5536522211802043, "learning_rate": 4.239821845674874e-06, "loss": 11.8753, "step": 33414 }, { "epoch": 1.8195802958213365, "grad_norm": 0.49836783003234875, "learning_rate": 4.23728172040141e-06, "loss": 11.7772, "step": 33415 }, { "epoch": 1.8196347498179195, "grad_norm": 0.5547445071794451, "learning_rate": 4.2347423397948945e-06, "loss": 11.8077, "step": 33416 }, { "epoch": 1.8196892038145025, "grad_norm": 0.5113337040094863, "learning_rate": 4.232203703875104e-06, "loss": 11.6234, "step": 33417 }, { "epoch": 1.8197436578110855, "grad_norm": 0.5791002775489162, "learning_rate": 4.229665812661742e-06, "loss": 11.9064, "step": 33418 }, { "epoch": 1.8197981118076685, "grad_norm": 0.504620041642399, "learning_rate": 4.227128666174574e-06, "loss": 11.7447, "step": 33419 }, { "epoch": 1.8198525658042515, "grad_norm": 0.6257757387641968, "learning_rate": 4.224592264433302e-06, "loss": 11.8402, "step": 33420 }, { "epoch": 1.8199070198008345, "grad_norm": 0.6169789961814764, "learning_rate": 4.222056607457669e-06, "loss": 11.8489, "step": 33421 }, { "epoch": 1.8199614737974175, "grad_norm": 0.7099164133328735, "learning_rate": 4.219521695267392e-06, "loss": 11.6361, "step": 33422 }, { "epoch": 1.8200159277940005, "grad_norm": 0.5359550502325594, "learning_rate": 4.216987527882166e-06, "loss": 11.8551, "step": 33423 }, { "epoch": 1.8200703817905834, "grad_norm": 0.5362127744955114, "learning_rate": 4.214454105321719e-06, "loss": 11.7809, "step": 33424 }, { "epoch": 1.8201248357871664, "grad_norm": 0.5881300916325793, "learning_rate": 4.211921427605725e-06, "loss": 11.7678, "step": 33425 }, { "epoch": 1.8201792897837494, "grad_norm": 0.5299651412201789, "learning_rate": 4.209389494753902e-06, "loss": 11.7097, "step": 33426 }, { "epoch": 1.8202337437803324, "grad_norm": 0.5394203768634799, "learning_rate": 4.206858306785922e-06, "loss": 11.7668, "step": 33427 }, { "epoch": 1.8202881977769156, "grad_norm": 0.55487418992176, "learning_rate": 4.20432786372148e-06, "loss": 11.7554, "step": 33428 }, { "epoch": 1.8203426517734986, "grad_norm": 0.5553023300732464, "learning_rate": 4.201798165580262e-06, "loss": 11.747, "step": 33429 }, { "epoch": 1.8203971057700816, "grad_norm": 0.5267832576306941, "learning_rate": 4.199269212381906e-06, "loss": 11.8648, "step": 33430 }, { "epoch": 1.8204515597666646, "grad_norm": 0.5247482544173551, "learning_rate": 4.196741004146121e-06, "loss": 11.6874, "step": 33431 }, { "epoch": 1.8205060137632476, "grad_norm": 0.5166248630521795, "learning_rate": 4.194213540892544e-06, "loss": 11.8875, "step": 33432 }, { "epoch": 1.8205604677598306, "grad_norm": 0.507621012519032, "learning_rate": 4.191686822640806e-06, "loss": 11.7607, "step": 33433 }, { "epoch": 1.8206149217564138, "grad_norm": 0.5069945546515978, "learning_rate": 4.1891608494106e-06, "loss": 11.7674, "step": 33434 }, { "epoch": 1.8206693757529968, "grad_norm": 0.6204043248303384, "learning_rate": 4.186635621221524e-06, "loss": 11.8956, "step": 33435 }, { "epoch": 1.8207238297495798, "grad_norm": 0.5070988944301096, "learning_rate": 4.184111138093261e-06, "loss": 11.7003, "step": 33436 }, { "epoch": 1.8207782837461628, "grad_norm": 0.5401162562355685, "learning_rate": 4.181587400045406e-06, "loss": 11.772, "step": 33437 }, { "epoch": 1.8208327377427458, "grad_norm": 0.6120771082563683, "learning_rate": 4.179064407097588e-06, "loss": 11.8083, "step": 33438 }, { "epoch": 1.8208871917393288, "grad_norm": 0.5474527864695609, "learning_rate": 4.176542159269448e-06, "loss": 11.8253, "step": 33439 }, { "epoch": 1.8209416457359118, "grad_norm": 0.4984756252724798, "learning_rate": 4.17402065658058e-06, "loss": 11.7179, "step": 33440 }, { "epoch": 1.8209960997324948, "grad_norm": 0.5595777782453509, "learning_rate": 4.171499899050612e-06, "loss": 11.8789, "step": 33441 }, { "epoch": 1.8210505537290778, "grad_norm": 0.551713785230797, "learning_rate": 4.16897988669912e-06, "loss": 11.7899, "step": 33442 }, { "epoch": 1.8211050077256608, "grad_norm": 0.5359306983219716, "learning_rate": 4.166460619545698e-06, "loss": 11.6985, "step": 33443 }, { "epoch": 1.8211594617222437, "grad_norm": 0.5220655482790552, "learning_rate": 4.1639420976099745e-06, "loss": 11.7098, "step": 33444 }, { "epoch": 1.8212139157188267, "grad_norm": 0.5894974152081649, "learning_rate": 4.1614243209114886e-06, "loss": 11.7912, "step": 33445 }, { "epoch": 1.8212683697154097, "grad_norm": 0.549315203766272, "learning_rate": 4.158907289469849e-06, "loss": 11.715, "step": 33446 }, { "epoch": 1.8213228237119927, "grad_norm": 0.5489235924790603, "learning_rate": 4.156391003304616e-06, "loss": 11.8682, "step": 33447 }, { "epoch": 1.8213772777085757, "grad_norm": 0.587180068057914, "learning_rate": 4.153875462435363e-06, "loss": 11.7768, "step": 33448 }, { "epoch": 1.8214317317051587, "grad_norm": 0.6152518409633797, "learning_rate": 4.151360666881643e-06, "loss": 11.8247, "step": 33449 }, { "epoch": 1.8214861857017417, "grad_norm": 0.5084308337759688, "learning_rate": 4.1488466166630046e-06, "loss": 11.7028, "step": 33450 }, { "epoch": 1.8215406396983247, "grad_norm": 0.585755402914931, "learning_rate": 4.146333311799044e-06, "loss": 11.7917, "step": 33451 }, { "epoch": 1.821595093694908, "grad_norm": 0.5121071994847662, "learning_rate": 4.1438207523092466e-06, "loss": 11.7782, "step": 33452 }, { "epoch": 1.821649547691491, "grad_norm": 0.510229864272492, "learning_rate": 4.141308938213162e-06, "loss": 11.8306, "step": 33453 }, { "epoch": 1.821704001688074, "grad_norm": 0.5967912187830556, "learning_rate": 4.1387978695303645e-06, "loss": 11.6707, "step": 33454 }, { "epoch": 1.8217584556846569, "grad_norm": 0.592826957125138, "learning_rate": 4.1362875462803264e-06, "loss": 11.9532, "step": 33455 }, { "epoch": 1.8218129096812399, "grad_norm": 0.6119896503146464, "learning_rate": 4.13377796848261e-06, "loss": 11.7899, "step": 33456 }, { "epoch": 1.821867363677823, "grad_norm": 0.5436330551330625, "learning_rate": 4.1312691361566905e-06, "loss": 11.8297, "step": 33457 }, { "epoch": 1.821921817674406, "grad_norm": 0.5556863848375361, "learning_rate": 4.128761049322127e-06, "loss": 11.7746, "step": 33458 }, { "epoch": 1.821976271670989, "grad_norm": 0.6291168622184845, "learning_rate": 4.126253707998373e-06, "loss": 11.8717, "step": 33459 }, { "epoch": 1.822030725667572, "grad_norm": 0.5158533575860381, "learning_rate": 4.123747112204945e-06, "loss": 11.7635, "step": 33460 }, { "epoch": 1.822085179664155, "grad_norm": 0.5252329388785419, "learning_rate": 4.121241261961372e-06, "loss": 11.8638, "step": 33461 }, { "epoch": 1.822139633660738, "grad_norm": 0.5511530083840109, "learning_rate": 4.118736157287073e-06, "loss": 11.8423, "step": 33462 }, { "epoch": 1.822194087657321, "grad_norm": 0.5093725510377453, "learning_rate": 4.116231798201586e-06, "loss": 11.7447, "step": 33463 }, { "epoch": 1.822248541653904, "grad_norm": 0.5513784833495462, "learning_rate": 4.11372818472433e-06, "loss": 11.8521, "step": 33464 }, { "epoch": 1.822302995650487, "grad_norm": 0.5737361005938579, "learning_rate": 4.111225316874823e-06, "loss": 11.8044, "step": 33465 }, { "epoch": 1.82235744964707, "grad_norm": 0.5736580509093636, "learning_rate": 4.108723194672504e-06, "loss": 11.7777, "step": 33466 }, { "epoch": 1.822411903643653, "grad_norm": 0.4762929827191769, "learning_rate": 4.106221818136835e-06, "loss": 11.7381, "step": 33467 }, { "epoch": 1.822466357640236, "grad_norm": 0.5685099642399056, "learning_rate": 4.103721187287268e-06, "loss": 11.7572, "step": 33468 }, { "epoch": 1.822520811636819, "grad_norm": 0.5643956750201643, "learning_rate": 4.101221302143244e-06, "loss": 11.7392, "step": 33469 }, { "epoch": 1.822575265633402, "grad_norm": 0.5591510287579061, "learning_rate": 4.0987221627242114e-06, "loss": 11.706, "step": 33470 }, { "epoch": 1.822629719629985, "grad_norm": 0.5173483899573453, "learning_rate": 4.0962237690496005e-06, "loss": 11.6633, "step": 33471 }, { "epoch": 1.822684173626568, "grad_norm": 0.5755603656146498, "learning_rate": 4.093726121138819e-06, "loss": 11.7247, "step": 33472 }, { "epoch": 1.822738627623151, "grad_norm": 0.5355424571927285, "learning_rate": 4.0912292190113275e-06, "loss": 11.6119, "step": 33473 }, { "epoch": 1.822793081619734, "grad_norm": 0.5545457762756087, "learning_rate": 4.088733062686501e-06, "loss": 11.7037, "step": 33474 }, { "epoch": 1.8228475356163172, "grad_norm": 0.5973165584493365, "learning_rate": 4.086237652183789e-06, "loss": 11.7259, "step": 33475 }, { "epoch": 1.8229019896129002, "grad_norm": 0.5586578768783266, "learning_rate": 4.083742987522565e-06, "loss": 11.7125, "step": 33476 }, { "epoch": 1.8229564436094832, "grad_norm": 0.535720181937925, "learning_rate": 4.081249068722237e-06, "loss": 11.7799, "step": 33477 }, { "epoch": 1.8230108976060662, "grad_norm": 0.5093407659815583, "learning_rate": 4.078755895802222e-06, "loss": 11.8156, "step": 33478 }, { "epoch": 1.8230653516026492, "grad_norm": 0.5401439310621149, "learning_rate": 4.076263468781871e-06, "loss": 11.7707, "step": 33479 }, { "epoch": 1.8231198055992321, "grad_norm": 0.5588026189918949, "learning_rate": 4.073771787680591e-06, "loss": 11.7741, "step": 33480 }, { "epoch": 1.8231742595958154, "grad_norm": 0.7520636812294781, "learning_rate": 4.0712808525177445e-06, "loss": 11.898, "step": 33481 }, { "epoch": 1.8232287135923984, "grad_norm": 0.5100512927896358, "learning_rate": 4.068790663312705e-06, "loss": 11.6486, "step": 33482 }, { "epoch": 1.8232831675889813, "grad_norm": 0.5180993532143796, "learning_rate": 4.066301220084834e-06, "loss": 11.7956, "step": 33483 }, { "epoch": 1.8233376215855643, "grad_norm": 0.552233970687271, "learning_rate": 4.063812522853494e-06, "loss": 11.7794, "step": 33484 }, { "epoch": 1.8233920755821473, "grad_norm": 0.5829374629722772, "learning_rate": 4.061324571638048e-06, "loss": 11.8612, "step": 33485 }, { "epoch": 1.8234465295787303, "grad_norm": 0.5354389819448515, "learning_rate": 4.058837366457813e-06, "loss": 11.827, "step": 33486 }, { "epoch": 1.8235009835753133, "grad_norm": 0.5289263660728635, "learning_rate": 4.056350907332141e-06, "loss": 11.8566, "step": 33487 }, { "epoch": 1.8235554375718963, "grad_norm": 0.5190264478511732, "learning_rate": 4.053865194280404e-06, "loss": 11.7686, "step": 33488 }, { "epoch": 1.8236098915684793, "grad_norm": 0.6100448974671006, "learning_rate": 4.051380227321877e-06, "loss": 11.7739, "step": 33489 }, { "epoch": 1.8236643455650623, "grad_norm": 0.5570678020624251, "learning_rate": 4.048896006475922e-06, "loss": 11.9415, "step": 33490 }, { "epoch": 1.8237187995616453, "grad_norm": 0.6030830455316337, "learning_rate": 4.046412531761845e-06, "loss": 11.8224, "step": 33491 }, { "epoch": 1.8237732535582283, "grad_norm": 0.5958088040953632, "learning_rate": 4.043929803198943e-06, "loss": 11.8535, "step": 33492 }, { "epoch": 1.8238277075548113, "grad_norm": 0.5204784015270262, "learning_rate": 4.041447820806543e-06, "loss": 11.8078, "step": 33493 }, { "epoch": 1.8238821615513943, "grad_norm": 0.5771124407140996, "learning_rate": 4.038966584603932e-06, "loss": 11.6476, "step": 33494 }, { "epoch": 1.8239366155479773, "grad_norm": 0.5696700008076531, "learning_rate": 4.036486094610414e-06, "loss": 11.7869, "step": 33495 }, { "epoch": 1.8239910695445603, "grad_norm": 0.555014384169201, "learning_rate": 4.034006350845265e-06, "loss": 11.8537, "step": 33496 }, { "epoch": 1.8240455235411432, "grad_norm": 0.5408362132387518, "learning_rate": 4.031527353327791e-06, "loss": 11.7032, "step": 33497 }, { "epoch": 1.8240999775377265, "grad_norm": 0.5366653302165357, "learning_rate": 4.029049102077231e-06, "loss": 11.7905, "step": 33498 }, { "epoch": 1.8241544315343095, "grad_norm": 0.5751795087361428, "learning_rate": 4.026571597112893e-06, "loss": 11.7215, "step": 33499 }, { "epoch": 1.8242088855308924, "grad_norm": 0.5635483618024176, "learning_rate": 4.0240948384540286e-06, "loss": 11.8326, "step": 33500 }, { "epoch": 1.8242633395274754, "grad_norm": 0.5132812454468167, "learning_rate": 4.02161882611991e-06, "loss": 11.744, "step": 33501 }, { "epoch": 1.8243177935240584, "grad_norm": 0.7318438973514497, "learning_rate": 4.019143560129757e-06, "loss": 11.9016, "step": 33502 }, { "epoch": 1.8243722475206414, "grad_norm": 0.5615510206887165, "learning_rate": 4.016669040502863e-06, "loss": 11.8107, "step": 33503 }, { "epoch": 1.8244267015172246, "grad_norm": 0.6124006959764241, "learning_rate": 4.014195267258425e-06, "loss": 11.89, "step": 33504 }, { "epoch": 1.8244811555138076, "grad_norm": 0.5255157333733381, "learning_rate": 4.011722240415716e-06, "loss": 11.7617, "step": 33505 }, { "epoch": 1.8245356095103906, "grad_norm": 0.5741861787237976, "learning_rate": 4.009249959993943e-06, "loss": 11.8035, "step": 33506 }, { "epoch": 1.8245900635069736, "grad_norm": 0.557906623715009, "learning_rate": 4.006778426012348e-06, "loss": 11.767, "step": 33507 }, { "epoch": 1.8246445175035566, "grad_norm": 0.510337835917614, "learning_rate": 4.004307638490135e-06, "loss": 11.8081, "step": 33508 }, { "epoch": 1.8246989715001396, "grad_norm": 0.5642408994260448, "learning_rate": 4.001837597446523e-06, "loss": 11.8833, "step": 33509 }, { "epoch": 1.8247534254967226, "grad_norm": 0.580121439087195, "learning_rate": 3.99936830290073e-06, "loss": 11.8494, "step": 33510 }, { "epoch": 1.8248078794933056, "grad_norm": 0.5428320757164858, "learning_rate": 3.996899754871952e-06, "loss": 11.7593, "step": 33511 }, { "epoch": 1.8248623334898886, "grad_norm": 0.5673038505201914, "learning_rate": 3.994431953379374e-06, "loss": 11.8183, "step": 33512 }, { "epoch": 1.8249167874864716, "grad_norm": 0.5365934862740096, "learning_rate": 3.991964898442191e-06, "loss": 11.565, "step": 33513 }, { "epoch": 1.8249712414830546, "grad_norm": 0.526265695858374, "learning_rate": 3.989498590079577e-06, "loss": 11.7453, "step": 33514 }, { "epoch": 1.8250256954796376, "grad_norm": 0.5468074167815248, "learning_rate": 3.987033028310749e-06, "loss": 11.8768, "step": 33515 }, { "epoch": 1.8250801494762205, "grad_norm": 0.5590618180762743, "learning_rate": 3.984568213154827e-06, "loss": 11.7486, "step": 33516 }, { "epoch": 1.8251346034728035, "grad_norm": 0.5274691560683105, "learning_rate": 3.982104144631027e-06, "loss": 11.6954, "step": 33517 }, { "epoch": 1.8251890574693865, "grad_norm": 0.5301098752998251, "learning_rate": 3.979640822758468e-06, "loss": 11.8421, "step": 33518 }, { "epoch": 1.8252435114659695, "grad_norm": 0.6038447826816482, "learning_rate": 3.977178247556346e-06, "loss": 11.795, "step": 33519 }, { "epoch": 1.8252979654625525, "grad_norm": 0.5864158844167162, "learning_rate": 3.974716419043767e-06, "loss": 11.7993, "step": 33520 }, { "epoch": 1.8253524194591357, "grad_norm": 0.5217450081365466, "learning_rate": 3.972255337239916e-06, "loss": 11.757, "step": 33521 }, { "epoch": 1.8254068734557187, "grad_norm": 0.5495316015868976, "learning_rate": 3.969795002163912e-06, "loss": 11.7986, "step": 33522 }, { "epoch": 1.8254613274523017, "grad_norm": 0.5359550676514172, "learning_rate": 3.967335413834872e-06, "loss": 11.8066, "step": 33523 }, { "epoch": 1.8255157814488847, "grad_norm": 0.528853344261235, "learning_rate": 3.964876572271947e-06, "loss": 11.7869, "step": 33524 }, { "epoch": 1.8255702354454677, "grad_norm": 0.5651961918332623, "learning_rate": 3.962418477494234e-06, "loss": 11.8611, "step": 33525 }, { "epoch": 1.8256246894420507, "grad_norm": 0.6233616935130436, "learning_rate": 3.959961129520862e-06, "loss": 11.7343, "step": 33526 }, { "epoch": 1.825679143438634, "grad_norm": 0.5257494402201819, "learning_rate": 3.957504528370948e-06, "loss": 11.6419, "step": 33527 }, { "epoch": 1.825733597435217, "grad_norm": 0.5632429576399808, "learning_rate": 3.955048674063577e-06, "loss": 11.8467, "step": 33528 }, { "epoch": 1.8257880514318, "grad_norm": 0.5665512604678933, "learning_rate": 3.952593566617868e-06, "loss": 11.8056, "step": 33529 }, { "epoch": 1.825842505428383, "grad_norm": 0.4963751260642664, "learning_rate": 3.950139206052883e-06, "loss": 11.6963, "step": 33530 }, { "epoch": 1.8258969594249659, "grad_norm": 0.6040634228372773, "learning_rate": 3.94768559238774e-06, "loss": 11.6905, "step": 33531 }, { "epoch": 1.8259514134215489, "grad_norm": 0.5819911824782974, "learning_rate": 3.94523272564149e-06, "loss": 11.8623, "step": 33532 }, { "epoch": 1.8260058674181319, "grad_norm": 0.5712258398644969, "learning_rate": 3.942780605833218e-06, "loss": 11.8323, "step": 33533 }, { "epoch": 1.8260603214147149, "grad_norm": 0.5260499541865548, "learning_rate": 3.940329232981999e-06, "loss": 11.7525, "step": 33534 }, { "epoch": 1.8261147754112979, "grad_norm": 0.6315230831867228, "learning_rate": 3.937878607106882e-06, "loss": 11.7957, "step": 33535 }, { "epoch": 1.8261692294078808, "grad_norm": 0.5747869563676422, "learning_rate": 3.93542872822692e-06, "loss": 11.7983, "step": 33536 }, { "epoch": 1.8262236834044638, "grad_norm": 0.5092012466463031, "learning_rate": 3.932979596361197e-06, "loss": 11.7679, "step": 33537 }, { "epoch": 1.8262781374010468, "grad_norm": 0.5208759833375679, "learning_rate": 3.930531211528721e-06, "loss": 11.8191, "step": 33538 }, { "epoch": 1.8263325913976298, "grad_norm": 0.510786388246234, "learning_rate": 3.928083573748554e-06, "loss": 11.7865, "step": 33539 }, { "epoch": 1.8263870453942128, "grad_norm": 0.5496315114605621, "learning_rate": 3.925636683039713e-06, "loss": 11.7737, "step": 33540 }, { "epoch": 1.8264414993907958, "grad_norm": 0.5115296641116465, "learning_rate": 3.92319053942124e-06, "loss": 11.5915, "step": 33541 }, { "epoch": 1.8264959533873788, "grad_norm": 0.5194726769345599, "learning_rate": 3.920745142912152e-06, "loss": 11.7564, "step": 33542 }, { "epoch": 1.8265504073839618, "grad_norm": 0.5129502256690269, "learning_rate": 3.9183004935314575e-06, "loss": 11.8047, "step": 33543 }, { "epoch": 1.8266048613805448, "grad_norm": 0.5156117474702039, "learning_rate": 3.915856591298172e-06, "loss": 11.6586, "step": 33544 }, { "epoch": 1.826659315377128, "grad_norm": 0.5090345224098513, "learning_rate": 3.913413436231284e-06, "loss": 11.7061, "step": 33545 }, { "epoch": 1.826713769373711, "grad_norm": 0.5375689023583371, "learning_rate": 3.91097102834983e-06, "loss": 11.7355, "step": 33546 }, { "epoch": 1.826768223370294, "grad_norm": 0.5301205373658786, "learning_rate": 3.908529367672764e-06, "loss": 11.7086, "step": 33547 }, { "epoch": 1.826822677366877, "grad_norm": 0.5711261212450155, "learning_rate": 3.906088454219081e-06, "loss": 11.7186, "step": 33548 }, { "epoch": 1.82687713136346, "grad_norm": 0.552201056906126, "learning_rate": 3.9036482880077885e-06, "loss": 11.7314, "step": 33549 }, { "epoch": 1.826931585360043, "grad_norm": 0.6254665150234314, "learning_rate": 3.901208869057838e-06, "loss": 11.717, "step": 33550 }, { "epoch": 1.8269860393566262, "grad_norm": 0.5310749922950447, "learning_rate": 3.898770197388202e-06, "loss": 11.8462, "step": 33551 }, { "epoch": 1.8270404933532092, "grad_norm": 0.5839481534108598, "learning_rate": 3.896332273017844e-06, "loss": 11.8887, "step": 33552 }, { "epoch": 1.8270949473497922, "grad_norm": 0.5488042552274396, "learning_rate": 3.8938950959657164e-06, "loss": 11.7305, "step": 33553 }, { "epoch": 1.8271494013463752, "grad_norm": 0.5482019480639584, "learning_rate": 3.891458666250791e-06, "loss": 11.8, "step": 33554 }, { "epoch": 1.8272038553429581, "grad_norm": 0.5318702393227676, "learning_rate": 3.889022983891988e-06, "loss": 11.6851, "step": 33555 }, { "epoch": 1.8272583093395411, "grad_norm": 0.5674620351831604, "learning_rate": 3.886588048908268e-06, "loss": 11.6989, "step": 33556 }, { "epoch": 1.8273127633361241, "grad_norm": 0.5604734315062843, "learning_rate": 3.8841538613185516e-06, "loss": 11.7631, "step": 33557 }, { "epoch": 1.8273672173327071, "grad_norm": 0.5664493019605271, "learning_rate": 3.881720421141766e-06, "loss": 11.774, "step": 33558 }, { "epoch": 1.8274216713292901, "grad_norm": 0.5485116698974144, "learning_rate": 3.8792877283968635e-06, "loss": 11.8739, "step": 33559 }, { "epoch": 1.8274761253258731, "grad_norm": 0.5468985098187719, "learning_rate": 3.8768557831027175e-06, "loss": 11.7811, "step": 33560 }, { "epoch": 1.827530579322456, "grad_norm": 0.56447714643943, "learning_rate": 3.87442458527828e-06, "loss": 11.8985, "step": 33561 }, { "epoch": 1.827585033319039, "grad_norm": 0.5239203982580789, "learning_rate": 3.871994134942436e-06, "loss": 11.786, "step": 33562 }, { "epoch": 1.827639487315622, "grad_norm": 0.6344256070612694, "learning_rate": 3.86956443211407e-06, "loss": 11.7964, "step": 33563 }, { "epoch": 1.827693941312205, "grad_norm": 0.5252318449258297, "learning_rate": 3.867135476812101e-06, "loss": 11.6403, "step": 33564 }, { "epoch": 1.827748395308788, "grad_norm": 0.5342986990063621, "learning_rate": 3.864707269055401e-06, "loss": 11.6848, "step": 33565 }, { "epoch": 1.827802849305371, "grad_norm": 0.5265795921368792, "learning_rate": 3.862279808862878e-06, "loss": 11.7295, "step": 33566 }, { "epoch": 1.827857303301954, "grad_norm": 0.5534938744120571, "learning_rate": 3.859853096253363e-06, "loss": 11.8545, "step": 33567 }, { "epoch": 1.8279117572985373, "grad_norm": 0.5314140467853407, "learning_rate": 3.8574271312457725e-06, "loss": 11.7534, "step": 33568 }, { "epoch": 1.8279662112951203, "grad_norm": 0.589235670157932, "learning_rate": 3.8550019138589465e-06, "loss": 11.8212, "step": 33569 }, { "epoch": 1.8280206652917033, "grad_norm": 0.6138541162915487, "learning_rate": 3.852577444111738e-06, "loss": 11.7081, "step": 33570 }, { "epoch": 1.8280751192882863, "grad_norm": 0.5913178943141056, "learning_rate": 3.850153722023031e-06, "loss": 11.7072, "step": 33571 }, { "epoch": 1.8281295732848692, "grad_norm": 0.5667341265681017, "learning_rate": 3.8477307476116445e-06, "loss": 11.7691, "step": 33572 }, { "epoch": 1.8281840272814522, "grad_norm": 0.5785024779146959, "learning_rate": 3.84530852089644e-06, "loss": 11.9016, "step": 33573 }, { "epoch": 1.8282384812780355, "grad_norm": 0.5261711522291102, "learning_rate": 3.842887041896226e-06, "loss": 11.7926, "step": 33574 }, { "epoch": 1.8282929352746184, "grad_norm": 0.5538186553914405, "learning_rate": 3.8404663106298426e-06, "loss": 11.6854, "step": 33575 }, { "epoch": 1.8283473892712014, "grad_norm": 0.5154450321743397, "learning_rate": 3.8380463271161294e-06, "loss": 11.8107, "step": 33576 }, { "epoch": 1.8284018432677844, "grad_norm": 0.5768646424999689, "learning_rate": 3.835627091373883e-06, "loss": 11.7498, "step": 33577 }, { "epoch": 1.8284562972643674, "grad_norm": 0.5540500575422419, "learning_rate": 3.833208603421945e-06, "loss": 11.6689, "step": 33578 }, { "epoch": 1.8285107512609504, "grad_norm": 0.5255612682975356, "learning_rate": 3.830790863279088e-06, "loss": 11.6593, "step": 33579 }, { "epoch": 1.8285652052575334, "grad_norm": 0.6457394825812683, "learning_rate": 3.828373870964153e-06, "loss": 11.7615, "step": 33580 }, { "epoch": 1.8286196592541164, "grad_norm": 0.5497840083945347, "learning_rate": 3.82595762649588e-06, "loss": 11.8321, "step": 33581 }, { "epoch": 1.8286741132506994, "grad_norm": 0.5115940301137489, "learning_rate": 3.8235421298931096e-06, "loss": 11.9497, "step": 33582 }, { "epoch": 1.8287285672472824, "grad_norm": 0.5415655013355506, "learning_rate": 3.8211273811746055e-06, "loss": 11.6561, "step": 33583 }, { "epoch": 1.8287830212438654, "grad_norm": 0.5508433141977883, "learning_rate": 3.8187133803591295e-06, "loss": 11.7448, "step": 33584 }, { "epoch": 1.8288374752404484, "grad_norm": 0.5045897152001088, "learning_rate": 3.816300127465466e-06, "loss": 11.7792, "step": 33585 }, { "epoch": 1.8288919292370314, "grad_norm": 0.5137576802657025, "learning_rate": 3.8138876225124022e-06, "loss": 11.7979, "step": 33586 }, { "epoch": 1.8289463832336144, "grad_norm": 0.5264990376744149, "learning_rate": 3.8114758655186657e-06, "loss": 11.6629, "step": 33587 }, { "epoch": 1.8290008372301974, "grad_norm": 0.5882370370095509, "learning_rate": 3.8090648565030305e-06, "loss": 11.8454, "step": 33588 }, { "epoch": 1.8290552912267803, "grad_norm": 0.49267080795308765, "learning_rate": 3.806654595484227e-06, "loss": 11.7577, "step": 33589 }, { "epoch": 1.8291097452233633, "grad_norm": 0.5659817523041979, "learning_rate": 3.804245082481017e-06, "loss": 11.845, "step": 33590 }, { "epoch": 1.8291641992199466, "grad_norm": 0.5002647537304712, "learning_rate": 3.8018363175121306e-06, "loss": 11.7127, "step": 33591 }, { "epoch": 1.8292186532165295, "grad_norm": 0.537488999759871, "learning_rate": 3.7994283005962974e-06, "loss": 11.7534, "step": 33592 }, { "epoch": 1.8292731072131125, "grad_norm": 0.5781595998247397, "learning_rate": 3.7970210317522457e-06, "loss": 11.7543, "step": 33593 }, { "epoch": 1.8293275612096955, "grad_norm": 0.514201095862873, "learning_rate": 3.794614510998684e-06, "loss": 11.7786, "step": 33594 }, { "epoch": 1.8293820152062785, "grad_norm": 0.47920266652534765, "learning_rate": 3.7922087383543414e-06, "loss": 11.6605, "step": 33595 }, { "epoch": 1.8294364692028615, "grad_norm": 0.5740164054785348, "learning_rate": 3.7898037138379027e-06, "loss": 11.8035, "step": 33596 }, { "epoch": 1.8294909231994447, "grad_norm": 0.5942858605802906, "learning_rate": 3.7873994374680864e-06, "loss": 11.7615, "step": 33597 }, { "epoch": 1.8295453771960277, "grad_norm": 0.5675241724473631, "learning_rate": 3.7849959092636e-06, "loss": 11.7158, "step": 33598 }, { "epoch": 1.8295998311926107, "grad_norm": 0.5415942452650854, "learning_rate": 3.782593129243117e-06, "loss": 11.7951, "step": 33599 }, { "epoch": 1.8296542851891937, "grad_norm": 0.5467909468926304, "learning_rate": 3.780191097425323e-06, "loss": 11.6424, "step": 33600 }, { "epoch": 1.8297087391857767, "grad_norm": 0.5691203385637891, "learning_rate": 3.7777898138289025e-06, "loss": 11.7199, "step": 33601 }, { "epoch": 1.8297631931823597, "grad_norm": 0.5605891023426018, "learning_rate": 3.7753892784725188e-06, "loss": 11.8701, "step": 33602 }, { "epoch": 1.8298176471789427, "grad_norm": 0.584924898795815, "learning_rate": 3.7729894913748564e-06, "loss": 11.902, "step": 33603 }, { "epoch": 1.8298721011755257, "grad_norm": 0.644131364469211, "learning_rate": 3.7705904525545567e-06, "loss": 11.7686, "step": 33604 }, { "epoch": 1.8299265551721087, "grad_norm": 0.5328483942849503, "learning_rate": 3.7681921620302818e-06, "loss": 11.6705, "step": 33605 }, { "epoch": 1.8299810091686917, "grad_norm": 0.5084748882255641, "learning_rate": 3.765794619820684e-06, "loss": 11.7388, "step": 33606 }, { "epoch": 1.8300354631652747, "grad_norm": 0.5387853995100667, "learning_rate": 3.7633978259444035e-06, "loss": 11.8504, "step": 33607 }, { "epoch": 1.8300899171618576, "grad_norm": 0.552683848284675, "learning_rate": 3.7610017804200815e-06, "loss": 11.8648, "step": 33608 }, { "epoch": 1.8301443711584406, "grad_norm": 0.5490437201936529, "learning_rate": 3.758606483266347e-06, "loss": 11.7925, "step": 33609 }, { "epoch": 1.8301988251550236, "grad_norm": 0.5589677545008368, "learning_rate": 3.7562119345018408e-06, "loss": 11.797, "step": 33610 }, { "epoch": 1.8302532791516066, "grad_norm": 0.5426710002220848, "learning_rate": 3.7538181341451593e-06, "loss": 11.8016, "step": 33611 }, { "epoch": 1.8303077331481896, "grad_norm": 0.5816641511814663, "learning_rate": 3.751425082214932e-06, "loss": 11.7449, "step": 33612 }, { "epoch": 1.8303621871447726, "grad_norm": 0.54028323069145, "learning_rate": 3.7490327787297664e-06, "loss": 11.8756, "step": 33613 }, { "epoch": 1.8304166411413556, "grad_norm": 0.5437505885248093, "learning_rate": 3.746641223708258e-06, "loss": 11.7894, "step": 33614 }, { "epoch": 1.8304710951379388, "grad_norm": 0.5107128114373266, "learning_rate": 3.744250417169015e-06, "loss": 11.7212, "step": 33615 }, { "epoch": 1.8305255491345218, "grad_norm": 0.553345160010139, "learning_rate": 3.7418603591306112e-06, "loss": 11.7638, "step": 33616 }, { "epoch": 1.8305800031311048, "grad_norm": 0.5201727159599808, "learning_rate": 3.7394710496116648e-06, "loss": 11.8593, "step": 33617 }, { "epoch": 1.8306344571276878, "grad_norm": 0.5548311846824785, "learning_rate": 3.7370824886307056e-06, "loss": 11.7792, "step": 33618 }, { "epoch": 1.8306889111242708, "grad_norm": 0.4999218151319178, "learning_rate": 3.734694676206352e-06, "loss": 11.7706, "step": 33619 }, { "epoch": 1.830743365120854, "grad_norm": 0.5730300704645586, "learning_rate": 3.7323076123571556e-06, "loss": 11.7754, "step": 33620 }, { "epoch": 1.830797819117437, "grad_norm": 0.620387799682463, "learning_rate": 3.7299212971016797e-06, "loss": 11.8119, "step": 33621 }, { "epoch": 1.83085227311402, "grad_norm": 0.5691015568130691, "learning_rate": 3.7275357304584978e-06, "loss": 11.8631, "step": 33622 }, { "epoch": 1.830906727110603, "grad_norm": 0.6034637461747832, "learning_rate": 3.725150912446118e-06, "loss": 11.8568, "step": 33623 }, { "epoch": 1.830961181107186, "grad_norm": 0.4916739862755106, "learning_rate": 3.722766843083114e-06, "loss": 11.644, "step": 33624 }, { "epoch": 1.831015635103769, "grad_norm": 0.5358512196182962, "learning_rate": 3.7203835223880268e-06, "loss": 11.7572, "step": 33625 }, { "epoch": 1.831070089100352, "grad_norm": 0.5223640356404262, "learning_rate": 3.7180009503793743e-06, "loss": 11.7622, "step": 33626 }, { "epoch": 1.831124543096935, "grad_norm": 0.5357213005973265, "learning_rate": 3.715619127075709e-06, "loss": 11.7943, "step": 33627 }, { "epoch": 1.831178997093518, "grad_norm": 0.5403553467227992, "learning_rate": 3.713238052495516e-06, "loss": 11.7865, "step": 33628 }, { "epoch": 1.831233451090101, "grad_norm": 0.555072468847666, "learning_rate": 3.7108577266573464e-06, "loss": 11.7744, "step": 33629 }, { "epoch": 1.831287905086684, "grad_norm": 0.5400711145683039, "learning_rate": 3.708478149579686e-06, "loss": 11.7561, "step": 33630 }, { "epoch": 1.831342359083267, "grad_norm": 0.5609842697305087, "learning_rate": 3.7060993212810535e-06, "loss": 11.7684, "step": 33631 }, { "epoch": 1.83139681307985, "grad_norm": 0.5558519570080996, "learning_rate": 3.703721241779956e-06, "loss": 11.7791, "step": 33632 }, { "epoch": 1.831451267076433, "grad_norm": 0.5742799670780467, "learning_rate": 3.7013439110948454e-06, "loss": 11.6923, "step": 33633 }, { "epoch": 1.831505721073016, "grad_norm": 0.4924399750888138, "learning_rate": 3.6989673292442407e-06, "loss": 11.8236, "step": 33634 }, { "epoch": 1.831560175069599, "grad_norm": 0.5755479277658104, "learning_rate": 3.6965914962466153e-06, "loss": 11.6702, "step": 33635 }, { "epoch": 1.8316146290661819, "grad_norm": 0.5668670997137093, "learning_rate": 3.694216412120444e-06, "loss": 11.7997, "step": 33636 }, { "epoch": 1.8316690830627649, "grad_norm": 0.5988314770585237, "learning_rate": 3.691842076884211e-06, "loss": 11.7937, "step": 33637 }, { "epoch": 1.831723537059348, "grad_norm": 0.5620322342811402, "learning_rate": 3.689468490556347e-06, "loss": 11.8267, "step": 33638 }, { "epoch": 1.831777991055931, "grad_norm": 0.5555223649173702, "learning_rate": 3.687095653155337e-06, "loss": 11.7555, "step": 33639 }, { "epoch": 1.831832445052514, "grad_norm": 0.5262201788231075, "learning_rate": 3.6847235646996102e-06, "loss": 11.7956, "step": 33640 }, { "epoch": 1.831886899049097, "grad_norm": 0.6097477835124032, "learning_rate": 3.6823522252076306e-06, "loss": 11.8023, "step": 33641 }, { "epoch": 1.83194135304568, "grad_norm": 0.5737853532295223, "learning_rate": 3.67998163469786e-06, "loss": 11.8284, "step": 33642 }, { "epoch": 1.831995807042263, "grad_norm": 0.5420299003831286, "learning_rate": 3.6776117931886733e-06, "loss": 11.757, "step": 33643 }, { "epoch": 1.8320502610388463, "grad_norm": 0.5757652343362268, "learning_rate": 3.6752427006985446e-06, "loss": 11.8109, "step": 33644 }, { "epoch": 1.8321047150354293, "grad_norm": 0.5118663299731981, "learning_rate": 3.67287435724587e-06, "loss": 11.6178, "step": 33645 }, { "epoch": 1.8321591690320123, "grad_norm": 0.5685451668477773, "learning_rate": 3.67050676284908e-06, "loss": 11.8246, "step": 33646 }, { "epoch": 1.8322136230285953, "grad_norm": 0.5323133237344017, "learning_rate": 3.668139917526592e-06, "loss": 11.8171, "step": 33647 }, { "epoch": 1.8322680770251782, "grad_norm": 0.5140507649072633, "learning_rate": 3.665773821296792e-06, "loss": 11.7333, "step": 33648 }, { "epoch": 1.8323225310217612, "grad_norm": 0.5631979179573443, "learning_rate": 3.6634084741781094e-06, "loss": 11.7532, "step": 33649 }, { "epoch": 1.8323769850183442, "grad_norm": 0.5506797082488566, "learning_rate": 3.6610438761888965e-06, "loss": 11.8504, "step": 33650 }, { "epoch": 1.8324314390149272, "grad_norm": 0.514061050701803, "learning_rate": 3.658680027347583e-06, "loss": 11.7024, "step": 33651 }, { "epoch": 1.8324858930115102, "grad_norm": 0.523363066878204, "learning_rate": 3.6563169276725206e-06, "loss": 11.801, "step": 33652 }, { "epoch": 1.8325403470080932, "grad_norm": 0.5703393082583804, "learning_rate": 3.6539545771820837e-06, "loss": 11.7958, "step": 33653 }, { "epoch": 1.8325948010046762, "grad_norm": 0.5645968935853388, "learning_rate": 3.6515929758946798e-06, "loss": 11.8249, "step": 33654 }, { "epoch": 1.8326492550012592, "grad_norm": 0.5269401939551209, "learning_rate": 3.6492321238286166e-06, "loss": 11.8765, "step": 33655 }, { "epoch": 1.8327037089978422, "grad_norm": 0.5474930751105823, "learning_rate": 3.64687202100229e-06, "loss": 11.814, "step": 33656 }, { "epoch": 1.8327581629944252, "grad_norm": 0.5282810706860984, "learning_rate": 3.6445126674340636e-06, "loss": 11.8933, "step": 33657 }, { "epoch": 1.8328126169910082, "grad_norm": 0.5192044567011344, "learning_rate": 3.6421540631422447e-06, "loss": 11.6787, "step": 33658 }, { "epoch": 1.8328670709875912, "grad_norm": 0.607667276788634, "learning_rate": 3.6397962081452074e-06, "loss": 11.9255, "step": 33659 }, { "epoch": 1.8329215249841742, "grad_norm": 0.5863241837845117, "learning_rate": 3.6374391024612597e-06, "loss": 11.7884, "step": 33660 }, { "epoch": 1.8329759789807574, "grad_norm": 0.564734192758178, "learning_rate": 3.6350827461087646e-06, "loss": 11.6631, "step": 33661 }, { "epoch": 1.8330304329773404, "grad_norm": 0.5771472327379159, "learning_rate": 3.632727139106018e-06, "loss": 11.7665, "step": 33662 }, { "epoch": 1.8330848869739234, "grad_norm": 0.5001199290655215, "learning_rate": 3.6303722814713503e-06, "loss": 11.6977, "step": 33663 }, { "epoch": 1.8331393409705063, "grad_norm": 0.5356334823763569, "learning_rate": 3.628018173223069e-06, "loss": 11.8373, "step": 33664 }, { "epoch": 1.8331937949670893, "grad_norm": 0.5407584876816743, "learning_rate": 3.6256648143794703e-06, "loss": 11.7917, "step": 33665 }, { "epoch": 1.8332482489636723, "grad_norm": 0.5262477456202952, "learning_rate": 3.623312204958873e-06, "loss": 11.8158, "step": 33666 }, { "epoch": 1.8333027029602555, "grad_norm": 0.5384037636619359, "learning_rate": 3.6209603449795515e-06, "loss": 11.7585, "step": 33667 }, { "epoch": 1.8333571569568385, "grad_norm": 0.5770060144243758, "learning_rate": 3.6186092344598023e-06, "loss": 11.7487, "step": 33668 }, { "epoch": 1.8334116109534215, "grad_norm": 0.48745336548330126, "learning_rate": 3.6162588734179326e-06, "loss": 11.8087, "step": 33669 }, { "epoch": 1.8334660649500045, "grad_norm": 0.5086000950835574, "learning_rate": 3.6139092618721727e-06, "loss": 11.7118, "step": 33670 }, { "epoch": 1.8335205189465875, "grad_norm": 0.5192043453566273, "learning_rate": 3.611560399840841e-06, "loss": 11.6702, "step": 33671 }, { "epoch": 1.8335749729431705, "grad_norm": 0.5433328063871803, "learning_rate": 3.6092122873421783e-06, "loss": 11.8374, "step": 33672 }, { "epoch": 1.8336294269397535, "grad_norm": 0.5404471055371471, "learning_rate": 3.6068649243944264e-06, "loss": 11.7247, "step": 33673 }, { "epoch": 1.8336838809363365, "grad_norm": 0.5311974378110095, "learning_rate": 3.604518311015881e-06, "loss": 11.655, "step": 33674 }, { "epoch": 1.8337383349329195, "grad_norm": 0.5458367952834732, "learning_rate": 3.6021724472247387e-06, "loss": 11.7666, "step": 33675 }, { "epoch": 1.8337927889295025, "grad_norm": 0.6863974465711044, "learning_rate": 3.599827333039296e-06, "loss": 11.8068, "step": 33676 }, { "epoch": 1.8338472429260855, "grad_norm": 0.49571684168626645, "learning_rate": 3.59748296847775e-06, "loss": 11.6708, "step": 33677 }, { "epoch": 1.8339016969226685, "grad_norm": 0.5016990640118166, "learning_rate": 3.5951393535583413e-06, "loss": 11.6921, "step": 33678 }, { "epoch": 1.8339561509192515, "grad_norm": 0.5088780451289755, "learning_rate": 3.592796488299299e-06, "loss": 11.7353, "step": 33679 }, { "epoch": 1.8340106049158345, "grad_norm": 0.5834436289501197, "learning_rate": 3.5904543727188322e-06, "loss": 11.8659, "step": 33680 }, { "epoch": 1.8340650589124174, "grad_norm": 0.5486892229223033, "learning_rate": 3.5881130068351698e-06, "loss": 11.7854, "step": 33681 }, { "epoch": 1.8341195129090004, "grad_norm": 0.5295833476695455, "learning_rate": 3.5857723906665197e-06, "loss": 11.7739, "step": 33682 }, { "epoch": 1.8341739669055834, "grad_norm": 0.5252285010143786, "learning_rate": 3.5834325242310453e-06, "loss": 11.7262, "step": 33683 }, { "epoch": 1.8342284209021664, "grad_norm": 0.6019713085614004, "learning_rate": 3.581093407546987e-06, "loss": 11.7305, "step": 33684 }, { "epoch": 1.8342828748987496, "grad_norm": 0.5351547807703942, "learning_rate": 3.5787550406325086e-06, "loss": 11.7171, "step": 33685 }, { "epoch": 1.8343373288953326, "grad_norm": 0.5282363304151417, "learning_rate": 3.576417423505807e-06, "loss": 11.6159, "step": 33686 }, { "epoch": 1.8343917828919156, "grad_norm": 0.5251180636048071, "learning_rate": 3.5740805561850445e-06, "loss": 11.9031, "step": 33687 }, { "epoch": 1.8344462368884986, "grad_norm": 0.622908629364273, "learning_rate": 3.571744438688418e-06, "loss": 12.027, "step": 33688 }, { "epoch": 1.8345006908850816, "grad_norm": 0.5142626164041925, "learning_rate": 3.5694090710340576e-06, "loss": 11.6893, "step": 33689 }, { "epoch": 1.8345551448816648, "grad_norm": 0.5283159116676002, "learning_rate": 3.567074453240149e-06, "loss": 11.6854, "step": 33690 }, { "epoch": 1.8346095988782478, "grad_norm": 0.5511602178658775, "learning_rate": 3.564740585324855e-06, "loss": 11.6854, "step": 33691 }, { "epoch": 1.8346640528748308, "grad_norm": 0.6081058223194803, "learning_rate": 3.562407467306295e-06, "loss": 11.8995, "step": 33692 }, { "epoch": 1.8347185068714138, "grad_norm": 0.6237050189656914, "learning_rate": 3.560075099202642e-06, "loss": 11.8137, "step": 33693 }, { "epoch": 1.8347729608679968, "grad_norm": 0.5849995900037084, "learning_rate": 3.5577434810320055e-06, "loss": 11.8858, "step": 33694 }, { "epoch": 1.8348274148645798, "grad_norm": 0.5450710006351104, "learning_rate": 3.5554126128125256e-06, "loss": 11.9415, "step": 33695 }, { "epoch": 1.8348818688611628, "grad_norm": 0.8510014482508347, "learning_rate": 3.5530824945623542e-06, "loss": 11.8374, "step": 33696 }, { "epoch": 1.8349363228577458, "grad_norm": 0.5490630740266585, "learning_rate": 3.5507531262995553e-06, "loss": 11.8302, "step": 33697 }, { "epoch": 1.8349907768543288, "grad_norm": 0.6000585345043453, "learning_rate": 3.5484245080423027e-06, "loss": 11.9073, "step": 33698 }, { "epoch": 1.8350452308509118, "grad_norm": 0.5315158199283085, "learning_rate": 3.5460966398086602e-06, "loss": 11.8954, "step": 33699 }, { "epoch": 1.8350996848474947, "grad_norm": 0.5903353983840656, "learning_rate": 3.543769521616758e-06, "loss": 11.7407, "step": 33700 }, { "epoch": 1.8351541388440777, "grad_norm": 0.5577473741100369, "learning_rate": 3.541443153484658e-06, "loss": 11.8393, "step": 33701 }, { "epoch": 1.8352085928406607, "grad_norm": 0.5673069611908624, "learning_rate": 3.5391175354304807e-06, "loss": 11.7718, "step": 33702 }, { "epoch": 1.8352630468372437, "grad_norm": 0.6519382976389545, "learning_rate": 3.5367926674723216e-06, "loss": 11.9164, "step": 33703 }, { "epoch": 1.8353175008338267, "grad_norm": 0.5968121862080892, "learning_rate": 3.5344685496282227e-06, "loss": 11.7602, "step": 33704 }, { "epoch": 1.8353719548304097, "grad_norm": 0.6036063004325238, "learning_rate": 3.532145181916269e-06, "loss": 11.6866, "step": 33705 }, { "epoch": 1.8354264088269927, "grad_norm": 0.5781734630937158, "learning_rate": 3.5298225643545457e-06, "loss": 11.8314, "step": 33706 }, { "epoch": 1.8354808628235757, "grad_norm": 0.5272934985316827, "learning_rate": 3.5275006969610835e-06, "loss": 11.7894, "step": 33707 }, { "epoch": 1.835535316820159, "grad_norm": 0.5774879294105703, "learning_rate": 3.5251795797539676e-06, "loss": 11.7908, "step": 33708 }, { "epoch": 1.835589770816742, "grad_norm": 0.5483932207367126, "learning_rate": 3.5228592127512285e-06, "loss": 11.7713, "step": 33709 }, { "epoch": 1.835644224813325, "grad_norm": 0.5514752429617766, "learning_rate": 3.5205395959709286e-06, "loss": 11.7251, "step": 33710 }, { "epoch": 1.835698678809908, "grad_norm": 0.5734875429848707, "learning_rate": 3.5182207294310654e-06, "loss": 11.7756, "step": 33711 }, { "epoch": 1.8357531328064909, "grad_norm": 0.5350863732956618, "learning_rate": 3.5159026131497132e-06, "loss": 11.7566, "step": 33712 }, { "epoch": 1.8358075868030739, "grad_norm": 0.5154652665206887, "learning_rate": 3.5135852471449016e-06, "loss": 11.7158, "step": 33713 }, { "epoch": 1.835862040799657, "grad_norm": 0.5818234333088478, "learning_rate": 3.5112686314346054e-06, "loss": 11.6471, "step": 33714 }, { "epoch": 1.83591649479624, "grad_norm": 0.5309050324678231, "learning_rate": 3.508952766036877e-06, "loss": 11.8418, "step": 33715 }, { "epoch": 1.835970948792823, "grad_norm": 0.5316833087325932, "learning_rate": 3.506637650969702e-06, "loss": 11.5931, "step": 33716 }, { "epoch": 1.836025402789406, "grad_norm": 0.508372623903475, "learning_rate": 3.5043232862510987e-06, "loss": 11.7174, "step": 33717 }, { "epoch": 1.836079856785989, "grad_norm": 0.5095887549123126, "learning_rate": 3.5020096718990756e-06, "loss": 11.6387, "step": 33718 }, { "epoch": 1.836134310782572, "grad_norm": 0.5077160094496781, "learning_rate": 3.499696807931585e-06, "loss": 11.7219, "step": 33719 }, { "epoch": 1.836188764779155, "grad_norm": 0.5156147150890515, "learning_rate": 3.4973846943666568e-06, "loss": 11.8598, "step": 33720 }, { "epoch": 1.836243218775738, "grad_norm": 0.5657735402184215, "learning_rate": 3.4950733312222315e-06, "loss": 11.7291, "step": 33721 }, { "epoch": 1.836297672772321, "grad_norm": 0.5027216645238088, "learning_rate": 3.492762718516307e-06, "loss": 11.7495, "step": 33722 }, { "epoch": 1.836352126768904, "grad_norm": 0.575887213552128, "learning_rate": 3.490452856266857e-06, "loss": 11.8109, "step": 33723 }, { "epoch": 1.836406580765487, "grad_norm": 0.6160079531206806, "learning_rate": 3.488143744491801e-06, "loss": 11.9721, "step": 33724 }, { "epoch": 1.83646103476207, "grad_norm": 0.5444621174267315, "learning_rate": 3.4858353832091463e-06, "loss": 11.9605, "step": 33725 }, { "epoch": 1.836515488758653, "grad_norm": 0.5286723973743807, "learning_rate": 3.483527772436812e-06, "loss": 11.7713, "step": 33726 }, { "epoch": 1.836569942755236, "grad_norm": 0.5236218450241388, "learning_rate": 3.481220912192762e-06, "loss": 11.827, "step": 33727 }, { "epoch": 1.836624396751819, "grad_norm": 0.517060319718189, "learning_rate": 3.4789148024949035e-06, "loss": 11.77, "step": 33728 }, { "epoch": 1.836678850748402, "grad_norm": 0.5859984114605573, "learning_rate": 3.4766094433612006e-06, "loss": 11.8325, "step": 33729 }, { "epoch": 1.836733304744985, "grad_norm": 0.6051276245372441, "learning_rate": 3.474304834809583e-06, "loss": 11.9341, "step": 33730 }, { "epoch": 1.8367877587415682, "grad_norm": 0.5844962191411933, "learning_rate": 3.4720009768579365e-06, "loss": 11.7248, "step": 33731 }, { "epoch": 1.8368422127381512, "grad_norm": 0.5563580638775164, "learning_rate": 3.4696978695242132e-06, "loss": 11.7365, "step": 33732 }, { "epoch": 1.8368966667347342, "grad_norm": 0.4947205237272991, "learning_rate": 3.46739551282631e-06, "loss": 11.7052, "step": 33733 }, { "epoch": 1.8369511207313172, "grad_norm": 0.6067653619978699, "learning_rate": 3.465093906782124e-06, "loss": 11.8687, "step": 33734 }, { "epoch": 1.8370055747279002, "grad_norm": 0.5250863734251067, "learning_rate": 3.462793051409552e-06, "loss": 11.6991, "step": 33735 }, { "epoch": 1.8370600287244832, "grad_norm": 0.5854503019613355, "learning_rate": 3.46049294672649e-06, "loss": 11.7324, "step": 33736 }, { "epoch": 1.8371144827210664, "grad_norm": 0.5107861565301598, "learning_rate": 3.4581935927508357e-06, "loss": 11.7062, "step": 33737 }, { "epoch": 1.8371689367176494, "grad_norm": 0.6099423778392281, "learning_rate": 3.455894989500441e-06, "loss": 11.8401, "step": 33738 }, { "epoch": 1.8372233907142324, "grad_norm": 0.5488953774915811, "learning_rate": 3.453597136993203e-06, "loss": 11.752, "step": 33739 }, { "epoch": 1.8372778447108153, "grad_norm": 0.5356423523111601, "learning_rate": 3.4513000352469848e-06, "loss": 11.7573, "step": 33740 }, { "epoch": 1.8373322987073983, "grad_norm": 0.5279503509526772, "learning_rate": 3.44900368427965e-06, "loss": 11.8028, "step": 33741 }, { "epoch": 1.8373867527039813, "grad_norm": 0.5434130940980396, "learning_rate": 3.4467080841090628e-06, "loss": 11.7407, "step": 33742 }, { "epoch": 1.8374412067005643, "grad_norm": 0.5766316413889403, "learning_rate": 3.4444132347530635e-06, "loss": 11.8449, "step": 33743 }, { "epoch": 1.8374956606971473, "grad_norm": 0.652101556792841, "learning_rate": 3.4421191362294824e-06, "loss": 11.93, "step": 33744 }, { "epoch": 1.8375501146937303, "grad_norm": 0.5960265972082481, "learning_rate": 3.4398257885561945e-06, "loss": 11.8512, "step": 33745 }, { "epoch": 1.8376045686903133, "grad_norm": 0.5640952024080866, "learning_rate": 3.4375331917510077e-06, "loss": 11.7464, "step": 33746 }, { "epoch": 1.8376590226868963, "grad_norm": 0.5098863373297707, "learning_rate": 3.435241345831752e-06, "loss": 11.7593, "step": 33747 }, { "epoch": 1.8377134766834793, "grad_norm": 0.5095515741351909, "learning_rate": 3.4329502508162583e-06, "loss": 11.6392, "step": 33748 }, { "epoch": 1.8377679306800623, "grad_norm": 0.542932740237173, "learning_rate": 3.4306599067223443e-06, "loss": 11.7267, "step": 33749 }, { "epoch": 1.8378223846766453, "grad_norm": 0.565006808595657, "learning_rate": 3.428370313567797e-06, "loss": 11.6211, "step": 33750 }, { "epoch": 1.8378768386732283, "grad_norm": 0.6050045356351809, "learning_rate": 3.426081471370435e-06, "loss": 11.8577, "step": 33751 }, { "epoch": 1.8379312926698113, "grad_norm": 0.583321263599445, "learning_rate": 3.423793380148077e-06, "loss": 11.7218, "step": 33752 }, { "epoch": 1.8379857466663942, "grad_norm": 0.5872353698009192, "learning_rate": 3.4215060399184986e-06, "loss": 11.7619, "step": 33753 }, { "epoch": 1.8380402006629775, "grad_norm": 0.5766620994994406, "learning_rate": 3.4192194506994733e-06, "loss": 11.9911, "step": 33754 }, { "epoch": 1.8380946546595605, "grad_norm": 0.5074922654203639, "learning_rate": 3.4169336125087994e-06, "loss": 11.6697, "step": 33755 }, { "epoch": 1.8381491086561434, "grad_norm": 0.5260217142404257, "learning_rate": 3.414648525364239e-06, "loss": 11.7082, "step": 33756 }, { "epoch": 1.8382035626527264, "grad_norm": 0.5470496184281105, "learning_rate": 3.41236418928359e-06, "loss": 11.7668, "step": 33757 }, { "epoch": 1.8382580166493094, "grad_norm": 0.5724853788041439, "learning_rate": 3.410080604284571e-06, "loss": 11.7921, "step": 33758 }, { "epoch": 1.8383124706458924, "grad_norm": 0.6009617179526612, "learning_rate": 3.4077977703849794e-06, "loss": 11.7534, "step": 33759 }, { "epoch": 1.8383669246424756, "grad_norm": 0.5783049663565869, "learning_rate": 3.405515687602534e-06, "loss": 11.736, "step": 33760 }, { "epoch": 1.8384213786390586, "grad_norm": 0.5730574125204992, "learning_rate": 3.4032343559549984e-06, "loss": 11.7719, "step": 33761 }, { "epoch": 1.8384758326356416, "grad_norm": 0.519669808350654, "learning_rate": 3.400953775460136e-06, "loss": 11.7636, "step": 33762 }, { "epoch": 1.8385302866322246, "grad_norm": 0.5964376024612283, "learning_rate": 3.398673946135644e-06, "loss": 11.8903, "step": 33763 }, { "epoch": 1.8385847406288076, "grad_norm": 0.5672682021379412, "learning_rate": 3.396394867999264e-06, "loss": 11.7035, "step": 33764 }, { "epoch": 1.8386391946253906, "grad_norm": 0.5265068891237084, "learning_rate": 3.3941165410687147e-06, "loss": 11.8036, "step": 33765 }, { "epoch": 1.8386936486219736, "grad_norm": 0.516687143355546, "learning_rate": 3.3918389653617043e-06, "loss": 11.7173, "step": 33766 }, { "epoch": 1.8387481026185566, "grad_norm": 0.5810793523202998, "learning_rate": 3.3895621408959745e-06, "loss": 11.8949, "step": 33767 }, { "epoch": 1.8388025566151396, "grad_norm": 0.578112063505665, "learning_rate": 3.3872860676891995e-06, "loss": 11.6485, "step": 33768 }, { "epoch": 1.8388570106117226, "grad_norm": 0.5520149915577371, "learning_rate": 3.3850107457590983e-06, "loss": 11.7525, "step": 33769 }, { "epoch": 1.8389114646083056, "grad_norm": 0.6627735492956288, "learning_rate": 3.3827361751233465e-06, "loss": 11.7934, "step": 33770 }, { "epoch": 1.8389659186048886, "grad_norm": 0.5528380821301336, "learning_rate": 3.3804623557996516e-06, "loss": 11.6964, "step": 33771 }, { "epoch": 1.8390203726014716, "grad_norm": 0.5441781566992544, "learning_rate": 3.3781892878056777e-06, "loss": 11.8651, "step": 33772 }, { "epoch": 1.8390748265980545, "grad_norm": 0.5268215498162374, "learning_rate": 3.37591697115911e-06, "loss": 11.8521, "step": 33773 }, { "epoch": 1.8391292805946375, "grad_norm": 0.5350371230736668, "learning_rate": 3.3736454058776236e-06, "loss": 11.8748, "step": 33774 }, { "epoch": 1.8391837345912205, "grad_norm": 0.5694064059618958, "learning_rate": 3.37137459197886e-06, "loss": 11.8134, "step": 33775 }, { "epoch": 1.8392381885878035, "grad_norm": 0.5457504647782292, "learning_rate": 3.3691045294805047e-06, "loss": 11.8112, "step": 33776 }, { "epoch": 1.8392926425843865, "grad_norm": 0.5434586038544973, "learning_rate": 3.3668352184001885e-06, "loss": 11.7694, "step": 33777 }, { "epoch": 1.8393470965809697, "grad_norm": 0.48286760906165666, "learning_rate": 3.3645666587555635e-06, "loss": 11.637, "step": 33778 }, { "epoch": 1.8394015505775527, "grad_norm": 0.5622188256202755, "learning_rate": 3.3622988505642826e-06, "loss": 11.798, "step": 33779 }, { "epoch": 1.8394560045741357, "grad_norm": 0.5629060755557711, "learning_rate": 3.360031793843965e-06, "loss": 11.7861, "step": 33780 }, { "epoch": 1.8395104585707187, "grad_norm": 0.6039799015888979, "learning_rate": 3.3577654886122524e-06, "loss": 11.7994, "step": 33781 }, { "epoch": 1.8395649125673017, "grad_norm": 0.5314482109570983, "learning_rate": 3.3554999348867633e-06, "loss": 11.7301, "step": 33782 }, { "epoch": 1.8396193665638847, "grad_norm": 0.5141102963073315, "learning_rate": 3.3532351326851174e-06, "loss": 11.7489, "step": 33783 }, { "epoch": 1.839673820560468, "grad_norm": 0.5368867589358306, "learning_rate": 3.3509710820249228e-06, "loss": 11.7278, "step": 33784 }, { "epoch": 1.839728274557051, "grad_norm": 0.5577536904149286, "learning_rate": 3.3487077829237655e-06, "loss": 11.8324, "step": 33785 }, { "epoch": 1.839782728553634, "grad_norm": 0.5442647554330254, "learning_rate": 3.346445235399287e-06, "loss": 11.7692, "step": 33786 }, { "epoch": 1.8398371825502169, "grad_norm": 0.6038344235159461, "learning_rate": 3.3441834394690507e-06, "loss": 11.76, "step": 33787 }, { "epoch": 1.8398916365467999, "grad_norm": 0.5063113196907719, "learning_rate": 3.341922395150643e-06, "loss": 11.8317, "step": 33788 }, { "epoch": 1.8399460905433829, "grad_norm": 0.5978406466617064, "learning_rate": 3.3396621024616714e-06, "loss": 11.8848, "step": 33789 }, { "epoch": 1.8400005445399659, "grad_norm": 0.5054431701294958, "learning_rate": 3.337402561419689e-06, "loss": 11.8459, "step": 33790 }, { "epoch": 1.8400549985365489, "grad_norm": 0.5702263084054048, "learning_rate": 3.3351437720422818e-06, "loss": 11.7451, "step": 33791 }, { "epoch": 1.8401094525331319, "grad_norm": 0.6097616800921006, "learning_rate": 3.3328857343470023e-06, "loss": 11.9097, "step": 33792 }, { "epoch": 1.8401639065297148, "grad_norm": 0.5396892024496525, "learning_rate": 3.330628448351414e-06, "loss": 11.7652, "step": 33793 }, { "epoch": 1.8402183605262978, "grad_norm": 0.6901732418284905, "learning_rate": 3.328371914073081e-06, "loss": 11.855, "step": 33794 }, { "epoch": 1.8402728145228808, "grad_norm": 0.597373235272736, "learning_rate": 3.3261161315295218e-06, "loss": 11.8066, "step": 33795 }, { "epoch": 1.8403272685194638, "grad_norm": 0.6055857919287467, "learning_rate": 3.3238611007383124e-06, "loss": 11.7263, "step": 33796 }, { "epoch": 1.8403817225160468, "grad_norm": 0.5341443804936415, "learning_rate": 3.3216068217169606e-06, "loss": 11.8828, "step": 33797 }, { "epoch": 1.8404361765126298, "grad_norm": 0.5368310268770802, "learning_rate": 3.3193532944830185e-06, "loss": 11.8119, "step": 33798 }, { "epoch": 1.8404906305092128, "grad_norm": 0.5166187888250507, "learning_rate": 3.317100519053984e-06, "loss": 11.6499, "step": 33799 }, { "epoch": 1.8405450845057958, "grad_norm": 0.5043569772314318, "learning_rate": 3.314848495447387e-06, "loss": 11.7501, "step": 33800 }, { "epoch": 1.840599538502379, "grad_norm": 0.5723431816021484, "learning_rate": 3.312597223680758e-06, "loss": 11.8072, "step": 33801 }, { "epoch": 1.840653992498962, "grad_norm": 0.5232662325831565, "learning_rate": 3.310346703771583e-06, "loss": 11.7075, "step": 33802 }, { "epoch": 1.840708446495545, "grad_norm": 0.5849260386456648, "learning_rate": 3.3080969357373703e-06, "loss": 11.8087, "step": 33803 }, { "epoch": 1.840762900492128, "grad_norm": 0.5810891250532745, "learning_rate": 3.305847919595606e-06, "loss": 11.6649, "step": 33804 }, { "epoch": 1.840817354488711, "grad_norm": 0.5432501955593974, "learning_rate": 3.3035996553637762e-06, "loss": 11.7504, "step": 33805 }, { "epoch": 1.840871808485294, "grad_norm": 0.5981020426794402, "learning_rate": 3.3013521430593884e-06, "loss": 11.736, "step": 33806 }, { "epoch": 1.8409262624818772, "grad_norm": 0.540309161120785, "learning_rate": 3.2991053826998853e-06, "loss": 11.7747, "step": 33807 }, { "epoch": 1.8409807164784602, "grad_norm": 0.5358183372556647, "learning_rate": 3.2968593743027744e-06, "loss": 11.8613, "step": 33808 }, { "epoch": 1.8410351704750432, "grad_norm": 0.6148948096617408, "learning_rate": 3.294614117885486e-06, "loss": 11.7305, "step": 33809 }, { "epoch": 1.8410896244716262, "grad_norm": 0.5049933350510138, "learning_rate": 3.2923696134654957e-06, "loss": 11.7738, "step": 33810 }, { "epoch": 1.8411440784682092, "grad_norm": 0.5862344141498278, "learning_rate": 3.290125861060267e-06, "loss": 11.7892, "step": 33811 }, { "epoch": 1.8411985324647921, "grad_norm": 0.5401173404633814, "learning_rate": 3.287882860687219e-06, "loss": 11.766, "step": 33812 }, { "epoch": 1.8412529864613751, "grad_norm": 0.5384693365841544, "learning_rate": 3.2856406123638496e-06, "loss": 11.72, "step": 33813 }, { "epoch": 1.8413074404579581, "grad_norm": 0.6024943728394331, "learning_rate": 3.283399116107533e-06, "loss": 11.8491, "step": 33814 }, { "epoch": 1.8413618944545411, "grad_norm": 0.5547636411800385, "learning_rate": 3.281158371935711e-06, "loss": 11.8517, "step": 33815 }, { "epoch": 1.8414163484511241, "grad_norm": 0.5204289649108008, "learning_rate": 3.278918379865847e-06, "loss": 11.7569, "step": 33816 }, { "epoch": 1.8414708024477071, "grad_norm": 0.5472886969893096, "learning_rate": 3.2766791399153175e-06, "loss": 11.6995, "step": 33817 }, { "epoch": 1.84152525644429, "grad_norm": 0.5206538728662238, "learning_rate": 3.2744406521015627e-06, "loss": 11.7028, "step": 33818 }, { "epoch": 1.841579710440873, "grad_norm": 0.596221335886329, "learning_rate": 3.272202916441969e-06, "loss": 11.9266, "step": 33819 }, { "epoch": 1.841634164437456, "grad_norm": 0.5045741931754221, "learning_rate": 3.2699659329539557e-06, "loss": 11.7785, "step": 33820 }, { "epoch": 1.841688618434039, "grad_norm": 0.5872171316368902, "learning_rate": 3.267729701654898e-06, "loss": 11.7731, "step": 33821 }, { "epoch": 1.841743072430622, "grad_norm": 0.5898172680419017, "learning_rate": 3.265494222562193e-06, "loss": 11.9146, "step": 33822 }, { "epoch": 1.841797526427205, "grad_norm": 0.5707552746372736, "learning_rate": 3.2632594956932603e-06, "loss": 11.6587, "step": 33823 }, { "epoch": 1.8418519804237883, "grad_norm": 0.52788861099732, "learning_rate": 3.2610255210654082e-06, "loss": 11.7568, "step": 33824 }, { "epoch": 1.8419064344203713, "grad_norm": 0.5824919257319955, "learning_rate": 3.2587922986960674e-06, "loss": 11.8044, "step": 33825 }, { "epoch": 1.8419608884169543, "grad_norm": 0.5163323818054658, "learning_rate": 3.2565598286025566e-06, "loss": 11.7038, "step": 33826 }, { "epoch": 1.8420153424135373, "grad_norm": 0.5501110666957616, "learning_rate": 3.2543281108022736e-06, "loss": 11.6753, "step": 33827 }, { "epoch": 1.8420697964101203, "grad_norm": 0.5496418342032785, "learning_rate": 3.2520971453125715e-06, "loss": 11.7697, "step": 33828 }, { "epoch": 1.8421242504067032, "grad_norm": 0.5626608410252125, "learning_rate": 3.2498669321507692e-06, "loss": 11.8011, "step": 33829 }, { "epoch": 1.8421787044032865, "grad_norm": 0.5368191917332996, "learning_rate": 3.2476374713342304e-06, "loss": 11.8879, "step": 33830 }, { "epoch": 1.8422331583998695, "grad_norm": 0.5143398354378745, "learning_rate": 3.2454087628802863e-06, "loss": 11.7049, "step": 33831 }, { "epoch": 1.8422876123964524, "grad_norm": 0.4836503017569531, "learning_rate": 3.2431808068062786e-06, "loss": 11.7691, "step": 33832 }, { "epoch": 1.8423420663930354, "grad_norm": 0.542519706301802, "learning_rate": 3.240953603129515e-06, "loss": 11.8129, "step": 33833 }, { "epoch": 1.8423965203896184, "grad_norm": 0.5948890937753298, "learning_rate": 3.238727151867338e-06, "loss": 11.9013, "step": 33834 }, { "epoch": 1.8424509743862014, "grad_norm": 0.6205790799442409, "learning_rate": 3.236501453037033e-06, "loss": 11.7787, "step": 33835 }, { "epoch": 1.8425054283827844, "grad_norm": 0.5399811529053937, "learning_rate": 3.23427650665592e-06, "loss": 11.7706, "step": 33836 }, { "epoch": 1.8425598823793674, "grad_norm": 0.5837166679594895, "learning_rate": 3.232052312741296e-06, "loss": 11.8098, "step": 33837 }, { "epoch": 1.8426143363759504, "grad_norm": 0.5234130122805322, "learning_rate": 3.2298288713104695e-06, "loss": 11.7363, "step": 33838 }, { "epoch": 1.8426687903725334, "grad_norm": 0.5453969860580936, "learning_rate": 3.227606182380716e-06, "loss": 11.8542, "step": 33839 }, { "epoch": 1.8427232443691164, "grad_norm": 0.5675145898980608, "learning_rate": 3.2253842459693318e-06, "loss": 11.7664, "step": 33840 }, { "epoch": 1.8427776983656994, "grad_norm": 0.5845154079088531, "learning_rate": 3.2231630620935814e-06, "loss": 11.8287, "step": 33841 }, { "epoch": 1.8428321523622824, "grad_norm": 0.5210611882694561, "learning_rate": 3.2209426307707515e-06, "loss": 11.5919, "step": 33842 }, { "epoch": 1.8428866063588654, "grad_norm": 0.576083556110766, "learning_rate": 3.218722952018094e-06, "loss": 11.8641, "step": 33843 }, { "epoch": 1.8429410603554484, "grad_norm": 0.506464158582543, "learning_rate": 3.2165040258528844e-06, "loss": 11.8904, "step": 33844 }, { "epoch": 1.8429955143520313, "grad_norm": 0.5790154077641596, "learning_rate": 3.2142858522923757e-06, "loss": 11.6134, "step": 33845 }, { "epoch": 1.8430499683486143, "grad_norm": 0.5140118067808708, "learning_rate": 3.2120684313537985e-06, "loss": 11.9575, "step": 33846 }, { "epoch": 1.8431044223451973, "grad_norm": 0.5090120031802001, "learning_rate": 3.2098517630544168e-06, "loss": 11.7169, "step": 33847 }, { "epoch": 1.8431588763417805, "grad_norm": 0.5135841133725867, "learning_rate": 3.2076358474114498e-06, "loss": 11.6483, "step": 33848 }, { "epoch": 1.8432133303383635, "grad_norm": 0.5670370227024023, "learning_rate": 3.2054206844421397e-06, "loss": 11.8771, "step": 33849 }, { "epoch": 1.8432677843349465, "grad_norm": 0.5503407544068725, "learning_rate": 3.203206274163717e-06, "loss": 11.8097, "step": 33850 }, { "epoch": 1.8433222383315295, "grad_norm": 0.5307854885737221, "learning_rate": 3.20099261659339e-06, "loss": 11.7264, "step": 33851 }, { "epoch": 1.8433766923281125, "grad_norm": 0.49715861760956076, "learning_rate": 3.1987797117483786e-06, "loss": 11.7581, "step": 33852 }, { "epoch": 1.8434311463246955, "grad_norm": 0.5831303357984639, "learning_rate": 3.196567559645891e-06, "loss": 11.9091, "step": 33853 }, { "epoch": 1.8434856003212787, "grad_norm": 0.5464664626966996, "learning_rate": 3.194356160303136e-06, "loss": 11.7487, "step": 33854 }, { "epoch": 1.8435400543178617, "grad_norm": 0.5775957701878826, "learning_rate": 3.192145513737299e-06, "loss": 11.8228, "step": 33855 }, { "epoch": 1.8435945083144447, "grad_norm": 0.619429479039788, "learning_rate": 3.189935619965567e-06, "loss": 11.7778, "step": 33856 }, { "epoch": 1.8436489623110277, "grad_norm": 0.5248449823582793, "learning_rate": 3.1877264790051377e-06, "loss": 11.7079, "step": 33857 }, { "epoch": 1.8437034163076107, "grad_norm": 0.5542833636643758, "learning_rate": 3.185518090873174e-06, "loss": 11.8628, "step": 33858 }, { "epoch": 1.8437578703041937, "grad_norm": 0.5158652188160496, "learning_rate": 3.183310455586852e-06, "loss": 11.7808, "step": 33859 }, { "epoch": 1.8438123243007767, "grad_norm": 0.544956279425335, "learning_rate": 3.1811035731633576e-06, "loss": 11.8081, "step": 33860 }, { "epoch": 1.8438667782973597, "grad_norm": 0.6838284135909499, "learning_rate": 3.1788974436198328e-06, "loss": 11.8219, "step": 33861 }, { "epoch": 1.8439212322939427, "grad_norm": 0.5159639415411428, "learning_rate": 3.1766920669734414e-06, "loss": 11.7425, "step": 33862 }, { "epoch": 1.8439756862905257, "grad_norm": 0.5035792350524115, "learning_rate": 3.1744874432413253e-06, "loss": 11.7774, "step": 33863 }, { "epoch": 1.8440301402871087, "grad_norm": 0.5575584635097174, "learning_rate": 3.1722835724406374e-06, "loss": 11.8778, "step": 33864 }, { "epoch": 1.8440845942836916, "grad_norm": 0.521595897479169, "learning_rate": 3.1700804545885087e-06, "loss": 11.8565, "step": 33865 }, { "epoch": 1.8441390482802746, "grad_norm": 0.5325072597234949, "learning_rate": 3.16787808970207e-06, "loss": 11.6101, "step": 33866 }, { "epoch": 1.8441935022768576, "grad_norm": 0.526921449884404, "learning_rate": 3.1656764777984625e-06, "loss": 11.785, "step": 33867 }, { "epoch": 1.8442479562734406, "grad_norm": 0.5604604463304292, "learning_rate": 3.1634756188947736e-06, "loss": 11.8091, "step": 33868 }, { "epoch": 1.8443024102700236, "grad_norm": 0.5237033650127261, "learning_rate": 3.161275513008155e-06, "loss": 11.7208, "step": 33869 }, { "epoch": 1.8443568642666066, "grad_norm": 0.6297718900730161, "learning_rate": 3.159076160155683e-06, "loss": 11.7882, "step": 33870 }, { "epoch": 1.8444113182631898, "grad_norm": 0.5738234071997348, "learning_rate": 3.1568775603544766e-06, "loss": 11.8193, "step": 33871 }, { "epoch": 1.8444657722597728, "grad_norm": 0.5641880449837706, "learning_rate": 3.1546797136216443e-06, "loss": 11.8338, "step": 33872 }, { "epoch": 1.8445202262563558, "grad_norm": 0.5413760584223677, "learning_rate": 3.1524826199742506e-06, "loss": 11.6959, "step": 33873 }, { "epoch": 1.8445746802529388, "grad_norm": 0.5616691097512236, "learning_rate": 3.1502862794294152e-06, "loss": 11.6755, "step": 33874 }, { "epoch": 1.8446291342495218, "grad_norm": 0.6246207200975681, "learning_rate": 3.1480906920041798e-06, "loss": 11.8428, "step": 33875 }, { "epoch": 1.8446835882461048, "grad_norm": 0.5401450050853716, "learning_rate": 3.1458958577156195e-06, "loss": 11.6716, "step": 33876 }, { "epoch": 1.844738042242688, "grad_norm": 0.5416520671529061, "learning_rate": 3.143701776580832e-06, "loss": 11.7409, "step": 33877 }, { "epoch": 1.844792496239271, "grad_norm": 0.49940488635913727, "learning_rate": 3.141508448616859e-06, "loss": 11.6558, "step": 33878 }, { "epoch": 1.844846950235854, "grad_norm": 0.5851468047936453, "learning_rate": 3.1393158738407645e-06, "loss": 11.7389, "step": 33879 }, { "epoch": 1.844901404232437, "grad_norm": 0.5432004950819189, "learning_rate": 3.13712405226958e-06, "loss": 11.9062, "step": 33880 }, { "epoch": 1.84495585822902, "grad_norm": 0.5686325387673025, "learning_rate": 3.1349329839203802e-06, "loss": 11.8332, "step": 33881 }, { "epoch": 1.845010312225603, "grad_norm": 0.5388373122055787, "learning_rate": 3.1327426688101733e-06, "loss": 11.6179, "step": 33882 }, { "epoch": 1.845064766222186, "grad_norm": 0.519201245880715, "learning_rate": 3.1305531069560025e-06, "loss": 11.8168, "step": 33883 }, { "epoch": 1.845119220218769, "grad_norm": 0.566674166251352, "learning_rate": 3.1283642983749085e-06, "loss": 11.7421, "step": 33884 }, { "epoch": 1.845173674215352, "grad_norm": 0.5437773880958092, "learning_rate": 3.1261762430838894e-06, "loss": 11.8667, "step": 33885 }, { "epoch": 1.845228128211935, "grad_norm": 0.5301400094827246, "learning_rate": 3.1239889410999644e-06, "loss": 11.7364, "step": 33886 }, { "epoch": 1.845282582208518, "grad_norm": 0.5028840908671239, "learning_rate": 3.121802392440165e-06, "loss": 11.8648, "step": 33887 }, { "epoch": 1.845337036205101, "grad_norm": 0.5392734574427442, "learning_rate": 3.1196165971214553e-06, "loss": 11.8577, "step": 33888 }, { "epoch": 1.845391490201684, "grad_norm": 0.528793753122846, "learning_rate": 3.1174315551608768e-06, "loss": 11.8459, "step": 33889 }, { "epoch": 1.845445944198267, "grad_norm": 0.5099363291544728, "learning_rate": 3.1152472665753717e-06, "loss": 11.8106, "step": 33890 }, { "epoch": 1.84550039819485, "grad_norm": 0.5899545497919645, "learning_rate": 3.113063731381971e-06, "loss": 11.7961, "step": 33891 }, { "epoch": 1.845554852191433, "grad_norm": 0.5196418330767255, "learning_rate": 3.1108809495976275e-06, "loss": 11.6222, "step": 33892 }, { "epoch": 1.8456093061880159, "grad_norm": 0.5522045974661178, "learning_rate": 3.108698921239317e-06, "loss": 11.6702, "step": 33893 }, { "epoch": 1.845663760184599, "grad_norm": 0.5347161207556804, "learning_rate": 3.1065176463240364e-06, "loss": 11.7245, "step": 33894 }, { "epoch": 1.845718214181182, "grad_norm": 0.5985366129011804, "learning_rate": 3.104337124868706e-06, "loss": 11.9679, "step": 33895 }, { "epoch": 1.845772668177765, "grad_norm": 0.5567806245290867, "learning_rate": 3.1021573568903007e-06, "loss": 11.8281, "step": 33896 }, { "epoch": 1.845827122174348, "grad_norm": 0.4984262442091683, "learning_rate": 3.0999783424057626e-06, "loss": 11.6549, "step": 33897 }, { "epoch": 1.845881576170931, "grad_norm": 0.5305676714975427, "learning_rate": 3.0978000814320452e-06, "loss": 11.8301, "step": 33898 }, { "epoch": 1.845936030167514, "grad_norm": 0.5835378027254757, "learning_rate": 3.09562257398609e-06, "loss": 11.9314, "step": 33899 }, { "epoch": 1.8459904841640973, "grad_norm": 0.538974559621088, "learning_rate": 3.093445820084817e-06, "loss": 11.8596, "step": 33900 }, { "epoch": 1.8460449381606803, "grad_norm": 0.6174274770695499, "learning_rate": 3.0912698197451796e-06, "loss": 11.8054, "step": 33901 }, { "epoch": 1.8460993921572633, "grad_norm": 0.4943718810476187, "learning_rate": 3.0890945729840524e-06, "loss": 11.7585, "step": 33902 }, { "epoch": 1.8461538461538463, "grad_norm": 0.5133081439897508, "learning_rate": 3.0869200798184004e-06, "loss": 11.8115, "step": 33903 }, { "epoch": 1.8462083001504292, "grad_norm": 0.57591531044245, "learning_rate": 3.08474634026511e-06, "loss": 11.8596, "step": 33904 }, { "epoch": 1.8462627541470122, "grad_norm": 0.5128937353643369, "learning_rate": 3.082573354341067e-06, "loss": 11.854, "step": 33905 }, { "epoch": 1.8463172081435952, "grad_norm": 0.5434425857331038, "learning_rate": 3.0804011220632033e-06, "loss": 11.7711, "step": 33906 }, { "epoch": 1.8463716621401782, "grad_norm": 0.6067532377719885, "learning_rate": 3.0782296434483825e-06, "loss": 11.7875, "step": 33907 }, { "epoch": 1.8464261161367612, "grad_norm": 0.5375928356579653, "learning_rate": 3.0760589185135026e-06, "loss": 11.8202, "step": 33908 }, { "epoch": 1.8464805701333442, "grad_norm": 0.5511883926322493, "learning_rate": 3.0738889472754493e-06, "loss": 11.8206, "step": 33909 }, { "epoch": 1.8465350241299272, "grad_norm": 0.5489226508382511, "learning_rate": 3.071719729751077e-06, "loss": 11.8662, "step": 33910 }, { "epoch": 1.8465894781265102, "grad_norm": 0.48990277090348355, "learning_rate": 3.0695512659572823e-06, "loss": 11.7124, "step": 33911 }, { "epoch": 1.8466439321230932, "grad_norm": 0.5500921390007175, "learning_rate": 3.0673835559109075e-06, "loss": 11.6711, "step": 33912 }, { "epoch": 1.8466983861196762, "grad_norm": 0.53862484676813, "learning_rate": 3.0652165996288174e-06, "loss": 11.8484, "step": 33913 }, { "epoch": 1.8467528401162592, "grad_norm": 0.5906898883728187, "learning_rate": 3.0630503971278646e-06, "loss": 11.8781, "step": 33914 }, { "epoch": 1.8468072941128422, "grad_norm": 0.6049249207166347, "learning_rate": 3.060884948424869e-06, "loss": 11.8281, "step": 33915 }, { "epoch": 1.8468617481094252, "grad_norm": 0.5412072270855967, "learning_rate": 3.0587202535367065e-06, "loss": 11.5532, "step": 33916 }, { "epoch": 1.8469162021060082, "grad_norm": 0.512322497233974, "learning_rate": 3.0565563124801745e-06, "loss": 11.7961, "step": 33917 }, { "epoch": 1.8469706561025914, "grad_norm": 0.5129580644807363, "learning_rate": 3.054393125272137e-06, "loss": 11.6918, "step": 33918 }, { "epoch": 1.8470251100991744, "grad_norm": 0.5634917997525085, "learning_rate": 3.052230691929381e-06, "loss": 11.814, "step": 33919 }, { "epoch": 1.8470795640957574, "grad_norm": 0.5260412748198389, "learning_rate": 3.050069012468737e-06, "loss": 11.7143, "step": 33920 }, { "epoch": 1.8471340180923403, "grad_norm": 0.5614031038340227, "learning_rate": 3.0479080869070253e-06, "loss": 11.8278, "step": 33921 }, { "epoch": 1.8471884720889233, "grad_norm": 0.5044524238585407, "learning_rate": 3.0457479152610324e-06, "loss": 11.7716, "step": 33922 }, { "epoch": 1.8472429260855066, "grad_norm": 0.4937868742172702, "learning_rate": 3.043588497547567e-06, "loss": 11.7005, "step": 33923 }, { "epoch": 1.8472973800820895, "grad_norm": 0.5206714157371767, "learning_rate": 3.041429833783427e-06, "loss": 11.7411, "step": 33924 }, { "epoch": 1.8473518340786725, "grad_norm": 0.5995436217804876, "learning_rate": 3.039271923985365e-06, "loss": 11.7636, "step": 33925 }, { "epoch": 1.8474062880752555, "grad_norm": 0.6395912019597665, "learning_rate": 3.037114768170202e-06, "loss": 11.7916, "step": 33926 }, { "epoch": 1.8474607420718385, "grad_norm": 0.5854515514875622, "learning_rate": 3.03495836635469e-06, "loss": 11.8173, "step": 33927 }, { "epoch": 1.8475151960684215, "grad_norm": 0.5693612498369074, "learning_rate": 3.0328027185556052e-06, "loss": 11.802, "step": 33928 }, { "epoch": 1.8475696500650045, "grad_norm": 0.5205507286134287, "learning_rate": 3.0306478247897008e-06, "loss": 11.7203, "step": 33929 }, { "epoch": 1.8476241040615875, "grad_norm": 0.5217218520518706, "learning_rate": 3.028493685073752e-06, "loss": 11.8448, "step": 33930 }, { "epoch": 1.8476785580581705, "grad_norm": 0.5338324715648306, "learning_rate": 3.0263402994244794e-06, "loss": 11.682, "step": 33931 }, { "epoch": 1.8477330120547535, "grad_norm": 0.5709604337486814, "learning_rate": 3.0241876678586577e-06, "loss": 11.8978, "step": 33932 }, { "epoch": 1.8477874660513365, "grad_norm": 0.5203366203077877, "learning_rate": 3.0220357903930297e-06, "loss": 11.8205, "step": 33933 }, { "epoch": 1.8478419200479195, "grad_norm": 0.5372597897618722, "learning_rate": 3.0198846670443037e-06, "loss": 11.8024, "step": 33934 }, { "epoch": 1.8478963740445025, "grad_norm": 0.543726202110921, "learning_rate": 3.017734297829211e-06, "loss": 11.7345, "step": 33935 }, { "epoch": 1.8479508280410855, "grad_norm": 0.5597512685153038, "learning_rate": 3.015584682764494e-06, "loss": 11.8128, "step": 33936 }, { "epoch": 1.8480052820376685, "grad_norm": 0.6448671965687806, "learning_rate": 3.013435821866839e-06, "loss": 11.6734, "step": 33937 }, { "epoch": 1.8480597360342514, "grad_norm": 0.5558892060875587, "learning_rate": 3.0112877151529884e-06, "loss": 11.7819, "step": 33938 }, { "epoch": 1.8481141900308344, "grad_norm": 0.5321331313157706, "learning_rate": 3.0091403626396177e-06, "loss": 11.7569, "step": 33939 }, { "epoch": 1.8481686440274174, "grad_norm": 0.6038253896564069, "learning_rate": 3.0069937643434354e-06, "loss": 11.7305, "step": 33940 }, { "epoch": 1.8482230980240006, "grad_norm": 0.5640110249845319, "learning_rate": 3.0048479202811398e-06, "loss": 11.7231, "step": 33941 }, { "epoch": 1.8482775520205836, "grad_norm": 0.5658637186911623, "learning_rate": 3.002702830469406e-06, "loss": 11.7794, "step": 33942 }, { "epoch": 1.8483320060171666, "grad_norm": 0.6092022829773311, "learning_rate": 3.000558494924932e-06, "loss": 11.894, "step": 33943 }, { "epoch": 1.8483864600137496, "grad_norm": 0.5247351580081084, "learning_rate": 2.9984149136643823e-06, "loss": 11.7366, "step": 33944 }, { "epoch": 1.8484409140103326, "grad_norm": 0.5024333038604187, "learning_rate": 2.9962720867044326e-06, "loss": 11.7122, "step": 33945 }, { "epoch": 1.8484953680069156, "grad_norm": 0.536140477341649, "learning_rate": 2.9941300140617246e-06, "loss": 11.7737, "step": 33946 }, { "epoch": 1.8485498220034988, "grad_norm": 0.5158297429179122, "learning_rate": 2.991988695752923e-06, "loss": 11.6573, "step": 33947 }, { "epoch": 1.8486042760000818, "grad_norm": 0.48013086023660484, "learning_rate": 2.9898481317947036e-06, "loss": 11.7769, "step": 33948 }, { "epoch": 1.8486587299966648, "grad_norm": 0.569550321927307, "learning_rate": 2.987708322203675e-06, "loss": 11.7834, "step": 33949 }, { "epoch": 1.8487131839932478, "grad_norm": 0.5193882620624866, "learning_rate": 2.985569266996513e-06, "loss": 11.8766, "step": 33950 }, { "epoch": 1.8487676379898308, "grad_norm": 0.5274351535341895, "learning_rate": 2.9834309661898264e-06, "loss": 11.6403, "step": 33951 }, { "epoch": 1.8488220919864138, "grad_norm": 0.5220389440061838, "learning_rate": 2.9812934198002463e-06, "loss": 11.7715, "step": 33952 }, { "epoch": 1.8488765459829968, "grad_norm": 0.5481192506398179, "learning_rate": 2.979156627844404e-06, "loss": 11.7027, "step": 33953 }, { "epoch": 1.8489309999795798, "grad_norm": 0.5675706386525854, "learning_rate": 2.977020590338908e-06, "loss": 11.8059, "step": 33954 }, { "epoch": 1.8489854539761628, "grad_norm": 0.5724062075791675, "learning_rate": 2.974885307300379e-06, "loss": 11.7369, "step": 33955 }, { "epoch": 1.8490399079727458, "grad_norm": 0.6104988207407752, "learning_rate": 2.9727507787454035e-06, "loss": 11.8075, "step": 33956 }, { "epoch": 1.8490943619693287, "grad_norm": 0.5096959022771089, "learning_rate": 2.970617004690579e-06, "loss": 11.7302, "step": 33957 }, { "epoch": 1.8491488159659117, "grad_norm": 0.5879912890201504, "learning_rate": 2.968483985152526e-06, "loss": 11.8342, "step": 33958 }, { "epoch": 1.8492032699624947, "grad_norm": 0.5704977956925356, "learning_rate": 2.9663517201478084e-06, "loss": 11.8169, "step": 33959 }, { "epoch": 1.8492577239590777, "grad_norm": 0.5562458096238031, "learning_rate": 2.964220209693014e-06, "loss": 11.6883, "step": 33960 }, { "epoch": 1.8493121779556607, "grad_norm": 0.5260987530055947, "learning_rate": 2.9620894538047175e-06, "loss": 11.6639, "step": 33961 }, { "epoch": 1.8493666319522437, "grad_norm": 0.538078585004661, "learning_rate": 2.9599594524994834e-06, "loss": 11.8035, "step": 33962 }, { "epoch": 1.8494210859488267, "grad_norm": 0.5188453130396761, "learning_rate": 2.9578302057938766e-06, "loss": 11.5551, "step": 33963 }, { "epoch": 1.84947553994541, "grad_norm": 0.5379328298430154, "learning_rate": 2.9557017137044617e-06, "loss": 11.7338, "step": 33964 }, { "epoch": 1.849529993941993, "grad_norm": 0.5754936847753943, "learning_rate": 2.953573976247781e-06, "loss": 11.719, "step": 33965 }, { "epoch": 1.849584447938576, "grad_norm": 0.5249416751920055, "learning_rate": 2.9514469934403876e-06, "loss": 11.6886, "step": 33966 }, { "epoch": 1.849638901935159, "grad_norm": 0.5380695841891844, "learning_rate": 2.949320765298813e-06, "loss": 11.7186, "step": 33967 }, { "epoch": 1.849693355931742, "grad_norm": 0.5157242662529193, "learning_rate": 2.9471952918395883e-06, "loss": 11.6475, "step": 33968 }, { "epoch": 1.8497478099283249, "grad_norm": 0.599758824239076, "learning_rate": 2.945070573079256e-06, "loss": 11.8665, "step": 33969 }, { "epoch": 1.849802263924908, "grad_norm": 0.54743572491124, "learning_rate": 2.942946609034336e-06, "loss": 11.7804, "step": 33970 }, { "epoch": 1.849856717921491, "grad_norm": 0.5556212574903687, "learning_rate": 2.940823399721326e-06, "loss": 11.7047, "step": 33971 }, { "epoch": 1.849911171918074, "grad_norm": 0.5043351902681977, "learning_rate": 2.938700945156769e-06, "loss": 11.8355, "step": 33972 }, { "epoch": 1.849965625914657, "grad_norm": 0.5355679036669441, "learning_rate": 2.9365792453571404e-06, "loss": 11.8139, "step": 33973 }, { "epoch": 1.85002007991124, "grad_norm": 0.5586999119186564, "learning_rate": 2.934458300338949e-06, "loss": 11.7065, "step": 33974 }, { "epoch": 1.850074533907823, "grad_norm": 0.5536597163442772, "learning_rate": 2.9323381101186933e-06, "loss": 11.7894, "step": 33975 }, { "epoch": 1.850128987904406, "grad_norm": 0.5836834594475118, "learning_rate": 2.9302186747128478e-06, "loss": 11.8819, "step": 33976 }, { "epoch": 1.850183441900989, "grad_norm": 0.5914927375099355, "learning_rate": 2.9280999941379005e-06, "loss": 11.6999, "step": 33977 }, { "epoch": 1.850237895897572, "grad_norm": 0.4948145150744575, "learning_rate": 2.9259820684103267e-06, "loss": 11.7372, "step": 33978 }, { "epoch": 1.850292349894155, "grad_norm": 0.5262052599923139, "learning_rate": 2.923864897546602e-06, "loss": 11.7061, "step": 33979 }, { "epoch": 1.850346803890738, "grad_norm": 0.6282740310847362, "learning_rate": 2.9217484815631803e-06, "loss": 11.8636, "step": 33980 }, { "epoch": 1.850401257887321, "grad_norm": 0.508782728275568, "learning_rate": 2.9196328204765145e-06, "loss": 11.8079, "step": 33981 }, { "epoch": 1.850455711883904, "grad_norm": 0.5279491061551861, "learning_rate": 2.9175179143030697e-06, "loss": 11.7281, "step": 33982 }, { "epoch": 1.850510165880487, "grad_norm": 0.49618291423348704, "learning_rate": 2.915403763059288e-06, "loss": 11.7713, "step": 33983 }, { "epoch": 1.85056461987707, "grad_norm": 0.5385394130196661, "learning_rate": 2.913290366761612e-06, "loss": 11.8213, "step": 33984 }, { "epoch": 1.850619073873653, "grad_norm": 0.5479233683614301, "learning_rate": 2.9111777254264728e-06, "loss": 11.7341, "step": 33985 }, { "epoch": 1.850673527870236, "grad_norm": 0.49936015882159296, "learning_rate": 2.9090658390702907e-06, "loss": 11.7457, "step": 33986 }, { "epoch": 1.850727981866819, "grad_norm": 0.5808296453329583, "learning_rate": 2.9069547077094972e-06, "loss": 11.773, "step": 33987 }, { "epoch": 1.8507824358634022, "grad_norm": 0.5293009174682723, "learning_rate": 2.904844331360501e-06, "loss": 11.8333, "step": 33988 }, { "epoch": 1.8508368898599852, "grad_norm": 0.5841705642277881, "learning_rate": 2.902734710039723e-06, "loss": 11.8187, "step": 33989 }, { "epoch": 1.8508913438565682, "grad_norm": 0.5649771813488085, "learning_rate": 2.9006258437635605e-06, "loss": 11.7901, "step": 33990 }, { "epoch": 1.8509457978531512, "grad_norm": 0.5302723487249812, "learning_rate": 2.8985177325484113e-06, "loss": 11.7943, "step": 33991 }, { "epoch": 1.8510002518497342, "grad_norm": 0.5866540247940978, "learning_rate": 2.8964103764106855e-06, "loss": 11.8323, "step": 33992 }, { "epoch": 1.8510547058463174, "grad_norm": 0.5551975874233183, "learning_rate": 2.894303775366736e-06, "loss": 11.9083, "step": 33993 }, { "epoch": 1.8511091598429004, "grad_norm": 0.5270400714667117, "learning_rate": 2.8921979294329825e-06, "loss": 11.8537, "step": 33994 }, { "epoch": 1.8511636138394834, "grad_norm": 0.5135052772250565, "learning_rate": 2.8900928386257906e-06, "loss": 11.71, "step": 33995 }, { "epoch": 1.8512180678360663, "grad_norm": 0.5771000790059908, "learning_rate": 2.887988502961503e-06, "loss": 11.7572, "step": 33996 }, { "epoch": 1.8512725218326493, "grad_norm": 0.5483909976668098, "learning_rate": 2.8858849224565164e-06, "loss": 11.8167, "step": 33997 }, { "epoch": 1.8513269758292323, "grad_norm": 0.5226034055040285, "learning_rate": 2.8837820971271634e-06, "loss": 11.7273, "step": 33998 }, { "epoch": 1.8513814298258153, "grad_norm": 0.541963955985011, "learning_rate": 2.881680026989808e-06, "loss": 11.7224, "step": 33999 }, { "epoch": 1.8514358838223983, "grad_norm": 0.5181706792953558, "learning_rate": 2.879578712060793e-06, "loss": 11.8442, "step": 34000 }, { "epoch": 1.8514903378189813, "grad_norm": 0.5399078815227443, "learning_rate": 2.877478152356472e-06, "loss": 11.7581, "step": 34001 }, { "epoch": 1.8515447918155643, "grad_norm": 0.5359867665277845, "learning_rate": 2.8753783478931653e-06, "loss": 11.7962, "step": 34002 }, { "epoch": 1.8515992458121473, "grad_norm": 0.5435760006232293, "learning_rate": 2.8732792986871925e-06, "loss": 11.7628, "step": 34003 }, { "epoch": 1.8516536998087303, "grad_norm": 0.555593989298099, "learning_rate": 2.8711810047549082e-06, "loss": 11.8577, "step": 34004 }, { "epoch": 1.8517081538053133, "grad_norm": 0.5285585549972435, "learning_rate": 2.8690834661125988e-06, "loss": 11.7192, "step": 34005 }, { "epoch": 1.8517626078018963, "grad_norm": 0.5503586250886979, "learning_rate": 2.8669866827765844e-06, "loss": 11.6285, "step": 34006 }, { "epoch": 1.8518170617984793, "grad_norm": 0.5363522402626031, "learning_rate": 2.864890654763175e-06, "loss": 11.7311, "step": 34007 }, { "epoch": 1.8518715157950623, "grad_norm": 0.5241239077671972, "learning_rate": 2.8627953820886567e-06, "loss": 11.5543, "step": 34008 }, { "epoch": 1.8519259697916453, "grad_norm": 0.5527898725009992, "learning_rate": 2.860700864769339e-06, "loss": 11.9241, "step": 34009 }, { "epoch": 1.8519804237882282, "grad_norm": 0.5655947024463986, "learning_rate": 2.8586071028214976e-06, "loss": 11.8699, "step": 34010 }, { "epoch": 1.8520348777848115, "grad_norm": 0.5864008975548566, "learning_rate": 2.85651409626142e-06, "loss": 11.8148, "step": 34011 }, { "epoch": 1.8520893317813945, "grad_norm": 0.5272738235541787, "learning_rate": 2.8544218451053816e-06, "loss": 11.7192, "step": 34012 }, { "epoch": 1.8521437857779774, "grad_norm": 0.545272321828681, "learning_rate": 2.8523303493696364e-06, "loss": 11.8962, "step": 34013 }, { "epoch": 1.8521982397745604, "grad_norm": 0.5912552431684458, "learning_rate": 2.850239609070482e-06, "loss": 11.7549, "step": 34014 }, { "epoch": 1.8522526937711434, "grad_norm": 0.4986214073980574, "learning_rate": 2.8481496242241502e-06, "loss": 11.8233, "step": 34015 }, { "epoch": 1.8523071477677264, "grad_norm": 0.5315990421146979, "learning_rate": 2.8460603948469057e-06, "loss": 11.805, "step": 34016 }, { "epoch": 1.8523616017643096, "grad_norm": 0.5277944879919373, "learning_rate": 2.8439719209549687e-06, "loss": 11.8399, "step": 34017 }, { "epoch": 1.8524160557608926, "grad_norm": 0.5307441067634918, "learning_rate": 2.841884202564604e-06, "loss": 11.8005, "step": 34018 }, { "epoch": 1.8524705097574756, "grad_norm": 0.5361733699957829, "learning_rate": 2.839797239692055e-06, "loss": 11.7477, "step": 34019 }, { "epoch": 1.8525249637540586, "grad_norm": 0.50796473172156, "learning_rate": 2.8377110323535293e-06, "loss": 11.3824, "step": 34020 }, { "epoch": 1.8525794177506416, "grad_norm": 0.5777575613399936, "learning_rate": 2.83562558056526e-06, "loss": 11.7881, "step": 34021 }, { "epoch": 1.8526338717472246, "grad_norm": 0.5001863028163757, "learning_rate": 2.833540884343455e-06, "loss": 11.6736, "step": 34022 }, { "epoch": 1.8526883257438076, "grad_norm": 0.5182572141374138, "learning_rate": 2.831456943704336e-06, "loss": 11.8024, "step": 34023 }, { "epoch": 1.8527427797403906, "grad_norm": 0.6047831763564729, "learning_rate": 2.829373758664089e-06, "loss": 11.8276, "step": 34024 }, { "epoch": 1.8527972337369736, "grad_norm": 0.5913716437327828, "learning_rate": 2.8272913292389457e-06, "loss": 11.8893, "step": 34025 }, { "epoch": 1.8528516877335566, "grad_norm": 0.5208796587409614, "learning_rate": 2.8252096554450824e-06, "loss": 11.7287, "step": 34026 }, { "epoch": 1.8529061417301396, "grad_norm": 0.535002590497656, "learning_rate": 2.8231287372986635e-06, "loss": 11.6704, "step": 34027 }, { "epoch": 1.8529605957267226, "grad_norm": 0.5129719394648435, "learning_rate": 2.82104857481591e-06, "loss": 11.6365, "step": 34028 }, { "epoch": 1.8530150497233056, "grad_norm": 0.5661020644053443, "learning_rate": 2.818969168012975e-06, "loss": 11.6811, "step": 34029 }, { "epoch": 1.8530695037198885, "grad_norm": 0.5854461566190753, "learning_rate": 2.8168905169060233e-06, "loss": 11.6504, "step": 34030 }, { "epoch": 1.8531239577164715, "grad_norm": 0.5553709904444278, "learning_rate": 2.8148126215112425e-06, "loss": 11.7144, "step": 34031 }, { "epoch": 1.8531784117130545, "grad_norm": 0.5810422576221066, "learning_rate": 2.812735481844764e-06, "loss": 11.8553, "step": 34032 }, { "epoch": 1.8532328657096375, "grad_norm": 0.5812792611574698, "learning_rate": 2.810659097922763e-06, "loss": 11.8353, "step": 34033 }, { "epoch": 1.8532873197062207, "grad_norm": 0.5702078686054134, "learning_rate": 2.8085834697613722e-06, "loss": 11.7031, "step": 34034 }, { "epoch": 1.8533417737028037, "grad_norm": 0.5366748486364615, "learning_rate": 2.8065085973767445e-06, "loss": 11.8238, "step": 34035 }, { "epoch": 1.8533962276993867, "grad_norm": 0.5374314304561377, "learning_rate": 2.804434480785001e-06, "loss": 11.8128, "step": 34036 }, { "epoch": 1.8534506816959697, "grad_norm": 0.5532755574199162, "learning_rate": 2.8023611200022616e-06, "loss": 11.6945, "step": 34037 }, { "epoch": 1.8535051356925527, "grad_norm": 0.5401358258063391, "learning_rate": 2.80028851504468e-06, "loss": 11.8425, "step": 34038 }, { "epoch": 1.8535595896891357, "grad_norm": 0.5763647670588261, "learning_rate": 2.798216665928333e-06, "loss": 11.7155, "step": 34039 }, { "epoch": 1.853614043685719, "grad_norm": 0.5766449647815882, "learning_rate": 2.796145572669362e-06, "loss": 11.7909, "step": 34040 }, { "epoch": 1.853668497682302, "grad_norm": 0.5725141589151067, "learning_rate": 2.7940752352838773e-06, "loss": 11.8821, "step": 34041 }, { "epoch": 1.853722951678885, "grad_norm": 0.6125597265303008, "learning_rate": 2.7920056537879547e-06, "loss": 11.8031, "step": 34042 }, { "epoch": 1.853777405675468, "grad_norm": 0.5361428075674681, "learning_rate": 2.7899368281977034e-06, "loss": 11.7827, "step": 34043 }, { "epoch": 1.8538318596720509, "grad_norm": 0.5198248211482072, "learning_rate": 2.7878687585291995e-06, "loss": 11.7022, "step": 34044 }, { "epoch": 1.8538863136686339, "grad_norm": 0.5079659696727752, "learning_rate": 2.7858014447985414e-06, "loss": 11.6905, "step": 34045 }, { "epoch": 1.8539407676652169, "grad_norm": 0.5385895893421295, "learning_rate": 2.783734887021783e-06, "loss": 11.7728, "step": 34046 }, { "epoch": 1.8539952216617999, "grad_norm": 0.5560019379130211, "learning_rate": 2.781669085215011e-06, "loss": 11.7799, "step": 34047 }, { "epoch": 1.8540496756583829, "grad_norm": 0.5357157989539586, "learning_rate": 2.779604039394279e-06, "loss": 11.7575, "step": 34048 }, { "epoch": 1.8541041296549658, "grad_norm": 0.5371506830369353, "learning_rate": 2.7775397495756527e-06, "loss": 11.7611, "step": 34049 }, { "epoch": 1.8541585836515488, "grad_norm": 0.5161986694474797, "learning_rate": 2.7754762157751857e-06, "loss": 11.6191, "step": 34050 }, { "epoch": 1.8542130376481318, "grad_norm": 0.5605193402336714, "learning_rate": 2.7734134380089093e-06, "loss": 11.8143, "step": 34051 }, { "epoch": 1.8542674916447148, "grad_norm": 0.564727523872469, "learning_rate": 2.771351416292878e-06, "loss": 11.8044, "step": 34052 }, { "epoch": 1.8543219456412978, "grad_norm": 0.6114687241225171, "learning_rate": 2.7692901506431334e-06, "loss": 11.8897, "step": 34053 }, { "epoch": 1.8543763996378808, "grad_norm": 0.5843686735740672, "learning_rate": 2.767229641075675e-06, "loss": 11.6551, "step": 34054 }, { "epoch": 1.8544308536344638, "grad_norm": 0.5647573311623332, "learning_rate": 2.765169887606567e-06, "loss": 11.725, "step": 34055 }, { "epoch": 1.8544853076310468, "grad_norm": 0.5802162322802504, "learning_rate": 2.7631108902517964e-06, "loss": 11.7947, "step": 34056 }, { "epoch": 1.85453976162763, "grad_norm": 0.652859205557321, "learning_rate": 2.7610526490273738e-06, "loss": 11.7925, "step": 34057 }, { "epoch": 1.854594215624213, "grad_norm": 0.5545669111004263, "learning_rate": 2.7589951639493296e-06, "loss": 11.7274, "step": 34058 }, { "epoch": 1.854648669620796, "grad_norm": 0.5583813046794736, "learning_rate": 2.7569384350336293e-06, "loss": 11.7297, "step": 34059 }, { "epoch": 1.854703123617379, "grad_norm": 0.5484538012243809, "learning_rate": 2.754882462296293e-06, "loss": 11.6868, "step": 34060 }, { "epoch": 1.854757577613962, "grad_norm": 0.532367958735592, "learning_rate": 2.7528272457532865e-06, "loss": 11.7704, "step": 34061 }, { "epoch": 1.854812031610545, "grad_norm": 0.47254148780927535, "learning_rate": 2.7507727854206076e-06, "loss": 11.7237, "step": 34062 }, { "epoch": 1.8548664856071282, "grad_norm": 0.558732675017002, "learning_rate": 2.748719081314244e-06, "loss": 11.7849, "step": 34063 }, { "epoch": 1.8549209396037112, "grad_norm": 0.5876116258967456, "learning_rate": 2.7466661334501266e-06, "loss": 11.9338, "step": 34064 }, { "epoch": 1.8549753936002942, "grad_norm": 0.5205508541048451, "learning_rate": 2.7446139418442763e-06, "loss": 11.8928, "step": 34065 }, { "epoch": 1.8550298475968772, "grad_norm": 0.5501540084999282, "learning_rate": 2.7425625065125917e-06, "loss": 11.7058, "step": 34066 }, { "epoch": 1.8550843015934602, "grad_norm": 0.5275910836032256, "learning_rate": 2.7405118274710482e-06, "loss": 11.9045, "step": 34067 }, { "epoch": 1.8551387555900432, "grad_norm": 0.515798708229121, "learning_rate": 2.738461904735612e-06, "loss": 11.6111, "step": 34068 }, { "epoch": 1.8551932095866261, "grad_norm": 0.5428290845476124, "learning_rate": 2.7364127383221914e-06, "loss": 11.8164, "step": 34069 }, { "epoch": 1.8552476635832091, "grad_norm": 0.6048802628482521, "learning_rate": 2.7343643282467413e-06, "loss": 11.7328, "step": 34070 }, { "epoch": 1.8553021175797921, "grad_norm": 0.5607030518340594, "learning_rate": 2.7323166745251814e-06, "loss": 11.7448, "step": 34071 }, { "epoch": 1.8553565715763751, "grad_norm": 0.4982570699389137, "learning_rate": 2.7302697771734553e-06, "loss": 11.7343, "step": 34072 }, { "epoch": 1.8554110255729581, "grad_norm": 0.4921227757229111, "learning_rate": 2.72822363620745e-06, "loss": 11.6926, "step": 34073 }, { "epoch": 1.855465479569541, "grad_norm": 0.5654418146983228, "learning_rate": 2.7261782516430854e-06, "loss": 11.5966, "step": 34074 }, { "epoch": 1.855519933566124, "grad_norm": 0.7153251081472566, "learning_rate": 2.7241336234962944e-06, "loss": 11.7875, "step": 34075 }, { "epoch": 1.855574387562707, "grad_norm": 0.49568571008554196, "learning_rate": 2.7220897517829303e-06, "loss": 11.7951, "step": 34076 }, { "epoch": 1.85562884155929, "grad_norm": 0.5459610465500246, "learning_rate": 2.720046636518925e-06, "loss": 11.7583, "step": 34077 }, { "epoch": 1.855683295555873, "grad_norm": 0.52765938688209, "learning_rate": 2.718004277720143e-06, "loss": 11.7692, "step": 34078 }, { "epoch": 1.855737749552456, "grad_norm": 0.5497646074992447, "learning_rate": 2.7159626754024615e-06, "loss": 11.7232, "step": 34079 }, { "epoch": 1.855792203549039, "grad_norm": 0.510688996790669, "learning_rate": 2.713921829581789e-06, "loss": 11.6517, "step": 34080 }, { "epoch": 1.8558466575456223, "grad_norm": 0.4958188034929215, "learning_rate": 2.7118817402739695e-06, "loss": 11.7161, "step": 34081 }, { "epoch": 1.8559011115422053, "grad_norm": 0.49682322626932107, "learning_rate": 2.7098424074948782e-06, "loss": 11.7348, "step": 34082 }, { "epoch": 1.8559555655387883, "grad_norm": 0.5405829211041254, "learning_rate": 2.707803831260347e-06, "loss": 11.6823, "step": 34083 }, { "epoch": 1.8560100195353713, "grad_norm": 0.5487267444143137, "learning_rate": 2.7057660115862748e-06, "loss": 11.717, "step": 34084 }, { "epoch": 1.8560644735319543, "grad_norm": 0.5829987937994934, "learning_rate": 2.703728948488471e-06, "loss": 11.6097, "step": 34085 }, { "epoch": 1.8561189275285372, "grad_norm": 0.5683107597925958, "learning_rate": 2.7016926419827892e-06, "loss": 11.5456, "step": 34086 }, { "epoch": 1.8561733815251205, "grad_norm": 0.5247377964798963, "learning_rate": 2.6996570920850615e-06, "loss": 11.6565, "step": 34087 }, { "epoch": 1.8562278355217035, "grad_norm": 0.5438516098119133, "learning_rate": 2.697622298811109e-06, "loss": 11.7745, "step": 34088 }, { "epoch": 1.8562822895182864, "grad_norm": 0.5375365510810982, "learning_rate": 2.6955882621767627e-06, "loss": 11.7952, "step": 34089 }, { "epoch": 1.8563367435148694, "grad_norm": 0.5472913557677537, "learning_rate": 2.6935549821978545e-06, "loss": 11.8205, "step": 34090 }, { "epoch": 1.8563911975114524, "grad_norm": 0.5682658780083004, "learning_rate": 2.6915224588901723e-06, "loss": 11.8902, "step": 34091 }, { "epoch": 1.8564456515080354, "grad_norm": 0.5987486409175985, "learning_rate": 2.689490692269536e-06, "loss": 11.7818, "step": 34092 }, { "epoch": 1.8565001055046184, "grad_norm": 0.5172981863585994, "learning_rate": 2.687459682351734e-06, "loss": 11.7375, "step": 34093 }, { "epoch": 1.8565545595012014, "grad_norm": 0.5655138168649811, "learning_rate": 2.685429429152575e-06, "loss": 11.6837, "step": 34094 }, { "epoch": 1.8566090134977844, "grad_norm": 0.566723333506846, "learning_rate": 2.6833999326878244e-06, "loss": 11.8561, "step": 34095 }, { "epoch": 1.8566634674943674, "grad_norm": 0.508191075909932, "learning_rate": 2.681371192973281e-06, "loss": 11.499, "step": 34096 }, { "epoch": 1.8567179214909504, "grad_norm": 0.5461943753485724, "learning_rate": 2.6793432100247094e-06, "loss": 11.6532, "step": 34097 }, { "epoch": 1.8567723754875334, "grad_norm": 0.5362114674594104, "learning_rate": 2.6773159838578867e-06, "loss": 11.7125, "step": 34098 }, { "epoch": 1.8568268294841164, "grad_norm": 0.566289879168616, "learning_rate": 2.675289514488588e-06, "loss": 11.7581, "step": 34099 }, { "epoch": 1.8568812834806994, "grad_norm": 0.5526048342528732, "learning_rate": 2.673263801932546e-06, "loss": 11.8534, "step": 34100 }, { "epoch": 1.8569357374772824, "grad_norm": 0.5463096012916056, "learning_rate": 2.671238846205526e-06, "loss": 11.8857, "step": 34101 }, { "epoch": 1.8569901914738653, "grad_norm": 0.5366637513217796, "learning_rate": 2.6692146473232816e-06, "loss": 11.8138, "step": 34102 }, { "epoch": 1.8570446454704483, "grad_norm": 0.6373991308782505, "learning_rate": 2.667191205301545e-06, "loss": 11.8892, "step": 34103 }, { "epoch": 1.8570990994670316, "grad_norm": 0.5667846566917036, "learning_rate": 2.6651685201560583e-06, "loss": 11.7853, "step": 34104 }, { "epoch": 1.8571535534636145, "grad_norm": 0.5419256198203309, "learning_rate": 2.6631465919025433e-06, "loss": 11.6945, "step": 34105 }, { "epoch": 1.8572080074601975, "grad_norm": 0.5776209773524162, "learning_rate": 2.661125420556709e-06, "loss": 11.7391, "step": 34106 }, { "epoch": 1.8572624614567805, "grad_norm": 0.5877709735118014, "learning_rate": 2.6591050061342994e-06, "loss": 11.8664, "step": 34107 }, { "epoch": 1.8573169154533635, "grad_norm": 0.5876932201534659, "learning_rate": 2.6570853486510115e-06, "loss": 11.7654, "step": 34108 }, { "epoch": 1.8573713694499465, "grad_norm": 0.5421207030376842, "learning_rate": 2.6550664481225564e-06, "loss": 11.6486, "step": 34109 }, { "epoch": 1.8574258234465297, "grad_norm": 0.5299217307624858, "learning_rate": 2.6530483045646205e-06, "loss": 11.6776, "step": 34110 }, { "epoch": 1.8574802774431127, "grad_norm": 0.5534014212715822, "learning_rate": 2.6510309179929025e-06, "loss": 11.808, "step": 34111 }, { "epoch": 1.8575347314396957, "grad_norm": 0.5490184703527806, "learning_rate": 2.6490142884231016e-06, "loss": 11.7646, "step": 34112 }, { "epoch": 1.8575891854362787, "grad_norm": 0.5146089147151446, "learning_rate": 2.6469984158708825e-06, "loss": 11.8306, "step": 34113 }, { "epoch": 1.8576436394328617, "grad_norm": 0.5469835749781836, "learning_rate": 2.6449833003519444e-06, "loss": 11.8555, "step": 34114 }, { "epoch": 1.8576980934294447, "grad_norm": 0.5818258261115045, "learning_rate": 2.6429689418819403e-06, "loss": 11.7264, "step": 34115 }, { "epoch": 1.8577525474260277, "grad_norm": 0.5738309420067346, "learning_rate": 2.6409553404765365e-06, "loss": 11.7786, "step": 34116 }, { "epoch": 1.8578070014226107, "grad_norm": 0.5532584211809648, "learning_rate": 2.6389424961513866e-06, "loss": 11.7642, "step": 34117 }, { "epoch": 1.8578614554191937, "grad_norm": 0.5342596315396536, "learning_rate": 2.6369304089221447e-06, "loss": 11.7343, "step": 34118 }, { "epoch": 1.8579159094157767, "grad_norm": 0.6193558740509785, "learning_rate": 2.6349190788044652e-06, "loss": 11.7922, "step": 34119 }, { "epoch": 1.8579703634123597, "grad_norm": 0.5782423910365604, "learning_rate": 2.63290850581398e-06, "loss": 11.7755, "step": 34120 }, { "epoch": 1.8580248174089427, "grad_norm": 0.5145585207300134, "learning_rate": 2.6308986899663323e-06, "loss": 11.6257, "step": 34121 }, { "epoch": 1.8580792714055256, "grad_norm": 0.5637859053969433, "learning_rate": 2.6288896312771315e-06, "loss": 11.871, "step": 34122 }, { "epoch": 1.8581337254021086, "grad_norm": 0.4941110555645744, "learning_rate": 2.626881329762021e-06, "loss": 11.6426, "step": 34123 }, { "epoch": 1.8581881793986916, "grad_norm": 0.5744780816570595, "learning_rate": 2.62487378543661e-06, "loss": 11.8303, "step": 34124 }, { "epoch": 1.8582426333952746, "grad_norm": 0.598086808279827, "learning_rate": 2.622866998316509e-06, "loss": 11.6945, "step": 34125 }, { "epoch": 1.8582970873918576, "grad_norm": 0.5973845623077279, "learning_rate": 2.6208609684173493e-06, "loss": 11.6114, "step": 34126 }, { "epoch": 1.8583515413884408, "grad_norm": 0.5666796603852995, "learning_rate": 2.618855695754674e-06, "loss": 11.8497, "step": 34127 }, { "epoch": 1.8584059953850238, "grad_norm": 0.5428249603268561, "learning_rate": 2.6168511803441154e-06, "loss": 11.62, "step": 34128 }, { "epoch": 1.8584604493816068, "grad_norm": 0.5146541338848857, "learning_rate": 2.6148474222012608e-06, "loss": 11.7109, "step": 34129 }, { "epoch": 1.8585149033781898, "grad_norm": 0.5506291667570146, "learning_rate": 2.612844421341676e-06, "loss": 11.8329, "step": 34130 }, { "epoch": 1.8585693573747728, "grad_norm": 0.5131219078540313, "learning_rate": 2.610842177780948e-06, "loss": 11.8199, "step": 34131 }, { "epoch": 1.8586238113713558, "grad_norm": 0.567544050071326, "learning_rate": 2.608840691534642e-06, "loss": 11.7734, "step": 34132 }, { "epoch": 1.858678265367939, "grad_norm": 0.5105797380134522, "learning_rate": 2.6068399626183236e-06, "loss": 11.7357, "step": 34133 }, { "epoch": 1.858732719364522, "grad_norm": 0.5662576253554312, "learning_rate": 2.6048399910475473e-06, "loss": 11.7998, "step": 34134 }, { "epoch": 1.858787173361105, "grad_norm": 0.5163416056105855, "learning_rate": 2.602840776837867e-06, "loss": 11.7284, "step": 34135 }, { "epoch": 1.858841627357688, "grad_norm": 0.536067522575811, "learning_rate": 2.6008423200048483e-06, "loss": 11.8133, "step": 34136 }, { "epoch": 1.858896081354271, "grad_norm": 0.5424786276923304, "learning_rate": 2.5988446205640004e-06, "loss": 11.7822, "step": 34137 }, { "epoch": 1.858950535350854, "grad_norm": 0.5252799566672407, "learning_rate": 2.596847678530867e-06, "loss": 11.6414, "step": 34138 }, { "epoch": 1.859004989347437, "grad_norm": 0.5157648847759491, "learning_rate": 2.5948514939209912e-06, "loss": 11.6841, "step": 34139 }, { "epoch": 1.85905944334402, "grad_norm": 0.5395751269361226, "learning_rate": 2.5928560667498713e-06, "loss": 11.7793, "step": 34140 }, { "epoch": 1.859113897340603, "grad_norm": 0.5638058456493286, "learning_rate": 2.590861397033051e-06, "loss": 11.7167, "step": 34141 }, { "epoch": 1.859168351337186, "grad_norm": 0.5847364246820539, "learning_rate": 2.5888674847860175e-06, "loss": 11.6694, "step": 34142 }, { "epoch": 1.859222805333769, "grad_norm": 0.5103492098472038, "learning_rate": 2.5868743300242916e-06, "loss": 11.7623, "step": 34143 }, { "epoch": 1.859277259330352, "grad_norm": 0.5412922834575296, "learning_rate": 2.584881932763361e-06, "loss": 11.7553, "step": 34144 }, { "epoch": 1.859331713326935, "grad_norm": 0.5538789553502556, "learning_rate": 2.582890293018725e-06, "loss": 11.7341, "step": 34145 }, { "epoch": 1.859386167323518, "grad_norm": 0.5653406075227116, "learning_rate": 2.5808994108058925e-06, "loss": 11.7556, "step": 34146 }, { "epoch": 1.859440621320101, "grad_norm": 0.6198355925560457, "learning_rate": 2.5789092861403076e-06, "loss": 11.7568, "step": 34147 }, { "epoch": 1.859495075316684, "grad_norm": 0.5829700476640527, "learning_rate": 2.5769199190374683e-06, "loss": 11.8438, "step": 34148 }, { "epoch": 1.859549529313267, "grad_norm": 0.6709208864682427, "learning_rate": 2.574931309512818e-06, "loss": 11.7601, "step": 34149 }, { "epoch": 1.8596039833098499, "grad_norm": 0.576368123322428, "learning_rate": 2.5729434575818557e-06, "loss": 11.7127, "step": 34150 }, { "epoch": 1.859658437306433, "grad_norm": 0.5050539730265304, "learning_rate": 2.570956363260024e-06, "loss": 11.732, "step": 34151 }, { "epoch": 1.859712891303016, "grad_norm": 0.6046256561023708, "learning_rate": 2.5689700265627668e-06, "loss": 11.8598, "step": 34152 }, { "epoch": 1.859767345299599, "grad_norm": 0.5269572616654264, "learning_rate": 2.5669844475055492e-06, "loss": 11.6571, "step": 34153 }, { "epoch": 1.859821799296182, "grad_norm": 0.603658806901523, "learning_rate": 2.564999626103781e-06, "loss": 11.8524, "step": 34154 }, { "epoch": 1.859876253292765, "grad_norm": 0.5321904211696481, "learning_rate": 2.563015562372939e-06, "loss": 11.7855, "step": 34155 }, { "epoch": 1.8599307072893483, "grad_norm": 0.5061557068441046, "learning_rate": 2.5610322563284216e-06, "loss": 11.8365, "step": 34156 }, { "epoch": 1.8599851612859313, "grad_norm": 0.5852691174880951, "learning_rate": 2.559049707985639e-06, "loss": 11.7605, "step": 34157 }, { "epoch": 1.8600396152825143, "grad_norm": 0.5499744580013146, "learning_rate": 2.5570679173600564e-06, "loss": 11.8352, "step": 34158 }, { "epoch": 1.8600940692790973, "grad_norm": 0.5637411575606855, "learning_rate": 2.5550868844670283e-06, "loss": 11.775, "step": 34159 }, { "epoch": 1.8601485232756803, "grad_norm": 0.5444245383974731, "learning_rate": 2.5531066093219978e-06, "loss": 11.6991, "step": 34160 }, { "epoch": 1.8602029772722632, "grad_norm": 0.5262136559054372, "learning_rate": 2.551127091940353e-06, "loss": 11.7058, "step": 34161 }, { "epoch": 1.8602574312688462, "grad_norm": 0.6000498145071027, "learning_rate": 2.549148332337481e-06, "loss": 11.7554, "step": 34162 }, { "epoch": 1.8603118852654292, "grad_norm": 0.5196297525263693, "learning_rate": 2.5471703305287807e-06, "loss": 11.7863, "step": 34163 }, { "epoch": 1.8603663392620122, "grad_norm": 0.5747606300573432, "learning_rate": 2.5451930865296183e-06, "loss": 11.728, "step": 34164 }, { "epoch": 1.8604207932585952, "grad_norm": 0.5196508024223219, "learning_rate": 2.5432166003553914e-06, "loss": 11.71, "step": 34165 }, { "epoch": 1.8604752472551782, "grad_norm": 0.5213264834311555, "learning_rate": 2.5412408720214443e-06, "loss": 11.8188, "step": 34166 }, { "epoch": 1.8605297012517612, "grad_norm": 0.5298991715536169, "learning_rate": 2.539265901543153e-06, "loss": 11.8251, "step": 34167 }, { "epoch": 1.8605841552483442, "grad_norm": 0.5340164841269298, "learning_rate": 2.5372916889358835e-06, "loss": 11.7761, "step": 34168 }, { "epoch": 1.8606386092449272, "grad_norm": 0.5569992179735073, "learning_rate": 2.535318234214967e-06, "loss": 11.7586, "step": 34169 }, { "epoch": 1.8606930632415102, "grad_norm": 0.5866682630661555, "learning_rate": 2.5333455373957705e-06, "loss": 11.8225, "step": 34170 }, { "epoch": 1.8607475172380932, "grad_norm": 0.5428021841429663, "learning_rate": 2.5313735984936137e-06, "loss": 11.7837, "step": 34171 }, { "epoch": 1.8608019712346762, "grad_norm": 0.5527744381721219, "learning_rate": 2.529402417523841e-06, "loss": 11.699, "step": 34172 }, { "epoch": 1.8608564252312592, "grad_norm": 0.5296158164996029, "learning_rate": 2.527431994501783e-06, "loss": 11.7278, "step": 34173 }, { "epoch": 1.8609108792278424, "grad_norm": 0.576018565837309, "learning_rate": 2.5254623294427626e-06, "loss": 11.8261, "step": 34174 }, { "epoch": 1.8609653332244254, "grad_norm": 0.5740271902952466, "learning_rate": 2.5234934223621e-06, "loss": 11.7174, "step": 34175 }, { "epoch": 1.8610197872210084, "grad_norm": 0.4834014411927382, "learning_rate": 2.521525273275094e-06, "loss": 11.8187, "step": 34176 }, { "epoch": 1.8610742412175914, "grad_norm": 0.5667849462284822, "learning_rate": 2.5195578821970545e-06, "loss": 11.766, "step": 34177 }, { "epoch": 1.8611286952141743, "grad_norm": 0.5830541716647009, "learning_rate": 2.517591249143281e-06, "loss": 11.7631, "step": 34178 }, { "epoch": 1.8611831492107573, "grad_norm": 0.5263017125893658, "learning_rate": 2.5156253741290603e-06, "loss": 11.7173, "step": 34179 }, { "epoch": 1.8612376032073406, "grad_norm": 0.5570995837286824, "learning_rate": 2.513660257169692e-06, "loss": 11.7829, "step": 34180 }, { "epoch": 1.8612920572039235, "grad_norm": 0.543429935542524, "learning_rate": 2.5116958982804418e-06, "loss": 11.812, "step": 34181 }, { "epoch": 1.8613465112005065, "grad_norm": 0.5425701658297523, "learning_rate": 2.509732297476608e-06, "loss": 11.9944, "step": 34182 }, { "epoch": 1.8614009651970895, "grad_norm": 0.5524225480267694, "learning_rate": 2.507769454773434e-06, "loss": 11.8507, "step": 34183 }, { "epoch": 1.8614554191936725, "grad_norm": 0.5906724294506779, "learning_rate": 2.5058073701861864e-06, "loss": 11.8047, "step": 34184 }, { "epoch": 1.8615098731902555, "grad_norm": 0.5278422417604384, "learning_rate": 2.50384604373014e-06, "loss": 11.7586, "step": 34185 }, { "epoch": 1.8615643271868385, "grad_norm": 0.5351336110155367, "learning_rate": 2.501885475420551e-06, "loss": 11.7011, "step": 34186 }, { "epoch": 1.8616187811834215, "grad_norm": 0.5940612577929096, "learning_rate": 2.499925665272629e-06, "loss": 11.7994, "step": 34187 }, { "epoch": 1.8616732351800045, "grad_norm": 0.5494946382919955, "learning_rate": 2.49796661330165e-06, "loss": 11.827, "step": 34188 }, { "epoch": 1.8617276891765875, "grad_norm": 0.5304235029645848, "learning_rate": 2.4960083195228244e-06, "loss": 11.8589, "step": 34189 }, { "epoch": 1.8617821431731705, "grad_norm": 0.5175006356071478, "learning_rate": 2.494050783951396e-06, "loss": 11.7035, "step": 34190 }, { "epoch": 1.8618365971697535, "grad_norm": 0.5503796944672893, "learning_rate": 2.4920940066025635e-06, "loss": 11.8346, "step": 34191 }, { "epoch": 1.8618910511663365, "grad_norm": 0.5345422841907969, "learning_rate": 2.490137987491581e-06, "loss": 11.7668, "step": 34192 }, { "epoch": 1.8619455051629195, "grad_norm": 0.5448337000854672, "learning_rate": 2.4881827266336255e-06, "loss": 11.8119, "step": 34193 }, { "epoch": 1.8619999591595024, "grad_norm": 0.5575270582841075, "learning_rate": 2.486228224043918e-06, "loss": 11.9021, "step": 34194 }, { "epoch": 1.8620544131560854, "grad_norm": 0.525244067361692, "learning_rate": 2.484274479737658e-06, "loss": 11.6603, "step": 34195 }, { "epoch": 1.8621088671526684, "grad_norm": 0.5677293472556686, "learning_rate": 2.482321493730033e-06, "loss": 11.7894, "step": 34196 }, { "epoch": 1.8621633211492516, "grad_norm": 0.5553831084579393, "learning_rate": 2.480369266036231e-06, "loss": 11.6958, "step": 34197 }, { "epoch": 1.8622177751458346, "grad_norm": 0.5331850581872086, "learning_rate": 2.4784177966714172e-06, "loss": 11.9201, "step": 34198 }, { "epoch": 1.8622722291424176, "grad_norm": 0.5978388936682295, "learning_rate": 2.476467085650791e-06, "loss": 11.8646, "step": 34199 }, { "epoch": 1.8623266831390006, "grad_norm": 0.5316424538573272, "learning_rate": 2.4745171329895065e-06, "loss": 11.7897, "step": 34200 }, { "epoch": 1.8623811371355836, "grad_norm": 0.5333208931118412, "learning_rate": 2.47256793870273e-06, "loss": 11.8553, "step": 34201 }, { "epoch": 1.8624355911321666, "grad_norm": 0.5304161429780652, "learning_rate": 2.470619502805638e-06, "loss": 11.6802, "step": 34202 }, { "epoch": 1.8624900451287498, "grad_norm": 0.5419035587078287, "learning_rate": 2.46867182531334e-06, "loss": 11.8046, "step": 34203 }, { "epoch": 1.8625444991253328, "grad_norm": 0.5318397569660418, "learning_rate": 2.466724906241025e-06, "loss": 11.8833, "step": 34204 }, { "epoch": 1.8625989531219158, "grad_norm": 0.5124298265960896, "learning_rate": 2.464778745603802e-06, "loss": 11.6758, "step": 34205 }, { "epoch": 1.8626534071184988, "grad_norm": 0.5815998586177793, "learning_rate": 2.462833343416826e-06, "loss": 11.9073, "step": 34206 }, { "epoch": 1.8627078611150818, "grad_norm": 0.5996786164517552, "learning_rate": 2.4608886996952186e-06, "loss": 11.8514, "step": 34207 }, { "epoch": 1.8627623151116648, "grad_norm": 0.5399291136870131, "learning_rate": 2.4589448144540783e-06, "loss": 11.8548, "step": 34208 }, { "epoch": 1.8628167691082478, "grad_norm": 0.585185179304002, "learning_rate": 2.457001687708549e-06, "loss": 11.6782, "step": 34209 }, { "epoch": 1.8628712231048308, "grad_norm": 0.5246050621343618, "learning_rate": 2.4550593194737404e-06, "loss": 11.7869, "step": 34210 }, { "epoch": 1.8629256771014138, "grad_norm": 0.5029747833277742, "learning_rate": 2.4531177097647408e-06, "loss": 11.8503, "step": 34211 }, { "epoch": 1.8629801310979968, "grad_norm": 0.6242831355913958, "learning_rate": 2.45117685859666e-06, "loss": 11.8906, "step": 34212 }, { "epoch": 1.8630345850945798, "grad_norm": 0.5425892746415845, "learning_rate": 2.449236765984586e-06, "loss": 11.8247, "step": 34213 }, { "epoch": 1.8630890390911627, "grad_norm": 0.5155520131370054, "learning_rate": 2.447297431943607e-06, "loss": 11.8579, "step": 34214 }, { "epoch": 1.8631434930877457, "grad_norm": 0.5262307966690408, "learning_rate": 2.4453588564887996e-06, "loss": 11.6946, "step": 34215 }, { "epoch": 1.8631979470843287, "grad_norm": 0.5198146549994429, "learning_rate": 2.443421039635252e-06, "loss": 11.7159, "step": 34216 }, { "epoch": 1.8632524010809117, "grad_norm": 0.5144561516247748, "learning_rate": 2.4414839813980183e-06, "loss": 11.7513, "step": 34217 }, { "epoch": 1.8633068550774947, "grad_norm": 0.5160035491790321, "learning_rate": 2.439547681792154e-06, "loss": 11.7505, "step": 34218 }, { "epoch": 1.8633613090740777, "grad_norm": 0.4955868170545605, "learning_rate": 2.4376121408327458e-06, "loss": 11.7747, "step": 34219 }, { "epoch": 1.8634157630706607, "grad_norm": 0.5103738312816297, "learning_rate": 2.435677358534816e-06, "loss": 11.6458, "step": 34220 }, { "epoch": 1.863470217067244, "grad_norm": 0.5371214183596122, "learning_rate": 2.4337433349134077e-06, "loss": 11.5142, "step": 34221 }, { "epoch": 1.863524671063827, "grad_norm": 0.570535075632023, "learning_rate": 2.4318100699835977e-06, "loss": 11.8633, "step": 34222 }, { "epoch": 1.86357912506041, "grad_norm": 0.5840620998834162, "learning_rate": 2.4298775637603745e-06, "loss": 11.7923, "step": 34223 }, { "epoch": 1.863633579056993, "grad_norm": 0.5727108335166309, "learning_rate": 2.427945816258803e-06, "loss": 11.8758, "step": 34224 }, { "epoch": 1.8636880330535759, "grad_norm": 0.5794853517027942, "learning_rate": 2.426014827493872e-06, "loss": 11.7628, "step": 34225 }, { "epoch": 1.863742487050159, "grad_norm": 0.5708225316638832, "learning_rate": 2.424084597480636e-06, "loss": 11.7865, "step": 34226 }, { "epoch": 1.863796941046742, "grad_norm": 0.6114812305209636, "learning_rate": 2.4221551262340716e-06, "loss": 11.8621, "step": 34227 }, { "epoch": 1.863851395043325, "grad_norm": 0.5540456319701818, "learning_rate": 2.4202264137691776e-06, "loss": 11.6821, "step": 34228 }, { "epoch": 1.863905849039908, "grad_norm": 0.5730136453861171, "learning_rate": 2.418298460100987e-06, "loss": 11.814, "step": 34229 }, { "epoch": 1.863960303036491, "grad_norm": 0.523589859419443, "learning_rate": 2.4163712652444547e-06, "loss": 11.6111, "step": 34230 }, { "epoch": 1.864014757033074, "grad_norm": 0.6112924156925106, "learning_rate": 2.4144448292146017e-06, "loss": 11.7233, "step": 34231 }, { "epoch": 1.864069211029657, "grad_norm": 0.5374203790311808, "learning_rate": 2.4125191520263713e-06, "loss": 11.7915, "step": 34232 }, { "epoch": 1.86412366502624, "grad_norm": 0.5362833093421417, "learning_rate": 2.410594233694763e-06, "loss": 11.7705, "step": 34233 }, { "epoch": 1.864178119022823, "grad_norm": 0.5197452888719586, "learning_rate": 2.4086700742347535e-06, "loss": 11.7186, "step": 34234 }, { "epoch": 1.864232573019406, "grad_norm": 0.5190742130390831, "learning_rate": 2.406746673661275e-06, "loss": 11.5895, "step": 34235 }, { "epoch": 1.864287027015989, "grad_norm": 0.6293038113589222, "learning_rate": 2.4048240319893055e-06, "loss": 11.8104, "step": 34236 }, { "epoch": 1.864341481012572, "grad_norm": 0.5279810593944538, "learning_rate": 2.402902149233799e-06, "loss": 11.6538, "step": 34237 }, { "epoch": 1.864395935009155, "grad_norm": 0.527408394474537, "learning_rate": 2.4009810254096875e-06, "loss": 11.8652, "step": 34238 }, { "epoch": 1.864450389005738, "grad_norm": 0.5098192001087642, "learning_rate": 2.3990606605319156e-06, "loss": 11.6551, "step": 34239 }, { "epoch": 1.864504843002321, "grad_norm": 0.5070566259114763, "learning_rate": 2.397141054615415e-06, "loss": 11.735, "step": 34240 }, { "epoch": 1.864559296998904, "grad_norm": 0.49966074639794694, "learning_rate": 2.3952222076751184e-06, "loss": 11.7903, "step": 34241 }, { "epoch": 1.864613750995487, "grad_norm": 0.5308729556639347, "learning_rate": 2.3933041197259253e-06, "loss": 11.7152, "step": 34242 }, { "epoch": 1.86466820499207, "grad_norm": 0.5165496154658284, "learning_rate": 2.391386790782779e-06, "loss": 11.671, "step": 34243 }, { "epoch": 1.8647226589886532, "grad_norm": 0.6427412163321673, "learning_rate": 2.389470220860579e-06, "loss": 11.8475, "step": 34244 }, { "epoch": 1.8647771129852362, "grad_norm": 0.5266160782859761, "learning_rate": 2.387554409974224e-06, "loss": 11.7488, "step": 34245 }, { "epoch": 1.8648315669818192, "grad_norm": 0.5867827464115407, "learning_rate": 2.385639358138636e-06, "loss": 11.9609, "step": 34246 }, { "epoch": 1.8648860209784022, "grad_norm": 0.5885272033914, "learning_rate": 2.3837250653686583e-06, "loss": 11.8016, "step": 34247 }, { "epoch": 1.8649404749749852, "grad_norm": 0.5648847339188333, "learning_rate": 2.3818115316792122e-06, "loss": 11.7979, "step": 34248 }, { "epoch": 1.8649949289715682, "grad_norm": 0.6350035996609225, "learning_rate": 2.379898757085175e-06, "loss": 11.8398, "step": 34249 }, { "epoch": 1.8650493829681514, "grad_norm": 0.524575375399674, "learning_rate": 2.3779867416014125e-06, "loss": 11.899, "step": 34250 }, { "epoch": 1.8651038369647344, "grad_norm": 0.5214220943150758, "learning_rate": 2.3760754852428015e-06, "loss": 11.8004, "step": 34251 }, { "epoch": 1.8651582909613174, "grad_norm": 0.5475231764212184, "learning_rate": 2.374164988024197e-06, "loss": 11.7604, "step": 34252 }, { "epoch": 1.8652127449579003, "grad_norm": 0.5265529373533434, "learning_rate": 2.3722552499604645e-06, "loss": 11.6525, "step": 34253 }, { "epoch": 1.8652671989544833, "grad_norm": 0.5207911192734828, "learning_rate": 2.370346271066426e-06, "loss": 11.7003, "step": 34254 }, { "epoch": 1.8653216529510663, "grad_norm": 0.6038112220169792, "learning_rate": 2.368438051356958e-06, "loss": 11.872, "step": 34255 }, { "epoch": 1.8653761069476493, "grad_norm": 0.5391677969810922, "learning_rate": 2.366530590846905e-06, "loss": 11.6137, "step": 34256 }, { "epoch": 1.8654305609442323, "grad_norm": 0.5426360851693741, "learning_rate": 2.364623889551065e-06, "loss": 11.8784, "step": 34257 }, { "epoch": 1.8654850149408153, "grad_norm": 0.5505850702167205, "learning_rate": 2.3627179474842833e-06, "loss": 11.7918, "step": 34258 }, { "epoch": 1.8655394689373983, "grad_norm": 0.5028812290055151, "learning_rate": 2.360812764661391e-06, "loss": 11.6313, "step": 34259 }, { "epoch": 1.8655939229339813, "grad_norm": 0.5912852801999976, "learning_rate": 2.3589083410971768e-06, "loss": 11.7307, "step": 34260 }, { "epoch": 1.8656483769305643, "grad_norm": 0.5048018768131038, "learning_rate": 2.3570046768064847e-06, "loss": 11.8072, "step": 34261 }, { "epoch": 1.8657028309271473, "grad_norm": 0.5317571589028683, "learning_rate": 2.35510177180408e-06, "loss": 11.6593, "step": 34262 }, { "epoch": 1.8657572849237303, "grad_norm": 0.4986764309703747, "learning_rate": 2.353199626104796e-06, "loss": 11.7164, "step": 34263 }, { "epoch": 1.8658117389203133, "grad_norm": 0.5505031809051203, "learning_rate": 2.3512982397233987e-06, "loss": 11.7329, "step": 34264 }, { "epoch": 1.8658661929168963, "grad_norm": 0.5392223950234498, "learning_rate": 2.3493976126746754e-06, "loss": 11.7521, "step": 34265 }, { "epoch": 1.8659206469134793, "grad_norm": 0.5083800362848726, "learning_rate": 2.347497744973437e-06, "loss": 11.8374, "step": 34266 }, { "epoch": 1.8659751009100625, "grad_norm": 0.5482753870093451, "learning_rate": 2.345598636634405e-06, "loss": 11.7542, "step": 34267 }, { "epoch": 1.8660295549066455, "grad_norm": 0.580260195311508, "learning_rate": 2.34370028767239e-06, "loss": 11.9381, "step": 34268 }, { "epoch": 1.8660840089032285, "grad_norm": 0.5747469685504321, "learning_rate": 2.3418026981021357e-06, "loss": 11.7568, "step": 34269 }, { "epoch": 1.8661384628998114, "grad_norm": 0.5508804485965543, "learning_rate": 2.3399058679383855e-06, "loss": 11.816, "step": 34270 }, { "epoch": 1.8661929168963944, "grad_norm": 0.5502687580181147, "learning_rate": 2.338009797195928e-06, "loss": 11.8374, "step": 34271 }, { "epoch": 1.8662473708929774, "grad_norm": 0.5356402709789219, "learning_rate": 2.3361144858894734e-06, "loss": 11.7471, "step": 34272 }, { "epoch": 1.8663018248895606, "grad_norm": 0.7388047894645445, "learning_rate": 2.334219934033777e-06, "loss": 11.7528, "step": 34273 }, { "epoch": 1.8663562788861436, "grad_norm": 0.5753619368989512, "learning_rate": 2.332326141643548e-06, "loss": 11.817, "step": 34274 }, { "epoch": 1.8664107328827266, "grad_norm": 0.7071034474573465, "learning_rate": 2.3304331087335542e-06, "loss": 11.8966, "step": 34275 }, { "epoch": 1.8664651868793096, "grad_norm": 0.5041905253307493, "learning_rate": 2.328540835318471e-06, "loss": 11.6612, "step": 34276 }, { "epoch": 1.8665196408758926, "grad_norm": 0.5531223309782023, "learning_rate": 2.3266493214130437e-06, "loss": 11.7908, "step": 34277 }, { "epoch": 1.8665740948724756, "grad_norm": 0.5583570288124222, "learning_rate": 2.3247585670319703e-06, "loss": 11.788, "step": 34278 }, { "epoch": 1.8666285488690586, "grad_norm": 0.5290098880893309, "learning_rate": 2.322868572189951e-06, "loss": 11.6824, "step": 34279 }, { "epoch": 1.8666830028656416, "grad_norm": 0.5187326875973255, "learning_rate": 2.3209793369016965e-06, "loss": 11.7978, "step": 34280 }, { "epoch": 1.8667374568622246, "grad_norm": 0.5672294546787048, "learning_rate": 2.3190908611818717e-06, "loss": 11.8205, "step": 34281 }, { "epoch": 1.8667919108588076, "grad_norm": 0.4957828950353944, "learning_rate": 2.3172031450451883e-06, "loss": 11.5603, "step": 34282 }, { "epoch": 1.8668463648553906, "grad_norm": 0.5370713289251441, "learning_rate": 2.3153161885063113e-06, "loss": 11.702, "step": 34283 }, { "epoch": 1.8669008188519736, "grad_norm": 0.5764498160337324, "learning_rate": 2.3134299915799184e-06, "loss": 11.6951, "step": 34284 }, { "epoch": 1.8669552728485566, "grad_norm": 0.5379404055186005, "learning_rate": 2.3115445542806757e-06, "loss": 11.7268, "step": 34285 }, { "epoch": 1.8670097268451396, "grad_norm": 0.5687889360300659, "learning_rate": 2.309659876623238e-06, "loss": 11.808, "step": 34286 }, { "epoch": 1.8670641808417225, "grad_norm": 0.545817895197542, "learning_rate": 2.3077759586222825e-06, "loss": 11.6021, "step": 34287 }, { "epoch": 1.8671186348383055, "grad_norm": 0.5231374223122174, "learning_rate": 2.305892800292442e-06, "loss": 11.7889, "step": 34288 }, { "epoch": 1.8671730888348885, "grad_norm": 0.5538354207890446, "learning_rate": 2.304010401648349e-06, "loss": 11.7504, "step": 34289 }, { "epoch": 1.8672275428314717, "grad_norm": 0.5949262545531886, "learning_rate": 2.3021287627046695e-06, "loss": 11.7684, "step": 34290 }, { "epoch": 1.8672819968280547, "grad_norm": 0.5877613945347738, "learning_rate": 2.3002478834760035e-06, "loss": 11.9002, "step": 34291 }, { "epoch": 1.8673364508246377, "grad_norm": 0.5546132023592178, "learning_rate": 2.298367763976994e-06, "loss": 11.8494, "step": 34292 }, { "epoch": 1.8673909048212207, "grad_norm": 0.5340674487277636, "learning_rate": 2.296488404222286e-06, "loss": 11.8007, "step": 34293 }, { "epoch": 1.8674453588178037, "grad_norm": 0.5522883664425916, "learning_rate": 2.294609804226444e-06, "loss": 11.8901, "step": 34294 }, { "epoch": 1.8674998128143867, "grad_norm": 0.5625909293605665, "learning_rate": 2.292731964004113e-06, "loss": 11.8434, "step": 34295 }, { "epoch": 1.86755426681097, "grad_norm": 0.5467337569973905, "learning_rate": 2.290854883569882e-06, "loss": 11.8364, "step": 34296 }, { "epoch": 1.867608720807553, "grad_norm": 0.5375065484188294, "learning_rate": 2.2889785629383486e-06, "loss": 11.8107, "step": 34297 }, { "epoch": 1.867663174804136, "grad_norm": 0.5255140464611013, "learning_rate": 2.2871030021241134e-06, "loss": 11.6934, "step": 34298 }, { "epoch": 1.867717628800719, "grad_norm": 0.49817229426717313, "learning_rate": 2.2852282011417424e-06, "loss": 11.7442, "step": 34299 }, { "epoch": 1.867772082797302, "grad_norm": 0.5203996708735958, "learning_rate": 2.283354160005824e-06, "loss": 11.8311, "step": 34300 }, { "epoch": 1.8678265367938849, "grad_norm": 0.5673202159230402, "learning_rate": 2.2814808787309348e-06, "loss": 11.7362, "step": 34301 }, { "epoch": 1.8678809907904679, "grad_norm": 1.0016004923503519, "learning_rate": 2.2796083573316306e-06, "loss": 11.822, "step": 34302 }, { "epoch": 1.8679354447870509, "grad_norm": 0.5657832098027313, "learning_rate": 2.277736595822477e-06, "loss": 11.8573, "step": 34303 }, { "epoch": 1.8679898987836339, "grad_norm": 0.5360344128760475, "learning_rate": 2.2758655942180407e-06, "loss": 11.737, "step": 34304 }, { "epoch": 1.8680443527802169, "grad_norm": 0.5493261703465715, "learning_rate": 2.273995352532865e-06, "loss": 11.6219, "step": 34305 }, { "epoch": 1.8680988067767998, "grad_norm": 0.5148775070202533, "learning_rate": 2.272125870781472e-06, "loss": 11.7599, "step": 34306 }, { "epoch": 1.8681532607733828, "grad_norm": 0.5306871466967943, "learning_rate": 2.2702571489784388e-06, "loss": 11.7628, "step": 34307 }, { "epoch": 1.8682077147699658, "grad_norm": 0.5516223979441167, "learning_rate": 2.2683891871382646e-06, "loss": 11.7774, "step": 34308 }, { "epoch": 1.8682621687665488, "grad_norm": 0.5264760278860711, "learning_rate": 2.2665219852754825e-06, "loss": 11.7439, "step": 34309 }, { "epoch": 1.8683166227631318, "grad_norm": 0.5794411286558803, "learning_rate": 2.264655543404626e-06, "loss": 11.6774, "step": 34310 }, { "epoch": 1.8683710767597148, "grad_norm": 0.701812098298003, "learning_rate": 2.2627898615401933e-06, "loss": 11.8844, "step": 34311 }, { "epoch": 1.8684255307562978, "grad_norm": 0.5253276827866118, "learning_rate": 2.260924939696696e-06, "loss": 11.7294, "step": 34312 }, { "epoch": 1.8684799847528808, "grad_norm": 0.5835653874817696, "learning_rate": 2.259060777888633e-06, "loss": 11.8683, "step": 34313 }, { "epoch": 1.868534438749464, "grad_norm": 0.5507583791240651, "learning_rate": 2.2571973761305045e-06, "loss": 11.7279, "step": 34314 }, { "epoch": 1.868588892746047, "grad_norm": 0.5577230961812434, "learning_rate": 2.255334734436809e-06, "loss": 11.753, "step": 34315 }, { "epoch": 1.86864334674263, "grad_norm": 0.5313683324233448, "learning_rate": 2.253472852822014e-06, "loss": 11.6531, "step": 34316 }, { "epoch": 1.868697800739213, "grad_norm": 0.6020399393248772, "learning_rate": 2.251611731300629e-06, "loss": 11.8591, "step": 34317 }, { "epoch": 1.868752254735796, "grad_norm": 0.5795915911399782, "learning_rate": 2.2497513698870766e-06, "loss": 11.7954, "step": 34318 }, { "epoch": 1.868806708732379, "grad_norm": 0.526170584593981, "learning_rate": 2.2478917685958554e-06, "loss": 11.7472, "step": 34319 }, { "epoch": 1.8688611627289622, "grad_norm": 0.5060685953506894, "learning_rate": 2.2460329274414328e-06, "loss": 11.7706, "step": 34320 }, { "epoch": 1.8689156167255452, "grad_norm": 0.49720238683926316, "learning_rate": 2.2441748464382295e-06, "loss": 11.7317, "step": 34321 }, { "epoch": 1.8689700707221282, "grad_norm": 0.7488114605314281, "learning_rate": 2.2423175256007345e-06, "loss": 11.7128, "step": 34322 }, { "epoch": 1.8690245247187112, "grad_norm": 0.5379971433796815, "learning_rate": 2.2404609649433692e-06, "loss": 11.749, "step": 34323 }, { "epoch": 1.8690789787152942, "grad_norm": 0.5231756967868846, "learning_rate": 2.2386051644805783e-06, "loss": 11.7986, "step": 34324 }, { "epoch": 1.8691334327118772, "grad_norm": 0.5753825588422949, "learning_rate": 2.2367501242267717e-06, "loss": 11.7385, "step": 34325 }, { "epoch": 1.8691878867084601, "grad_norm": 0.5608086685297307, "learning_rate": 2.234895844196394e-06, "loss": 11.7776, "step": 34326 }, { "epoch": 1.8692423407050431, "grad_norm": 0.5394895111798916, "learning_rate": 2.233042324403889e-06, "loss": 11.7824, "step": 34327 }, { "epoch": 1.8692967947016261, "grad_norm": 0.5689546395263193, "learning_rate": 2.2311895648636117e-06, "loss": 11.7004, "step": 34328 }, { "epoch": 1.8693512486982091, "grad_norm": 0.600586015788296, "learning_rate": 2.229337565590006e-06, "loss": 11.8067, "step": 34329 }, { "epoch": 1.8694057026947921, "grad_norm": 0.5158412661360852, "learning_rate": 2.227486326597483e-06, "loss": 11.6357, "step": 34330 }, { "epoch": 1.869460156691375, "grad_norm": 0.50905046108615, "learning_rate": 2.225635847900409e-06, "loss": 11.7642, "step": 34331 }, { "epoch": 1.869514610687958, "grad_norm": 0.5485737885268168, "learning_rate": 2.2237861295131946e-06, "loss": 11.8075, "step": 34332 }, { "epoch": 1.869569064684541, "grad_norm": 0.5234525241867425, "learning_rate": 2.2219371714502058e-06, "loss": 11.772, "step": 34333 }, { "epoch": 1.869623518681124, "grad_norm": 0.5333371854004794, "learning_rate": 2.2200889737258423e-06, "loss": 11.7838, "step": 34334 }, { "epoch": 1.869677972677707, "grad_norm": 0.5365931389998904, "learning_rate": 2.2182415363544482e-06, "loss": 11.8084, "step": 34335 }, { "epoch": 1.86973242667429, "grad_norm": 0.6032294290204636, "learning_rate": 2.2163948593504236e-06, "loss": 11.8715, "step": 34336 }, { "epoch": 1.8697868806708733, "grad_norm": 0.5684661051567209, "learning_rate": 2.2145489427281117e-06, "loss": 11.6568, "step": 34337 }, { "epoch": 1.8698413346674563, "grad_norm": 0.5391462109251062, "learning_rate": 2.2127037865018573e-06, "loss": 11.7007, "step": 34338 }, { "epoch": 1.8698957886640393, "grad_norm": 0.5558384979744345, "learning_rate": 2.2108593906860155e-06, "loss": 11.662, "step": 34339 }, { "epoch": 1.8699502426606223, "grad_norm": 0.590831257116328, "learning_rate": 2.2090157552949297e-06, "loss": 11.8627, "step": 34340 }, { "epoch": 1.8700046966572053, "grad_norm": 0.5359014405334612, "learning_rate": 2.2071728803429337e-06, "loss": 11.73, "step": 34341 }, { "epoch": 1.8700591506537882, "grad_norm": 0.5653957194860302, "learning_rate": 2.205330765844382e-06, "loss": 11.8693, "step": 34342 }, { "epoch": 1.8701136046503715, "grad_norm": 0.5562694823055013, "learning_rate": 2.203489411813553e-06, "loss": 11.6833, "step": 34343 }, { "epoch": 1.8701680586469545, "grad_norm": 0.5149356771779124, "learning_rate": 2.2016488182648122e-06, "loss": 11.7022, "step": 34344 }, { "epoch": 1.8702225126435374, "grad_norm": 0.5422724576588746, "learning_rate": 2.199808985212437e-06, "loss": 11.5619, "step": 34345 }, { "epoch": 1.8702769666401204, "grad_norm": 0.5864744721307434, "learning_rate": 2.19796991267075e-06, "loss": 11.6507, "step": 34346 }, { "epoch": 1.8703314206367034, "grad_norm": 0.46228481529677695, "learning_rate": 2.196131600654061e-06, "loss": 11.7805, "step": 34347 }, { "epoch": 1.8703858746332864, "grad_norm": 0.5508542909946582, "learning_rate": 2.194294049176637e-06, "loss": 11.8564, "step": 34348 }, { "epoch": 1.8704403286298694, "grad_norm": 0.5355241792843812, "learning_rate": 2.1924572582528003e-06, "loss": 11.8426, "step": 34349 }, { "epoch": 1.8704947826264524, "grad_norm": 0.6120102724229697, "learning_rate": 2.1906212278968053e-06, "loss": 11.7656, "step": 34350 }, { "epoch": 1.8705492366230354, "grad_norm": 0.5116721589038303, "learning_rate": 2.188785958122963e-06, "loss": 11.6986, "step": 34351 }, { "epoch": 1.8706036906196184, "grad_norm": 0.5128777813311106, "learning_rate": 2.1869514489454955e-06, "loss": 11.6956, "step": 34352 }, { "epoch": 1.8706581446162014, "grad_norm": 0.5499742456332106, "learning_rate": 2.185117700378714e-06, "loss": 11.8258, "step": 34353 }, { "epoch": 1.8707125986127844, "grad_norm": 0.5396749535097277, "learning_rate": 2.1832847124368614e-06, "loss": 11.8081, "step": 34354 }, { "epoch": 1.8707670526093674, "grad_norm": 0.5932369636036566, "learning_rate": 2.1814524851341833e-06, "loss": 11.8096, "step": 34355 }, { "epoch": 1.8708215066059504, "grad_norm": 0.5744945478452651, "learning_rate": 2.179621018484945e-06, "loss": 11.8209, "step": 34356 }, { "epoch": 1.8708759606025334, "grad_norm": 0.5524508983211879, "learning_rate": 2.1777903125033694e-06, "loss": 11.859, "step": 34357 }, { "epoch": 1.8709304145991164, "grad_norm": 0.511186491903641, "learning_rate": 2.1759603672037e-06, "loss": 11.7553, "step": 34358 }, { "epoch": 1.8709848685956993, "grad_norm": 0.597577069022772, "learning_rate": 2.1741311826001808e-06, "loss": 11.9734, "step": 34359 }, { "epoch": 1.8710393225922826, "grad_norm": 0.5804922313953728, "learning_rate": 2.172302758707001e-06, "loss": 11.6796, "step": 34360 }, { "epoch": 1.8710937765888656, "grad_norm": 0.5485987486874512, "learning_rate": 2.170475095538427e-06, "loss": 11.838, "step": 34361 }, { "epoch": 1.8711482305854485, "grad_norm": 0.5347484606120664, "learning_rate": 2.1686481931086244e-06, "loss": 11.8154, "step": 34362 }, { "epoch": 1.8712026845820315, "grad_norm": 0.5123476391533188, "learning_rate": 2.166822051431816e-06, "loss": 11.7624, "step": 34363 }, { "epoch": 1.8712571385786145, "grad_norm": 0.6508918327409489, "learning_rate": 2.1649966705222234e-06, "loss": 11.745, "step": 34364 }, { "epoch": 1.8713115925751975, "grad_norm": 0.5250660221748026, "learning_rate": 2.1631720503940134e-06, "loss": 11.6672, "step": 34365 }, { "epoch": 1.8713660465717807, "grad_norm": 0.6354470342955629, "learning_rate": 2.1613481910613963e-06, "loss": 11.8038, "step": 34366 }, { "epoch": 1.8714205005683637, "grad_norm": 0.48238835692212567, "learning_rate": 2.159525092538539e-06, "loss": 11.736, "step": 34367 }, { "epoch": 1.8714749545649467, "grad_norm": 0.564512669862306, "learning_rate": 2.157702754839608e-06, "loss": 11.8659, "step": 34368 }, { "epoch": 1.8715294085615297, "grad_norm": 0.5635828074828091, "learning_rate": 2.1558811779788023e-06, "loss": 11.7195, "step": 34369 }, { "epoch": 1.8715838625581127, "grad_norm": 0.5534831164136985, "learning_rate": 2.154060361970267e-06, "loss": 11.7614, "step": 34370 }, { "epoch": 1.8716383165546957, "grad_norm": 0.49612570940930945, "learning_rate": 2.1522403068281795e-06, "loss": 11.8315, "step": 34371 }, { "epoch": 1.8716927705512787, "grad_norm": 0.6191222455780548, "learning_rate": 2.1504210125666614e-06, "loss": 11.8296, "step": 34372 }, { "epoch": 1.8717472245478617, "grad_norm": 0.5328092438598759, "learning_rate": 2.1486024791998903e-06, "loss": 11.6856, "step": 34373 }, { "epoch": 1.8718016785444447, "grad_norm": 0.4964358302001134, "learning_rate": 2.1467847067419887e-06, "loss": 11.7469, "step": 34374 }, { "epoch": 1.8718561325410277, "grad_norm": 0.5587324996028472, "learning_rate": 2.144967695207101e-06, "loss": 11.8047, "step": 34375 }, { "epoch": 1.8719105865376107, "grad_norm": 0.5908732823712312, "learning_rate": 2.1431514446093594e-06, "loss": 11.7955, "step": 34376 }, { "epoch": 1.8719650405341937, "grad_norm": 0.5157730303173998, "learning_rate": 2.1413359549628862e-06, "loss": 11.7474, "step": 34377 }, { "epoch": 1.8720194945307767, "grad_norm": 0.5848190070966441, "learning_rate": 2.139521226281793e-06, "loss": 11.8345, "step": 34378 }, { "epoch": 1.8720739485273596, "grad_norm": 0.5816650124556637, "learning_rate": 2.1377072585801906e-06, "loss": 11.7985, "step": 34379 }, { "epoch": 1.8721284025239426, "grad_norm": 0.5434419924222774, "learning_rate": 2.1358940518721893e-06, "loss": 11.7551, "step": 34380 }, { "epoch": 1.8721828565205256, "grad_norm": 0.5277405683949002, "learning_rate": 2.1340816061718893e-06, "loss": 11.8552, "step": 34381 }, { "epoch": 1.8722373105171086, "grad_norm": 0.48254009195127046, "learning_rate": 2.1322699214933793e-06, "loss": 11.7246, "step": 34382 }, { "epoch": 1.8722917645136916, "grad_norm": 0.5822475979806188, "learning_rate": 2.1304589978507595e-06, "loss": 11.7647, "step": 34383 }, { "epoch": 1.8723462185102748, "grad_norm": 0.6217129123806907, "learning_rate": 2.1286488352580846e-06, "loss": 11.765, "step": 34384 }, { "epoch": 1.8724006725068578, "grad_norm": 0.6047053339299858, "learning_rate": 2.126839433729466e-06, "loss": 11.7751, "step": 34385 }, { "epoch": 1.8724551265034408, "grad_norm": 0.600128254369047, "learning_rate": 2.125030793278948e-06, "loss": 11.8939, "step": 34386 }, { "epoch": 1.8725095805000238, "grad_norm": 0.5384829704509522, "learning_rate": 2.1232229139206196e-06, "loss": 11.7675, "step": 34387 }, { "epoch": 1.8725640344966068, "grad_norm": 0.546335750772106, "learning_rate": 2.121415795668513e-06, "loss": 11.7511, "step": 34388 }, { "epoch": 1.8726184884931898, "grad_norm": 0.5903271827455143, "learning_rate": 2.119609438536685e-06, "loss": 11.7408, "step": 34389 }, { "epoch": 1.872672942489773, "grad_norm": 0.5416514137835514, "learning_rate": 2.11780384253919e-06, "loss": 11.7675, "step": 34390 }, { "epoch": 1.872727396486356, "grad_norm": 0.5236622722793727, "learning_rate": 2.1159990076900727e-06, "loss": 11.8181, "step": 34391 }, { "epoch": 1.872781850482939, "grad_norm": 0.5594087362080239, "learning_rate": 2.114194934003366e-06, "loss": 11.7708, "step": 34392 }, { "epoch": 1.872836304479522, "grad_norm": 0.5846820398290147, "learning_rate": 2.112391621493093e-06, "loss": 11.8291, "step": 34393 }, { "epoch": 1.872890758476105, "grad_norm": 0.519282890580764, "learning_rate": 2.1105890701732743e-06, "loss": 11.7426, "step": 34394 }, { "epoch": 1.872945212472688, "grad_norm": 0.5318798358420342, "learning_rate": 2.1087872800579333e-06, "loss": 11.7557, "step": 34395 }, { "epoch": 1.872999666469271, "grad_norm": 0.5705674234138456, "learning_rate": 2.1069862511610694e-06, "loss": 11.6563, "step": 34396 }, { "epoch": 1.873054120465854, "grad_norm": 0.4792590627200801, "learning_rate": 2.1051859834967156e-06, "loss": 11.6679, "step": 34397 }, { "epoch": 1.873108574462437, "grad_norm": 0.6341616288108749, "learning_rate": 2.1033864770788503e-06, "loss": 11.7423, "step": 34398 }, { "epoch": 1.87316302845902, "grad_norm": 0.5527814185344745, "learning_rate": 2.1015877319214507e-06, "loss": 11.9045, "step": 34399 }, { "epoch": 1.873217482455603, "grad_norm": 0.5782207855250743, "learning_rate": 2.0997897480385386e-06, "loss": 11.7253, "step": 34400 }, { "epoch": 1.873271936452186, "grad_norm": 0.5304157200611321, "learning_rate": 2.09799252544407e-06, "loss": 11.6856, "step": 34401 }, { "epoch": 1.873326390448769, "grad_norm": 0.5394937941037234, "learning_rate": 2.096196064152034e-06, "loss": 11.8111, "step": 34402 }, { "epoch": 1.873380844445352, "grad_norm": 0.5954734149265039, "learning_rate": 2.0944003641763966e-06, "loss": 11.843, "step": 34403 }, { "epoch": 1.873435298441935, "grad_norm": 0.551728549063149, "learning_rate": 2.0926054255311136e-06, "loss": 11.7095, "step": 34404 }, { "epoch": 1.873489752438518, "grad_norm": 0.5462890870139695, "learning_rate": 2.0908112482301622e-06, "loss": 11.7751, "step": 34405 }, { "epoch": 1.873544206435101, "grad_norm": 0.5500798009257194, "learning_rate": 2.0890178322874653e-06, "loss": 11.8971, "step": 34406 }, { "epoch": 1.873598660431684, "grad_norm": 0.6280786023691517, "learning_rate": 2.0872251777170005e-06, "loss": 11.8629, "step": 34407 }, { "epoch": 1.873653114428267, "grad_norm": 0.5237086988875659, "learning_rate": 2.0854332845327007e-06, "loss": 11.7362, "step": 34408 }, { "epoch": 1.87370756842485, "grad_norm": 0.5262139772734832, "learning_rate": 2.083642152748466e-06, "loss": 11.8662, "step": 34409 }, { "epoch": 1.873762022421433, "grad_norm": 0.5298680180649677, "learning_rate": 2.0818517823782745e-06, "loss": 11.7746, "step": 34410 }, { "epoch": 1.873816476418016, "grad_norm": 0.5205467443915855, "learning_rate": 2.0800621734360035e-06, "loss": 11.7789, "step": 34411 }, { "epoch": 1.873870930414599, "grad_norm": 0.6486251825344184, "learning_rate": 2.078273325935598e-06, "loss": 11.7703, "step": 34412 }, { "epoch": 1.8739253844111823, "grad_norm": 0.5099860177347235, "learning_rate": 2.0764852398909684e-06, "loss": 11.7349, "step": 34413 }, { "epoch": 1.8739798384077653, "grad_norm": 0.5821970470073644, "learning_rate": 2.074697915316004e-06, "loss": 11.8652, "step": 34414 }, { "epoch": 1.8740342924043483, "grad_norm": 0.5218286181112657, "learning_rate": 2.0729113522246155e-06, "loss": 11.8774, "step": 34415 }, { "epoch": 1.8740887464009313, "grad_norm": 0.5745509749204943, "learning_rate": 2.0711255506306814e-06, "loss": 11.6911, "step": 34416 }, { "epoch": 1.8741432003975143, "grad_norm": 0.6063070917365466, "learning_rate": 2.069340510548112e-06, "loss": 11.7745, "step": 34417 }, { "epoch": 1.8741976543940972, "grad_norm": 0.5818062815628678, "learning_rate": 2.067556231990775e-06, "loss": 11.7646, "step": 34418 }, { "epoch": 1.8742521083906802, "grad_norm": 0.5071365366911584, "learning_rate": 2.065772714972525e-06, "loss": 11.857, "step": 34419 }, { "epoch": 1.8743065623872632, "grad_norm": 0.5530547851562596, "learning_rate": 2.063989959507262e-06, "loss": 11.7571, "step": 34420 }, { "epoch": 1.8743610163838462, "grad_norm": 0.5200060839544695, "learning_rate": 2.062207965608831e-06, "loss": 11.7833, "step": 34421 }, { "epoch": 1.8744154703804292, "grad_norm": 0.5272580332080232, "learning_rate": 2.0604267332911096e-06, "loss": 11.6509, "step": 34422 }, { "epoch": 1.8744699243770122, "grad_norm": 0.567258798481717, "learning_rate": 2.0586462625679203e-06, "loss": 11.8684, "step": 34423 }, { "epoch": 1.8745243783735952, "grad_norm": 0.5232239501305265, "learning_rate": 2.0568665534531184e-06, "loss": 11.739, "step": 34424 }, { "epoch": 1.8745788323701782, "grad_norm": 0.5078560684131614, "learning_rate": 2.0550876059605594e-06, "loss": 11.7891, "step": 34425 }, { "epoch": 1.8746332863667612, "grad_norm": 0.5451956301968227, "learning_rate": 2.053309420104055e-06, "loss": 11.7283, "step": 34426 }, { "epoch": 1.8746877403633442, "grad_norm": 0.5816667572424823, "learning_rate": 2.0515319958974487e-06, "loss": 11.8666, "step": 34427 }, { "epoch": 1.8747421943599272, "grad_norm": 0.5185057030445498, "learning_rate": 2.0497553333545637e-06, "loss": 11.7493, "step": 34428 }, { "epoch": 1.8747966483565102, "grad_norm": 0.5482425338200828, "learning_rate": 2.0479794324891887e-06, "loss": 11.7361, "step": 34429 }, { "epoch": 1.8748511023530934, "grad_norm": 0.5393914245802071, "learning_rate": 2.046204293315168e-06, "loss": 11.7319, "step": 34430 }, { "epoch": 1.8749055563496764, "grad_norm": 0.5342755576887441, "learning_rate": 2.04442991584628e-06, "loss": 11.7162, "step": 34431 }, { "epoch": 1.8749600103462594, "grad_norm": 0.515268639026471, "learning_rate": 2.042656300096335e-06, "loss": 11.7082, "step": 34432 }, { "epoch": 1.8750144643428424, "grad_norm": 0.5224587268132403, "learning_rate": 2.0408834460791227e-06, "loss": 11.7594, "step": 34433 }, { "epoch": 1.8750689183394254, "grad_norm": 0.5892220430621732, "learning_rate": 2.0391113538084316e-06, "loss": 11.8748, "step": 34434 }, { "epoch": 1.8751233723360083, "grad_norm": 0.5619071835590458, "learning_rate": 2.037340023298029e-06, "loss": 11.742, "step": 34435 }, { "epoch": 1.8751778263325916, "grad_norm": 0.5974927532265994, "learning_rate": 2.035569454561692e-06, "loss": 11.774, "step": 34436 }, { "epoch": 1.8752322803291746, "grad_norm": 0.525811668882926, "learning_rate": 2.0337996476132214e-06, "loss": 11.7742, "step": 34437 }, { "epoch": 1.8752867343257575, "grad_norm": 0.5553818211235978, "learning_rate": 2.0320306024663395e-06, "loss": 11.7991, "step": 34438 }, { "epoch": 1.8753411883223405, "grad_norm": 0.5918921431889814, "learning_rate": 2.0302623191348126e-06, "loss": 11.8323, "step": 34439 }, { "epoch": 1.8753956423189235, "grad_norm": 0.5258447972096325, "learning_rate": 2.0284947976324077e-06, "loss": 11.7244, "step": 34440 }, { "epoch": 1.8754500963155065, "grad_norm": 0.532229743224184, "learning_rate": 2.026728037972847e-06, "loss": 11.7806, "step": 34441 }, { "epoch": 1.8755045503120895, "grad_norm": 0.5235079017539482, "learning_rate": 2.0249620401698865e-06, "loss": 11.7687, "step": 34442 }, { "epoch": 1.8755590043086725, "grad_norm": 0.6012943838447397, "learning_rate": 2.023196804237237e-06, "loss": 11.8524, "step": 34443 }, { "epoch": 1.8756134583052555, "grad_norm": 0.5462113512508756, "learning_rate": 2.021432330188655e-06, "loss": 11.7749, "step": 34444 }, { "epoch": 1.8756679123018385, "grad_norm": 0.496202303587155, "learning_rate": 2.0196686180378397e-06, "loss": 11.6893, "step": 34445 }, { "epoch": 1.8757223662984215, "grad_norm": 0.5559307539325787, "learning_rate": 2.017905667798514e-06, "loss": 11.7268, "step": 34446 }, { "epoch": 1.8757768202950045, "grad_norm": 0.513245831471796, "learning_rate": 2.0161434794843893e-06, "loss": 11.7377, "step": 34447 }, { "epoch": 1.8758312742915875, "grad_norm": 0.5683770587521315, "learning_rate": 2.014382053109165e-06, "loss": 11.793, "step": 34448 }, { "epoch": 1.8758857282881705, "grad_norm": 0.5482440403752477, "learning_rate": 2.012621388686531e-06, "loss": 11.8129, "step": 34449 }, { "epoch": 1.8759401822847535, "grad_norm": 0.5094562828130171, "learning_rate": 2.0108614862301866e-06, "loss": 11.7884, "step": 34450 }, { "epoch": 1.8759946362813364, "grad_norm": 0.6093824328480545, "learning_rate": 2.0091023457538105e-06, "loss": 11.8513, "step": 34451 }, { "epoch": 1.8760490902779194, "grad_norm": 0.535910924486717, "learning_rate": 2.0073439672711024e-06, "loss": 11.878, "step": 34452 }, { "epoch": 1.8761035442745024, "grad_norm": 0.5940137508258937, "learning_rate": 2.005586350795707e-06, "loss": 11.8551, "step": 34453 }, { "epoch": 1.8761579982710856, "grad_norm": 0.5250192951520369, "learning_rate": 2.003829496341325e-06, "loss": 11.7418, "step": 34454 }, { "epoch": 1.8762124522676686, "grad_norm": 0.5981081202272566, "learning_rate": 2.0020734039215782e-06, "loss": 11.7875, "step": 34455 }, { "epoch": 1.8762669062642516, "grad_norm": 0.6114501315845203, "learning_rate": 2.000318073550156e-06, "loss": 11.8459, "step": 34456 }, { "epoch": 1.8763213602608346, "grad_norm": 0.4906140205056725, "learning_rate": 1.9985635052406915e-06, "loss": 11.7518, "step": 34457 }, { "epoch": 1.8763758142574176, "grad_norm": 0.5900485469254249, "learning_rate": 1.99680969900683e-06, "loss": 11.8525, "step": 34458 }, { "epoch": 1.8764302682540008, "grad_norm": 0.5923781937237657, "learning_rate": 1.995056654862215e-06, "loss": 11.7313, "step": 34459 }, { "epoch": 1.8764847222505838, "grad_norm": 0.5422296100505702, "learning_rate": 1.9933043728204702e-06, "loss": 11.7604, "step": 34460 }, { "epoch": 1.8765391762471668, "grad_norm": 0.4869438152224834, "learning_rate": 1.9915528528952175e-06, "loss": 11.6852, "step": 34461 }, { "epoch": 1.8765936302437498, "grad_norm": 0.5031597530798516, "learning_rate": 1.9898020951001016e-06, "loss": 11.7802, "step": 34462 }, { "epoch": 1.8766480842403328, "grad_norm": 0.5869418858344352, "learning_rate": 1.9880520994487115e-06, "loss": 11.7233, "step": 34463 }, { "epoch": 1.8767025382369158, "grad_norm": 0.5768040134503095, "learning_rate": 1.986302865954681e-06, "loss": 11.7805, "step": 34464 }, { "epoch": 1.8767569922334988, "grad_norm": 0.5407664945084442, "learning_rate": 1.984554394631577e-06, "loss": 11.7714, "step": 34465 }, { "epoch": 1.8768114462300818, "grad_norm": 0.5181589858084656, "learning_rate": 1.9828066854930328e-06, "loss": 11.8283, "step": 34466 }, { "epoch": 1.8768659002266648, "grad_norm": 0.5918453354931499, "learning_rate": 1.981059738552604e-06, "loss": 11.7435, "step": 34467 }, { "epoch": 1.8769203542232478, "grad_norm": 0.6205678159164849, "learning_rate": 1.9793135538239028e-06, "loss": 11.8655, "step": 34468 }, { "epoch": 1.8769748082198308, "grad_norm": 0.5245735247157259, "learning_rate": 1.9775681313204953e-06, "loss": 11.8524, "step": 34469 }, { "epoch": 1.8770292622164138, "grad_norm": 0.5423945105094565, "learning_rate": 1.9758234710559485e-06, "loss": 11.7362, "step": 34470 }, { "epoch": 1.8770837162129967, "grad_norm": 0.5091349420645608, "learning_rate": 1.9740795730438524e-06, "loss": 11.7449, "step": 34471 }, { "epoch": 1.8771381702095797, "grad_norm": 0.5295070002646127, "learning_rate": 1.9723364372977394e-06, "loss": 11.6581, "step": 34472 }, { "epoch": 1.8771926242061627, "grad_norm": 0.5319138956302755, "learning_rate": 1.970594063831177e-06, "loss": 11.7964, "step": 34473 }, { "epoch": 1.8772470782027457, "grad_norm": 0.564039425531116, "learning_rate": 1.9688524526577213e-06, "loss": 11.6605, "step": 34474 }, { "epoch": 1.8773015321993287, "grad_norm": 0.5564437896006181, "learning_rate": 1.9671116037909056e-06, "loss": 11.6495, "step": 34475 }, { "epoch": 1.8773559861959117, "grad_norm": 0.6194062080551602, "learning_rate": 1.9653715172442743e-06, "loss": 11.7993, "step": 34476 }, { "epoch": 1.877410440192495, "grad_norm": 0.6026581706880969, "learning_rate": 1.9636321930313507e-06, "loss": 11.8409, "step": 34477 }, { "epoch": 1.877464894189078, "grad_norm": 0.5308738460868556, "learning_rate": 1.961893631165668e-06, "loss": 11.8138, "step": 34478 }, { "epoch": 1.877519348185661, "grad_norm": 0.545804917861025, "learning_rate": 1.9601558316607482e-06, "loss": 11.7917, "step": 34479 }, { "epoch": 1.877573802182244, "grad_norm": 0.6841642340268604, "learning_rate": 1.9584187945300812e-06, "loss": 11.793, "step": 34480 }, { "epoch": 1.877628256178827, "grad_norm": 0.5817327235030416, "learning_rate": 1.9566825197872007e-06, "loss": 11.8931, "step": 34481 }, { "epoch": 1.8776827101754099, "grad_norm": 0.5477205931273549, "learning_rate": 1.9549470074455957e-06, "loss": 11.7111, "step": 34482 }, { "epoch": 1.877737164171993, "grad_norm": 0.5515966502652739, "learning_rate": 1.9532122575187663e-06, "loss": 11.8316, "step": 34483 }, { "epoch": 1.877791618168576, "grad_norm": 0.4810358389520921, "learning_rate": 1.9514782700202018e-06, "loss": 11.7007, "step": 34484 }, { "epoch": 1.877846072165159, "grad_norm": 0.5236914559142065, "learning_rate": 1.94974504496338e-06, "loss": 11.7711, "step": 34485 }, { "epoch": 1.877900526161742, "grad_norm": 0.5716203529750217, "learning_rate": 1.948012582361791e-06, "loss": 11.8149, "step": 34486 }, { "epoch": 1.877954980158325, "grad_norm": 0.5472489722211471, "learning_rate": 1.94628088222889e-06, "loss": 11.862, "step": 34487 }, { "epoch": 1.878009434154908, "grad_norm": 0.5891219500076248, "learning_rate": 1.9445499445781666e-06, "loss": 11.7981, "step": 34488 }, { "epoch": 1.878063888151491, "grad_norm": 0.5271053617140028, "learning_rate": 1.9428197694230543e-06, "loss": 11.7337, "step": 34489 }, { "epoch": 1.878118342148074, "grad_norm": 0.5517514029627272, "learning_rate": 1.94109035677702e-06, "loss": 11.9251, "step": 34490 }, { "epoch": 1.878172796144657, "grad_norm": 0.5427870678584611, "learning_rate": 1.9393617066535196e-06, "loss": 11.676, "step": 34491 }, { "epoch": 1.87822725014124, "grad_norm": 0.5426510252194713, "learning_rate": 1.937633819065987e-06, "loss": 11.7591, "step": 34492 }, { "epoch": 1.878281704137823, "grad_norm": 0.5740285832375813, "learning_rate": 1.935906694027856e-06, "loss": 11.6884, "step": 34493 }, { "epoch": 1.878336158134406, "grad_norm": 0.5414019091039056, "learning_rate": 1.9341803315525488e-06, "loss": 11.6635, "step": 34494 }, { "epoch": 1.878390612130989, "grad_norm": 0.5739667806560893, "learning_rate": 1.9324547316535104e-06, "loss": 11.7823, "step": 34495 }, { "epoch": 1.878445066127572, "grad_norm": 0.5395552988286576, "learning_rate": 1.9307298943441523e-06, "loss": 11.7178, "step": 34496 }, { "epoch": 1.878499520124155, "grad_norm": 0.4898450664483993, "learning_rate": 1.929005819637886e-06, "loss": 11.7795, "step": 34497 }, { "epoch": 1.878553974120738, "grad_norm": 0.4883211430185245, "learning_rate": 1.9272825075481226e-06, "loss": 11.73, "step": 34498 }, { "epoch": 1.878608428117321, "grad_norm": 0.5283353580397148, "learning_rate": 1.925559958088241e-06, "loss": 11.7586, "step": 34499 }, { "epoch": 1.8786628821139042, "grad_norm": 0.5234978536808704, "learning_rate": 1.9238381712716636e-06, "loss": 11.9098, "step": 34500 }, { "epoch": 1.8787173361104872, "grad_norm": 0.5749481724090452, "learning_rate": 1.9221171471117684e-06, "loss": 11.7901, "step": 34501 }, { "epoch": 1.8787717901070702, "grad_norm": 0.5375052414148336, "learning_rate": 1.9203968856219224e-06, "loss": 11.6708, "step": 34502 }, { "epoch": 1.8788262441036532, "grad_norm": 0.5686279822901938, "learning_rate": 1.918677386815537e-06, "loss": 11.9018, "step": 34503 }, { "epoch": 1.8788806981002362, "grad_norm": 0.5093394053034656, "learning_rate": 1.9169586507059577e-06, "loss": 11.8013, "step": 34504 }, { "epoch": 1.8789351520968192, "grad_norm": 0.571703094684345, "learning_rate": 1.9152406773065513e-06, "loss": 11.8528, "step": 34505 }, { "epoch": 1.8789896060934024, "grad_norm": 0.5498325117636943, "learning_rate": 1.9135234666306844e-06, "loss": 11.6962, "step": 34506 }, { "epoch": 1.8790440600899854, "grad_norm": 0.5639003883480723, "learning_rate": 1.9118070186917137e-06, "loss": 11.8434, "step": 34507 }, { "epoch": 1.8790985140865684, "grad_norm": 0.5960834375712886, "learning_rate": 1.9100913335029833e-06, "loss": 11.8433, "step": 34508 }, { "epoch": 1.8791529680831514, "grad_norm": 0.5022991779269027, "learning_rate": 1.908376411077828e-06, "loss": 11.5936, "step": 34509 }, { "epoch": 1.8792074220797343, "grad_norm": 0.5216442959501958, "learning_rate": 1.9066622514295807e-06, "loss": 11.7873, "step": 34510 }, { "epoch": 1.8792618760763173, "grad_norm": 0.5401386107197366, "learning_rate": 1.9049488545715865e-06, "loss": 11.8959, "step": 34511 }, { "epoch": 1.8793163300729003, "grad_norm": 0.5106782080787051, "learning_rate": 1.9032362205171572e-06, "loss": 11.7131, "step": 34512 }, { "epoch": 1.8793707840694833, "grad_norm": 0.5643095554772389, "learning_rate": 1.9015243492796154e-06, "loss": 11.7519, "step": 34513 }, { "epoch": 1.8794252380660663, "grad_norm": 0.5136996016727898, "learning_rate": 1.8998132408722724e-06, "loss": 11.7354, "step": 34514 }, { "epoch": 1.8794796920626493, "grad_norm": 0.5778985080824569, "learning_rate": 1.898102895308429e-06, "loss": 11.7375, "step": 34515 }, { "epoch": 1.8795341460592323, "grad_norm": 0.548016986559713, "learning_rate": 1.8963933126013856e-06, "loss": 11.6967, "step": 34516 }, { "epoch": 1.8795886000558153, "grad_norm": 0.5332376768984646, "learning_rate": 1.8946844927644425e-06, "loss": 11.625, "step": 34517 }, { "epoch": 1.8796430540523983, "grad_norm": 0.5273766765887075, "learning_rate": 1.8929764358109003e-06, "loss": 11.7244, "step": 34518 }, { "epoch": 1.8796975080489813, "grad_norm": 0.5529812715269194, "learning_rate": 1.8912691417540152e-06, "loss": 11.5003, "step": 34519 }, { "epoch": 1.8797519620455643, "grad_norm": 0.5449124996821897, "learning_rate": 1.8895626106070763e-06, "loss": 11.8011, "step": 34520 }, { "epoch": 1.8798064160421473, "grad_norm": 0.5784908225647555, "learning_rate": 1.88785684238334e-06, "loss": 11.8468, "step": 34521 }, { "epoch": 1.8798608700387303, "grad_norm": 0.5575582693831567, "learning_rate": 1.886151837096084e-06, "loss": 11.7628, "step": 34522 }, { "epoch": 1.8799153240353133, "grad_norm": 0.5495828736806819, "learning_rate": 1.884447594758576e-06, "loss": 11.7958, "step": 34523 }, { "epoch": 1.8799697780318965, "grad_norm": 0.5294812376046137, "learning_rate": 1.8827441153840496e-06, "loss": 11.7934, "step": 34524 }, { "epoch": 1.8800242320284795, "grad_norm": 0.527139784511984, "learning_rate": 1.8810413989857722e-06, "loss": 11.7265, "step": 34525 }, { "epoch": 1.8800786860250625, "grad_norm": 0.551246936249197, "learning_rate": 1.8793394455769552e-06, "loss": 11.6954, "step": 34526 }, { "epoch": 1.8801331400216454, "grad_norm": 0.5171860086676685, "learning_rate": 1.8776382551708548e-06, "loss": 11.7661, "step": 34527 }, { "epoch": 1.8801875940182284, "grad_norm": 0.5310445627915399, "learning_rate": 1.875937827780705e-06, "loss": 11.702, "step": 34528 }, { "epoch": 1.8802420480148117, "grad_norm": 0.541717433002239, "learning_rate": 1.874238163419706e-06, "loss": 11.693, "step": 34529 }, { "epoch": 1.8802965020113946, "grad_norm": 0.5230157604658559, "learning_rate": 1.8725392621010917e-06, "loss": 11.9111, "step": 34530 }, { "epoch": 1.8803509560079776, "grad_norm": 0.5148423224131015, "learning_rate": 1.870841123838063e-06, "loss": 11.6381, "step": 34531 }, { "epoch": 1.8804054100045606, "grad_norm": 0.5341492950235581, "learning_rate": 1.8691437486438313e-06, "loss": 11.8082, "step": 34532 }, { "epoch": 1.8804598640011436, "grad_norm": 0.575986502869782, "learning_rate": 1.867447136531597e-06, "loss": 11.7954, "step": 34533 }, { "epoch": 1.8805143179977266, "grad_norm": 0.5303804944689541, "learning_rate": 1.8657512875145388e-06, "loss": 11.7158, "step": 34534 }, { "epoch": 1.8805687719943096, "grad_norm": 0.5393704055071213, "learning_rate": 1.8640562016058794e-06, "loss": 11.7661, "step": 34535 }, { "epoch": 1.8806232259908926, "grad_norm": 0.5307814562574126, "learning_rate": 1.8623618788187524e-06, "loss": 11.7323, "step": 34536 }, { "epoch": 1.8806776799874756, "grad_norm": 0.5245617896281497, "learning_rate": 1.860668319166381e-06, "loss": 11.6104, "step": 34537 }, { "epoch": 1.8807321339840586, "grad_norm": 0.5890252640452627, "learning_rate": 1.8589755226618987e-06, "loss": 11.7918, "step": 34538 }, { "epoch": 1.8807865879806416, "grad_norm": 0.639612477749031, "learning_rate": 1.857283489318473e-06, "loss": 11.7559, "step": 34539 }, { "epoch": 1.8808410419772246, "grad_norm": 0.5893184885080097, "learning_rate": 1.8555922191492825e-06, "loss": 11.7902, "step": 34540 }, { "epoch": 1.8808954959738076, "grad_norm": 0.542572726714157, "learning_rate": 1.8539017121674495e-06, "loss": 11.7784, "step": 34541 }, { "epoch": 1.8809499499703906, "grad_norm": 0.5043011800731687, "learning_rate": 1.8522119683861528e-06, "loss": 11.7943, "step": 34542 }, { "epoch": 1.8810044039669735, "grad_norm": 0.5480161379186976, "learning_rate": 1.8505229878185038e-06, "loss": 11.7191, "step": 34543 }, { "epoch": 1.8810588579635565, "grad_norm": 0.5861594521838982, "learning_rate": 1.8488347704776477e-06, "loss": 11.9703, "step": 34544 }, { "epoch": 1.8811133119601395, "grad_norm": 0.513443270616714, "learning_rate": 1.8471473163767295e-06, "loss": 11.7988, "step": 34545 }, { "epoch": 1.8811677659567225, "grad_norm": 0.5734974278795244, "learning_rate": 1.8454606255288386e-06, "loss": 11.6857, "step": 34546 }, { "epoch": 1.8812222199533057, "grad_norm": 0.601132744975943, "learning_rate": 1.8437746979471093e-06, "loss": 11.8225, "step": 34547 }, { "epoch": 1.8812766739498887, "grad_norm": 0.4990396662536802, "learning_rate": 1.842089533644653e-06, "loss": 11.7413, "step": 34548 }, { "epoch": 1.8813311279464717, "grad_norm": 0.5113258685520127, "learning_rate": 1.8404051326345596e-06, "loss": 11.7624, "step": 34549 }, { "epoch": 1.8813855819430547, "grad_norm": 0.5341688777125242, "learning_rate": 1.8387214949299514e-06, "loss": 11.6345, "step": 34550 }, { "epoch": 1.8814400359396377, "grad_norm": 0.531137488453673, "learning_rate": 1.837038620543896e-06, "loss": 11.7813, "step": 34551 }, { "epoch": 1.8814944899362207, "grad_norm": 0.5573666641696239, "learning_rate": 1.8353565094894941e-06, "loss": 11.8014, "step": 34552 }, { "epoch": 1.881548943932804, "grad_norm": 0.5511885283203259, "learning_rate": 1.8336751617798132e-06, "loss": 11.7881, "step": 34553 }, { "epoch": 1.881603397929387, "grad_norm": 0.5616453379529558, "learning_rate": 1.8319945774279534e-06, "loss": 11.8446, "step": 34554 }, { "epoch": 1.88165785192597, "grad_norm": 0.5507030940031911, "learning_rate": 1.8303147564469492e-06, "loss": 11.7357, "step": 34555 }, { "epoch": 1.881712305922553, "grad_norm": 0.49553125219301664, "learning_rate": 1.8286356988498898e-06, "loss": 11.6769, "step": 34556 }, { "epoch": 1.881766759919136, "grad_norm": 0.5741465435718031, "learning_rate": 1.8269574046498205e-06, "loss": 11.8025, "step": 34557 }, { "epoch": 1.8818212139157189, "grad_norm": 0.520782133536226, "learning_rate": 1.8252798738597866e-06, "loss": 11.7877, "step": 34558 }, { "epoch": 1.8818756679123019, "grad_norm": 0.574023474605489, "learning_rate": 1.8236031064928548e-06, "loss": 11.7481, "step": 34559 }, { "epoch": 1.8819301219088849, "grad_norm": 0.5782810539166594, "learning_rate": 1.8219271025620489e-06, "loss": 11.833, "step": 34560 }, { "epoch": 1.8819845759054679, "grad_norm": 0.6411418693401827, "learning_rate": 1.820251862080391e-06, "loss": 11.714, "step": 34561 }, { "epoch": 1.8820390299020509, "grad_norm": 0.5110308301288894, "learning_rate": 1.8185773850609267e-06, "loss": 11.7653, "step": 34562 }, { "epoch": 1.8820934838986338, "grad_norm": 0.5322964441196149, "learning_rate": 1.8169036715166677e-06, "loss": 11.744, "step": 34563 }, { "epoch": 1.8821479378952168, "grad_norm": 0.6191140639186177, "learning_rate": 1.8152307214606368e-06, "loss": 11.7733, "step": 34564 }, { "epoch": 1.8822023918917998, "grad_norm": 0.5484913916318791, "learning_rate": 1.8135585349058236e-06, "loss": 11.7513, "step": 34565 }, { "epoch": 1.8822568458883828, "grad_norm": 0.5840247447273171, "learning_rate": 1.8118871118652515e-06, "loss": 11.8865, "step": 34566 }, { "epoch": 1.8823112998849658, "grad_norm": 0.48549691500511627, "learning_rate": 1.8102164523519206e-06, "loss": 11.7408, "step": 34567 }, { "epoch": 1.8823657538815488, "grad_norm": 0.5255324341211323, "learning_rate": 1.8085465563787985e-06, "loss": 11.8428, "step": 34568 }, { "epoch": 1.8824202078781318, "grad_norm": 0.5819003900039607, "learning_rate": 1.8068774239589082e-06, "loss": 11.8125, "step": 34569 }, { "epoch": 1.882474661874715, "grad_norm": 0.5291091749693664, "learning_rate": 1.8052090551051837e-06, "loss": 11.8378, "step": 34570 }, { "epoch": 1.882529115871298, "grad_norm": 0.5638550577042505, "learning_rate": 1.8035414498306258e-06, "loss": 11.5874, "step": 34571 }, { "epoch": 1.882583569867881, "grad_norm": 0.5451776922628029, "learning_rate": 1.8018746081482018e-06, "loss": 11.68, "step": 34572 }, { "epoch": 1.882638023864464, "grad_norm": 0.6496991930191914, "learning_rate": 1.800208530070857e-06, "loss": 11.758, "step": 34573 }, { "epoch": 1.882692477861047, "grad_norm": 0.5364858740991691, "learning_rate": 1.798543215611581e-06, "loss": 11.7761, "step": 34574 }, { "epoch": 1.88274693185763, "grad_norm": 0.556211449417477, "learning_rate": 1.7968786647832747e-06, "loss": 11.8209, "step": 34575 }, { "epoch": 1.8828013858542132, "grad_norm": 0.5515509456259532, "learning_rate": 1.7952148775989275e-06, "loss": 11.8054, "step": 34576 }, { "epoch": 1.8828558398507962, "grad_norm": 0.6478233205323938, "learning_rate": 1.7935518540714514e-06, "loss": 11.8756, "step": 34577 }, { "epoch": 1.8829102938473792, "grad_norm": 0.5265629244130572, "learning_rate": 1.7918895942137804e-06, "loss": 11.7566, "step": 34578 }, { "epoch": 1.8829647478439622, "grad_norm": 0.5120578431869297, "learning_rate": 1.7902280980388596e-06, "loss": 11.676, "step": 34579 }, { "epoch": 1.8830192018405452, "grad_norm": 0.5100364969390428, "learning_rate": 1.7885673655595902e-06, "loss": 11.8194, "step": 34580 }, { "epoch": 1.8830736558371282, "grad_norm": 0.6390320572686244, "learning_rate": 1.7869073967888839e-06, "loss": 11.8057, "step": 34581 }, { "epoch": 1.8831281098337112, "grad_norm": 0.6243437268660795, "learning_rate": 1.7852481917396636e-06, "loss": 11.8041, "step": 34582 }, { "epoch": 1.8831825638302941, "grad_norm": 0.5538963090657304, "learning_rate": 1.7835897504248078e-06, "loss": 11.7161, "step": 34583 }, { "epoch": 1.8832370178268771, "grad_norm": 0.5899599698293884, "learning_rate": 1.7819320728572508e-06, "loss": 11.8435, "step": 34584 }, { "epoch": 1.8832914718234601, "grad_norm": 0.5630917006394409, "learning_rate": 1.780275159049849e-06, "loss": 11.7576, "step": 34585 }, { "epoch": 1.8833459258200431, "grad_norm": 0.5680457632757285, "learning_rate": 1.778619009015503e-06, "loss": 11.817, "step": 34586 }, { "epoch": 1.8834003798166261, "grad_norm": 0.6171949631287466, "learning_rate": 1.7769636227670805e-06, "loss": 11.7812, "step": 34587 }, { "epoch": 1.883454833813209, "grad_norm": 0.5608462336342018, "learning_rate": 1.7753090003174711e-06, "loss": 11.6799, "step": 34588 }, { "epoch": 1.883509287809792, "grad_norm": 0.614067725318175, "learning_rate": 1.773655141679531e-06, "loss": 11.876, "step": 34589 }, { "epoch": 1.883563741806375, "grad_norm": 0.5843568433323433, "learning_rate": 1.772002046866117e-06, "loss": 11.8226, "step": 34590 }, { "epoch": 1.883618195802958, "grad_norm": 0.5584145875563088, "learning_rate": 1.770349715890085e-06, "loss": 11.727, "step": 34591 }, { "epoch": 1.883672649799541, "grad_norm": 0.553526472848038, "learning_rate": 1.7686981487642918e-06, "loss": 12.0033, "step": 34592 }, { "epoch": 1.8837271037961243, "grad_norm": 0.5529915250229152, "learning_rate": 1.76704734550156e-06, "loss": 11.7626, "step": 34593 }, { "epoch": 1.8837815577927073, "grad_norm": 0.5301679117743063, "learning_rate": 1.7653973061147688e-06, "loss": 11.8459, "step": 34594 }, { "epoch": 1.8838360117892903, "grad_norm": 0.6135710586873316, "learning_rate": 1.7637480306166964e-06, "loss": 11.8688, "step": 34595 }, { "epoch": 1.8838904657858733, "grad_norm": 0.511829590838322, "learning_rate": 1.7620995190202105e-06, "loss": 11.7596, "step": 34596 }, { "epoch": 1.8839449197824563, "grad_norm": 0.507604732791635, "learning_rate": 1.7604517713381008e-06, "loss": 11.7183, "step": 34597 }, { "epoch": 1.8839993737790393, "grad_norm": 0.5305830915378384, "learning_rate": 1.7588047875832013e-06, "loss": 11.7633, "step": 34598 }, { "epoch": 1.8840538277756225, "grad_norm": 0.5432931869801424, "learning_rate": 1.7571585677683133e-06, "loss": 11.8153, "step": 34599 }, { "epoch": 1.8841082817722055, "grad_norm": 0.585687679615243, "learning_rate": 1.7555131119062374e-06, "loss": 11.7612, "step": 34600 }, { "epoch": 1.8841627357687885, "grad_norm": 0.5167908151283527, "learning_rate": 1.7538684200097632e-06, "loss": 11.7514, "step": 34601 }, { "epoch": 1.8842171897653714, "grad_norm": 0.5467397447246971, "learning_rate": 1.7522244920916698e-06, "loss": 11.8294, "step": 34602 }, { "epoch": 1.8842716437619544, "grad_norm": 0.6097082491675548, "learning_rate": 1.7505813281647797e-06, "loss": 11.8247, "step": 34603 }, { "epoch": 1.8843260977585374, "grad_norm": 0.5857637570024662, "learning_rate": 1.7489389282418278e-06, "loss": 11.7459, "step": 34604 }, { "epoch": 1.8843805517551204, "grad_norm": 0.5376543082440299, "learning_rate": 1.7472972923356035e-06, "loss": 11.845, "step": 34605 }, { "epoch": 1.8844350057517034, "grad_norm": 0.5574582469953928, "learning_rate": 1.7456564204588854e-06, "loss": 11.8851, "step": 34606 }, { "epoch": 1.8844894597482864, "grad_norm": 0.5628027342243649, "learning_rate": 1.744016312624408e-06, "loss": 11.8243, "step": 34607 }, { "epoch": 1.8845439137448694, "grad_norm": 0.4962840768947248, "learning_rate": 1.7423769688449388e-06, "loss": 11.7812, "step": 34608 }, { "epoch": 1.8845983677414524, "grad_norm": 0.5508468887795229, "learning_rate": 1.740738389133234e-06, "loss": 11.8607, "step": 34609 }, { "epoch": 1.8846528217380354, "grad_norm": 0.5677877786463065, "learning_rate": 1.7391005735020172e-06, "loss": 11.6974, "step": 34610 }, { "epoch": 1.8847072757346184, "grad_norm": 0.6195100329471449, "learning_rate": 1.7374635219640334e-06, "loss": 11.756, "step": 34611 }, { "epoch": 1.8847617297312014, "grad_norm": 0.5438648655468336, "learning_rate": 1.735827234532006e-06, "loss": 11.7925, "step": 34612 }, { "epoch": 1.8848161837277844, "grad_norm": 0.532194074554113, "learning_rate": 1.7341917112186801e-06, "loss": 11.7477, "step": 34613 }, { "epoch": 1.8848706377243674, "grad_norm": 0.5519866347819824, "learning_rate": 1.7325569520367458e-06, "loss": 11.6991, "step": 34614 }, { "epoch": 1.8849250917209504, "grad_norm": 0.5740721307059174, "learning_rate": 1.730922956998926e-06, "loss": 11.6997, "step": 34615 }, { "epoch": 1.8849795457175333, "grad_norm": 0.5505885728565861, "learning_rate": 1.7292897261179442e-06, "loss": 11.6975, "step": 34616 }, { "epoch": 1.8850339997141166, "grad_norm": 0.5209356291069546, "learning_rate": 1.7276572594064677e-06, "loss": 11.7307, "step": 34617 }, { "epoch": 1.8850884537106996, "grad_norm": 0.5481467800813334, "learning_rate": 1.72602555687722e-06, "loss": 11.6987, "step": 34618 }, { "epoch": 1.8851429077072825, "grad_norm": 0.5774504452202481, "learning_rate": 1.7243946185428794e-06, "loss": 11.8261, "step": 34619 }, { "epoch": 1.8851973617038655, "grad_norm": 0.5343436440407358, "learning_rate": 1.722764444416125e-06, "loss": 11.7185, "step": 34620 }, { "epoch": 1.8852518157004485, "grad_norm": 0.6282483195123607, "learning_rate": 1.7211350345096355e-06, "loss": 11.5196, "step": 34621 }, { "epoch": 1.8853062696970315, "grad_norm": 0.5681866131835598, "learning_rate": 1.7195063888360786e-06, "loss": 11.767, "step": 34622 }, { "epoch": 1.8853607236936147, "grad_norm": 0.5812783220152261, "learning_rate": 1.7178785074081216e-06, "loss": 11.9382, "step": 34623 }, { "epoch": 1.8854151776901977, "grad_norm": 0.526674766439873, "learning_rate": 1.7162513902384214e-06, "loss": 11.6523, "step": 34624 }, { "epoch": 1.8854696316867807, "grad_norm": 0.541344562149101, "learning_rate": 1.7146250373396455e-06, "loss": 11.8034, "step": 34625 }, { "epoch": 1.8855240856833637, "grad_norm": 0.5301244783231273, "learning_rate": 1.7129994487244061e-06, "loss": 11.6972, "step": 34626 }, { "epoch": 1.8855785396799467, "grad_norm": 0.5027119398328848, "learning_rate": 1.7113746244053818e-06, "loss": 11.7451, "step": 34627 }, { "epoch": 1.8856329936765297, "grad_norm": 0.5125000611931756, "learning_rate": 1.709750564395185e-06, "loss": 11.7177, "step": 34628 }, { "epoch": 1.8856874476731127, "grad_norm": 0.5237211434302128, "learning_rate": 1.7081272687064609e-06, "loss": 11.765, "step": 34629 }, { "epoch": 1.8857419016696957, "grad_norm": 0.639306643617262, "learning_rate": 1.7065047373518105e-06, "loss": 11.6949, "step": 34630 }, { "epoch": 1.8857963556662787, "grad_norm": 0.5658080562396036, "learning_rate": 1.7048829703438685e-06, "loss": 11.7816, "step": 34631 }, { "epoch": 1.8858508096628617, "grad_norm": 0.5362130035284358, "learning_rate": 1.7032619676952356e-06, "loss": 11.815, "step": 34632 }, { "epoch": 1.8859052636594447, "grad_norm": 0.5027667432043391, "learning_rate": 1.7016417294185349e-06, "loss": 11.6848, "step": 34633 }, { "epoch": 1.8859597176560277, "grad_norm": 0.6115580945655306, "learning_rate": 1.7000222555263346e-06, "loss": 11.7466, "step": 34634 }, { "epoch": 1.8860141716526106, "grad_norm": 0.5362984113650079, "learning_rate": 1.6984035460312687e-06, "loss": 11.7809, "step": 34635 }, { "epoch": 1.8860686256491936, "grad_norm": 0.5479685299225547, "learning_rate": 1.6967856009458826e-06, "loss": 11.913, "step": 34636 }, { "epoch": 1.8861230796457766, "grad_norm": 0.5556102697829234, "learning_rate": 1.6951684202827888e-06, "loss": 11.8064, "step": 34637 }, { "epoch": 1.8861775336423596, "grad_norm": 0.5221456171550668, "learning_rate": 1.6935520040545328e-06, "loss": 11.798, "step": 34638 }, { "epoch": 1.8862319876389426, "grad_norm": 0.5436265338598268, "learning_rate": 1.6919363522737263e-06, "loss": 11.7613, "step": 34639 }, { "epoch": 1.8862864416355258, "grad_norm": 0.5368084362797071, "learning_rate": 1.6903214649529043e-06, "loss": 11.8183, "step": 34640 }, { "epoch": 1.8863408956321088, "grad_norm": 0.6089297506825833, "learning_rate": 1.6887073421046117e-06, "loss": 11.7475, "step": 34641 }, { "epoch": 1.8863953496286918, "grad_norm": 0.5182800471381385, "learning_rate": 1.6870939837414278e-06, "loss": 11.7802, "step": 34642 }, { "epoch": 1.8864498036252748, "grad_norm": 0.5707836716971906, "learning_rate": 1.685481389875887e-06, "loss": 11.8407, "step": 34643 }, { "epoch": 1.8865042576218578, "grad_norm": 0.5300192048478521, "learning_rate": 1.6838695605205346e-06, "loss": 11.6704, "step": 34644 }, { "epoch": 1.8865587116184408, "grad_norm": 0.5188660417075077, "learning_rate": 1.682258495687905e-06, "loss": 11.7235, "step": 34645 }, { "epoch": 1.886613165615024, "grad_norm": 0.571706838943987, "learning_rate": 1.6806481953905106e-06, "loss": 11.7495, "step": 34646 }, { "epoch": 1.886667619611607, "grad_norm": 0.531707708010136, "learning_rate": 1.6790386596408858e-06, "loss": 11.6087, "step": 34647 }, { "epoch": 1.88672207360819, "grad_norm": 0.5221425359013574, "learning_rate": 1.6774298884515427e-06, "loss": 11.7859, "step": 34648 }, { "epoch": 1.886776527604773, "grad_norm": 0.512961130352283, "learning_rate": 1.6758218818350046e-06, "loss": 11.7506, "step": 34649 }, { "epoch": 1.886830981601356, "grad_norm": 0.5830322974777441, "learning_rate": 1.6742146398037617e-06, "loss": 11.8445, "step": 34650 }, { "epoch": 1.886885435597939, "grad_norm": 0.5815686863134284, "learning_rate": 1.6726081623703038e-06, "loss": 11.8466, "step": 34651 }, { "epoch": 1.886939889594522, "grad_norm": 0.5212978927462005, "learning_rate": 1.6710024495471433e-06, "loss": 11.7543, "step": 34652 }, { "epoch": 1.886994343591105, "grad_norm": 0.5131432598187302, "learning_rate": 1.6693975013467478e-06, "loss": 11.7428, "step": 34653 }, { "epoch": 1.887048797587688, "grad_norm": 0.5679582808671314, "learning_rate": 1.6677933177816184e-06, "loss": 11.8561, "step": 34654 }, { "epoch": 1.887103251584271, "grad_norm": 0.5255067247519414, "learning_rate": 1.6661898988642123e-06, "loss": 11.7917, "step": 34655 }, { "epoch": 1.887157705580854, "grad_norm": 0.563327143505627, "learning_rate": 1.6645872446070078e-06, "loss": 11.8587, "step": 34656 }, { "epoch": 1.887212159577437, "grad_norm": 0.5254916221724062, "learning_rate": 1.6629853550224618e-06, "loss": 11.7731, "step": 34657 }, { "epoch": 1.88726661357402, "grad_norm": 0.569864926068086, "learning_rate": 1.661384230123031e-06, "loss": 11.6328, "step": 34658 }, { "epoch": 1.887321067570603, "grad_norm": 0.5249439267573294, "learning_rate": 1.659783869921172e-06, "loss": 11.7202, "step": 34659 }, { "epoch": 1.887375521567186, "grad_norm": 0.514860100244056, "learning_rate": 1.6581842744293307e-06, "loss": 11.8043, "step": 34660 }, { "epoch": 1.887429975563769, "grad_norm": 0.5258641290207511, "learning_rate": 1.6565854436599303e-06, "loss": 11.7038, "step": 34661 }, { "epoch": 1.887484429560352, "grad_norm": 0.5452153717444679, "learning_rate": 1.6549873776254166e-06, "loss": 11.8005, "step": 34662 }, { "epoch": 1.887538883556935, "grad_norm": 0.5488223450922112, "learning_rate": 1.6533900763382125e-06, "loss": 11.7307, "step": 34663 }, { "epoch": 1.887593337553518, "grad_norm": 0.5598097191485137, "learning_rate": 1.651793539810742e-06, "loss": 11.7573, "step": 34664 }, { "epoch": 1.887647791550101, "grad_norm": 0.5145815345171996, "learning_rate": 1.650197768055417e-06, "loss": 11.7986, "step": 34665 }, { "epoch": 1.887702245546684, "grad_norm": 0.5233328645199831, "learning_rate": 1.6486027610846499e-06, "loss": 11.7454, "step": 34666 }, { "epoch": 1.887756699543267, "grad_norm": 0.5236644100545705, "learning_rate": 1.647008518910842e-06, "loss": 11.7264, "step": 34667 }, { "epoch": 1.88781115353985, "grad_norm": 0.48575464511707533, "learning_rate": 1.6454150415463832e-06, "loss": 11.695, "step": 34668 }, { "epoch": 1.8878656075364333, "grad_norm": 0.5971994981377021, "learning_rate": 1.6438223290036747e-06, "loss": 11.7124, "step": 34669 }, { "epoch": 1.8879200615330163, "grad_norm": 0.5287751875479139, "learning_rate": 1.6422303812951068e-06, "loss": 11.7466, "step": 34670 }, { "epoch": 1.8879745155295993, "grad_norm": 0.5880344382484398, "learning_rate": 1.640639198433036e-06, "loss": 11.9114, "step": 34671 }, { "epoch": 1.8880289695261823, "grad_norm": 0.5139600890663518, "learning_rate": 1.6390487804298527e-06, "loss": 11.7664, "step": 34672 }, { "epoch": 1.8880834235227653, "grad_norm": 0.5631639749303369, "learning_rate": 1.6374591272979244e-06, "loss": 11.7814, "step": 34673 }, { "epoch": 1.8881378775193483, "grad_norm": 0.5110284170270594, "learning_rate": 1.6358702390496084e-06, "loss": 11.6061, "step": 34674 }, { "epoch": 1.8881923315159312, "grad_norm": 0.5624146109225929, "learning_rate": 1.634282115697261e-06, "loss": 11.8418, "step": 34675 }, { "epoch": 1.8882467855125142, "grad_norm": 0.5334597198508444, "learning_rate": 1.6326947572532281e-06, "loss": 11.7894, "step": 34676 }, { "epoch": 1.8883012395090972, "grad_norm": 0.5403257393225684, "learning_rate": 1.6311081637298665e-06, "loss": 11.7361, "step": 34677 }, { "epoch": 1.8883556935056802, "grad_norm": 0.5198175447217575, "learning_rate": 1.6295223351394884e-06, "loss": 11.7902, "step": 34678 }, { "epoch": 1.8884101475022632, "grad_norm": 0.5247410821422873, "learning_rate": 1.627937271494462e-06, "loss": 11.6793, "step": 34679 }, { "epoch": 1.8884646014988462, "grad_norm": 0.5245438830662863, "learning_rate": 1.626352972807077e-06, "loss": 11.8101, "step": 34680 }, { "epoch": 1.8885190554954292, "grad_norm": 0.5197853217966982, "learning_rate": 1.6247694390896685e-06, "loss": 11.7402, "step": 34681 }, { "epoch": 1.8885735094920122, "grad_norm": 0.6099667352408009, "learning_rate": 1.6231866703545594e-06, "loss": 11.7899, "step": 34682 }, { "epoch": 1.8886279634885952, "grad_norm": 0.541271358248567, "learning_rate": 1.6216046666140405e-06, "loss": 11.7236, "step": 34683 }, { "epoch": 1.8886824174851782, "grad_norm": 0.5240606070074448, "learning_rate": 1.620023427880435e-06, "loss": 11.8339, "step": 34684 }, { "epoch": 1.8887368714817612, "grad_norm": 0.5808576552608417, "learning_rate": 1.6184429541660106e-06, "loss": 11.7861, "step": 34685 }, { "epoch": 1.8887913254783442, "grad_norm": 0.4815287009993346, "learning_rate": 1.6168632454830802e-06, "loss": 11.8493, "step": 34686 }, { "epoch": 1.8888457794749274, "grad_norm": 0.5570491946695187, "learning_rate": 1.6152843018439111e-06, "loss": 11.8726, "step": 34687 }, { "epoch": 1.8889002334715104, "grad_norm": 0.5613414560471554, "learning_rate": 1.6137061232607942e-06, "loss": 11.6397, "step": 34688 }, { "epoch": 1.8889546874680934, "grad_norm": 0.518802792322339, "learning_rate": 1.6121287097460192e-06, "loss": 11.6943, "step": 34689 }, { "epoch": 1.8890091414646764, "grad_norm": 0.5352295290433896, "learning_rate": 1.6105520613118097e-06, "loss": 11.6351, "step": 34690 }, { "epoch": 1.8890635954612593, "grad_norm": 0.6134100862756692, "learning_rate": 1.6089761779704449e-06, "loss": 11.9323, "step": 34691 }, { "epoch": 1.8891180494578426, "grad_norm": 0.5595482894017889, "learning_rate": 1.6074010597341927e-06, "loss": 11.8118, "step": 34692 }, { "epoch": 1.8891725034544256, "grad_norm": 0.49573460899014316, "learning_rate": 1.6058267066152765e-06, "loss": 11.7629, "step": 34693 }, { "epoch": 1.8892269574510085, "grad_norm": 0.48920837880105894, "learning_rate": 1.6042531186259646e-06, "loss": 11.7556, "step": 34694 }, { "epoch": 1.8892814114475915, "grad_norm": 0.5987163818653659, "learning_rate": 1.6026802957784691e-06, "loss": 11.7194, "step": 34695 }, { "epoch": 1.8893358654441745, "grad_norm": 0.5411999354481595, "learning_rate": 1.6011082380850472e-06, "loss": 11.8184, "step": 34696 }, { "epoch": 1.8893903194407575, "grad_norm": 0.5333057676812357, "learning_rate": 1.5995369455579001e-06, "loss": 11.8026, "step": 34697 }, { "epoch": 1.8894447734373405, "grad_norm": 0.531896850009571, "learning_rate": 1.5979664182092514e-06, "loss": 11.8222, "step": 34698 }, { "epoch": 1.8894992274339235, "grad_norm": 0.5833299775246741, "learning_rate": 1.5963966560513465e-06, "loss": 11.8024, "step": 34699 }, { "epoch": 1.8895536814305065, "grad_norm": 0.49339265781876485, "learning_rate": 1.5948276590963318e-06, "loss": 11.7774, "step": 34700 }, { "epoch": 1.8896081354270895, "grad_norm": 0.5774944496657055, "learning_rate": 1.5932594273564528e-06, "loss": 11.8376, "step": 34701 }, { "epoch": 1.8896625894236725, "grad_norm": 0.5468619797962504, "learning_rate": 1.5916919608438885e-06, "loss": 11.5729, "step": 34702 }, { "epoch": 1.8897170434202555, "grad_norm": 0.5493901200405292, "learning_rate": 1.5901252595708293e-06, "loss": 11.8508, "step": 34703 }, { "epoch": 1.8897714974168385, "grad_norm": 0.5532054980662606, "learning_rate": 1.5885593235494657e-06, "loss": 11.6965, "step": 34704 }, { "epoch": 1.8898259514134215, "grad_norm": 0.5317276365390414, "learning_rate": 1.586994152791954e-06, "loss": 11.7395, "step": 34705 }, { "epoch": 1.8898804054100045, "grad_norm": 0.549413117040086, "learning_rate": 1.5854297473104961e-06, "loss": 11.7359, "step": 34706 }, { "epoch": 1.8899348594065875, "grad_norm": 0.5398246119148612, "learning_rate": 1.5838661071172268e-06, "loss": 11.7162, "step": 34707 }, { "epoch": 1.8899893134031704, "grad_norm": 0.5557072050995043, "learning_rate": 1.5823032322243248e-06, "loss": 11.7413, "step": 34708 }, { "epoch": 1.8900437673997534, "grad_norm": 0.5064212637841524, "learning_rate": 1.580741122643936e-06, "loss": 11.7254, "step": 34709 }, { "epoch": 1.8900982213963367, "grad_norm": 0.5484464114083785, "learning_rate": 1.5791797783882178e-06, "loss": 11.7821, "step": 34710 }, { "epoch": 1.8901526753929196, "grad_norm": 0.6471395838410182, "learning_rate": 1.5776191994693046e-06, "loss": 11.8386, "step": 34711 }, { "epoch": 1.8902071293895026, "grad_norm": 0.5416635492934879, "learning_rate": 1.57605938589932e-06, "loss": 11.7639, "step": 34712 }, { "epoch": 1.8902615833860856, "grad_norm": 0.5751980565896976, "learning_rate": 1.5745003376903987e-06, "loss": 11.8457, "step": 34713 }, { "epoch": 1.8903160373826686, "grad_norm": 0.4850482014936915, "learning_rate": 1.5729420548546758e-06, "loss": 11.775, "step": 34714 }, { "epoch": 1.8903704913792516, "grad_norm": 0.5402958731500984, "learning_rate": 1.5713845374042634e-06, "loss": 11.7768, "step": 34715 }, { "epoch": 1.8904249453758348, "grad_norm": 0.5487319009951255, "learning_rate": 1.5698277853512634e-06, "loss": 11.8579, "step": 34716 }, { "epoch": 1.8904793993724178, "grad_norm": 0.5120235422922093, "learning_rate": 1.5682717987077988e-06, "loss": 11.6429, "step": 34717 }, { "epoch": 1.8905338533690008, "grad_norm": 0.5177553700338551, "learning_rate": 1.5667165774859604e-06, "loss": 11.8002, "step": 34718 }, { "epoch": 1.8905883073655838, "grad_norm": 0.5220602464741136, "learning_rate": 1.5651621216978274e-06, "loss": 11.7366, "step": 34719 }, { "epoch": 1.8906427613621668, "grad_norm": 0.5365320626779672, "learning_rate": 1.5636084313555122e-06, "loss": 11.7242, "step": 34720 }, { "epoch": 1.8906972153587498, "grad_norm": 0.5375619959670963, "learning_rate": 1.5620555064710939e-06, "loss": 11.7972, "step": 34721 }, { "epoch": 1.8907516693553328, "grad_norm": 0.5818501230587102, "learning_rate": 1.56050334705663e-06, "loss": 11.6516, "step": 34722 }, { "epoch": 1.8908061233519158, "grad_norm": 0.5517358627891079, "learning_rate": 1.558951953124199e-06, "loss": 11.7736, "step": 34723 }, { "epoch": 1.8908605773484988, "grad_norm": 0.5677645282670712, "learning_rate": 1.5574013246858587e-06, "loss": 11.5162, "step": 34724 }, { "epoch": 1.8909150313450818, "grad_norm": 0.5050136781134978, "learning_rate": 1.5558514617536878e-06, "loss": 11.78, "step": 34725 }, { "epoch": 1.8909694853416648, "grad_norm": 0.5766250407978719, "learning_rate": 1.5543023643397214e-06, "loss": 11.815, "step": 34726 }, { "epoch": 1.8910239393382478, "grad_norm": 0.5869011819929751, "learning_rate": 1.5527540324560052e-06, "loss": 11.8199, "step": 34727 }, { "epoch": 1.8910783933348307, "grad_norm": 0.5516178557245982, "learning_rate": 1.5512064661145854e-06, "loss": 11.8658, "step": 34728 }, { "epoch": 1.8911328473314137, "grad_norm": 0.5291889074957623, "learning_rate": 1.5496596653274965e-06, "loss": 11.7808, "step": 34729 }, { "epoch": 1.8911873013279967, "grad_norm": 0.4693384375666813, "learning_rate": 1.5481136301067623e-06, "loss": 11.7714, "step": 34730 }, { "epoch": 1.8912417553245797, "grad_norm": 0.5802511886742016, "learning_rate": 1.5465683604644177e-06, "loss": 11.8109, "step": 34731 }, { "epoch": 1.8912962093211627, "grad_norm": 0.5188283475927622, "learning_rate": 1.5450238564124531e-06, "loss": 11.7134, "step": 34732 }, { "epoch": 1.891350663317746, "grad_norm": 0.5295055212468985, "learning_rate": 1.5434801179629034e-06, "loss": 11.8336, "step": 34733 }, { "epoch": 1.891405117314329, "grad_norm": 0.5805184217072353, "learning_rate": 1.5419371451277476e-06, "loss": 11.8499, "step": 34734 }, { "epoch": 1.891459571310912, "grad_norm": 0.5713144011010932, "learning_rate": 1.5403949379190096e-06, "loss": 11.7572, "step": 34735 }, { "epoch": 1.891514025307495, "grad_norm": 0.5472873777591046, "learning_rate": 1.5388534963486801e-06, "loss": 11.7151, "step": 34736 }, { "epoch": 1.891568479304078, "grad_norm": 0.5995463581218854, "learning_rate": 1.5373128204287268e-06, "loss": 11.8759, "step": 34737 }, { "epoch": 1.891622933300661, "grad_norm": 0.5159348677970845, "learning_rate": 1.5357729101711517e-06, "loss": 11.6406, "step": 34738 }, { "epoch": 1.891677387297244, "grad_norm": 0.5494847554065374, "learning_rate": 1.5342337655879112e-06, "loss": 11.7725, "step": 34739 }, { "epoch": 1.891731841293827, "grad_norm": 0.5773632038676855, "learning_rate": 1.5326953866909855e-06, "loss": 11.7697, "step": 34740 }, { "epoch": 1.89178629529041, "grad_norm": 0.5364709842273305, "learning_rate": 1.531157773492331e-06, "loss": 11.7066, "step": 34741 }, { "epoch": 1.891840749286993, "grad_norm": 0.4894740729461135, "learning_rate": 1.529620926003905e-06, "loss": 11.6814, "step": 34742 }, { "epoch": 1.891895203283576, "grad_norm": 0.5604563994796176, "learning_rate": 1.528084844237665e-06, "loss": 11.8408, "step": 34743 }, { "epoch": 1.891949657280159, "grad_norm": 0.5781736009590769, "learning_rate": 1.5265495282055453e-06, "loss": 11.8307, "step": 34744 }, { "epoch": 1.892004111276742, "grad_norm": 0.542475410913117, "learning_rate": 1.5250149779195034e-06, "loss": 11.7901, "step": 34745 }, { "epoch": 1.892058565273325, "grad_norm": 0.5735364564893207, "learning_rate": 1.5234811933914406e-06, "loss": 11.6456, "step": 34746 }, { "epoch": 1.892113019269908, "grad_norm": 0.5097625218550237, "learning_rate": 1.521948174633314e-06, "loss": 11.7917, "step": 34747 }, { "epoch": 1.892167473266491, "grad_norm": 0.5090108605281053, "learning_rate": 1.5204159216570258e-06, "loss": 11.698, "step": 34748 }, { "epoch": 1.892221927263074, "grad_norm": 0.5908728465726407, "learning_rate": 1.518884434474499e-06, "loss": 11.8075, "step": 34749 }, { "epoch": 1.892276381259657, "grad_norm": 0.5441837765889614, "learning_rate": 1.5173537130976578e-06, "loss": 11.7498, "step": 34750 }, { "epoch": 1.89233083525624, "grad_norm": 0.523598287999606, "learning_rate": 1.5158237575383816e-06, "loss": 11.7017, "step": 34751 }, { "epoch": 1.892385289252823, "grad_norm": 0.5610758206507106, "learning_rate": 1.5142945678085719e-06, "loss": 11.8497, "step": 34752 }, { "epoch": 1.892439743249406, "grad_norm": 0.4921037485056164, "learning_rate": 1.512766143920119e-06, "loss": 11.7247, "step": 34753 }, { "epoch": 1.892494197245989, "grad_norm": 0.5580199380516381, "learning_rate": 1.5112384858849137e-06, "loss": 11.666, "step": 34754 }, { "epoch": 1.892548651242572, "grad_norm": 0.5300540095189908, "learning_rate": 1.5097115937148464e-06, "loss": 11.8728, "step": 34755 }, { "epoch": 1.892603105239155, "grad_norm": 0.5143627162845545, "learning_rate": 1.5081854674217632e-06, "loss": 11.8203, "step": 34756 }, { "epoch": 1.8926575592357382, "grad_norm": 0.5525976886598135, "learning_rate": 1.5066601070175657e-06, "loss": 11.8271, "step": 34757 }, { "epoch": 1.8927120132323212, "grad_norm": 0.5606215905430598, "learning_rate": 1.5051355125140775e-06, "loss": 11.8279, "step": 34758 }, { "epoch": 1.8927664672289042, "grad_norm": 0.5401105593454313, "learning_rate": 1.5036116839231785e-06, "loss": 11.7004, "step": 34759 }, { "epoch": 1.8928209212254872, "grad_norm": 0.5605481407711291, "learning_rate": 1.5020886212567254e-06, "loss": 11.8379, "step": 34760 }, { "epoch": 1.8928753752220702, "grad_norm": 0.4993905054794739, "learning_rate": 1.5005663245265423e-06, "loss": 11.7567, "step": 34761 }, { "epoch": 1.8929298292186534, "grad_norm": 0.5246236421027616, "learning_rate": 1.499044793744464e-06, "loss": 11.7809, "step": 34762 }, { "epoch": 1.8929842832152364, "grad_norm": 0.5593205693022092, "learning_rate": 1.497524028922337e-06, "loss": 11.7566, "step": 34763 }, { "epoch": 1.8930387372118194, "grad_norm": 0.5288054314884846, "learning_rate": 1.4960040300719846e-06, "loss": 11.7922, "step": 34764 }, { "epoch": 1.8930931912084024, "grad_norm": 0.60937038346665, "learning_rate": 1.494484797205231e-06, "loss": 11.8041, "step": 34765 }, { "epoch": 1.8931476452049854, "grad_norm": 0.5091641163587646, "learning_rate": 1.492966330333867e-06, "loss": 11.5661, "step": 34766 }, { "epoch": 1.8932020992015683, "grad_norm": 0.5661740741072653, "learning_rate": 1.4914486294697271e-06, "loss": 11.8103, "step": 34767 }, { "epoch": 1.8932565531981513, "grad_norm": 0.5477024116714037, "learning_rate": 1.4899316946246022e-06, "loss": 11.7092, "step": 34768 }, { "epoch": 1.8933110071947343, "grad_norm": 0.47994673754978207, "learning_rate": 1.4884155258102828e-06, "loss": 11.6568, "step": 34769 }, { "epoch": 1.8933654611913173, "grad_norm": 0.5191042016630331, "learning_rate": 1.4869001230385815e-06, "loss": 11.7265, "step": 34770 }, { "epoch": 1.8934199151879003, "grad_norm": 0.49992275049186446, "learning_rate": 1.4853854863212447e-06, "loss": 11.7216, "step": 34771 }, { "epoch": 1.8934743691844833, "grad_norm": 0.537041236182039, "learning_rate": 1.483871615670085e-06, "loss": 11.7867, "step": 34772 }, { "epoch": 1.8935288231810663, "grad_norm": 0.5737665435883587, "learning_rate": 1.4823585110968486e-06, "loss": 11.9449, "step": 34773 }, { "epoch": 1.8935832771776493, "grad_norm": 0.5464830454808277, "learning_rate": 1.480846172613315e-06, "loss": 11.752, "step": 34774 }, { "epoch": 1.8936377311742323, "grad_norm": 0.5219663064655751, "learning_rate": 1.4793346002312524e-06, "loss": 11.7249, "step": 34775 }, { "epoch": 1.8936921851708153, "grad_norm": 0.5716351177460596, "learning_rate": 1.4778237939623962e-06, "loss": 11.7522, "step": 34776 }, { "epoch": 1.8937466391673983, "grad_norm": 0.526522341466927, "learning_rate": 1.4763137538185146e-06, "loss": 11.6462, "step": 34777 }, { "epoch": 1.8938010931639813, "grad_norm": 0.5344246023625869, "learning_rate": 1.4748044798113315e-06, "loss": 11.612, "step": 34778 }, { "epoch": 1.8938555471605643, "grad_norm": 0.590258437698882, "learning_rate": 1.473295971952604e-06, "loss": 11.7494, "step": 34779 }, { "epoch": 1.8939100011571475, "grad_norm": 0.49805653184473947, "learning_rate": 1.4717882302540454e-06, "loss": 11.7127, "step": 34780 }, { "epoch": 1.8939644551537305, "grad_norm": 0.5563173794132958, "learning_rate": 1.4702812547273793e-06, "loss": 11.7098, "step": 34781 }, { "epoch": 1.8940189091503135, "grad_norm": 0.5431409688264829, "learning_rate": 1.4687750453843296e-06, "loss": 11.8068, "step": 34782 }, { "epoch": 1.8940733631468964, "grad_norm": 0.5453814382357687, "learning_rate": 1.4672696022366095e-06, "loss": 11.839, "step": 34783 }, { "epoch": 1.8941278171434794, "grad_norm": 0.5824055691910709, "learning_rate": 1.4657649252959204e-06, "loss": 11.8123, "step": 34784 }, { "epoch": 1.8941822711400624, "grad_norm": 0.5229939638108031, "learning_rate": 1.4642610145739755e-06, "loss": 11.6223, "step": 34785 }, { "epoch": 1.8942367251366456, "grad_norm": 0.5515915628150705, "learning_rate": 1.462757870082454e-06, "loss": 11.7954, "step": 34786 }, { "epoch": 1.8942911791332286, "grad_norm": 0.5806034845993756, "learning_rate": 1.4612554918330579e-06, "loss": 11.6721, "step": 34787 }, { "epoch": 1.8943456331298116, "grad_norm": 0.5664725552415001, "learning_rate": 1.4597538798374554e-06, "loss": 11.6593, "step": 34788 }, { "epoch": 1.8944000871263946, "grad_norm": 0.5032714377117913, "learning_rate": 1.4582530341073487e-06, "loss": 11.7285, "step": 34789 }, { "epoch": 1.8944545411229776, "grad_norm": 0.5413865182310977, "learning_rate": 1.4567529546543833e-06, "loss": 11.8393, "step": 34790 }, { "epoch": 1.8945089951195606, "grad_norm": 0.5406849285193681, "learning_rate": 1.4552536414902285e-06, "loss": 11.7651, "step": 34791 }, { "epoch": 1.8945634491161436, "grad_norm": 0.5438037667417679, "learning_rate": 1.453755094626552e-06, "loss": 11.8675, "step": 34792 }, { "epoch": 1.8946179031127266, "grad_norm": 0.48452529742500633, "learning_rate": 1.4522573140750008e-06, "loss": 11.7498, "step": 34793 }, { "epoch": 1.8946723571093096, "grad_norm": 0.5504460965053491, "learning_rate": 1.4507602998472204e-06, "loss": 11.758, "step": 34794 }, { "epoch": 1.8947268111058926, "grad_norm": 0.5358206341292236, "learning_rate": 1.4492640519548572e-06, "loss": 11.8275, "step": 34795 }, { "epoch": 1.8947812651024756, "grad_norm": 0.5243397355325661, "learning_rate": 1.4477685704095356e-06, "loss": 11.8239, "step": 34796 }, { "epoch": 1.8948357190990586, "grad_norm": 0.5464946328937796, "learning_rate": 1.4462738552229128e-06, "loss": 11.7493, "step": 34797 }, { "epoch": 1.8948901730956416, "grad_norm": 0.6018134686648586, "learning_rate": 1.444779906406568e-06, "loss": 11.7378, "step": 34798 }, { "epoch": 1.8949446270922246, "grad_norm": 0.550844955802657, "learning_rate": 1.4432867239721592e-06, "loss": 11.7504, "step": 34799 }, { "epoch": 1.8949990810888075, "grad_norm": 0.5681524069504759, "learning_rate": 1.4417943079312768e-06, "loss": 11.6855, "step": 34800 }, { "epoch": 1.8950535350853905, "grad_norm": 0.6080234433877981, "learning_rate": 1.4403026582955337e-06, "loss": 11.8908, "step": 34801 }, { "epoch": 1.8951079890819735, "grad_norm": 0.5256429397977135, "learning_rate": 1.4388117750765207e-06, "loss": 11.6753, "step": 34802 }, { "epoch": 1.8951624430785567, "grad_norm": 0.5545665709539822, "learning_rate": 1.4373216582858285e-06, "loss": 11.6976, "step": 34803 }, { "epoch": 1.8952168970751397, "grad_norm": 0.5014566453021978, "learning_rate": 1.43583230793507e-06, "loss": 11.7029, "step": 34804 }, { "epoch": 1.8952713510717227, "grad_norm": 0.538242420412618, "learning_rate": 1.4343437240357916e-06, "loss": 11.7059, "step": 34805 }, { "epoch": 1.8953258050683057, "grad_norm": 0.5268630452561806, "learning_rate": 1.432855906599595e-06, "loss": 11.6758, "step": 34806 }, { "epoch": 1.8953802590648887, "grad_norm": 0.586093835070148, "learning_rate": 1.4313688556380377e-06, "loss": 11.7773, "step": 34807 }, { "epoch": 1.8954347130614717, "grad_norm": 0.5295547503407131, "learning_rate": 1.4298825711626884e-06, "loss": 11.683, "step": 34808 }, { "epoch": 1.895489167058055, "grad_norm": 0.5564299490582887, "learning_rate": 1.4283970531851044e-06, "loss": 11.7742, "step": 34809 }, { "epoch": 1.895543621054638, "grad_norm": 0.6002470713604036, "learning_rate": 1.4269123017168318e-06, "loss": 11.8173, "step": 34810 }, { "epoch": 1.895598075051221, "grad_norm": 0.6225778379082046, "learning_rate": 1.4254283167694172e-06, "loss": 12.0001, "step": 34811 }, { "epoch": 1.895652529047804, "grad_norm": 0.5628654435386629, "learning_rate": 1.4239450983544068e-06, "loss": 11.7122, "step": 34812 }, { "epoch": 1.895706983044387, "grad_norm": 0.49712718917347337, "learning_rate": 1.4224626464833246e-06, "loss": 11.7467, "step": 34813 }, { "epoch": 1.8957614370409699, "grad_norm": 0.5111721505786188, "learning_rate": 1.420980961167717e-06, "loss": 11.7691, "step": 34814 }, { "epoch": 1.8958158910375529, "grad_norm": 0.5331358144303171, "learning_rate": 1.4195000424190751e-06, "loss": 11.8331, "step": 34815 }, { "epoch": 1.8958703450341359, "grad_norm": 0.6406120784888542, "learning_rate": 1.418019890248956e-06, "loss": 11.6602, "step": 34816 }, { "epoch": 1.8959247990307189, "grad_norm": 0.5092941387868579, "learning_rate": 1.4165405046688285e-06, "loss": 11.7415, "step": 34817 }, { "epoch": 1.8959792530273019, "grad_norm": 0.5475993714739043, "learning_rate": 1.4150618856902164e-06, "loss": 11.6683, "step": 34818 }, { "epoch": 1.8960337070238849, "grad_norm": 0.5204246077710504, "learning_rate": 1.4135840333246219e-06, "loss": 11.8169, "step": 34819 }, { "epoch": 1.8960881610204678, "grad_norm": 0.5832453250973386, "learning_rate": 1.4121069475835247e-06, "loss": 11.8012, "step": 34820 }, { "epoch": 1.8961426150170508, "grad_norm": 0.5604925075904181, "learning_rate": 1.4106306284784265e-06, "loss": 11.7356, "step": 34821 }, { "epoch": 1.8961970690136338, "grad_norm": 0.5131718079393366, "learning_rate": 1.409155076020785e-06, "loss": 11.8441, "step": 34822 }, { "epoch": 1.8962515230102168, "grad_norm": 0.512722172347258, "learning_rate": 1.407680290222091e-06, "loss": 11.812, "step": 34823 }, { "epoch": 1.8963059770067998, "grad_norm": 0.6201351620301891, "learning_rate": 1.406206271093824e-06, "loss": 11.8832, "step": 34824 }, { "epoch": 1.8963604310033828, "grad_norm": 0.5031496210608059, "learning_rate": 1.4047330186474085e-06, "loss": 11.6435, "step": 34825 }, { "epoch": 1.8964148849999658, "grad_norm": 0.5593957168834991, "learning_rate": 1.403260532894346e-06, "loss": 11.8077, "step": 34826 }, { "epoch": 1.896469338996549, "grad_norm": 0.5424184431281923, "learning_rate": 1.4017888138460388e-06, "loss": 11.7674, "step": 34827 }, { "epoch": 1.896523792993132, "grad_norm": 0.5589082158369856, "learning_rate": 1.4003178615139777e-06, "loss": 11.7461, "step": 34828 }, { "epoch": 1.896578246989715, "grad_norm": 0.5458886550404423, "learning_rate": 1.3988476759095758e-06, "loss": 11.7457, "step": 34829 }, { "epoch": 1.896632700986298, "grad_norm": 0.5190695557372617, "learning_rate": 1.3973782570442684e-06, "loss": 11.7606, "step": 34830 }, { "epoch": 1.896687154982881, "grad_norm": 0.5457732202297492, "learning_rate": 1.3959096049294795e-06, "loss": 11.7472, "step": 34831 }, { "epoch": 1.8967416089794642, "grad_norm": 0.5009467775369594, "learning_rate": 1.3944417195766335e-06, "loss": 11.8235, "step": 34832 }, { "epoch": 1.8967960629760472, "grad_norm": 0.5759621152852069, "learning_rate": 1.3929746009971433e-06, "loss": 11.7632, "step": 34833 }, { "epoch": 1.8968505169726302, "grad_norm": 0.5048663985822598, "learning_rate": 1.3915082492024334e-06, "loss": 11.7819, "step": 34834 }, { "epoch": 1.8969049709692132, "grad_norm": 0.49344584842703065, "learning_rate": 1.3900426642038721e-06, "loss": 11.6221, "step": 34835 }, { "epoch": 1.8969594249657962, "grad_norm": 0.5586343453616, "learning_rate": 1.388577846012884e-06, "loss": 11.8319, "step": 34836 }, { "epoch": 1.8970138789623792, "grad_norm": 0.5940360802026438, "learning_rate": 1.3871137946408597e-06, "loss": 11.9579, "step": 34837 }, { "epoch": 1.8970683329589622, "grad_norm": 0.5076628703369054, "learning_rate": 1.3856505100991678e-06, "loss": 11.7297, "step": 34838 }, { "epoch": 1.8971227869555451, "grad_norm": 0.6115903399663384, "learning_rate": 1.3841879923991885e-06, "loss": 11.8168, "step": 34839 }, { "epoch": 1.8971772409521281, "grad_norm": 0.5392599585338174, "learning_rate": 1.3827262415523124e-06, "loss": 11.814, "step": 34840 }, { "epoch": 1.8972316949487111, "grad_norm": 0.5822170653633048, "learning_rate": 1.381265257569897e-06, "loss": 11.772, "step": 34841 }, { "epoch": 1.8972861489452941, "grad_norm": 0.5396554052281479, "learning_rate": 1.3798050404633e-06, "loss": 11.735, "step": 34842 }, { "epoch": 1.8973406029418771, "grad_norm": 0.517078631050961, "learning_rate": 1.3783455902438792e-06, "loss": 11.692, "step": 34843 }, { "epoch": 1.8973950569384601, "grad_norm": 0.5173106123180002, "learning_rate": 1.3768869069229695e-06, "loss": 11.8019, "step": 34844 }, { "epoch": 1.897449510935043, "grad_norm": 0.5681062754518449, "learning_rate": 1.3754289905119288e-06, "loss": 11.6775, "step": 34845 }, { "epoch": 1.897503964931626, "grad_norm": 0.6031698787342817, "learning_rate": 1.3739718410221037e-06, "loss": 11.831, "step": 34846 }, { "epoch": 1.897558418928209, "grad_norm": 0.49968257780963066, "learning_rate": 1.372515458464796e-06, "loss": 11.8007, "step": 34847 }, { "epoch": 1.897612872924792, "grad_norm": 0.5205453754959796, "learning_rate": 1.3710598428513633e-06, "loss": 11.6712, "step": 34848 }, { "epoch": 1.897667326921375, "grad_norm": 0.5691755387365655, "learning_rate": 1.3696049941930967e-06, "loss": 11.8457, "step": 34849 }, { "epoch": 1.8977217809179583, "grad_norm": 0.49082575073352686, "learning_rate": 1.3681509125013314e-06, "loss": 11.7527, "step": 34850 }, { "epoch": 1.8977762349145413, "grad_norm": 0.5544832287565732, "learning_rate": 1.3666975977873697e-06, "loss": 11.8401, "step": 34851 }, { "epoch": 1.8978306889111243, "grad_norm": 0.5811081783586601, "learning_rate": 1.3652450500625026e-06, "loss": 11.7448, "step": 34852 }, { "epoch": 1.8978851429077073, "grad_norm": 0.5831570482655312, "learning_rate": 1.3637932693380318e-06, "loss": 11.7733, "step": 34853 }, { "epoch": 1.8979395969042903, "grad_norm": 0.5353805915594402, "learning_rate": 1.3623422556252375e-06, "loss": 11.7708, "step": 34854 }, { "epoch": 1.8979940509008733, "grad_norm": 0.6718657003779454, "learning_rate": 1.3608920089354217e-06, "loss": 11.8133, "step": 34855 }, { "epoch": 1.8980485048974565, "grad_norm": 0.5680055538659191, "learning_rate": 1.3594425292798418e-06, "loss": 11.7966, "step": 34856 }, { "epoch": 1.8981029588940395, "grad_norm": 0.5177834196991179, "learning_rate": 1.3579938166697892e-06, "loss": 11.726, "step": 34857 }, { "epoch": 1.8981574128906225, "grad_norm": 0.5225753726577712, "learning_rate": 1.3565458711165101e-06, "loss": 11.7457, "step": 34858 }, { "epoch": 1.8982118668872054, "grad_norm": 0.48728829065372314, "learning_rate": 1.3550986926312736e-06, "loss": 11.7121, "step": 34859 }, { "epoch": 1.8982663208837884, "grad_norm": 0.5293433402269976, "learning_rate": 1.353652281225337e-06, "loss": 11.6661, "step": 34860 }, { "epoch": 1.8983207748803714, "grad_norm": 0.6206070880327738, "learning_rate": 1.3522066369099473e-06, "loss": 11.8236, "step": 34861 }, { "epoch": 1.8983752288769544, "grad_norm": 0.5734420945731776, "learning_rate": 1.3507617596963396e-06, "loss": 11.841, "step": 34862 }, { "epoch": 1.8984296828735374, "grad_norm": 0.5792092352979687, "learning_rate": 1.3493176495957493e-06, "loss": 11.8518, "step": 34863 }, { "epoch": 1.8984841368701204, "grad_norm": 0.6182898818515133, "learning_rate": 1.3478743066194011e-06, "loss": 11.7188, "step": 34864 }, { "epoch": 1.8985385908667034, "grad_norm": 0.5467828552647913, "learning_rate": 1.3464317307785413e-06, "loss": 11.7695, "step": 34865 }, { "epoch": 1.8985930448632864, "grad_norm": 0.5113883760743647, "learning_rate": 1.344989922084361e-06, "loss": 11.7157, "step": 34866 }, { "epoch": 1.8986474988598694, "grad_norm": 0.5428993289081379, "learning_rate": 1.3435488805480845e-06, "loss": 11.8631, "step": 34867 }, { "epoch": 1.8987019528564524, "grad_norm": 0.5534172518042819, "learning_rate": 1.342108606180925e-06, "loss": 11.7682, "step": 34868 }, { "epoch": 1.8987564068530354, "grad_norm": 0.5327623657273541, "learning_rate": 1.340669098994063e-06, "loss": 11.6865, "step": 34869 }, { "epoch": 1.8988108608496184, "grad_norm": 0.5025208902523031, "learning_rate": 1.339230358998711e-06, "loss": 11.8081, "step": 34870 }, { "epoch": 1.8988653148462014, "grad_norm": 0.5593433130700441, "learning_rate": 1.3377923862060493e-06, "loss": 11.7972, "step": 34871 }, { "epoch": 1.8989197688427844, "grad_norm": 0.5229594386061016, "learning_rate": 1.3363551806272578e-06, "loss": 11.7237, "step": 34872 }, { "epoch": 1.8989742228393676, "grad_norm": 0.5664251049278367, "learning_rate": 1.3349187422735166e-06, "loss": 11.746, "step": 34873 }, { "epoch": 1.8990286768359506, "grad_norm": 0.5076171833846765, "learning_rate": 1.3334830711559832e-06, "loss": 11.8043, "step": 34874 }, { "epoch": 1.8990831308325336, "grad_norm": 0.611139018203511, "learning_rate": 1.3320481672858487e-06, "loss": 11.8499, "step": 34875 }, { "epoch": 1.8991375848291165, "grad_norm": 0.5403750894731288, "learning_rate": 1.3306140306742377e-06, "loss": 11.7036, "step": 34876 }, { "epoch": 1.8991920388256995, "grad_norm": 0.5165284539906411, "learning_rate": 1.32918066133233e-06, "loss": 11.7363, "step": 34877 }, { "epoch": 1.8992464928222825, "grad_norm": 0.6575863872562808, "learning_rate": 1.32774805927125e-06, "loss": 11.8829, "step": 34878 }, { "epoch": 1.8993009468188657, "grad_norm": 0.5447377032095552, "learning_rate": 1.3263162245021553e-06, "loss": 11.8546, "step": 34879 }, { "epoch": 1.8993554008154487, "grad_norm": 0.5401548915126622, "learning_rate": 1.324885157036171e-06, "loss": 11.8171, "step": 34880 }, { "epoch": 1.8994098548120317, "grad_norm": 0.6040337382301092, "learning_rate": 1.3234548568844318e-06, "loss": 11.8438, "step": 34881 }, { "epoch": 1.8994643088086147, "grad_norm": 0.518418596072161, "learning_rate": 1.3220253240580516e-06, "loss": 11.7245, "step": 34882 }, { "epoch": 1.8995187628051977, "grad_norm": 0.5037391785542648, "learning_rate": 1.3205965585681545e-06, "loss": 11.7445, "step": 34883 }, { "epoch": 1.8995732168017807, "grad_norm": 0.530468799518843, "learning_rate": 1.319168560425843e-06, "loss": 11.7476, "step": 34884 }, { "epoch": 1.8996276707983637, "grad_norm": 0.5375853461548761, "learning_rate": 1.3177413296422303e-06, "loss": 11.7333, "step": 34885 }, { "epoch": 1.8996821247949467, "grad_norm": 0.5772703460003947, "learning_rate": 1.3163148662284074e-06, "loss": 11.837, "step": 34886 }, { "epoch": 1.8997365787915297, "grad_norm": 0.4990338475383423, "learning_rate": 1.3148891701954768e-06, "loss": 11.7326, "step": 34887 }, { "epoch": 1.8997910327881127, "grad_norm": 0.5248471910613208, "learning_rate": 1.3134642415545074e-06, "loss": 11.8689, "step": 34888 }, { "epoch": 1.8998454867846957, "grad_norm": 0.5597959271870933, "learning_rate": 1.31204008031659e-06, "loss": 11.8377, "step": 34889 }, { "epoch": 1.8998999407812787, "grad_norm": 0.5805332350728085, "learning_rate": 1.3106166864928048e-06, "loss": 11.7712, "step": 34890 }, { "epoch": 1.8999543947778617, "grad_norm": 0.5226958549523403, "learning_rate": 1.30919406009421e-06, "loss": 11.865, "step": 34891 }, { "epoch": 1.9000088487744446, "grad_norm": 0.4992119103294987, "learning_rate": 1.3077722011318738e-06, "loss": 11.5739, "step": 34892 }, { "epoch": 1.9000633027710276, "grad_norm": 0.5891212049804477, "learning_rate": 1.3063511096168546e-06, "loss": 11.8646, "step": 34893 }, { "epoch": 1.9001177567676106, "grad_norm": 0.5596632414305603, "learning_rate": 1.3049307855601877e-06, "loss": 11.864, "step": 34894 }, { "epoch": 1.9001722107641936, "grad_norm": 0.5521483647653863, "learning_rate": 1.3035112289729422e-06, "loss": 11.6744, "step": 34895 }, { "epoch": 1.9002266647607768, "grad_norm": 0.5352249635968738, "learning_rate": 1.3020924398661426e-06, "loss": 11.7585, "step": 34896 }, { "epoch": 1.9002811187573598, "grad_norm": 0.7299639279554151, "learning_rate": 1.3006744182508245e-06, "loss": 11.6792, "step": 34897 }, { "epoch": 1.9003355727539428, "grad_norm": 0.5527762706527218, "learning_rate": 1.2992571641380124e-06, "loss": 11.7747, "step": 34898 }, { "epoch": 1.9003900267505258, "grad_norm": 0.5634854671760061, "learning_rate": 1.297840677538731e-06, "loss": 11.625, "step": 34899 }, { "epoch": 1.9004444807471088, "grad_norm": 0.5102684204527475, "learning_rate": 1.296424958463993e-06, "loss": 11.72, "step": 34900 }, { "epoch": 1.9004989347436918, "grad_norm": 0.5235308436576003, "learning_rate": 1.2950100069248018e-06, "loss": 11.8088, "step": 34901 }, { "epoch": 1.900553388740275, "grad_norm": 0.5487990887186326, "learning_rate": 1.2935958229321698e-06, "loss": 11.6855, "step": 34902 }, { "epoch": 1.900607842736858, "grad_norm": 0.5080858442369418, "learning_rate": 1.292182406497089e-06, "loss": 11.8006, "step": 34903 }, { "epoch": 1.900662296733441, "grad_norm": 0.5764407085076949, "learning_rate": 1.29076975763055e-06, "loss": 11.6335, "step": 34904 }, { "epoch": 1.900716750730024, "grad_norm": 0.5435858284155105, "learning_rate": 1.2893578763435443e-06, "loss": 11.6927, "step": 34905 }, { "epoch": 1.900771204726607, "grad_norm": 0.611928884447569, "learning_rate": 1.2879467626470298e-06, "loss": 11.519, "step": 34906 }, { "epoch": 1.90082565872319, "grad_norm": 0.520866001968785, "learning_rate": 1.28653641655202e-06, "loss": 11.747, "step": 34907 }, { "epoch": 1.900880112719773, "grad_norm": 0.5933677414894604, "learning_rate": 1.2851268380694393e-06, "loss": 11.8656, "step": 34908 }, { "epoch": 1.900934566716356, "grad_norm": 0.5794399435556268, "learning_rate": 1.2837180272102678e-06, "loss": 11.8481, "step": 34909 }, { "epoch": 1.900989020712939, "grad_norm": 0.5638927687898467, "learning_rate": 1.2823099839854636e-06, "loss": 11.7772, "step": 34910 }, { "epoch": 1.901043474709522, "grad_norm": 0.5723553513297639, "learning_rate": 1.2809027084059733e-06, "loss": 11.6843, "step": 34911 }, { "epoch": 1.901097928706105, "grad_norm": 0.5956225606954836, "learning_rate": 1.2794962004827437e-06, "loss": 11.6345, "step": 34912 }, { "epoch": 1.901152382702688, "grad_norm": 0.5187793448726408, "learning_rate": 1.2780904602266997e-06, "loss": 11.7602, "step": 34913 }, { "epoch": 1.901206836699271, "grad_norm": 0.6625625031443545, "learning_rate": 1.2766854876487766e-06, "loss": 11.7441, "step": 34914 }, { "epoch": 1.901261290695854, "grad_norm": 0.6301397190904372, "learning_rate": 1.2752812827599103e-06, "loss": 11.6538, "step": 34915 }, { "epoch": 1.901315744692437, "grad_norm": 0.5569390578766883, "learning_rate": 1.2738778455710031e-06, "loss": 11.7446, "step": 34916 }, { "epoch": 1.90137019868902, "grad_norm": 0.5747944201762731, "learning_rate": 1.272475176092991e-06, "loss": 11.7425, "step": 34917 }, { "epoch": 1.901424652685603, "grad_norm": 0.5872191550488564, "learning_rate": 1.2710732743367536e-06, "loss": 11.7856, "step": 34918 }, { "epoch": 1.901479106682186, "grad_norm": 0.5258704340142019, "learning_rate": 1.2696721403132273e-06, "loss": 11.8165, "step": 34919 }, { "epoch": 1.901533560678769, "grad_norm": 0.5250324522894962, "learning_rate": 1.2682717740332694e-06, "loss": 11.764, "step": 34920 }, { "epoch": 1.901588014675352, "grad_norm": 0.5188689434614271, "learning_rate": 1.266872175507794e-06, "loss": 11.6813, "step": 34921 }, { "epoch": 1.901642468671935, "grad_norm": 0.526756402285969, "learning_rate": 1.2654733447476807e-06, "loss": 11.7247, "step": 34922 }, { "epoch": 1.901696922668518, "grad_norm": 0.5317676067744854, "learning_rate": 1.2640752817637992e-06, "loss": 11.8025, "step": 34923 }, { "epoch": 1.901751376665101, "grad_norm": 0.5196343589005462, "learning_rate": 1.2626779865670403e-06, "loss": 11.7772, "step": 34924 }, { "epoch": 1.901805830661684, "grad_norm": 0.5156707167033562, "learning_rate": 1.2612814591682399e-06, "loss": 11.8267, "step": 34925 }, { "epoch": 1.9018602846582673, "grad_norm": 0.6338894852183763, "learning_rate": 1.2598856995782782e-06, "loss": 11.8186, "step": 34926 }, { "epoch": 1.9019147386548503, "grad_norm": 0.5402170246045844, "learning_rate": 1.258490707808002e-06, "loss": 11.8636, "step": 34927 }, { "epoch": 1.9019691926514333, "grad_norm": 0.5225756205200797, "learning_rate": 1.2570964838682586e-06, "loss": 11.7981, "step": 34928 }, { "epoch": 1.9020236466480163, "grad_norm": 0.5036981029513681, "learning_rate": 1.2557030277698945e-06, "loss": 11.8473, "step": 34929 }, { "epoch": 1.9020781006445993, "grad_norm": 0.5302650787095671, "learning_rate": 1.2543103395237454e-06, "loss": 11.7383, "step": 34930 }, { "epoch": 1.9021325546411822, "grad_norm": 0.5640592816036747, "learning_rate": 1.2529184191406474e-06, "loss": 11.8471, "step": 34931 }, { "epoch": 1.9021870086377652, "grad_norm": 0.5283403228744005, "learning_rate": 1.2515272666314027e-06, "loss": 11.663, "step": 34932 }, { "epoch": 1.9022414626343482, "grad_norm": 0.5660956425878506, "learning_rate": 1.250136882006847e-06, "loss": 11.7854, "step": 34933 }, { "epoch": 1.9022959166309312, "grad_norm": 0.5202634141052236, "learning_rate": 1.2487472652777832e-06, "loss": 11.764, "step": 34934 }, { "epoch": 1.9023503706275142, "grad_norm": 0.5103911372556909, "learning_rate": 1.2473584164550245e-06, "loss": 11.6607, "step": 34935 }, { "epoch": 1.9024048246240972, "grad_norm": 0.5184577260363474, "learning_rate": 1.2459703355493736e-06, "loss": 11.7746, "step": 34936 }, { "epoch": 1.9024592786206802, "grad_norm": 0.5519968341127948, "learning_rate": 1.2445830225716104e-06, "loss": 11.6335, "step": 34937 }, { "epoch": 1.9025137326172632, "grad_norm": 0.5758726591964479, "learning_rate": 1.2431964775325267e-06, "loss": 11.8986, "step": 34938 }, { "epoch": 1.9025681866138462, "grad_norm": 0.5901030153822988, "learning_rate": 1.2418107004429135e-06, "loss": 11.793, "step": 34939 }, { "epoch": 1.9026226406104292, "grad_norm": 0.5969603883994249, "learning_rate": 1.2404256913135404e-06, "loss": 11.8338, "step": 34940 }, { "epoch": 1.9026770946070122, "grad_norm": 0.5266099654687076, "learning_rate": 1.2390414501551984e-06, "loss": 11.7505, "step": 34941 }, { "epoch": 1.9027315486035952, "grad_norm": 0.5501564057848704, "learning_rate": 1.2376579769786124e-06, "loss": 11.7582, "step": 34942 }, { "epoch": 1.9027860026001784, "grad_norm": 0.5913755801523414, "learning_rate": 1.2362752717945625e-06, "loss": 11.7118, "step": 34943 }, { "epoch": 1.9028404565967614, "grad_norm": 0.5740167269089241, "learning_rate": 1.2348933346137958e-06, "loss": 11.7911, "step": 34944 }, { "epoch": 1.9028949105933444, "grad_norm": 0.49452260907208945, "learning_rate": 1.2335121654470705e-06, "loss": 11.6782, "step": 34945 }, { "epoch": 1.9029493645899274, "grad_norm": 0.5383000224602161, "learning_rate": 1.232131764305111e-06, "loss": 11.844, "step": 34946 }, { "epoch": 1.9030038185865104, "grad_norm": 0.5316669763904741, "learning_rate": 1.2307521311986536e-06, "loss": 11.8824, "step": 34947 }, { "epoch": 1.9030582725830933, "grad_norm": 0.5807140241198657, "learning_rate": 1.229373266138445e-06, "loss": 11.7666, "step": 34948 }, { "epoch": 1.9031127265796766, "grad_norm": 0.5643874459202864, "learning_rate": 1.2279951691351876e-06, "loss": 11.8332, "step": 34949 }, { "epoch": 1.9031671805762596, "grad_norm": 0.5849060923044125, "learning_rate": 1.2266178401995954e-06, "loss": 11.7471, "step": 34950 }, { "epoch": 1.9032216345728425, "grad_norm": 0.5169059632750649, "learning_rate": 1.225241279342404e-06, "loss": 11.8156, "step": 34951 }, { "epoch": 1.9032760885694255, "grad_norm": 0.5676538317201736, "learning_rate": 1.2238654865742938e-06, "loss": 11.7099, "step": 34952 }, { "epoch": 1.9033305425660085, "grad_norm": 0.555361791346935, "learning_rate": 1.2224904619059673e-06, "loss": 11.8238, "step": 34953 }, { "epoch": 1.9033849965625915, "grad_norm": 0.5340937115298117, "learning_rate": 1.2211162053481162e-06, "loss": 11.7109, "step": 34954 }, { "epoch": 1.9034394505591745, "grad_norm": 0.5149744115786244, "learning_rate": 1.2197427169114317e-06, "loss": 11.6893, "step": 34955 }, { "epoch": 1.9034939045557575, "grad_norm": 0.500862523279399, "learning_rate": 1.2183699966066052e-06, "loss": 11.765, "step": 34956 }, { "epoch": 1.9035483585523405, "grad_norm": 0.5919726300378692, "learning_rate": 1.216998044444284e-06, "loss": 11.7503, "step": 34957 }, { "epoch": 1.9036028125489235, "grad_norm": 0.5216627565954235, "learning_rate": 1.2156268604351707e-06, "loss": 11.7937, "step": 34958 }, { "epoch": 1.9036572665455065, "grad_norm": 0.5350789227643593, "learning_rate": 1.2142564445898896e-06, "loss": 11.7579, "step": 34959 }, { "epoch": 1.9037117205420895, "grad_norm": 0.5672783880458488, "learning_rate": 1.2128867969191326e-06, "loss": 11.7553, "step": 34960 }, { "epoch": 1.9037661745386725, "grad_norm": 0.4998683821936918, "learning_rate": 1.2115179174335244e-06, "loss": 11.796, "step": 34961 }, { "epoch": 1.9038206285352555, "grad_norm": 0.49377638318305933, "learning_rate": 1.2101498061437234e-06, "loss": 11.7123, "step": 34962 }, { "epoch": 1.9038750825318385, "grad_norm": 0.5317072407810388, "learning_rate": 1.208782463060365e-06, "loss": 11.7181, "step": 34963 }, { "epoch": 1.9039295365284215, "grad_norm": 0.6067866019767924, "learning_rate": 1.2074158881940744e-06, "loss": 11.7319, "step": 34964 }, { "epoch": 1.9039839905250044, "grad_norm": 0.5461599204792117, "learning_rate": 1.2060500815554877e-06, "loss": 11.7649, "step": 34965 }, { "epoch": 1.9040384445215877, "grad_norm": 0.5512674428664004, "learning_rate": 1.2046850431552293e-06, "loss": 11.8201, "step": 34966 }, { "epoch": 1.9040928985181707, "grad_norm": 0.551134458254267, "learning_rate": 1.203320773003902e-06, "loss": 11.7467, "step": 34967 }, { "epoch": 1.9041473525147536, "grad_norm": 0.49863207282143507, "learning_rate": 1.2019572711121196e-06, "loss": 11.659, "step": 34968 }, { "epoch": 1.9042018065113366, "grad_norm": 0.5695888542949131, "learning_rate": 1.2005945374904958e-06, "loss": 11.7296, "step": 34969 }, { "epoch": 1.9042562605079196, "grad_norm": 0.5737764133068176, "learning_rate": 1.199232572149611e-06, "loss": 11.8685, "step": 34970 }, { "epoch": 1.9043107145045026, "grad_norm": 0.6623615169926745, "learning_rate": 1.197871375100068e-06, "loss": 11.8326, "step": 34971 }, { "epoch": 1.9043651685010858, "grad_norm": 0.5531808203617824, "learning_rate": 1.196510946352436e-06, "loss": 11.6211, "step": 34972 }, { "epoch": 1.9044196224976688, "grad_norm": 0.5011029737950699, "learning_rate": 1.1951512859173064e-06, "loss": 11.7735, "step": 34973 }, { "epoch": 1.9044740764942518, "grad_norm": 0.5635643958075988, "learning_rate": 1.1937923938052598e-06, "loss": 11.7972, "step": 34974 }, { "epoch": 1.9045285304908348, "grad_norm": 0.4859862049418657, "learning_rate": 1.1924342700268431e-06, "loss": 11.7243, "step": 34975 }, { "epoch": 1.9045829844874178, "grad_norm": 0.5534201210675502, "learning_rate": 1.191076914592626e-06, "loss": 11.9276, "step": 34976 }, { "epoch": 1.9046374384840008, "grad_norm": 0.5011984913829557, "learning_rate": 1.1897203275131664e-06, "loss": 11.7358, "step": 34977 }, { "epoch": 1.9046918924805838, "grad_norm": 0.5675792748840617, "learning_rate": 1.1883645087990224e-06, "loss": 11.769, "step": 34978 }, { "epoch": 1.9047463464771668, "grad_norm": 0.5026309591187752, "learning_rate": 1.1870094584607195e-06, "loss": 11.5996, "step": 34979 }, { "epoch": 1.9048008004737498, "grad_norm": 0.5581656370747227, "learning_rate": 1.1856551765088042e-06, "loss": 11.77, "step": 34980 }, { "epoch": 1.9048552544703328, "grad_norm": 0.5663165010056022, "learning_rate": 1.1843016629538128e-06, "loss": 11.7731, "step": 34981 }, { "epoch": 1.9049097084669158, "grad_norm": 0.5172392838909518, "learning_rate": 1.1829489178062481e-06, "loss": 11.695, "step": 34982 }, { "epoch": 1.9049641624634988, "grad_norm": 0.5497732495772556, "learning_rate": 1.181596941076646e-06, "loss": 11.8683, "step": 34983 }, { "epoch": 1.9050186164600817, "grad_norm": 0.5413920720232699, "learning_rate": 1.18024573277552e-06, "loss": 11.6641, "step": 34984 }, { "epoch": 1.9050730704566647, "grad_norm": 0.547364218464008, "learning_rate": 1.1788952929133846e-06, "loss": 11.6054, "step": 34985 }, { "epoch": 1.9051275244532477, "grad_norm": 0.5487273460843867, "learning_rate": 1.1775456215007197e-06, "loss": 11.7632, "step": 34986 }, { "epoch": 1.9051819784498307, "grad_norm": 0.5150943124030233, "learning_rate": 1.1761967185480394e-06, "loss": 11.7124, "step": 34987 }, { "epoch": 1.9052364324464137, "grad_norm": 0.5506035108844471, "learning_rate": 1.174848584065824e-06, "loss": 11.584, "step": 34988 }, { "epoch": 1.9052908864429967, "grad_norm": 0.5648535428830471, "learning_rate": 1.1735012180645543e-06, "loss": 11.7641, "step": 34989 }, { "epoch": 1.90534534043958, "grad_norm": 0.5546063878312949, "learning_rate": 1.1721546205547218e-06, "loss": 11.8059, "step": 34990 }, { "epoch": 1.905399794436163, "grad_norm": 0.5946641378680784, "learning_rate": 1.1708087915467846e-06, "loss": 11.8523, "step": 34991 }, { "epoch": 1.905454248432746, "grad_norm": 0.5573673365857627, "learning_rate": 1.1694637310512125e-06, "loss": 11.6701, "step": 34992 }, { "epoch": 1.905508702429329, "grad_norm": 0.5404327531472225, "learning_rate": 1.1681194390784634e-06, "loss": 11.8141, "step": 34993 }, { "epoch": 1.905563156425912, "grad_norm": 0.542847902451296, "learning_rate": 1.1667759156389847e-06, "loss": 11.7001, "step": 34994 }, { "epoch": 1.9056176104224951, "grad_norm": 0.5595541408136204, "learning_rate": 1.1654331607432457e-06, "loss": 11.7201, "step": 34995 }, { "epoch": 1.905672064419078, "grad_norm": 0.5288638373784262, "learning_rate": 1.1640911744016602e-06, "loss": 11.8266, "step": 34996 }, { "epoch": 1.905726518415661, "grad_norm": 0.5970397485499701, "learning_rate": 1.1627499566246869e-06, "loss": 11.7677, "step": 34997 }, { "epoch": 1.905780972412244, "grad_norm": 0.5616017549771714, "learning_rate": 1.1614095074227282e-06, "loss": 11.7861, "step": 34998 }, { "epoch": 1.905835426408827, "grad_norm": 0.5635818877416472, "learning_rate": 1.1600698268062315e-06, "loss": 11.8511, "step": 34999 }, { "epoch": 1.90588988040541, "grad_norm": 0.5023035649247424, "learning_rate": 1.1587309147856217e-06, "loss": 11.7506, "step": 35000 }, { "epoch": 1.905944334401993, "grad_norm": 0.613569279577508, "learning_rate": 1.1573927713712796e-06, "loss": 11.7161, "step": 35001 }, { "epoch": 1.905998788398576, "grad_norm": 0.5262782385154369, "learning_rate": 1.156055396573641e-06, "loss": 11.7928, "step": 35002 }, { "epoch": 1.906053242395159, "grad_norm": 0.5671827952736994, "learning_rate": 1.1547187904030866e-06, "loss": 11.7986, "step": 35003 }, { "epoch": 1.906107696391742, "grad_norm": 0.4781231166406721, "learning_rate": 1.153382952870008e-06, "loss": 11.6468, "step": 35004 }, { "epoch": 1.906162150388325, "grad_norm": 0.579281572459171, "learning_rate": 1.152047883984808e-06, "loss": 11.825, "step": 35005 }, { "epoch": 1.906216604384908, "grad_norm": 0.5480966016272341, "learning_rate": 1.1507135837578453e-06, "loss": 11.8069, "step": 35006 }, { "epoch": 1.906271058381491, "grad_norm": 0.5126263720681261, "learning_rate": 1.1493800521995334e-06, "loss": 11.7038, "step": 35007 }, { "epoch": 1.906325512378074, "grad_norm": 0.6769674666132978, "learning_rate": 1.1480472893201977e-06, "loss": 11.7818, "step": 35008 }, { "epoch": 1.906379966374657, "grad_norm": 0.5307353123798643, "learning_rate": 1.1467152951302407e-06, "loss": 11.7825, "step": 35009 }, { "epoch": 1.90643442037124, "grad_norm": 0.5434710656450639, "learning_rate": 1.1453840696399986e-06, "loss": 11.7149, "step": 35010 }, { "epoch": 1.906488874367823, "grad_norm": 0.516724319134081, "learning_rate": 1.144053612859819e-06, "loss": 11.7374, "step": 35011 }, { "epoch": 1.906543328364406, "grad_norm": 0.5074130646145986, "learning_rate": 1.142723924800071e-06, "loss": 11.7568, "step": 35012 }, { "epoch": 1.9065977823609892, "grad_norm": 0.5503949663861676, "learning_rate": 1.1413950054710687e-06, "loss": 11.849, "step": 35013 }, { "epoch": 1.9066522363575722, "grad_norm": 0.5871422964262129, "learning_rate": 1.1400668548831595e-06, "loss": 11.6125, "step": 35014 }, { "epoch": 1.9067066903541552, "grad_norm": 0.5684713531708555, "learning_rate": 1.1387394730466793e-06, "loss": 11.7334, "step": 35015 }, { "epoch": 1.9067611443507382, "grad_norm": 0.5339253585247791, "learning_rate": 1.1374128599719314e-06, "loss": 11.6277, "step": 35016 }, { "epoch": 1.9068155983473212, "grad_norm": 0.6252833041214423, "learning_rate": 1.1360870156692405e-06, "loss": 11.7459, "step": 35017 }, { "epoch": 1.9068700523439042, "grad_norm": 0.5034389970866846, "learning_rate": 1.1347619401489206e-06, "loss": 11.7586, "step": 35018 }, { "epoch": 1.9069245063404874, "grad_norm": 0.47682379643605904, "learning_rate": 1.1334376334212638e-06, "loss": 11.759, "step": 35019 }, { "epoch": 1.9069789603370704, "grad_norm": 0.5630438106014305, "learning_rate": 1.1321140954965836e-06, "loss": 11.7311, "step": 35020 }, { "epoch": 1.9070334143336534, "grad_norm": 0.5712070275988911, "learning_rate": 1.13079132638515e-06, "loss": 11.7696, "step": 35021 }, { "epoch": 1.9070878683302364, "grad_norm": 0.5284279871716235, "learning_rate": 1.1294693260972877e-06, "loss": 11.7122, "step": 35022 }, { "epoch": 1.9071423223268194, "grad_norm": 0.5604633493767001, "learning_rate": 1.1281480946432332e-06, "loss": 11.8063, "step": 35023 }, { "epoch": 1.9071967763234023, "grad_norm": 0.5505765064327027, "learning_rate": 1.1268276320332893e-06, "loss": 11.7465, "step": 35024 }, { "epoch": 1.9072512303199853, "grad_norm": 0.5023365981449948, "learning_rate": 1.1255079382777144e-06, "loss": 11.7987, "step": 35025 }, { "epoch": 1.9073056843165683, "grad_norm": 0.5342114329208129, "learning_rate": 1.1241890133867671e-06, "loss": 11.7392, "step": 35026 }, { "epoch": 1.9073601383131513, "grad_norm": 0.5126656077229695, "learning_rate": 1.1228708573707058e-06, "loss": 11.819, "step": 35027 }, { "epoch": 1.9074145923097343, "grad_norm": 0.5578011430400167, "learning_rate": 1.121553470239789e-06, "loss": 11.8733, "step": 35028 }, { "epoch": 1.9074690463063173, "grad_norm": 0.5135564892159001, "learning_rate": 1.1202368520042527e-06, "loss": 11.7069, "step": 35029 }, { "epoch": 1.9075235003029003, "grad_norm": 0.5355118246894365, "learning_rate": 1.1189210026743225e-06, "loss": 11.7699, "step": 35030 }, { "epoch": 1.9075779542994833, "grad_norm": 0.599579854014161, "learning_rate": 1.1176059222602676e-06, "loss": 11.919, "step": 35031 }, { "epoch": 1.9076324082960663, "grad_norm": 0.5283005049642556, "learning_rate": 1.1162916107722798e-06, "loss": 11.8298, "step": 35032 }, { "epoch": 1.9076868622926493, "grad_norm": 0.5101769098963235, "learning_rate": 1.1149780682205957e-06, "loss": 11.7398, "step": 35033 }, { "epoch": 1.9077413162892323, "grad_norm": 0.5498331710435225, "learning_rate": 1.113665294615418e-06, "loss": 11.8011, "step": 35034 }, { "epoch": 1.9077957702858153, "grad_norm": 0.5832457360829273, "learning_rate": 1.112353289966961e-06, "loss": 11.6953, "step": 35035 }, { "epoch": 1.9078502242823985, "grad_norm": 0.5279775767510355, "learning_rate": 1.1110420542854384e-06, "loss": 11.8075, "step": 35036 }, { "epoch": 1.9079046782789815, "grad_norm": 0.5205927459750868, "learning_rate": 1.109731587581031e-06, "loss": 11.7359, "step": 35037 }, { "epoch": 1.9079591322755645, "grad_norm": 0.5881479585714262, "learning_rate": 1.108421889863931e-06, "loss": 11.7509, "step": 35038 }, { "epoch": 1.9080135862721475, "grad_norm": 0.5403880405130227, "learning_rate": 1.1071129611443408e-06, "loss": 11.8494, "step": 35039 }, { "epoch": 1.9080680402687304, "grad_norm": 0.6171662767645459, "learning_rate": 1.1058048014324084e-06, "loss": 11.9234, "step": 35040 }, { "epoch": 1.9081224942653134, "grad_norm": 0.5373698224508806, "learning_rate": 1.1044974107383255e-06, "loss": 11.6986, "step": 35041 }, { "epoch": 1.9081769482618967, "grad_norm": 0.5334942218426715, "learning_rate": 1.1031907890722615e-06, "loss": 11.7383, "step": 35042 }, { "epoch": 1.9082314022584796, "grad_norm": 0.5215693598715035, "learning_rate": 1.101884936444364e-06, "loss": 11.5175, "step": 35043 }, { "epoch": 1.9082858562550626, "grad_norm": 0.5439630012830671, "learning_rate": 1.1005798528648025e-06, "loss": 11.6718, "step": 35044 }, { "epoch": 1.9083403102516456, "grad_norm": 0.5169738020244817, "learning_rate": 1.0992755383437137e-06, "loss": 11.692, "step": 35045 }, { "epoch": 1.9083947642482286, "grad_norm": 0.5267399179343627, "learning_rate": 1.0979719928912446e-06, "loss": 11.7187, "step": 35046 }, { "epoch": 1.9084492182448116, "grad_norm": 0.5240608640848374, "learning_rate": 1.0966692165175318e-06, "loss": 11.7991, "step": 35047 }, { "epoch": 1.9085036722413946, "grad_norm": 0.5479382086282198, "learning_rate": 1.0953672092327006e-06, "loss": 11.8021, "step": 35048 }, { "epoch": 1.9085581262379776, "grad_norm": 0.5635261883703365, "learning_rate": 1.094065971046887e-06, "loss": 11.6825, "step": 35049 }, { "epoch": 1.9086125802345606, "grad_norm": 0.5543134097400486, "learning_rate": 1.0927655019701943e-06, "loss": 11.9005, "step": 35050 }, { "epoch": 1.9086670342311436, "grad_norm": 0.5595174971811848, "learning_rate": 1.0914658020127477e-06, "loss": 11.8898, "step": 35051 }, { "epoch": 1.9087214882277266, "grad_norm": 0.5394529378112851, "learning_rate": 1.0901668711846614e-06, "loss": 11.7729, "step": 35052 }, { "epoch": 1.9087759422243096, "grad_norm": 0.5580037430524944, "learning_rate": 1.0888687094960049e-06, "loss": 11.7652, "step": 35053 }, { "epoch": 1.9088303962208926, "grad_norm": 0.5193653645595908, "learning_rate": 1.0875713169569146e-06, "loss": 11.7774, "step": 35054 }, { "epoch": 1.9088848502174756, "grad_norm": 0.5520991921498845, "learning_rate": 1.0862746935774381e-06, "loss": 11.6531, "step": 35055 }, { "epoch": 1.9089393042140586, "grad_norm": 0.5447856659242378, "learning_rate": 1.0849788393676896e-06, "loss": 11.8382, "step": 35056 }, { "epoch": 1.9089937582106415, "grad_norm": 0.5596622529204539, "learning_rate": 1.0836837543377165e-06, "loss": 11.8003, "step": 35057 }, { "epoch": 1.9090482122072245, "grad_norm": 0.5716859031427447, "learning_rate": 1.082389438497622e-06, "loss": 11.739, "step": 35058 }, { "epoch": 1.9091026662038075, "grad_norm": 0.5529552089456344, "learning_rate": 1.0810958918574533e-06, "loss": 11.7993, "step": 35059 }, { "epoch": 1.9091571202003907, "grad_norm": 0.590923171759506, "learning_rate": 1.0798031144272691e-06, "loss": 11.7941, "step": 35060 }, { "epoch": 1.9092115741969737, "grad_norm": 0.503203522617532, "learning_rate": 1.0785111062171282e-06, "loss": 11.6206, "step": 35061 }, { "epoch": 1.9092660281935567, "grad_norm": 0.5159708977929509, "learning_rate": 1.0772198672370782e-06, "loss": 11.775, "step": 35062 }, { "epoch": 1.9093204821901397, "grad_norm": 0.5641158346192275, "learning_rate": 1.0759293974971441e-06, "loss": 11.8559, "step": 35063 }, { "epoch": 1.9093749361867227, "grad_norm": 0.5603151273798148, "learning_rate": 1.0746396970073846e-06, "loss": 11.6271, "step": 35064 }, { "epoch": 1.909429390183306, "grad_norm": 0.4953463292263194, "learning_rate": 1.073350765777803e-06, "loss": 11.814, "step": 35065 }, { "epoch": 1.909483844179889, "grad_norm": 0.5693987504248924, "learning_rate": 1.0720626038184467e-06, "loss": 11.8154, "step": 35066 }, { "epoch": 1.909538298176472, "grad_norm": 0.49598276689437376, "learning_rate": 1.0707752111393189e-06, "loss": 11.6372, "step": 35067 }, { "epoch": 1.909592752173055, "grad_norm": 0.5512118148892549, "learning_rate": 1.0694885877504334e-06, "loss": 11.9031, "step": 35068 }, { "epoch": 1.909647206169638, "grad_norm": 0.5783836042091899, "learning_rate": 1.0682027336617939e-06, "loss": 11.8721, "step": 35069 }, { "epoch": 1.909701660166221, "grad_norm": 0.5378272435245747, "learning_rate": 1.0669176488834032e-06, "loss": 11.5571, "step": 35070 }, { "epoch": 1.9097561141628039, "grad_norm": 0.5237454585595565, "learning_rate": 1.0656333334252532e-06, "loss": 11.6939, "step": 35071 }, { "epoch": 1.9098105681593869, "grad_norm": 0.5758661119362838, "learning_rate": 1.064349787297325e-06, "loss": 11.7597, "step": 35072 }, { "epoch": 1.9098650221559699, "grad_norm": 0.5862225418671749, "learning_rate": 1.0630670105096108e-06, "loss": 11.7829, "step": 35073 }, { "epoch": 1.9099194761525529, "grad_norm": 0.6267844827987517, "learning_rate": 1.0617850030720688e-06, "loss": 11.8347, "step": 35074 }, { "epoch": 1.9099739301491359, "grad_norm": 0.5373539663053456, "learning_rate": 1.0605037649946915e-06, "loss": 11.762, "step": 35075 }, { "epoch": 1.9100283841457188, "grad_norm": 0.561820988168871, "learning_rate": 1.059223296287415e-06, "loss": 11.7992, "step": 35076 }, { "epoch": 1.9100828381423018, "grad_norm": 0.5095559453567118, "learning_rate": 1.0579435969602203e-06, "loss": 11.7944, "step": 35077 }, { "epoch": 1.9101372921388848, "grad_norm": 0.4888594334549358, "learning_rate": 1.056664667023044e-06, "loss": 11.8618, "step": 35078 }, { "epoch": 1.9101917461354678, "grad_norm": 0.5353049185426352, "learning_rate": 1.0553865064858448e-06, "loss": 11.7997, "step": 35079 }, { "epoch": 1.9102462001320508, "grad_norm": 0.5223524984841007, "learning_rate": 1.0541091153585481e-06, "loss": 11.6515, "step": 35080 }, { "epoch": 1.9103006541286338, "grad_norm": 0.5663943238723873, "learning_rate": 1.0528324936510902e-06, "loss": 11.7447, "step": 35081 }, { "epoch": 1.9103551081252168, "grad_norm": 0.5433977328800638, "learning_rate": 1.0515566413733969e-06, "loss": 11.8036, "step": 35082 }, { "epoch": 1.9104095621218, "grad_norm": 0.5724687132210393, "learning_rate": 1.0502815585354042e-06, "loss": 11.7263, "step": 35083 }, { "epoch": 1.910464016118383, "grad_norm": 0.5410814429033893, "learning_rate": 1.0490072451470046e-06, "loss": 11.8019, "step": 35084 }, { "epoch": 1.910518470114966, "grad_norm": 0.5795403204573999, "learning_rate": 1.0477337012181232e-06, "loss": 11.864, "step": 35085 }, { "epoch": 1.910572924111549, "grad_norm": 0.5843096261051132, "learning_rate": 1.0464609267586522e-06, "loss": 11.8717, "step": 35086 }, { "epoch": 1.910627378108132, "grad_norm": 0.5547838288076281, "learning_rate": 1.045188921778506e-06, "loss": 11.7711, "step": 35087 }, { "epoch": 1.910681832104715, "grad_norm": 0.5089451102713335, "learning_rate": 1.0439176862875654e-06, "loss": 11.6835, "step": 35088 }, { "epoch": 1.9107362861012982, "grad_norm": 0.6201359920253151, "learning_rate": 1.0426472202957004e-06, "loss": 11.6396, "step": 35089 }, { "epoch": 1.9107907400978812, "grad_norm": 0.5881591957683492, "learning_rate": 1.0413775238128253e-06, "loss": 11.8939, "step": 35090 }, { "epoch": 1.9108451940944642, "grad_norm": 0.5522354787812104, "learning_rate": 1.0401085968487766e-06, "loss": 11.7936, "step": 35091 }, { "epoch": 1.9108996480910472, "grad_norm": 0.5235783192898293, "learning_rate": 1.0388404394134577e-06, "loss": 11.7094, "step": 35092 }, { "epoch": 1.9109541020876302, "grad_norm": 0.5104367125330096, "learning_rate": 1.0375730515167047e-06, "loss": 11.7649, "step": 35093 }, { "epoch": 1.9110085560842132, "grad_norm": 0.6352311273546106, "learning_rate": 1.0363064331683769e-06, "loss": 11.9029, "step": 35094 }, { "epoch": 1.9110630100807962, "grad_norm": 0.5808278235580587, "learning_rate": 1.0350405843783216e-06, "loss": 11.7317, "step": 35095 }, { "epoch": 1.9111174640773791, "grad_norm": 0.5439018501035373, "learning_rate": 1.0337755051563868e-06, "loss": 11.7704, "step": 35096 }, { "epoch": 1.9111719180739621, "grad_norm": 0.5535290140400511, "learning_rate": 1.0325111955124201e-06, "loss": 11.7334, "step": 35097 }, { "epoch": 1.9112263720705451, "grad_norm": 0.5355060497331602, "learning_rate": 1.0312476554562356e-06, "loss": 11.7896, "step": 35098 }, { "epoch": 1.9112808260671281, "grad_norm": 0.5190122938502716, "learning_rate": 1.02998488499767e-06, "loss": 11.8073, "step": 35099 }, { "epoch": 1.9113352800637111, "grad_norm": 0.5204723389919724, "learning_rate": 1.028722884146538e-06, "loss": 11.7878, "step": 35100 }, { "epoch": 1.911389734060294, "grad_norm": 0.5564546198705889, "learning_rate": 1.0274616529126536e-06, "loss": 11.8006, "step": 35101 }, { "epoch": 1.911444188056877, "grad_norm": 0.48794837984224265, "learning_rate": 1.0262011913058312e-06, "loss": 11.6623, "step": 35102 }, { "epoch": 1.91149864205346, "grad_norm": 0.5390515935615477, "learning_rate": 1.024941499335863e-06, "loss": 11.828, "step": 35103 }, { "epoch": 1.911553096050043, "grad_norm": 0.5235486524429563, "learning_rate": 1.0236825770125414e-06, "loss": 11.7012, "step": 35104 }, { "epoch": 1.911607550046626, "grad_norm": 0.6402393695134693, "learning_rate": 1.0224244243456693e-06, "loss": 11.7515, "step": 35105 }, { "epoch": 1.9116620040432093, "grad_norm": 0.515391054436438, "learning_rate": 1.0211670413450169e-06, "loss": 11.8416, "step": 35106 }, { "epoch": 1.9117164580397923, "grad_norm": 0.49968832184637235, "learning_rate": 1.0199104280203763e-06, "loss": 11.768, "step": 35107 }, { "epoch": 1.9117709120363753, "grad_norm": 0.6274243618865167, "learning_rate": 1.0186545843815065e-06, "loss": 11.7803, "step": 35108 }, { "epoch": 1.9118253660329583, "grad_norm": 0.5097495364866304, "learning_rate": 1.0173995104381773e-06, "loss": 11.779, "step": 35109 }, { "epoch": 1.9118798200295413, "grad_norm": 0.5261417959977284, "learning_rate": 1.0161452062001587e-06, "loss": 11.8723, "step": 35110 }, { "epoch": 1.9119342740261243, "grad_norm": 0.5459633910347668, "learning_rate": 1.0148916716771761e-06, "loss": 11.8145, "step": 35111 }, { "epoch": 1.9119887280227075, "grad_norm": 0.5602467867364136, "learning_rate": 1.0136389068790108e-06, "loss": 11.779, "step": 35112 }, { "epoch": 1.9120431820192905, "grad_norm": 0.5941671271494218, "learning_rate": 1.0123869118153883e-06, "loss": 11.6405, "step": 35113 }, { "epoch": 1.9120976360158735, "grad_norm": 0.5483298253064384, "learning_rate": 1.0111356864960341e-06, "loss": 11.6138, "step": 35114 }, { "epoch": 1.9121520900124565, "grad_norm": 0.5154942674498695, "learning_rate": 1.0098852309307072e-06, "loss": 11.8123, "step": 35115 }, { "epoch": 1.9122065440090394, "grad_norm": 0.5525543639258955, "learning_rate": 1.0086355451290997e-06, "loss": 11.8114, "step": 35116 }, { "epoch": 1.9122609980056224, "grad_norm": 0.5227600270568211, "learning_rate": 1.007386629100948e-06, "loss": 11.7644, "step": 35117 }, { "epoch": 1.9123154520022054, "grad_norm": 0.5627995273649774, "learning_rate": 1.006138482855956e-06, "loss": 11.6594, "step": 35118 }, { "epoch": 1.9123699059987884, "grad_norm": 0.5338480996883471, "learning_rate": 1.0048911064038268e-06, "loss": 11.7901, "step": 35119 }, { "epoch": 1.9124243599953714, "grad_norm": 0.5442165476819657, "learning_rate": 1.003644499754275e-06, "loss": 11.6895, "step": 35120 }, { "epoch": 1.9124788139919544, "grad_norm": 0.5476404177077041, "learning_rate": 1.0023986629169813e-06, "loss": 11.7744, "step": 35121 }, { "epoch": 1.9125332679885374, "grad_norm": 0.5437739273626775, "learning_rate": 1.0011535959016494e-06, "loss": 11.7232, "step": 35122 }, { "epoch": 1.9125877219851204, "grad_norm": 0.5599405951560065, "learning_rate": 9.999092987179381e-07, "loss": 11.7276, "step": 35123 }, { "epoch": 1.9126421759817034, "grad_norm": 0.49533066232163964, "learning_rate": 9.986657713755288e-07, "loss": 11.7487, "step": 35124 }, { "epoch": 1.9126966299782864, "grad_norm": 0.5326189323704272, "learning_rate": 9.974230138841024e-07, "loss": 11.7116, "step": 35125 }, { "epoch": 1.9127510839748694, "grad_norm": 0.5294952348997704, "learning_rate": 9.961810262533178e-07, "loss": 11.8349, "step": 35126 }, { "epoch": 1.9128055379714524, "grad_norm": 0.6010079103320684, "learning_rate": 9.94939808492834e-07, "loss": 11.8098, "step": 35127 }, { "epoch": 1.9128599919680354, "grad_norm": 0.5693917939451096, "learning_rate": 9.93699360612299e-07, "loss": 11.6701, "step": 35128 }, { "epoch": 1.9129144459646186, "grad_norm": 0.4904476688547059, "learning_rate": 9.924596826213716e-07, "loss": 11.6774, "step": 35129 }, { "epoch": 1.9129688999612016, "grad_norm": 0.4995587318185474, "learning_rate": 9.912207745296665e-07, "loss": 11.7444, "step": 35130 }, { "epoch": 1.9130233539577846, "grad_norm": 0.5644042562087479, "learning_rate": 9.899826363468311e-07, "loss": 11.774, "step": 35131 }, { "epoch": 1.9130778079543675, "grad_norm": 0.6478245834824435, "learning_rate": 9.887452680825138e-07, "loss": 11.7276, "step": 35132 }, { "epoch": 1.9131322619509505, "grad_norm": 0.5025718304468798, "learning_rate": 9.875086697463066e-07, "loss": 11.7485, "step": 35133 }, { "epoch": 1.9131867159475335, "grad_norm": 0.5132824629887921, "learning_rate": 9.86272841347835e-07, "loss": 11.5666, "step": 35134 }, { "epoch": 1.9132411699441167, "grad_norm": 0.5160447393687133, "learning_rate": 9.85037782896714e-07, "loss": 11.8409, "step": 35135 }, { "epoch": 1.9132956239406997, "grad_norm": 0.5831760641182391, "learning_rate": 9.838034944025354e-07, "loss": 11.7491, "step": 35136 }, { "epoch": 1.9133500779372827, "grad_norm": 0.5000670887401855, "learning_rate": 9.82569975874914e-07, "loss": 11.6315, "step": 35137 }, { "epoch": 1.9134045319338657, "grad_norm": 0.5364252803555681, "learning_rate": 9.813372273234311e-07, "loss": 11.6952, "step": 35138 }, { "epoch": 1.9134589859304487, "grad_norm": 0.5859474620258575, "learning_rate": 9.80105248757679e-07, "loss": 11.773, "step": 35139 }, { "epoch": 1.9135134399270317, "grad_norm": 0.5183197641860716, "learning_rate": 9.788740401872277e-07, "loss": 11.8488, "step": 35140 }, { "epoch": 1.9135678939236147, "grad_norm": 0.5311511630162272, "learning_rate": 9.776436016216473e-07, "loss": 11.885, "step": 35141 }, { "epoch": 1.9136223479201977, "grad_norm": 0.5721275190221004, "learning_rate": 9.764139330705412e-07, "loss": 11.8198, "step": 35142 }, { "epoch": 1.9136768019167807, "grad_norm": 0.5439242256473281, "learning_rate": 9.751850345434355e-07, "loss": 11.7999, "step": 35143 }, { "epoch": 1.9137312559133637, "grad_norm": 0.5025895639682192, "learning_rate": 9.739569060498998e-07, "loss": 11.6802, "step": 35144 }, { "epoch": 1.9137857099099467, "grad_norm": 0.522579173012891, "learning_rate": 9.727295475994714e-07, "loss": 11.7436, "step": 35145 }, { "epoch": 1.9138401639065297, "grad_norm": 0.5540909247206703, "learning_rate": 9.715029592017088e-07, "loss": 11.8201, "step": 35146 }, { "epoch": 1.9138946179031127, "grad_norm": 0.5299964103986897, "learning_rate": 9.702771408661604e-07, "loss": 11.8015, "step": 35147 }, { "epoch": 1.9139490718996957, "grad_norm": 0.5460718503237835, "learning_rate": 9.690520926023294e-07, "loss": 11.7618, "step": 35148 }, { "epoch": 1.9140035258962786, "grad_norm": 0.5642893204770933, "learning_rate": 9.678278144197639e-07, "loss": 11.7825, "step": 35149 }, { "epoch": 1.9140579798928616, "grad_norm": 0.5199466557557606, "learning_rate": 9.666043063279673e-07, "loss": 11.8994, "step": 35150 }, { "epoch": 1.9141124338894446, "grad_norm": 0.5713658464418989, "learning_rate": 9.653815683364764e-07, "loss": 11.8829, "step": 35151 }, { "epoch": 1.9141668878860276, "grad_norm": 0.5318327808591264, "learning_rate": 9.641596004547726e-07, "loss": 11.7626, "step": 35152 }, { "epoch": 1.9142213418826108, "grad_norm": 0.5282506846256024, "learning_rate": 9.629384026923816e-07, "loss": 11.8018, "step": 35153 }, { "epoch": 1.9142757958791938, "grad_norm": 0.5720525814173348, "learning_rate": 9.617179750587957e-07, "loss": 11.8022, "step": 35154 }, { "epoch": 1.9143302498757768, "grad_norm": 0.5258208866729774, "learning_rate": 9.604983175634852e-07, "loss": 11.9336, "step": 35155 }, { "epoch": 1.9143847038723598, "grad_norm": 0.5159547586650071, "learning_rate": 9.592794302159646e-07, "loss": 11.7323, "step": 35156 }, { "epoch": 1.9144391578689428, "grad_norm": 0.5610796327787909, "learning_rate": 9.580613130256822e-07, "loss": 11.8646, "step": 35157 }, { "epoch": 1.9144936118655258, "grad_norm": 0.5767096618913562, "learning_rate": 9.5684396600213e-07, "loss": 11.8894, "step": 35158 }, { "epoch": 1.914548065862109, "grad_norm": 0.5294956672415153, "learning_rate": 9.556273891547673e-07, "loss": 11.7973, "step": 35159 }, { "epoch": 1.914602519858692, "grad_norm": 0.5391260841721528, "learning_rate": 9.544115824930532e-07, "loss": 11.7829, "step": 35160 }, { "epoch": 1.914656973855275, "grad_norm": 0.5405415191523248, "learning_rate": 9.53196546026458e-07, "loss": 11.7448, "step": 35161 }, { "epoch": 1.914711427851858, "grad_norm": 0.5841377470590796, "learning_rate": 9.519822797643962e-07, "loss": 11.8143, "step": 35162 }, { "epoch": 1.914765881848441, "grad_norm": 0.6184059434122927, "learning_rate": 9.507687837163492e-07, "loss": 11.729, "step": 35163 }, { "epoch": 1.914820335845024, "grad_norm": 0.5255925439390693, "learning_rate": 9.495560578917318e-07, "loss": 11.7779, "step": 35164 }, { "epoch": 1.914874789841607, "grad_norm": 0.5253838804623853, "learning_rate": 9.483441022999695e-07, "loss": 11.7927, "step": 35165 }, { "epoch": 1.91492924383819, "grad_norm": 0.5980539158184621, "learning_rate": 9.471329169504995e-07, "loss": 11.9218, "step": 35166 }, { "epoch": 1.914983697834773, "grad_norm": 0.5617716604024158, "learning_rate": 9.459225018527251e-07, "loss": 11.8383, "step": 35167 }, { "epoch": 1.915038151831356, "grad_norm": 0.5623123857783747, "learning_rate": 9.447128570160723e-07, "loss": 11.6922, "step": 35168 }, { "epoch": 1.915092605827939, "grad_norm": 0.5506840989740919, "learning_rate": 9.435039824499559e-07, "loss": 11.8841, "step": 35169 }, { "epoch": 1.915147059824522, "grad_norm": 0.53043677168556, "learning_rate": 9.422958781637569e-07, "loss": 11.6814, "step": 35170 }, { "epoch": 1.915201513821105, "grad_norm": 0.5097510373721862, "learning_rate": 9.410885441668793e-07, "loss": 11.7102, "step": 35171 }, { "epoch": 1.915255967817688, "grad_norm": 0.5199440180916941, "learning_rate": 9.398819804687043e-07, "loss": 11.7086, "step": 35172 }, { "epoch": 1.915310421814271, "grad_norm": 0.5762046373965938, "learning_rate": 9.386761870786243e-07, "loss": 11.9591, "step": 35173 }, { "epoch": 1.915364875810854, "grad_norm": 0.5586414190330928, "learning_rate": 9.374711640060096e-07, "loss": 11.8247, "step": 35174 }, { "epoch": 1.915419329807437, "grad_norm": 0.5381081233376709, "learning_rate": 9.362669112602307e-07, "loss": 11.708, "step": 35175 }, { "epoch": 1.9154737838040201, "grad_norm": 0.559612607472234, "learning_rate": 9.350634288506466e-07, "loss": 11.7455, "step": 35176 }, { "epoch": 1.915528237800603, "grad_norm": 0.5597247638643995, "learning_rate": 9.338607167866276e-07, "loss": 11.7732, "step": 35177 }, { "epoch": 1.915582691797186, "grad_norm": 0.5832729433958163, "learning_rate": 9.326587750775329e-07, "loss": 11.8304, "step": 35178 }, { "epoch": 1.915637145793769, "grad_norm": 0.5401258334400405, "learning_rate": 9.314576037326772e-07, "loss": 11.7651, "step": 35179 }, { "epoch": 1.915691599790352, "grad_norm": 0.546976026951646, "learning_rate": 9.302572027614309e-07, "loss": 11.8512, "step": 35180 }, { "epoch": 1.915746053786935, "grad_norm": 0.5205921963085846, "learning_rate": 9.290575721731198e-07, "loss": 11.7567, "step": 35181 }, { "epoch": 1.9158005077835183, "grad_norm": 0.5490305793584135, "learning_rate": 9.278587119770698e-07, "loss": 11.7593, "step": 35182 }, { "epoch": 1.9158549617801013, "grad_norm": 0.5386055788617824, "learning_rate": 9.266606221826069e-07, "loss": 11.6951, "step": 35183 }, { "epoch": 1.9159094157766843, "grad_norm": 0.4888484154079049, "learning_rate": 9.254633027990456e-07, "loss": 11.7217, "step": 35184 }, { "epoch": 1.9159638697732673, "grad_norm": 0.5561620307709734, "learning_rate": 9.242667538356898e-07, "loss": 11.8107, "step": 35185 }, { "epoch": 1.9160183237698503, "grad_norm": 0.533857419028927, "learning_rate": 9.230709753018651e-07, "loss": 11.804, "step": 35186 }, { "epoch": 1.9160727777664333, "grad_norm": 0.5740980065069025, "learning_rate": 9.218759672068422e-07, "loss": 11.7448, "step": 35187 }, { "epoch": 1.9161272317630162, "grad_norm": 0.5348810160177284, "learning_rate": 9.206817295599246e-07, "loss": 11.7366, "step": 35188 }, { "epoch": 1.9161816857595992, "grad_norm": 0.5639451320436223, "learning_rate": 9.194882623704049e-07, "loss": 11.7164, "step": 35189 }, { "epoch": 1.9162361397561822, "grad_norm": 0.49415545276392187, "learning_rate": 9.182955656475645e-07, "loss": 11.529, "step": 35190 }, { "epoch": 1.9162905937527652, "grad_norm": 0.557829386350709, "learning_rate": 9.171036394006737e-07, "loss": 11.7855, "step": 35191 }, { "epoch": 1.9163450477493482, "grad_norm": 0.5947507546188459, "learning_rate": 9.159124836390032e-07, "loss": 11.849, "step": 35192 }, { "epoch": 1.9163995017459312, "grad_norm": 0.5565434666075539, "learning_rate": 9.147220983718119e-07, "loss": 11.7487, "step": 35193 }, { "epoch": 1.9164539557425142, "grad_norm": 0.5687280597467993, "learning_rate": 9.135324836083592e-07, "loss": 11.7756, "step": 35194 }, { "epoch": 1.9165084097390972, "grad_norm": 0.5369250923409661, "learning_rate": 9.123436393578822e-07, "loss": 11.7732, "step": 35195 }, { "epoch": 1.9165628637356802, "grad_norm": 0.6092539975696989, "learning_rate": 9.111555656296511e-07, "loss": 11.692, "step": 35196 }, { "epoch": 1.9166173177322632, "grad_norm": 0.5167452649292813, "learning_rate": 9.09968262432892e-07, "loss": 11.7265, "step": 35197 }, { "epoch": 1.9166717717288462, "grad_norm": 0.5385597377763062, "learning_rate": 9.087817297768308e-07, "loss": 11.833, "step": 35198 }, { "epoch": 1.9167262257254294, "grad_norm": 0.5416782185874432, "learning_rate": 9.075959676707046e-07, "loss": 11.6064, "step": 35199 }, { "epoch": 1.9167806797220124, "grad_norm": 0.537139385306688, "learning_rate": 9.064109761237282e-07, "loss": 11.681, "step": 35200 }, { "epoch": 1.9168351337185954, "grad_norm": 0.6357180096696138, "learning_rate": 9.052267551451165e-07, "loss": 11.808, "step": 35201 }, { "epoch": 1.9168895877151784, "grad_norm": 0.6055184640562326, "learning_rate": 9.040433047440844e-07, "loss": 11.788, "step": 35202 }, { "epoch": 1.9169440417117614, "grad_norm": 0.5045144401513638, "learning_rate": 9.028606249298355e-07, "loss": 11.7997, "step": 35203 }, { "epoch": 1.9169984957083444, "grad_norm": 0.5459174889517361, "learning_rate": 9.016787157115514e-07, "loss": 11.8305, "step": 35204 }, { "epoch": 1.9170529497049276, "grad_norm": 0.4910436462510843, "learning_rate": 9.004975770984358e-07, "loss": 11.7572, "step": 35205 }, { "epoch": 1.9171074037015106, "grad_norm": 0.5478522802477646, "learning_rate": 8.993172090996593e-07, "loss": 11.8115, "step": 35206 }, { "epoch": 1.9171618576980936, "grad_norm": 0.5559894155851322, "learning_rate": 8.981376117244255e-07, "loss": 11.8449, "step": 35207 }, { "epoch": 1.9172163116946765, "grad_norm": 0.538612690716442, "learning_rate": 8.969587849818828e-07, "loss": 11.7906, "step": 35208 }, { "epoch": 1.9172707656912595, "grad_norm": 0.5084388156015692, "learning_rate": 8.957807288812126e-07, "loss": 11.7268, "step": 35209 }, { "epoch": 1.9173252196878425, "grad_norm": 0.4832681966615627, "learning_rate": 8.946034434315742e-07, "loss": 11.7395, "step": 35210 }, { "epoch": 1.9173796736844255, "grad_norm": 0.5738225621440586, "learning_rate": 8.934269286421159e-07, "loss": 11.8553, "step": 35211 }, { "epoch": 1.9174341276810085, "grad_norm": 0.48711473108013803, "learning_rate": 8.922511845219971e-07, "loss": 11.7085, "step": 35212 }, { "epoch": 1.9174885816775915, "grad_norm": 0.5202528831488631, "learning_rate": 8.910762110803439e-07, "loss": 11.8102, "step": 35213 }, { "epoch": 1.9175430356741745, "grad_norm": 0.519394116891388, "learning_rate": 8.899020083263043e-07, "loss": 11.7792, "step": 35214 }, { "epoch": 1.9175974896707575, "grad_norm": 0.519271590028062, "learning_rate": 8.887285762690156e-07, "loss": 11.8252, "step": 35215 }, { "epoch": 1.9176519436673405, "grad_norm": 0.5455615839571457, "learning_rate": 8.875559149175816e-07, "loss": 11.8203, "step": 35216 }, { "epoch": 1.9177063976639235, "grad_norm": 0.5390491771381011, "learning_rate": 8.863840242811394e-07, "loss": 11.744, "step": 35217 }, { "epoch": 1.9177608516605065, "grad_norm": 0.5617895119488668, "learning_rate": 8.85212904368804e-07, "loss": 11.7927, "step": 35218 }, { "epoch": 1.9178153056570895, "grad_norm": 0.49923998977061224, "learning_rate": 8.84042555189657e-07, "loss": 11.7869, "step": 35219 }, { "epoch": 1.9178697596536725, "grad_norm": 0.5392256851222319, "learning_rate": 8.828729767528354e-07, "loss": 11.7381, "step": 35220 }, { "epoch": 1.9179242136502554, "grad_norm": 0.5580498866571791, "learning_rate": 8.817041690673989e-07, "loss": 11.7798, "step": 35221 }, { "epoch": 1.9179786676468384, "grad_norm": 0.5553461907649563, "learning_rate": 8.805361321424732e-07, "loss": 11.8, "step": 35222 }, { "epoch": 1.9180331216434217, "grad_norm": 0.538107620107467, "learning_rate": 8.793688659871069e-07, "loss": 11.6728, "step": 35223 }, { "epoch": 1.9180875756400046, "grad_norm": 0.5358888643911297, "learning_rate": 8.782023706103925e-07, "loss": 11.8671, "step": 35224 }, { "epoch": 1.9181420296365876, "grad_norm": 0.5473243382040065, "learning_rate": 8.770366460214008e-07, "loss": 11.7563, "step": 35225 }, { "epoch": 1.9181964836331706, "grad_norm": 0.5619020179221044, "learning_rate": 8.75871692229191e-07, "loss": 11.8151, "step": 35226 }, { "epoch": 1.9182509376297536, "grad_norm": 0.646298529253663, "learning_rate": 8.747075092428336e-07, "loss": 11.7864, "step": 35227 }, { "epoch": 1.9183053916263366, "grad_norm": 0.5448105127013941, "learning_rate": 8.73544097071366e-07, "loss": 11.6923, "step": 35228 }, { "epoch": 1.9183598456229198, "grad_norm": 0.5097718353891445, "learning_rate": 8.723814557238474e-07, "loss": 11.7472, "step": 35229 }, { "epoch": 1.9184142996195028, "grad_norm": 0.6254578642322416, "learning_rate": 8.712195852093152e-07, "loss": 11.7263, "step": 35230 }, { "epoch": 1.9184687536160858, "grad_norm": 0.5030137023454851, "learning_rate": 8.700584855367955e-07, "loss": 11.7914, "step": 35231 }, { "epoch": 1.9185232076126688, "grad_norm": 0.524172881676483, "learning_rate": 8.688981567153365e-07, "loss": 11.7223, "step": 35232 }, { "epoch": 1.9185776616092518, "grad_norm": 0.5398621754495531, "learning_rate": 8.677385987539532e-07, "loss": 11.9072, "step": 35233 }, { "epoch": 1.9186321156058348, "grad_norm": 0.5669199821899846, "learning_rate": 8.665798116616496e-07, "loss": 11.6823, "step": 35234 }, { "epoch": 1.9186865696024178, "grad_norm": 0.558354801015339, "learning_rate": 8.654217954474408e-07, "loss": 11.7699, "step": 35235 }, { "epoch": 1.9187410235990008, "grad_norm": 0.5154310645832462, "learning_rate": 8.642645501203416e-07, "loss": 11.7, "step": 35236 }, { "epoch": 1.9187954775955838, "grad_norm": 0.5253994038750188, "learning_rate": 8.631080756893562e-07, "loss": 11.734, "step": 35237 }, { "epoch": 1.9188499315921668, "grad_norm": 0.582087935449611, "learning_rate": 8.619523721634548e-07, "loss": 11.8067, "step": 35238 }, { "epoch": 1.9189043855887498, "grad_norm": 0.5274111672244881, "learning_rate": 8.607974395516416e-07, "loss": 11.7308, "step": 35239 }, { "epoch": 1.9189588395853328, "grad_norm": 0.5722438827303705, "learning_rate": 8.596432778628983e-07, "loss": 11.6561, "step": 35240 }, { "epoch": 1.9190132935819157, "grad_norm": 0.5250881599929415, "learning_rate": 8.584898871061841e-07, "loss": 11.7011, "step": 35241 }, { "epoch": 1.9190677475784987, "grad_norm": 0.6673704896322545, "learning_rate": 8.573372672904923e-07, "loss": 11.8617, "step": 35242 }, { "epoch": 1.9191222015750817, "grad_norm": 0.5550575162151639, "learning_rate": 8.561854184247597e-07, "loss": 11.6047, "step": 35243 }, { "epoch": 1.9191766555716647, "grad_norm": 0.609772576805787, "learning_rate": 8.550343405179573e-07, "loss": 11.811, "step": 35244 }, { "epoch": 1.9192311095682477, "grad_norm": 0.49943831048443377, "learning_rate": 8.538840335790443e-07, "loss": 11.7641, "step": 35245 }, { "epoch": 1.919285563564831, "grad_norm": 0.5877683080457907, "learning_rate": 8.527344976169471e-07, "loss": 11.7762, "step": 35246 }, { "epoch": 1.919340017561414, "grad_norm": 0.6439500745850081, "learning_rate": 8.51585732640614e-07, "loss": 11.7894, "step": 35247 }, { "epoch": 1.919394471557997, "grad_norm": 0.537615234870368, "learning_rate": 8.504377386589824e-07, "loss": 11.7894, "step": 35248 }, { "epoch": 1.91944892555458, "grad_norm": 0.5793091396641953, "learning_rate": 8.492905156809671e-07, "loss": 11.8368, "step": 35249 }, { "epoch": 1.919503379551163, "grad_norm": 0.5705253878050031, "learning_rate": 8.481440637154947e-07, "loss": 11.8582, "step": 35250 }, { "epoch": 1.919557833547746, "grad_norm": 0.4914867197880977, "learning_rate": 8.469983827714911e-07, "loss": 11.6931, "step": 35251 }, { "epoch": 1.919612287544329, "grad_norm": 0.529209964279529, "learning_rate": 8.458534728578494e-07, "loss": 11.6899, "step": 35252 }, { "epoch": 1.919666741540912, "grad_norm": 0.5437293401220309, "learning_rate": 8.447093339834844e-07, "loss": 11.7877, "step": 35253 }, { "epoch": 1.919721195537495, "grad_norm": 0.5633118117687254, "learning_rate": 8.435659661572892e-07, "loss": 11.7408, "step": 35254 }, { "epoch": 1.919775649534078, "grad_norm": 0.5199596714588441, "learning_rate": 8.424233693881456e-07, "loss": 11.7181, "step": 35255 }, { "epoch": 1.919830103530661, "grad_norm": 0.4993925874144359, "learning_rate": 8.412815436849575e-07, "loss": 11.7928, "step": 35256 }, { "epoch": 1.919884557527244, "grad_norm": 0.4955370450074922, "learning_rate": 8.401404890565845e-07, "loss": 11.5063, "step": 35257 }, { "epoch": 1.919939011523827, "grad_norm": 0.5295129961762205, "learning_rate": 8.390002055119084e-07, "loss": 11.7691, "step": 35258 }, { "epoch": 1.91999346552041, "grad_norm": 0.5341947484634186, "learning_rate": 8.378606930597999e-07, "loss": 11.9021, "step": 35259 }, { "epoch": 1.920047919516993, "grad_norm": 0.49459724132531996, "learning_rate": 8.367219517091074e-07, "loss": 11.6327, "step": 35260 }, { "epoch": 1.920102373513576, "grad_norm": 0.552824060456351, "learning_rate": 8.355839814687017e-07, "loss": 11.7846, "step": 35261 }, { "epoch": 1.920156827510159, "grad_norm": 0.5945931968247791, "learning_rate": 8.344467823474311e-07, "loss": 11.7119, "step": 35262 }, { "epoch": 1.920211281506742, "grad_norm": 0.5165917185399999, "learning_rate": 8.33310354354122e-07, "loss": 11.7467, "step": 35263 }, { "epoch": 1.920265735503325, "grad_norm": 0.5398463526646301, "learning_rate": 8.321746974976341e-07, "loss": 11.689, "step": 35264 }, { "epoch": 1.920320189499908, "grad_norm": 0.5122382129269146, "learning_rate": 8.310398117867713e-07, "loss": 11.686, "step": 35265 }, { "epoch": 1.920374643496491, "grad_norm": 0.4963068238640571, "learning_rate": 8.299056972303821e-07, "loss": 11.769, "step": 35266 }, { "epoch": 1.920429097493074, "grad_norm": 0.5939624631643763, "learning_rate": 8.287723538372705e-07, "loss": 11.841, "step": 35267 }, { "epoch": 1.920483551489657, "grad_norm": 0.5601564948005896, "learning_rate": 8.276397816162629e-07, "loss": 11.6909, "step": 35268 }, { "epoch": 1.9205380054862402, "grad_norm": 0.5345917124766969, "learning_rate": 8.265079805761522e-07, "loss": 11.6399, "step": 35269 }, { "epoch": 1.9205924594828232, "grad_norm": 0.5148940207216658, "learning_rate": 8.253769507257536e-07, "loss": 11.6773, "step": 35270 }, { "epoch": 1.9206469134794062, "grad_norm": 0.5404967793073, "learning_rate": 8.242466920738601e-07, "loss": 11.6103, "step": 35271 }, { "epoch": 1.9207013674759892, "grad_norm": 0.5186321777187234, "learning_rate": 8.231172046292424e-07, "loss": 11.875, "step": 35272 }, { "epoch": 1.9207558214725722, "grad_norm": 0.5180517870130527, "learning_rate": 8.219884884007045e-07, "loss": 11.7603, "step": 35273 }, { "epoch": 1.9208102754691552, "grad_norm": 0.504696692025867, "learning_rate": 8.208605433970173e-07, "loss": 11.7911, "step": 35274 }, { "epoch": 1.9208647294657384, "grad_norm": 0.5492580926459601, "learning_rate": 8.197333696269515e-07, "loss": 11.827, "step": 35275 }, { "epoch": 1.9209191834623214, "grad_norm": 0.5867281708173184, "learning_rate": 8.186069670992779e-07, "loss": 11.7523, "step": 35276 }, { "epoch": 1.9209736374589044, "grad_norm": 0.6153437883647792, "learning_rate": 8.17481335822734e-07, "loss": 11.7761, "step": 35277 }, { "epoch": 1.9210280914554874, "grad_norm": 0.5026668114833792, "learning_rate": 8.163564758060905e-07, "loss": 11.6629, "step": 35278 }, { "epoch": 1.9210825454520704, "grad_norm": 0.5320818665546363, "learning_rate": 8.152323870581069e-07, "loss": 11.7831, "step": 35279 }, { "epoch": 1.9211369994486533, "grad_norm": 0.5681963567239882, "learning_rate": 8.141090695874876e-07, "loss": 11.8499, "step": 35280 }, { "epoch": 1.9211914534452363, "grad_norm": 0.4949459466197695, "learning_rate": 8.129865234030032e-07, "loss": 11.7889, "step": 35281 }, { "epoch": 1.9212459074418193, "grad_norm": 0.5349621663826487, "learning_rate": 8.11864748513369e-07, "loss": 11.7013, "step": 35282 }, { "epoch": 1.9213003614384023, "grad_norm": 0.5774407758647135, "learning_rate": 8.107437449273114e-07, "loss": 11.8837, "step": 35283 }, { "epoch": 1.9213548154349853, "grad_norm": 0.5460741780068611, "learning_rate": 8.096235126535456e-07, "loss": 11.8375, "step": 35284 }, { "epoch": 1.9214092694315683, "grad_norm": 0.5667162986954193, "learning_rate": 8.085040517007758e-07, "loss": 11.7073, "step": 35285 }, { "epoch": 1.9214637234281513, "grad_norm": 0.5757809426311653, "learning_rate": 8.073853620777061e-07, "loss": 11.8311, "step": 35286 }, { "epoch": 1.9215181774247343, "grad_norm": 0.5300675981497448, "learning_rate": 8.062674437930517e-07, "loss": 11.6415, "step": 35287 }, { "epoch": 1.9215726314213173, "grad_norm": 0.5504102236991174, "learning_rate": 8.051502968554947e-07, "loss": 11.8556, "step": 35288 }, { "epoch": 1.9216270854179003, "grad_norm": 0.5219588919888506, "learning_rate": 8.04033921273728e-07, "loss": 11.7289, "step": 35289 }, { "epoch": 1.9216815394144833, "grad_norm": 0.532605736327605, "learning_rate": 8.029183170564225e-07, "loss": 11.9089, "step": 35290 }, { "epoch": 1.9217359934110663, "grad_norm": 0.5545374656691011, "learning_rate": 8.018034842122601e-07, "loss": 11.6906, "step": 35291 }, { "epoch": 1.9217904474076493, "grad_norm": 0.5785721568173094, "learning_rate": 8.006894227499118e-07, "loss": 11.8068, "step": 35292 }, { "epoch": 1.9218449014042325, "grad_norm": 0.5855414702112186, "learning_rate": 7.995761326780371e-07, "loss": 11.8148, "step": 35293 }, { "epoch": 1.9218993554008155, "grad_norm": 0.5362638828443235, "learning_rate": 7.984636140052959e-07, "loss": 11.7047, "step": 35294 }, { "epoch": 1.9219538093973985, "grad_norm": 0.5477454407750199, "learning_rate": 7.973518667403368e-07, "loss": 11.7107, "step": 35295 }, { "epoch": 1.9220082633939815, "grad_norm": 0.4952478844872741, "learning_rate": 7.962408908918085e-07, "loss": 11.7421, "step": 35296 }, { "epoch": 1.9220627173905644, "grad_norm": 0.5232201606893285, "learning_rate": 7.951306864683372e-07, "loss": 11.7441, "step": 35297 }, { "epoch": 1.9221171713871477, "grad_norm": 0.5422497469933022, "learning_rate": 7.940212534785718e-07, "loss": 11.8066, "step": 35298 }, { "epoch": 1.9221716253837307, "grad_norm": 0.5773271614851948, "learning_rate": 7.929125919311387e-07, "loss": 11.8527, "step": 35299 }, { "epoch": 1.9222260793803136, "grad_norm": 0.5069161014283836, "learning_rate": 7.918047018346419e-07, "loss": 11.6907, "step": 35300 }, { "epoch": 1.9222805333768966, "grad_norm": 0.5265502382157767, "learning_rate": 7.906975831977303e-07, "loss": 11.8117, "step": 35301 }, { "epoch": 1.9223349873734796, "grad_norm": 0.5979082352160378, "learning_rate": 7.895912360289748e-07, "loss": 11.7516, "step": 35302 }, { "epoch": 1.9223894413700626, "grad_norm": 0.4978611012246709, "learning_rate": 7.884856603370017e-07, "loss": 11.6019, "step": 35303 }, { "epoch": 1.9224438953666456, "grad_norm": 0.5553903313803183, "learning_rate": 7.873808561304041e-07, "loss": 11.8605, "step": 35304 }, { "epoch": 1.9224983493632286, "grad_norm": 0.486835429970468, "learning_rate": 7.862768234177643e-07, "loss": 11.6685, "step": 35305 }, { "epoch": 1.9225528033598116, "grad_norm": 0.5372476764856517, "learning_rate": 7.85173562207675e-07, "loss": 11.7563, "step": 35306 }, { "epoch": 1.9226072573563946, "grad_norm": 0.5329644864882591, "learning_rate": 7.840710725087186e-07, "loss": 11.6573, "step": 35307 }, { "epoch": 1.9226617113529776, "grad_norm": 0.7622171642642938, "learning_rate": 7.829693543294658e-07, "loss": 11.8296, "step": 35308 }, { "epoch": 1.9227161653495606, "grad_norm": 0.6431431136042206, "learning_rate": 7.818684076784766e-07, "loss": 11.9103, "step": 35309 }, { "epoch": 1.9227706193461436, "grad_norm": 0.5997994841951583, "learning_rate": 7.807682325643218e-07, "loss": 11.7868, "step": 35310 }, { "epoch": 1.9228250733427266, "grad_norm": 0.5578947978803397, "learning_rate": 7.796688289955389e-07, "loss": 11.8253, "step": 35311 }, { "epoch": 1.9228795273393096, "grad_norm": 0.5286349018608756, "learning_rate": 7.785701969806991e-07, "loss": 11.6868, "step": 35312 }, { "epoch": 1.9229339813358926, "grad_norm": 0.583396661750172, "learning_rate": 7.77472336528351e-07, "loss": 11.861, "step": 35313 }, { "epoch": 1.9229884353324755, "grad_norm": 0.5394180114020597, "learning_rate": 7.763752476469988e-07, "loss": 11.696, "step": 35314 }, { "epoch": 1.9230428893290585, "grad_norm": 0.6335493071578289, "learning_rate": 7.752789303452024e-07, "loss": 11.6758, "step": 35315 }, { "epoch": 1.9230973433256418, "grad_norm": 0.5197597917635172, "learning_rate": 7.741833846314773e-07, "loss": 11.8524, "step": 35316 }, { "epoch": 1.9231517973222247, "grad_norm": 0.5378542335438624, "learning_rate": 7.730886105143387e-07, "loss": 11.6057, "step": 35317 }, { "epoch": 1.9232062513188077, "grad_norm": 0.5634133311170223, "learning_rate": 7.719946080023021e-07, "loss": 11.8391, "step": 35318 }, { "epoch": 1.9232607053153907, "grad_norm": 0.5735120756466036, "learning_rate": 7.70901377103872e-07, "loss": 11.8891, "step": 35319 }, { "epoch": 1.9233151593119737, "grad_norm": 0.5057175099043302, "learning_rate": 7.698089178275635e-07, "loss": 11.8658, "step": 35320 }, { "epoch": 1.9233696133085567, "grad_norm": 0.5543803388535611, "learning_rate": 7.68717230181859e-07, "loss": 11.8473, "step": 35321 }, { "epoch": 1.92342406730514, "grad_norm": 0.5872870913362307, "learning_rate": 7.676263141752404e-07, "loss": 11.817, "step": 35322 }, { "epoch": 1.923478521301723, "grad_norm": 0.51143281922094, "learning_rate": 7.665361698162121e-07, "loss": 11.7323, "step": 35323 }, { "epoch": 1.923532975298306, "grad_norm": 0.5044437056489144, "learning_rate": 7.654467971132451e-07, "loss": 11.7122, "step": 35324 }, { "epoch": 1.923587429294889, "grad_norm": 0.5450926954154512, "learning_rate": 7.643581960747992e-07, "loss": 11.6908, "step": 35325 }, { "epoch": 1.923641883291472, "grad_norm": 0.5756954108170521, "learning_rate": 7.632703667093344e-07, "loss": 11.848, "step": 35326 }, { "epoch": 1.923696337288055, "grad_norm": 0.4863770442798545, "learning_rate": 7.621833090253327e-07, "loss": 11.6358, "step": 35327 }, { "epoch": 1.9237507912846379, "grad_norm": 0.508350176005348, "learning_rate": 7.61097023031232e-07, "loss": 11.8138, "step": 35328 }, { "epoch": 1.9238052452812209, "grad_norm": 0.5992019878117516, "learning_rate": 7.60011508735492e-07, "loss": 11.6911, "step": 35329 }, { "epoch": 1.9238596992778039, "grad_norm": 0.5505014228180067, "learning_rate": 7.589267661465393e-07, "loss": 11.8331, "step": 35330 }, { "epoch": 1.9239141532743869, "grad_norm": 0.5914248529883434, "learning_rate": 7.578427952728117e-07, "loss": 11.6986, "step": 35331 }, { "epoch": 1.9239686072709699, "grad_norm": 0.5849041648920296, "learning_rate": 7.567595961227469e-07, "loss": 11.8056, "step": 35332 }, { "epoch": 1.9240230612675528, "grad_norm": 0.5272009266257041, "learning_rate": 7.556771687047492e-07, "loss": 11.5029, "step": 35333 }, { "epoch": 1.9240775152641358, "grad_norm": 0.5604447882902809, "learning_rate": 7.545955130272675e-07, "loss": 11.8475, "step": 35334 }, { "epoch": 1.9241319692607188, "grad_norm": 0.5504769635070138, "learning_rate": 7.535146290986839e-07, "loss": 11.8832, "step": 35335 }, { "epoch": 1.9241864232573018, "grad_norm": 0.5029963049925109, "learning_rate": 7.524345169274139e-07, "loss": 11.7606, "step": 35336 }, { "epoch": 1.9242408772538848, "grad_norm": 0.5312068726749273, "learning_rate": 7.513551765218618e-07, "loss": 11.8833, "step": 35337 }, { "epoch": 1.9242953312504678, "grad_norm": 0.5125757415662441, "learning_rate": 7.5027660789041e-07, "loss": 11.7914, "step": 35338 }, { "epoch": 1.924349785247051, "grad_norm": 0.5649880153604923, "learning_rate": 7.491988110414517e-07, "loss": 11.6746, "step": 35339 }, { "epoch": 1.924404239243634, "grad_norm": 0.5012386348472996, "learning_rate": 7.4812178598338e-07, "loss": 11.795, "step": 35340 }, { "epoch": 1.924458693240217, "grad_norm": 0.5576508571761037, "learning_rate": 7.47045532724544e-07, "loss": 11.6963, "step": 35341 }, { "epoch": 1.9245131472368, "grad_norm": 0.5388868489754646, "learning_rate": 7.459700512733369e-07, "loss": 11.8101, "step": 35342 }, { "epoch": 1.924567601233383, "grad_norm": 0.5524929396746211, "learning_rate": 7.448953416380966e-07, "loss": 11.755, "step": 35343 }, { "epoch": 1.924622055229966, "grad_norm": 0.5281468662307155, "learning_rate": 7.438214038272162e-07, "loss": 11.6712, "step": 35344 }, { "epoch": 1.9246765092265492, "grad_norm": 0.5019786033804621, "learning_rate": 7.427482378490114e-07, "loss": 11.7936, "step": 35345 }, { "epoch": 1.9247309632231322, "grad_norm": 0.4985147264600398, "learning_rate": 7.416758437118421e-07, "loss": 11.7349, "step": 35346 }, { "epoch": 1.9247854172197152, "grad_norm": 0.6123380396788846, "learning_rate": 7.406042214240572e-07, "loss": 11.7973, "step": 35347 }, { "epoch": 1.9248398712162982, "grad_norm": 0.5479284479493136, "learning_rate": 7.395333709939722e-07, "loss": 11.7835, "step": 35348 }, { "epoch": 1.9248943252128812, "grad_norm": 0.611817874657679, "learning_rate": 7.384632924299139e-07, "loss": 11.841, "step": 35349 }, { "epoch": 1.9249487792094642, "grad_norm": 0.5113991007685988, "learning_rate": 7.373939857402201e-07, "loss": 11.7846, "step": 35350 }, { "epoch": 1.9250032332060472, "grad_norm": 0.852600629121004, "learning_rate": 7.363254509331952e-07, "loss": 11.6187, "step": 35351 }, { "epoch": 1.9250576872026302, "grad_norm": 0.5040558355011653, "learning_rate": 7.352576880171547e-07, "loss": 11.821, "step": 35352 }, { "epoch": 1.9251121411992131, "grad_norm": 0.5355425617423119, "learning_rate": 7.34190697000392e-07, "loss": 11.7523, "step": 35353 }, { "epoch": 1.9251665951957961, "grad_norm": 0.49322277301141726, "learning_rate": 7.331244778912116e-07, "loss": 11.8026, "step": 35354 }, { "epoch": 1.9252210491923791, "grad_norm": 0.5022982131103606, "learning_rate": 7.32059030697907e-07, "loss": 11.843, "step": 35355 }, { "epoch": 1.9252755031889621, "grad_norm": 0.5167383581109334, "learning_rate": 7.30994355428749e-07, "loss": 11.7494, "step": 35356 }, { "epoch": 1.9253299571855451, "grad_norm": 0.532425071896843, "learning_rate": 7.299304520920313e-07, "loss": 11.8206, "step": 35357 }, { "epoch": 1.925384411182128, "grad_norm": 0.5259448588169953, "learning_rate": 7.288673206960139e-07, "loss": 11.7795, "step": 35358 }, { "epoch": 1.925438865178711, "grad_norm": 0.5248042446606963, "learning_rate": 7.278049612489901e-07, "loss": 11.8046, "step": 35359 }, { "epoch": 1.925493319175294, "grad_norm": 0.5425324117292638, "learning_rate": 7.267433737591867e-07, "loss": 11.8307, "step": 35360 }, { "epoch": 1.925547773171877, "grad_norm": 0.5062761099637456, "learning_rate": 7.256825582348748e-07, "loss": 11.7454, "step": 35361 }, { "epoch": 1.92560222716846, "grad_norm": 0.5352392168668737, "learning_rate": 7.246225146843144e-07, "loss": 11.7295, "step": 35362 }, { "epoch": 1.9256566811650433, "grad_norm": 0.5184501313269085, "learning_rate": 7.235632431157325e-07, "loss": 11.7991, "step": 35363 }, { "epoch": 1.9257111351616263, "grad_norm": 0.5216924404581125, "learning_rate": 7.225047435373666e-07, "loss": 11.8121, "step": 35364 }, { "epoch": 1.9257655891582093, "grad_norm": 0.5641134648379601, "learning_rate": 7.21447015957466e-07, "loss": 11.8982, "step": 35365 }, { "epoch": 1.9258200431547923, "grad_norm": 0.5949317025428467, "learning_rate": 7.20390060384224e-07, "loss": 11.7294, "step": 35366 }, { "epoch": 1.9258744971513753, "grad_norm": 0.5402392675001839, "learning_rate": 7.193338768259005e-07, "loss": 11.7455, "step": 35367 }, { "epoch": 1.9259289511479585, "grad_norm": 0.6061302741210017, "learning_rate": 7.182784652906671e-07, "loss": 11.7955, "step": 35368 }, { "epoch": 1.9259834051445415, "grad_norm": 0.5470773797960535, "learning_rate": 7.172238257867614e-07, "loss": 11.7651, "step": 35369 }, { "epoch": 1.9260378591411245, "grad_norm": 0.5421452448098459, "learning_rate": 7.161699583223658e-07, "loss": 11.7071, "step": 35370 }, { "epoch": 1.9260923131377075, "grad_norm": 0.5261602671605133, "learning_rate": 7.151168629056737e-07, "loss": 11.8025, "step": 35371 }, { "epoch": 1.9261467671342904, "grad_norm": 0.5599441292931336, "learning_rate": 7.140645395449008e-07, "loss": 11.7575, "step": 35372 }, { "epoch": 1.9262012211308734, "grad_norm": 0.5600867048068385, "learning_rate": 7.130129882482073e-07, "loss": 11.6645, "step": 35373 }, { "epoch": 1.9262556751274564, "grad_norm": 0.5715260293873371, "learning_rate": 7.119622090237754e-07, "loss": 11.8297, "step": 35374 }, { "epoch": 1.9263101291240394, "grad_norm": 0.5468786873557178, "learning_rate": 7.109122018797764e-07, "loss": 11.8405, "step": 35375 }, { "epoch": 1.9263645831206224, "grad_norm": 0.5285094818170534, "learning_rate": 7.098629668243706e-07, "loss": 11.7535, "step": 35376 }, { "epoch": 1.9264190371172054, "grad_norm": 0.5607105169284818, "learning_rate": 7.088145038657179e-07, "loss": 11.6754, "step": 35377 }, { "epoch": 1.9264734911137884, "grad_norm": 0.49940159785988447, "learning_rate": 7.077668130119786e-07, "loss": 11.7179, "step": 35378 }, { "epoch": 1.9265279451103714, "grad_norm": 0.4907213726895333, "learning_rate": 7.067198942713016e-07, "loss": 11.7589, "step": 35379 }, { "epoch": 1.9265823991069544, "grad_norm": 0.5659091368713514, "learning_rate": 7.056737476518138e-07, "loss": 11.9466, "step": 35380 }, { "epoch": 1.9266368531035374, "grad_norm": 0.5643104997612685, "learning_rate": 7.046283731616643e-07, "loss": 11.901, "step": 35381 }, { "epoch": 1.9266913071001204, "grad_norm": 0.5137125062711562, "learning_rate": 7.035837708089799e-07, "loss": 11.8287, "step": 35382 }, { "epoch": 1.9267457610967034, "grad_norm": 0.5954182365678513, "learning_rate": 7.025399406018652e-07, "loss": 11.7802, "step": 35383 }, { "epoch": 1.9268002150932864, "grad_norm": 0.5230969445519821, "learning_rate": 7.014968825484691e-07, "loss": 11.7085, "step": 35384 }, { "epoch": 1.9268546690898694, "grad_norm": 0.5325847700152697, "learning_rate": 7.004545966568854e-07, "loss": 11.671, "step": 35385 }, { "epoch": 1.9269091230864526, "grad_norm": 0.5498119132753048, "learning_rate": 6.994130829352074e-07, "loss": 11.873, "step": 35386 }, { "epoch": 1.9269635770830356, "grad_norm": 0.5351749950341316, "learning_rate": 6.98372341391551e-07, "loss": 11.7139, "step": 35387 }, { "epoch": 1.9270180310796186, "grad_norm": 0.5443566007650343, "learning_rate": 6.973323720339986e-07, "loss": 11.8042, "step": 35388 }, { "epoch": 1.9270724850762015, "grad_norm": 0.5390561097638134, "learning_rate": 6.962931748706547e-07, "loss": 11.7709, "step": 35389 }, { "epoch": 1.9271269390727845, "grad_norm": 0.565712985744551, "learning_rate": 6.952547499095797e-07, "loss": 11.8785, "step": 35390 }, { "epoch": 1.9271813930693675, "grad_norm": 0.5805432028399905, "learning_rate": 6.942170971588558e-07, "loss": 11.8645, "step": 35391 }, { "epoch": 1.9272358470659507, "grad_norm": 0.625499406559887, "learning_rate": 6.931802166265544e-07, "loss": 11.7966, "step": 35392 }, { "epoch": 1.9272903010625337, "grad_norm": 0.517132198749326, "learning_rate": 6.92144108320747e-07, "loss": 11.7365, "step": 35393 }, { "epoch": 1.9273447550591167, "grad_norm": 0.5861462659480324, "learning_rate": 6.911087722494824e-07, "loss": 11.8478, "step": 35394 }, { "epoch": 1.9273992090556997, "grad_norm": 0.5619082696620521, "learning_rate": 6.90074208420799e-07, "loss": 11.7398, "step": 35395 }, { "epoch": 1.9274536630522827, "grad_norm": 0.5148110059500736, "learning_rate": 6.890404168427567e-07, "loss": 11.7284, "step": 35396 }, { "epoch": 1.9275081170488657, "grad_norm": 0.5645150423670221, "learning_rate": 6.880073975233825e-07, "loss": 11.9115, "step": 35397 }, { "epoch": 1.9275625710454487, "grad_norm": 0.6195674293480592, "learning_rate": 6.869751504707255e-07, "loss": 11.7153, "step": 35398 }, { "epoch": 1.9276170250420317, "grad_norm": 0.5095060571853789, "learning_rate": 6.859436756928017e-07, "loss": 11.7286, "step": 35399 }, { "epoch": 1.9276714790386147, "grad_norm": 0.5364047275544772, "learning_rate": 6.849129731976378e-07, "loss": 11.7814, "step": 35400 }, { "epoch": 1.9277259330351977, "grad_norm": 0.5484462847732213, "learning_rate": 6.838830429932386e-07, "loss": 11.6552, "step": 35401 }, { "epoch": 1.9277803870317807, "grad_norm": 0.5992665760189813, "learning_rate": 6.828538850876309e-07, "loss": 11.8318, "step": 35402 }, { "epoch": 1.9278348410283637, "grad_norm": 0.5201795017438869, "learning_rate": 6.818254994887973e-07, "loss": 11.8069, "step": 35403 }, { "epoch": 1.9278892950249467, "grad_norm": 0.5738206542609812, "learning_rate": 6.807978862047537e-07, "loss": 11.8386, "step": 35404 }, { "epoch": 1.9279437490215297, "grad_norm": 0.535493682482242, "learning_rate": 6.797710452434714e-07, "loss": 11.6852, "step": 35405 }, { "epoch": 1.9279982030181126, "grad_norm": 0.5544308372407974, "learning_rate": 6.78744976612955e-07, "loss": 11.8048, "step": 35406 }, { "epoch": 1.9280526570146956, "grad_norm": 0.6455312421116323, "learning_rate": 6.777196803211649e-07, "loss": 11.8757, "step": 35407 }, { "epoch": 1.9281071110112786, "grad_norm": 0.49445511996845926, "learning_rate": 6.766951563760948e-07, "loss": 11.6895, "step": 35408 }, { "epoch": 1.9281615650078618, "grad_norm": 0.5431249779727285, "learning_rate": 6.756714047856938e-07, "loss": 11.7307, "step": 35409 }, { "epoch": 1.9282160190044448, "grad_norm": 0.5803088746264368, "learning_rate": 6.746484255579222e-07, "loss": 11.7811, "step": 35410 }, { "epoch": 1.9282704730010278, "grad_norm": 0.514407079938101, "learning_rate": 6.736262187007513e-07, "loss": 11.5352, "step": 35411 }, { "epoch": 1.9283249269976108, "grad_norm": 0.5249767434297249, "learning_rate": 6.726047842221084e-07, "loss": 11.8586, "step": 35412 }, { "epoch": 1.9283793809941938, "grad_norm": 0.5934939878917341, "learning_rate": 6.715841221299534e-07, "loss": 11.6992, "step": 35413 }, { "epoch": 1.9284338349907768, "grad_norm": 0.5314712080866695, "learning_rate": 6.705642324322248e-07, "loss": 11.7601, "step": 35414 }, { "epoch": 1.92848828898736, "grad_norm": 0.5375466080781637, "learning_rate": 6.695451151368381e-07, "loss": 11.5534, "step": 35415 }, { "epoch": 1.928542742983943, "grad_norm": 0.5091149042748288, "learning_rate": 6.685267702517317e-07, "loss": 11.7133, "step": 35416 }, { "epoch": 1.928597196980526, "grad_norm": 0.5299152011881831, "learning_rate": 6.675091977848102e-07, "loss": 11.7254, "step": 35417 }, { "epoch": 1.928651650977109, "grad_norm": 0.5082856489996755, "learning_rate": 6.664923977440119e-07, "loss": 11.7531, "step": 35418 }, { "epoch": 1.928706104973692, "grad_norm": 0.5583785568851579, "learning_rate": 6.65476370137208e-07, "loss": 11.7428, "step": 35419 }, { "epoch": 1.928760558970275, "grad_norm": 0.5235554945693908, "learning_rate": 6.644611149723257e-07, "loss": 11.6274, "step": 35420 }, { "epoch": 1.928815012966858, "grad_norm": 0.5536106244421286, "learning_rate": 6.634466322572586e-07, "loss": 11.756, "step": 35421 }, { "epoch": 1.928869466963441, "grad_norm": 0.598404157875678, "learning_rate": 6.624329219998781e-07, "loss": 11.7432, "step": 35422 }, { "epoch": 1.928923920960024, "grad_norm": 0.5319733815319337, "learning_rate": 6.614199842080893e-07, "loss": 11.7326, "step": 35423 }, { "epoch": 1.928978374956607, "grad_norm": 0.5583179663698711, "learning_rate": 6.604078188897523e-07, "loss": 11.8577, "step": 35424 }, { "epoch": 1.92903282895319, "grad_norm": 0.6470860211071525, "learning_rate": 6.593964260527385e-07, "loss": 11.6878, "step": 35425 }, { "epoch": 1.929087282949773, "grad_norm": 0.5941266616046202, "learning_rate": 6.583858057049308e-07, "loss": 11.9415, "step": 35426 }, { "epoch": 1.929141736946356, "grad_norm": 0.5077404659532531, "learning_rate": 6.57375957854156e-07, "loss": 11.7247, "step": 35427 }, { "epoch": 1.929196190942939, "grad_norm": 0.5371235103430241, "learning_rate": 6.56366882508297e-07, "loss": 11.697, "step": 35428 }, { "epoch": 1.929250644939522, "grad_norm": 0.5335731010933777, "learning_rate": 6.553585796751916e-07, "loss": 11.8056, "step": 35429 }, { "epoch": 1.929305098936105, "grad_norm": 0.5326915148899944, "learning_rate": 6.543510493626781e-07, "loss": 11.7094, "step": 35430 }, { "epoch": 1.929359552932688, "grad_norm": 0.5458947629846782, "learning_rate": 6.533442915785836e-07, "loss": 11.866, "step": 35431 }, { "epoch": 1.9294140069292711, "grad_norm": 0.7991401788652261, "learning_rate": 6.523383063307465e-07, "loss": 11.6802, "step": 35432 }, { "epoch": 1.9294684609258541, "grad_norm": 0.55400984203346, "learning_rate": 6.513330936269824e-07, "loss": 11.7957, "step": 35433 }, { "epoch": 1.929522914922437, "grad_norm": 0.5486984418403, "learning_rate": 6.503286534751185e-07, "loss": 11.8621, "step": 35434 }, { "epoch": 1.92957736891902, "grad_norm": 0.5650074778730099, "learning_rate": 6.493249858829597e-07, "loss": 11.9049, "step": 35435 }, { "epoch": 1.929631822915603, "grad_norm": 0.5348936191789725, "learning_rate": 6.483220908583221e-07, "loss": 11.7854, "step": 35436 }, { "epoch": 1.929686276912186, "grad_norm": 0.5484853027327932, "learning_rate": 6.47319968408977e-07, "loss": 11.7551, "step": 35437 }, { "epoch": 1.9297407309087693, "grad_norm": 0.596676607543935, "learning_rate": 6.463186185427405e-07, "loss": 11.9053, "step": 35438 }, { "epoch": 1.9297951849053523, "grad_norm": 0.5244260156174037, "learning_rate": 6.453180412673843e-07, "loss": 11.8333, "step": 35439 }, { "epoch": 1.9298496389019353, "grad_norm": 0.6116199511475648, "learning_rate": 6.44318236590713e-07, "loss": 11.8368, "step": 35440 }, { "epoch": 1.9299040928985183, "grad_norm": 0.5096518970216495, "learning_rate": 6.433192045204762e-07, "loss": 11.8159, "step": 35441 }, { "epoch": 1.9299585468951013, "grad_norm": 0.508541784617971, "learning_rate": 6.423209450644452e-07, "loss": 11.7323, "step": 35442 }, { "epoch": 1.9300130008916843, "grad_norm": 0.5555457044378989, "learning_rate": 6.413234582303918e-07, "loss": 11.8258, "step": 35443 }, { "epoch": 1.9300674548882673, "grad_norm": 0.5365538766671122, "learning_rate": 6.403267440260763e-07, "loss": 11.7692, "step": 35444 }, { "epoch": 1.9301219088848502, "grad_norm": 0.5190823342156515, "learning_rate": 6.39330802459237e-07, "loss": 11.6126, "step": 35445 }, { "epoch": 1.9301763628814332, "grad_norm": 0.5493325550371453, "learning_rate": 6.383356335376234e-07, "loss": 11.8537, "step": 35446 }, { "epoch": 1.9302308168780162, "grad_norm": 0.568077630585568, "learning_rate": 6.373412372689735e-07, "loss": 11.887, "step": 35447 }, { "epoch": 1.9302852708745992, "grad_norm": 0.5642403482181683, "learning_rate": 6.363476136610369e-07, "loss": 11.815, "step": 35448 }, { "epoch": 1.9303397248711822, "grad_norm": 0.5813725648126096, "learning_rate": 6.353547627215073e-07, "loss": 11.8681, "step": 35449 }, { "epoch": 1.9303941788677652, "grad_norm": 0.5534224653106757, "learning_rate": 6.343626844581229e-07, "loss": 11.7836, "step": 35450 }, { "epoch": 1.9304486328643482, "grad_norm": 0.5909850161885183, "learning_rate": 6.333713788785999e-07, "loss": 11.7532, "step": 35451 }, { "epoch": 1.9305030868609312, "grad_norm": 0.5617021735881333, "learning_rate": 6.323808459906544e-07, "loss": 11.8077, "step": 35452 }, { "epoch": 1.9305575408575142, "grad_norm": 0.5088197444062463, "learning_rate": 6.31391085801969e-07, "loss": 11.7859, "step": 35453 }, { "epoch": 1.9306119948540972, "grad_norm": 0.5667951383496671, "learning_rate": 6.304020983202486e-07, "loss": 11.7958, "step": 35454 }, { "epoch": 1.9306664488506802, "grad_norm": 0.5631110617421782, "learning_rate": 6.294138835531982e-07, "loss": 11.7575, "step": 35455 }, { "epoch": 1.9307209028472634, "grad_norm": 0.4771713502211208, "learning_rate": 6.284264415084895e-07, "loss": 11.6977, "step": 35456 }, { "epoch": 1.9307753568438464, "grad_norm": 0.6243482940799704, "learning_rate": 6.274397721937941e-07, "loss": 11.8624, "step": 35457 }, { "epoch": 1.9308298108404294, "grad_norm": 0.5242654353878011, "learning_rate": 6.264538756167837e-07, "loss": 11.8154, "step": 35458 }, { "epoch": 1.9308842648370124, "grad_norm": 0.5354684200038378, "learning_rate": 6.25468751785141e-07, "loss": 11.8179, "step": 35459 }, { "epoch": 1.9309387188335954, "grad_norm": 0.5434240838919755, "learning_rate": 6.244844007065265e-07, "loss": 11.8165, "step": 35460 }, { "epoch": 1.9309931728301784, "grad_norm": 0.5358392178486855, "learning_rate": 6.235008223885785e-07, "loss": 11.8687, "step": 35461 }, { "epoch": 1.9310476268267616, "grad_norm": 0.5295326054658354, "learning_rate": 6.225180168389578e-07, "loss": 11.6926, "step": 35462 }, { "epoch": 1.9311020808233446, "grad_norm": 0.5109574104897429, "learning_rate": 6.215359840652912e-07, "loss": 11.5464, "step": 35463 }, { "epoch": 1.9311565348199276, "grad_norm": 0.5126465925223631, "learning_rate": 6.205547240752396e-07, "loss": 11.9344, "step": 35464 }, { "epoch": 1.9312109888165105, "grad_norm": 0.562043269587614, "learning_rate": 6.195742368764191e-07, "loss": 11.8007, "step": 35465 }, { "epoch": 1.9312654428130935, "grad_norm": 0.516122612809263, "learning_rate": 6.185945224764456e-07, "loss": 11.8307, "step": 35466 }, { "epoch": 1.9313198968096765, "grad_norm": 0.5150862949424221, "learning_rate": 6.176155808829575e-07, "loss": 11.7832, "step": 35467 }, { "epoch": 1.9313743508062595, "grad_norm": 0.5312854379425083, "learning_rate": 6.16637412103549e-07, "loss": 11.5806, "step": 35468 }, { "epoch": 1.9314288048028425, "grad_norm": 0.5217652543439314, "learning_rate": 6.156600161458359e-07, "loss": 11.6881, "step": 35469 }, { "epoch": 1.9314832587994255, "grad_norm": 0.5782885527472418, "learning_rate": 6.146833930174234e-07, "loss": 11.8026, "step": 35470 }, { "epoch": 1.9315377127960085, "grad_norm": 0.5043587481644304, "learning_rate": 6.137075427258943e-07, "loss": 11.7683, "step": 35471 }, { "epoch": 1.9315921667925915, "grad_norm": 0.5348028428426579, "learning_rate": 6.127324652788424e-07, "loss": 11.6308, "step": 35472 }, { "epoch": 1.9316466207891745, "grad_norm": 0.6041838035066207, "learning_rate": 6.117581606838507e-07, "loss": 11.7065, "step": 35473 }, { "epoch": 1.9317010747857575, "grad_norm": 0.544692518559051, "learning_rate": 6.10784628948502e-07, "loss": 11.7577, "step": 35474 }, { "epoch": 1.9317555287823405, "grad_norm": 0.5529879675505308, "learning_rate": 6.098118700803568e-07, "loss": 11.6863, "step": 35475 }, { "epoch": 1.9318099827789235, "grad_norm": 0.5296035195851783, "learning_rate": 6.088398840869758e-07, "loss": 11.8179, "step": 35476 }, { "epoch": 1.9318644367755065, "grad_norm": 0.7115354645674317, "learning_rate": 6.078686709759307e-07, "loss": 11.6767, "step": 35477 }, { "epoch": 1.9319188907720894, "grad_norm": 0.5158612277346752, "learning_rate": 6.068982307547599e-07, "loss": 11.8101, "step": 35478 }, { "epoch": 1.9319733447686727, "grad_norm": 0.550608074542295, "learning_rate": 6.059285634310241e-07, "loss": 11.7904, "step": 35479 }, { "epoch": 1.9320277987652557, "grad_norm": 0.5920005190755275, "learning_rate": 6.049596690122506e-07, "loss": 11.7993, "step": 35480 }, { "epoch": 1.9320822527618386, "grad_norm": 0.6001616860296559, "learning_rate": 6.039915475059777e-07, "loss": 11.7818, "step": 35481 }, { "epoch": 1.9321367067584216, "grad_norm": 0.5419926409527603, "learning_rate": 6.030241989197438e-07, "loss": 11.8246, "step": 35482 }, { "epoch": 1.9321911607550046, "grad_norm": 0.516740599127782, "learning_rate": 6.020576232610542e-07, "loss": 11.812, "step": 35483 }, { "epoch": 1.9322456147515876, "grad_norm": 0.5638874430702876, "learning_rate": 6.010918205374361e-07, "loss": 11.7153, "step": 35484 }, { "epoch": 1.9323000687481708, "grad_norm": 0.5943364813106105, "learning_rate": 6.001267907564057e-07, "loss": 11.6287, "step": 35485 }, { "epoch": 1.9323545227447538, "grad_norm": 0.5856328089669531, "learning_rate": 5.991625339254458e-07, "loss": 11.7378, "step": 35486 }, { "epoch": 1.9324089767413368, "grad_norm": 0.5527893443743865, "learning_rate": 5.981990500520729e-07, "loss": 11.8416, "step": 35487 }, { "epoch": 1.9324634307379198, "grad_norm": 0.5339137887451216, "learning_rate": 5.972363391437696e-07, "loss": 11.8466, "step": 35488 }, { "epoch": 1.9325178847345028, "grad_norm": 0.6167140816443636, "learning_rate": 5.962744012080413e-07, "loss": 11.7049, "step": 35489 }, { "epoch": 1.9325723387310858, "grad_norm": 0.5891799387008039, "learning_rate": 5.953132362523372e-07, "loss": 11.6747, "step": 35490 }, { "epoch": 1.9326267927276688, "grad_norm": 0.5361965009771246, "learning_rate": 5.943528442841517e-07, "loss": 11.7759, "step": 35491 }, { "epoch": 1.9326812467242518, "grad_norm": 0.5822303092732038, "learning_rate": 5.933932253109454e-07, "loss": 11.8003, "step": 35492 }, { "epoch": 1.9327357007208348, "grad_norm": 0.5725928998089853, "learning_rate": 5.924343793401898e-07, "loss": 11.9443, "step": 35493 }, { "epoch": 1.9327901547174178, "grad_norm": 0.5570129172893064, "learning_rate": 5.914763063793349e-07, "loss": 11.7837, "step": 35494 }, { "epoch": 1.9328446087140008, "grad_norm": 0.49833430172502596, "learning_rate": 5.905190064358301e-07, "loss": 11.7331, "step": 35495 }, { "epoch": 1.9328990627105838, "grad_norm": 0.48798094462713315, "learning_rate": 5.89562479517125e-07, "loss": 11.7916, "step": 35496 }, { "epoch": 1.9329535167071668, "grad_norm": 0.5102213066152852, "learning_rate": 5.886067256306472e-07, "loss": 11.8337, "step": 35497 }, { "epoch": 1.9330079707037497, "grad_norm": 0.5262706629664062, "learning_rate": 5.876517447838347e-07, "loss": 11.7168, "step": 35498 }, { "epoch": 1.9330624247003327, "grad_norm": 0.5274663062535434, "learning_rate": 5.866975369841155e-07, "loss": 11.7886, "step": 35499 }, { "epoch": 1.9331168786969157, "grad_norm": 0.5142626794514161, "learning_rate": 5.857441022389054e-07, "loss": 11.7051, "step": 35500 }, { "epoch": 1.9331713326934987, "grad_norm": 0.5462579250009353, "learning_rate": 5.84791440555621e-07, "loss": 11.8119, "step": 35501 }, { "epoch": 1.933225786690082, "grad_norm": 0.5515835747959236, "learning_rate": 5.838395519416784e-07, "loss": 11.8155, "step": 35502 }, { "epoch": 1.933280240686665, "grad_norm": 0.5028460130939895, "learning_rate": 5.828884364044607e-07, "loss": 11.8183, "step": 35503 }, { "epoch": 1.933334694683248, "grad_norm": 0.5402563121058426, "learning_rate": 5.819380939513841e-07, "loss": 11.8034, "step": 35504 }, { "epoch": 1.933389148679831, "grad_norm": 0.504151490597803, "learning_rate": 5.809885245898206e-07, "loss": 11.7673, "step": 35505 }, { "epoch": 1.933443602676414, "grad_norm": 0.5367612822078753, "learning_rate": 5.800397283271752e-07, "loss": 11.8184, "step": 35506 }, { "epoch": 1.933498056672997, "grad_norm": 0.6087798429679145, "learning_rate": 5.790917051707978e-07, "loss": 11.7996, "step": 35507 }, { "epoch": 1.9335525106695801, "grad_norm": 0.5058173259283866, "learning_rate": 5.781444551280823e-07, "loss": 11.7636, "step": 35508 }, { "epoch": 1.933606964666163, "grad_norm": 0.5104911424120063, "learning_rate": 5.771979782063897e-07, "loss": 11.7917, "step": 35509 }, { "epoch": 1.933661418662746, "grad_norm": 0.5269449614424476, "learning_rate": 5.762522744130805e-07, "loss": 11.8466, "step": 35510 }, { "epoch": 1.933715872659329, "grad_norm": 0.6238794507286992, "learning_rate": 5.753073437555046e-07, "loss": 11.8674, "step": 35511 }, { "epoch": 1.933770326655912, "grad_norm": 0.5058354461907706, "learning_rate": 5.743631862410115e-07, "loss": 11.7186, "step": 35512 }, { "epoch": 1.933824780652495, "grad_norm": 0.6166115157126538, "learning_rate": 5.73419801876951e-07, "loss": 11.8859, "step": 35513 }, { "epoch": 1.933879234649078, "grad_norm": 0.5258356103363334, "learning_rate": 5.724771906706505e-07, "loss": 11.7401, "step": 35514 }, { "epoch": 1.933933688645661, "grad_norm": 0.5825308617439735, "learning_rate": 5.715353526294376e-07, "loss": 11.8494, "step": 35515 }, { "epoch": 1.933988142642244, "grad_norm": 0.5293163193035932, "learning_rate": 5.705942877606396e-07, "loss": 11.7866, "step": 35516 }, { "epoch": 1.934042596638827, "grad_norm": 0.5160769179071342, "learning_rate": 5.69653996071573e-07, "loss": 11.5809, "step": 35517 }, { "epoch": 1.93409705063541, "grad_norm": 0.49040082299889726, "learning_rate": 5.687144775695541e-07, "loss": 11.7248, "step": 35518 }, { "epoch": 1.934151504631993, "grad_norm": 0.5418900609016448, "learning_rate": 5.677757322618881e-07, "loss": 11.6884, "step": 35519 }, { "epoch": 1.934205958628576, "grad_norm": 0.4995072799842643, "learning_rate": 5.668377601558694e-07, "loss": 11.7995, "step": 35520 }, { "epoch": 1.934260412625159, "grad_norm": 0.5595781752915366, "learning_rate": 5.659005612587919e-07, "loss": 11.9312, "step": 35521 }, { "epoch": 1.934314866621742, "grad_norm": 0.5210870773252599, "learning_rate": 5.649641355779501e-07, "loss": 11.6057, "step": 35522 }, { "epoch": 1.934369320618325, "grad_norm": 0.5302416990401119, "learning_rate": 5.640284831206266e-07, "loss": 11.8015, "step": 35523 }, { "epoch": 1.934423774614908, "grad_norm": 0.5602177390948307, "learning_rate": 5.630936038940826e-07, "loss": 11.7848, "step": 35524 }, { "epoch": 1.934478228611491, "grad_norm": 0.674124727988565, "learning_rate": 5.621594979056122e-07, "loss": 11.8302, "step": 35525 }, { "epoch": 1.9345326826080742, "grad_norm": 0.525792940730806, "learning_rate": 5.612261651624651e-07, "loss": 11.7443, "step": 35526 }, { "epoch": 1.9345871366046572, "grad_norm": 0.542087456639, "learning_rate": 5.60293605671891e-07, "loss": 11.7502, "step": 35527 }, { "epoch": 1.9346415906012402, "grad_norm": 0.6032764142708296, "learning_rate": 5.593618194411509e-07, "loss": 11.8292, "step": 35528 }, { "epoch": 1.9346960445978232, "grad_norm": 0.5052367945536791, "learning_rate": 5.584308064774834e-07, "loss": 11.6832, "step": 35529 }, { "epoch": 1.9347504985944062, "grad_norm": 0.5790057680640368, "learning_rate": 5.575005667881383e-07, "loss": 11.8919, "step": 35530 }, { "epoch": 1.9348049525909894, "grad_norm": 0.5269509280830994, "learning_rate": 5.565711003803542e-07, "loss": 11.7955, "step": 35531 }, { "epoch": 1.9348594065875724, "grad_norm": 0.5238391545030523, "learning_rate": 5.556424072613365e-07, "loss": 11.7945, "step": 35532 }, { "epoch": 1.9349138605841554, "grad_norm": 0.5270434170481811, "learning_rate": 5.547144874383348e-07, "loss": 11.8329, "step": 35533 }, { "epoch": 1.9349683145807384, "grad_norm": 0.5729170709511119, "learning_rate": 5.537873409185434e-07, "loss": 11.6325, "step": 35534 }, { "epoch": 1.9350227685773214, "grad_norm": 0.6404820091796056, "learning_rate": 5.52860967709179e-07, "loss": 11.8465, "step": 35535 }, { "epoch": 1.9350772225739044, "grad_norm": 0.572087389665054, "learning_rate": 5.519353678174465e-07, "loss": 11.5826, "step": 35536 }, { "epoch": 1.9351316765704873, "grad_norm": 0.5094313491671534, "learning_rate": 5.510105412505406e-07, "loss": 11.7929, "step": 35537 }, { "epoch": 1.9351861305670703, "grad_norm": 0.5397780977723677, "learning_rate": 5.500864880156553e-07, "loss": 11.6658, "step": 35538 }, { "epoch": 1.9352405845636533, "grad_norm": 0.5748822295368378, "learning_rate": 5.491632081199626e-07, "loss": 11.6088, "step": 35539 }, { "epoch": 1.9352950385602363, "grad_norm": 0.6642626763132676, "learning_rate": 5.482407015706681e-07, "loss": 11.9099, "step": 35540 }, { "epoch": 1.9353494925568193, "grad_norm": 0.49085089642363156, "learning_rate": 5.473189683749325e-07, "loss": 11.7104, "step": 35541 }, { "epoch": 1.9354039465534023, "grad_norm": 0.5444184984950573, "learning_rate": 5.463980085399167e-07, "loss": 11.6211, "step": 35542 }, { "epoch": 1.9354584005499853, "grad_norm": 0.561893596251195, "learning_rate": 5.45477822072793e-07, "loss": 11.7222, "step": 35543 }, { "epoch": 1.9355128545465683, "grad_norm": 0.6352864392320118, "learning_rate": 5.44558408980711e-07, "loss": 11.7787, "step": 35544 }, { "epoch": 1.9355673085431513, "grad_norm": 0.6272580156377827, "learning_rate": 5.436397692708206e-07, "loss": 11.8069, "step": 35545 }, { "epoch": 1.9356217625397343, "grad_norm": 0.6382483803998975, "learning_rate": 5.427219029502717e-07, "loss": 11.8302, "step": 35546 }, { "epoch": 1.9356762165363173, "grad_norm": 0.5368684556254641, "learning_rate": 5.418048100261808e-07, "loss": 11.8036, "step": 35547 }, { "epoch": 1.9357306705329003, "grad_norm": 0.5487146134663398, "learning_rate": 5.408884905057088e-07, "loss": 11.8738, "step": 35548 }, { "epoch": 1.9357851245294835, "grad_norm": 0.554506082062712, "learning_rate": 5.399729443959611e-07, "loss": 11.8091, "step": 35549 }, { "epoch": 1.9358395785260665, "grad_norm": 0.5123244370176998, "learning_rate": 5.390581717040766e-07, "loss": 11.8091, "step": 35550 }, { "epoch": 1.9358940325226495, "grad_norm": 0.5316891974945974, "learning_rate": 5.381441724371384e-07, "loss": 11.7525, "step": 35551 }, { "epoch": 1.9359484865192325, "grad_norm": 0.5424051800823211, "learning_rate": 5.37230946602274e-07, "loss": 11.7651, "step": 35552 }, { "epoch": 1.9360029405158155, "grad_norm": 0.5074854984584203, "learning_rate": 5.363184942065891e-07, "loss": 11.785, "step": 35553 }, { "epoch": 1.9360573945123984, "grad_norm": 0.5125622814230949, "learning_rate": 5.354068152571668e-07, "loss": 11.773, "step": 35554 }, { "epoch": 1.9361118485089817, "grad_norm": 0.5326360895208188, "learning_rate": 5.344959097611014e-07, "loss": 11.7576, "step": 35555 }, { "epoch": 1.9361663025055647, "grad_norm": 0.5538480999750912, "learning_rate": 5.33585777725476e-07, "loss": 11.7114, "step": 35556 }, { "epoch": 1.9362207565021476, "grad_norm": 0.5686752853133247, "learning_rate": 5.326764191573741e-07, "loss": 11.7576, "step": 35557 }, { "epoch": 1.9362752104987306, "grad_norm": 0.5361564625880706, "learning_rate": 5.317678340638566e-07, "loss": 11.7045, "step": 35558 }, { "epoch": 1.9363296644953136, "grad_norm": 0.5772105290727838, "learning_rate": 5.308600224519844e-07, "loss": 11.7416, "step": 35559 }, { "epoch": 1.9363841184918966, "grad_norm": 0.554722701537153, "learning_rate": 5.299529843288409e-07, "loss": 11.7756, "step": 35560 }, { "epoch": 1.9364385724884796, "grad_norm": 0.5684014070116883, "learning_rate": 5.290467197014537e-07, "loss": 11.8636, "step": 35561 }, { "epoch": 1.9364930264850626, "grad_norm": 0.594703750715069, "learning_rate": 5.281412285768839e-07, "loss": 11.6712, "step": 35562 }, { "epoch": 1.9365474804816456, "grad_norm": 0.5671834112957976, "learning_rate": 5.272365109621702e-07, "loss": 11.7228, "step": 35563 }, { "epoch": 1.9366019344782286, "grad_norm": 0.533732680674147, "learning_rate": 5.263325668643404e-07, "loss": 11.7501, "step": 35564 }, { "epoch": 1.9366563884748116, "grad_norm": 0.5474627287160215, "learning_rate": 5.254293962904444e-07, "loss": 11.8011, "step": 35565 }, { "epoch": 1.9367108424713946, "grad_norm": 0.537744675436712, "learning_rate": 5.245269992474766e-07, "loss": 11.7322, "step": 35566 }, { "epoch": 1.9367652964679776, "grad_norm": 0.6856842289761352, "learning_rate": 5.236253757424758e-07, "loss": 11.8531, "step": 35567 }, { "epoch": 1.9368197504645606, "grad_norm": 0.528508075490259, "learning_rate": 5.227245257824475e-07, "loss": 11.8795, "step": 35568 }, { "epoch": 1.9368742044611436, "grad_norm": 0.5458446312322265, "learning_rate": 5.218244493743862e-07, "loss": 11.7738, "step": 35569 }, { "epoch": 1.9369286584577265, "grad_norm": 0.5817752403466625, "learning_rate": 5.209251465253196e-07, "loss": 11.8707, "step": 35570 }, { "epoch": 1.9369831124543095, "grad_norm": 0.5757848563191925, "learning_rate": 5.200266172422085e-07, "loss": 11.8887, "step": 35571 }, { "epoch": 1.9370375664508928, "grad_norm": 0.5366360202542961, "learning_rate": 5.191288615320478e-07, "loss": 11.7073, "step": 35572 }, { "epoch": 1.9370920204474757, "grad_norm": 0.5009043441019304, "learning_rate": 5.182318794018315e-07, "loss": 11.6977, "step": 35573 }, { "epoch": 1.9371464744440587, "grad_norm": 0.5453093343904455, "learning_rate": 5.173356708585208e-07, "loss": 11.8582, "step": 35574 }, { "epoch": 1.9372009284406417, "grad_norm": 0.5530277793269945, "learning_rate": 5.164402359090992e-07, "loss": 11.8872, "step": 35575 }, { "epoch": 1.9372553824372247, "grad_norm": 0.5171475565767923, "learning_rate": 5.155455745605276e-07, "loss": 11.764, "step": 35576 }, { "epoch": 1.9373098364338077, "grad_norm": 0.5567248051119154, "learning_rate": 5.146516868197448e-07, "loss": 11.7506, "step": 35577 }, { "epoch": 1.937364290430391, "grad_norm": 0.5453075674968821, "learning_rate": 5.137585726937233e-07, "loss": 11.7504, "step": 35578 }, { "epoch": 1.937418744426974, "grad_norm": 0.5437802702069786, "learning_rate": 5.128662321893906e-07, "loss": 11.7658, "step": 35579 }, { "epoch": 1.937473198423557, "grad_norm": 0.5910353216614329, "learning_rate": 5.11974665313697e-07, "loss": 11.8461, "step": 35580 }, { "epoch": 1.93752765242014, "grad_norm": 0.5412227303444469, "learning_rate": 5.1108387207357e-07, "loss": 11.7498, "step": 35581 }, { "epoch": 1.937582106416723, "grad_norm": 0.5293252489419342, "learning_rate": 5.101938524759486e-07, "loss": 11.7091, "step": 35582 }, { "epoch": 1.937636560413306, "grad_norm": 0.5686496400691846, "learning_rate": 5.093046065277385e-07, "loss": 11.8361, "step": 35583 }, { "epoch": 1.937691014409889, "grad_norm": 0.6288173121877694, "learning_rate": 5.084161342358562e-07, "loss": 11.7634, "step": 35584 }, { "epoch": 1.9377454684064719, "grad_norm": 0.5355093804957634, "learning_rate": 5.075284356072185e-07, "loss": 11.798, "step": 35585 }, { "epoch": 1.9377999224030549, "grad_norm": 0.5980710019276938, "learning_rate": 5.06641510648731e-07, "loss": 11.7094, "step": 35586 }, { "epoch": 1.9378543763996379, "grad_norm": 0.5638489993134836, "learning_rate": 5.05755359367277e-07, "loss": 11.6973, "step": 35587 }, { "epoch": 1.9379088303962209, "grad_norm": 0.5412255550250967, "learning_rate": 5.048699817697511e-07, "loss": 11.8987, "step": 35588 }, { "epoch": 1.9379632843928039, "grad_norm": 0.53510416101725, "learning_rate": 5.039853778630477e-07, "loss": 11.7264, "step": 35589 }, { "epoch": 1.9380177383893868, "grad_norm": 0.5512286243043325, "learning_rate": 5.03101547654039e-07, "loss": 11.6982, "step": 35590 }, { "epoch": 1.9380721923859698, "grad_norm": 0.5852851572606463, "learning_rate": 5.022184911495864e-07, "loss": 11.6774, "step": 35591 }, { "epoch": 1.9381266463825528, "grad_norm": 0.5660109327879069, "learning_rate": 5.013362083565843e-07, "loss": 11.816, "step": 35592 }, { "epoch": 1.9381811003791358, "grad_norm": 0.5389142622214665, "learning_rate": 5.004546992818715e-07, "loss": 11.8273, "step": 35593 }, { "epoch": 1.9382355543757188, "grad_norm": 0.5466833615710256, "learning_rate": 4.995739639323094e-07, "loss": 11.7037, "step": 35594 }, { "epoch": 1.9382900083723018, "grad_norm": 0.5101710844844005, "learning_rate": 4.98694002314748e-07, "loss": 11.7182, "step": 35595 }, { "epoch": 1.938344462368885, "grad_norm": 0.5598999360532281, "learning_rate": 4.978148144360262e-07, "loss": 11.8324, "step": 35596 }, { "epoch": 1.938398916365468, "grad_norm": 0.5141143040876567, "learning_rate": 4.969364003029941e-07, "loss": 11.7442, "step": 35597 }, { "epoch": 1.938453370362051, "grad_norm": 0.5282292840073158, "learning_rate": 4.960587599224575e-07, "loss": 11.8029, "step": 35598 }, { "epoch": 1.938507824358634, "grad_norm": 0.5559970232317345, "learning_rate": 4.951818933012553e-07, "loss": 11.6902, "step": 35599 }, { "epoch": 1.938562278355217, "grad_norm": 0.5585131601873601, "learning_rate": 4.943058004462042e-07, "loss": 11.8164, "step": 35600 }, { "epoch": 1.9386167323518002, "grad_norm": 0.5361690102528642, "learning_rate": 4.934304813641211e-07, "loss": 11.7702, "step": 35601 }, { "epoch": 1.9386711863483832, "grad_norm": 0.5611180396196582, "learning_rate": 4.925559360618226e-07, "loss": 11.6667, "step": 35602 }, { "epoch": 1.9387256403449662, "grad_norm": 0.6096788073552318, "learning_rate": 4.916821645460812e-07, "loss": 11.8067, "step": 35603 }, { "epoch": 1.9387800943415492, "grad_norm": 0.606285916864132, "learning_rate": 4.908091668237136e-07, "loss": 11.7109, "step": 35604 }, { "epoch": 1.9388345483381322, "grad_norm": 0.5485993629123017, "learning_rate": 4.899369429014922e-07, "loss": 11.8023, "step": 35605 }, { "epoch": 1.9388890023347152, "grad_norm": 0.5112756488971831, "learning_rate": 4.890654927862226e-07, "loss": 11.8408, "step": 35606 }, { "epoch": 1.9389434563312982, "grad_norm": 0.5109369905202668, "learning_rate": 4.881948164846661e-07, "loss": 11.8175, "step": 35607 }, { "epoch": 1.9389979103278812, "grad_norm": 0.5256283769630259, "learning_rate": 4.87324914003584e-07, "loss": 11.7667, "step": 35608 }, { "epoch": 1.9390523643244642, "grad_norm": 0.5116411016231788, "learning_rate": 4.864557853497597e-07, "loss": 11.681, "step": 35609 }, { "epoch": 1.9391068183210471, "grad_norm": 0.5106795253498314, "learning_rate": 4.855874305299435e-07, "loss": 11.7391, "step": 35610 }, { "epoch": 1.9391612723176301, "grad_norm": 0.5515760517609177, "learning_rate": 4.847198495508853e-07, "loss": 11.8701, "step": 35611 }, { "epoch": 1.9392157263142131, "grad_norm": 0.5935175150195036, "learning_rate": 4.838530424193355e-07, "loss": 11.7416, "step": 35612 }, { "epoch": 1.9392701803107961, "grad_norm": 0.5456600415296303, "learning_rate": 4.829870091420219e-07, "loss": 11.7238, "step": 35613 }, { "epoch": 1.9393246343073791, "grad_norm": 0.5820267786212897, "learning_rate": 4.821217497257058e-07, "loss": 11.8029, "step": 35614 }, { "epoch": 1.939379088303962, "grad_norm": 0.5972214761178529, "learning_rate": 4.812572641770929e-07, "loss": 11.8013, "step": 35615 }, { "epoch": 1.939433542300545, "grad_norm": 0.5705273882659945, "learning_rate": 4.803935525029224e-07, "loss": 11.7862, "step": 35616 }, { "epoch": 1.939487996297128, "grad_norm": 0.6152412570596103, "learning_rate": 4.795306147098999e-07, "loss": 11.8496, "step": 35617 }, { "epoch": 1.939542450293711, "grad_norm": 0.6127777368977805, "learning_rate": 4.786684508047201e-07, "loss": 11.8153, "step": 35618 }, { "epoch": 1.9395969042902943, "grad_norm": 0.5332224816342236, "learning_rate": 4.778070607941221e-07, "loss": 11.8921, "step": 35619 }, { "epoch": 1.9396513582868773, "grad_norm": 0.560218249044104, "learning_rate": 4.769464446847782e-07, "loss": 11.7938, "step": 35620 }, { "epoch": 1.9397058122834603, "grad_norm": 0.5371161705150863, "learning_rate": 4.760866024833943e-07, "loss": 11.795, "step": 35621 }, { "epoch": 1.9397602662800433, "grad_norm": 0.5914869001523443, "learning_rate": 4.752275341966428e-07, "loss": 11.7295, "step": 35622 }, { "epoch": 1.9398147202766263, "grad_norm": 0.5783624654681653, "learning_rate": 4.7436923983120717e-07, "loss": 11.9032, "step": 35623 }, { "epoch": 1.9398691742732093, "grad_norm": 0.5788289930007964, "learning_rate": 4.735117193937821e-07, "loss": 11.8802, "step": 35624 }, { "epoch": 1.9399236282697925, "grad_norm": 0.5369306486402379, "learning_rate": 4.726549728910179e-07, "loss": 11.8194, "step": 35625 }, { "epoch": 1.9399780822663755, "grad_norm": 0.5421521257155484, "learning_rate": 4.717990003295758e-07, "loss": 11.8647, "step": 35626 }, { "epoch": 1.9400325362629585, "grad_norm": 0.5615799146355253, "learning_rate": 4.709438017161172e-07, "loss": 11.8358, "step": 35627 }, { "epoch": 1.9400869902595415, "grad_norm": 0.5786992811693126, "learning_rate": 4.700893770572812e-07, "loss": 11.7863, "step": 35628 }, { "epoch": 1.9401414442561244, "grad_norm": 0.5857360801939799, "learning_rate": 4.6923572635974023e-07, "loss": 11.7307, "step": 35629 }, { "epoch": 1.9401958982527074, "grad_norm": 0.5265721571586228, "learning_rate": 4.6838284963010016e-07, "loss": 11.7108, "step": 35630 }, { "epoch": 1.9402503522492904, "grad_norm": 0.5645426037271813, "learning_rate": 4.675307468750112e-07, "loss": 11.7081, "step": 35631 }, { "epoch": 1.9403048062458734, "grad_norm": 0.5340102986397449, "learning_rate": 4.6667941810109026e-07, "loss": 11.704, "step": 35632 }, { "epoch": 1.9403592602424564, "grad_norm": 0.5791237343761372, "learning_rate": 4.6582886331496543e-07, "loss": 11.7368, "step": 35633 }, { "epoch": 1.9404137142390394, "grad_norm": 0.5726002459202131, "learning_rate": 4.649790825232425e-07, "loss": 11.8715, "step": 35634 }, { "epoch": 1.9404681682356224, "grad_norm": 0.5064762737642653, "learning_rate": 4.641300757325273e-07, "loss": 11.8122, "step": 35635 }, { "epoch": 1.9405226222322054, "grad_norm": 0.5245745992275329, "learning_rate": 4.632818429494479e-07, "loss": 11.7848, "step": 35636 }, { "epoch": 1.9405770762287884, "grad_norm": 0.5583594530276754, "learning_rate": 4.6243438418057674e-07, "loss": 11.8176, "step": 35637 }, { "epoch": 1.9406315302253714, "grad_norm": 0.5467704679271195, "learning_rate": 4.6158769943249747e-07, "loss": 11.828, "step": 35638 }, { "epoch": 1.9406859842219544, "grad_norm": 0.6377707657926502, "learning_rate": 4.6074178871181595e-07, "loss": 11.9018, "step": 35639 }, { "epoch": 1.9407404382185374, "grad_norm": 0.5554824544815146, "learning_rate": 4.598966520250936e-07, "loss": 11.6895, "step": 35640 }, { "epoch": 1.9407948922151204, "grad_norm": 0.5584007560908515, "learning_rate": 4.590522893789029e-07, "loss": 11.8051, "step": 35641 }, { "epoch": 1.9408493462117036, "grad_norm": 0.5427642025318676, "learning_rate": 4.5820870077982747e-07, "loss": 11.7008, "step": 35642 }, { "epoch": 1.9409038002082866, "grad_norm": 0.5466973752396468, "learning_rate": 4.5736588623440655e-07, "loss": 11.8237, "step": 35643 }, { "epoch": 1.9409582542048696, "grad_norm": 0.53261653244681, "learning_rate": 4.565238457492016e-07, "loss": 11.847, "step": 35644 }, { "epoch": 1.9410127082014526, "grad_norm": 0.5893298575811762, "learning_rate": 4.5568257933075175e-07, "loss": 11.8655, "step": 35645 }, { "epoch": 1.9410671621980355, "grad_norm": 0.529436593129781, "learning_rate": 4.5484208698562957e-07, "loss": 11.7339, "step": 35646 }, { "epoch": 1.9411216161946185, "grad_norm": 0.4989824159338248, "learning_rate": 4.5400236872032987e-07, "loss": 11.7142, "step": 35647 }, { "epoch": 1.9411760701912018, "grad_norm": 0.5639858010167177, "learning_rate": 4.5316342454141403e-07, "loss": 11.7917, "step": 35648 }, { "epoch": 1.9412305241877847, "grad_norm": 0.5489765442045642, "learning_rate": 4.5232525445538796e-07, "loss": 11.6759, "step": 35649 }, { "epoch": 1.9412849781843677, "grad_norm": 0.552480917792173, "learning_rate": 4.514878584687687e-07, "loss": 11.5201, "step": 35650 }, { "epoch": 1.9413394321809507, "grad_norm": 0.5364813325782707, "learning_rate": 4.506512365880844e-07, "loss": 11.6857, "step": 35651 }, { "epoch": 1.9413938861775337, "grad_norm": 0.522938519881032, "learning_rate": 4.498153888198298e-07, "loss": 11.7345, "step": 35652 }, { "epoch": 1.9414483401741167, "grad_norm": 0.5362017031750769, "learning_rate": 4.4898031517049965e-07, "loss": 11.8485, "step": 35653 }, { "epoch": 1.9415027941706997, "grad_norm": 0.5341845330736563, "learning_rate": 4.4814601564659996e-07, "loss": 11.7259, "step": 35654 }, { "epoch": 1.9415572481672827, "grad_norm": 0.5555783654732127, "learning_rate": 4.473124902546033e-07, "loss": 11.8794, "step": 35655 }, { "epoch": 1.9416117021638657, "grad_norm": 0.5586152100740498, "learning_rate": 4.464797390010045e-07, "loss": 11.6914, "step": 35656 }, { "epoch": 1.9416661561604487, "grad_norm": 0.5447660271687738, "learning_rate": 4.456477618922761e-07, "loss": 11.7789, "step": 35657 }, { "epoch": 1.9417206101570317, "grad_norm": 0.622647630281201, "learning_rate": 4.448165589348796e-07, "loss": 11.7603, "step": 35658 }, { "epoch": 1.9417750641536147, "grad_norm": 0.5431923100601692, "learning_rate": 4.4398613013528766e-07, "loss": 11.792, "step": 35659 }, { "epoch": 1.9418295181501977, "grad_norm": 0.5790240480772548, "learning_rate": 4.4315647549996176e-07, "loss": 11.9162, "step": 35660 }, { "epoch": 1.9418839721467807, "grad_norm": 0.5117777821406759, "learning_rate": 4.4232759503534115e-07, "loss": 11.7656, "step": 35661 }, { "epoch": 1.9419384261433636, "grad_norm": 0.5479628306654931, "learning_rate": 4.414994887478763e-07, "loss": 11.622, "step": 35662 }, { "epoch": 1.9419928801399466, "grad_norm": 0.6233033102570642, "learning_rate": 4.4067215664400643e-07, "loss": 11.9512, "step": 35663 }, { "epoch": 1.9420473341365296, "grad_norm": 0.5564504797952488, "learning_rate": 4.3984559873017086e-07, "loss": 11.673, "step": 35664 }, { "epoch": 1.9421017881331128, "grad_norm": 0.5448089265380534, "learning_rate": 4.3901981501278664e-07, "loss": 11.9291, "step": 35665 }, { "epoch": 1.9421562421296958, "grad_norm": 0.48524318686328877, "learning_rate": 4.3819480549828205e-07, "loss": 11.723, "step": 35666 }, { "epoch": 1.9422106961262788, "grad_norm": 0.6561161077775467, "learning_rate": 4.3737057019307413e-07, "loss": 11.7131, "step": 35667 }, { "epoch": 1.9422651501228618, "grad_norm": 0.652101449136463, "learning_rate": 4.3654710910356886e-07, "loss": 11.9336, "step": 35668 }, { "epoch": 1.9423196041194448, "grad_norm": 0.4927048110208113, "learning_rate": 4.3572442223617225e-07, "loss": 11.6496, "step": 35669 }, { "epoch": 1.9423740581160278, "grad_norm": 0.5433086032537734, "learning_rate": 4.349025095972792e-07, "loss": 11.7397, "step": 35670 }, { "epoch": 1.942428512112611, "grad_norm": 0.5282950663203201, "learning_rate": 4.340813711932734e-07, "loss": 11.8879, "step": 35671 }, { "epoch": 1.942482966109194, "grad_norm": 0.47735063570741465, "learning_rate": 4.3326100703054983e-07, "loss": 11.827, "step": 35672 }, { "epoch": 1.942537420105777, "grad_norm": 0.5427272413004708, "learning_rate": 4.3244141711549223e-07, "loss": 11.7919, "step": 35673 }, { "epoch": 1.94259187410236, "grad_norm": 0.5197529372982094, "learning_rate": 4.316226014544622e-07, "loss": 11.7244, "step": 35674 }, { "epoch": 1.942646328098943, "grad_norm": 0.5386760180800727, "learning_rate": 4.3080456005383243e-07, "loss": 11.7665, "step": 35675 }, { "epoch": 1.942700782095526, "grad_norm": 0.6072065150359209, "learning_rate": 4.2998729291997553e-07, "loss": 11.8564, "step": 35676 }, { "epoch": 1.942755236092109, "grad_norm": 0.5637670787877125, "learning_rate": 4.2917080005921985e-07, "loss": 11.7804, "step": 35677 }, { "epoch": 1.942809690088692, "grad_norm": 0.565105528113536, "learning_rate": 4.28355081477938e-07, "loss": 11.8302, "step": 35678 }, { "epoch": 1.942864144085275, "grad_norm": 0.5648751695002734, "learning_rate": 4.2754013718245836e-07, "loss": 11.6565, "step": 35679 }, { "epoch": 1.942918598081858, "grad_norm": 0.5920240674745916, "learning_rate": 4.267259671791424e-07, "loss": 11.7315, "step": 35680 }, { "epoch": 1.942973052078441, "grad_norm": 0.5077946424213308, "learning_rate": 4.259125714742851e-07, "loss": 11.78, "step": 35681 }, { "epoch": 1.943027506075024, "grad_norm": 0.545072177508665, "learning_rate": 4.25099950074237e-07, "loss": 11.7516, "step": 35682 }, { "epoch": 1.943081960071607, "grad_norm": 0.5208080262270302, "learning_rate": 4.242881029853041e-07, "loss": 11.7015, "step": 35683 }, { "epoch": 1.94313641406819, "grad_norm": 0.48932973275562786, "learning_rate": 4.234770302138147e-07, "loss": 11.6227, "step": 35684 }, { "epoch": 1.943190868064773, "grad_norm": 0.5030720565581431, "learning_rate": 4.2266673176606376e-07, "loss": 11.9005, "step": 35685 }, { "epoch": 1.943245322061356, "grad_norm": 0.5441757426810855, "learning_rate": 4.218572076483573e-07, "loss": 11.8475, "step": 35686 }, { "epoch": 1.943299776057939, "grad_norm": 0.5935279381739246, "learning_rate": 4.210484578669904e-07, "loss": 11.826, "step": 35687 }, { "epoch": 1.943354230054522, "grad_norm": 0.5280797930627416, "learning_rate": 4.202404824282469e-07, "loss": 11.7909, "step": 35688 }, { "epoch": 1.9434086840511051, "grad_norm": 0.5701949166003447, "learning_rate": 4.1943328133841056e-07, "loss": 11.782, "step": 35689 }, { "epoch": 1.943463138047688, "grad_norm": 0.6112172870600644, "learning_rate": 4.1862685460376525e-07, "loss": 11.8887, "step": 35690 }, { "epoch": 1.943517592044271, "grad_norm": 0.5271576083084188, "learning_rate": 4.1782120223057273e-07, "loss": 11.697, "step": 35691 }, { "epoch": 1.943572046040854, "grad_norm": 0.676886655921369, "learning_rate": 4.1701632422510575e-07, "loss": 11.8064, "step": 35692 }, { "epoch": 1.943626500037437, "grad_norm": 0.58317774283948, "learning_rate": 4.1621222059361477e-07, "loss": 11.7945, "step": 35693 }, { "epoch": 1.94368095403402, "grad_norm": 0.5382340106794387, "learning_rate": 4.154088913423615e-07, "loss": 11.6911, "step": 35694 }, { "epoch": 1.9437354080306033, "grad_norm": 0.5199886284674525, "learning_rate": 4.146063364775854e-07, "loss": 11.7783, "step": 35695 }, { "epoch": 1.9437898620271863, "grad_norm": 0.5757448211057639, "learning_rate": 4.13804556005537e-07, "loss": 11.8136, "step": 35696 }, { "epoch": 1.9438443160237693, "grad_norm": 0.6443045661948109, "learning_rate": 4.1300354993244475e-07, "loss": 11.8579, "step": 35697 }, { "epoch": 1.9438987700203523, "grad_norm": 0.5467342450522417, "learning_rate": 4.122033182645368e-07, "loss": 11.7118, "step": 35698 }, { "epoch": 1.9439532240169353, "grad_norm": 0.5542789289943424, "learning_rate": 4.1140386100803063e-07, "loss": 11.8412, "step": 35699 }, { "epoch": 1.9440076780135183, "grad_norm": 0.580831701424614, "learning_rate": 4.1060517816916553e-07, "loss": 11.8274, "step": 35700 }, { "epoch": 1.9440621320101013, "grad_norm": 0.583477141632455, "learning_rate": 4.098072697541144e-07, "loss": 11.8083, "step": 35701 }, { "epoch": 1.9441165860066842, "grad_norm": 0.561768365617742, "learning_rate": 4.090101357691167e-07, "loss": 11.7626, "step": 35702 }, { "epoch": 1.9441710400032672, "grad_norm": 0.5796074077575896, "learning_rate": 4.082137762203564e-07, "loss": 11.7128, "step": 35703 }, { "epoch": 1.9442254939998502, "grad_norm": 0.5634412532462876, "learning_rate": 4.0741819111402846e-07, "loss": 11.777, "step": 35704 }, { "epoch": 1.9442799479964332, "grad_norm": 0.5646150578537383, "learning_rate": 4.0662338045630576e-07, "loss": 11.7639, "step": 35705 }, { "epoch": 1.9443344019930162, "grad_norm": 0.5284997169497452, "learning_rate": 4.058293442533945e-07, "loss": 11.7069, "step": 35706 }, { "epoch": 1.9443888559895992, "grad_norm": 0.5379691627878892, "learning_rate": 4.050360825114563e-07, "loss": 11.8555, "step": 35707 }, { "epoch": 1.9444433099861822, "grad_norm": 0.5539307429360871, "learning_rate": 4.04243595236653e-07, "loss": 11.7842, "step": 35708 }, { "epoch": 1.9444977639827652, "grad_norm": 0.5455320142949022, "learning_rate": 4.0345188243515743e-07, "loss": 11.7417, "step": 35709 }, { "epoch": 1.9445522179793482, "grad_norm": 0.5941572163906921, "learning_rate": 4.0266094411312016e-07, "loss": 11.7816, "step": 35710 }, { "epoch": 1.9446066719759312, "grad_norm": 0.5993469973740057, "learning_rate": 4.018707802766919e-07, "loss": 11.7944, "step": 35711 }, { "epoch": 1.9446611259725144, "grad_norm": 0.5623196863215588, "learning_rate": 4.0108139093202323e-07, "loss": 11.7284, "step": 35712 }, { "epoch": 1.9447155799690974, "grad_norm": 0.5040781196717365, "learning_rate": 4.0029277608524265e-07, "loss": 11.7071, "step": 35713 }, { "epoch": 1.9447700339656804, "grad_norm": 0.524003300164236, "learning_rate": 3.995049357425007e-07, "loss": 11.7005, "step": 35714 }, { "epoch": 1.9448244879622634, "grad_norm": 0.4866673847118549, "learning_rate": 3.987178699098926e-07, "loss": 11.7628, "step": 35715 }, { "epoch": 1.9448789419588464, "grad_norm": 0.5821477240631966, "learning_rate": 3.9793157859358e-07, "loss": 11.8356, "step": 35716 }, { "epoch": 1.9449333959554294, "grad_norm": 0.510507799479634, "learning_rate": 3.971460617996359e-07, "loss": 11.767, "step": 35717 }, { "epoch": 1.9449878499520126, "grad_norm": 0.6280518385953502, "learning_rate": 3.9636131953419975e-07, "loss": 11.6978, "step": 35718 }, { "epoch": 1.9450423039485956, "grad_norm": 0.5285910951440296, "learning_rate": 3.9557735180335567e-07, "loss": 11.8543, "step": 35719 }, { "epoch": 1.9450967579451786, "grad_norm": 0.5458123047948893, "learning_rate": 3.9479415861320977e-07, "loss": 11.7192, "step": 35720 }, { "epoch": 1.9451512119417615, "grad_norm": 0.5328846918990849, "learning_rate": 3.9401173996983507e-07, "loss": 11.8453, "step": 35721 }, { "epoch": 1.9452056659383445, "grad_norm": 0.5329099872425184, "learning_rate": 3.932300958793489e-07, "loss": 11.7942, "step": 35722 }, { "epoch": 1.9452601199349275, "grad_norm": 0.5598684641929673, "learning_rate": 3.924492263477908e-07, "loss": 11.7646, "step": 35723 }, { "epoch": 1.9453145739315105, "grad_norm": 0.559382593285798, "learning_rate": 3.9166913138126704e-07, "loss": 11.7958, "step": 35724 }, { "epoch": 1.9453690279280935, "grad_norm": 0.5545658316275914, "learning_rate": 3.908898109858172e-07, "loss": 11.7752, "step": 35725 }, { "epoch": 1.9454234819246765, "grad_norm": 0.5180100870720471, "learning_rate": 3.901112651675143e-07, "loss": 11.6259, "step": 35726 }, { "epoch": 1.9454779359212595, "grad_norm": 0.545965859639784, "learning_rate": 3.8933349393242003e-07, "loss": 11.8456, "step": 35727 }, { "epoch": 1.9455323899178425, "grad_norm": 0.6642197509422173, "learning_rate": 3.88556497286563e-07, "loss": 11.721, "step": 35728 }, { "epoch": 1.9455868439144255, "grad_norm": 0.4712969524763708, "learning_rate": 3.877802752360049e-07, "loss": 11.6598, "step": 35729 }, { "epoch": 1.9456412979110085, "grad_norm": 0.5494635343391067, "learning_rate": 3.8700482778676327e-07, "loss": 11.8528, "step": 35730 }, { "epoch": 1.9456957519075915, "grad_norm": 0.5428316624175658, "learning_rate": 3.8623015494488877e-07, "loss": 11.7118, "step": 35731 }, { "epoch": 1.9457502059041745, "grad_norm": 0.4863273638061356, "learning_rate": 3.854562567163766e-07, "loss": 11.6769, "step": 35732 }, { "epoch": 1.9458046599007575, "grad_norm": 0.5746758191981314, "learning_rate": 3.8468313310727753e-07, "loss": 11.8249, "step": 35733 }, { "epoch": 1.9458591138973405, "grad_norm": 0.5685567238136227, "learning_rate": 3.8391078412357563e-07, "loss": 11.7821, "step": 35734 }, { "epoch": 1.9459135678939237, "grad_norm": 0.5237326655910691, "learning_rate": 3.831392097712994e-07, "loss": 11.7451, "step": 35735 }, { "epoch": 1.9459680218905067, "grad_norm": 0.6046829238409155, "learning_rate": 3.823684100564329e-07, "loss": 11.8291, "step": 35736 }, { "epoch": 1.9460224758870897, "grad_norm": 0.533224507214216, "learning_rate": 3.8159838498498247e-07, "loss": 11.7393, "step": 35737 }, { "epoch": 1.9460769298836726, "grad_norm": 0.5436070399414449, "learning_rate": 3.8082913456292114e-07, "loss": 11.8217, "step": 35738 }, { "epoch": 1.9461313838802556, "grad_norm": 0.5453353709124602, "learning_rate": 3.800606587962441e-07, "loss": 11.8112, "step": 35739 }, { "epoch": 1.9461858378768386, "grad_norm": 0.5812766822010963, "learning_rate": 3.792929576909132e-07, "loss": 11.8766, "step": 35740 }, { "epoch": 1.9462402918734218, "grad_norm": 0.5537300880662903, "learning_rate": 3.7852603125291265e-07, "loss": 11.7217, "step": 35741 }, { "epoch": 1.9462947458700048, "grad_norm": 0.5276328697901749, "learning_rate": 3.7775987948819316e-07, "loss": 11.8891, "step": 35742 }, { "epoch": 1.9463491998665878, "grad_norm": 0.5260705314244255, "learning_rate": 3.769945024027277e-07, "loss": 11.6815, "step": 35743 }, { "epoch": 1.9464036538631708, "grad_norm": 0.5785938476405321, "learning_rate": 3.7622990000245606e-07, "loss": 11.7479, "step": 35744 }, { "epoch": 1.9464581078597538, "grad_norm": 0.5896680240690657, "learning_rate": 3.75466072293329e-07, "loss": 11.7817, "step": 35745 }, { "epoch": 1.9465125618563368, "grad_norm": 0.6498796511110028, "learning_rate": 3.747030192812862e-07, "loss": 11.7938, "step": 35746 }, { "epoch": 1.9465670158529198, "grad_norm": 0.7237580619834845, "learning_rate": 3.7394074097226725e-07, "loss": 11.8408, "step": 35747 }, { "epoch": 1.9466214698495028, "grad_norm": 0.5563646385313362, "learning_rate": 3.7317923737217876e-07, "loss": 11.7168, "step": 35748 }, { "epoch": 1.9466759238460858, "grad_norm": 0.5168367031565501, "learning_rate": 3.7241850848696023e-07, "loss": 11.7155, "step": 35749 }, { "epoch": 1.9467303778426688, "grad_norm": 0.5130486735454791, "learning_rate": 3.7165855432252926e-07, "loss": 11.7459, "step": 35750 }, { "epoch": 1.9467848318392518, "grad_norm": 0.6244948898279601, "learning_rate": 3.708993748847811e-07, "loss": 11.7626, "step": 35751 }, { "epoch": 1.9468392858358348, "grad_norm": 0.5530923103096124, "learning_rate": 3.701409701796332e-07, "loss": 11.7108, "step": 35752 }, { "epoch": 1.9468937398324178, "grad_norm": 0.5661141389051748, "learning_rate": 3.6938334021296985e-07, "loss": 11.8574, "step": 35753 }, { "epoch": 1.9469481938290008, "grad_norm": 0.5182482736419168, "learning_rate": 3.686264849906973e-07, "loss": 11.5661, "step": 35754 }, { "epoch": 1.9470026478255837, "grad_norm": 0.5639635402331181, "learning_rate": 3.678704045186776e-07, "loss": 11.5872, "step": 35755 }, { "epoch": 1.9470571018221667, "grad_norm": 0.5171289199709851, "learning_rate": 3.6711509880282823e-07, "loss": 11.7741, "step": 35756 }, { "epoch": 1.9471115558187497, "grad_norm": 0.516906244992106, "learning_rate": 3.6636056784898896e-07, "loss": 11.8151, "step": 35757 }, { "epoch": 1.9471660098153327, "grad_norm": 0.5059858006377905, "learning_rate": 3.656068116630329e-07, "loss": 11.5865, "step": 35758 }, { "epoch": 1.947220463811916, "grad_norm": 0.4941656244664837, "learning_rate": 3.6485383025084416e-07, "loss": 11.7932, "step": 35759 }, { "epoch": 1.947274917808499, "grad_norm": 0.6434683748293571, "learning_rate": 3.641016236182404e-07, "loss": 11.7971, "step": 35760 }, { "epoch": 1.947329371805082, "grad_norm": 0.5282323122304959, "learning_rate": 3.6335019177110575e-07, "loss": 11.7219, "step": 35761 }, { "epoch": 1.947383825801665, "grad_norm": 0.502077551476034, "learning_rate": 3.6259953471526885e-07, "loss": 11.7572, "step": 35762 }, { "epoch": 1.947438279798248, "grad_norm": 0.499740545919771, "learning_rate": 3.618496524565584e-07, "loss": 11.7284, "step": 35763 }, { "epoch": 1.947492733794831, "grad_norm": 0.5412275970791999, "learning_rate": 3.6110054500081425e-07, "loss": 11.8111, "step": 35764 }, { "epoch": 1.9475471877914141, "grad_norm": 0.6620992968835064, "learning_rate": 3.6035221235387607e-07, "loss": 11.8467, "step": 35765 }, { "epoch": 1.947601641787997, "grad_norm": 0.5463680273203441, "learning_rate": 3.5960465452152815e-07, "loss": 11.8238, "step": 35766 }, { "epoch": 1.94765609578458, "grad_norm": 0.6230451822896992, "learning_rate": 3.588578715096214e-07, "loss": 11.7563, "step": 35767 }, { "epoch": 1.947710549781163, "grad_norm": 0.5500990160108249, "learning_rate": 3.5811186332393997e-07, "loss": 11.7255, "step": 35768 }, { "epoch": 1.947765003777746, "grad_norm": 0.5274759632381156, "learning_rate": 3.5736662997029046e-07, "loss": 11.7417, "step": 35769 }, { "epoch": 1.947819457774329, "grad_norm": 0.6134745912046852, "learning_rate": 3.566221714544682e-07, "loss": 11.8862, "step": 35770 }, { "epoch": 1.947873911770912, "grad_norm": 0.5942901860692718, "learning_rate": 3.558784877822574e-07, "loss": 11.7984, "step": 35771 }, { "epoch": 1.947928365767495, "grad_norm": 0.5324894676895175, "learning_rate": 3.551355789594535e-07, "loss": 11.5921, "step": 35772 }, { "epoch": 1.947982819764078, "grad_norm": 0.5208748411718979, "learning_rate": 3.543934449918185e-07, "loss": 11.7375, "step": 35773 }, { "epoch": 1.948037273760661, "grad_norm": 0.5884086872750396, "learning_rate": 3.5365208588513666e-07, "loss": 11.8309, "step": 35774 }, { "epoch": 1.948091727757244, "grad_norm": 0.5344587420224608, "learning_rate": 3.5291150164517006e-07, "loss": 11.7614, "step": 35775 }, { "epoch": 1.948146181753827, "grad_norm": 0.5807156630632645, "learning_rate": 3.5217169227765857e-07, "loss": 11.8448, "step": 35776 }, { "epoch": 1.94820063575041, "grad_norm": 0.6509098455259031, "learning_rate": 3.514326577883864e-07, "loss": 11.9187, "step": 35777 }, { "epoch": 1.948255089746993, "grad_norm": 0.6640034731038533, "learning_rate": 3.5069439818308235e-07, "loss": 11.8736, "step": 35778 }, { "epoch": 1.948309543743576, "grad_norm": 0.5197435171253312, "learning_rate": 3.499569134674863e-07, "loss": 11.7151, "step": 35779 }, { "epoch": 1.948363997740159, "grad_norm": 0.5382578311627094, "learning_rate": 3.49220203647338e-07, "loss": 11.711, "step": 35780 }, { "epoch": 1.948418451736742, "grad_norm": 0.5033336945565409, "learning_rate": 3.4848426872836626e-07, "loss": 11.8021, "step": 35781 }, { "epoch": 1.9484729057333252, "grad_norm": 0.5469208153599247, "learning_rate": 3.477491087162887e-07, "loss": 11.668, "step": 35782 }, { "epoch": 1.9485273597299082, "grad_norm": 0.5779186555582223, "learning_rate": 3.4701472361682307e-07, "loss": 11.726, "step": 35783 }, { "epoch": 1.9485818137264912, "grad_norm": 0.567181339386298, "learning_rate": 3.462811134356869e-07, "loss": 11.8276, "step": 35784 }, { "epoch": 1.9486362677230742, "grad_norm": 0.5817138107091715, "learning_rate": 3.455482781785868e-07, "loss": 11.7262, "step": 35785 }, { "epoch": 1.9486907217196572, "grad_norm": 0.5178025546351565, "learning_rate": 3.448162178512071e-07, "loss": 11.7248, "step": 35786 }, { "epoch": 1.9487451757162402, "grad_norm": 0.6111691085854609, "learning_rate": 3.440849324592543e-07, "loss": 11.7132, "step": 35787 }, { "epoch": 1.9487996297128234, "grad_norm": 0.4830768142712869, "learning_rate": 3.4335442200840173e-07, "loss": 11.6678, "step": 35788 }, { "epoch": 1.9488540837094064, "grad_norm": 0.5205172880075745, "learning_rate": 3.4262468650434474e-07, "loss": 11.7703, "step": 35789 }, { "epoch": 1.9489085377059894, "grad_norm": 0.5361165745380371, "learning_rate": 3.4189572595274553e-07, "loss": 11.7288, "step": 35790 }, { "epoch": 1.9489629917025724, "grad_norm": 0.5943033370828161, "learning_rate": 3.4116754035928845e-07, "loss": 11.7242, "step": 35791 }, { "epoch": 1.9490174456991554, "grad_norm": 0.5666927184201768, "learning_rate": 3.4044012972961336e-07, "loss": 11.771, "step": 35792 }, { "epoch": 1.9490718996957384, "grad_norm": 0.5402614924496174, "learning_rate": 3.397134940693936e-07, "loss": 11.8572, "step": 35793 }, { "epoch": 1.9491263536923213, "grad_norm": 0.5913378435730663, "learning_rate": 3.38987633384269e-07, "loss": 11.766, "step": 35794 }, { "epoch": 1.9491808076889043, "grad_norm": 0.5183274891548123, "learning_rate": 3.3826254767990174e-07, "loss": 11.8697, "step": 35795 }, { "epoch": 1.9492352616854873, "grad_norm": 0.5579197662683869, "learning_rate": 3.3753823696190956e-07, "loss": 11.7581, "step": 35796 }, { "epoch": 1.9492897156820703, "grad_norm": 0.541985234741951, "learning_rate": 3.368147012359324e-07, "loss": 11.7521, "step": 35797 }, { "epoch": 1.9493441696786533, "grad_norm": 0.565821080166775, "learning_rate": 3.360919405075991e-07, "loss": 11.7639, "step": 35798 }, { "epoch": 1.9493986236752363, "grad_norm": 0.590903133530086, "learning_rate": 3.353699547825273e-07, "loss": 11.6631, "step": 35799 }, { "epoch": 1.9494530776718193, "grad_norm": 0.5516842409895033, "learning_rate": 3.3464874406634597e-07, "loss": 11.8536, "step": 35800 }, { "epoch": 1.9495075316684023, "grad_norm": 0.5342780122454649, "learning_rate": 3.339283083646283e-07, "loss": 11.7583, "step": 35801 }, { "epoch": 1.9495619856649853, "grad_norm": 0.4950279828894907, "learning_rate": 3.332086476830143e-07, "loss": 11.6153, "step": 35802 }, { "epoch": 1.9496164396615683, "grad_norm": 0.5697677897162219, "learning_rate": 3.324897620270773e-07, "loss": 11.7594, "step": 35803 }, { "epoch": 1.9496708936581513, "grad_norm": 0.5429610345061743, "learning_rate": 3.317716514024127e-07, "loss": 11.7725, "step": 35804 }, { "epoch": 1.9497253476547345, "grad_norm": 0.5510586216933023, "learning_rate": 3.3105431581461623e-07, "loss": 11.9017, "step": 35805 }, { "epoch": 1.9497798016513175, "grad_norm": 0.5190511466084771, "learning_rate": 3.3033775526923883e-07, "loss": 11.739, "step": 35806 }, { "epoch": 1.9498342556479005, "grad_norm": 0.5364381969782219, "learning_rate": 3.296219697718872e-07, "loss": 11.8013, "step": 35807 }, { "epoch": 1.9498887096444835, "grad_norm": 0.5947973922439259, "learning_rate": 3.289069593281013e-07, "loss": 11.719, "step": 35808 }, { "epoch": 1.9499431636410665, "grad_norm": 0.5521077995128394, "learning_rate": 3.2819272394344346e-07, "loss": 11.7002, "step": 35809 }, { "epoch": 1.9499976176376494, "grad_norm": 0.5764012215332128, "learning_rate": 3.274792636234869e-07, "loss": 11.8677, "step": 35810 }, { "epoch": 1.9500520716342327, "grad_norm": 0.5578297352939146, "learning_rate": 3.267665783737606e-07, "loss": 11.9374, "step": 35811 }, { "epoch": 1.9501065256308157, "grad_norm": 0.5322309769436364, "learning_rate": 3.260546681998156e-07, "loss": 11.7566, "step": 35812 }, { "epoch": 1.9501609796273986, "grad_norm": 0.5433238958891174, "learning_rate": 3.253435331071808e-07, "loss": 11.7498, "step": 35813 }, { "epoch": 1.9502154336239816, "grad_norm": 0.48925847983033494, "learning_rate": 3.2463317310138517e-07, "loss": 11.7488, "step": 35814 }, { "epoch": 1.9502698876205646, "grad_norm": 0.5589126774225989, "learning_rate": 3.2392358818796873e-07, "loss": 11.8701, "step": 35815 }, { "epoch": 1.9503243416171476, "grad_norm": 0.5709214017534532, "learning_rate": 3.2321477837242706e-07, "loss": 11.7667, "step": 35816 }, { "epoch": 1.9503787956137306, "grad_norm": 0.5940625147770527, "learning_rate": 3.225067436603002e-07, "loss": 11.7525, "step": 35817 }, { "epoch": 1.9504332496103136, "grad_norm": 0.4917298206686435, "learning_rate": 3.217994840570615e-07, "loss": 11.7953, "step": 35818 }, { "epoch": 1.9504877036068966, "grad_norm": 0.5999062352893816, "learning_rate": 3.210929995682288e-07, "loss": 11.7939, "step": 35819 }, { "epoch": 1.9505421576034796, "grad_norm": 0.4887759734218869, "learning_rate": 3.203872901992977e-07, "loss": 11.7573, "step": 35820 }, { "epoch": 1.9505966116000626, "grad_norm": 0.5424081437122078, "learning_rate": 3.1968235595574156e-07, "loss": 11.7486, "step": 35821 }, { "epoch": 1.9506510655966456, "grad_norm": 0.5817441787724815, "learning_rate": 3.1897819684305606e-07, "loss": 11.8727, "step": 35822 }, { "epoch": 1.9507055195932286, "grad_norm": 0.5180513380536177, "learning_rate": 3.1827481286671454e-07, "loss": 11.7364, "step": 35823 }, { "epoch": 1.9507599735898116, "grad_norm": 0.49946414602183087, "learning_rate": 3.1757220403219043e-07, "loss": 11.8093, "step": 35824 }, { "epoch": 1.9508144275863946, "grad_norm": 0.5077580998679988, "learning_rate": 3.168703703449349e-07, "loss": 11.7523, "step": 35825 }, { "epoch": 1.9508688815829776, "grad_norm": 0.5229095469697806, "learning_rate": 3.161693118104103e-07, "loss": 11.7871, "step": 35826 }, { "epoch": 1.9509233355795605, "grad_norm": 0.5624545584192372, "learning_rate": 3.154690284340789e-07, "loss": 11.8014, "step": 35827 }, { "epoch": 1.9509777895761435, "grad_norm": 0.6411604832075658, "learning_rate": 3.1476952022136964e-07, "loss": 11.8469, "step": 35828 }, { "epoch": 1.9510322435727268, "grad_norm": 0.5604763411659524, "learning_rate": 3.1407078717773377e-07, "loss": 11.7794, "step": 35829 }, { "epoch": 1.9510866975693097, "grad_norm": 0.518215737932677, "learning_rate": 3.1337282930860025e-07, "loss": 11.7237, "step": 35830 }, { "epoch": 1.9511411515658927, "grad_norm": 0.5534885203746889, "learning_rate": 3.1267564661938697e-07, "loss": 11.7628, "step": 35831 }, { "epoch": 1.9511956055624757, "grad_norm": 0.6007370207747913, "learning_rate": 3.119792391155341e-07, "loss": 11.8422, "step": 35832 }, { "epoch": 1.9512500595590587, "grad_norm": 0.5412034443034113, "learning_rate": 3.112836068024483e-07, "loss": 11.8291, "step": 35833 }, { "epoch": 1.951304513555642, "grad_norm": 0.5023854244170816, "learning_rate": 3.1058874968554754e-07, "loss": 11.675, "step": 35834 }, { "epoch": 1.951358967552225, "grad_norm": 0.5998728340734277, "learning_rate": 3.0989466777021634e-07, "loss": 11.8802, "step": 35835 }, { "epoch": 1.951413421548808, "grad_norm": 0.5233710975280607, "learning_rate": 3.0920136106186157e-07, "loss": 11.7182, "step": 35836 }, { "epoch": 1.951467875545391, "grad_norm": 0.5886865846789391, "learning_rate": 3.085088295658789e-07, "loss": 11.8063, "step": 35837 }, { "epoch": 1.951522329541974, "grad_norm": 0.5820195424925961, "learning_rate": 3.0781707328765285e-07, "loss": 11.7188, "step": 35838 }, { "epoch": 1.951576783538557, "grad_norm": 0.5645678801383038, "learning_rate": 3.0712609223255696e-07, "loss": 11.7586, "step": 35839 }, { "epoch": 1.95163123753514, "grad_norm": 0.6150839369493637, "learning_rate": 3.0643588640596467e-07, "loss": 11.8566, "step": 35840 }, { "epoch": 1.9516856915317229, "grad_norm": 0.5641827692573567, "learning_rate": 3.0574645581323834e-07, "loss": 11.6754, "step": 35841 }, { "epoch": 1.9517401455283059, "grad_norm": 0.629641290932581, "learning_rate": 3.050578004597626e-07, "loss": 11.6847, "step": 35842 }, { "epoch": 1.9517945995248889, "grad_norm": 0.5240048791950138, "learning_rate": 3.0436992035086656e-07, "loss": 11.8017, "step": 35843 }, { "epoch": 1.9518490535214719, "grad_norm": 0.5207401473257164, "learning_rate": 3.0368281549191245e-07, "loss": 11.6369, "step": 35844 }, { "epoch": 1.9519035075180549, "grad_norm": 0.5680741577814936, "learning_rate": 3.0299648588822946e-07, "loss": 11.8314, "step": 35845 }, { "epoch": 1.9519579615146379, "grad_norm": 0.5415864232828316, "learning_rate": 3.0231093154518e-07, "loss": 11.8047, "step": 35846 }, { "epoch": 1.9520124155112208, "grad_norm": 0.5483960674646408, "learning_rate": 3.016261524680708e-07, "loss": 11.8152, "step": 35847 }, { "epoch": 1.9520668695078038, "grad_norm": 0.5239235695206559, "learning_rate": 3.009421486622421e-07, "loss": 11.8147, "step": 35848 }, { "epoch": 1.9521213235043868, "grad_norm": 0.6451641908611242, "learning_rate": 3.002589201330008e-07, "loss": 11.765, "step": 35849 }, { "epoch": 1.9521757775009698, "grad_norm": 0.5243196034409832, "learning_rate": 2.995764668856649e-07, "loss": 11.8682, "step": 35850 }, { "epoch": 1.9522302314975528, "grad_norm": 0.562819873362613, "learning_rate": 2.988947889255522e-07, "loss": 11.8393, "step": 35851 }, { "epoch": 1.952284685494136, "grad_norm": 0.5234802610463309, "learning_rate": 2.9821388625794756e-07, "loss": 11.7525, "step": 35852 }, { "epoch": 1.952339139490719, "grad_norm": 0.5223475941794641, "learning_rate": 2.975337588881466e-07, "loss": 11.7858, "step": 35853 }, { "epoch": 1.952393593487302, "grad_norm": 0.5188269331431005, "learning_rate": 2.9685440682144515e-07, "loss": 11.8606, "step": 35854 }, { "epoch": 1.952448047483885, "grad_norm": 0.5103479857297905, "learning_rate": 2.961758300631279e-07, "loss": 11.6701, "step": 35855 }, { "epoch": 1.952502501480468, "grad_norm": 0.5454324703986294, "learning_rate": 2.954980286184683e-07, "loss": 11.8446, "step": 35856 }, { "epoch": 1.952556955477051, "grad_norm": 0.5400136085861927, "learning_rate": 2.9482100249274005e-07, "loss": 11.7596, "step": 35857 }, { "epoch": 1.9526114094736342, "grad_norm": 0.5282519667995272, "learning_rate": 2.941447516911944e-07, "loss": 11.8004, "step": 35858 }, { "epoch": 1.9526658634702172, "grad_norm": 0.5545893747726753, "learning_rate": 2.9346927621910494e-07, "loss": 11.553, "step": 35859 }, { "epoch": 1.9527203174668002, "grad_norm": 0.5537199705247947, "learning_rate": 2.927945760817119e-07, "loss": 11.7516, "step": 35860 }, { "epoch": 1.9527747714633832, "grad_norm": 0.5631832176629185, "learning_rate": 2.921206512842778e-07, "loss": 11.8641, "step": 35861 }, { "epoch": 1.9528292254599662, "grad_norm": 0.5360749020353921, "learning_rate": 2.9144750183203175e-07, "loss": 11.624, "step": 35862 }, { "epoch": 1.9528836794565492, "grad_norm": 0.5334187994037874, "learning_rate": 2.90775127730214e-07, "loss": 11.9112, "step": 35863 }, { "epoch": 1.9529381334531322, "grad_norm": 0.5445344224613425, "learning_rate": 2.901035289840537e-07, "loss": 11.8131, "step": 35864 }, { "epoch": 1.9529925874497152, "grad_norm": 0.5487306338296999, "learning_rate": 2.894327055987578e-07, "loss": 11.6989, "step": 35865 }, { "epoch": 1.9530470414462981, "grad_norm": 0.4904048312942676, "learning_rate": 2.887626575795666e-07, "loss": 11.6938, "step": 35866 }, { "epoch": 1.9531014954428811, "grad_norm": 0.5223437638123665, "learning_rate": 2.880933849316647e-07, "loss": 11.7829, "step": 35867 }, { "epoch": 1.9531559494394641, "grad_norm": 0.5653797954884884, "learning_rate": 2.874248876602814e-07, "loss": 11.6763, "step": 35868 }, { "epoch": 1.9532104034360471, "grad_norm": 0.5670962305740367, "learning_rate": 2.867571657706014e-07, "loss": 11.6804, "step": 35869 }, { "epoch": 1.9532648574326301, "grad_norm": 0.5361234131465358, "learning_rate": 2.8609021926782053e-07, "loss": 11.8379, "step": 35870 }, { "epoch": 1.9533193114292131, "grad_norm": 0.5745048326946705, "learning_rate": 2.8542404815712354e-07, "loss": 11.722, "step": 35871 }, { "epoch": 1.953373765425796, "grad_norm": 0.5558435101449316, "learning_rate": 2.847586524436951e-07, "loss": 11.8554, "step": 35872 }, { "epoch": 1.953428219422379, "grad_norm": 0.5234960261363741, "learning_rate": 2.8409403213269794e-07, "loss": 11.692, "step": 35873 }, { "epoch": 1.953482673418962, "grad_norm": 0.5513330073098779, "learning_rate": 2.8343018722930546e-07, "loss": 11.875, "step": 35874 }, { "epoch": 1.9535371274155453, "grad_norm": 0.5461841410829567, "learning_rate": 2.8276711773869146e-07, "loss": 11.871, "step": 35875 }, { "epoch": 1.9535915814121283, "grad_norm": 0.5587509588084834, "learning_rate": 2.821048236659962e-07, "loss": 11.7609, "step": 35876 }, { "epoch": 1.9536460354087113, "grad_norm": 0.5564115773370264, "learning_rate": 2.814433050163823e-07, "loss": 11.8441, "step": 35877 }, { "epoch": 1.9537004894052943, "grad_norm": 0.5415376775011018, "learning_rate": 2.8078256179498995e-07, "loss": 11.7623, "step": 35878 }, { "epoch": 1.9537549434018773, "grad_norm": 0.5500602009273343, "learning_rate": 2.801225940069485e-07, "loss": 11.8713, "step": 35879 }, { "epoch": 1.9538093973984603, "grad_norm": 0.6269348301275441, "learning_rate": 2.7946340165739825e-07, "loss": 11.8026, "step": 35880 }, { "epoch": 1.9538638513950435, "grad_norm": 0.5750371656321503, "learning_rate": 2.788049847514684e-07, "loss": 11.8651, "step": 35881 }, { "epoch": 1.9539183053916265, "grad_norm": 0.5823462549100343, "learning_rate": 2.7814734329426606e-07, "loss": 11.7562, "step": 35882 }, { "epoch": 1.9539727593882095, "grad_norm": 0.6173978633885092, "learning_rate": 2.7749047729092036e-07, "loss": 11.7826, "step": 35883 }, { "epoch": 1.9540272133847925, "grad_norm": 0.5880719517248111, "learning_rate": 2.768343867465273e-07, "loss": 11.8653, "step": 35884 }, { "epoch": 1.9540816673813755, "grad_norm": 0.5758611817619522, "learning_rate": 2.7617907166619384e-07, "loss": 11.722, "step": 35885 }, { "epoch": 1.9541361213779584, "grad_norm": 0.5832879258558195, "learning_rate": 2.7552453205501595e-07, "loss": 11.7369, "step": 35886 }, { "epoch": 1.9541905753745414, "grad_norm": 0.5287119368124059, "learning_rate": 2.7487076791808954e-07, "loss": 11.8529, "step": 35887 }, { "epoch": 1.9542450293711244, "grad_norm": 0.5157165752512857, "learning_rate": 2.7421777926048833e-07, "loss": 11.8183, "step": 35888 }, { "epoch": 1.9542994833677074, "grad_norm": 0.4865903755511943, "learning_rate": 2.735655660872971e-07, "loss": 11.7977, "step": 35889 }, { "epoch": 1.9543539373642904, "grad_norm": 0.540523073617327, "learning_rate": 2.7291412840357855e-07, "loss": 11.7901, "step": 35890 }, { "epoch": 1.9544083913608734, "grad_norm": 0.5487497903139256, "learning_rate": 2.7226346621440633e-07, "loss": 11.6646, "step": 35891 }, { "epoch": 1.9544628453574564, "grad_norm": 0.5891033440618993, "learning_rate": 2.7161357952483204e-07, "loss": 11.7166, "step": 35892 }, { "epoch": 1.9545172993540394, "grad_norm": 0.5863427885907998, "learning_rate": 2.709644683399182e-07, "loss": 11.8414, "step": 35893 }, { "epoch": 1.9545717533506224, "grad_norm": 0.5819569928162447, "learning_rate": 2.7031613266471636e-07, "loss": 11.7669, "step": 35894 }, { "epoch": 1.9546262073472054, "grad_norm": 0.5619965174082716, "learning_rate": 2.696685725042558e-07, "loss": 11.7991, "step": 35895 }, { "epoch": 1.9546806613437884, "grad_norm": 0.563162459752338, "learning_rate": 2.690217878635659e-07, "loss": 11.8986, "step": 35896 }, { "epoch": 1.9547351153403714, "grad_norm": 0.5085379508253828, "learning_rate": 2.6837577874769817e-07, "loss": 11.7301, "step": 35897 }, { "epoch": 1.9547895693369544, "grad_norm": 0.5520974546880524, "learning_rate": 2.6773054516167074e-07, "loss": 11.7723, "step": 35898 }, { "epoch": 1.9548440233335376, "grad_norm": 0.5518164002357201, "learning_rate": 2.670860871104908e-07, "loss": 11.7063, "step": 35899 }, { "epoch": 1.9548984773301206, "grad_norm": 0.6009453695692543, "learning_rate": 2.664424045991765e-07, "loss": 11.789, "step": 35900 }, { "epoch": 1.9549529313267036, "grad_norm": 0.5692161719869828, "learning_rate": 2.657994976327238e-07, "loss": 11.7154, "step": 35901 }, { "epoch": 1.9550073853232866, "grad_norm": 0.6287841057724544, "learning_rate": 2.6515736621615106e-07, "loss": 11.7546, "step": 35902 }, { "epoch": 1.9550618393198695, "grad_norm": 0.5583070790923353, "learning_rate": 2.6451601035443196e-07, "loss": 11.8544, "step": 35903 }, { "epoch": 1.9551162933164528, "grad_norm": 0.507130766576949, "learning_rate": 2.638754300525625e-07, "loss": 11.6692, "step": 35904 }, { "epoch": 1.9551707473130358, "grad_norm": 0.6557667819767078, "learning_rate": 2.632356253155277e-07, "loss": 11.7887, "step": 35905 }, { "epoch": 1.9552252013096187, "grad_norm": 0.5321084664675033, "learning_rate": 2.6259659614829013e-07, "loss": 11.6028, "step": 35906 }, { "epoch": 1.9552796553062017, "grad_norm": 0.5084910653859216, "learning_rate": 2.6195834255583474e-07, "loss": 11.7913, "step": 35907 }, { "epoch": 1.9553341093027847, "grad_norm": 0.5389367268452145, "learning_rate": 2.613208645431242e-07, "loss": 11.8112, "step": 35908 }, { "epoch": 1.9553885632993677, "grad_norm": 0.5369992545014218, "learning_rate": 2.6068416211509906e-07, "loss": 11.8744, "step": 35909 }, { "epoch": 1.9554430172959507, "grad_norm": 0.5785829195847547, "learning_rate": 2.6004823527672196e-07, "loss": 11.79, "step": 35910 }, { "epoch": 1.9554974712925337, "grad_norm": 0.6076617351802577, "learning_rate": 2.594130840329334e-07, "loss": 11.7503, "step": 35911 }, { "epoch": 1.9555519252891167, "grad_norm": 0.5304492212541593, "learning_rate": 2.587787083886739e-07, "loss": 11.7637, "step": 35912 }, { "epoch": 1.9556063792856997, "grad_norm": 0.5155447374843087, "learning_rate": 2.5814510834888393e-07, "loss": 11.7147, "step": 35913 }, { "epoch": 1.9556608332822827, "grad_norm": 0.5760552732945751, "learning_rate": 2.575122839184818e-07, "loss": 11.8351, "step": 35914 }, { "epoch": 1.9557152872788657, "grad_norm": 0.6113632142587743, "learning_rate": 2.568802351023858e-07, "loss": 11.8025, "step": 35915 }, { "epoch": 1.9557697412754487, "grad_norm": 0.5589133512186288, "learning_rate": 2.5624896190552526e-07, "loss": 11.79, "step": 35916 }, { "epoch": 1.9558241952720317, "grad_norm": 0.5391823032797487, "learning_rate": 2.5561846433279633e-07, "loss": 11.7124, "step": 35917 }, { "epoch": 1.9558786492686147, "grad_norm": 0.5836982543128165, "learning_rate": 2.549887423891062e-07, "loss": 11.6308, "step": 35918 }, { "epoch": 1.9559331032651976, "grad_norm": 0.6049656136525037, "learning_rate": 2.543597960793509e-07, "loss": 11.846, "step": 35919 }, { "epoch": 1.9559875572617806, "grad_norm": 0.5440007353911324, "learning_rate": 2.5373162540841547e-07, "loss": 11.6707, "step": 35920 }, { "epoch": 1.9560420112583636, "grad_norm": 0.5871422008088131, "learning_rate": 2.531042303811959e-07, "loss": 11.6263, "step": 35921 }, { "epoch": 1.9560964652549468, "grad_norm": 0.5726808866474816, "learning_rate": 2.5247761100256615e-07, "loss": 11.7663, "step": 35922 }, { "epoch": 1.9561509192515298, "grad_norm": 0.5255182273916505, "learning_rate": 2.51851767277389e-07, "loss": 11.695, "step": 35923 }, { "epoch": 1.9562053732481128, "grad_norm": 0.7316910928448936, "learning_rate": 2.512266992105494e-07, "loss": 11.8428, "step": 35924 }, { "epoch": 1.9562598272446958, "grad_norm": 0.5466159138712406, "learning_rate": 2.5060240680689907e-07, "loss": 11.7252, "step": 35925 }, { "epoch": 1.9563142812412788, "grad_norm": 0.5120301825262785, "learning_rate": 2.499788900712896e-07, "loss": 11.8882, "step": 35926 }, { "epoch": 1.9563687352378618, "grad_norm": 0.5531173590274187, "learning_rate": 2.493561490085727e-07, "loss": 11.7761, "step": 35927 }, { "epoch": 1.956423189234445, "grad_norm": 0.6035921506793719, "learning_rate": 2.48734183623589e-07, "loss": 11.7222, "step": 35928 }, { "epoch": 1.956477643231028, "grad_norm": 0.5275334665423085, "learning_rate": 2.4811299392117906e-07, "loss": 11.8672, "step": 35929 }, { "epoch": 1.956532097227611, "grad_norm": 0.5079229798744881, "learning_rate": 2.4749257990617224e-07, "loss": 11.7034, "step": 35930 }, { "epoch": 1.956586551224194, "grad_norm": 0.5632949809556068, "learning_rate": 2.46872941583387e-07, "loss": 11.7346, "step": 35931 }, { "epoch": 1.956641005220777, "grad_norm": 0.538298404356519, "learning_rate": 2.4625407895765285e-07, "loss": 11.7376, "step": 35932 }, { "epoch": 1.95669545921736, "grad_norm": 0.5876919351619007, "learning_rate": 2.4563599203376584e-07, "loss": 11.6859, "step": 35933 }, { "epoch": 1.956749913213943, "grad_norm": 0.556354689695551, "learning_rate": 2.450186808165555e-07, "loss": 11.7755, "step": 35934 }, { "epoch": 1.956804367210526, "grad_norm": 0.5260272536635366, "learning_rate": 2.4440214531079586e-07, "loss": 11.7413, "step": 35935 }, { "epoch": 1.956858821207109, "grad_norm": 0.5286884270683888, "learning_rate": 2.4378638552129404e-07, "loss": 11.8724, "step": 35936 }, { "epoch": 1.956913275203692, "grad_norm": 0.5494134903867526, "learning_rate": 2.4317140145284634e-07, "loss": 11.7719, "step": 35937 }, { "epoch": 1.956967729200275, "grad_norm": 0.5925043697881837, "learning_rate": 2.425571931102266e-07, "loss": 11.6398, "step": 35938 }, { "epoch": 1.957022183196858, "grad_norm": 0.5943765468098076, "learning_rate": 2.419437604982089e-07, "loss": 11.9939, "step": 35939 }, { "epoch": 1.957076637193441, "grad_norm": 0.5535055704856124, "learning_rate": 2.4133110362156705e-07, "loss": 11.8611, "step": 35940 }, { "epoch": 1.957131091190024, "grad_norm": 0.5040609159798455, "learning_rate": 2.4071922248506404e-07, "loss": 11.6314, "step": 35941 }, { "epoch": 1.957185545186607, "grad_norm": 0.5475128848344688, "learning_rate": 2.401081170934627e-07, "loss": 11.7481, "step": 35942 }, { "epoch": 1.95723999918319, "grad_norm": 0.4946371589393999, "learning_rate": 2.394977874515036e-07, "loss": 11.7724, "step": 35943 }, { "epoch": 1.957294453179773, "grad_norm": 0.5377785856912255, "learning_rate": 2.3888823356393864e-07, "loss": 11.7273, "step": 35944 }, { "epoch": 1.9573489071763561, "grad_norm": 0.5307371097819616, "learning_rate": 2.3827945543551943e-07, "loss": 11.5581, "step": 35945 }, { "epoch": 1.9574033611729391, "grad_norm": 0.5651747604016063, "learning_rate": 2.376714530709534e-07, "loss": 11.8479, "step": 35946 }, { "epoch": 1.957457815169522, "grad_norm": 0.5283998511994507, "learning_rate": 2.3706422647499226e-07, "loss": 11.7311, "step": 35947 }, { "epoch": 1.957512269166105, "grad_norm": 0.5407110930783279, "learning_rate": 2.3645777565235448e-07, "loss": 11.639, "step": 35948 }, { "epoch": 1.957566723162688, "grad_norm": 0.5639319181617664, "learning_rate": 2.3585210060774742e-07, "loss": 11.7106, "step": 35949 }, { "epoch": 1.957621177159271, "grad_norm": 0.505142217294847, "learning_rate": 2.3524720134587842e-07, "loss": 11.6646, "step": 35950 }, { "epoch": 1.9576756311558543, "grad_norm": 0.5746314781996184, "learning_rate": 2.3464307787146588e-07, "loss": 11.8535, "step": 35951 }, { "epoch": 1.9577300851524373, "grad_norm": 0.5174578720733319, "learning_rate": 2.34039730189195e-07, "loss": 11.7564, "step": 35952 }, { "epoch": 1.9577845391490203, "grad_norm": 0.5251376682898412, "learning_rate": 2.33437158303762e-07, "loss": 11.7435, "step": 35953 }, { "epoch": 1.9578389931456033, "grad_norm": 0.5353396839108149, "learning_rate": 2.3283536221986312e-07, "loss": 11.7581, "step": 35954 }, { "epoch": 1.9578934471421863, "grad_norm": 0.5340541763313547, "learning_rate": 2.322343419421502e-07, "loss": 11.7559, "step": 35955 }, { "epoch": 1.9579479011387693, "grad_norm": 0.5204118864493482, "learning_rate": 2.316340974753306e-07, "loss": 11.8139, "step": 35956 }, { "epoch": 1.9580023551353523, "grad_norm": 0.5972537787744839, "learning_rate": 2.3103462882404503e-07, "loss": 11.7654, "step": 35957 }, { "epoch": 1.9580568091319352, "grad_norm": 0.505470952708302, "learning_rate": 2.3043593599296753e-07, "loss": 11.7382, "step": 35958 }, { "epoch": 1.9581112631285182, "grad_norm": 0.517484750902238, "learning_rate": 2.2983801898674996e-07, "loss": 11.7658, "step": 35959 }, { "epoch": 1.9581657171251012, "grad_norm": 0.6277265276756603, "learning_rate": 2.2924087781004412e-07, "loss": 11.8407, "step": 35960 }, { "epoch": 1.9582201711216842, "grad_norm": 0.5175984169503505, "learning_rate": 2.2864451246749074e-07, "loss": 11.6994, "step": 35961 }, { "epoch": 1.9582746251182672, "grad_norm": 0.5292819777409135, "learning_rate": 2.280489229637417e-07, "loss": 11.7636, "step": 35962 }, { "epoch": 1.9583290791148502, "grad_norm": 0.5432526409607965, "learning_rate": 2.274541093034044e-07, "loss": 11.8115, "step": 35963 }, { "epoch": 1.9583835331114332, "grad_norm": 0.5721332638844125, "learning_rate": 2.2686007149111955e-07, "loss": 11.7229, "step": 35964 }, { "epoch": 1.9584379871080162, "grad_norm": 0.5306818464954678, "learning_rate": 2.2626680953149459e-07, "loss": 11.6406, "step": 35965 }, { "epoch": 1.9584924411045992, "grad_norm": 0.5303782181200513, "learning_rate": 2.2567432342915916e-07, "loss": 11.8844, "step": 35966 }, { "epoch": 1.9585468951011822, "grad_norm": 0.5247701926028948, "learning_rate": 2.250826131887096e-07, "loss": 11.8682, "step": 35967 }, { "epoch": 1.9586013490977654, "grad_norm": 0.4854950431216873, "learning_rate": 2.244916788147533e-07, "loss": 11.7764, "step": 35968 }, { "epoch": 1.9586558030943484, "grad_norm": 0.5879777289575042, "learning_rate": 2.239015203118866e-07, "loss": 11.8634, "step": 35969 }, { "epoch": 1.9587102570909314, "grad_norm": 0.5213985472647493, "learning_rate": 2.2331213768468363e-07, "loss": 11.8124, "step": 35970 }, { "epoch": 1.9587647110875144, "grad_norm": 0.5562151313233744, "learning_rate": 2.2272353093774067e-07, "loss": 11.8171, "step": 35971 }, { "epoch": 1.9588191650840974, "grad_norm": 0.5000176824145144, "learning_rate": 2.2213570007563188e-07, "loss": 11.7175, "step": 35972 }, { "epoch": 1.9588736190806804, "grad_norm": 0.5177771222958579, "learning_rate": 2.215486451029314e-07, "loss": 11.821, "step": 35973 }, { "epoch": 1.9589280730772636, "grad_norm": 0.5053136477219986, "learning_rate": 2.2096236602420216e-07, "loss": 11.8311, "step": 35974 }, { "epoch": 1.9589825270738466, "grad_norm": 0.5547501316099366, "learning_rate": 2.2037686284399617e-07, "loss": 11.744, "step": 35975 }, { "epoch": 1.9590369810704296, "grad_norm": 0.5583075796416789, "learning_rate": 2.197921355668875e-07, "loss": 11.7493, "step": 35976 }, { "epoch": 1.9590914350670126, "grad_norm": 0.4865546407423326, "learning_rate": 2.1920818419739475e-07, "loss": 11.8093, "step": 35977 }, { "epoch": 1.9591458890635955, "grad_norm": 0.5159941282380012, "learning_rate": 2.1862500874008097e-07, "loss": 11.845, "step": 35978 }, { "epoch": 1.9592003430601785, "grad_norm": 0.5738069207992845, "learning_rate": 2.1804260919946472e-07, "loss": 11.7668, "step": 35979 }, { "epoch": 1.9592547970567615, "grad_norm": 0.544092997987775, "learning_rate": 2.1746098558008688e-07, "loss": 11.8062, "step": 35980 }, { "epoch": 1.9593092510533445, "grad_norm": 0.5430659754606114, "learning_rate": 2.16880137886466e-07, "loss": 11.7779, "step": 35981 }, { "epoch": 1.9593637050499275, "grad_norm": 0.5533043904573204, "learning_rate": 2.163000661231096e-07, "loss": 11.7627, "step": 35982 }, { "epoch": 1.9594181590465105, "grad_norm": 0.5491360911113741, "learning_rate": 2.157207702945474e-07, "loss": 11.755, "step": 35983 }, { "epoch": 1.9594726130430935, "grad_norm": 0.5939356269830557, "learning_rate": 2.151422504052758e-07, "loss": 11.8318, "step": 35984 }, { "epoch": 1.9595270670396765, "grad_norm": 0.5346205594886096, "learning_rate": 2.145645064597801e-07, "loss": 11.7513, "step": 35985 }, { "epoch": 1.9595815210362595, "grad_norm": 0.5220698824521048, "learning_rate": 2.139875384625789e-07, "loss": 11.7206, "step": 35986 }, { "epoch": 1.9596359750328425, "grad_norm": 0.5531759666057698, "learning_rate": 2.1341134641813532e-07, "loss": 11.7065, "step": 35987 }, { "epoch": 1.9596904290294255, "grad_norm": 0.5283570502275963, "learning_rate": 2.1283593033094572e-07, "loss": 11.8188, "step": 35988 }, { "epoch": 1.9597448830260085, "grad_norm": 0.5472719714085682, "learning_rate": 2.1226129020547324e-07, "loss": 11.6957, "step": 35989 }, { "epoch": 1.9597993370225915, "grad_norm": 0.5066518263393189, "learning_rate": 2.1168742604619206e-07, "loss": 11.7311, "step": 35990 }, { "epoch": 1.9598537910191745, "grad_norm": 0.5613462019930039, "learning_rate": 2.111143378575653e-07, "loss": 11.7848, "step": 35991 }, { "epoch": 1.9599082450157577, "grad_norm": 0.5879546238432012, "learning_rate": 2.105420256440449e-07, "loss": 11.8254, "step": 35992 }, { "epoch": 1.9599626990123407, "grad_norm": 0.5143005623587931, "learning_rate": 2.09970489410094e-07, "loss": 11.7918, "step": 35993 }, { "epoch": 1.9600171530089237, "grad_norm": 0.5088615201152062, "learning_rate": 2.0939972916014238e-07, "loss": 11.7431, "step": 35994 }, { "epoch": 1.9600716070055066, "grad_norm": 0.6261982315383645, "learning_rate": 2.0882974489863093e-07, "loss": 11.7243, "step": 35995 }, { "epoch": 1.9601260610020896, "grad_norm": 0.5667986517146926, "learning_rate": 2.0826053662998946e-07, "loss": 11.7451, "step": 35996 }, { "epoch": 1.9601805149986726, "grad_norm": 0.5579407408108145, "learning_rate": 2.0769210435865883e-07, "loss": 11.6502, "step": 35997 }, { "epoch": 1.9602349689952558, "grad_norm": 0.5572480299646217, "learning_rate": 2.0712444808904662e-07, "loss": 11.6107, "step": 35998 }, { "epoch": 1.9602894229918388, "grad_norm": 0.498678713069211, "learning_rate": 2.0655756782557157e-07, "loss": 11.8045, "step": 35999 }, { "epoch": 1.9603438769884218, "grad_norm": 0.521042250840242, "learning_rate": 2.059914635726301e-07, "loss": 11.7027, "step": 36000 }, { "epoch": 1.9603983309850048, "grad_norm": 0.5331566584946978, "learning_rate": 2.0542613533465204e-07, "loss": 11.7363, "step": 36001 }, { "epoch": 1.9604527849815878, "grad_norm": 0.5443088404153574, "learning_rate": 2.0486158311600057e-07, "loss": 11.7402, "step": 36002 }, { "epoch": 1.9605072389781708, "grad_norm": 0.5525041906124101, "learning_rate": 2.0429780692108325e-07, "loss": 11.7649, "step": 36003 }, { "epoch": 1.9605616929747538, "grad_norm": 0.6303177546887656, "learning_rate": 2.0373480675428546e-07, "loss": 11.7865, "step": 36004 }, { "epoch": 1.9606161469713368, "grad_norm": 0.5915968681274381, "learning_rate": 2.031725826199815e-07, "loss": 11.8998, "step": 36005 }, { "epoch": 1.9606706009679198, "grad_norm": 0.5011611063775976, "learning_rate": 2.0261113452254566e-07, "loss": 11.7123, "step": 36006 }, { "epoch": 1.9607250549645028, "grad_norm": 0.5570439052407872, "learning_rate": 2.0205046246634108e-07, "loss": 11.7016, "step": 36007 }, { "epoch": 1.9607795089610858, "grad_norm": 0.512534127629289, "learning_rate": 2.0149056645573094e-07, "loss": 11.788, "step": 36008 }, { "epoch": 1.9608339629576688, "grad_norm": 0.5184480782611407, "learning_rate": 2.0093144649505623e-07, "loss": 11.7903, "step": 36009 }, { "epoch": 1.9608884169542518, "grad_norm": 0.5857843005853354, "learning_rate": 2.0037310258868013e-07, "loss": 11.8534, "step": 36010 }, { "epoch": 1.9609428709508347, "grad_norm": 0.5111709976498129, "learning_rate": 1.998155347409436e-07, "loss": 11.5414, "step": 36011 }, { "epoch": 1.9609973249474177, "grad_norm": 0.5292095211027054, "learning_rate": 1.9925874295617652e-07, "loss": 11.732, "step": 36012 }, { "epoch": 1.9610517789440007, "grad_norm": 0.5856080790092227, "learning_rate": 1.9870272723870875e-07, "loss": 11.8398, "step": 36013 }, { "epoch": 1.9611062329405837, "grad_norm": 0.5033977054195551, "learning_rate": 1.9814748759285906e-07, "loss": 11.7954, "step": 36014 }, { "epoch": 1.961160686937167, "grad_norm": 0.577376962998737, "learning_rate": 1.9759302402295731e-07, "loss": 11.8011, "step": 36015 }, { "epoch": 1.96121514093375, "grad_norm": 0.5397605666328251, "learning_rate": 1.9703933653331118e-07, "loss": 11.757, "step": 36016 }, { "epoch": 1.961269594930333, "grad_norm": 0.5541654365163368, "learning_rate": 1.9648642512821724e-07, "loss": 11.7338, "step": 36017 }, { "epoch": 1.961324048926916, "grad_norm": 0.7217196531735636, "learning_rate": 1.9593428981198315e-07, "loss": 11.7926, "step": 36018 }, { "epoch": 1.961378502923499, "grad_norm": 0.519998290054018, "learning_rate": 1.9538293058890543e-07, "loss": 11.6498, "step": 36019 }, { "epoch": 1.961432956920082, "grad_norm": 0.5272176504867117, "learning_rate": 1.948323474632696e-07, "loss": 11.7032, "step": 36020 }, { "epoch": 1.9614874109166651, "grad_norm": 0.5099707324800663, "learning_rate": 1.9428254043935003e-07, "loss": 11.451, "step": 36021 }, { "epoch": 1.9615418649132481, "grad_norm": 0.5179851898636314, "learning_rate": 1.93733509521421e-07, "loss": 11.6959, "step": 36022 }, { "epoch": 1.961596318909831, "grad_norm": 0.5372911964255624, "learning_rate": 1.9318525471376802e-07, "loss": 11.8403, "step": 36023 }, { "epoch": 1.961650772906414, "grad_norm": 0.761199762720354, "learning_rate": 1.9263777602064327e-07, "loss": 11.727, "step": 36024 }, { "epoch": 1.961705226902997, "grad_norm": 0.4977141123013079, "learning_rate": 1.9209107344629885e-07, "loss": 11.7505, "step": 36025 }, { "epoch": 1.96175968089958, "grad_norm": 0.549922404983853, "learning_rate": 1.9154514699499805e-07, "loss": 11.7971, "step": 36026 }, { "epoch": 1.961814134896163, "grad_norm": 0.5653266048395228, "learning_rate": 1.909999966709819e-07, "loss": 11.8807, "step": 36027 }, { "epoch": 1.961868588892746, "grad_norm": 0.5776493133370724, "learning_rate": 1.9045562247848036e-07, "loss": 11.7873, "step": 36028 }, { "epoch": 1.961923042889329, "grad_norm": 0.5485644438477592, "learning_rate": 1.899120244217345e-07, "loss": 11.7357, "step": 36029 }, { "epoch": 1.961977496885912, "grad_norm": 0.5381699981451984, "learning_rate": 1.8936920250497425e-07, "loss": 11.6875, "step": 36030 }, { "epoch": 1.962031950882495, "grad_norm": 0.5204981639255041, "learning_rate": 1.8882715673241847e-07, "loss": 11.7527, "step": 36031 }, { "epoch": 1.962086404879078, "grad_norm": 0.4941649129436532, "learning_rate": 1.8828588710827488e-07, "loss": 11.7216, "step": 36032 }, { "epoch": 1.962140858875661, "grad_norm": 0.5787425359730036, "learning_rate": 1.8774539363676235e-07, "loss": 11.8042, "step": 36033 }, { "epoch": 1.962195312872244, "grad_norm": 0.5298592707443699, "learning_rate": 1.8720567632207753e-07, "loss": 11.7328, "step": 36034 }, { "epoch": 1.962249766868827, "grad_norm": 0.6048682113144075, "learning_rate": 1.8666673516842814e-07, "loss": 11.7806, "step": 36035 }, { "epoch": 1.96230422086541, "grad_norm": 0.5203259207628885, "learning_rate": 1.8612857017998864e-07, "loss": 11.7484, "step": 36036 }, { "epoch": 1.962358674861993, "grad_norm": 0.5203654124111285, "learning_rate": 1.8559118136095566e-07, "loss": 11.7989, "step": 36037 }, { "epoch": 1.9624131288585762, "grad_norm": 0.5574154978845504, "learning_rate": 1.8505456871550364e-07, "loss": 11.803, "step": 36038 }, { "epoch": 1.9624675828551592, "grad_norm": 0.5187730591400143, "learning_rate": 1.8451873224780703e-07, "loss": 11.7569, "step": 36039 }, { "epoch": 1.9625220368517422, "grad_norm": 0.5397345979656596, "learning_rate": 1.8398367196202914e-07, "loss": 11.5374, "step": 36040 }, { "epoch": 1.9625764908483252, "grad_norm": 0.5249696001010945, "learning_rate": 1.8344938786233334e-07, "loss": 11.8053, "step": 36041 }, { "epoch": 1.9626309448449082, "grad_norm": 0.6110236430723038, "learning_rate": 1.8291587995288295e-07, "loss": 11.8182, "step": 36042 }, { "epoch": 1.9626853988414912, "grad_norm": 0.5413015579929863, "learning_rate": 1.82383148237808e-07, "loss": 11.7702, "step": 36043 }, { "epoch": 1.9627398528380744, "grad_norm": 0.583288513988857, "learning_rate": 1.8185119272126072e-07, "loss": 11.714, "step": 36044 }, { "epoch": 1.9627943068346574, "grad_norm": 0.5334439277205439, "learning_rate": 1.8132001340739334e-07, "loss": 11.8311, "step": 36045 }, { "epoch": 1.9628487608312404, "grad_norm": 0.6313076412553729, "learning_rate": 1.8078961030030262e-07, "loss": 11.9019, "step": 36046 }, { "epoch": 1.9629032148278234, "grad_norm": 0.5802374704330805, "learning_rate": 1.8025998340414075e-07, "loss": 11.7941, "step": 36047 }, { "epoch": 1.9629576688244064, "grad_norm": 0.5530229621499704, "learning_rate": 1.7973113272301556e-07, "loss": 11.7996, "step": 36048 }, { "epoch": 1.9630121228209894, "grad_norm": 0.611200710106464, "learning_rate": 1.79203058261046e-07, "loss": 11.7493, "step": 36049 }, { "epoch": 1.9630665768175724, "grad_norm": 0.5538728300789155, "learning_rate": 1.7867576002232878e-07, "loss": 11.8603, "step": 36050 }, { "epoch": 1.9631210308141553, "grad_norm": 0.5902849805325706, "learning_rate": 1.781492380109606e-07, "loss": 11.7395, "step": 36051 }, { "epoch": 1.9631754848107383, "grad_norm": 0.5172885994447218, "learning_rate": 1.7762349223106046e-07, "loss": 11.7958, "step": 36052 }, { "epoch": 1.9632299388073213, "grad_norm": 0.5611427830043566, "learning_rate": 1.770985226866806e-07, "loss": 11.7454, "step": 36053 }, { "epoch": 1.9632843928039043, "grad_norm": 0.5455502001926145, "learning_rate": 1.7657432938193997e-07, "loss": 11.8378, "step": 36054 }, { "epoch": 1.9633388468004873, "grad_norm": 0.5278946672983367, "learning_rate": 1.7605091232089088e-07, "loss": 11.6538, "step": 36055 }, { "epoch": 1.9633933007970703, "grad_norm": 0.6774585307507369, "learning_rate": 1.7552827150760788e-07, "loss": 11.9573, "step": 36056 }, { "epoch": 1.9634477547936533, "grad_norm": 0.5560376521290947, "learning_rate": 1.7500640694616544e-07, "loss": 11.8007, "step": 36057 }, { "epoch": 1.9635022087902363, "grad_norm": 0.5959644389597665, "learning_rate": 1.744853186406048e-07, "loss": 11.8268, "step": 36058 }, { "epoch": 1.9635566627868193, "grad_norm": 0.5390154979311883, "learning_rate": 1.7396500659498938e-07, "loss": 11.7693, "step": 36059 }, { "epoch": 1.9636111167834023, "grad_norm": 0.5274492519851942, "learning_rate": 1.7344547081337147e-07, "loss": 11.7415, "step": 36060 }, { "epoch": 1.9636655707799853, "grad_norm": 0.602824620451956, "learning_rate": 1.7292671129977013e-07, "loss": 11.7833, "step": 36061 }, { "epoch": 1.9637200247765685, "grad_norm": 0.5294160814401556, "learning_rate": 1.724087280582376e-07, "loss": 11.6379, "step": 36062 }, { "epoch": 1.9637744787731515, "grad_norm": 0.49065103439773283, "learning_rate": 1.7189152109280405e-07, "loss": 11.8398, "step": 36063 }, { "epoch": 1.9638289327697345, "grad_norm": 0.5378153877668171, "learning_rate": 1.7137509040746625e-07, "loss": 11.861, "step": 36064 }, { "epoch": 1.9638833867663175, "grad_norm": 0.520991498039108, "learning_rate": 1.7085943600626542e-07, "loss": 11.827, "step": 36065 }, { "epoch": 1.9639378407629005, "grad_norm": 0.5675381880773707, "learning_rate": 1.7034455789320947e-07, "loss": 11.8059, "step": 36066 }, { "epoch": 1.9639922947594837, "grad_norm": 0.48008876926079747, "learning_rate": 1.698304560722841e-07, "loss": 11.5584, "step": 36067 }, { "epoch": 1.9640467487560667, "grad_norm": 0.542396205604003, "learning_rate": 1.693171305475083e-07, "loss": 11.7639, "step": 36068 }, { "epoch": 1.9641012027526497, "grad_norm": 0.51188379760167, "learning_rate": 1.688045813228567e-07, "loss": 11.8636, "step": 36069 }, { "epoch": 1.9641556567492326, "grad_norm": 0.5727338721667895, "learning_rate": 1.682928084023261e-07, "loss": 11.7454, "step": 36070 }, { "epoch": 1.9642101107458156, "grad_norm": 0.5429382756620393, "learning_rate": 1.6778181178989106e-07, "loss": 11.7864, "step": 36071 }, { "epoch": 1.9642645647423986, "grad_norm": 0.6027145212717409, "learning_rate": 1.672715914895262e-07, "loss": 11.7084, "step": 36072 }, { "epoch": 1.9643190187389816, "grad_norm": 0.5824037743355904, "learning_rate": 1.6676214750520613e-07, "loss": 11.7379, "step": 36073 }, { "epoch": 1.9643734727355646, "grad_norm": 0.5365915612304775, "learning_rate": 1.6625347984088324e-07, "loss": 11.7134, "step": 36074 }, { "epoch": 1.9644279267321476, "grad_norm": 0.5111232224399088, "learning_rate": 1.657455885005099e-07, "loss": 11.6599, "step": 36075 }, { "epoch": 1.9644823807287306, "grad_norm": 0.5047051101211683, "learning_rate": 1.6523847348803856e-07, "loss": 11.8254, "step": 36076 }, { "epoch": 1.9645368347253136, "grad_norm": 0.5362279851786779, "learning_rate": 1.6473213480742156e-07, "loss": 11.7533, "step": 36077 }, { "epoch": 1.9645912887218966, "grad_norm": 0.6139211616223916, "learning_rate": 1.642265724625891e-07, "loss": 11.8627, "step": 36078 }, { "epoch": 1.9646457427184796, "grad_norm": 0.5461223989996234, "learning_rate": 1.6372178645747138e-07, "loss": 11.736, "step": 36079 }, { "epoch": 1.9647001967150626, "grad_norm": 0.5359178466362011, "learning_rate": 1.6321777679599858e-07, "loss": 11.8026, "step": 36080 }, { "epoch": 1.9647546507116456, "grad_norm": 0.560500157611614, "learning_rate": 1.627145434820898e-07, "loss": 11.7971, "step": 36081 }, { "epoch": 1.9648091047082286, "grad_norm": 0.516300969854972, "learning_rate": 1.6221208651964192e-07, "loss": 11.6479, "step": 36082 }, { "epoch": 1.9648635587048116, "grad_norm": 0.5579620078889198, "learning_rate": 1.6171040591258512e-07, "loss": 11.8091, "step": 36083 }, { "epoch": 1.9649180127013945, "grad_norm": 0.6173431760203307, "learning_rate": 1.612095016648163e-07, "loss": 11.7821, "step": 36084 }, { "epoch": 1.9649724666979778, "grad_norm": 1.0244614891651473, "learning_rate": 1.607093737802101e-07, "loss": 11.8587, "step": 36085 }, { "epoch": 1.9650269206945608, "grad_norm": 0.537696805089532, "learning_rate": 1.6021002226268567e-07, "loss": 11.7809, "step": 36086 }, { "epoch": 1.9650813746911437, "grad_norm": 0.5401388137255373, "learning_rate": 1.5971144711610654e-07, "loss": 11.7671, "step": 36087 }, { "epoch": 1.9651358286877267, "grad_norm": 0.49885934768199663, "learning_rate": 1.5921364834435847e-07, "loss": 11.7048, "step": 36088 }, { "epoch": 1.9651902826843097, "grad_norm": 0.5756231592654099, "learning_rate": 1.587166259513051e-07, "loss": 11.8952, "step": 36089 }, { "epoch": 1.9652447366808927, "grad_norm": 0.5280664174641819, "learning_rate": 1.5822037994080997e-07, "loss": 11.6791, "step": 36090 }, { "epoch": 1.965299190677476, "grad_norm": 0.5578361953831942, "learning_rate": 1.5772491031673665e-07, "loss": 11.7343, "step": 36091 }, { "epoch": 1.965353644674059, "grad_norm": 0.5191451354626264, "learning_rate": 1.572302170829376e-07, "loss": 11.7964, "step": 36092 }, { "epoch": 1.965408098670642, "grad_norm": 0.5843496016765073, "learning_rate": 1.5673630024326536e-07, "loss": 11.7899, "step": 36093 }, { "epoch": 1.965462552667225, "grad_norm": 0.5260539209834371, "learning_rate": 1.5624315980155014e-07, "loss": 11.6003, "step": 36094 }, { "epoch": 1.965517006663808, "grad_norm": 0.5505038763418566, "learning_rate": 1.5575079576164443e-07, "loss": 11.6972, "step": 36095 }, { "epoch": 1.965571460660391, "grad_norm": 0.5464706381830436, "learning_rate": 1.552592081273563e-07, "loss": 11.871, "step": 36096 }, { "epoch": 1.965625914656974, "grad_norm": 0.6060484170851027, "learning_rate": 1.5476839690250488e-07, "loss": 11.9239, "step": 36097 }, { "epoch": 1.9656803686535569, "grad_norm": 0.5348211016419099, "learning_rate": 1.542783620909316e-07, "loss": 11.7832, "step": 36098 }, { "epoch": 1.9657348226501399, "grad_norm": 0.527604278349885, "learning_rate": 1.537891036964334e-07, "loss": 11.8067, "step": 36099 }, { "epoch": 1.9657892766467229, "grad_norm": 0.545937571217918, "learning_rate": 1.5330062172281835e-07, "loss": 11.8276, "step": 36100 }, { "epoch": 1.9658437306433059, "grad_norm": 0.6159916248778422, "learning_rate": 1.5281291617387227e-07, "loss": 11.9024, "step": 36101 }, { "epoch": 1.9658981846398889, "grad_norm": 0.5393725227542722, "learning_rate": 1.5232598705340327e-07, "loss": 11.8396, "step": 36102 }, { "epoch": 1.9659526386364718, "grad_norm": 0.5850637917033171, "learning_rate": 1.5183983436519722e-07, "loss": 11.6428, "step": 36103 }, { "epoch": 1.9660070926330548, "grad_norm": 0.6049639957187175, "learning_rate": 1.5135445811301774e-07, "loss": 11.8286, "step": 36104 }, { "epoch": 1.9660615466296378, "grad_norm": 0.5391677320089677, "learning_rate": 1.508698583006507e-07, "loss": 11.6355, "step": 36105 }, { "epoch": 1.9661160006262208, "grad_norm": 0.5963141921636924, "learning_rate": 1.5038603493187087e-07, "loss": 11.8, "step": 36106 }, { "epoch": 1.9661704546228038, "grad_norm": 0.5607816036452667, "learning_rate": 1.4990298801043079e-07, "loss": 11.78, "step": 36107 }, { "epoch": 1.966224908619387, "grad_norm": 0.5467884372063281, "learning_rate": 1.4942071754009413e-07, "loss": 11.8496, "step": 36108 }, { "epoch": 1.96627936261597, "grad_norm": 0.5237521257677624, "learning_rate": 1.4893922352460233e-07, "loss": 11.8206, "step": 36109 }, { "epoch": 1.966333816612553, "grad_norm": 0.5031099102344975, "learning_rate": 1.4845850596770793e-07, "loss": 11.7422, "step": 36110 }, { "epoch": 1.966388270609136, "grad_norm": 0.5607691559665281, "learning_rate": 1.4797856487314132e-07, "loss": 11.8437, "step": 36111 }, { "epoch": 1.966442724605719, "grad_norm": 0.6055633202044425, "learning_rate": 1.4749940024464393e-07, "loss": 11.9146, "step": 36112 }, { "epoch": 1.966497178602302, "grad_norm": 0.5361187480751732, "learning_rate": 1.47021012085935e-07, "loss": 11.7409, "step": 36113 }, { "epoch": 1.9665516325988852, "grad_norm": 0.653688839131671, "learning_rate": 1.465434004007338e-07, "loss": 11.9156, "step": 36114 }, { "epoch": 1.9666060865954682, "grad_norm": 0.5149642296727405, "learning_rate": 1.4606656519275952e-07, "loss": 11.789, "step": 36115 }, { "epoch": 1.9666605405920512, "grad_norm": 0.5676475229880306, "learning_rate": 1.4559050646570925e-07, "loss": 11.8223, "step": 36116 }, { "epoch": 1.9667149945886342, "grad_norm": 0.5252849505699192, "learning_rate": 1.4511522422330227e-07, "loss": 11.7996, "step": 36117 }, { "epoch": 1.9667694485852172, "grad_norm": 0.5877817757785959, "learning_rate": 1.4464071846921333e-07, "loss": 11.7568, "step": 36118 }, { "epoch": 1.9668239025818002, "grad_norm": 0.5882393227807133, "learning_rate": 1.4416698920716177e-07, "loss": 11.8876, "step": 36119 }, { "epoch": 1.9668783565783832, "grad_norm": 0.5704054098512796, "learning_rate": 1.4369403644080014e-07, "loss": 11.7115, "step": 36120 }, { "epoch": 1.9669328105749662, "grad_norm": 0.5289613004063723, "learning_rate": 1.4322186017382554e-07, "loss": 11.9047, "step": 36121 }, { "epoch": 1.9669872645715492, "grad_norm": 0.5453688217064268, "learning_rate": 1.427504604098906e-07, "loss": 11.7507, "step": 36122 }, { "epoch": 1.9670417185681321, "grad_norm": 0.5198981806085811, "learning_rate": 1.4227983715269232e-07, "loss": 11.6551, "step": 36123 }, { "epoch": 1.9670961725647151, "grad_norm": 0.5253699755300153, "learning_rate": 1.4180999040586117e-07, "loss": 11.7519, "step": 36124 }, { "epoch": 1.9671506265612981, "grad_norm": 0.5393785358843216, "learning_rate": 1.413409201730609e-07, "loss": 11.859, "step": 36125 }, { "epoch": 1.9672050805578811, "grad_norm": 0.5558675984047506, "learning_rate": 1.408726264579441e-07, "loss": 11.7202, "step": 36126 }, { "epoch": 1.9672595345544641, "grad_norm": 0.5586216581087697, "learning_rate": 1.404051092641523e-07, "loss": 11.89, "step": 36127 }, { "epoch": 1.967313988551047, "grad_norm": 0.537080041467235, "learning_rate": 1.3993836859531596e-07, "loss": 11.5992, "step": 36128 }, { "epoch": 1.96736844254763, "grad_norm": 0.5485479140489351, "learning_rate": 1.3947240445505437e-07, "loss": 11.8157, "step": 36129 }, { "epoch": 1.967422896544213, "grad_norm": 0.5432455194118807, "learning_rate": 1.3900721684702023e-07, "loss": 11.6892, "step": 36130 }, { "epoch": 1.967477350540796, "grad_norm": 0.5711861910186576, "learning_rate": 1.385428057747995e-07, "loss": 11.8944, "step": 36131 }, { "epoch": 1.9675318045373793, "grad_norm": 0.5397806512059342, "learning_rate": 1.3807917124201154e-07, "loss": 11.7456, "step": 36132 }, { "epoch": 1.9675862585339623, "grad_norm": 0.5562135585852154, "learning_rate": 1.3761631325227564e-07, "loss": 11.8171, "step": 36133 }, { "epoch": 1.9676407125305453, "grad_norm": 0.5412331934578432, "learning_rate": 1.3715423180917786e-07, "loss": 11.8402, "step": 36134 }, { "epoch": 1.9676951665271283, "grad_norm": 0.4967746914567025, "learning_rate": 1.366929269163264e-07, "loss": 11.7487, "step": 36135 }, { "epoch": 1.9677496205237113, "grad_norm": 0.5395412386790123, "learning_rate": 1.362323985772851e-07, "loss": 11.7525, "step": 36136 }, { "epoch": 1.9678040745202945, "grad_norm": 0.6026132408976158, "learning_rate": 1.3577264679566214e-07, "loss": 11.8774, "step": 36137 }, { "epoch": 1.9678585285168775, "grad_norm": 0.5580241116103313, "learning_rate": 1.353136715749992e-07, "loss": 11.8646, "step": 36138 }, { "epoch": 1.9679129825134605, "grad_norm": 0.5345519822307682, "learning_rate": 1.3485547291890442e-07, "loss": 11.7214, "step": 36139 }, { "epoch": 1.9679674365100435, "grad_norm": 0.5376399763605038, "learning_rate": 1.3439805083090838e-07, "loss": 11.6297, "step": 36140 }, { "epoch": 1.9680218905066265, "grad_norm": 0.5098290078463686, "learning_rate": 1.3394140531458598e-07, "loss": 11.8686, "step": 36141 }, { "epoch": 1.9680763445032095, "grad_norm": 0.49553484030839434, "learning_rate": 1.3348553637347882e-07, "loss": 11.746, "step": 36142 }, { "epoch": 1.9681307984997924, "grad_norm": 0.5133708628717928, "learning_rate": 1.3303044401112852e-07, "loss": 11.6988, "step": 36143 }, { "epoch": 1.9681852524963754, "grad_norm": 0.6003084040624407, "learning_rate": 1.325761282310878e-07, "loss": 11.9923, "step": 36144 }, { "epoch": 1.9682397064929584, "grad_norm": 0.517819959884357, "learning_rate": 1.3212258903688714e-07, "loss": 11.729, "step": 36145 }, { "epoch": 1.9682941604895414, "grad_norm": 0.5401976231040443, "learning_rate": 1.3166982643204595e-07, "loss": 11.6976, "step": 36146 }, { "epoch": 1.9683486144861244, "grad_norm": 0.5557863424856615, "learning_rate": 1.3121784042009478e-07, "loss": 11.786, "step": 36147 }, { "epoch": 1.9684030684827074, "grad_norm": 0.5617659525947073, "learning_rate": 1.3076663100453079e-07, "loss": 11.8253, "step": 36148 }, { "epoch": 1.9684575224792904, "grad_norm": 0.5604156723002983, "learning_rate": 1.303161981888734e-07, "loss": 11.6716, "step": 36149 }, { "epoch": 1.9685119764758734, "grad_norm": 0.5239089373389401, "learning_rate": 1.2986654197664205e-07, "loss": 11.8228, "step": 36150 }, { "epoch": 1.9685664304724564, "grad_norm": 0.5449828924114282, "learning_rate": 1.294176623713006e-07, "loss": 11.6734, "step": 36151 }, { "epoch": 1.9686208844690394, "grad_norm": 0.5389916291346643, "learning_rate": 1.2896955937635734e-07, "loss": 11.7565, "step": 36152 }, { "epoch": 1.9686753384656224, "grad_norm": 0.5836005375121872, "learning_rate": 1.2852223299529843e-07, "loss": 11.9432, "step": 36153 }, { "epoch": 1.9687297924622054, "grad_norm": 0.5841893821371371, "learning_rate": 1.280756832315988e-07, "loss": 11.8333, "step": 36154 }, { "epoch": 1.9687842464587886, "grad_norm": 0.5275668060950642, "learning_rate": 1.2762991008873348e-07, "loss": 11.7275, "step": 36155 }, { "epoch": 1.9688387004553716, "grad_norm": 0.5217971901470458, "learning_rate": 1.2718491357016637e-07, "loss": 11.8409, "step": 36156 }, { "epoch": 1.9688931544519546, "grad_norm": 0.5498514013741188, "learning_rate": 1.2674069367935026e-07, "loss": 11.8383, "step": 36157 }, { "epoch": 1.9689476084485376, "grad_norm": 0.5214066287859432, "learning_rate": 1.2629725041976014e-07, "loss": 11.7107, "step": 36158 }, { "epoch": 1.9690020624451205, "grad_norm": 0.5597912773438624, "learning_rate": 1.2585458379481552e-07, "loss": 11.7649, "step": 36159 }, { "epoch": 1.9690565164417035, "grad_norm": 0.5673417262170881, "learning_rate": 1.2541269380799136e-07, "loss": 11.7681, "step": 36160 }, { "epoch": 1.9691109704382868, "grad_norm": 0.5287815407454016, "learning_rate": 1.249715804626961e-07, "loss": 11.7399, "step": 36161 }, { "epoch": 1.9691654244348697, "grad_norm": 0.5069560083107458, "learning_rate": 1.2453124376237134e-07, "loss": 11.7218, "step": 36162 }, { "epoch": 1.9692198784314527, "grad_norm": 0.518075331035977, "learning_rate": 1.2409168371044776e-07, "loss": 11.6591, "step": 36163 }, { "epoch": 1.9692743324280357, "grad_norm": 0.5758893774532786, "learning_rate": 1.2365290031032263e-07, "loss": 11.7872, "step": 36164 }, { "epoch": 1.9693287864246187, "grad_norm": 0.5899421541870227, "learning_rate": 1.2321489356543758e-07, "loss": 11.7815, "step": 36165 }, { "epoch": 1.9693832404212017, "grad_norm": 0.5995609830507865, "learning_rate": 1.2277766347917886e-07, "loss": 11.9585, "step": 36166 }, { "epoch": 1.9694376944177847, "grad_norm": 0.5431532295982318, "learning_rate": 1.223412100549437e-07, "loss": 11.7666, "step": 36167 }, { "epoch": 1.9694921484143677, "grad_norm": 0.5715447701570818, "learning_rate": 1.219055332961405e-07, "loss": 11.8167, "step": 36168 }, { "epoch": 1.9695466024109507, "grad_norm": 0.5283972325969607, "learning_rate": 1.2147063320614438e-07, "loss": 11.7003, "step": 36169 }, { "epoch": 1.9696010564075337, "grad_norm": 0.6585150801629824, "learning_rate": 1.2103650978834147e-07, "loss": 11.8573, "step": 36170 }, { "epoch": 1.9696555104041167, "grad_norm": 0.565393904228365, "learning_rate": 1.2060316304610686e-07, "loss": 11.7446, "step": 36171 }, { "epoch": 1.9697099644006997, "grad_norm": 0.5742858779716876, "learning_rate": 1.2017059298281564e-07, "loss": 11.7469, "step": 36172 }, { "epoch": 1.9697644183972827, "grad_norm": 0.5166168818491959, "learning_rate": 1.1973879960183176e-07, "loss": 11.7861, "step": 36173 }, { "epoch": 1.9698188723938657, "grad_norm": 0.5302467696516732, "learning_rate": 1.1930778290650814e-07, "loss": 11.7151, "step": 36174 }, { "epoch": 1.9698733263904487, "grad_norm": 0.5700285546687027, "learning_rate": 1.188775429001865e-07, "loss": 11.7277, "step": 36175 }, { "epoch": 1.9699277803870316, "grad_norm": 0.5419987626982037, "learning_rate": 1.1844807958623083e-07, "loss": 11.7204, "step": 36176 }, { "epoch": 1.9699822343836146, "grad_norm": 0.4873817336037878, "learning_rate": 1.1801939296797182e-07, "loss": 11.6233, "step": 36177 }, { "epoch": 1.9700366883801979, "grad_norm": 0.49949415826014854, "learning_rate": 1.1759148304875122e-07, "loss": 11.6539, "step": 36178 }, { "epoch": 1.9700911423767808, "grad_norm": 0.5398713427126732, "learning_rate": 1.1716434983187751e-07, "loss": 11.7904, "step": 36179 }, { "epoch": 1.9701455963733638, "grad_norm": 0.5627717589213662, "learning_rate": 1.1673799332069247e-07, "loss": 11.7619, "step": 36180 }, { "epoch": 1.9702000503699468, "grad_norm": 0.5212479404199305, "learning_rate": 1.1631241351850453e-07, "loss": 11.7151, "step": 36181 }, { "epoch": 1.9702545043665298, "grad_norm": 0.5318582358039198, "learning_rate": 1.1588761042862218e-07, "loss": 11.7379, "step": 36182 }, { "epoch": 1.9703089583631128, "grad_norm": 0.5851594414369534, "learning_rate": 1.1546358405434277e-07, "loss": 11.77, "step": 36183 }, { "epoch": 1.970363412359696, "grad_norm": 0.5343863641122449, "learning_rate": 1.1504033439896367e-07, "loss": 11.7686, "step": 36184 }, { "epoch": 1.970417866356279, "grad_norm": 0.5995527303681142, "learning_rate": 1.1461786146579334e-07, "loss": 11.7558, "step": 36185 }, { "epoch": 1.970472320352862, "grad_norm": 0.5154038835467099, "learning_rate": 1.1419616525809584e-07, "loss": 11.6241, "step": 36186 }, { "epoch": 1.970526774349445, "grad_norm": 0.5522059941507242, "learning_rate": 1.1377524577916854e-07, "loss": 11.7807, "step": 36187 }, { "epoch": 1.970581228346028, "grad_norm": 0.577810220383659, "learning_rate": 1.1335510303226437e-07, "loss": 11.767, "step": 36188 }, { "epoch": 1.970635682342611, "grad_norm": 0.5724667765123155, "learning_rate": 1.1293573702068072e-07, "loss": 11.6692, "step": 36189 }, { "epoch": 1.970690136339194, "grad_norm": 0.6336511842378724, "learning_rate": 1.1251714774764833e-07, "loss": 11.7558, "step": 36190 }, { "epoch": 1.970744590335777, "grad_norm": 0.5545459044883255, "learning_rate": 1.1209933521643124e-07, "loss": 11.7489, "step": 36191 }, { "epoch": 1.97079904433236, "grad_norm": 0.5551963448600635, "learning_rate": 1.1168229943028241e-07, "loss": 11.7147, "step": 36192 }, { "epoch": 1.970853498328943, "grad_norm": 0.4912865617046046, "learning_rate": 1.112660403924437e-07, "loss": 11.7612, "step": 36193 }, { "epoch": 1.970907952325526, "grad_norm": 0.5470037838420644, "learning_rate": 1.1085055810615696e-07, "loss": 11.6291, "step": 36194 }, { "epoch": 1.970962406322109, "grad_norm": 0.5738744663975652, "learning_rate": 1.1043585257464184e-07, "loss": 11.8226, "step": 36195 }, { "epoch": 1.971016860318692, "grad_norm": 0.5450275334591849, "learning_rate": 1.1002192380112908e-07, "loss": 11.8505, "step": 36196 }, { "epoch": 1.971071314315275, "grad_norm": 0.553970158224054, "learning_rate": 1.0960877178883833e-07, "loss": 11.7523, "step": 36197 }, { "epoch": 1.971125768311858, "grad_norm": 0.5110423403723059, "learning_rate": 1.0919639654097813e-07, "loss": 11.7543, "step": 36198 }, { "epoch": 1.971180222308441, "grad_norm": 0.5611192989680622, "learning_rate": 1.0878479806076813e-07, "loss": 11.8191, "step": 36199 }, { "epoch": 1.971234676305024, "grad_norm": 0.5475503858666368, "learning_rate": 1.0837397635139468e-07, "loss": 11.7071, "step": 36200 }, { "epoch": 1.9712891303016071, "grad_norm": 0.5519847935598601, "learning_rate": 1.0796393141605521e-07, "loss": 11.6902, "step": 36201 }, { "epoch": 1.9713435842981901, "grad_norm": 0.5856589134744672, "learning_rate": 1.0755466325793606e-07, "loss": 11.6871, "step": 36202 }, { "epoch": 1.9713980382947731, "grad_norm": 0.518004116995887, "learning_rate": 1.0714617188022357e-07, "loss": 11.6014, "step": 36203 }, { "epoch": 1.971452492291356, "grad_norm": 0.6271548455564989, "learning_rate": 1.067384572861041e-07, "loss": 11.8362, "step": 36204 }, { "epoch": 1.971506946287939, "grad_norm": 0.594355934820692, "learning_rate": 1.0633151947873065e-07, "loss": 11.708, "step": 36205 }, { "epoch": 1.971561400284522, "grad_norm": 0.530694315035782, "learning_rate": 1.0592535846127849e-07, "loss": 11.7427, "step": 36206 }, { "epoch": 1.9716158542811053, "grad_norm": 0.5497407668420948, "learning_rate": 1.0551997423690063e-07, "loss": 11.7952, "step": 36207 }, { "epoch": 1.9716703082776883, "grad_norm": 0.5237390509891918, "learning_rate": 1.0511536680876122e-07, "loss": 11.7361, "step": 36208 }, { "epoch": 1.9717247622742713, "grad_norm": 0.5521301418158289, "learning_rate": 1.0471153617999108e-07, "loss": 11.7663, "step": 36209 }, { "epoch": 1.9717792162708543, "grad_norm": 0.6364183485173556, "learning_rate": 1.0430848235373214e-07, "loss": 11.9224, "step": 36210 }, { "epoch": 1.9718336702674373, "grad_norm": 0.5466973097287856, "learning_rate": 1.0390620533312634e-07, "loss": 11.8913, "step": 36211 }, { "epoch": 1.9718881242640203, "grad_norm": 0.5324499588525479, "learning_rate": 1.035047051212934e-07, "loss": 11.8186, "step": 36212 }, { "epoch": 1.9719425782606033, "grad_norm": 0.5269022730122065, "learning_rate": 1.0310398172136415e-07, "loss": 11.7582, "step": 36213 }, { "epoch": 1.9719970322571863, "grad_norm": 0.7373916070420619, "learning_rate": 1.0270403513645832e-07, "loss": 11.7461, "step": 36214 }, { "epoch": 1.9720514862537692, "grad_norm": 0.519722595597763, "learning_rate": 1.0230486536967344e-07, "loss": 11.7973, "step": 36215 }, { "epoch": 1.9721059402503522, "grad_norm": 0.5892860532731992, "learning_rate": 1.0190647242411811e-07, "loss": 11.6721, "step": 36216 }, { "epoch": 1.9721603942469352, "grad_norm": 0.5406902008203212, "learning_rate": 1.0150885630288987e-07, "loss": 11.6886, "step": 36217 }, { "epoch": 1.9722148482435182, "grad_norm": 0.557050987792401, "learning_rate": 1.0111201700908624e-07, "loss": 11.8016, "step": 36218 }, { "epoch": 1.9722693022401012, "grad_norm": 0.5353839650173511, "learning_rate": 1.0071595454578253e-07, "loss": 11.8251, "step": 36219 }, { "epoch": 1.9723237562366842, "grad_norm": 0.5437374207443233, "learning_rate": 1.0032066891606517e-07, "loss": 11.8201, "step": 36220 }, { "epoch": 1.9723782102332672, "grad_norm": 0.5638026184592565, "learning_rate": 9.992616012300949e-08, "loss": 11.9145, "step": 36221 }, { "epoch": 1.9724326642298502, "grad_norm": 0.5387061459151387, "learning_rate": 9.953242816967968e-08, "loss": 11.7386, "step": 36222 }, { "epoch": 1.9724871182264332, "grad_norm": 0.5329417947855982, "learning_rate": 9.913947305913995e-08, "loss": 11.7285, "step": 36223 }, { "epoch": 1.9725415722230162, "grad_norm": 0.5437957574204663, "learning_rate": 9.874729479444345e-08, "loss": 11.7789, "step": 36224 }, { "epoch": 1.9725960262195994, "grad_norm": 0.5532036869654136, "learning_rate": 9.835589337864326e-08, "loss": 11.8299, "step": 36225 }, { "epoch": 1.9726504802161824, "grad_norm": 0.5942121282855267, "learning_rate": 9.796526881478141e-08, "loss": 11.8215, "step": 36226 }, { "epoch": 1.9727049342127654, "grad_norm": 0.5400951194990719, "learning_rate": 9.75754211058999e-08, "loss": 11.8526, "step": 36227 }, { "epoch": 1.9727593882093484, "grad_norm": 0.5148944871312742, "learning_rate": 9.718635025501855e-08, "loss": 11.8274, "step": 36228 }, { "epoch": 1.9728138422059314, "grad_norm": 0.5032515065594267, "learning_rate": 9.679805626517934e-08, "loss": 11.6671, "step": 36229 }, { "epoch": 1.9728682962025144, "grad_norm": 0.5470610301875037, "learning_rate": 9.641053913937992e-08, "loss": 11.6036, "step": 36230 }, { "epoch": 1.9729227501990976, "grad_norm": 0.53765309824049, "learning_rate": 9.602379888065116e-08, "loss": 11.8478, "step": 36231 }, { "epoch": 1.9729772041956806, "grad_norm": 0.5587061458410771, "learning_rate": 9.563783549200178e-08, "loss": 11.7793, "step": 36232 }, { "epoch": 1.9730316581922636, "grad_norm": 0.5366956153541028, "learning_rate": 9.525264897641828e-08, "loss": 11.6919, "step": 36233 }, { "epoch": 1.9730861121888466, "grad_norm": 0.511307339449537, "learning_rate": 9.486823933689826e-08, "loss": 11.7315, "step": 36234 }, { "epoch": 1.9731405661854295, "grad_norm": 0.5064735221613555, "learning_rate": 9.448460657645042e-08, "loss": 11.7467, "step": 36235 }, { "epoch": 1.9731950201820125, "grad_norm": 0.5419981151780828, "learning_rate": 9.410175069803906e-08, "loss": 11.7226, "step": 36236 }, { "epoch": 1.9732494741785955, "grad_norm": 0.6664414139562269, "learning_rate": 9.371967170463958e-08, "loss": 11.7607, "step": 36237 }, { "epoch": 1.9733039281751785, "grad_norm": 0.5885293274527131, "learning_rate": 9.333836959923847e-08, "loss": 11.8146, "step": 36238 }, { "epoch": 1.9733583821717615, "grad_norm": 0.5063013234728269, "learning_rate": 9.295784438478894e-08, "loss": 11.7359, "step": 36239 }, { "epoch": 1.9734128361683445, "grad_norm": 0.5694645873013854, "learning_rate": 9.257809606426638e-08, "loss": 11.8281, "step": 36240 }, { "epoch": 1.9734672901649275, "grad_norm": 0.5171677678543232, "learning_rate": 9.219912464060176e-08, "loss": 11.6704, "step": 36241 }, { "epoch": 1.9735217441615105, "grad_norm": 0.559913902747464, "learning_rate": 9.182093011674831e-08, "loss": 11.6763, "step": 36242 }, { "epoch": 1.9735761981580935, "grad_norm": 0.5667602243596593, "learning_rate": 9.144351249564809e-08, "loss": 11.7827, "step": 36243 }, { "epoch": 1.9736306521546765, "grad_norm": 0.556631289938428, "learning_rate": 9.106687178024321e-08, "loss": 11.7439, "step": 36244 }, { "epoch": 1.9736851061512595, "grad_norm": 0.5730097167908303, "learning_rate": 9.069100797345353e-08, "loss": 11.6917, "step": 36245 }, { "epoch": 1.9737395601478425, "grad_norm": 0.542573366011134, "learning_rate": 9.031592107821008e-08, "loss": 11.8264, "step": 36246 }, { "epoch": 1.9737940141444255, "grad_norm": 0.5295022511165232, "learning_rate": 8.99416110974216e-08, "loss": 11.5568, "step": 36247 }, { "epoch": 1.9738484681410087, "grad_norm": 0.6218076862655832, "learning_rate": 8.956807803399691e-08, "loss": 11.8278, "step": 36248 }, { "epoch": 1.9739029221375917, "grad_norm": 0.5000786730024737, "learning_rate": 8.919532189085589e-08, "loss": 11.7464, "step": 36249 }, { "epoch": 1.9739573761341747, "grad_norm": 0.5658771570438295, "learning_rate": 8.88233426708851e-08, "loss": 11.8297, "step": 36250 }, { "epoch": 1.9740118301307576, "grad_norm": 0.5070732070418988, "learning_rate": 8.845214037697113e-08, "loss": 11.7376, "step": 36251 }, { "epoch": 1.9740662841273406, "grad_norm": 0.5577908444636813, "learning_rate": 8.808171501201167e-08, "loss": 11.85, "step": 36252 }, { "epoch": 1.9741207381239236, "grad_norm": 0.48970841671500775, "learning_rate": 8.77120665788933e-08, "loss": 11.7533, "step": 36253 }, { "epoch": 1.9741751921205068, "grad_norm": 0.5301730065638747, "learning_rate": 8.734319508046929e-08, "loss": 11.6283, "step": 36254 }, { "epoch": 1.9742296461170898, "grad_norm": 0.5573255084230732, "learning_rate": 8.697510051962621e-08, "loss": 11.7985, "step": 36255 }, { "epoch": 1.9742841001136728, "grad_norm": 0.5134263743120279, "learning_rate": 8.660778289921734e-08, "loss": 11.7688, "step": 36256 }, { "epoch": 1.9743385541102558, "grad_norm": 0.5559404718408812, "learning_rate": 8.624124222210705e-08, "loss": 11.7263, "step": 36257 }, { "epoch": 1.9743930081068388, "grad_norm": 0.5465372986633922, "learning_rate": 8.587547849112642e-08, "loss": 11.7493, "step": 36258 }, { "epoch": 1.9744474621034218, "grad_norm": 0.6107175971590492, "learning_rate": 8.551049170915094e-08, "loss": 11.8658, "step": 36259 }, { "epoch": 1.9745019161000048, "grad_norm": 0.5091342860021256, "learning_rate": 8.514628187898944e-08, "loss": 11.6125, "step": 36260 }, { "epoch": 1.9745563700965878, "grad_norm": 0.48977344812386725, "learning_rate": 8.47828490034952e-08, "loss": 11.7845, "step": 36261 }, { "epoch": 1.9746108240931708, "grad_norm": 0.5106483975923595, "learning_rate": 8.442019308547711e-08, "loss": 11.7983, "step": 36262 }, { "epoch": 1.9746652780897538, "grad_norm": 0.5654591778208484, "learning_rate": 8.405831412776621e-08, "loss": 11.8042, "step": 36263 }, { "epoch": 1.9747197320863368, "grad_norm": 0.5390632403146243, "learning_rate": 8.369721213318248e-08, "loss": 11.6458, "step": 36264 }, { "epoch": 1.9747741860829198, "grad_norm": 0.6122913704702639, "learning_rate": 8.333688710451259e-08, "loss": 11.7465, "step": 36265 }, { "epoch": 1.9748286400795028, "grad_norm": 0.5812981482406246, "learning_rate": 8.29773390445765e-08, "loss": 11.8522, "step": 36266 }, { "epoch": 1.9748830940760858, "grad_norm": 0.5626605446982994, "learning_rate": 8.261856795617195e-08, "loss": 11.5391, "step": 36267 }, { "epoch": 1.9749375480726687, "grad_norm": 0.5645370918586674, "learning_rate": 8.226057384206343e-08, "loss": 11.7902, "step": 36268 }, { "epoch": 1.9749920020692517, "grad_norm": 0.5304535637631037, "learning_rate": 8.190335670507088e-08, "loss": 11.7177, "step": 36269 }, { "epoch": 1.9750464560658347, "grad_norm": 0.5090654073949913, "learning_rate": 8.154691654794766e-08, "loss": 11.7826, "step": 36270 }, { "epoch": 1.975100910062418, "grad_norm": 0.5589764265176818, "learning_rate": 8.119125337346934e-08, "loss": 11.8097, "step": 36271 }, { "epoch": 1.975155364059001, "grad_norm": 0.4915100717076199, "learning_rate": 8.083636718441145e-08, "loss": 11.8505, "step": 36272 }, { "epoch": 1.975209818055584, "grad_norm": 0.555226556038492, "learning_rate": 8.048225798351628e-08, "loss": 11.7101, "step": 36273 }, { "epoch": 1.975264272052167, "grad_norm": 0.5304210386952574, "learning_rate": 8.012892577354824e-08, "loss": 11.8985, "step": 36274 }, { "epoch": 1.97531872604875, "grad_norm": 0.5224139660861945, "learning_rate": 7.977637055726073e-08, "loss": 11.6573, "step": 36275 }, { "epoch": 1.975373180045333, "grad_norm": 0.5965846458124298, "learning_rate": 7.942459233738486e-08, "loss": 11.838, "step": 36276 }, { "epoch": 1.9754276340419161, "grad_norm": 0.5522995528529425, "learning_rate": 7.90735911166629e-08, "loss": 11.7547, "step": 36277 }, { "epoch": 1.9754820880384991, "grad_norm": 0.574676307176989, "learning_rate": 7.87233668978149e-08, "loss": 11.627, "step": 36278 }, { "epoch": 1.975536542035082, "grad_norm": 0.5173689839251362, "learning_rate": 7.83739196835831e-08, "loss": 11.8449, "step": 36279 }, { "epoch": 1.975590996031665, "grad_norm": 0.5135224400747184, "learning_rate": 7.802524947666534e-08, "loss": 11.6768, "step": 36280 }, { "epoch": 1.975645450028248, "grad_norm": 0.5251815678512176, "learning_rate": 7.767735627978168e-08, "loss": 11.7705, "step": 36281 }, { "epoch": 1.975699904024831, "grad_norm": 0.586207223170747, "learning_rate": 7.733024009562994e-08, "loss": 11.8765, "step": 36282 }, { "epoch": 1.975754358021414, "grad_norm": 0.5574241248148462, "learning_rate": 7.698390092691909e-08, "loss": 11.8348, "step": 36283 }, { "epoch": 1.975808812017997, "grad_norm": 0.7273289480528171, "learning_rate": 7.663833877634696e-08, "loss": 11.933, "step": 36284 }, { "epoch": 1.97586326601458, "grad_norm": 0.5668389575448375, "learning_rate": 7.629355364657809e-08, "loss": 11.8707, "step": 36285 }, { "epoch": 1.975917720011163, "grad_norm": 0.50882618487503, "learning_rate": 7.59495455403103e-08, "loss": 11.7991, "step": 36286 }, { "epoch": 1.975972174007746, "grad_norm": 0.5920375125195722, "learning_rate": 7.560631446023036e-08, "loss": 11.8265, "step": 36287 }, { "epoch": 1.976026628004329, "grad_norm": 0.5267960479256812, "learning_rate": 7.52638604089806e-08, "loss": 11.7645, "step": 36288 }, { "epoch": 1.976081082000912, "grad_norm": 0.5596102602300225, "learning_rate": 7.492218338923662e-08, "loss": 11.7543, "step": 36289 }, { "epoch": 1.976135535997495, "grad_norm": 0.5709911412573924, "learning_rate": 7.458128340366299e-08, "loss": 11.7865, "step": 36290 }, { "epoch": 1.976189989994078, "grad_norm": 0.5582750815536331, "learning_rate": 7.424116045489094e-08, "loss": 11.8644, "step": 36291 }, { "epoch": 1.976244443990661, "grad_norm": 0.5237505920322937, "learning_rate": 7.390181454558498e-08, "loss": 11.7387, "step": 36292 }, { "epoch": 1.976298897987244, "grad_norm": 0.5097076329400535, "learning_rate": 7.356324567837636e-08, "loss": 11.7185, "step": 36293 }, { "epoch": 1.976353351983827, "grad_norm": 0.5429137780618095, "learning_rate": 7.32254538558963e-08, "loss": 11.8103, "step": 36294 }, { "epoch": 1.9764078059804102, "grad_norm": 0.5841830201353465, "learning_rate": 7.288843908076492e-08, "loss": 11.7246, "step": 36295 }, { "epoch": 1.9764622599769932, "grad_norm": 0.5782744501294761, "learning_rate": 7.255220135562457e-08, "loss": 11.8672, "step": 36296 }, { "epoch": 1.9765167139735762, "grad_norm": 0.5426315506050473, "learning_rate": 7.221674068306206e-08, "loss": 11.7543, "step": 36297 }, { "epoch": 1.9765711679701592, "grad_norm": 0.5466101557634807, "learning_rate": 7.188205706570861e-08, "loss": 11.5791, "step": 36298 }, { "epoch": 1.9766256219667422, "grad_norm": 0.5375236649410577, "learning_rate": 7.154815050616215e-08, "loss": 11.7321, "step": 36299 }, { "epoch": 1.9766800759633252, "grad_norm": 0.4964365523418336, "learning_rate": 7.121502100700949e-08, "loss": 11.6377, "step": 36300 }, { "epoch": 1.9767345299599084, "grad_norm": 0.6305503821042118, "learning_rate": 7.088266857084858e-08, "loss": 11.8237, "step": 36301 }, { "epoch": 1.9767889839564914, "grad_norm": 0.5358256091857745, "learning_rate": 7.055109320025511e-08, "loss": 11.7467, "step": 36302 }, { "epoch": 1.9768434379530744, "grad_norm": 0.5497933257512864, "learning_rate": 7.0220294897827e-08, "loss": 11.8914, "step": 36303 }, { "epoch": 1.9768978919496574, "grad_norm": 0.5984924652247515, "learning_rate": 6.989027366611778e-08, "loss": 11.7903, "step": 36304 }, { "epoch": 1.9769523459462404, "grad_norm": 0.550647246077146, "learning_rate": 6.956102950770315e-08, "loss": 11.8169, "step": 36305 }, { "epoch": 1.9770067999428234, "grad_norm": 0.5377030961924383, "learning_rate": 6.923256242513664e-08, "loss": 11.7392, "step": 36306 }, { "epoch": 1.9770612539394063, "grad_norm": 0.5272190434435017, "learning_rate": 6.890487242097177e-08, "loss": 11.7926, "step": 36307 }, { "epoch": 1.9771157079359893, "grad_norm": 0.5148450472477232, "learning_rate": 6.857795949776203e-08, "loss": 11.7307, "step": 36308 }, { "epoch": 1.9771701619325723, "grad_norm": 0.5522228817044328, "learning_rate": 6.825182365806093e-08, "loss": 11.8029, "step": 36309 }, { "epoch": 1.9772246159291553, "grad_norm": 0.5048907866642403, "learning_rate": 6.792646490437759e-08, "loss": 11.5917, "step": 36310 }, { "epoch": 1.9772790699257383, "grad_norm": 0.5817331170603239, "learning_rate": 6.760188323926552e-08, "loss": 11.7732, "step": 36311 }, { "epoch": 1.9773335239223213, "grad_norm": 0.5569403418667223, "learning_rate": 6.727807866523384e-08, "loss": 11.8178, "step": 36312 }, { "epoch": 1.9773879779189043, "grad_norm": 0.5254249993695699, "learning_rate": 6.695505118481382e-08, "loss": 11.7582, "step": 36313 }, { "epoch": 1.9774424319154873, "grad_norm": 0.5173435106520328, "learning_rate": 6.66328008005035e-08, "loss": 11.7492, "step": 36314 }, { "epoch": 1.9774968859120703, "grad_norm": 0.531210626335476, "learning_rate": 6.631132751482305e-08, "loss": 11.8797, "step": 36315 }, { "epoch": 1.9775513399086533, "grad_norm": 0.5094864697529042, "learning_rate": 6.59906313302594e-08, "loss": 11.7976, "step": 36316 }, { "epoch": 1.9776057939052363, "grad_norm": 0.5615927347246377, "learning_rate": 6.567071224931054e-08, "loss": 11.7936, "step": 36317 }, { "epoch": 1.9776602479018195, "grad_norm": 0.5895509348544351, "learning_rate": 6.535157027446337e-08, "loss": 11.6687, "step": 36318 }, { "epoch": 1.9777147018984025, "grad_norm": 0.5208164665987108, "learning_rate": 6.503320540820479e-08, "loss": 11.8263, "step": 36319 }, { "epoch": 1.9777691558949855, "grad_norm": 0.5077054033679513, "learning_rate": 6.47156176530106e-08, "loss": 11.704, "step": 36320 }, { "epoch": 1.9778236098915685, "grad_norm": 0.5650257572135685, "learning_rate": 6.439880701134548e-08, "loss": 11.8266, "step": 36321 }, { "epoch": 1.9778780638881515, "grad_norm": 0.5194485004209728, "learning_rate": 6.408277348567415e-08, "loss": 11.6489, "step": 36322 }, { "epoch": 1.9779325178847345, "grad_norm": 0.5101688774812396, "learning_rate": 6.37675170784502e-08, "loss": 11.753, "step": 36323 }, { "epoch": 1.9779869718813177, "grad_norm": 0.49336019726026487, "learning_rate": 6.34530377921272e-08, "loss": 11.6734, "step": 36324 }, { "epoch": 1.9780414258779007, "grad_norm": 0.5976201422843452, "learning_rate": 6.313933562915874e-08, "loss": 11.8844, "step": 36325 }, { "epoch": 1.9780958798744837, "grad_norm": 0.5111855462812013, "learning_rate": 6.282641059197625e-08, "loss": 11.7917, "step": 36326 }, { "epoch": 1.9781503338710666, "grad_norm": 0.5422945022187791, "learning_rate": 6.251426268301108e-08, "loss": 11.7145, "step": 36327 }, { "epoch": 1.9782047878676496, "grad_norm": 0.510816362739602, "learning_rate": 6.220289190470575e-08, "loss": 11.8235, "step": 36328 }, { "epoch": 1.9782592418642326, "grad_norm": 0.5620730410934067, "learning_rate": 6.18922982594583e-08, "loss": 11.8108, "step": 36329 }, { "epoch": 1.9783136958608156, "grad_norm": 0.5609138016276113, "learning_rate": 6.158248174970017e-08, "loss": 11.7199, "step": 36330 }, { "epoch": 1.9783681498573986, "grad_norm": 0.5202221186895, "learning_rate": 6.12734423778405e-08, "loss": 11.7697, "step": 36331 }, { "epoch": 1.9784226038539816, "grad_norm": 0.572701285802484, "learning_rate": 6.096518014627738e-08, "loss": 11.9467, "step": 36332 }, { "epoch": 1.9784770578505646, "grad_norm": 0.5556647868062884, "learning_rate": 6.065769505740892e-08, "loss": 11.754, "step": 36333 }, { "epoch": 1.9785315118471476, "grad_norm": 0.5417592795573949, "learning_rate": 6.035098711362209e-08, "loss": 11.7906, "step": 36334 }, { "epoch": 1.9785859658437306, "grad_norm": 0.5681967901406085, "learning_rate": 6.004505631730383e-08, "loss": 11.6744, "step": 36335 }, { "epoch": 1.9786404198403136, "grad_norm": 0.5218432259138275, "learning_rate": 5.973990267084118e-08, "loss": 11.8088, "step": 36336 }, { "epoch": 1.9786948738368966, "grad_norm": 0.509235367162828, "learning_rate": 5.9435526176598863e-08, "loss": 11.7769, "step": 36337 }, { "epoch": 1.9787493278334796, "grad_norm": 0.5982462132848184, "learning_rate": 5.913192683694169e-08, "loss": 11.9151, "step": 36338 }, { "epoch": 1.9788037818300626, "grad_norm": 0.6370102271443775, "learning_rate": 5.882910465424551e-08, "loss": 11.8847, "step": 36339 }, { "epoch": 1.9788582358266456, "grad_norm": 0.5820695328139325, "learning_rate": 5.852705963084182e-08, "loss": 11.7506, "step": 36340 }, { "epoch": 1.9789126898232288, "grad_norm": 0.5480070882482397, "learning_rate": 5.8225791769106476e-08, "loss": 11.7188, "step": 36341 }, { "epoch": 1.9789671438198118, "grad_norm": 0.5234889592020289, "learning_rate": 5.7925301071348746e-08, "loss": 11.8682, "step": 36342 }, { "epoch": 1.9790215978163948, "grad_norm": 0.5489082558769252, "learning_rate": 5.7625587539944514e-08, "loss": 11.7167, "step": 36343 }, { "epoch": 1.9790760518129777, "grad_norm": 0.5211445365558279, "learning_rate": 5.732665117719194e-08, "loss": 11.866, "step": 36344 }, { "epoch": 1.9791305058095607, "grad_norm": 0.5313983787771561, "learning_rate": 5.702849198542248e-08, "loss": 11.6887, "step": 36345 }, { "epoch": 1.9791849598061437, "grad_norm": 0.5935234732911259, "learning_rate": 5.673110996696762e-08, "loss": 11.9406, "step": 36346 }, { "epoch": 1.979239413802727, "grad_norm": 0.5406266078476016, "learning_rate": 5.6434505124136616e-08, "loss": 11.7714, "step": 36347 }, { "epoch": 1.97929386779931, "grad_norm": 0.577116098170172, "learning_rate": 5.613867745922763e-08, "loss": 11.7744, "step": 36348 }, { "epoch": 1.979348321795893, "grad_norm": 0.5639332310482835, "learning_rate": 5.584362697453882e-08, "loss": 11.8665, "step": 36349 }, { "epoch": 1.979402775792476, "grad_norm": 0.586943512552007, "learning_rate": 5.554935367237946e-08, "loss": 11.7265, "step": 36350 }, { "epoch": 1.979457229789059, "grad_norm": 0.5458017642790904, "learning_rate": 5.525585755502549e-08, "loss": 11.8465, "step": 36351 }, { "epoch": 1.979511683785642, "grad_norm": 0.6195057890363825, "learning_rate": 5.496313862476399e-08, "loss": 11.7799, "step": 36352 }, { "epoch": 1.979566137782225, "grad_norm": 0.5576611249620743, "learning_rate": 5.46711968838598e-08, "loss": 11.7103, "step": 36353 }, { "epoch": 1.979620591778808, "grad_norm": 0.5828726139288403, "learning_rate": 5.438003233459998e-08, "loss": 11.8346, "step": 36354 }, { "epoch": 1.9796750457753909, "grad_norm": 0.5360299264781121, "learning_rate": 5.40896449792494e-08, "loss": 11.8725, "step": 36355 }, { "epoch": 1.9797294997719739, "grad_norm": 0.5294166679762473, "learning_rate": 5.3800034820050696e-08, "loss": 11.7766, "step": 36356 }, { "epoch": 1.9797839537685569, "grad_norm": 0.5134185955352489, "learning_rate": 5.3511201859268725e-08, "loss": 11.6572, "step": 36357 }, { "epoch": 1.9798384077651399, "grad_norm": 0.6311800638950488, "learning_rate": 5.3223146099135035e-08, "loss": 11.9058, "step": 36358 }, { "epoch": 1.9798928617617229, "grad_norm": 0.5834818119326419, "learning_rate": 5.2935867541914487e-08, "loss": 11.7748, "step": 36359 }, { "epoch": 1.9799473157583058, "grad_norm": 0.5675728564670772, "learning_rate": 5.264936618981642e-08, "loss": 11.692, "step": 36360 }, { "epoch": 1.9800017697548888, "grad_norm": 0.5399396559574882, "learning_rate": 5.236364204507238e-08, "loss": 11.7302, "step": 36361 }, { "epoch": 1.9800562237514718, "grad_norm": 0.5459817402648429, "learning_rate": 5.207869510992502e-08, "loss": 11.7319, "step": 36362 }, { "epoch": 1.9801106777480548, "grad_norm": 0.5013970198304216, "learning_rate": 5.179452538656149e-08, "loss": 11.8196, "step": 36363 }, { "epoch": 1.9801651317446378, "grad_norm": 0.5595841064832009, "learning_rate": 5.151113287721332e-08, "loss": 11.8034, "step": 36364 }, { "epoch": 1.980219585741221, "grad_norm": 0.5352657501973164, "learning_rate": 5.122851758406766e-08, "loss": 11.787, "step": 36365 }, { "epoch": 1.980274039737804, "grad_norm": 0.5478585939635453, "learning_rate": 5.094667950933385e-08, "loss": 11.7097, "step": 36366 }, { "epoch": 1.980328493734387, "grad_norm": 0.5369698837798761, "learning_rate": 5.0665618655210136e-08, "loss": 11.7658, "step": 36367 }, { "epoch": 1.98038294773097, "grad_norm": 0.5646834408333486, "learning_rate": 5.038533502386145e-08, "loss": 11.8147, "step": 36368 }, { "epoch": 1.980437401727553, "grad_norm": 0.5748268256684115, "learning_rate": 5.0105828617474925e-08, "loss": 11.7697, "step": 36369 }, { "epoch": 1.9804918557241362, "grad_norm": 0.5403407442121676, "learning_rate": 4.982709943823771e-08, "loss": 11.7193, "step": 36370 }, { "epoch": 1.9805463097207192, "grad_norm": 0.558221577878052, "learning_rate": 4.9549147488303635e-08, "loss": 11.7359, "step": 36371 }, { "epoch": 1.9806007637173022, "grad_norm": 0.513380065672353, "learning_rate": 4.927197276982653e-08, "loss": 11.7409, "step": 36372 }, { "epoch": 1.9806552177138852, "grad_norm": 0.5309627229713108, "learning_rate": 4.899557528498244e-08, "loss": 11.7988, "step": 36373 }, { "epoch": 1.9807096717104682, "grad_norm": 0.5099223751420987, "learning_rate": 4.871995503591409e-08, "loss": 11.7858, "step": 36374 }, { "epoch": 1.9807641257070512, "grad_norm": 0.5365808849213447, "learning_rate": 4.8445112024753106e-08, "loss": 11.7678, "step": 36375 }, { "epoch": 1.9808185797036342, "grad_norm": 0.5243160670745014, "learning_rate": 4.817104625364221e-08, "loss": 11.8326, "step": 36376 }, { "epoch": 1.9808730337002172, "grad_norm": 0.5720339580511433, "learning_rate": 4.789775772472416e-08, "loss": 11.835, "step": 36377 }, { "epoch": 1.9809274876968002, "grad_norm": 0.5196927828287613, "learning_rate": 4.762524644010835e-08, "loss": 11.7265, "step": 36378 }, { "epoch": 1.9809819416933832, "grad_norm": 0.564518442792885, "learning_rate": 4.735351240192642e-08, "loss": 11.863, "step": 36379 }, { "epoch": 1.9810363956899661, "grad_norm": 0.5165776029013883, "learning_rate": 4.7082555612287804e-08, "loss": 11.7244, "step": 36380 }, { "epoch": 1.9810908496865491, "grad_norm": 0.5488738136409199, "learning_rate": 4.6812376073290806e-08, "loss": 11.6977, "step": 36381 }, { "epoch": 1.9811453036831321, "grad_norm": 0.5675803755394957, "learning_rate": 4.6542973787044866e-08, "loss": 11.7042, "step": 36382 }, { "epoch": 1.9811997576797151, "grad_norm": 0.5550562381550627, "learning_rate": 4.6274348755637185e-08, "loss": 11.9232, "step": 36383 }, { "epoch": 1.9812542116762981, "grad_norm": 0.5136672127163762, "learning_rate": 4.60065009811661e-08, "loss": 11.7147, "step": 36384 }, { "epoch": 1.981308665672881, "grad_norm": 0.5152296456485161, "learning_rate": 4.5739430465718826e-08, "loss": 11.805, "step": 36385 }, { "epoch": 1.981363119669464, "grad_norm": 0.5471769060936504, "learning_rate": 4.547313721136037e-08, "loss": 11.7434, "step": 36386 }, { "epoch": 1.981417573666047, "grad_norm": 0.5797772295236849, "learning_rate": 4.520762122015576e-08, "loss": 11.7883, "step": 36387 }, { "epoch": 1.9814720276626303, "grad_norm": 0.5224920669972644, "learning_rate": 4.4942882494192204e-08, "loss": 11.8305, "step": 36388 }, { "epoch": 1.9815264816592133, "grad_norm": 0.596610670032578, "learning_rate": 4.467892103550142e-08, "loss": 11.7957, "step": 36389 }, { "epoch": 1.9815809356557963, "grad_norm": 0.49203453727486535, "learning_rate": 4.441573684615952e-08, "loss": 11.7003, "step": 36390 }, { "epoch": 1.9816353896523793, "grad_norm": 0.5130185467092806, "learning_rate": 4.415332992820931e-08, "loss": 11.7963, "step": 36391 }, { "epoch": 1.9816898436489623, "grad_norm": 0.4771372129257163, "learning_rate": 4.38917002836714e-08, "loss": 11.6706, "step": 36392 }, { "epoch": 1.9817442976455453, "grad_norm": 0.519489722446061, "learning_rate": 4.36308479145997e-08, "loss": 11.7753, "step": 36393 }, { "epoch": 1.9817987516421285, "grad_norm": 0.5289121310084038, "learning_rate": 4.3370772823014824e-08, "loss": 11.7648, "step": 36394 }, { "epoch": 1.9818532056387115, "grad_norm": 0.5192001536176016, "learning_rate": 4.3111475010948474e-08, "loss": 11.7289, "step": 36395 }, { "epoch": 1.9819076596352945, "grad_norm": 0.5346676956395546, "learning_rate": 4.285295448041016e-08, "loss": 11.7567, "step": 36396 }, { "epoch": 1.9819621136318775, "grad_norm": 0.5346781785191166, "learning_rate": 4.259521123339827e-08, "loss": 11.8591, "step": 36397 }, { "epoch": 1.9820165676284605, "grad_norm": 0.5311257454039943, "learning_rate": 4.233824527194452e-08, "loss": 11.807, "step": 36398 }, { "epoch": 1.9820710216250434, "grad_norm": 0.5162823434128065, "learning_rate": 4.208205659802511e-08, "loss": 11.6991, "step": 36399 }, { "epoch": 1.9821254756216264, "grad_norm": 0.5467079572874044, "learning_rate": 4.182664521362734e-08, "loss": 11.8395, "step": 36400 }, { "epoch": 1.9821799296182094, "grad_norm": 0.5242538245582777, "learning_rate": 4.15720111207607e-08, "loss": 11.7732, "step": 36401 }, { "epoch": 1.9822343836147924, "grad_norm": 0.5335046877696261, "learning_rate": 4.13181543213903e-08, "loss": 11.8929, "step": 36402 }, { "epoch": 1.9822888376113754, "grad_norm": 0.6177807711391469, "learning_rate": 4.106507481749233e-08, "loss": 11.9371, "step": 36403 }, { "epoch": 1.9823432916079584, "grad_norm": 0.5412095866919604, "learning_rate": 4.0812772611042993e-08, "loss": 11.8959, "step": 36404 }, { "epoch": 1.9823977456045414, "grad_norm": 0.6220907738212006, "learning_rate": 4.0561247703985174e-08, "loss": 11.9239, "step": 36405 }, { "epoch": 1.9824521996011244, "grad_norm": 0.5125961095662214, "learning_rate": 4.0310500098295066e-08, "loss": 11.7893, "step": 36406 }, { "epoch": 1.9825066535977074, "grad_norm": 0.5520342049921835, "learning_rate": 4.0060529795904466e-08, "loss": 11.7016, "step": 36407 }, { "epoch": 1.9825611075942904, "grad_norm": 0.5570034617747267, "learning_rate": 3.9811336798778466e-08, "loss": 11.7271, "step": 36408 }, { "epoch": 1.9826155615908734, "grad_norm": 0.5632854091028757, "learning_rate": 3.9562921108837745e-08, "loss": 11.8128, "step": 36409 }, { "epoch": 1.9826700155874564, "grad_norm": 0.5297255578447356, "learning_rate": 3.93152827280141e-08, "loss": 11.7802, "step": 36410 }, { "epoch": 1.9827244695840396, "grad_norm": 0.5445943732363671, "learning_rate": 3.906842165823932e-08, "loss": 11.9233, "step": 36411 }, { "epoch": 1.9827789235806226, "grad_norm": 0.5533317778112701, "learning_rate": 3.882233790143408e-08, "loss": 11.7697, "step": 36412 }, { "epoch": 1.9828333775772056, "grad_norm": 0.5434511729045822, "learning_rate": 3.8577031459519075e-08, "loss": 11.9149, "step": 36413 }, { "epoch": 1.9828878315737886, "grad_norm": 0.5692900136423962, "learning_rate": 3.8332502334381684e-08, "loss": 11.7982, "step": 36414 }, { "epoch": 1.9829422855703716, "grad_norm": 0.531192699353065, "learning_rate": 3.808875052793148e-08, "loss": 11.7621, "step": 36415 }, { "epoch": 1.9829967395669545, "grad_norm": 0.6144391606959894, "learning_rate": 3.7845776042078064e-08, "loss": 11.7797, "step": 36416 }, { "epoch": 1.9830511935635378, "grad_norm": 0.5129640212616524, "learning_rate": 3.7603578878686597e-08, "loss": 11.7144, "step": 36417 }, { "epoch": 1.9831056475601208, "grad_norm": 0.5452520192810797, "learning_rate": 3.736215903966667e-08, "loss": 11.7853, "step": 36418 }, { "epoch": 1.9831601015567037, "grad_norm": 0.539465194466536, "learning_rate": 3.7121516526872345e-08, "loss": 11.7376, "step": 36419 }, { "epoch": 1.9832145555532867, "grad_norm": 0.5754108188862822, "learning_rate": 3.688165134219102e-08, "loss": 11.8138, "step": 36420 }, { "epoch": 1.9832690095498697, "grad_norm": 0.8407767216307679, "learning_rate": 3.664256348747674e-08, "loss": 11.8776, "step": 36421 }, { "epoch": 1.9833234635464527, "grad_norm": 0.5060928623516017, "learning_rate": 3.640425296459471e-08, "loss": 11.7418, "step": 36422 }, { "epoch": 1.9833779175430357, "grad_norm": 0.5452587918783603, "learning_rate": 3.616671977539898e-08, "loss": 11.8999, "step": 36423 }, { "epoch": 1.9834323715396187, "grad_norm": 0.47998421597627455, "learning_rate": 3.5929963921732534e-08, "loss": 11.7164, "step": 36424 }, { "epoch": 1.9834868255362017, "grad_norm": 0.5355787736840985, "learning_rate": 3.569398540544944e-08, "loss": 11.7603, "step": 36425 }, { "epoch": 1.9835412795327847, "grad_norm": 0.5291167248010584, "learning_rate": 3.545878422835935e-08, "loss": 11.7304, "step": 36426 }, { "epoch": 1.9835957335293677, "grad_norm": 0.5715189458160215, "learning_rate": 3.522436039231636e-08, "loss": 11.8027, "step": 36427 }, { "epoch": 1.9836501875259507, "grad_norm": 0.5517468892746402, "learning_rate": 3.499071389911901e-08, "loss": 11.8163, "step": 36428 }, { "epoch": 1.9837046415225337, "grad_norm": 0.49261450619098257, "learning_rate": 3.475784475061028e-08, "loss": 11.7235, "step": 36429 }, { "epoch": 1.9837590955191167, "grad_norm": 0.6400201614758865, "learning_rate": 3.452575294858873e-08, "loss": 11.8131, "step": 36430 }, { "epoch": 1.9838135495156997, "grad_norm": 0.5098108880897478, "learning_rate": 3.429443849485292e-08, "loss": 11.7697, "step": 36431 }, { "epoch": 1.9838680035122827, "grad_norm": 0.5896519442328669, "learning_rate": 3.4063901391212514e-08, "loss": 11.7852, "step": 36432 }, { "epoch": 1.9839224575088656, "grad_norm": 0.5735672501499843, "learning_rate": 3.3834141639454976e-08, "loss": 11.8876, "step": 36433 }, { "epoch": 1.9839769115054486, "grad_norm": 0.5641724388866606, "learning_rate": 3.360515924136776e-08, "loss": 11.7415, "step": 36434 }, { "epoch": 1.9840313655020319, "grad_norm": 0.529777008549507, "learning_rate": 3.337695419872722e-08, "loss": 11.716, "step": 36435 }, { "epoch": 1.9840858194986148, "grad_norm": 0.5489142191689512, "learning_rate": 3.314952651330971e-08, "loss": 11.7486, "step": 36436 }, { "epoch": 1.9841402734951978, "grad_norm": 0.5623179765079697, "learning_rate": 3.29228761868916e-08, "loss": 11.7828, "step": 36437 }, { "epoch": 1.9841947274917808, "grad_norm": 0.535042210829423, "learning_rate": 3.269700322122704e-08, "loss": 11.7817, "step": 36438 }, { "epoch": 1.9842491814883638, "grad_norm": 0.5466478260566233, "learning_rate": 3.247190761808128e-08, "loss": 11.7459, "step": 36439 }, { "epoch": 1.984303635484947, "grad_norm": 0.527203248150186, "learning_rate": 3.2247589379197364e-08, "loss": 11.6313, "step": 36440 }, { "epoch": 1.98435808948153, "grad_norm": 0.5582220042491767, "learning_rate": 3.202404850631835e-08, "loss": 11.8305, "step": 36441 }, { "epoch": 1.984412543478113, "grad_norm": 0.506057199041528, "learning_rate": 3.180128500117619e-08, "loss": 11.7391, "step": 36442 }, { "epoch": 1.984466997474696, "grad_norm": 0.5030049299910746, "learning_rate": 3.1579298865525017e-08, "loss": 11.8705, "step": 36443 }, { "epoch": 1.984521451471279, "grad_norm": 0.5600930442138133, "learning_rate": 3.13580901010746e-08, "loss": 11.7956, "step": 36444 }, { "epoch": 1.984575905467862, "grad_norm": 0.542997673197189, "learning_rate": 3.113765870954577e-08, "loss": 11.7903, "step": 36445 }, { "epoch": 1.984630359464445, "grad_norm": 0.5415007233739847, "learning_rate": 3.091800469264827e-08, "loss": 11.7567, "step": 36446 }, { "epoch": 1.984684813461028, "grad_norm": 0.6795185988670197, "learning_rate": 3.0699128052114056e-08, "loss": 11.9329, "step": 36447 }, { "epoch": 1.984739267457611, "grad_norm": 0.5924422573874469, "learning_rate": 3.048102878961956e-08, "loss": 11.8243, "step": 36448 }, { "epoch": 1.984793721454194, "grad_norm": 0.6158281607939818, "learning_rate": 3.026370690686342e-08, "loss": 11.8351, "step": 36449 }, { "epoch": 1.984848175450777, "grad_norm": 0.7523505407724791, "learning_rate": 3.0047162405544284e-08, "loss": 11.673, "step": 36450 }, { "epoch": 1.98490262944736, "grad_norm": 0.5584705033824234, "learning_rate": 2.983139528734968e-08, "loss": 11.6747, "step": 36451 }, { "epoch": 1.984957083443943, "grad_norm": 0.5710800115087628, "learning_rate": 2.9616405553944958e-08, "loss": 11.7849, "step": 36452 }, { "epoch": 1.985011537440526, "grad_norm": 0.5356196072968289, "learning_rate": 2.9402193207017648e-08, "loss": 11.625, "step": 36453 }, { "epoch": 1.985065991437109, "grad_norm": 0.47829264998905807, "learning_rate": 2.918875824821088e-08, "loss": 11.7912, "step": 36454 }, { "epoch": 1.985120445433692, "grad_norm": 0.5136787925091855, "learning_rate": 2.8976100679212192e-08, "loss": 11.7793, "step": 36455 }, { "epoch": 1.985174899430275, "grad_norm": 0.5177346165727557, "learning_rate": 2.8764220501642514e-08, "loss": 11.7661, "step": 36456 }, { "epoch": 1.985229353426858, "grad_norm": 0.5417904388014813, "learning_rate": 2.855311771717828e-08, "loss": 11.8325, "step": 36457 }, { "epoch": 1.9852838074234411, "grad_norm": 0.5065989398736146, "learning_rate": 2.8342792327451517e-08, "loss": 11.7289, "step": 36458 }, { "epoch": 1.9853382614200241, "grad_norm": 0.5506387088913992, "learning_rate": 2.8133244334094255e-08, "loss": 11.7723, "step": 36459 }, { "epoch": 1.9853927154166071, "grad_norm": 0.5698689998099923, "learning_rate": 2.7924473738738522e-08, "loss": 11.7432, "step": 36460 }, { "epoch": 1.98544716941319, "grad_norm": 0.5959147601358578, "learning_rate": 2.771648054300524e-08, "loss": 11.7884, "step": 36461 }, { "epoch": 1.985501623409773, "grad_norm": 0.5890098564455121, "learning_rate": 2.750926474851534e-08, "loss": 11.9101, "step": 36462 }, { "epoch": 1.985556077406356, "grad_norm": 0.5885155735101597, "learning_rate": 2.7302826356878642e-08, "loss": 11.8218, "step": 36463 }, { "epoch": 1.9856105314029393, "grad_norm": 0.5177049109306143, "learning_rate": 2.7097165369704967e-08, "loss": 11.5807, "step": 36464 }, { "epoch": 1.9856649853995223, "grad_norm": 0.5207191332899337, "learning_rate": 2.689228178858194e-08, "loss": 11.7763, "step": 36465 }, { "epoch": 1.9857194393961053, "grad_norm": 0.5097256203044499, "learning_rate": 2.6688175615119383e-08, "loss": 11.8482, "step": 36466 }, { "epoch": 1.9857738933926883, "grad_norm": 0.5937435099745028, "learning_rate": 2.6484846850882705e-08, "loss": 11.8284, "step": 36467 }, { "epoch": 1.9858283473892713, "grad_norm": 0.5430212541291283, "learning_rate": 2.6282295497470634e-08, "loss": 11.6684, "step": 36468 }, { "epoch": 1.9858828013858543, "grad_norm": 0.46856519532394764, "learning_rate": 2.608052155645968e-08, "loss": 11.619, "step": 36469 }, { "epoch": 1.9859372553824373, "grad_norm": 0.5376089571158441, "learning_rate": 2.587952502940416e-08, "loss": 11.7194, "step": 36470 }, { "epoch": 1.9859917093790203, "grad_norm": 0.5370737918031686, "learning_rate": 2.5679305917880592e-08, "loss": 11.7159, "step": 36471 }, { "epoch": 1.9860461633756032, "grad_norm": 0.534456341970588, "learning_rate": 2.547986422343218e-08, "loss": 11.721, "step": 36472 }, { "epoch": 1.9861006173721862, "grad_norm": 0.5693198990854652, "learning_rate": 2.5281199947624347e-08, "loss": 11.8178, "step": 36473 }, { "epoch": 1.9861550713687692, "grad_norm": 0.5395660060236315, "learning_rate": 2.5083313091989192e-08, "loss": 11.9495, "step": 36474 }, { "epoch": 1.9862095253653522, "grad_norm": 0.5594244618991824, "learning_rate": 2.488620365808103e-08, "loss": 11.746, "step": 36475 }, { "epoch": 1.9862639793619352, "grad_norm": 0.5300685926005979, "learning_rate": 2.4689871647420872e-08, "loss": 11.7432, "step": 36476 }, { "epoch": 1.9863184333585182, "grad_norm": 0.5428098946373274, "learning_rate": 2.4494317061540817e-08, "loss": 11.7523, "step": 36477 }, { "epoch": 1.9863728873551012, "grad_norm": 0.5526840885415635, "learning_rate": 2.4299539901950773e-08, "loss": 11.6715, "step": 36478 }, { "epoch": 1.9864273413516842, "grad_norm": 0.5000964365460533, "learning_rate": 2.410554017018285e-08, "loss": 11.7401, "step": 36479 }, { "epoch": 1.9864817953482672, "grad_norm": 0.5147461783983301, "learning_rate": 2.3912317867724742e-08, "loss": 11.7364, "step": 36480 }, { "epoch": 1.9865362493448504, "grad_norm": 0.8427748965577434, "learning_rate": 2.3719872996097457e-08, "loss": 11.7933, "step": 36481 }, { "epoch": 1.9865907033414334, "grad_norm": 0.4886377342918437, "learning_rate": 2.3528205556788697e-08, "loss": 11.8437, "step": 36482 }, { "epoch": 1.9866451573380164, "grad_norm": 0.5983192921560333, "learning_rate": 2.3337315551297257e-08, "loss": 11.9141, "step": 36483 }, { "epoch": 1.9866996113345994, "grad_norm": 0.553652626616461, "learning_rate": 2.3147202981099738e-08, "loss": 11.8146, "step": 36484 }, { "epoch": 1.9867540653311824, "grad_norm": 0.6418428452775194, "learning_rate": 2.2957867847661628e-08, "loss": 11.8464, "step": 36485 }, { "epoch": 1.9868085193277654, "grad_norm": 0.5455281673631264, "learning_rate": 2.2769310152481736e-08, "loss": 11.82, "step": 36486 }, { "epoch": 1.9868629733243486, "grad_norm": 0.7700380742973841, "learning_rate": 2.2581529897014454e-08, "loss": 11.8567, "step": 36487 }, { "epoch": 1.9869174273209316, "grad_norm": 0.508514097771357, "learning_rate": 2.2394527082714168e-08, "loss": 11.7757, "step": 36488 }, { "epoch": 1.9869718813175146, "grad_norm": 0.5886567007444015, "learning_rate": 2.2208301711046376e-08, "loss": 11.8596, "step": 36489 }, { "epoch": 1.9870263353140976, "grad_norm": 0.5549673988248771, "learning_rate": 2.2022853783443263e-08, "loss": 11.7638, "step": 36490 }, { "epoch": 1.9870807893106806, "grad_norm": 0.5587925331336404, "learning_rate": 2.183818330137033e-08, "loss": 11.8355, "step": 36491 }, { "epoch": 1.9871352433072635, "grad_norm": 0.5328003337131507, "learning_rate": 2.1654290266237553e-08, "loss": 11.6843, "step": 36492 }, { "epoch": 1.9871896973038465, "grad_norm": 0.5352286581011919, "learning_rate": 2.147117467949933e-08, "loss": 11.8615, "step": 36493 }, { "epoch": 1.9872441513004295, "grad_norm": 0.521618915135555, "learning_rate": 2.1288836542554537e-08, "loss": 11.6261, "step": 36494 }, { "epoch": 1.9872986052970125, "grad_norm": 0.5312015028180979, "learning_rate": 2.1107275856846466e-08, "loss": 11.6559, "step": 36495 }, { "epoch": 1.9873530592935955, "grad_norm": 0.5332096331879077, "learning_rate": 2.0926492623762894e-08, "loss": 11.8785, "step": 36496 }, { "epoch": 1.9874075132901785, "grad_norm": 0.5424721875130121, "learning_rate": 2.074648684472491e-08, "loss": 11.6787, "step": 36497 }, { "epoch": 1.9874619672867615, "grad_norm": 0.5406570202646456, "learning_rate": 2.0567258521131394e-08, "loss": 11.6913, "step": 36498 }, { "epoch": 1.9875164212833445, "grad_norm": 0.5796295827097839, "learning_rate": 2.0388807654370125e-08, "loss": 11.8214, "step": 36499 }, { "epoch": 1.9875708752799275, "grad_norm": 0.5368021958895797, "learning_rate": 2.021113424583998e-08, "loss": 11.8478, "step": 36500 }, { "epoch": 1.9876253292765105, "grad_norm": 0.48074724813872993, "learning_rate": 2.0034238296906537e-08, "loss": 11.5588, "step": 36501 }, { "epoch": 1.9876797832730935, "grad_norm": 0.5065848752096026, "learning_rate": 1.9858119808957575e-08, "loss": 11.7973, "step": 36502 }, { "epoch": 1.9877342372696765, "grad_norm": 0.5759036275316106, "learning_rate": 1.968277878335867e-08, "loss": 11.7654, "step": 36503 }, { "epoch": 1.9877886912662597, "grad_norm": 0.5591605320109206, "learning_rate": 1.950821522147539e-08, "loss": 11.6315, "step": 36504 }, { "epoch": 1.9878431452628427, "grad_norm": 0.5598232048830637, "learning_rate": 1.933442912465111e-08, "loss": 11.8587, "step": 36505 }, { "epoch": 1.9878975992594257, "grad_norm": 0.5734144479691698, "learning_rate": 1.9161420494262505e-08, "loss": 11.6655, "step": 36506 }, { "epoch": 1.9879520532560087, "grad_norm": 0.5181470637516574, "learning_rate": 1.8989189331641843e-08, "loss": 11.8563, "step": 36507 }, { "epoch": 1.9880065072525916, "grad_norm": 0.5678972236587222, "learning_rate": 1.881773563812139e-08, "loss": 11.7367, "step": 36508 }, { "epoch": 1.9880609612491746, "grad_norm": 0.5275689525463271, "learning_rate": 1.864705941503342e-08, "loss": 11.5711, "step": 36509 }, { "epoch": 1.9881154152457579, "grad_norm": 0.5226776011583704, "learning_rate": 1.8477160663721295e-08, "loss": 11.8611, "step": 36510 }, { "epoch": 1.9881698692423408, "grad_norm": 0.5907563497459758, "learning_rate": 1.830803938550618e-08, "loss": 11.9167, "step": 36511 }, { "epoch": 1.9882243232389238, "grad_norm": 0.5558311979413025, "learning_rate": 1.8139695581687043e-08, "loss": 11.8081, "step": 36512 }, { "epoch": 1.9882787772355068, "grad_norm": 0.5408375610092016, "learning_rate": 1.7972129253573942e-08, "loss": 11.8025, "step": 36513 }, { "epoch": 1.9883332312320898, "grad_norm": 0.6487321049336314, "learning_rate": 1.7805340402488046e-08, "loss": 11.9208, "step": 36514 }, { "epoch": 1.9883876852286728, "grad_norm": 0.5331738608573782, "learning_rate": 1.763932902970611e-08, "loss": 11.8855, "step": 36515 }, { "epoch": 1.9884421392252558, "grad_norm": 0.49844084490852403, "learning_rate": 1.7474095136538193e-08, "loss": 11.746, "step": 36516 }, { "epoch": 1.9884965932218388, "grad_norm": 0.5258561191422564, "learning_rate": 1.7309638724249954e-08, "loss": 11.7594, "step": 36517 }, { "epoch": 1.9885510472184218, "grad_norm": 0.477293214613775, "learning_rate": 1.714595979412925e-08, "loss": 11.6868, "step": 36518 }, { "epoch": 1.9886055012150048, "grad_norm": 0.5407193248464377, "learning_rate": 1.698305834745284e-08, "loss": 11.7135, "step": 36519 }, { "epoch": 1.9886599552115878, "grad_norm": 0.5378190096474488, "learning_rate": 1.6820934385475272e-08, "loss": 11.7109, "step": 36520 }, { "epoch": 1.9887144092081708, "grad_norm": 0.5317356370743387, "learning_rate": 1.6659587909473307e-08, "loss": 11.742, "step": 36521 }, { "epoch": 1.9887688632047538, "grad_norm": 0.5760190090144073, "learning_rate": 1.649901892069039e-08, "loss": 11.8167, "step": 36522 }, { "epoch": 1.9888233172013368, "grad_norm": 0.5291254998874673, "learning_rate": 1.6339227420381076e-08, "loss": 11.7414, "step": 36523 }, { "epoch": 1.9888777711979198, "grad_norm": 0.5248665097299513, "learning_rate": 1.618021340977771e-08, "loss": 11.7631, "step": 36524 }, { "epoch": 1.9889322251945027, "grad_norm": 0.5435504748659484, "learning_rate": 1.602197689013485e-08, "loss": 11.6174, "step": 36525 }, { "epoch": 1.9889866791910857, "grad_norm": 0.5212935552363418, "learning_rate": 1.5864517862662632e-08, "loss": 11.8869, "step": 36526 }, { "epoch": 1.9890411331876687, "grad_norm": 0.5796327661169799, "learning_rate": 1.5707836328604508e-08, "loss": 11.7896, "step": 36527 }, { "epoch": 1.989095587184252, "grad_norm": 0.535665979283029, "learning_rate": 1.5551932289170623e-08, "loss": 11.73, "step": 36528 }, { "epoch": 1.989150041180835, "grad_norm": 0.5853900510831671, "learning_rate": 1.5396805745560018e-08, "loss": 11.657, "step": 36529 }, { "epoch": 1.989204495177418, "grad_norm": 1.653872634805348, "learning_rate": 1.5242456699005037e-08, "loss": 11.9419, "step": 36530 }, { "epoch": 1.989258949174001, "grad_norm": 0.5540657531119669, "learning_rate": 1.508888515068252e-08, "loss": 11.7679, "step": 36531 }, { "epoch": 1.989313403170584, "grad_norm": 0.5260239814293589, "learning_rate": 1.4936091101802608e-08, "loss": 11.8596, "step": 36532 }, { "epoch": 1.989367857167167, "grad_norm": 0.5370210294461955, "learning_rate": 1.4784074553542138e-08, "loss": 11.7527, "step": 36533 }, { "epoch": 1.9894223111637501, "grad_norm": 0.5940283262279022, "learning_rate": 1.4632835507089049e-08, "loss": 11.9127, "step": 36534 }, { "epoch": 1.9894767651603331, "grad_norm": 0.5591647115083277, "learning_rate": 1.4482373963620177e-08, "loss": 11.7366, "step": 36535 }, { "epoch": 1.989531219156916, "grad_norm": 0.5998355448929859, "learning_rate": 1.4332689924301257e-08, "loss": 11.8799, "step": 36536 }, { "epoch": 1.989585673153499, "grad_norm": 0.5143044566579497, "learning_rate": 1.4183783390298022e-08, "loss": 11.778, "step": 36537 }, { "epoch": 1.989640127150082, "grad_norm": 0.5242737544104311, "learning_rate": 1.4035654362776207e-08, "loss": 11.6855, "step": 36538 }, { "epoch": 1.989694581146665, "grad_norm": 0.5185734968571442, "learning_rate": 1.3888302842868239e-08, "loss": 11.6778, "step": 36539 }, { "epoch": 1.989749035143248, "grad_norm": 0.5775111635310636, "learning_rate": 1.3741728831750955e-08, "loss": 11.7708, "step": 36540 }, { "epoch": 1.989803489139831, "grad_norm": 0.533025138209696, "learning_rate": 1.3595932330534578e-08, "loss": 11.7419, "step": 36541 }, { "epoch": 1.989857943136414, "grad_norm": 0.5913819883562282, "learning_rate": 1.3450913340362637e-08, "loss": 11.8257, "step": 36542 }, { "epoch": 1.989912397132997, "grad_norm": 0.5666547350860178, "learning_rate": 1.330667186236756e-08, "loss": 11.7689, "step": 36543 }, { "epoch": 1.98996685112958, "grad_norm": 0.5617870311779647, "learning_rate": 1.3163207897659569e-08, "loss": 11.7593, "step": 36544 }, { "epoch": 1.990021305126163, "grad_norm": 0.5278297550209919, "learning_rate": 1.3020521447371093e-08, "loss": 11.761, "step": 36545 }, { "epoch": 1.990075759122746, "grad_norm": 0.55048504306361, "learning_rate": 1.2878612512601251e-08, "loss": 11.7731, "step": 36546 }, { "epoch": 1.990130213119329, "grad_norm": 0.5374731388214943, "learning_rate": 1.2737481094460269e-08, "loss": 11.7788, "step": 36547 }, { "epoch": 1.990184667115912, "grad_norm": 0.577594614368669, "learning_rate": 1.2597127194036163e-08, "loss": 11.8198, "step": 36548 }, { "epoch": 1.990239121112495, "grad_norm": 0.5727313297973341, "learning_rate": 1.2457550812416951e-08, "loss": 11.7288, "step": 36549 }, { "epoch": 1.990293575109078, "grad_norm": 0.5253241306983374, "learning_rate": 1.2318751950701757e-08, "loss": 11.7712, "step": 36550 }, { "epoch": 1.9903480291056612, "grad_norm": 0.5600070923392805, "learning_rate": 1.2180730609967495e-08, "loss": 11.6915, "step": 36551 }, { "epoch": 1.9904024831022442, "grad_norm": 0.5692639945535776, "learning_rate": 1.2043486791279978e-08, "loss": 11.8965, "step": 36552 }, { "epoch": 1.9904569370988272, "grad_norm": 0.5878580842199417, "learning_rate": 1.1907020495716126e-08, "loss": 11.6622, "step": 36553 }, { "epoch": 1.9905113910954102, "grad_norm": 0.5272999207693095, "learning_rate": 1.1771331724319545e-08, "loss": 11.6701, "step": 36554 }, { "epoch": 1.9905658450919932, "grad_norm": 0.5069394035366999, "learning_rate": 1.1636420478167154e-08, "loss": 11.6037, "step": 36555 }, { "epoch": 1.9906202990885762, "grad_norm": 0.5307850074672694, "learning_rate": 1.1502286758291459e-08, "loss": 11.7663, "step": 36556 }, { "epoch": 1.9906747530851594, "grad_norm": 0.5553898205062658, "learning_rate": 1.136893056574717e-08, "loss": 11.8379, "step": 36557 }, { "epoch": 1.9907292070817424, "grad_norm": 0.5542884498644787, "learning_rate": 1.12363519015668e-08, "loss": 11.769, "step": 36558 }, { "epoch": 1.9907836610783254, "grad_norm": 0.5387905649061595, "learning_rate": 1.110455076678285e-08, "loss": 11.7206, "step": 36559 }, { "epoch": 1.9908381150749084, "grad_norm": 0.5412154444315345, "learning_rate": 1.0973527162405628e-08, "loss": 11.8113, "step": 36560 }, { "epoch": 1.9908925690714914, "grad_norm": 0.5987705883005071, "learning_rate": 1.0843281089478741e-08, "loss": 11.843, "step": 36561 }, { "epoch": 1.9909470230680744, "grad_norm": 0.6542224568495725, "learning_rate": 1.071381254900139e-08, "loss": 11.8524, "step": 36562 }, { "epoch": 1.9910014770646574, "grad_norm": 0.5760613911943637, "learning_rate": 1.0585121541972776e-08, "loss": 11.6326, "step": 36563 }, { "epoch": 1.9910559310612403, "grad_norm": 0.5359531398178513, "learning_rate": 1.0457208069414304e-08, "loss": 11.8028, "step": 36564 }, { "epoch": 1.9911103850578233, "grad_norm": 0.471814764239132, "learning_rate": 1.033007213230297e-08, "loss": 11.7115, "step": 36565 }, { "epoch": 1.9911648390544063, "grad_norm": 0.5769235638220788, "learning_rate": 1.0203713731626873e-08, "loss": 11.8379, "step": 36566 }, { "epoch": 1.9912192930509893, "grad_norm": 0.5238240480289093, "learning_rate": 1.0078132868385215e-08, "loss": 11.8243, "step": 36567 }, { "epoch": 1.9912737470475723, "grad_norm": 0.5364322950304945, "learning_rate": 9.953329543543888e-09, "loss": 11.6682, "step": 36568 }, { "epoch": 1.9913282010441553, "grad_norm": 0.593512180407724, "learning_rate": 9.829303758068786e-09, "loss": 11.8686, "step": 36569 }, { "epoch": 1.9913826550407383, "grad_norm": 0.5605312415944048, "learning_rate": 9.706055512925804e-09, "loss": 11.8327, "step": 36570 }, { "epoch": 1.9914371090373213, "grad_norm": 0.5514882556214906, "learning_rate": 9.583584809080837e-09, "loss": 11.7296, "step": 36571 }, { "epoch": 1.9914915630339043, "grad_norm": 0.5429878775553492, "learning_rate": 9.461891647488675e-09, "loss": 11.7423, "step": 36572 }, { "epoch": 1.9915460170304873, "grad_norm": 0.5691855765732323, "learning_rate": 9.340976029081905e-09, "loss": 11.7875, "step": 36573 }, { "epoch": 1.9916004710270705, "grad_norm": 0.5521564499331408, "learning_rate": 9.22083795481532e-09, "loss": 11.8163, "step": 36574 }, { "epoch": 1.9916549250236535, "grad_norm": 0.5701652563157238, "learning_rate": 9.101477425610405e-09, "loss": 11.8349, "step": 36575 }, { "epoch": 1.9917093790202365, "grad_norm": 0.5855597507007764, "learning_rate": 8.982894442399747e-09, "loss": 11.807, "step": 36576 }, { "epoch": 1.9917638330168195, "grad_norm": 0.5525490524446289, "learning_rate": 8.865089006104832e-09, "loss": 11.7881, "step": 36577 }, { "epoch": 1.9918182870134025, "grad_norm": 0.5457276094545386, "learning_rate": 8.748061117647144e-09, "loss": 11.7254, "step": 36578 }, { "epoch": 1.9918727410099855, "grad_norm": 0.6052376554592634, "learning_rate": 8.631810777937067e-09, "loss": 11.7223, "step": 36579 }, { "epoch": 1.9919271950065687, "grad_norm": 0.5890097010004732, "learning_rate": 8.516337987862777e-09, "loss": 11.8369, "step": 36580 }, { "epoch": 1.9919816490031517, "grad_norm": 0.5788997216648428, "learning_rate": 8.401642748345762e-09, "loss": 11.7819, "step": 36581 }, { "epoch": 1.9920361029997347, "grad_norm": 0.6038637351962896, "learning_rate": 8.287725060263097e-09, "loss": 11.5915, "step": 36582 }, { "epoch": 1.9920905569963177, "grad_norm": 0.5987582867117255, "learning_rate": 8.17458492450296e-09, "loss": 11.79, "step": 36583 }, { "epoch": 1.9921450109929006, "grad_norm": 0.5258476968420109, "learning_rate": 8.062222341942427e-09, "loss": 11.7373, "step": 36584 }, { "epoch": 1.9921994649894836, "grad_norm": 0.5378773063919398, "learning_rate": 7.950637313458576e-09, "loss": 11.83, "step": 36585 }, { "epoch": 1.9922539189860666, "grad_norm": 0.5388884010314146, "learning_rate": 7.839829839928481e-09, "loss": 11.6727, "step": 36586 }, { "epoch": 1.9923083729826496, "grad_norm": 0.52512221499079, "learning_rate": 7.729799922195912e-09, "loss": 11.7655, "step": 36587 }, { "epoch": 1.9923628269792326, "grad_norm": 0.548364727159414, "learning_rate": 7.620547561126845e-09, "loss": 11.6767, "step": 36588 }, { "epoch": 1.9924172809758156, "grad_norm": 0.5262306886882376, "learning_rate": 7.51207275757615e-09, "loss": 11.7849, "step": 36589 }, { "epoch": 1.9924717349723986, "grad_norm": 0.5296609115846772, "learning_rate": 7.404375512365391e-09, "loss": 11.727, "step": 36590 }, { "epoch": 1.9925261889689816, "grad_norm": 0.5197247939044222, "learning_rate": 7.297455826360544e-09, "loss": 11.74, "step": 36591 }, { "epoch": 1.9925806429655646, "grad_norm": 0.5061356816214125, "learning_rate": 7.191313700372071e-09, "loss": 11.5471, "step": 36592 }, { "epoch": 1.9926350969621476, "grad_norm": 0.6189454476855245, "learning_rate": 7.0859491352437415e-09, "loss": 11.8413, "step": 36593 }, { "epoch": 1.9926895509587306, "grad_norm": 0.5116906400228126, "learning_rate": 6.981362131774916e-09, "loss": 11.6078, "step": 36594 }, { "epoch": 1.9927440049553136, "grad_norm": 0.5443973249715622, "learning_rate": 6.8775526907871594e-09, "loss": 11.802, "step": 36595 }, { "epoch": 1.9927984589518966, "grad_norm": 0.5226089155217758, "learning_rate": 6.7745208130909345e-09, "loss": 11.8135, "step": 36596 }, { "epoch": 1.9928529129484795, "grad_norm": 0.5249070542445562, "learning_rate": 6.6722664994744995e-09, "loss": 11.753, "step": 36597 }, { "epoch": 1.9929073669450628, "grad_norm": 0.5731575805681722, "learning_rate": 6.570789750759421e-09, "loss": 11.7456, "step": 36598 }, { "epoch": 1.9929618209416458, "grad_norm": 0.6046180240218758, "learning_rate": 6.470090567711751e-09, "loss": 11.8848, "step": 36599 }, { "epoch": 1.9930162749382287, "grad_norm": 0.5238906110334236, "learning_rate": 6.370168951119748e-09, "loss": 11.6064, "step": 36600 }, { "epoch": 1.9930707289348117, "grad_norm": 0.5442536443886653, "learning_rate": 6.27102490176057e-09, "loss": 11.8224, "step": 36601 }, { "epoch": 1.9931251829313947, "grad_norm": 0.6052913011545425, "learning_rate": 6.17265842041137e-09, "loss": 11.8681, "step": 36602 }, { "epoch": 1.993179636927978, "grad_norm": 0.5277548338212886, "learning_rate": 6.075069507838205e-09, "loss": 11.7395, "step": 36603 }, { "epoch": 1.993234090924561, "grad_norm": 0.6245102734589224, "learning_rate": 5.978258164784922e-09, "loss": 11.8652, "step": 36604 }, { "epoch": 1.993288544921144, "grad_norm": 0.5429962968186917, "learning_rate": 5.882224392017577e-09, "loss": 11.5717, "step": 36605 }, { "epoch": 1.993342998917727, "grad_norm": 0.5307810892273391, "learning_rate": 5.786968190280018e-09, "loss": 11.7404, "step": 36606 }, { "epoch": 1.99339745291431, "grad_norm": 0.515158080338061, "learning_rate": 5.692489560304992e-09, "loss": 11.6824, "step": 36607 }, { "epoch": 1.993451906910893, "grad_norm": 0.5087382019976419, "learning_rate": 5.598788502847452e-09, "loss": 11.787, "step": 36608 }, { "epoch": 1.993506360907476, "grad_norm": 0.5465389543262851, "learning_rate": 5.505865018606837e-09, "loss": 11.7854, "step": 36609 }, { "epoch": 1.993560814904059, "grad_norm": 0.5229986948256299, "learning_rate": 5.4137191083381e-09, "loss": 11.747, "step": 36610 }, { "epoch": 1.993615268900642, "grad_norm": 0.5524090689207177, "learning_rate": 5.322350772729579e-09, "loss": 11.8494, "step": 36611 }, { "epoch": 1.9936697228972249, "grad_norm": 0.6229786307791355, "learning_rate": 5.231760012502917e-09, "loss": 11.9247, "step": 36612 }, { "epoch": 1.9937241768938079, "grad_norm": 0.5276054388819608, "learning_rate": 5.14194682836866e-09, "loss": 11.7887, "step": 36613 }, { "epoch": 1.9937786308903909, "grad_norm": 0.5685137900197232, "learning_rate": 5.052911221015144e-09, "loss": 11.843, "step": 36614 }, { "epoch": 1.9938330848869739, "grad_norm": 0.6003547945268849, "learning_rate": 4.96465319114181e-09, "loss": 11.8157, "step": 36615 }, { "epoch": 1.9938875388835569, "grad_norm": 0.503537665410977, "learning_rate": 4.877172739436997e-09, "loss": 11.7783, "step": 36616 }, { "epoch": 1.9939419928801398, "grad_norm": 0.5524766885056016, "learning_rate": 4.7904698665779405e-09, "loss": 11.8226, "step": 36617 }, { "epoch": 1.9939964468767228, "grad_norm": 0.5839317510664969, "learning_rate": 4.704544573230774e-09, "loss": 11.7921, "step": 36618 }, { "epoch": 1.9940509008733058, "grad_norm": 0.4937412077674242, "learning_rate": 4.619396860072734e-09, "loss": 11.7857, "step": 36619 }, { "epoch": 1.9941053548698888, "grad_norm": 0.619428771642005, "learning_rate": 4.535026727758851e-09, "loss": 11.7916, "step": 36620 }, { "epoch": 1.994159808866472, "grad_norm": 0.5241669488118829, "learning_rate": 4.45143417695526e-09, "loss": 11.7554, "step": 36621 }, { "epoch": 1.994214262863055, "grad_norm": 0.6134667912022922, "learning_rate": 4.3686192083058904e-09, "loss": 11.9199, "step": 36622 }, { "epoch": 1.994268716859638, "grad_norm": 0.5666420771301303, "learning_rate": 4.286581822454672e-09, "loss": 11.688, "step": 36623 }, { "epoch": 1.994323170856221, "grad_norm": 0.6716040216061332, "learning_rate": 4.20532202003443e-09, "loss": 11.901, "step": 36624 }, { "epoch": 1.994377624852804, "grad_norm": 0.5038869191413636, "learning_rate": 4.124839801689095e-09, "loss": 11.7773, "step": 36625 }, { "epoch": 1.994432078849387, "grad_norm": 0.5304847927368819, "learning_rate": 4.045135168040392e-09, "loss": 11.7209, "step": 36626 }, { "epoch": 1.9944865328459702, "grad_norm": 0.5834841566192898, "learning_rate": 3.966208119698944e-09, "loss": 11.7493, "step": 36627 }, { "epoch": 1.9945409868425532, "grad_norm": 0.5297166483552079, "learning_rate": 3.888058657297577e-09, "loss": 11.8569, "step": 36628 }, { "epoch": 1.9945954408391362, "grad_norm": 0.5238660667981947, "learning_rate": 3.8106867814136086e-09, "loss": 11.8502, "step": 36629 }, { "epoch": 1.9946498948357192, "grad_norm": 0.5234309470475411, "learning_rate": 3.734092492679864e-09, "loss": 11.6249, "step": 36630 }, { "epoch": 1.9947043488323022, "grad_norm": 0.5139694921264127, "learning_rate": 3.658275791684762e-09, "loss": 11.7672, "step": 36631 }, { "epoch": 1.9947588028288852, "grad_norm": 0.5264909707605255, "learning_rate": 3.583236679005619e-09, "loss": 11.7835, "step": 36632 }, { "epoch": 1.9948132568254682, "grad_norm": 0.5242808653399074, "learning_rate": 3.5089751552308537e-09, "loss": 11.6735, "step": 36633 }, { "epoch": 1.9948677108220512, "grad_norm": 0.5194141757764744, "learning_rate": 3.435491220937781e-09, "loss": 11.7064, "step": 36634 }, { "epoch": 1.9949221648186342, "grad_norm": 0.5211938629199598, "learning_rate": 3.362784876703717e-09, "loss": 11.7095, "step": 36635 }, { "epoch": 1.9949766188152172, "grad_norm": 0.6333757755520796, "learning_rate": 3.290856123083774e-09, "loss": 11.7457, "step": 36636 }, { "epoch": 1.9950310728118001, "grad_norm": 0.5435958077205914, "learning_rate": 3.219704960655268e-09, "loss": 11.7806, "step": 36637 }, { "epoch": 1.9950855268083831, "grad_norm": 0.5210460749138116, "learning_rate": 3.1493313899511046e-09, "loss": 11.7653, "step": 36638 }, { "epoch": 1.9951399808049661, "grad_norm": 0.5117830193675624, "learning_rate": 3.0797354115263964e-09, "loss": 11.8055, "step": 36639 }, { "epoch": 1.9951944348015491, "grad_norm": 0.514988069970701, "learning_rate": 3.0109170259251528e-09, "loss": 11.6839, "step": 36640 }, { "epoch": 1.9952488887981321, "grad_norm": 0.5542102234589764, "learning_rate": 2.942876233680281e-09, "loss": 11.7812, "step": 36641 }, { "epoch": 1.995303342794715, "grad_norm": 0.516034683978556, "learning_rate": 2.8756130353246867e-09, "loss": 11.8199, "step": 36642 }, { "epoch": 1.995357796791298, "grad_norm": 0.5155532397239551, "learning_rate": 2.8091274313801762e-09, "loss": 11.7568, "step": 36643 }, { "epoch": 1.9954122507878813, "grad_norm": 0.6329309551328337, "learning_rate": 2.743419422357452e-09, "loss": 11.6283, "step": 36644 }, { "epoch": 1.9954667047844643, "grad_norm": 0.4963986603524539, "learning_rate": 2.6784890087672154e-09, "loss": 11.7111, "step": 36645 }, { "epoch": 1.9955211587810473, "grad_norm": 0.6076339871650054, "learning_rate": 2.61433619112017e-09, "loss": 11.8352, "step": 36646 }, { "epoch": 1.9955756127776303, "grad_norm": 0.5234783332036234, "learning_rate": 2.5509609699159163e-09, "loss": 11.7935, "step": 36647 }, { "epoch": 1.9956300667742133, "grad_norm": 0.5392005307077706, "learning_rate": 2.4883633456429524e-09, "loss": 11.7426, "step": 36648 }, { "epoch": 1.9956845207707963, "grad_norm": 0.5887162033689871, "learning_rate": 2.4265433187897757e-09, "loss": 11.8386, "step": 36649 }, { "epoch": 1.9957389747673795, "grad_norm": 0.5396301072348566, "learning_rate": 2.365500889844885e-09, "loss": 11.7071, "step": 36650 }, { "epoch": 1.9957934287639625, "grad_norm": 0.575394282904709, "learning_rate": 2.3052360592745738e-09, "loss": 11.7565, "step": 36651 }, { "epoch": 1.9958478827605455, "grad_norm": 0.770807824973402, "learning_rate": 2.245748827534033e-09, "loss": 11.8931, "step": 36652 }, { "epoch": 1.9959023367571285, "grad_norm": 0.5378501184092315, "learning_rate": 2.1870391951117618e-09, "loss": 11.7475, "step": 36653 }, { "epoch": 1.9959567907537115, "grad_norm": 0.5328546146445313, "learning_rate": 2.129107162451849e-09, "loss": 11.7567, "step": 36654 }, { "epoch": 1.9960112447502945, "grad_norm": 0.5642278048320595, "learning_rate": 2.0719527300094853e-09, "loss": 11.7792, "step": 36655 }, { "epoch": 1.9960656987468774, "grad_norm": 0.5407460073021433, "learning_rate": 2.0155758982176588e-09, "loss": 11.8449, "step": 36656 }, { "epoch": 1.9961201527434604, "grad_norm": 0.5918623888172896, "learning_rate": 1.959976667520458e-09, "loss": 11.8475, "step": 36657 }, { "epoch": 1.9961746067400434, "grad_norm": 0.5208077918237171, "learning_rate": 1.905155038361972e-09, "loss": 11.715, "step": 36658 }, { "epoch": 1.9962290607366264, "grad_norm": 0.5547570054299232, "learning_rate": 1.8511110111529839e-09, "loss": 11.7199, "step": 36659 }, { "epoch": 1.9962835147332094, "grad_norm": 0.5406713324055593, "learning_rate": 1.7978445863153782e-09, "loss": 11.8062, "step": 36660 }, { "epoch": 1.9963379687297924, "grad_norm": 0.5336696006867239, "learning_rate": 1.7453557642710393e-09, "loss": 11.8141, "step": 36661 }, { "epoch": 1.9963924227263754, "grad_norm": 0.5206807405477759, "learning_rate": 1.6936445454307504e-09, "loss": 11.7819, "step": 36662 }, { "epoch": 1.9964468767229584, "grad_norm": 0.5767948374382369, "learning_rate": 1.642710930183089e-09, "loss": 11.772, "step": 36663 }, { "epoch": 1.9965013307195414, "grad_norm": 0.585163110915863, "learning_rate": 1.5925549189388377e-09, "loss": 11.7973, "step": 36664 }, { "epoch": 1.9965557847161244, "grad_norm": 0.5016347834194185, "learning_rate": 1.5431765120754727e-09, "loss": 11.7421, "step": 36665 }, { "epoch": 1.9966102387127074, "grad_norm": 0.517762633363895, "learning_rate": 1.4945757099815716e-09, "loss": 11.7985, "step": 36666 }, { "epoch": 1.9966646927092904, "grad_norm": 0.5241380060054428, "learning_rate": 1.4467525130346105e-09, "loss": 11.6881, "step": 36667 }, { "epoch": 1.9967191467058736, "grad_norm": 0.5181787526820097, "learning_rate": 1.399706921612065e-09, "loss": 11.7816, "step": 36668 }, { "epoch": 1.9967736007024566, "grad_norm": 0.5505127884394181, "learning_rate": 1.3534389360692068e-09, "loss": 11.6572, "step": 36669 }, { "epoch": 1.9968280546990396, "grad_norm": 0.5693830525480125, "learning_rate": 1.3079485567724093e-09, "loss": 11.8402, "step": 36670 }, { "epoch": 1.9968825086956226, "grad_norm": 0.5860973761223902, "learning_rate": 1.2632357840769438e-09, "loss": 11.8876, "step": 36671 }, { "epoch": 1.9969369626922056, "grad_norm": 0.5704348010003918, "learning_rate": 1.2193006183269794e-09, "loss": 11.8041, "step": 36672 }, { "epoch": 1.9969914166887888, "grad_norm": 0.6279119288513697, "learning_rate": 1.1761430598666856e-09, "loss": 11.8818, "step": 36673 }, { "epoch": 1.9970458706853718, "grad_norm": 0.5198706796872268, "learning_rate": 1.133763109029129e-09, "loss": 11.7613, "step": 36674 }, { "epoch": 1.9971003246819548, "grad_norm": 0.6034201104231257, "learning_rate": 1.0921607661473765e-09, "loss": 11.8964, "step": 36675 }, { "epoch": 1.9971547786785377, "grad_norm": 0.49949102841187076, "learning_rate": 1.051336031543393e-09, "loss": 11.7939, "step": 36676 }, { "epoch": 1.9972092326751207, "grad_norm": 0.5363153137472171, "learning_rate": 1.0112889055391428e-09, "loss": 11.7587, "step": 36677 }, { "epoch": 1.9972636866717037, "grad_norm": 0.5810910374213107, "learning_rate": 9.720193884343865e-10, "loss": 11.724, "step": 36678 }, { "epoch": 1.9973181406682867, "grad_norm": 0.5210939134204752, "learning_rate": 9.335274805399863e-10, "loss": 11.6994, "step": 36679 }, { "epoch": 1.9973725946648697, "grad_norm": 0.5381576891492463, "learning_rate": 8.958131821557025e-10, "loss": 11.8286, "step": 36680 }, { "epoch": 1.9974270486614527, "grad_norm": 0.530220337335392, "learning_rate": 8.588764935812954e-10, "loss": 11.755, "step": 36681 }, { "epoch": 1.9974815026580357, "grad_norm": 0.5728679182487828, "learning_rate": 8.227174150943206e-10, "loss": 11.787, "step": 36682 }, { "epoch": 1.9975359566546187, "grad_norm": 0.5205716047367589, "learning_rate": 7.87335946972334e-10, "loss": 11.8618, "step": 36683 }, { "epoch": 1.9975904106512017, "grad_norm": 0.5241118339580726, "learning_rate": 7.527320895150958e-10, "loss": 11.5183, "step": 36684 }, { "epoch": 1.9976448646477847, "grad_norm": 0.5088502198791645, "learning_rate": 7.189058429557527e-10, "loss": 11.7576, "step": 36685 }, { "epoch": 1.9976993186443677, "grad_norm": 0.5470162434631054, "learning_rate": 6.85857207594065e-10, "loss": 11.6734, "step": 36686 }, { "epoch": 1.9977537726409507, "grad_norm": 0.5110775201370584, "learning_rate": 6.535861836520773e-10, "loss": 11.7879, "step": 36687 }, { "epoch": 1.9978082266375337, "grad_norm": 0.5608037502661866, "learning_rate": 6.220927714073455e-10, "loss": 11.6387, "step": 36688 }, { "epoch": 1.9978626806341166, "grad_norm": 0.5188198153542338, "learning_rate": 5.913769710930161e-10, "loss": 11.8053, "step": 36689 }, { "epoch": 1.9979171346306996, "grad_norm": 0.5757468699154831, "learning_rate": 5.614387829533385e-10, "loss": 11.568, "step": 36690 }, { "epoch": 1.9979715886272829, "grad_norm": 0.5732020279908595, "learning_rate": 5.322782072103571e-10, "loss": 11.7973, "step": 36691 }, { "epoch": 1.9980260426238658, "grad_norm": 0.5661612494643344, "learning_rate": 5.038952441083211e-10, "loss": 11.8387, "step": 36692 }, { "epoch": 1.9980804966204488, "grad_norm": 0.5364384218893937, "learning_rate": 4.762898938470706e-10, "loss": 11.7906, "step": 36693 }, { "epoch": 1.9981349506170318, "grad_norm": 0.6917035468321034, "learning_rate": 4.494621566597523e-10, "loss": 11.8456, "step": 36694 }, { "epoch": 1.9981894046136148, "grad_norm": 0.5582074763770993, "learning_rate": 4.2341203274620653e-10, "loss": 11.7563, "step": 36695 }, { "epoch": 1.9982438586101978, "grad_norm": 0.5400802746154967, "learning_rate": 3.981395223062734e-10, "loss": 11.8059, "step": 36696 }, { "epoch": 1.998298312606781, "grad_norm": 0.6784869398514226, "learning_rate": 3.736446255508952e-10, "loss": 11.8526, "step": 36697 }, { "epoch": 1.998352766603364, "grad_norm": 0.5680346951485059, "learning_rate": 3.499273426466054e-10, "loss": 11.8861, "step": 36698 }, { "epoch": 1.998407220599947, "grad_norm": 0.5160730365803441, "learning_rate": 3.2698767379324426e-10, "loss": 11.7039, "step": 36699 }, { "epoch": 1.99846167459653, "grad_norm": 0.5123216668387516, "learning_rate": 3.0482561917954954e-10, "loss": 11.7522, "step": 36700 }, { "epoch": 1.998516128593113, "grad_norm": 0.5254371195560049, "learning_rate": 2.8344117894985036e-10, "loss": 11.8134, "step": 36701 }, { "epoch": 1.998570582589696, "grad_norm": 0.5854105432065376, "learning_rate": 2.6283435329288453e-10, "loss": 11.7867, "step": 36702 }, { "epoch": 1.998625036586279, "grad_norm": 0.5862551505034992, "learning_rate": 2.4300514236408336e-10, "loss": 11.9344, "step": 36703 }, { "epoch": 1.998679490582862, "grad_norm": 0.5140052659654867, "learning_rate": 2.23953546318878e-10, "loss": 11.7432, "step": 36704 }, { "epoch": 1.998733944579445, "grad_norm": 0.5604743133364141, "learning_rate": 2.0567956529049527e-10, "loss": 11.7821, "step": 36705 }, { "epoch": 1.998788398576028, "grad_norm": 0.5240572928921524, "learning_rate": 1.881831994454686e-10, "loss": 11.6996, "step": 36706 }, { "epoch": 1.998842852572611, "grad_norm": 0.5405593267654587, "learning_rate": 1.7146444889482026e-10, "loss": 11.711, "step": 36707 }, { "epoch": 1.998897306569194, "grad_norm": 0.5587512939355626, "learning_rate": 1.5552331379398155e-10, "loss": 11.8544, "step": 36708 }, { "epoch": 1.998951760565777, "grad_norm": 0.5475112401914279, "learning_rate": 1.403597942428725e-10, "loss": 11.7343, "step": 36709 }, { "epoch": 1.99900621456236, "grad_norm": 0.5490761635001964, "learning_rate": 1.2597389037471984e-10, "loss": 11.8575, "step": 36710 }, { "epoch": 1.999060668558943, "grad_norm": 0.5125875521480342, "learning_rate": 1.1236560230054593e-10, "loss": 11.6728, "step": 36711 }, { "epoch": 1.999115122555526, "grad_norm": 0.5796859431852404, "learning_rate": 9.953493010916859e-11, "loss": 11.8984, "step": 36712 }, { "epoch": 1.999169576552109, "grad_norm": 0.5733528789181468, "learning_rate": 8.748187392271234e-11, "loss": 11.8825, "step": 36713 }, { "epoch": 1.9992240305486921, "grad_norm": 0.5596695605625579, "learning_rate": 7.620643382999503e-11, "loss": 11.7445, "step": 36714 }, { "epoch": 1.9992784845452751, "grad_norm": 0.519350111406422, "learning_rate": 6.570860990873229e-11, "loss": 11.7687, "step": 36715 }, { "epoch": 1.9993329385418581, "grad_norm": 0.5701236866048116, "learning_rate": 5.598840224774193e-11, "loss": 11.8148, "step": 36716 }, { "epoch": 1.999387392538441, "grad_norm": 0.554621853887413, "learning_rate": 4.7045810924739586e-11, "loss": 11.7872, "step": 36717 }, { "epoch": 1.999441846535024, "grad_norm": 0.531288056186138, "learning_rate": 3.888083600633863e-11, "loss": 11.7555, "step": 36718 }, { "epoch": 1.999496300531607, "grad_norm": 0.541802893876769, "learning_rate": 3.149347754805021e-11, "loss": 11.7749, "step": 36719 }, { "epoch": 1.9995507545281903, "grad_norm": 0.6008596151002624, "learning_rate": 2.4883735627589944e-11, "loss": 11.77, "step": 36720 }, { "epoch": 1.9996052085247733, "grad_norm": 0.4876281226770037, "learning_rate": 1.905161026716229e-11, "loss": 11.7265, "step": 36721 }, { "epoch": 1.9996596625213563, "grad_norm": 0.5839723800419259, "learning_rate": 1.399710154448286e-11, "loss": 11.7743, "step": 36722 }, { "epoch": 1.9997141165179393, "grad_norm": 0.570160454135997, "learning_rate": 9.720209470653885e-12, "loss": 11.7272, "step": 36723 }, { "epoch": 1.9997685705145223, "grad_norm": 0.5308431239774418, "learning_rate": 6.220934101186515e-12, "loss": 11.7632, "step": 36724 }, { "epoch": 1.9998230245111053, "grad_norm": 0.5191686533476448, "learning_rate": 3.4992754471829813e-12, "loss": 11.7697, "step": 36725 }, { "epoch": 1.9998774785076883, "grad_norm": 0.525138338749835, "learning_rate": 1.5552335419499743e-12, "loss": 11.7664, "step": 36726 }, { "epoch": 1.9999319325042713, "grad_norm": 0.5327611039604181, "learning_rate": 3.8880838548749357e-13, "loss": 11.6781, "step": 36727 }, { "epoch": 1.9999863865008543, "grad_norm": 0.5408051043597044, "learning_rate": 0.0, "loss": 11.8148, "step": 36728 }, { "epoch": 1.9999863865008543, "step": 36728, "total_flos": 4582719071862784.0, "train_loss": 6.056929863945536, "train_runtime": 133830.3994, "train_samples_per_second": 35.128, "train_steps_per_second": 0.274 } ], "logging_steps": 1.0, "max_steps": 36728, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4582719071862784.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }