diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,9889 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9999822547158093, - "eval_steps": 500, - "global_step": 14088, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0007098113676290526, - "grad_norm": 7.898312201157063, - "learning_rate": 9.995030526764163e-06, - "loss": 1.7908, - "step": 10 - }, - { - "epoch": 0.0014196227352581052, - "grad_norm": 5.391248052357186, - "learning_rate": 9.987931279284396e-06, - "loss": 0.7438, - "step": 20 - }, - { - "epoch": 0.002129434102887158, - "grad_norm": 5.37248606279888, - "learning_rate": 9.980832031804629e-06, - "loss": 0.6486, - "step": 30 - }, - { - "epoch": 0.0028392454705162104, - "grad_norm": 6.374151691345343, - "learning_rate": 9.973732784324862e-06, - "loss": 0.6267, - "step": 40 - }, - { - "epoch": 0.003549056838145263, - "grad_norm": 8.880583324938707, - "learning_rate": 9.966633536845095e-06, - "loss": 0.5867, - "step": 50 - }, - { - "epoch": 0.004258868205774316, - "grad_norm": 6.684640112932781, - "learning_rate": 9.959534289365328e-06, - "loss": 0.5473, - "step": 60 - }, - { - "epoch": 0.004968679573403368, - "grad_norm": 9.329177151842533, - "learning_rate": 9.952435041885561e-06, - "loss": 0.549, - "step": 70 - }, - { - "epoch": 0.005678490941032421, - "grad_norm": 4.2697386372932575, - "learning_rate": 9.945335794405794e-06, - "loss": 0.527, - "step": 80 - }, - { - "epoch": 0.006388302308661473, - "grad_norm": 4.981195314782428, - "learning_rate": 9.938236546926027e-06, - "loss": 0.5015, - "step": 90 - }, - { - "epoch": 0.007098113676290526, - "grad_norm": 3.5718526890722155, - "learning_rate": 9.931137299446259e-06, - "loss": 0.5148, - "step": 100 - }, - { - "epoch": 0.007807925043919578, - "grad_norm": 5.35860863602349, - "learning_rate": 9.924038051966492e-06, - "loss": 0.5116, - "step": 110 - }, - { - "epoch": 0.008517736411548632, - "grad_norm": 3.2550821239727434, - "learning_rate": 9.916938804486725e-06, - "loss": 0.4992, - "step": 120 - }, - { - "epoch": 0.009227547779177683, - "grad_norm": 3.4354498076448032, - "learning_rate": 9.909839557006958e-06, - "loss": 0.5088, - "step": 130 - }, - { - "epoch": 0.009937359146806737, - "grad_norm": 4.331679140736939, - "learning_rate": 9.902740309527191e-06, - "loss": 0.5139, - "step": 140 - }, - { - "epoch": 0.010647170514435788, - "grad_norm": 3.4337848966487265, - "learning_rate": 9.895641062047424e-06, - "loss": 0.5041, - "step": 150 - }, - { - "epoch": 0.011356981882064842, - "grad_norm": 8.243351710682422, - "learning_rate": 9.888541814567657e-06, - "loss": 0.5142, - "step": 160 - }, - { - "epoch": 0.012066793249693893, - "grad_norm": 4.091704188438657, - "learning_rate": 9.88144256708789e-06, - "loss": 0.4807, - "step": 170 - }, - { - "epoch": 0.012776604617322947, - "grad_norm": 21.564891334339755, - "learning_rate": 9.874343319608124e-06, - "loss": 0.5092, - "step": 180 - }, - { - "epoch": 0.013486415984951998, - "grad_norm": 3.1521060424258973, - "learning_rate": 9.867244072128355e-06, - "loss": 0.4787, - "step": 190 - }, - { - "epoch": 0.014196227352581052, - "grad_norm": 3.986481726421801, - "learning_rate": 9.860144824648588e-06, - "loss": 0.4827, - "step": 200 - }, - { - "epoch": 0.014906038720210105, - "grad_norm": 4.774263941683351, - "learning_rate": 9.853045577168821e-06, - "loss": 0.4775, - "step": 210 - }, - { - "epoch": 0.015615850087839157, - "grad_norm": 7.968327682274227, - "learning_rate": 9.845946329689053e-06, - "loss": 0.4716, - "step": 220 - }, - { - "epoch": 0.01632566145546821, - "grad_norm": 10.121205974855524, - "learning_rate": 9.838847082209286e-06, - "loss": 0.4969, - "step": 230 - }, - { - "epoch": 0.017035472823097263, - "grad_norm": 7.454679720256471, - "learning_rate": 9.831747834729519e-06, - "loss": 0.4923, - "step": 240 - }, - { - "epoch": 0.017745284190726313, - "grad_norm": 17.103084568275037, - "learning_rate": 9.824648587249752e-06, - "loss": 0.4701, - "step": 250 - }, - { - "epoch": 0.018455095558355367, - "grad_norm": 4.48293929960256, - "learning_rate": 9.817549339769985e-06, - "loss": 0.4734, - "step": 260 - }, - { - "epoch": 0.01916490692598442, - "grad_norm": 5.345114387506581, - "learning_rate": 9.810450092290218e-06, - "loss": 0.4894, - "step": 270 - }, - { - "epoch": 0.019874718293613473, - "grad_norm": 19.40561032433512, - "learning_rate": 9.803350844810451e-06, - "loss": 0.4791, - "step": 280 - }, - { - "epoch": 0.020584529661242523, - "grad_norm": 14.25299022016476, - "learning_rate": 9.796251597330684e-06, - "loss": 0.4699, - "step": 290 - }, - { - "epoch": 0.021294341028871577, - "grad_norm": 8.257072932675099, - "learning_rate": 9.789152349850918e-06, - "loss": 0.4712, - "step": 300 - }, - { - "epoch": 0.02200415239650063, - "grad_norm": 7.954026403143938, - "learning_rate": 9.782053102371149e-06, - "loss": 0.4703, - "step": 310 - }, - { - "epoch": 0.022713963764129683, - "grad_norm": 11.392767049791958, - "learning_rate": 9.774953854891382e-06, - "loss": 0.4991, - "step": 320 - }, - { - "epoch": 0.023423775131758737, - "grad_norm": 3.6589701257251392, - "learning_rate": 9.767854607411615e-06, - "loss": 0.48, - "step": 330 - }, - { - "epoch": 0.024133586499387787, - "grad_norm": 2.8317614498971095, - "learning_rate": 9.760755359931848e-06, - "loss": 0.473, - "step": 340 - }, - { - "epoch": 0.02484339786701684, - "grad_norm": 3.3672884329345467, - "learning_rate": 9.753656112452081e-06, - "loss": 0.4807, - "step": 350 - }, - { - "epoch": 0.025553209234645893, - "grad_norm": 2.918860353664653, - "learning_rate": 9.746556864972314e-06, - "loss": 0.474, - "step": 360 - }, - { - "epoch": 0.026263020602274947, - "grad_norm": 3.985430160063577, - "learning_rate": 9.739457617492548e-06, - "loss": 0.4606, - "step": 370 - }, - { - "epoch": 0.026972831969903997, - "grad_norm": 3.8499162197950216, - "learning_rate": 9.73235837001278e-06, - "loss": 0.474, - "step": 380 - }, - { - "epoch": 0.02768264333753305, - "grad_norm": 2.955339700163119, - "learning_rate": 9.725259122533012e-06, - "loss": 0.472, - "step": 390 - }, - { - "epoch": 0.028392454705162103, - "grad_norm": 5.589731350821559, - "learning_rate": 9.718159875053245e-06, - "loss": 0.4698, - "step": 400 - }, - { - "epoch": 0.029102266072791157, - "grad_norm": 3.9824871173931973, - "learning_rate": 9.711060627573478e-06, - "loss": 0.4581, - "step": 410 - }, - { - "epoch": 0.02981207744042021, - "grad_norm": 2.524559409598369, - "learning_rate": 9.70396138009371e-06, - "loss": 0.4478, - "step": 420 - }, - { - "epoch": 0.03052188880804926, - "grad_norm": 2.970731368598553, - "learning_rate": 9.696862132613943e-06, - "loss": 0.4508, - "step": 430 - }, - { - "epoch": 0.031231700175678313, - "grad_norm": 2.893829595170148, - "learning_rate": 9.689762885134176e-06, - "loss": 0.4379, - "step": 440 - }, - { - "epoch": 0.03194151154330736, - "grad_norm": 22.795684932698137, - "learning_rate": 9.682663637654409e-06, - "loss": 0.4482, - "step": 450 - }, - { - "epoch": 0.03265132291093642, - "grad_norm": 3.2812945854632236, - "learning_rate": 9.675564390174642e-06, - "loss": 0.4599, - "step": 460 - }, - { - "epoch": 0.03336113427856547, - "grad_norm": 11.615453520589618, - "learning_rate": 9.668465142694875e-06, - "loss": 0.4417, - "step": 470 - }, - { - "epoch": 0.03407094564619453, - "grad_norm": 7.726986291359829, - "learning_rate": 9.661365895215108e-06, - "loss": 0.4594, - "step": 480 - }, - { - "epoch": 0.03478075701382358, - "grad_norm": 4.365039492938302, - "learning_rate": 9.654266647735341e-06, - "loss": 0.4669, - "step": 490 - }, - { - "epoch": 0.03549056838145263, - "grad_norm": 6.54988906481092, - "learning_rate": 9.647167400255574e-06, - "loss": 0.4567, - "step": 500 - }, - { - "epoch": 0.03620037974908168, - "grad_norm": 8.933278546995766, - "learning_rate": 9.640068152775806e-06, - "loss": 0.4519, - "step": 510 - }, - { - "epoch": 0.03691019111671073, - "grad_norm": 3.7761657369108907, - "learning_rate": 9.632968905296039e-06, - "loss": 0.4501, - "step": 520 - }, - { - "epoch": 0.03762000248433979, - "grad_norm": 3.9418116527565377, - "learning_rate": 9.625869657816272e-06, - "loss": 0.4561, - "step": 530 - }, - { - "epoch": 0.03832981385196884, - "grad_norm": 3.5489889583606438, - "learning_rate": 9.618770410336505e-06, - "loss": 0.4598, - "step": 540 - }, - { - "epoch": 0.03903962521959789, - "grad_norm": 3.5164230189602548, - "learning_rate": 9.611671162856738e-06, - "loss": 0.4717, - "step": 550 - }, - { - "epoch": 0.03974943658722695, - "grad_norm": 2.1822863392109206, - "learning_rate": 9.604571915376971e-06, - "loss": 0.48, - "step": 560 - }, - { - "epoch": 0.040459247954856, - "grad_norm": 2.5677413826305413, - "learning_rate": 9.597472667897204e-06, - "loss": 0.4605, - "step": 570 - }, - { - "epoch": 0.041169059322485047, - "grad_norm": 3.011759104822335, - "learning_rate": 9.590373420417438e-06, - "loss": 0.4605, - "step": 580 - }, - { - "epoch": 0.0418788706901141, - "grad_norm": 2.56502573080614, - "learning_rate": 9.58327417293767e-06, - "loss": 0.4494, - "step": 590 - }, - { - "epoch": 0.04258868205774315, - "grad_norm": 3.2396125278123806, - "learning_rate": 9.576174925457902e-06, - "loss": 0.4542, - "step": 600 - }, - { - "epoch": 0.04329849342537221, - "grad_norm": 3.480681910714182, - "learning_rate": 9.569075677978135e-06, - "loss": 0.4548, - "step": 610 - }, - { - "epoch": 0.04400830479300126, - "grad_norm": 2.623695100630613, - "learning_rate": 9.561976430498368e-06, - "loss": 0.4594, - "step": 620 - }, - { - "epoch": 0.04471811616063031, - "grad_norm": 3.042303011325611, - "learning_rate": 9.5548771830186e-06, - "loss": 0.4557, - "step": 630 - }, - { - "epoch": 0.04542792752825937, - "grad_norm": 2.8781600946277863, - "learning_rate": 9.547777935538833e-06, - "loss": 0.484, - "step": 640 - }, - { - "epoch": 0.04613773889588842, - "grad_norm": 3.3284195205047684, - "learning_rate": 9.540678688059066e-06, - "loss": 0.4481, - "step": 650 - }, - { - "epoch": 0.04684755026351747, - "grad_norm": 3.5159109068224987, - "learning_rate": 9.533579440579299e-06, - "loss": 0.4665, - "step": 660 - }, - { - "epoch": 0.04755736163114652, - "grad_norm": 6.322136362721481, - "learning_rate": 9.526480193099532e-06, - "loss": 0.4585, - "step": 670 - }, - { - "epoch": 0.04826717299877557, - "grad_norm": 21.902103769968996, - "learning_rate": 9.519380945619765e-06, - "loss": 0.4446, - "step": 680 - }, - { - "epoch": 0.04897698436640463, - "grad_norm": 3.6046359318609356, - "learning_rate": 9.512281698139998e-06, - "loss": 0.4519, - "step": 690 - }, - { - "epoch": 0.04968679573403368, - "grad_norm": 3.039690187186011, - "learning_rate": 9.505182450660231e-06, - "loss": 0.4448, - "step": 700 - }, - { - "epoch": 0.05039660710166273, - "grad_norm": 2.608964873836775, - "learning_rate": 9.498083203180465e-06, - "loss": 0.4486, - "step": 710 - }, - { - "epoch": 0.05110641846929179, - "grad_norm": 3.368889371027321, - "learning_rate": 9.490983955700696e-06, - "loss": 0.4617, - "step": 720 - }, - { - "epoch": 0.05181622983692084, - "grad_norm": 4.094036998235093, - "learning_rate": 9.483884708220929e-06, - "loss": 0.4569, - "step": 730 - }, - { - "epoch": 0.05252604120454989, - "grad_norm": 2.979892302450325, - "learning_rate": 9.476785460741162e-06, - "loss": 0.4645, - "step": 740 - }, - { - "epoch": 0.05323585257217894, - "grad_norm": 3.676607621277054, - "learning_rate": 9.469686213261395e-06, - "loss": 0.4407, - "step": 750 - }, - { - "epoch": 0.05394566393980799, - "grad_norm": 359.9140493382262, - "learning_rate": 9.462586965781628e-06, - "loss": 0.4262, - "step": 760 - }, - { - "epoch": 0.05465547530743705, - "grad_norm": 4.447118089247447, - "learning_rate": 9.455487718301861e-06, - "loss": 0.4344, - "step": 770 - }, - { - "epoch": 0.0553652866750661, - "grad_norm": 4.569754671227615, - "learning_rate": 9.448388470822095e-06, - "loss": 0.4462, - "step": 780 - }, - { - "epoch": 0.05607509804269516, - "grad_norm": 2.3728524211263067, - "learning_rate": 9.441289223342328e-06, - "loss": 0.4386, - "step": 790 - }, - { - "epoch": 0.05678490941032421, - "grad_norm": 2.5997362569615903, - "learning_rate": 9.434189975862559e-06, - "loss": 0.4537, - "step": 800 - }, - { - "epoch": 0.057494720777953257, - "grad_norm": 4.859327134293274, - "learning_rate": 9.427090728382792e-06, - "loss": 0.4514, - "step": 810 - }, - { - "epoch": 0.05820453214558231, - "grad_norm": 2.6304161060559905, - "learning_rate": 9.419991480903025e-06, - "loss": 0.4306, - "step": 820 - }, - { - "epoch": 0.05891434351321136, - "grad_norm": 3.504607730078166, - "learning_rate": 9.412892233423258e-06, - "loss": 0.454, - "step": 830 - }, - { - "epoch": 0.05962415488084042, - "grad_norm": 3.3227222733710864, - "learning_rate": 9.40579298594349e-06, - "loss": 0.4407, - "step": 840 - }, - { - "epoch": 0.06033396624846947, - "grad_norm": 3.328718377292454, - "learning_rate": 9.398693738463723e-06, - "loss": 0.4581, - "step": 850 - }, - { - "epoch": 0.06104377761609852, - "grad_norm": 3.4977954338913864, - "learning_rate": 9.391594490983956e-06, - "loss": 0.4284, - "step": 860 - }, - { - "epoch": 0.06175358898372758, - "grad_norm": 3.228432256709841, - "learning_rate": 9.384495243504189e-06, - "loss": 0.4373, - "step": 870 - }, - { - "epoch": 0.06246340035135663, - "grad_norm": 3.1586832054050964, - "learning_rate": 9.377395996024422e-06, - "loss": 0.4348, - "step": 880 - }, - { - "epoch": 0.06317321171898568, - "grad_norm": 13.155465477764636, - "learning_rate": 9.370296748544655e-06, - "loss": 0.4217, - "step": 890 - }, - { - "epoch": 0.06388302308661473, - "grad_norm": 15.543581430412525, - "learning_rate": 9.363197501064888e-06, - "loss": 0.4593, - "step": 900 - }, - { - "epoch": 0.06459283445424378, - "grad_norm": 8.921864061523843, - "learning_rate": 9.356098253585121e-06, - "loss": 0.4608, - "step": 910 - }, - { - "epoch": 0.06530264582187284, - "grad_norm": 5.3983003526617335, - "learning_rate": 9.348999006105353e-06, - "loss": 0.4514, - "step": 920 - }, - { - "epoch": 0.06601245718950188, - "grad_norm": 7.595139513838182, - "learning_rate": 9.341899758625586e-06, - "loss": 0.4273, - "step": 930 - }, - { - "epoch": 0.06672226855713094, - "grad_norm": 3.2331459925046815, - "learning_rate": 9.334800511145819e-06, - "loss": 0.422, - "step": 940 - }, - { - "epoch": 0.06743207992476, - "grad_norm": 3.8699272404865686, - "learning_rate": 9.327701263666052e-06, - "loss": 0.4477, - "step": 950 - }, - { - "epoch": 0.06814189129238905, - "grad_norm": 2.68446192265652, - "learning_rate": 9.320602016186285e-06, - "loss": 0.4449, - "step": 960 - }, - { - "epoch": 0.0688517026600181, - "grad_norm": 2.637260503772899, - "learning_rate": 9.313502768706518e-06, - "loss": 0.4532, - "step": 970 - }, - { - "epoch": 0.06956151402764715, - "grad_norm": 3.9618993923437085, - "learning_rate": 9.306403521226751e-06, - "loss": 0.4534, - "step": 980 - }, - { - "epoch": 0.07027132539527621, - "grad_norm": 3.429568261104227, - "learning_rate": 9.299304273746985e-06, - "loss": 0.452, - "step": 990 - }, - { - "epoch": 0.07098113676290525, - "grad_norm": 3.663179434126313, - "learning_rate": 9.292205026267218e-06, - "loss": 0.439, - "step": 1000 - }, - { - "epoch": 0.07169094813053431, - "grad_norm": 4.408975026773321, - "learning_rate": 9.285105778787449e-06, - "loss": 0.4184, - "step": 1010 - }, - { - "epoch": 0.07240075949816337, - "grad_norm": 2.415108601943808, - "learning_rate": 9.278006531307682e-06, - "loss": 0.4342, - "step": 1020 - }, - { - "epoch": 0.07311057086579241, - "grad_norm": 6.698239896408658, - "learning_rate": 9.270907283827915e-06, - "loss": 0.4535, - "step": 1030 - }, - { - "epoch": 0.07382038223342147, - "grad_norm": 11.189940656850219, - "learning_rate": 9.263808036348147e-06, - "loss": 0.4192, - "step": 1040 - }, - { - "epoch": 0.07453019360105052, - "grad_norm": 3.85625217339617, - "learning_rate": 9.25670878886838e-06, - "loss": 0.4278, - "step": 1050 - }, - { - "epoch": 0.07524000496867958, - "grad_norm": 32.21212360326382, - "learning_rate": 9.249609541388613e-06, - "loss": 0.4509, - "step": 1060 - }, - { - "epoch": 0.07594981633630862, - "grad_norm": 5.919396215012425, - "learning_rate": 9.242510293908846e-06, - "loss": 0.4525, - "step": 1070 - }, - { - "epoch": 0.07665962770393768, - "grad_norm": 5.904196801283348, - "learning_rate": 9.235411046429079e-06, - "loss": 0.4422, - "step": 1080 - }, - { - "epoch": 0.07736943907156674, - "grad_norm": 4.486326467883555, - "learning_rate": 9.228311798949312e-06, - "loss": 0.4685, - "step": 1090 - }, - { - "epoch": 0.07807925043919578, - "grad_norm": 11.745437972621287, - "learning_rate": 9.221212551469545e-06, - "loss": 0.4646, - "step": 1100 - }, - { - "epoch": 0.07878906180682484, - "grad_norm": 6.5181010077573145, - "learning_rate": 9.214113303989778e-06, - "loss": 0.443, - "step": 1110 - }, - { - "epoch": 0.0794988731744539, - "grad_norm": 11.270983163134655, - "learning_rate": 9.207014056510012e-06, - "loss": 0.4605, - "step": 1120 - }, - { - "epoch": 0.08020868454208294, - "grad_norm": 3.7069012881976975, - "learning_rate": 9.199914809030243e-06, - "loss": 0.4459, - "step": 1130 - }, - { - "epoch": 0.080918495909712, - "grad_norm": 8.667969696855055, - "learning_rate": 9.192815561550476e-06, - "loss": 0.4556, - "step": 1140 - }, - { - "epoch": 0.08162830727734105, - "grad_norm": 7.559635091166787, - "learning_rate": 9.185716314070709e-06, - "loss": 0.4357, - "step": 1150 - }, - { - "epoch": 0.08233811864497009, - "grad_norm": 17.430750080762536, - "learning_rate": 9.178617066590942e-06, - "loss": 0.4301, - "step": 1160 - }, - { - "epoch": 0.08304793001259915, - "grad_norm": 4.351276343100192, - "learning_rate": 9.171517819111175e-06, - "loss": 0.4184, - "step": 1170 - }, - { - "epoch": 0.0837577413802282, - "grad_norm": 6.471581804191342, - "learning_rate": 9.164418571631408e-06, - "loss": 0.4516, - "step": 1180 - }, - { - "epoch": 0.08446755274785726, - "grad_norm": 4.3294841586504855, - "learning_rate": 9.157319324151642e-06, - "loss": 0.4211, - "step": 1190 - }, - { - "epoch": 0.0851773641154863, - "grad_norm": 4.385208474639979, - "learning_rate": 9.150220076671875e-06, - "loss": 0.4203, - "step": 1200 - }, - { - "epoch": 0.08588717548311536, - "grad_norm": 5.8972560031050065, - "learning_rate": 9.143120829192106e-06, - "loss": 0.4284, - "step": 1210 - }, - { - "epoch": 0.08659698685074442, - "grad_norm": 4.604861487503107, - "learning_rate": 9.136021581712339e-06, - "loss": 0.4277, - "step": 1220 - }, - { - "epoch": 0.08730679821837346, - "grad_norm": 4.321101106082931, - "learning_rate": 9.128922334232572e-06, - "loss": 0.4216, - "step": 1230 - }, - { - "epoch": 0.08801660958600252, - "grad_norm": 11.04681514927992, - "learning_rate": 9.121823086752805e-06, - "loss": 0.4181, - "step": 1240 - }, - { - "epoch": 0.08872642095363158, - "grad_norm": 4.31849841935359, - "learning_rate": 9.114723839273037e-06, - "loss": 0.4264, - "step": 1250 - }, - { - "epoch": 0.08943623232126062, - "grad_norm": 4.674845237449041, - "learning_rate": 9.10762459179327e-06, - "loss": 0.4281, - "step": 1260 - }, - { - "epoch": 0.09014604368888968, - "grad_norm": 3.447760098274006, - "learning_rate": 9.100525344313503e-06, - "loss": 0.4304, - "step": 1270 - }, - { - "epoch": 0.09085585505651873, - "grad_norm": 7.189274212443334, - "learning_rate": 9.093426096833736e-06, - "loss": 0.4252, - "step": 1280 - }, - { - "epoch": 0.09156566642414778, - "grad_norm": 19.69024332171456, - "learning_rate": 9.08632684935397e-06, - "loss": 0.4336, - "step": 1290 - }, - { - "epoch": 0.09227547779177683, - "grad_norm": 55.22992334000048, - "learning_rate": 9.079227601874202e-06, - "loss": 0.4256, - "step": 1300 - }, - { - "epoch": 0.09298528915940589, - "grad_norm": 5.066816349007046, - "learning_rate": 9.072128354394435e-06, - "loss": 0.407, - "step": 1310 - }, - { - "epoch": 0.09369510052703495, - "grad_norm": 6.37711035743208, - "learning_rate": 9.065029106914668e-06, - "loss": 0.4257, - "step": 1320 - }, - { - "epoch": 0.09440491189466399, - "grad_norm": 4.696335985596692, - "learning_rate": 9.057929859434902e-06, - "loss": 0.4188, - "step": 1330 - }, - { - "epoch": 0.09511472326229305, - "grad_norm": 15.82313293688476, - "learning_rate": 9.050830611955133e-06, - "loss": 0.433, - "step": 1340 - }, - { - "epoch": 0.0958245346299221, - "grad_norm": 5.692904308794704, - "learning_rate": 9.043731364475366e-06, - "loss": 0.4269, - "step": 1350 - }, - { - "epoch": 0.09653434599755115, - "grad_norm": 15.303387309564082, - "learning_rate": 9.0366321169956e-06, - "loss": 0.4174, - "step": 1360 - }, - { - "epoch": 0.0972441573651802, - "grad_norm": 3.9801928029461666, - "learning_rate": 9.029532869515832e-06, - "loss": 0.4132, - "step": 1370 - }, - { - "epoch": 0.09795396873280926, - "grad_norm": 3.523690216407914, - "learning_rate": 9.022433622036065e-06, - "loss": 0.4281, - "step": 1380 - }, - { - "epoch": 0.0986637801004383, - "grad_norm": 7.099888052775042, - "learning_rate": 9.015334374556298e-06, - "loss": 0.4431, - "step": 1390 - }, - { - "epoch": 0.09937359146806736, - "grad_norm": 6.538985360116972, - "learning_rate": 9.008235127076532e-06, - "loss": 0.4172, - "step": 1400 - }, - { - "epoch": 0.10008340283569642, - "grad_norm": 7.959800060910741, - "learning_rate": 9.001135879596763e-06, - "loss": 0.4243, - "step": 1410 - }, - { - "epoch": 0.10079321420332546, - "grad_norm": 8.790445771142394, - "learning_rate": 8.994036632116996e-06, - "loss": 0.4254, - "step": 1420 - }, - { - "epoch": 0.10150302557095452, - "grad_norm": 4.285966498899181, - "learning_rate": 8.98693738463723e-06, - "loss": 0.4122, - "step": 1430 - }, - { - "epoch": 0.10221283693858357, - "grad_norm": 6.286806035291326, - "learning_rate": 8.979838137157462e-06, - "loss": 0.433, - "step": 1440 - }, - { - "epoch": 0.10292264830621263, - "grad_norm": 7.3066834855049345, - "learning_rate": 8.972738889677695e-06, - "loss": 0.4258, - "step": 1450 - }, - { - "epoch": 0.10363245967384167, - "grad_norm": 6.5695520214785565, - "learning_rate": 8.965639642197927e-06, - "loss": 0.4164, - "step": 1460 - }, - { - "epoch": 0.10434227104147073, - "grad_norm": 20.93641513291179, - "learning_rate": 8.95854039471816e-06, - "loss": 0.4095, - "step": 1470 - }, - { - "epoch": 0.10505208240909979, - "grad_norm": 5.657042957398901, - "learning_rate": 8.951441147238393e-06, - "loss": 0.4168, - "step": 1480 - }, - { - "epoch": 0.10576189377672883, - "grad_norm": 6.076726326140851, - "learning_rate": 8.944341899758626e-06, - "loss": 0.4112, - "step": 1490 - }, - { - "epoch": 0.10647170514435789, - "grad_norm": 5.092565408624009, - "learning_rate": 8.93724265227886e-06, - "loss": 0.4269, - "step": 1500 - }, - { - "epoch": 0.10718151651198694, - "grad_norm": 2.894012289515038, - "learning_rate": 8.930143404799092e-06, - "loss": 0.4239, - "step": 1510 - }, - { - "epoch": 0.10789132787961599, - "grad_norm": 3.7173915295575637, - "learning_rate": 8.923044157319325e-06, - "loss": 0.4288, - "step": 1520 - }, - { - "epoch": 0.10860113924724504, - "grad_norm": 3.025402596869208, - "learning_rate": 8.915944909839559e-06, - "loss": 0.4421, - "step": 1530 - }, - { - "epoch": 0.1093109506148741, - "grad_norm": 8.212502187483185, - "learning_rate": 8.90884566235979e-06, - "loss": 0.4241, - "step": 1540 - }, - { - "epoch": 0.11002076198250314, - "grad_norm": 5.773771344339805, - "learning_rate": 8.901746414880023e-06, - "loss": 0.4355, - "step": 1550 - }, - { - "epoch": 0.1107305733501322, - "grad_norm": 4.158426885786249, - "learning_rate": 8.894647167400256e-06, - "loss": 0.436, - "step": 1560 - }, - { - "epoch": 0.11144038471776126, - "grad_norm": 6.56740526603354, - "learning_rate": 8.88754791992049e-06, - "loss": 0.4397, - "step": 1570 - }, - { - "epoch": 0.11215019608539031, - "grad_norm": 8.263663970839248, - "learning_rate": 8.880448672440722e-06, - "loss": 0.4201, - "step": 1580 - }, - { - "epoch": 0.11286000745301936, - "grad_norm": 2.424368072981463, - "learning_rate": 8.873349424960955e-06, - "loss": 0.4235, - "step": 1590 - }, - { - "epoch": 0.11356981882064841, - "grad_norm": 6.489454078474153, - "learning_rate": 8.866250177481189e-06, - "loss": 0.4243, - "step": 1600 - }, - { - "epoch": 0.11427963018827747, - "grad_norm": 3.541006640864803, - "learning_rate": 8.859150930001422e-06, - "loss": 0.4313, - "step": 1610 - }, - { - "epoch": 0.11498944155590651, - "grad_norm": 12.323605643567065, - "learning_rate": 8.852051682521653e-06, - "loss": 0.4253, - "step": 1620 - }, - { - "epoch": 0.11569925292353557, - "grad_norm": 4.600225981753095, - "learning_rate": 8.844952435041886e-06, - "loss": 0.42, - "step": 1630 - }, - { - "epoch": 0.11640906429116463, - "grad_norm": 8.589796661850784, - "learning_rate": 8.83785318756212e-06, - "loss": 0.4219, - "step": 1640 - }, - { - "epoch": 0.11711887565879367, - "grad_norm": 10.182911442610934, - "learning_rate": 8.830753940082352e-06, - "loss": 0.4285, - "step": 1650 - }, - { - "epoch": 0.11782868702642273, - "grad_norm": 5.186284643440543, - "learning_rate": 8.823654692602584e-06, - "loss": 0.4139, - "step": 1660 - }, - { - "epoch": 0.11853849839405178, - "grad_norm": 5.23154203196852, - "learning_rate": 8.816555445122817e-06, - "loss": 0.4251, - "step": 1670 - }, - { - "epoch": 0.11924830976168084, - "grad_norm": 6.9839536559537505, - "learning_rate": 8.80945619764305e-06, - "loss": 0.4233, - "step": 1680 - }, - { - "epoch": 0.11995812112930988, - "grad_norm": 6.376179671333375, - "learning_rate": 8.802356950163283e-06, - "loss": 0.4089, - "step": 1690 - }, - { - "epoch": 0.12066793249693894, - "grad_norm": 3.824113092644885, - "learning_rate": 8.795257702683516e-06, - "loss": 0.4347, - "step": 1700 - }, - { - "epoch": 0.121377743864568, - "grad_norm": 11.282936555631686, - "learning_rate": 8.78815845520375e-06, - "loss": 0.423, - "step": 1710 - }, - { - "epoch": 0.12208755523219704, - "grad_norm": 4.218268240264897, - "learning_rate": 8.781059207723982e-06, - "loss": 0.4188, - "step": 1720 - }, - { - "epoch": 0.1227973665998261, - "grad_norm": 3.943582749857493, - "learning_rate": 8.773959960244215e-06, - "loss": 0.4276, - "step": 1730 - }, - { - "epoch": 0.12350717796745515, - "grad_norm": 9.679933576473074, - "learning_rate": 8.766860712764449e-06, - "loss": 0.42, - "step": 1740 - }, - { - "epoch": 0.1242169893350842, - "grad_norm": 15.414309701859608, - "learning_rate": 8.75976146528468e-06, - "loss": 0.4316, - "step": 1750 - }, - { - "epoch": 0.12492680070271325, - "grad_norm": 9.429737278511919, - "learning_rate": 8.752662217804913e-06, - "loss": 0.422, - "step": 1760 - }, - { - "epoch": 0.1256366120703423, - "grad_norm": 23.10494354556988, - "learning_rate": 8.745562970325146e-06, - "loss": 0.4276, - "step": 1770 - }, - { - "epoch": 0.12634642343797137, - "grad_norm": 13.541923724604345, - "learning_rate": 8.73846372284538e-06, - "loss": 0.4271, - "step": 1780 - }, - { - "epoch": 0.1270562348056004, - "grad_norm": 2.846694152973873, - "learning_rate": 8.731364475365612e-06, - "loss": 0.4151, - "step": 1790 - }, - { - "epoch": 0.12776604617322945, - "grad_norm": 6.934597145753292, - "learning_rate": 8.724265227885845e-06, - "loss": 0.4247, - "step": 1800 - }, - { - "epoch": 0.12847585754085852, - "grad_norm": 3.435112347451886, - "learning_rate": 8.717165980406079e-06, - "loss": 0.4225, - "step": 1810 - }, - { - "epoch": 0.12918566890848757, - "grad_norm": 3.4829699382867823, - "learning_rate": 8.71006673292631e-06, - "loss": 0.4458, - "step": 1820 - }, - { - "epoch": 0.1298954802761166, - "grad_norm": 5.077072978235785, - "learning_rate": 8.702967485446543e-06, - "loss": 0.4283, - "step": 1830 - }, - { - "epoch": 0.13060529164374568, - "grad_norm": 5.917300462616358, - "learning_rate": 8.695868237966776e-06, - "loss": 0.4119, - "step": 1840 - }, - { - "epoch": 0.13131510301137472, - "grad_norm": 10.693397543481625, - "learning_rate": 8.68876899048701e-06, - "loss": 0.4305, - "step": 1850 - }, - { - "epoch": 0.13202491437900377, - "grad_norm": 3.6456780546239456, - "learning_rate": 8.681669743007242e-06, - "loss": 0.4391, - "step": 1860 - }, - { - "epoch": 0.13273472574663284, - "grad_norm": 14.68038430401678, - "learning_rate": 8.674570495527474e-06, - "loss": 0.4111, - "step": 1870 - }, - { - "epoch": 0.13344453711426188, - "grad_norm": 5.101838800313352, - "learning_rate": 8.667471248047707e-06, - "loss": 0.4323, - "step": 1880 - }, - { - "epoch": 0.13415434848189095, - "grad_norm": 4.497686869632987, - "learning_rate": 8.66037200056794e-06, - "loss": 0.4154, - "step": 1890 - }, - { - "epoch": 0.13486415984952, - "grad_norm": 9.511227824879294, - "learning_rate": 8.653272753088173e-06, - "loss": 0.4295, - "step": 1900 - }, - { - "epoch": 0.13557397121714904, - "grad_norm": 5.344003791146658, - "learning_rate": 8.646173505608406e-06, - "loss": 0.4254, - "step": 1910 - }, - { - "epoch": 0.1362837825847781, - "grad_norm": 8.10132953922794, - "learning_rate": 8.63907425812864e-06, - "loss": 0.4219, - "step": 1920 - }, - { - "epoch": 0.13699359395240715, - "grad_norm": 8.840386508572838, - "learning_rate": 8.631975010648872e-06, - "loss": 0.416, - "step": 1930 - }, - { - "epoch": 0.1377034053200362, - "grad_norm": 5.639143297883941, - "learning_rate": 8.624875763169106e-06, - "loss": 0.4246, - "step": 1940 - }, - { - "epoch": 0.13841321668766526, - "grad_norm": 5.375177742256173, - "learning_rate": 8.617776515689339e-06, - "loss": 0.4263, - "step": 1950 - }, - { - "epoch": 0.1391230280552943, - "grad_norm": 13.872628674699765, - "learning_rate": 8.61067726820957e-06, - "loss": 0.4368, - "step": 1960 - }, - { - "epoch": 0.13983283942292335, - "grad_norm": 6.612051924514802, - "learning_rate": 8.603578020729803e-06, - "loss": 0.4235, - "step": 1970 - }, - { - "epoch": 0.14054265079055242, - "grad_norm": 7.420592038738273, - "learning_rate": 8.596478773250036e-06, - "loss": 0.4315, - "step": 1980 - }, - { - "epoch": 0.14125246215818146, - "grad_norm": 3.883491154973528, - "learning_rate": 8.58937952577027e-06, - "loss": 0.4394, - "step": 1990 - }, - { - "epoch": 0.1419622735258105, - "grad_norm": 4.031594828995353, - "learning_rate": 8.582280278290502e-06, - "loss": 0.4274, - "step": 2000 - }, - { - "epoch": 0.14267208489343958, - "grad_norm": 6.272786134188022, - "learning_rate": 8.575181030810736e-06, - "loss": 0.42, - "step": 2010 - }, - { - "epoch": 0.14338189626106862, - "grad_norm": 8.45570312290703, - "learning_rate": 8.568081783330967e-06, - "loss": 0.4336, - "step": 2020 - }, - { - "epoch": 0.14409170762869766, - "grad_norm": 3.8497660341027693, - "learning_rate": 8.5609825358512e-06, - "loss": 0.4259, - "step": 2030 - }, - { - "epoch": 0.14480151899632673, - "grad_norm": 10.12069309920438, - "learning_rate": 8.553883288371433e-06, - "loss": 0.4208, - "step": 2040 - }, - { - "epoch": 0.14551133036395578, - "grad_norm": 5.128975578462212, - "learning_rate": 8.546784040891666e-06, - "loss": 0.4215, - "step": 2050 - }, - { - "epoch": 0.14622114173158482, - "grad_norm": 4.45602583843403, - "learning_rate": 8.5396847934119e-06, - "loss": 0.4135, - "step": 2060 - }, - { - "epoch": 0.1469309530992139, - "grad_norm": 5.172069700283945, - "learning_rate": 8.53258554593213e-06, - "loss": 0.4122, - "step": 2070 - }, - { - "epoch": 0.14764076446684293, - "grad_norm": 7.147216717746435, - "learning_rate": 8.525486298452364e-06, - "loss": 0.4423, - "step": 2080 - }, - { - "epoch": 0.14835057583447198, - "grad_norm": 14.946527022046613, - "learning_rate": 8.518387050972597e-06, - "loss": 0.4094, - "step": 2090 - }, - { - "epoch": 0.14906038720210105, - "grad_norm": 8.460267496546166, - "learning_rate": 8.51128780349283e-06, - "loss": 0.4186, - "step": 2100 - }, - { - "epoch": 0.1497701985697301, - "grad_norm": 8.93023218882671, - "learning_rate": 8.504188556013063e-06, - "loss": 0.4062, - "step": 2110 - }, - { - "epoch": 0.15048000993735916, - "grad_norm": 3.213343020811049, - "learning_rate": 8.497089308533296e-06, - "loss": 0.3994, - "step": 2120 - }, - { - "epoch": 0.1511898213049882, - "grad_norm": 8.718801113577726, - "learning_rate": 8.48999006105353e-06, - "loss": 0.4232, - "step": 2130 - }, - { - "epoch": 0.15189963267261725, - "grad_norm": 2.832643819770658, - "learning_rate": 8.482890813573762e-06, - "loss": 0.4261, - "step": 2140 - }, - { - "epoch": 0.15260944404024632, - "grad_norm": 3.2673324405839255, - "learning_rate": 8.475791566093996e-06, - "loss": 0.42, - "step": 2150 - }, - { - "epoch": 0.15331925540787536, - "grad_norm": 3.2621489770969214, - "learning_rate": 8.468692318614227e-06, - "loss": 0.4282, - "step": 2160 - }, - { - "epoch": 0.1540290667755044, - "grad_norm": 17.34420036770468, - "learning_rate": 8.46159307113446e-06, - "loss": 0.4198, - "step": 2170 - }, - { - "epoch": 0.15473887814313347, - "grad_norm": 3.6148665582762094, - "learning_rate": 8.454493823654693e-06, - "loss": 0.4157, - "step": 2180 - }, - { - "epoch": 0.15544868951076252, - "grad_norm": 2.775836768166624, - "learning_rate": 8.447394576174926e-06, - "loss": 0.417, - "step": 2190 - }, - { - "epoch": 0.15615850087839156, - "grad_norm": 5.052761832862739, - "learning_rate": 8.44029532869516e-06, - "loss": 0.4035, - "step": 2200 - }, - { - "epoch": 0.15686831224602063, - "grad_norm": 4.778779661514333, - "learning_rate": 8.433196081215393e-06, - "loss": 0.4445, - "step": 2210 - }, - { - "epoch": 0.15757812361364967, - "grad_norm": 4.6274782338902325, - "learning_rate": 8.426096833735626e-06, - "loss": 0.4147, - "step": 2220 - }, - { - "epoch": 0.15828793498127872, - "grad_norm": 4.310225523508245, - "learning_rate": 8.418997586255857e-06, - "loss": 0.4167, - "step": 2230 - }, - { - "epoch": 0.1589977463489078, - "grad_norm": 4.802519845626961, - "learning_rate": 8.41189833877609e-06, - "loss": 0.4052, - "step": 2240 - }, - { - "epoch": 0.15970755771653683, - "grad_norm": 3.949892413625005, - "learning_rate": 8.404799091296323e-06, - "loss": 0.4263, - "step": 2250 - }, - { - "epoch": 0.16041736908416587, - "grad_norm": 5.685661053410237, - "learning_rate": 8.397699843816556e-06, - "loss": 0.4148, - "step": 2260 - }, - { - "epoch": 0.16112718045179494, - "grad_norm": 4.337480471983148, - "learning_rate": 8.39060059633679e-06, - "loss": 0.4101, - "step": 2270 - }, - { - "epoch": 0.161836991819424, - "grad_norm": 4.809277499740254, - "learning_rate": 8.38350134885702e-06, - "loss": 0.4071, - "step": 2280 - }, - { - "epoch": 0.16254680318705303, - "grad_norm": 7.364507480899371, - "learning_rate": 8.376402101377254e-06, - "loss": 0.4021, - "step": 2290 - }, - { - "epoch": 0.1632566145546821, - "grad_norm": 5.408145626972555, - "learning_rate": 8.369302853897487e-06, - "loss": 0.4154, - "step": 2300 - }, - { - "epoch": 0.16396642592231114, - "grad_norm": 2.9449217220121784, - "learning_rate": 8.36220360641772e-06, - "loss": 0.4296, - "step": 2310 - }, - { - "epoch": 0.16467623728994019, - "grad_norm": 3.843647555602573, - "learning_rate": 8.355104358937953e-06, - "loss": 0.4197, - "step": 2320 - }, - { - "epoch": 0.16538604865756926, - "grad_norm": 5.843629733774891, - "learning_rate": 8.348005111458186e-06, - "loss": 0.4052, - "step": 2330 - }, - { - "epoch": 0.1660958600251983, - "grad_norm": 4.182196885965926, - "learning_rate": 8.34090586397842e-06, - "loss": 0.4304, - "step": 2340 - }, - { - "epoch": 0.16680567139282734, - "grad_norm": 12.343897765958163, - "learning_rate": 8.333806616498653e-06, - "loss": 0.4057, - "step": 2350 - }, - { - "epoch": 0.1675154827604564, - "grad_norm": 4.52770872028285, - "learning_rate": 8.326707369018886e-06, - "loss": 0.4234, - "step": 2360 - }, - { - "epoch": 0.16822529412808546, - "grad_norm": 5.473115632671873, - "learning_rate": 8.319608121539117e-06, - "loss": 0.4127, - "step": 2370 - }, - { - "epoch": 0.16893510549571453, - "grad_norm": 5.243162829393595, - "learning_rate": 8.31250887405935e-06, - "loss": 0.4148, - "step": 2380 - }, - { - "epoch": 0.16964491686334357, - "grad_norm": 9.638919529909746, - "learning_rate": 8.305409626579583e-06, - "loss": 0.4244, - "step": 2390 - }, - { - "epoch": 0.1703547282309726, - "grad_norm": 5.824204497516263, - "learning_rate": 8.298310379099816e-06, - "loss": 0.3991, - "step": 2400 - }, - { - "epoch": 0.17106453959860168, - "grad_norm": 8.92013550945478, - "learning_rate": 8.29121113162005e-06, - "loss": 0.4107, - "step": 2410 - }, - { - "epoch": 0.17177435096623073, - "grad_norm": 4.310339052965044, - "learning_rate": 8.284111884140283e-06, - "loss": 0.4198, - "step": 2420 - }, - { - "epoch": 0.17248416233385977, - "grad_norm": 3.674140188675587, - "learning_rate": 8.277012636660514e-06, - "loss": 0.4066, - "step": 2430 - }, - { - "epoch": 0.17319397370148884, - "grad_norm": 3.2816580938205986, - "learning_rate": 8.269913389180747e-06, - "loss": 0.3948, - "step": 2440 - }, - { - "epoch": 0.17390378506911788, - "grad_norm": 3.119520711268051, - "learning_rate": 8.26281414170098e-06, - "loss": 0.4236, - "step": 2450 - }, - { - "epoch": 0.17461359643674693, - "grad_norm": 3.9529990200341216, - "learning_rate": 8.255714894221213e-06, - "loss": 0.4028, - "step": 2460 - }, - { - "epoch": 0.175323407804376, - "grad_norm": 6.5624619571577, - "learning_rate": 8.248615646741446e-06, - "loss": 0.4207, - "step": 2470 - }, - { - "epoch": 0.17603321917200504, - "grad_norm": 6.563862400109423, - "learning_rate": 8.24151639926168e-06, - "loss": 0.4234, - "step": 2480 - }, - { - "epoch": 0.17674303053963408, - "grad_norm": 4.124646423199101, - "learning_rate": 8.234417151781911e-06, - "loss": 0.421, - "step": 2490 - }, - { - "epoch": 0.17745284190726315, - "grad_norm": 8.460797246337737, - "learning_rate": 8.227317904302144e-06, - "loss": 0.4169, - "step": 2500 - }, - { - "epoch": 0.1781626532748922, - "grad_norm": 4.636207121737827, - "learning_rate": 8.220218656822377e-06, - "loss": 0.4154, - "step": 2510 - }, - { - "epoch": 0.17887246464252124, - "grad_norm": 15.193279765427832, - "learning_rate": 8.21311940934261e-06, - "loss": 0.4, - "step": 2520 - }, - { - "epoch": 0.1795822760101503, - "grad_norm": 8.394690912531237, - "learning_rate": 8.206020161862843e-06, - "loss": 0.3994, - "step": 2530 - }, - { - "epoch": 0.18029208737777935, - "grad_norm": 11.829872869588135, - "learning_rate": 8.198920914383076e-06, - "loss": 0.4045, - "step": 2540 - }, - { - "epoch": 0.1810018987454084, - "grad_norm": 10.598164946336963, - "learning_rate": 8.19182166690331e-06, - "loss": 0.4167, - "step": 2550 - }, - { - "epoch": 0.18171171011303747, - "grad_norm": 8.644167493937724, - "learning_rate": 8.184722419423543e-06, - "loss": 0.4193, - "step": 2560 - }, - { - "epoch": 0.1824215214806665, - "grad_norm": 5.532113862418252, - "learning_rate": 8.177623171943776e-06, - "loss": 0.4134, - "step": 2570 - }, - { - "epoch": 0.18313133284829555, - "grad_norm": 8.962347784457894, - "learning_rate": 8.170523924464007e-06, - "loss": 0.4231, - "step": 2580 - }, - { - "epoch": 0.18384114421592462, - "grad_norm": 4.789480578365759, - "learning_rate": 8.16342467698424e-06, - "loss": 0.4056, - "step": 2590 - }, - { - "epoch": 0.18455095558355367, - "grad_norm": 7.463666547462272, - "learning_rate": 8.156325429504473e-06, - "loss": 0.4082, - "step": 2600 - }, - { - "epoch": 0.1852607669511827, - "grad_norm": 3.543632295285487, - "learning_rate": 8.149226182024706e-06, - "loss": 0.3957, - "step": 2610 - }, - { - "epoch": 0.18597057831881178, - "grad_norm": 10.128862482609126, - "learning_rate": 8.14212693454494e-06, - "loss": 0.4104, - "step": 2620 - }, - { - "epoch": 0.18668038968644082, - "grad_norm": 2.279815139257822, - "learning_rate": 8.135027687065171e-06, - "loss": 0.4023, - "step": 2630 - }, - { - "epoch": 0.1873902010540699, - "grad_norm": 5.651432220535337, - "learning_rate": 8.127928439585404e-06, - "loss": 0.4174, - "step": 2640 - }, - { - "epoch": 0.18810001242169894, - "grad_norm": 2.764126752423827, - "learning_rate": 8.120829192105637e-06, - "loss": 0.4316, - "step": 2650 - }, - { - "epoch": 0.18880982378932798, - "grad_norm": 2.2008942019632443, - "learning_rate": 8.11372994462587e-06, - "loss": 0.3998, - "step": 2660 - }, - { - "epoch": 0.18951963515695705, - "grad_norm": 2.6464894767494194, - "learning_rate": 8.106630697146103e-06, - "loss": 0.4152, - "step": 2670 - }, - { - "epoch": 0.1902294465245861, - "grad_norm": 2.9891233500309697, - "learning_rate": 8.099531449666336e-06, - "loss": 0.4065, - "step": 2680 - }, - { - "epoch": 0.19093925789221514, - "grad_norm": 3.2947192783933303, - "learning_rate": 8.092432202186568e-06, - "loss": 0.4096, - "step": 2690 - }, - { - "epoch": 0.1916490692598442, - "grad_norm": 2.6266501022263093, - "learning_rate": 8.085332954706801e-06, - "loss": 0.4079, - "step": 2700 - }, - { - "epoch": 0.19235888062747325, - "grad_norm": 2.0600161188196258, - "learning_rate": 8.078233707227034e-06, - "loss": 0.4245, - "step": 2710 - }, - { - "epoch": 0.1930686919951023, - "grad_norm": 3.4259686474049587, - "learning_rate": 8.071134459747267e-06, - "loss": 0.4168, - "step": 2720 - }, - { - "epoch": 0.19377850336273136, - "grad_norm": 4.184352662206747, - "learning_rate": 8.0640352122675e-06, - "loss": 0.4265, - "step": 2730 - }, - { - "epoch": 0.1944883147303604, - "grad_norm": 3.7320888080359174, - "learning_rate": 8.056935964787733e-06, - "loss": 0.4172, - "step": 2740 - }, - { - "epoch": 0.19519812609798945, - "grad_norm": 3.750448672171502, - "learning_rate": 8.049836717307966e-06, - "loss": 0.4327, - "step": 2750 - }, - { - "epoch": 0.19590793746561852, - "grad_norm": 3.0158382271152564, - "learning_rate": 8.0427374698282e-06, - "loss": 0.4284, - "step": 2760 - }, - { - "epoch": 0.19661774883324756, - "grad_norm": 2.438159262347708, - "learning_rate": 8.035638222348433e-06, - "loss": 0.4117, - "step": 2770 - }, - { - "epoch": 0.1973275602008766, - "grad_norm": 4.795802800628808, - "learning_rate": 8.028538974868664e-06, - "loss": 0.4207, - "step": 2780 - }, - { - "epoch": 0.19803737156850568, - "grad_norm": 2.5291141301554405, - "learning_rate": 8.021439727388897e-06, - "loss": 0.4146, - "step": 2790 - }, - { - "epoch": 0.19874718293613472, - "grad_norm": 2.4740979454164727, - "learning_rate": 8.01434047990913e-06, - "loss": 0.3999, - "step": 2800 - }, - { - "epoch": 0.19945699430376376, - "grad_norm": 3.4467777684569927, - "learning_rate": 8.007241232429363e-06, - "loss": 0.4151, - "step": 2810 - }, - { - "epoch": 0.20016680567139283, - "grad_norm": 2.741445348023422, - "learning_rate": 8.000141984949596e-06, - "loss": 0.4165, - "step": 2820 - }, - { - "epoch": 0.20087661703902188, - "grad_norm": 2.977547725757033, - "learning_rate": 7.99304273746983e-06, - "loss": 0.4137, - "step": 2830 - }, - { - "epoch": 0.20158642840665092, - "grad_norm": 3.493123708582949, - "learning_rate": 7.985943489990061e-06, - "loss": 0.4095, - "step": 2840 - }, - { - "epoch": 0.20229623977428, - "grad_norm": 9.43644672917822, - "learning_rate": 7.978844242510294e-06, - "loss": 0.4066, - "step": 2850 - }, - { - "epoch": 0.20300605114190903, - "grad_norm": 4.050870492633986, - "learning_rate": 7.971744995030527e-06, - "loss": 0.4079, - "step": 2860 - }, - { - "epoch": 0.2037158625095381, - "grad_norm": 7.830134940271083, - "learning_rate": 7.96464574755076e-06, - "loss": 0.3896, - "step": 2870 - }, - { - "epoch": 0.20442567387716715, - "grad_norm": 7.557535176254197, - "learning_rate": 7.957546500070993e-06, - "loss": 0.4096, - "step": 2880 - }, - { - "epoch": 0.2051354852447962, - "grad_norm": 4.715465621080843, - "learning_rate": 7.950447252591226e-06, - "loss": 0.3907, - "step": 2890 - }, - { - "epoch": 0.20584529661242526, - "grad_norm": 30.299863630729803, - "learning_rate": 7.943348005111458e-06, - "loss": 0.4142, - "step": 2900 - }, - { - "epoch": 0.2065551079800543, - "grad_norm": 13.362349279952854, - "learning_rate": 7.936248757631691e-06, - "loss": 0.4211, - "step": 2910 - }, - { - "epoch": 0.20726491934768335, - "grad_norm": 7.166470527615742, - "learning_rate": 7.929149510151924e-06, - "loss": 0.4038, - "step": 2920 - }, - { - "epoch": 0.20797473071531242, - "grad_norm": 218.37559359733393, - "learning_rate": 7.922050262672157e-06, - "loss": 0.3814, - "step": 2930 - }, - { - "epoch": 0.20868454208294146, - "grad_norm": 4.776318350142146, - "learning_rate": 7.91495101519239e-06, - "loss": 0.4033, - "step": 2940 - }, - { - "epoch": 0.2093943534505705, - "grad_norm": 6.050705359465637, - "learning_rate": 7.907851767712623e-06, - "loss": 0.4006, - "step": 2950 - }, - { - "epoch": 0.21010416481819957, - "grad_norm": 7.0609749250244125, - "learning_rate": 7.900752520232857e-06, - "loss": 0.3996, - "step": 2960 - }, - { - "epoch": 0.21081397618582862, - "grad_norm": 5.2294105499183985, - "learning_rate": 7.89365327275309e-06, - "loss": 0.3906, - "step": 2970 - }, - { - "epoch": 0.21152378755345766, - "grad_norm": 5.037453517661707, - "learning_rate": 7.886554025273323e-06, - "loss": 0.3925, - "step": 2980 - }, - { - "epoch": 0.21223359892108673, - "grad_norm": 4.329367488091813, - "learning_rate": 7.879454777793554e-06, - "loss": 0.4005, - "step": 2990 - }, - { - "epoch": 0.21294341028871577, - "grad_norm": 4.587934783884384, - "learning_rate": 7.872355530313787e-06, - "loss": 0.3949, - "step": 3000 - }, - { - "epoch": 0.21365322165634482, - "grad_norm": 4.34538375508175, - "learning_rate": 7.86525628283402e-06, - "loss": 0.3963, - "step": 3010 - }, - { - "epoch": 0.2143630330239739, - "grad_norm": 14.538466945533717, - "learning_rate": 7.858157035354253e-06, - "loss": 0.4145, - "step": 3020 - }, - { - "epoch": 0.21507284439160293, - "grad_norm": 5.725604081866674, - "learning_rate": 7.851057787874487e-06, - "loss": 0.397, - "step": 3030 - }, - { - "epoch": 0.21578265575923197, - "grad_norm": 4.100595238075657, - "learning_rate": 7.843958540394718e-06, - "loss": 0.407, - "step": 3040 - }, - { - "epoch": 0.21649246712686104, - "grad_norm": 3.6102459737641452, - "learning_rate": 7.836859292914951e-06, - "loss": 0.3941, - "step": 3050 - }, - { - "epoch": 0.2172022784944901, - "grad_norm": 9.48884086833176, - "learning_rate": 7.829760045435184e-06, - "loss": 0.3981, - "step": 3060 - }, - { - "epoch": 0.21791208986211913, - "grad_norm": 5.265598040684193, - "learning_rate": 7.822660797955417e-06, - "loss": 0.3865, - "step": 3070 - }, - { - "epoch": 0.2186219012297482, - "grad_norm": 5.853395704700518, - "learning_rate": 7.81556155047565e-06, - "loss": 0.4089, - "step": 3080 - }, - { - "epoch": 0.21933171259737724, - "grad_norm": 2.867041909768411, - "learning_rate": 7.808462302995883e-06, - "loss": 0.411, - "step": 3090 - }, - { - "epoch": 0.22004152396500629, - "grad_norm": 6.447556295363806, - "learning_rate": 7.801363055516117e-06, - "loss": 0.4054, - "step": 3100 - }, - { - "epoch": 0.22075133533263536, - "grad_norm": 6.665403407542621, - "learning_rate": 7.794263808036348e-06, - "loss": 0.4331, - "step": 3110 - }, - { - "epoch": 0.2214611467002644, - "grad_norm": 3.740543632288075, - "learning_rate": 7.787164560556581e-06, - "loss": 0.4132, - "step": 3120 - }, - { - "epoch": 0.22217095806789347, - "grad_norm": 19.12212944661018, - "learning_rate": 7.780065313076814e-06, - "loss": 0.4229, - "step": 3130 - }, - { - "epoch": 0.2228807694355225, - "grad_norm": 5.646216224084272, - "learning_rate": 7.772966065597047e-06, - "loss": 0.4123, - "step": 3140 - }, - { - "epoch": 0.22359058080315156, - "grad_norm": 12.549975615460761, - "learning_rate": 7.76586681811728e-06, - "loss": 0.4156, - "step": 3150 - }, - { - "epoch": 0.22430039217078063, - "grad_norm": 5.34509934381609, - "learning_rate": 7.758767570637513e-06, - "loss": 0.3935, - "step": 3160 - }, - { - "epoch": 0.22501020353840967, - "grad_norm": 4.868356423660982, - "learning_rate": 7.751668323157747e-06, - "loss": 0.4121, - "step": 3170 - }, - { - "epoch": 0.2257200149060387, - "grad_norm": 3.604594374317723, - "learning_rate": 7.74456907567798e-06, - "loss": 0.3949, - "step": 3180 - }, - { - "epoch": 0.22642982627366778, - "grad_norm": 2.6762060130385565, - "learning_rate": 7.737469828198211e-06, - "loss": 0.4192, - "step": 3190 - }, - { - "epoch": 0.22713963764129683, - "grad_norm": 3.7277037964888957, - "learning_rate": 7.730370580718444e-06, - "loss": 0.4063, - "step": 3200 - }, - { - "epoch": 0.22784944900892587, - "grad_norm": 4.2017308560808395, - "learning_rate": 7.723271333238677e-06, - "loss": 0.3983, - "step": 3210 - }, - { - "epoch": 0.22855926037655494, - "grad_norm": 6.82717398390433, - "learning_rate": 7.71617208575891e-06, - "loss": 0.4003, - "step": 3220 - }, - { - "epoch": 0.22926907174418398, - "grad_norm": 3.3720424392184865, - "learning_rate": 7.709072838279143e-06, - "loss": 0.384, - "step": 3230 - }, - { - "epoch": 0.22997888311181303, - "grad_norm": 15.234041629621501, - "learning_rate": 7.701973590799375e-06, - "loss": 0.3936, - "step": 3240 - }, - { - "epoch": 0.2306886944794421, - "grad_norm": 6.450291645106787, - "learning_rate": 7.694874343319608e-06, - "loss": 0.4153, - "step": 3250 - }, - { - "epoch": 0.23139850584707114, - "grad_norm": 5.0596647748479056, - "learning_rate": 7.687775095839841e-06, - "loss": 0.4098, - "step": 3260 - }, - { - "epoch": 0.23210831721470018, - "grad_norm": 6.351369993733097, - "learning_rate": 7.680675848360074e-06, - "loss": 0.4036, - "step": 3270 - }, - { - "epoch": 0.23281812858232925, - "grad_norm": 7.706709044787595, - "learning_rate": 7.673576600880307e-06, - "loss": 0.4137, - "step": 3280 - }, - { - "epoch": 0.2335279399499583, - "grad_norm": 6.111103199878706, - "learning_rate": 7.66647735340054e-06, - "loss": 0.4163, - "step": 3290 - }, - { - "epoch": 0.23423775131758734, - "grad_norm": 3.182362422678598, - "learning_rate": 7.659378105920773e-06, - "loss": 0.4007, - "step": 3300 - }, - { - "epoch": 0.2349475626852164, - "grad_norm": 3.929827344563346, - "learning_rate": 7.652278858441005e-06, - "loss": 0.4011, - "step": 3310 - }, - { - "epoch": 0.23565737405284545, - "grad_norm": 6.606808853169358, - "learning_rate": 7.645179610961238e-06, - "loss": 0.4113, - "step": 3320 - }, - { - "epoch": 0.2363671854204745, - "grad_norm": 7.983975561443669, - "learning_rate": 7.638080363481471e-06, - "loss": 0.3941, - "step": 3330 - }, - { - "epoch": 0.23707699678810357, - "grad_norm": 2.551810232754013, - "learning_rate": 7.630981116001704e-06, - "loss": 0.3987, - "step": 3340 - }, - { - "epoch": 0.2377868081557326, - "grad_norm": 16.325804366695763, - "learning_rate": 7.623881868521937e-06, - "loss": 0.3814, - "step": 3350 - }, - { - "epoch": 0.23849661952336168, - "grad_norm": 17.86582631307272, - "learning_rate": 7.61678262104217e-06, - "loss": 0.4065, - "step": 3360 - }, - { - "epoch": 0.23920643089099072, - "grad_norm": 4.439905284094514, - "learning_rate": 7.6096833735624035e-06, - "loss": 0.4079, - "step": 3370 - }, - { - "epoch": 0.23991624225861977, - "grad_norm": 13.632710588001641, - "learning_rate": 7.602584126082636e-06, - "loss": 0.4075, - "step": 3380 - }, - { - "epoch": 0.24062605362624884, - "grad_norm": 7.4557485788963405, - "learning_rate": 7.595484878602869e-06, - "loss": 0.399, - "step": 3390 - }, - { - "epoch": 0.24133586499387788, - "grad_norm": 6.032057911933067, - "learning_rate": 7.588385631123102e-06, - "loss": 0.3892, - "step": 3400 - }, - { - "epoch": 0.24204567636150692, - "grad_norm": 5.1424876309924, - "learning_rate": 7.581286383643335e-06, - "loss": 0.396, - "step": 3410 - }, - { - "epoch": 0.242755487729136, - "grad_norm": 3.6691932120100987, - "learning_rate": 7.574187136163567e-06, - "loss": 0.4108, - "step": 3420 - }, - { - "epoch": 0.24346529909676504, - "grad_norm": 2.8083232656002033, - "learning_rate": 7.5670878886838004e-06, - "loss": 0.3984, - "step": 3430 - }, - { - "epoch": 0.24417511046439408, - "grad_norm": 13.589049355107566, - "learning_rate": 7.5599886412040335e-06, - "loss": 0.3957, - "step": 3440 - }, - { - "epoch": 0.24488492183202315, - "grad_norm": 6.813624263530042, - "learning_rate": 7.552889393724265e-06, - "loss": 0.4105, - "step": 3450 - }, - { - "epoch": 0.2455947331996522, - "grad_norm": 13.609829369379536, - "learning_rate": 7.545790146244498e-06, - "loss": 0.4175, - "step": 3460 - }, - { - "epoch": 0.24630454456728124, - "grad_norm": 5.1258006881261915, - "learning_rate": 7.538690898764731e-06, - "loss": 0.3966, - "step": 3470 - }, - { - "epoch": 0.2470143559349103, - "grad_norm": 40.31962236147607, - "learning_rate": 7.531591651284964e-06, - "loss": 0.3839, - "step": 3480 - }, - { - "epoch": 0.24772416730253935, - "grad_norm": 6.537768993909155, - "learning_rate": 7.524492403805197e-06, - "loss": 0.4122, - "step": 3490 - }, - { - "epoch": 0.2484339786701684, - "grad_norm": 17.652356012021233, - "learning_rate": 7.51739315632543e-06, - "loss": 0.3948, - "step": 3500 - }, - { - "epoch": 0.24914379003779746, - "grad_norm": 3.85528406182526, - "learning_rate": 7.510293908845663e-06, - "loss": 0.3938, - "step": 3510 - }, - { - "epoch": 0.2498536014054265, - "grad_norm": 125.62304184951121, - "learning_rate": 7.503194661365896e-06, - "loss": 0.389, - "step": 3520 - }, - { - "epoch": 0.25056341277305555, - "grad_norm": 8.558355724038593, - "learning_rate": 7.496095413886129e-06, - "loss": 0.3787, - "step": 3530 - }, - { - "epoch": 0.2512732241406846, - "grad_norm": 4.216427070872869, - "learning_rate": 7.488996166406361e-06, - "loss": 0.3835, - "step": 3540 - }, - { - "epoch": 0.2519830355083137, - "grad_norm": 4.314131483032103, - "learning_rate": 7.481896918926594e-06, - "loss": 0.3946, - "step": 3550 - }, - { - "epoch": 0.25269284687594273, - "grad_norm": 4.159823786853909, - "learning_rate": 7.474797671446827e-06, - "loss": 0.3972, - "step": 3560 - }, - { - "epoch": 0.2534026582435718, - "grad_norm": 3.4947296702394586, - "learning_rate": 7.4676984239670605e-06, - "loss": 0.4165, - "step": 3570 - }, - { - "epoch": 0.2541124696112008, - "grad_norm": 4.022241190948728, - "learning_rate": 7.4605991764872936e-06, - "loss": 0.3988, - "step": 3580 - }, - { - "epoch": 0.25482228097882986, - "grad_norm": 3.4849637281174006, - "learning_rate": 7.453499929007526e-06, - "loss": 0.4106, - "step": 3590 - }, - { - "epoch": 0.2555320923464589, - "grad_norm": 5.338306458076586, - "learning_rate": 7.446400681527759e-06, - "loss": 0.4082, - "step": 3600 - }, - { - "epoch": 0.256241903714088, - "grad_norm": 4.970005106695202, - "learning_rate": 7.439301434047992e-06, - "loss": 0.3914, - "step": 3610 - }, - { - "epoch": 0.25695171508171705, - "grad_norm": 6.355373029038747, - "learning_rate": 7.432202186568225e-06, - "loss": 0.3989, - "step": 3620 - }, - { - "epoch": 0.2576615264493461, - "grad_norm": 5.996742366501121, - "learning_rate": 7.425102939088457e-06, - "loss": 0.3999, - "step": 3630 - }, - { - "epoch": 0.25837133781697513, - "grad_norm": 6.966686936423967, - "learning_rate": 7.4180036916086905e-06, - "loss": 0.3831, - "step": 3640 - }, - { - "epoch": 0.2590811491846042, - "grad_norm": 4.185121399245409, - "learning_rate": 7.410904444128923e-06, - "loss": 0.408, - "step": 3650 - }, - { - "epoch": 0.2597909605522332, - "grad_norm": 2.2056616209460866, - "learning_rate": 7.403805196649155e-06, - "loss": 0.3931, - "step": 3660 - }, - { - "epoch": 0.2605007719198623, - "grad_norm": 4.176248780095696, - "learning_rate": 7.396705949169388e-06, - "loss": 0.409, - "step": 3670 - }, - { - "epoch": 0.26121058328749136, - "grad_norm": 2.47926985794175, - "learning_rate": 7.389606701689621e-06, - "loss": 0.4091, - "step": 3680 - }, - { - "epoch": 0.2619203946551204, - "grad_norm": 3.02240842448802, - "learning_rate": 7.382507454209854e-06, - "loss": 0.4102, - "step": 3690 - }, - { - "epoch": 0.26263020602274945, - "grad_norm": 2.0291710541228816, - "learning_rate": 7.3754082067300866e-06, - "loss": 0.382, - "step": 3700 - }, - { - "epoch": 0.2633400173903785, - "grad_norm": 2.1912303159611084, - "learning_rate": 7.36830895925032e-06, - "loss": 0.3974, - "step": 3710 - }, - { - "epoch": 0.26404982875800753, - "grad_norm": 2.964541482780821, - "learning_rate": 7.361209711770553e-06, - "loss": 0.4096, - "step": 3720 - }, - { - "epoch": 0.26475964012563663, - "grad_norm": 5.810099164313448, - "learning_rate": 7.354110464290786e-06, - "loss": 0.4092, - "step": 3730 - }, - { - "epoch": 0.2654694514932657, - "grad_norm": 4.879409457746285, - "learning_rate": 7.347011216811019e-06, - "loss": 0.4034, - "step": 3740 - }, - { - "epoch": 0.2661792628608947, - "grad_norm": 2.761287928392515, - "learning_rate": 7.339911969331251e-06, - "loss": 0.3971, - "step": 3750 - }, - { - "epoch": 0.26688907422852376, - "grad_norm": 14.80879239487425, - "learning_rate": 7.332812721851484e-06, - "loss": 0.4203, - "step": 3760 - }, - { - "epoch": 0.2675988855961528, - "grad_norm": 2.589550559546521, - "learning_rate": 7.325713474371717e-06, - "loss": 0.4065, - "step": 3770 - }, - { - "epoch": 0.2683086969637819, - "grad_norm": 2.1908148156089204, - "learning_rate": 7.3186142268919505e-06, - "loss": 0.4001, - "step": 3780 - }, - { - "epoch": 0.26901850833141094, - "grad_norm": 3.614429975395643, - "learning_rate": 7.311514979412183e-06, - "loss": 0.3949, - "step": 3790 - }, - { - "epoch": 0.26972831969904, - "grad_norm": 8.199581604131074, - "learning_rate": 7.304415731932416e-06, - "loss": 0.4027, - "step": 3800 - }, - { - "epoch": 0.27043813106666903, - "grad_norm": 1.9841735875976263, - "learning_rate": 7.297316484452649e-06, - "loss": 0.3803, - "step": 3810 - }, - { - "epoch": 0.2711479424342981, - "grad_norm": 1.7818490390141006, - "learning_rate": 7.290217236972882e-06, - "loss": 0.3979, - "step": 3820 - }, - { - "epoch": 0.2718577538019271, - "grad_norm": 2.664420697627613, - "learning_rate": 7.283117989493115e-06, - "loss": 0.4112, - "step": 3830 - }, - { - "epoch": 0.2725675651695562, - "grad_norm": 7.6015896940216345, - "learning_rate": 7.2760187420133474e-06, - "loss": 0.3978, - "step": 3840 - }, - { - "epoch": 0.27327737653718526, - "grad_norm": 5.109710356060471, - "learning_rate": 7.2689194945335805e-06, - "loss": 0.3911, - "step": 3850 - }, - { - "epoch": 0.2739871879048143, - "grad_norm": 1.8719451344781273, - "learning_rate": 7.261820247053813e-06, - "loss": 0.4039, - "step": 3860 - }, - { - "epoch": 0.27469699927244334, - "grad_norm": 7.834590688589366, - "learning_rate": 7.254720999574045e-06, - "loss": 0.3972, - "step": 3870 - }, - { - "epoch": 0.2754068106400724, - "grad_norm": 3.4725606354409915, - "learning_rate": 7.247621752094278e-06, - "loss": 0.4106, - "step": 3880 - }, - { - "epoch": 0.27611662200770143, - "grad_norm": 2.131887069098727, - "learning_rate": 7.240522504614511e-06, - "loss": 0.3921, - "step": 3890 - }, - { - "epoch": 0.2768264333753305, - "grad_norm": 3.840712773368679, - "learning_rate": 7.233423257134744e-06, - "loss": 0.3963, - "step": 3900 - }, - { - "epoch": 0.27753624474295957, - "grad_norm": 1.8435607174327202, - "learning_rate": 7.226324009654977e-06, - "loss": 0.4171, - "step": 3910 - }, - { - "epoch": 0.2782460561105886, - "grad_norm": 2.927315889095762, - "learning_rate": 7.21922476217521e-06, - "loss": 0.4078, - "step": 3920 - }, - { - "epoch": 0.27895586747821766, - "grad_norm": 2.4533548064235955, - "learning_rate": 7.212125514695443e-06, - "loss": 0.4018, - "step": 3930 - }, - { - "epoch": 0.2796656788458467, - "grad_norm": 2.6808622987821424, - "learning_rate": 7.205026267215676e-06, - "loss": 0.3952, - "step": 3940 - }, - { - "epoch": 0.28037549021347574, - "grad_norm": 2.006870713713202, - "learning_rate": 7.197927019735908e-06, - "loss": 0.4041, - "step": 3950 - }, - { - "epoch": 0.28108530158110484, - "grad_norm": 4.1552921396903955, - "learning_rate": 7.190827772256141e-06, - "loss": 0.3815, - "step": 3960 - }, - { - "epoch": 0.2817951129487339, - "grad_norm": 3.088912130241367, - "learning_rate": 7.183728524776374e-06, - "loss": 0.4018, - "step": 3970 - }, - { - "epoch": 0.2825049243163629, - "grad_norm": 2.9619382181530853, - "learning_rate": 7.1766292772966075e-06, - "loss": 0.4071, - "step": 3980 - }, - { - "epoch": 0.28321473568399197, - "grad_norm": 3.194525382034512, - "learning_rate": 7.1695300298168406e-06, - "loss": 0.3861, - "step": 3990 - }, - { - "epoch": 0.283924547051621, - "grad_norm": 2.58824315637412, - "learning_rate": 7.162430782337073e-06, - "loss": 0.4022, - "step": 4000 - }, - { - "epoch": 0.2846343584192501, - "grad_norm": 1.6807083864960135, - "learning_rate": 7.155331534857306e-06, - "loss": 0.3953, - "step": 4010 - }, - { - "epoch": 0.28534416978687915, - "grad_norm": 2.9052226494936706, - "learning_rate": 7.148232287377539e-06, - "loss": 0.3803, - "step": 4020 - }, - { - "epoch": 0.2860539811545082, - "grad_norm": 1.9518486816171219, - "learning_rate": 7.141133039897772e-06, - "loss": 0.4076, - "step": 4030 - }, - { - "epoch": 0.28676379252213724, - "grad_norm": 2.223176862483651, - "learning_rate": 7.134033792418004e-06, - "loss": 0.4058, - "step": 4040 - }, - { - "epoch": 0.2874736038897663, - "grad_norm": 2.2196780309614854, - "learning_rate": 7.1269345449382375e-06, - "loss": 0.3926, - "step": 4050 - }, - { - "epoch": 0.2881834152573953, - "grad_norm": 6.524368077094248, - "learning_rate": 7.11983529745847e-06, - "loss": 0.4172, - "step": 4060 - }, - { - "epoch": 0.2888932266250244, - "grad_norm": 5.292339769504148, - "learning_rate": 7.112736049978702e-06, - "loss": 0.3908, - "step": 4070 - }, - { - "epoch": 0.28960303799265347, - "grad_norm": 2.3067804343233282, - "learning_rate": 7.105636802498935e-06, - "loss": 0.3899, - "step": 4080 - }, - { - "epoch": 0.2903128493602825, - "grad_norm": 3.23451698379491, - "learning_rate": 7.098537555019168e-06, - "loss": 0.4078, - "step": 4090 - }, - { - "epoch": 0.29102266072791155, - "grad_norm": 1.9975711149406958, - "learning_rate": 7.091438307539401e-06, - "loss": 0.3892, - "step": 4100 - }, - { - "epoch": 0.2917324720955406, - "grad_norm": 2.172457996529036, - "learning_rate": 7.084339060059634e-06, - "loss": 0.4024, - "step": 4110 - }, - { - "epoch": 0.29244228346316964, - "grad_norm": 4.2611345539293985, - "learning_rate": 7.077239812579867e-06, - "loss": 0.4051, - "step": 4120 - }, - { - "epoch": 0.29315209483079874, - "grad_norm": 4.8499954927547915, - "learning_rate": 7.0701405651001e-06, - "loss": 0.4051, - "step": 4130 - }, - { - "epoch": 0.2938619061984278, - "grad_norm": 3.133374032170856, - "learning_rate": 7.063041317620333e-06, - "loss": 0.4113, - "step": 4140 - }, - { - "epoch": 0.2945717175660568, - "grad_norm": 3.0408556337828667, - "learning_rate": 7.055942070140566e-06, - "loss": 0.3918, - "step": 4150 - }, - { - "epoch": 0.29528152893368587, - "grad_norm": 2.967610716656761, - "learning_rate": 7.048842822660798e-06, - "loss": 0.3935, - "step": 4160 - }, - { - "epoch": 0.2959913403013149, - "grad_norm": 4.089654504142007, - "learning_rate": 7.041743575181031e-06, - "loss": 0.3812, - "step": 4170 - }, - { - "epoch": 0.29670115166894395, - "grad_norm": 6.123820735815897, - "learning_rate": 7.0346443277012644e-06, - "loss": 0.3894, - "step": 4180 - }, - { - "epoch": 0.29741096303657305, - "grad_norm": 9.52031358542494, - "learning_rate": 7.0275450802214975e-06, - "loss": 0.3933, - "step": 4190 - }, - { - "epoch": 0.2981207744042021, - "grad_norm": 4.241656002923987, - "learning_rate": 7.02044583274173e-06, - "loss": 0.3938, - "step": 4200 - }, - { - "epoch": 0.29883058577183114, - "grad_norm": 10.364254693083032, - "learning_rate": 7.013346585261963e-06, - "loss": 0.3939, - "step": 4210 - }, - { - "epoch": 0.2995403971394602, - "grad_norm": 2.493001703497579, - "learning_rate": 7.006247337782196e-06, - "loss": 0.3904, - "step": 4220 - }, - { - "epoch": 0.3002502085070892, - "grad_norm": 2.372260556132136, - "learning_rate": 6.999148090302429e-06, - "loss": 0.4002, - "step": 4230 - }, - { - "epoch": 0.3009600198747183, - "grad_norm": 4.447948099801884, - "learning_rate": 6.992048842822662e-06, - "loss": 0.3894, - "step": 4240 - }, - { - "epoch": 0.30166983124234736, - "grad_norm": 2.4733723007039847, - "learning_rate": 6.9849495953428944e-06, - "loss": 0.3863, - "step": 4250 - }, - { - "epoch": 0.3023796426099764, - "grad_norm": 11.318740156291982, - "learning_rate": 6.977850347863127e-06, - "loss": 0.3881, - "step": 4260 - }, - { - "epoch": 0.30308945397760545, - "grad_norm": 3.6328999006662563, - "learning_rate": 6.97075110038336e-06, - "loss": 0.3894, - "step": 4270 - }, - { - "epoch": 0.3037992653452345, - "grad_norm": 2.0376811180198353, - "learning_rate": 6.963651852903592e-06, - "loss": 0.3993, - "step": 4280 - }, - { - "epoch": 0.30450907671286354, - "grad_norm": 2.1376755414320625, - "learning_rate": 6.956552605423825e-06, - "loss": 0.3903, - "step": 4290 - }, - { - "epoch": 0.30521888808049263, - "grad_norm": 2.883515618882684, - "learning_rate": 6.949453357944058e-06, - "loss": 0.4082, - "step": 4300 - }, - { - "epoch": 0.3059286994481217, - "grad_norm": 2.0964398516334444, - "learning_rate": 6.942354110464291e-06, - "loss": 0.3857, - "step": 4310 - }, - { - "epoch": 0.3066385108157507, - "grad_norm": 5.410779818418891, - "learning_rate": 6.935254862984524e-06, - "loss": 0.391, - "step": 4320 - }, - { - "epoch": 0.30734832218337976, - "grad_norm": 4.439425532620099, - "learning_rate": 6.928155615504757e-06, - "loss": 0.4099, - "step": 4330 - }, - { - "epoch": 0.3080581335510088, - "grad_norm": 12.275643206811255, - "learning_rate": 6.92105636802499e-06, - "loss": 0.3953, - "step": 4340 - }, - { - "epoch": 0.30876794491863785, - "grad_norm": 5.947992733400443, - "learning_rate": 6.913957120545223e-06, - "loss": 0.3945, - "step": 4350 - }, - { - "epoch": 0.30947775628626695, - "grad_norm": 3.4397054213510843, - "learning_rate": 6.906857873065456e-06, - "loss": 0.3875, - "step": 4360 - }, - { - "epoch": 0.310187567653896, - "grad_norm": 41.88563893552131, - "learning_rate": 6.899758625585688e-06, - "loss": 0.3928, - "step": 4370 - }, - { - "epoch": 0.31089737902152503, - "grad_norm": 3.227989243444744, - "learning_rate": 6.892659378105921e-06, - "loss": 0.3908, - "step": 4380 - }, - { - "epoch": 0.3116071903891541, - "grad_norm": 22.897381721878148, - "learning_rate": 6.8855601306261545e-06, - "loss": 0.391, - "step": 4390 - }, - { - "epoch": 0.3123170017567831, - "grad_norm": 3.3630974135990406, - "learning_rate": 6.878460883146388e-06, - "loss": 0.374, - "step": 4400 - }, - { - "epoch": 0.31302681312441216, - "grad_norm": 4.877401136832981, - "learning_rate": 6.87136163566662e-06, - "loss": 0.3923, - "step": 4410 - }, - { - "epoch": 0.31373662449204126, - "grad_norm": 6.179682561885886, - "learning_rate": 6.864262388186853e-06, - "loss": 0.3865, - "step": 4420 - }, - { - "epoch": 0.3144464358596703, - "grad_norm": 4.8910756460648885, - "learning_rate": 6.857163140707086e-06, - "loss": 0.3865, - "step": 4430 - }, - { - "epoch": 0.31515624722729935, - "grad_norm": 3.260915462621521, - "learning_rate": 6.850063893227319e-06, - "loss": 0.3982, - "step": 4440 - }, - { - "epoch": 0.3158660585949284, - "grad_norm": 4.599472395508018, - "learning_rate": 6.842964645747551e-06, - "loss": 0.3961, - "step": 4450 - }, - { - "epoch": 0.31657586996255743, - "grad_norm": 7.776943140920524, - "learning_rate": 6.8358653982677845e-06, - "loss": 0.3873, - "step": 4460 - }, - { - "epoch": 0.3172856813301865, - "grad_norm": 3.0126570398502723, - "learning_rate": 6.828766150788017e-06, - "loss": 0.3859, - "step": 4470 - }, - { - "epoch": 0.3179954926978156, - "grad_norm": 1.935360939609241, - "learning_rate": 6.82166690330825e-06, - "loss": 0.3893, - "step": 4480 - }, - { - "epoch": 0.3187053040654446, - "grad_norm": 2.8545870894952055, - "learning_rate": 6.814567655828482e-06, - "loss": 0.3963, - "step": 4490 - }, - { - "epoch": 0.31941511543307366, - "grad_norm": 4.70013317139999, - "learning_rate": 6.807468408348715e-06, - "loss": 0.3939, - "step": 4500 - }, - { - "epoch": 0.3201249268007027, - "grad_norm": 3.264719904276936, - "learning_rate": 6.800369160868948e-06, - "loss": 0.3851, - "step": 4510 - }, - { - "epoch": 0.32083473816833175, - "grad_norm": 19.735683632874615, - "learning_rate": 6.793269913389181e-06, - "loss": 0.3722, - "step": 4520 - }, - { - "epoch": 0.32154454953596084, - "grad_norm": 2.501896594333183, - "learning_rate": 6.786170665909414e-06, - "loss": 0.3744, - "step": 4530 - }, - { - "epoch": 0.3222543609035899, - "grad_norm": 6.776418259400934, - "learning_rate": 6.779071418429647e-06, - "loss": 0.3868, - "step": 4540 - }, - { - "epoch": 0.32296417227121893, - "grad_norm": 7.759324029832955, - "learning_rate": 6.77197217094988e-06, - "loss": 0.3978, - "step": 4550 - }, - { - "epoch": 0.323673983638848, - "grad_norm": 5.1020465787210805, - "learning_rate": 6.764872923470113e-06, - "loss": 0.3756, - "step": 4560 - }, - { - "epoch": 0.324383795006477, - "grad_norm": 4.584721636805871, - "learning_rate": 6.757773675990345e-06, - "loss": 0.3962, - "step": 4570 - }, - { - "epoch": 0.32509360637410606, - "grad_norm": 5.227400251430727, - "learning_rate": 6.750674428510578e-06, - "loss": 0.3934, - "step": 4580 - }, - { - "epoch": 0.32580341774173516, - "grad_norm": 6.3055606292098645, - "learning_rate": 6.7435751810308114e-06, - "loss": 0.3921, - "step": 4590 - }, - { - "epoch": 0.3265132291093642, - "grad_norm": 3.6872617865325914, - "learning_rate": 6.7364759335510445e-06, - "loss": 0.3818, - "step": 4600 - }, - { - "epoch": 0.32722304047699324, - "grad_norm": 2.007884918336012, - "learning_rate": 6.729376686071278e-06, - "loss": 0.4005, - "step": 4610 - }, - { - "epoch": 0.3279328518446223, - "grad_norm": 5.042964957635144, - "learning_rate": 6.72227743859151e-06, - "loss": 0.3934, - "step": 4620 - }, - { - "epoch": 0.32864266321225133, - "grad_norm": 4.122572427444757, - "learning_rate": 6.715178191111743e-06, - "loss": 0.3835, - "step": 4630 - }, - { - "epoch": 0.32935247457988037, - "grad_norm": 4.528744366296638, - "learning_rate": 6.708078943631976e-06, - "loss": 0.3781, - "step": 4640 - }, - { - "epoch": 0.33006228594750947, - "grad_norm": 3.0405586193089107, - "learning_rate": 6.700979696152209e-06, - "loss": 0.4013, - "step": 4650 - }, - { - "epoch": 0.3307720973151385, - "grad_norm": 2.497528895602537, - "learning_rate": 6.6938804486724415e-06, - "loss": 0.4012, - "step": 4660 - }, - { - "epoch": 0.33148190868276756, - "grad_norm": 3.949569099861772, - "learning_rate": 6.686781201192674e-06, - "loss": 0.3791, - "step": 4670 - }, - { - "epoch": 0.3321917200503966, - "grad_norm": 2.9026740036563714, - "learning_rate": 6.679681953712907e-06, - "loss": 0.379, - "step": 4680 - }, - { - "epoch": 0.33290153141802564, - "grad_norm": 4.750694201369016, - "learning_rate": 6.672582706233139e-06, - "loss": 0.3962, - "step": 4690 - }, - { - "epoch": 0.3336113427856547, - "grad_norm": 4.9647752226572655, - "learning_rate": 6.665483458753372e-06, - "loss": 0.4014, - "step": 4700 - }, - { - "epoch": 0.3343211541532838, - "grad_norm": 5.007567374826438, - "learning_rate": 6.658384211273605e-06, - "loss": 0.386, - "step": 4710 - }, - { - "epoch": 0.3350309655209128, - "grad_norm": 24.665793733036637, - "learning_rate": 6.651284963793838e-06, - "loss": 0.3904, - "step": 4720 - }, - { - "epoch": 0.33574077688854187, - "grad_norm": 8.807448982539153, - "learning_rate": 6.6441857163140715e-06, - "loss": 0.3817, - "step": 4730 - }, - { - "epoch": 0.3364505882561709, - "grad_norm": 5.649488918187287, - "learning_rate": 6.637086468834304e-06, - "loss": 0.3952, - "step": 4740 - }, - { - "epoch": 0.33716039962379996, - "grad_norm": 10.030238684862177, - "learning_rate": 6.629987221354537e-06, - "loss": 0.3894, - "step": 4750 - }, - { - "epoch": 0.33787021099142905, - "grad_norm": 8.229307584465264, - "learning_rate": 6.62288797387477e-06, - "loss": 0.3777, - "step": 4760 - }, - { - "epoch": 0.3385800223590581, - "grad_norm": 4.702015980686352, - "learning_rate": 6.615788726395003e-06, - "loss": 0.3846, - "step": 4770 - }, - { - "epoch": 0.33928983372668714, - "grad_norm": 7.609531980298162, - "learning_rate": 6.608689478915235e-06, - "loss": 0.3876, - "step": 4780 - }, - { - "epoch": 0.3399996450943162, - "grad_norm": 9.359016840144466, - "learning_rate": 6.601590231435468e-06, - "loss": 0.3912, - "step": 4790 - }, - { - "epoch": 0.3407094564619452, - "grad_norm": 6.921512932106153, - "learning_rate": 6.5944909839557015e-06, - "loss": 0.3808, - "step": 4800 - }, - { - "epoch": 0.34141926782957427, - "grad_norm": 7.896921462163668, - "learning_rate": 6.587391736475935e-06, - "loss": 0.3822, - "step": 4810 - }, - { - "epoch": 0.34212907919720337, - "grad_norm": 41.265653283488135, - "learning_rate": 6.580292488996167e-06, - "loss": 0.3704, - "step": 4820 - }, - { - "epoch": 0.3428388905648324, - "grad_norm": 22.410728414840314, - "learning_rate": 6.5731932415164e-06, - "loss": 0.3879, - "step": 4830 - }, - { - "epoch": 0.34354870193246145, - "grad_norm": 28.36796548695283, - "learning_rate": 6.566093994036633e-06, - "loss": 0.3819, - "step": 4840 - }, - { - "epoch": 0.3442585133000905, - "grad_norm": 5.964443376270807, - "learning_rate": 6.558994746556866e-06, - "loss": 0.3793, - "step": 4850 - }, - { - "epoch": 0.34496832466771954, - "grad_norm": 4.876522423500047, - "learning_rate": 6.551895499077099e-06, - "loss": 0.3882, - "step": 4860 - }, - { - "epoch": 0.3456781360353486, - "grad_norm": 4.871742533391797, - "learning_rate": 6.544796251597331e-06, - "loss": 0.3896, - "step": 4870 - }, - { - "epoch": 0.3463879474029777, - "grad_norm": 11.91690423514364, - "learning_rate": 6.537697004117564e-06, - "loss": 0.3736, - "step": 4880 - }, - { - "epoch": 0.3470977587706067, - "grad_norm": 5.986322327762981, - "learning_rate": 6.530597756637797e-06, - "loss": 0.368, - "step": 4890 - }, - { - "epoch": 0.34780757013823577, - "grad_norm": 4.671637222361169, - "learning_rate": 6.523498509158029e-06, - "loss": 0.3722, - "step": 4900 - }, - { - "epoch": 0.3485173815058648, - "grad_norm": 16.438976188514197, - "learning_rate": 6.516399261678262e-06, - "loss": 0.3776, - "step": 4910 - }, - { - "epoch": 0.34922719287349385, - "grad_norm": 11.76911671905372, - "learning_rate": 6.509300014198495e-06, - "loss": 0.3987, - "step": 4920 - }, - { - "epoch": 0.3499370042411229, - "grad_norm": 12.380867918847773, - "learning_rate": 6.502200766718728e-06, - "loss": 0.3949, - "step": 4930 - }, - { - "epoch": 0.350646815608752, - "grad_norm": 8.367704037629133, - "learning_rate": 6.495101519238961e-06, - "loss": 0.3767, - "step": 4940 - }, - { - "epoch": 0.35135662697638104, - "grad_norm": 74.35690108296033, - "learning_rate": 6.488002271759194e-06, - "loss": 0.3819, - "step": 4950 - }, - { - "epoch": 0.3520664383440101, - "grad_norm": 16.231219614665278, - "learning_rate": 6.480903024279427e-06, - "loss": 0.3859, - "step": 4960 - }, - { - "epoch": 0.3527762497116391, - "grad_norm": 9.060846103909238, - "learning_rate": 6.47380377679966e-06, - "loss": 0.394, - "step": 4970 - }, - { - "epoch": 0.35348606107926817, - "grad_norm": 21.88016531222193, - "learning_rate": 6.466704529319893e-06, - "loss": 0.4167, - "step": 4980 - }, - { - "epoch": 0.35419587244689726, - "grad_norm": 9.919040843315045, - "learning_rate": 6.459605281840125e-06, - "loss": 0.4192, - "step": 4990 - }, - { - "epoch": 0.3549056838145263, - "grad_norm": 5.183299722151934, - "learning_rate": 6.4525060343603584e-06, - "loss": 0.4249, - "step": 5000 - }, - { - "epoch": 0.35561549518215535, - "grad_norm": 8.847185946354221, - "learning_rate": 6.4454067868805915e-06, - "loss": 0.4112, - "step": 5010 - }, - { - "epoch": 0.3563253065497844, - "grad_norm": 11.864215621262682, - "learning_rate": 6.438307539400825e-06, - "loss": 0.4165, - "step": 5020 - }, - { - "epoch": 0.35703511791741344, - "grad_norm": 3.3703428369603503, - "learning_rate": 6.431208291921057e-06, - "loss": 0.3978, - "step": 5030 - }, - { - "epoch": 0.3577449292850425, - "grad_norm": 5.015316577294299, - "learning_rate": 6.42410904444129e-06, - "loss": 0.3872, - "step": 5040 - }, - { - "epoch": 0.3584547406526716, - "grad_norm": 4.2137919102595305, - "learning_rate": 6.417009796961523e-06, - "loss": 0.3766, - "step": 5050 - }, - { - "epoch": 0.3591645520203006, - "grad_norm": 3.0372315306510056, - "learning_rate": 6.409910549481756e-06, - "loss": 0.3842, - "step": 5060 - }, - { - "epoch": 0.35987436338792966, - "grad_norm": 2.7515400586423318, - "learning_rate": 6.4028113020019885e-06, - "loss": 0.3993, - "step": 5070 - }, - { - "epoch": 0.3605841747555587, - "grad_norm": 9.185207292504243, - "learning_rate": 6.395712054522221e-06, - "loss": 0.3875, - "step": 5080 - }, - { - "epoch": 0.36129398612318775, - "grad_norm": 19.515842749867563, - "learning_rate": 6.388612807042454e-06, - "loss": 0.4035, - "step": 5090 - }, - { - "epoch": 0.3620037974908168, - "grad_norm": 12.30636697197178, - "learning_rate": 6.381513559562686e-06, - "loss": 0.4035, - "step": 5100 - }, - { - "epoch": 0.3627136088584459, - "grad_norm": 6.732979846623905, - "learning_rate": 6.374414312082919e-06, - "loss": 0.4079, - "step": 5110 - }, - { - "epoch": 0.36342342022607493, - "grad_norm": 6.642326962423095, - "learning_rate": 6.367315064603152e-06, - "loss": 0.3945, - "step": 5120 - }, - { - "epoch": 0.364133231593704, - "grad_norm": 6.314154234087903, - "learning_rate": 6.360215817123385e-06, - "loss": 0.394, - "step": 5130 - }, - { - "epoch": 0.364843042961333, - "grad_norm": 4.760512258914551, - "learning_rate": 6.3531165696436185e-06, - "loss": 0.3863, - "step": 5140 - }, - { - "epoch": 0.36555285432896206, - "grad_norm": 4.048747245175314, - "learning_rate": 6.346017322163851e-06, - "loss": 0.3863, - "step": 5150 - }, - { - "epoch": 0.3662626656965911, - "grad_norm": 4.190578946223062, - "learning_rate": 6.338918074684084e-06, - "loss": 0.3723, - "step": 5160 - }, - { - "epoch": 0.3669724770642202, - "grad_norm": 4.175965799380943, - "learning_rate": 6.331818827204317e-06, - "loss": 0.3889, - "step": 5170 - }, - { - "epoch": 0.36768228843184925, - "grad_norm": 4.807186811656143, - "learning_rate": 6.32471957972455e-06, - "loss": 0.3874, - "step": 5180 - }, - { - "epoch": 0.3683920997994783, - "grad_norm": 6.659345248185456, - "learning_rate": 6.317620332244782e-06, - "loss": 0.3711, - "step": 5190 - }, - { - "epoch": 0.36910191116710733, - "grad_norm": 7.2186380945453905, - "learning_rate": 6.310521084765015e-06, - "loss": 0.3827, - "step": 5200 - }, - { - "epoch": 0.3698117225347364, - "grad_norm": 5.005630183658748, - "learning_rate": 6.3034218372852485e-06, - "loss": 0.3983, - "step": 5210 - }, - { - "epoch": 0.3705215339023654, - "grad_norm": 3.527405153009429, - "learning_rate": 6.296322589805482e-06, - "loss": 0.367, - "step": 5220 - }, - { - "epoch": 0.3712313452699945, - "grad_norm": 3.882199465110045, - "learning_rate": 6.289223342325715e-06, - "loss": 0.3883, - "step": 5230 - }, - { - "epoch": 0.37194115663762356, - "grad_norm": 7.463055050907344, - "learning_rate": 6.282124094845947e-06, - "loss": 0.3823, - "step": 5240 - }, - { - "epoch": 0.3726509680052526, - "grad_norm": 8.000906237369843, - "learning_rate": 6.27502484736618e-06, - "loss": 0.383, - "step": 5250 - }, - { - "epoch": 0.37336077937288165, - "grad_norm": 8.362063303535368, - "learning_rate": 6.267925599886413e-06, - "loss": 0.3893, - "step": 5260 - }, - { - "epoch": 0.3740705907405107, - "grad_norm": 4.721914441661691, - "learning_rate": 6.260826352406646e-06, - "loss": 0.3763, - "step": 5270 - }, - { - "epoch": 0.3747804021081398, - "grad_norm": 12.175797518430029, - "learning_rate": 6.253727104926878e-06, - "loss": 0.3977, - "step": 5280 - }, - { - "epoch": 0.37549021347576883, - "grad_norm": 9.814402397906687, - "learning_rate": 6.246627857447111e-06, - "loss": 0.3716, - "step": 5290 - }, - { - "epoch": 0.3762000248433979, - "grad_norm": 47.1450002499556, - "learning_rate": 6.239528609967344e-06, - "loss": 0.3792, - "step": 5300 - }, - { - "epoch": 0.3769098362110269, - "grad_norm": 27.513481595283608, - "learning_rate": 6.232429362487576e-06, - "loss": 0.3734, - "step": 5310 - }, - { - "epoch": 0.37761964757865596, - "grad_norm": 48.09984812385904, - "learning_rate": 6.225330115007809e-06, - "loss": 0.3873, - "step": 5320 - }, - { - "epoch": 0.378329458946285, - "grad_norm": 5.065884658180426, - "learning_rate": 6.218230867528042e-06, - "loss": 0.39, - "step": 5330 - }, - { - "epoch": 0.3790392703139141, - "grad_norm": 9.226418902203303, - "learning_rate": 6.2111316200482754e-06, - "loss": 0.3819, - "step": 5340 - }, - { - "epoch": 0.37974908168154314, - "grad_norm": 6.998201025336219, - "learning_rate": 6.204032372568508e-06, - "loss": 0.3818, - "step": 5350 - }, - { - "epoch": 0.3804588930491722, - "grad_norm": 4.086309894015096, - "learning_rate": 6.196933125088741e-06, - "loss": 0.3573, - "step": 5360 - }, - { - "epoch": 0.38116870441680123, - "grad_norm": 8.280993749723958, - "learning_rate": 6.189833877608974e-06, - "loss": 0.3763, - "step": 5370 - }, - { - "epoch": 0.3818785157844303, - "grad_norm": 4.086208683086361, - "learning_rate": 6.182734630129207e-06, - "loss": 0.3754, - "step": 5380 - }, - { - "epoch": 0.3825883271520593, - "grad_norm": 5.958244425553627, - "learning_rate": 6.17563538264944e-06, - "loss": 0.3844, - "step": 5390 - }, - { - "epoch": 0.3832981385196884, - "grad_norm": 3.580000162662889, - "learning_rate": 6.168536135169672e-06, - "loss": 0.382, - "step": 5400 - }, - { - "epoch": 0.38400794988731746, - "grad_norm": 2.986600327490101, - "learning_rate": 6.1614368876899054e-06, - "loss": 0.3722, - "step": 5410 - }, - { - "epoch": 0.3847177612549465, - "grad_norm": 3.253411703330411, - "learning_rate": 6.1543376402101386e-06, - "loss": 0.3723, - "step": 5420 - }, - { - "epoch": 0.38542757262257554, - "grad_norm": 5.02266916683139, - "learning_rate": 6.147238392730372e-06, - "loss": 0.353, - "step": 5430 - }, - { - "epoch": 0.3861373839902046, - "grad_norm": 6.509810117314743, - "learning_rate": 6.140139145250604e-06, - "loss": 0.3859, - "step": 5440 - }, - { - "epoch": 0.38684719535783363, - "grad_norm": 3.024955665262126, - "learning_rate": 6.133039897770837e-06, - "loss": 0.3929, - "step": 5450 - }, - { - "epoch": 0.3875570067254627, - "grad_norm": 3.1517938939602206, - "learning_rate": 6.12594065029107e-06, - "loss": 0.3899, - "step": 5460 - }, - { - "epoch": 0.38826681809309177, - "grad_norm": 4.545747430477116, - "learning_rate": 6.118841402811303e-06, - "loss": 0.376, - "step": 5470 - }, - { - "epoch": 0.3889766294607208, - "grad_norm": 4.069699163399179, - "learning_rate": 6.111742155331535e-06, - "loss": 0.3813, - "step": 5480 - }, - { - "epoch": 0.38968644082834986, - "grad_norm": 3.562062075517251, - "learning_rate": 6.104642907851768e-06, - "loss": 0.383, - "step": 5490 - }, - { - "epoch": 0.3903962521959789, - "grad_norm": 9.15980720106711, - "learning_rate": 6.097543660372001e-06, - "loss": 0.3921, - "step": 5500 - }, - { - "epoch": 0.391106063563608, - "grad_norm": 4.449111409231249, - "learning_rate": 6.090444412892234e-06, - "loss": 0.3823, - "step": 5510 - }, - { - "epoch": 0.39181587493123704, - "grad_norm": 12.724861852641904, - "learning_rate": 6.083345165412466e-06, - "loss": 0.3851, - "step": 5520 - }, - { - "epoch": 0.3925256862988661, - "grad_norm": 6.615402324691555, - "learning_rate": 6.076245917932699e-06, - "loss": 0.3667, - "step": 5530 - }, - { - "epoch": 0.3932354976664951, - "grad_norm": 8.817203015753774, - "learning_rate": 6.069146670452932e-06, - "loss": 0.3886, - "step": 5540 - }, - { - "epoch": 0.39394530903412417, - "grad_norm": 9.192960733910674, - "learning_rate": 6.0620474229731655e-06, - "loss": 0.3794, - "step": 5550 - }, - { - "epoch": 0.3946551204017532, - "grad_norm": 4.825188565131958, - "learning_rate": 6.054948175493398e-06, - "loss": 0.3786, - "step": 5560 - }, - { - "epoch": 0.3953649317693823, - "grad_norm": 6.68078822940831, - "learning_rate": 6.047848928013631e-06, - "loss": 0.3835, - "step": 5570 - }, - { - "epoch": 0.39607474313701135, - "grad_norm": 2.6400726840916175, - "learning_rate": 6.040749680533864e-06, - "loss": 0.381, - "step": 5580 - }, - { - "epoch": 0.3967845545046404, - "grad_norm": 3.6668671304324967, - "learning_rate": 6.033650433054097e-06, - "loss": 0.3745, - "step": 5590 - }, - { - "epoch": 0.39749436587226944, - "grad_norm": 2.639833206365908, - "learning_rate": 6.026551185574329e-06, - "loss": 0.3777, - "step": 5600 - }, - { - "epoch": 0.3982041772398985, - "grad_norm": 3.79888213287165, - "learning_rate": 6.019451938094562e-06, - "loss": 0.3911, - "step": 5610 - }, - { - "epoch": 0.3989139886075275, - "grad_norm": 5.09183422587413, - "learning_rate": 6.0123526906147955e-06, - "loss": 0.3832, - "step": 5620 - }, - { - "epoch": 0.3996237999751566, - "grad_norm": 3.3401895175000926, - "learning_rate": 6.005253443135029e-06, - "loss": 0.3862, - "step": 5630 - }, - { - "epoch": 0.40033361134278567, - "grad_norm": 2.5702329959348726, - "learning_rate": 5.998154195655262e-06, - "loss": 0.3934, - "step": 5640 - }, - { - "epoch": 0.4010434227104147, - "grad_norm": 3.0044071678975937, - "learning_rate": 5.991054948175494e-06, - "loss": 0.3826, - "step": 5650 - }, - { - "epoch": 0.40175323407804375, - "grad_norm": 2.412654779599852, - "learning_rate": 5.983955700695727e-06, - "loss": 0.3969, - "step": 5660 - }, - { - "epoch": 0.4024630454456728, - "grad_norm": 3.0767944703908356, - "learning_rate": 5.97685645321596e-06, - "loss": 0.3961, - "step": 5670 - }, - { - "epoch": 0.40317285681330184, - "grad_norm": 2.8053230371522124, - "learning_rate": 5.969757205736193e-06, - "loss": 0.3869, - "step": 5680 - }, - { - "epoch": 0.40388266818093094, - "grad_norm": 7.472643121749521, - "learning_rate": 5.962657958256425e-06, - "loss": 0.3851, - "step": 5690 - }, - { - "epoch": 0.40459247954856, - "grad_norm": 14.585388143398843, - "learning_rate": 5.955558710776658e-06, - "loss": 0.3905, - "step": 5700 - }, - { - "epoch": 0.405302290916189, - "grad_norm": 4.416692599365141, - "learning_rate": 5.948459463296891e-06, - "loss": 0.3862, - "step": 5710 - }, - { - "epoch": 0.40601210228381807, - "grad_norm": 3.4729116521336776, - "learning_rate": 5.941360215817123e-06, - "loss": 0.402, - "step": 5720 - }, - { - "epoch": 0.4067219136514471, - "grad_norm": 3.5423705326787114, - "learning_rate": 5.934260968337356e-06, - "loss": 0.3884, - "step": 5730 - }, - { - "epoch": 0.4074317250190762, - "grad_norm": 3.1365000657861497, - "learning_rate": 5.927161720857589e-06, - "loss": 0.3825, - "step": 5740 - }, - { - "epoch": 0.40814153638670525, - "grad_norm": 5.219488757508086, - "learning_rate": 5.9200624733778224e-06, - "loss": 0.3894, - "step": 5750 - }, - { - "epoch": 0.4088513477543343, - "grad_norm": 3.596909048940233, - "learning_rate": 5.9129632258980555e-06, - "loss": 0.3831, - "step": 5760 - }, - { - "epoch": 0.40956115912196334, - "grad_norm": 2.476134224023759, - "learning_rate": 5.905863978418288e-06, - "loss": 0.3825, - "step": 5770 - }, - { - "epoch": 0.4102709704895924, - "grad_norm": 3.407930958961138, - "learning_rate": 5.898764730938521e-06, - "loss": 0.3714, - "step": 5780 - }, - { - "epoch": 0.4109807818572214, - "grad_norm": 3.6349280667767636, - "learning_rate": 5.891665483458754e-06, - "loss": 0.3949, - "step": 5790 - }, - { - "epoch": 0.4116905932248505, - "grad_norm": 10.032880290815127, - "learning_rate": 5.884566235978987e-06, - "loss": 0.3827, - "step": 5800 - }, - { - "epoch": 0.41240040459247956, - "grad_norm": 4.403552459945297, - "learning_rate": 5.877466988499219e-06, - "loss": 0.3738, - "step": 5810 - }, - { - "epoch": 0.4131102159601086, - "grad_norm": 3.2630803210797086, - "learning_rate": 5.8703677410194525e-06, - "loss": 0.3947, - "step": 5820 - }, - { - "epoch": 0.41382002732773765, - "grad_norm": 11.228663057773362, - "learning_rate": 5.8632684935396856e-06, - "loss": 0.3825, - "step": 5830 - }, - { - "epoch": 0.4145298386953667, - "grad_norm": 18.33844649221444, - "learning_rate": 5.856169246059919e-06, - "loss": 0.381, - "step": 5840 - }, - { - "epoch": 0.41523965006299574, - "grad_norm": 14.576257048715338, - "learning_rate": 5.849069998580152e-06, - "loss": 0.389, - "step": 5850 - }, - { - "epoch": 0.41594946143062483, - "grad_norm": 3.3799659706310177, - "learning_rate": 5.841970751100384e-06, - "loss": 0.3687, - "step": 5860 - }, - { - "epoch": 0.4166592727982539, - "grad_norm": 4.306786145673671, - "learning_rate": 5.834871503620617e-06, - "loss": 0.3846, - "step": 5870 - }, - { - "epoch": 0.4173690841658829, - "grad_norm": 2.71585444285802, - "learning_rate": 5.82777225614085e-06, - "loss": 0.397, - "step": 5880 - }, - { - "epoch": 0.41807889553351196, - "grad_norm": 4.530639455269193, - "learning_rate": 5.820673008661082e-06, - "loss": 0.3633, - "step": 5890 - }, - { - "epoch": 0.418788706901141, - "grad_norm": 5.299365856406392, - "learning_rate": 5.813573761181315e-06, - "loss": 0.3854, - "step": 5900 - }, - { - "epoch": 0.41949851826877005, - "grad_norm": 3.5533453867575786, - "learning_rate": 5.806474513701548e-06, - "loss": 0.3855, - "step": 5910 - }, - { - "epoch": 0.42020832963639915, - "grad_norm": 9.388008852057116, - "learning_rate": 5.799375266221781e-06, - "loss": 0.3911, - "step": 5920 - }, - { - "epoch": 0.4209181410040282, - "grad_norm": 3.378607546141685, - "learning_rate": 5.792276018742013e-06, - "loss": 0.3751, - "step": 5930 - }, - { - "epoch": 0.42162795237165723, - "grad_norm": 12.222073948575716, - "learning_rate": 5.785176771262246e-06, - "loss": 0.3778, - "step": 5940 - }, - { - "epoch": 0.4223377637392863, - "grad_norm": 4.297952573306613, - "learning_rate": 5.778077523782479e-06, - "loss": 0.3827, - "step": 5950 - }, - { - "epoch": 0.4230475751069153, - "grad_norm": 9.764464171752504, - "learning_rate": 5.7709782763027125e-06, - "loss": 0.3893, - "step": 5960 - }, - { - "epoch": 0.4237573864745444, - "grad_norm": 3.7569225597805658, - "learning_rate": 5.763879028822945e-06, - "loss": 0.3901, - "step": 5970 - }, - { - "epoch": 0.42446719784217346, - "grad_norm": 3.0005485619903824, - "learning_rate": 5.756779781343178e-06, - "loss": 0.3753, - "step": 5980 - }, - { - "epoch": 0.4251770092098025, - "grad_norm": 6.457104695432505, - "learning_rate": 5.749680533863411e-06, - "loss": 0.3585, - "step": 5990 - }, - { - "epoch": 0.42588682057743155, - "grad_norm": 4.252684527352716, - "learning_rate": 5.742581286383644e-06, - "loss": 0.3745, - "step": 6000 - }, - { - "epoch": 0.4265966319450606, - "grad_norm": 3.3319349737549673, - "learning_rate": 5.735482038903877e-06, - "loss": 0.3836, - "step": 6010 - }, - { - "epoch": 0.42730644331268963, - "grad_norm": 4.333001859655407, - "learning_rate": 5.728382791424109e-06, - "loss": 0.3698, - "step": 6020 - }, - { - "epoch": 0.42801625468031873, - "grad_norm": 3.9838864194561343, - "learning_rate": 5.7212835439443425e-06, - "loss": 0.3686, - "step": 6030 - }, - { - "epoch": 0.4287260660479478, - "grad_norm": 3.206673737162168, - "learning_rate": 5.714184296464576e-06, - "loss": 0.374, - "step": 6040 - }, - { - "epoch": 0.4294358774155768, - "grad_norm": 7.910008181832549, - "learning_rate": 5.707085048984809e-06, - "loss": 0.3731, - "step": 6050 - }, - { - "epoch": 0.43014568878320586, - "grad_norm": 11.533279860672804, - "learning_rate": 5.699985801505041e-06, - "loss": 0.3842, - "step": 6060 - }, - { - "epoch": 0.4308555001508349, - "grad_norm": 4.06817553254219, - "learning_rate": 5.692886554025274e-06, - "loss": 0.3717, - "step": 6070 - }, - { - "epoch": 0.43156531151846395, - "grad_norm": 12.082596102938004, - "learning_rate": 5.685787306545507e-06, - "loss": 0.3971, - "step": 6080 - }, - { - "epoch": 0.43227512288609304, - "grad_norm": 2.685455478240202, - "learning_rate": 5.678688059065739e-06, - "loss": 0.3822, - "step": 6090 - }, - { - "epoch": 0.4329849342537221, - "grad_norm": 3.1399973614222643, - "learning_rate": 5.671588811585972e-06, - "loss": 0.3774, - "step": 6100 - }, - { - "epoch": 0.43369474562135113, - "grad_norm": 3.518374812592983, - "learning_rate": 5.664489564106205e-06, - "loss": 0.3781, - "step": 6110 - }, - { - "epoch": 0.4344045569889802, - "grad_norm": 4.803932844471321, - "learning_rate": 5.657390316626438e-06, - "loss": 0.3757, - "step": 6120 - }, - { - "epoch": 0.4351143683566092, - "grad_norm": 12.690594810777407, - "learning_rate": 5.650291069146671e-06, - "loss": 0.3747, - "step": 6130 - }, - { - "epoch": 0.43582417972423826, - "grad_norm": 10.80688099347966, - "learning_rate": 5.643191821666903e-06, - "loss": 0.3676, - "step": 6140 - }, - { - "epoch": 0.43653399109186736, - "grad_norm": 4.232034052682343, - "learning_rate": 5.636092574187136e-06, - "loss": 0.395, - "step": 6150 - }, - { - "epoch": 0.4372438024594964, - "grad_norm": 3.422739256279243, - "learning_rate": 5.6289933267073694e-06, - "loss": 0.3693, - "step": 6160 - }, - { - "epoch": 0.43795361382712544, - "grad_norm": 32.06006758689784, - "learning_rate": 5.6218940792276025e-06, - "loss": 0.3782, - "step": 6170 - }, - { - "epoch": 0.4386634251947545, - "grad_norm": 5.623034465377633, - "learning_rate": 5.614794831747835e-06, - "loss": 0.3813, - "step": 6180 - }, - { - "epoch": 0.43937323656238353, - "grad_norm": 10.612805886316337, - "learning_rate": 5.607695584268068e-06, - "loss": 0.3702, - "step": 6190 - }, - { - "epoch": 0.44008304793001257, - "grad_norm": 6.077674805742986, - "learning_rate": 5.600596336788301e-06, - "loss": 0.3643, - "step": 6200 - }, - { - "epoch": 0.44079285929764167, - "grad_norm": 7.053795971115957, - "learning_rate": 5.593497089308534e-06, - "loss": 0.3911, - "step": 6210 - }, - { - "epoch": 0.4415026706652707, - "grad_norm": 6.212842792838621, - "learning_rate": 5.586397841828766e-06, - "loss": 0.3774, - "step": 6220 - }, - { - "epoch": 0.44221248203289976, - "grad_norm": 7.598832178623656, - "learning_rate": 5.5792985943489995e-06, - "loss": 0.3808, - "step": 6230 - }, - { - "epoch": 0.4429222934005288, - "grad_norm": 14.834315377312098, - "learning_rate": 5.5721993468692326e-06, - "loss": 0.3765, - "step": 6240 - }, - { - "epoch": 0.44363210476815784, - "grad_norm": 15.459970963070427, - "learning_rate": 5.565100099389466e-06, - "loss": 0.3863, - "step": 6250 - }, - { - "epoch": 0.44434191613578694, - "grad_norm": 5.002895033502256, - "learning_rate": 5.558000851909699e-06, - "loss": 0.3718, - "step": 6260 - }, - { - "epoch": 0.445051727503416, - "grad_norm": 4.67592371180372, - "learning_rate": 5.550901604429931e-06, - "loss": 0.3869, - "step": 6270 - }, - { - "epoch": 0.445761538871045, - "grad_norm": 4.246040554798665, - "learning_rate": 5.543802356950164e-06, - "loss": 0.3673, - "step": 6280 - }, - { - "epoch": 0.44647135023867407, - "grad_norm": 5.698576828390134, - "learning_rate": 5.536703109470397e-06, - "loss": 0.3733, - "step": 6290 - }, - { - "epoch": 0.4471811616063031, - "grad_norm": 4.890818923695549, - "learning_rate": 5.529603861990629e-06, - "loss": 0.3917, - "step": 6300 - }, - { - "epoch": 0.44789097297393216, - "grad_norm": 3.5954099385229, - "learning_rate": 5.522504614510862e-06, - "loss": 0.387, - "step": 6310 - }, - { - "epoch": 0.44860078434156125, - "grad_norm": 5.819667912733057, - "learning_rate": 5.515405367031095e-06, - "loss": 0.3772, - "step": 6320 - }, - { - "epoch": 0.4493105957091903, - "grad_norm": 4.924613328068802, - "learning_rate": 5.508306119551328e-06, - "loss": 0.3691, - "step": 6330 - }, - { - "epoch": 0.45002040707681934, - "grad_norm": 4.077670226838275, - "learning_rate": 5.50120687207156e-06, - "loss": 0.3606, - "step": 6340 - }, - { - "epoch": 0.4507302184444484, - "grad_norm": 4.7425966011878815, - "learning_rate": 5.494107624591793e-06, - "loss": 0.3712, - "step": 6350 - }, - { - "epoch": 0.4514400298120774, - "grad_norm": 3.7724063921848, - "learning_rate": 5.487008377112026e-06, - "loss": 0.3707, - "step": 6360 - }, - { - "epoch": 0.45214984117970647, - "grad_norm": 2.8597041255348183, - "learning_rate": 5.4799091296322595e-06, - "loss": 0.364, - "step": 6370 - }, - { - "epoch": 0.45285965254733557, - "grad_norm": 5.386440052681094, - "learning_rate": 5.472809882152493e-06, - "loss": 0.3785, - "step": 6380 - }, - { - "epoch": 0.4535694639149646, - "grad_norm": 4.20147189666546, - "learning_rate": 5.465710634672725e-06, - "loss": 0.384, - "step": 6390 - }, - { - "epoch": 0.45427927528259365, - "grad_norm": 5.4360613411555185, - "learning_rate": 5.458611387192958e-06, - "loss": 0.3676, - "step": 6400 - }, - { - "epoch": 0.4549890866502227, - "grad_norm": 7.4543272167324846, - "learning_rate": 5.451512139713191e-06, - "loss": 0.3973, - "step": 6410 - }, - { - "epoch": 0.45569889801785174, - "grad_norm": 5.302161729787796, - "learning_rate": 5.444412892233424e-06, - "loss": 0.3878, - "step": 6420 - }, - { - "epoch": 0.4564087093854808, - "grad_norm": 4.774927845954586, - "learning_rate": 5.437313644753656e-06, - "loss": 0.368, - "step": 6430 - }, - { - "epoch": 0.4571185207531099, - "grad_norm": 4.733108202290537, - "learning_rate": 5.4302143972738895e-06, - "loss": 0.3841, - "step": 6440 - }, - { - "epoch": 0.4578283321207389, - "grad_norm": 4.581655513075473, - "learning_rate": 5.423115149794123e-06, - "loss": 0.3805, - "step": 6450 - }, - { - "epoch": 0.45853814348836797, - "grad_norm": 2.4364404744853445, - "learning_rate": 5.416015902314356e-06, - "loss": 0.3587, - "step": 6460 - }, - { - "epoch": 0.459247954855997, - "grad_norm": 5.16394378928267, - "learning_rate": 5.408916654834588e-06, - "loss": 0.3793, - "step": 6470 - }, - { - "epoch": 0.45995776622362605, - "grad_norm": 8.232574335670192, - "learning_rate": 5.401817407354821e-06, - "loss": 0.3794, - "step": 6480 - }, - { - "epoch": 0.46066757759125515, - "grad_norm": 10.509485180483269, - "learning_rate": 5.394718159875054e-06, - "loss": 0.3742, - "step": 6490 - }, - { - "epoch": 0.4613773889588842, - "grad_norm": 3.418180521754276, - "learning_rate": 5.387618912395286e-06, - "loss": 0.3733, - "step": 6500 - }, - { - "epoch": 0.46208720032651324, - "grad_norm": 4.2689703556593495, - "learning_rate": 5.380519664915519e-06, - "loss": 0.374, - "step": 6510 - }, - { - "epoch": 0.4627970116941423, - "grad_norm": 7.896842999549548, - "learning_rate": 5.373420417435752e-06, - "loss": 0.3799, - "step": 6520 - }, - { - "epoch": 0.4635068230617713, - "grad_norm": 3.4870838077093893, - "learning_rate": 5.366321169955985e-06, - "loss": 0.3712, - "step": 6530 - }, - { - "epoch": 0.46421663442940037, - "grad_norm": 27.778526824166995, - "learning_rate": 5.359221922476218e-06, - "loss": 0.3655, - "step": 6540 - }, - { - "epoch": 0.46492644579702946, - "grad_norm": 16.796202092439216, - "learning_rate": 5.35212267499645e-06, - "loss": 0.3846, - "step": 6550 - }, - { - "epoch": 0.4656362571646585, - "grad_norm": 5.698856930659158, - "learning_rate": 5.345023427516683e-06, - "loss": 0.3877, - "step": 6560 - }, - { - "epoch": 0.46634606853228755, - "grad_norm": 8.694016798434083, - "learning_rate": 5.3379241800369165e-06, - "loss": 0.3607, - "step": 6570 - }, - { - "epoch": 0.4670558798999166, - "grad_norm": 3.617969654098083, - "learning_rate": 5.3308249325571496e-06, - "loss": 0.36, - "step": 6580 - }, - { - "epoch": 0.46776569126754564, - "grad_norm": 7.181014577384461, - "learning_rate": 5.323725685077382e-06, - "loss": 0.3783, - "step": 6590 - }, - { - "epoch": 0.4684755026351747, - "grad_norm": 9.52331650225055, - "learning_rate": 5.316626437597615e-06, - "loss": 0.3707, - "step": 6600 - }, - { - "epoch": 0.4691853140028038, - "grad_norm": 5.927560976046885, - "learning_rate": 5.309527190117848e-06, - "loss": 0.3747, - "step": 6610 - }, - { - "epoch": 0.4698951253704328, - "grad_norm": 33.354649195054265, - "learning_rate": 5.302427942638081e-06, - "loss": 0.3622, - "step": 6620 - }, - { - "epoch": 0.47060493673806186, - "grad_norm": 5.109478632635811, - "learning_rate": 5.295328695158314e-06, - "loss": 0.3702, - "step": 6630 - }, - { - "epoch": 0.4713147481056909, - "grad_norm": 62.14127099005149, - "learning_rate": 5.2882294476785465e-06, - "loss": 0.3718, - "step": 6640 - }, - { - "epoch": 0.47202455947331995, - "grad_norm": 3.9646315343813674, - "learning_rate": 5.2811302001987796e-06, - "loss": 0.3579, - "step": 6650 - }, - { - "epoch": 0.472734370840949, - "grad_norm": 5.822229945732986, - "learning_rate": 5.274030952719013e-06, - "loss": 0.358, - "step": 6660 - }, - { - "epoch": 0.4734441822085781, - "grad_norm": 3.0706990453586607, - "learning_rate": 5.266931705239246e-06, - "loss": 0.3712, - "step": 6670 - }, - { - "epoch": 0.47415399357620713, - "grad_norm": 2.763541771977754, - "learning_rate": 5.259832457759478e-06, - "loss": 0.3862, - "step": 6680 - }, - { - "epoch": 0.4748638049438362, - "grad_norm": 2.8054880505902746, - "learning_rate": 5.252733210279711e-06, - "loss": 0.3609, - "step": 6690 - }, - { - "epoch": 0.4755736163114652, - "grad_norm": 3.5455500616555864, - "learning_rate": 5.245633962799943e-06, - "loss": 0.3845, - "step": 6700 - }, - { - "epoch": 0.47628342767909426, - "grad_norm": 6.871049315984216, - "learning_rate": 5.238534715320176e-06, - "loss": 0.3681, - "step": 6710 - }, - { - "epoch": 0.47699323904672336, - "grad_norm": 4.626136895991325, - "learning_rate": 5.231435467840409e-06, - "loss": 0.3694, - "step": 6720 - }, - { - "epoch": 0.4777030504143524, - "grad_norm": 4.1689737774582385, - "learning_rate": 5.224336220360642e-06, - "loss": 0.3722, - "step": 6730 - }, - { - "epoch": 0.47841286178198145, - "grad_norm": 2.345831388882716, - "learning_rate": 5.217236972880875e-06, - "loss": 0.3778, - "step": 6740 - }, - { - "epoch": 0.4791226731496105, - "grad_norm": 5.181993551246977, - "learning_rate": 5.210137725401107e-06, - "loss": 0.3649, - "step": 6750 - }, - { - "epoch": 0.47983248451723953, - "grad_norm": 4.144025528380454, - "learning_rate": 5.20303847792134e-06, - "loss": 0.3854, - "step": 6760 - }, - { - "epoch": 0.4805422958848686, - "grad_norm": 4.0013049178877536, - "learning_rate": 5.195939230441573e-06, - "loss": 0.3832, - "step": 6770 - }, - { - "epoch": 0.4812521072524977, - "grad_norm": 4.375334224867565, - "learning_rate": 5.1888399829618065e-06, - "loss": 0.3678, - "step": 6780 - }, - { - "epoch": 0.4819619186201267, - "grad_norm": 2.8158913555106926, - "learning_rate": 5.18174073548204e-06, - "loss": 0.3735, - "step": 6790 - }, - { - "epoch": 0.48267172998775576, - "grad_norm": 4.286259213586135, - "learning_rate": 5.174641488002272e-06, - "loss": 0.3824, - "step": 6800 - }, - { - "epoch": 0.4833815413553848, - "grad_norm": 2.917255310557774, - "learning_rate": 5.167542240522505e-06, - "loss": 0.367, - "step": 6810 - }, - { - "epoch": 0.48409135272301385, - "grad_norm": 2.9474809991081194, - "learning_rate": 5.160442993042738e-06, - "loss": 0.37, - "step": 6820 - }, - { - "epoch": 0.4848011640906429, - "grad_norm": 8.0892973566849, - "learning_rate": 5.153343745562971e-06, - "loss": 0.385, - "step": 6830 - }, - { - "epoch": 0.485510975458272, - "grad_norm": 5.46237208189901, - "learning_rate": 5.1462444980832034e-06, - "loss": 0.3723, - "step": 6840 - }, - { - "epoch": 0.48622078682590103, - "grad_norm": 4.813397707683654, - "learning_rate": 5.1391452506034365e-06, - "loss": 0.3847, - "step": 6850 - }, - { - "epoch": 0.4869305981935301, - "grad_norm": 3.839632822272105, - "learning_rate": 5.13204600312367e-06, - "loss": 0.3994, - "step": 6860 - }, - { - "epoch": 0.4876404095611591, - "grad_norm": 2.731217984269613, - "learning_rate": 5.124946755643903e-06, - "loss": 0.3928, - "step": 6870 - }, - { - "epoch": 0.48835022092878816, - "grad_norm": 7.062296596699752, - "learning_rate": 5.117847508164136e-06, - "loss": 0.4141, - "step": 6880 - }, - { - "epoch": 0.4890600322964172, - "grad_norm": 3.0471865890050034, - "learning_rate": 5.110748260684368e-06, - "loss": 0.3712, - "step": 6890 - }, - { - "epoch": 0.4897698436640463, - "grad_norm": 8.240874357274272, - "learning_rate": 5.103649013204601e-06, - "loss": 0.3828, - "step": 6900 - }, - { - "epoch": 0.49047965503167534, - "grad_norm": 4.557814239490917, - "learning_rate": 5.0965497657248334e-06, - "loss": 0.3794, - "step": 6910 - }, - { - "epoch": 0.4911894663993044, - "grad_norm": 6.50934729087624, - "learning_rate": 5.089450518245066e-06, - "loss": 0.3655, - "step": 6920 - }, - { - "epoch": 0.49189927776693343, - "grad_norm": 2.7892154452796696, - "learning_rate": 5.082351270765299e-06, - "loss": 0.3477, - "step": 6930 - }, - { - "epoch": 0.4926090891345625, - "grad_norm": 4.296820022815862, - "learning_rate": 5.075252023285532e-06, - "loss": 0.3917, - "step": 6940 - }, - { - "epoch": 0.4933189005021915, - "grad_norm": 3.7811542108069514, - "learning_rate": 5.068152775805765e-06, - "loss": 0.3846, - "step": 6950 - }, - { - "epoch": 0.4940287118698206, - "grad_norm": 12.150770506288081, - "learning_rate": 5.061053528325997e-06, - "loss": 0.3991, - "step": 6960 - }, - { - "epoch": 0.49473852323744966, - "grad_norm": 8.737862487013935, - "learning_rate": 5.05395428084623e-06, - "loss": 0.376, - "step": 6970 - }, - { - "epoch": 0.4954483346050787, - "grad_norm": 4.705086993153889, - "learning_rate": 5.0468550333664635e-06, - "loss": 0.3774, - "step": 6980 - }, - { - "epoch": 0.49615814597270774, - "grad_norm": 3.95177864719572, - "learning_rate": 5.0397557858866966e-06, - "loss": 0.3867, - "step": 6990 - }, - { - "epoch": 0.4968679573403368, - "grad_norm": 4.9228476674024995, - "learning_rate": 5.03265653840693e-06, - "loss": 0.3868, - "step": 7000 - }, - { - "epoch": 0.4975777687079659, - "grad_norm": 7.598944675436029, - "learning_rate": 5.025557290927162e-06, - "loss": 0.3791, - "step": 7010 - }, - { - "epoch": 0.4982875800755949, - "grad_norm": 3.948022335506646, - "learning_rate": 5.018458043447395e-06, - "loss": 0.3878, - "step": 7020 - }, - { - "epoch": 0.49899739144322397, - "grad_norm": 2.97600555704115, - "learning_rate": 5.011358795967628e-06, - "loss": 0.3891, - "step": 7030 - }, - { - "epoch": 0.499707202810853, - "grad_norm": 7.322058927387839, - "learning_rate": 5.004259548487861e-06, - "loss": 0.3739, - "step": 7040 - }, - { - "epoch": 0.5004170141784821, - "grad_norm": 4.054563164115399, - "learning_rate": 4.9971603010080935e-06, - "loss": 0.3654, - "step": 7050 - }, - { - "epoch": 0.5011268255461111, - "grad_norm": 6.433797069878189, - "learning_rate": 4.990061053528326e-06, - "loss": 0.3769, - "step": 7060 - }, - { - "epoch": 0.5018366369137401, - "grad_norm": 6.244381336548628, - "learning_rate": 4.982961806048559e-06, - "loss": 0.3698, - "step": 7070 - }, - { - "epoch": 0.5025464482813692, - "grad_norm": 4.649812061123292, - "learning_rate": 4.975862558568792e-06, - "loss": 0.3597, - "step": 7080 - }, - { - "epoch": 0.5032562596489982, - "grad_norm": 13.131635539716475, - "learning_rate": 4.968763311089025e-06, - "loss": 0.3737, - "step": 7090 - }, - { - "epoch": 0.5039660710166274, - "grad_norm": 11.654767208116397, - "learning_rate": 4.961664063609258e-06, - "loss": 0.3809, - "step": 7100 - }, - { - "epoch": 0.5046758823842564, - "grad_norm": 5.54405844933368, - "learning_rate": 4.95456481612949e-06, - "loss": 0.3668, - "step": 7110 - }, - { - "epoch": 0.5053856937518855, - "grad_norm": 17.63140898183613, - "learning_rate": 4.9474655686497235e-06, - "loss": 0.3751, - "step": 7120 - }, - { - "epoch": 0.5060955051195145, - "grad_norm": 4.735270750917372, - "learning_rate": 4.940366321169957e-06, - "loss": 0.3759, - "step": 7130 - }, - { - "epoch": 0.5068053164871436, - "grad_norm": 3.6005983980475214, - "learning_rate": 4.93326707369019e-06, - "loss": 0.3932, - "step": 7140 - }, - { - "epoch": 0.5075151278547726, - "grad_norm": 5.073652881259414, - "learning_rate": 4.926167826210422e-06, - "loss": 0.3689, - "step": 7150 - }, - { - "epoch": 0.5082249392224016, - "grad_norm": 6.515311066715168, - "learning_rate": 4.919068578730655e-06, - "loss": 0.3675, - "step": 7160 - }, - { - "epoch": 0.5089347505900307, - "grad_norm": 12.98913332417653, - "learning_rate": 4.911969331250887e-06, - "loss": 0.3861, - "step": 7170 - }, - { - "epoch": 0.5096445619576597, - "grad_norm": 5.1500756291258005, - "learning_rate": 4.90487008377112e-06, - "loss": 0.3731, - "step": 7180 - }, - { - "epoch": 0.5103543733252888, - "grad_norm": 5.833801547579832, - "learning_rate": 4.8977708362913535e-06, - "loss": 0.3831, - "step": 7190 - }, - { - "epoch": 0.5110641846929178, - "grad_norm": 8.343761477251691, - "learning_rate": 4.890671588811587e-06, - "loss": 0.3716, - "step": 7200 - }, - { - "epoch": 0.511773996060547, - "grad_norm": 6.740845613760958, - "learning_rate": 4.883572341331819e-06, - "loss": 0.377, - "step": 7210 - }, - { - "epoch": 0.512483807428176, - "grad_norm": 6.834960096187304, - "learning_rate": 4.876473093852052e-06, - "loss": 0.3774, - "step": 7220 - }, - { - "epoch": 0.513193618795805, - "grad_norm": 6.333904565562881, - "learning_rate": 4.869373846372285e-06, - "loss": 0.3786, - "step": 7230 - }, - { - "epoch": 0.5139034301634341, - "grad_norm": 7.380378873059882, - "learning_rate": 4.862274598892518e-06, - "loss": 0.3641, - "step": 7240 - }, - { - "epoch": 0.5146132415310631, - "grad_norm": 8.15711157363267, - "learning_rate": 4.855175351412751e-06, - "loss": 0.354, - "step": 7250 - }, - { - "epoch": 0.5153230528986922, - "grad_norm": 5.298194233144714, - "learning_rate": 4.8480761039329835e-06, - "loss": 0.3648, - "step": 7260 - }, - { - "epoch": 0.5160328642663212, - "grad_norm": 6.169565228174972, - "learning_rate": 4.840976856453216e-06, - "loss": 0.3606, - "step": 7270 - }, - { - "epoch": 0.5167426756339503, - "grad_norm": 4.633952354333419, - "learning_rate": 4.833877608973449e-06, - "loss": 0.3627, - "step": 7280 - }, - { - "epoch": 0.5174524870015793, - "grad_norm": 7.754370375548218, - "learning_rate": 4.826778361493682e-06, - "loss": 0.384, - "step": 7290 - }, - { - "epoch": 0.5181622983692084, - "grad_norm": 4.628647672477682, - "learning_rate": 4.819679114013915e-06, - "loss": 0.3717, - "step": 7300 - }, - { - "epoch": 0.5188721097368374, - "grad_norm": 4.6108119740619165, - "learning_rate": 4.812579866534147e-06, - "loss": 0.3531, - "step": 7310 - }, - { - "epoch": 0.5195819211044664, - "grad_norm": 3.777480319775288, - "learning_rate": 4.8054806190543805e-06, - "loss": 0.3735, - "step": 7320 - }, - { - "epoch": 0.5202917324720956, - "grad_norm": 6.455151414772601, - "learning_rate": 4.7983813715746136e-06, - "loss": 0.3845, - "step": 7330 - }, - { - "epoch": 0.5210015438397246, - "grad_norm": 5.0016880570007, - "learning_rate": 4.791282124094847e-06, - "loss": 0.3588, - "step": 7340 - }, - { - "epoch": 0.5217113552073537, - "grad_norm": 3.596195253014758, - "learning_rate": 4.78418287661508e-06, - "loss": 0.3664, - "step": 7350 - }, - { - "epoch": 0.5224211665749827, - "grad_norm": 4.6111563525428005, - "learning_rate": 4.777083629135312e-06, - "loss": 0.3815, - "step": 7360 - }, - { - "epoch": 0.5231309779426118, - "grad_norm": 3.81079107236397, - "learning_rate": 4.769984381655544e-06, - "loss": 0.3603, - "step": 7370 - }, - { - "epoch": 0.5238407893102408, - "grad_norm": 10.081677733455512, - "learning_rate": 4.762885134175777e-06, - "loss": 0.3748, - "step": 7380 - }, - { - "epoch": 0.5245506006778698, - "grad_norm": 4.011909680570432, - "learning_rate": 4.7557858866960105e-06, - "loss": 0.3736, - "step": 7390 - }, - { - "epoch": 0.5252604120454989, - "grad_norm": 4.008812937992125, - "learning_rate": 4.7486866392162436e-06, - "loss": 0.3718, - "step": 7400 - }, - { - "epoch": 0.5259702234131279, - "grad_norm": 8.895014071619777, - "learning_rate": 4.741587391736477e-06, - "loss": 0.3747, - "step": 7410 - }, - { - "epoch": 0.526680034780757, - "grad_norm": 2.5646865204368394, - "learning_rate": 4.734488144256709e-06, - "loss": 0.3593, - "step": 7420 - }, - { - "epoch": 0.527389846148386, - "grad_norm": 2.8583907278858147, - "learning_rate": 4.727388896776942e-06, - "loss": 0.387, - "step": 7430 - }, - { - "epoch": 0.5280996575160151, - "grad_norm": 2.8626323560816296, - "learning_rate": 4.720289649297175e-06, - "loss": 0.3756, - "step": 7440 - }, - { - "epoch": 0.5288094688836442, - "grad_norm": 7.38191434335366, - "learning_rate": 4.713190401817408e-06, - "loss": 0.3715, - "step": 7450 - }, - { - "epoch": 0.5295192802512733, - "grad_norm": 3.187699709665762, - "learning_rate": 4.7060911543376405e-06, - "loss": 0.3763, - "step": 7460 - }, - { - "epoch": 0.5302290916189023, - "grad_norm": 2.2423385405265366, - "learning_rate": 4.698991906857874e-06, - "loss": 0.367, - "step": 7470 - }, - { - "epoch": 0.5309389029865313, - "grad_norm": 3.5525056364166465, - "learning_rate": 4.691892659378106e-06, - "loss": 0.3639, - "step": 7480 - }, - { - "epoch": 0.5316487143541604, - "grad_norm": 2.5840538292895405, - "learning_rate": 4.684793411898339e-06, - "loss": 0.3713, - "step": 7490 - }, - { - "epoch": 0.5323585257217894, - "grad_norm": 3.6015272776951366, - "learning_rate": 4.677694164418572e-06, - "loss": 0.3672, - "step": 7500 - }, - { - "epoch": 0.5330683370894185, - "grad_norm": 2.958338857599813, - "learning_rate": 4.670594916938805e-06, - "loss": 0.364, - "step": 7510 - }, - { - "epoch": 0.5337781484570475, - "grad_norm": 2.6780802400700248, - "learning_rate": 4.663495669459037e-06, - "loss": 0.3871, - "step": 7520 - }, - { - "epoch": 0.5344879598246766, - "grad_norm": 2.141486624042336, - "learning_rate": 4.6563964219792705e-06, - "loss": 0.3918, - "step": 7530 - }, - { - "epoch": 0.5351977711923056, - "grad_norm": 11.627725180923038, - "learning_rate": 4.649297174499504e-06, - "loss": 0.369, - "step": 7540 - }, - { - "epoch": 0.5359075825599346, - "grad_norm": 2.164302320101156, - "learning_rate": 4.642197927019737e-06, - "loss": 0.3763, - "step": 7550 - }, - { - "epoch": 0.5366173939275638, - "grad_norm": 2.5355641201406716, - "learning_rate": 4.63509867953997e-06, - "loss": 0.3709, - "step": 7560 - }, - { - "epoch": 0.5373272052951928, - "grad_norm": 1.7486780225096559, - "learning_rate": 4.627999432060202e-06, - "loss": 0.3778, - "step": 7570 - }, - { - "epoch": 0.5380370166628219, - "grad_norm": 2.1996857828607066, - "learning_rate": 4.620900184580434e-06, - "loss": 0.3878, - "step": 7580 - }, - { - "epoch": 0.5387468280304509, - "grad_norm": 2.2718302971034325, - "learning_rate": 4.613800937100667e-06, - "loss": 0.3691, - "step": 7590 - }, - { - "epoch": 0.53945663939808, - "grad_norm": 2.247788269458988, - "learning_rate": 4.6067016896209005e-06, - "loss": 0.3764, - "step": 7600 - }, - { - "epoch": 0.540166450765709, - "grad_norm": 4.951241532022136, - "learning_rate": 4.599602442141134e-06, - "loss": 0.3696, - "step": 7610 - }, - { - "epoch": 0.5408762621333381, - "grad_norm": 18.87723312065313, - "learning_rate": 4.592503194661366e-06, - "loss": 0.3752, - "step": 7620 - }, - { - "epoch": 0.5415860735009671, - "grad_norm": 4.839150391451601, - "learning_rate": 4.585403947181599e-06, - "loss": 0.3704, - "step": 7630 - }, - { - "epoch": 0.5422958848685961, - "grad_norm": 3.252448644894675, - "learning_rate": 4.578304699701832e-06, - "loss": 0.3662, - "step": 7640 - }, - { - "epoch": 0.5430056962362252, - "grad_norm": 4.636061450249123, - "learning_rate": 4.571205452222065e-06, - "loss": 0.3695, - "step": 7650 - }, - { - "epoch": 0.5437155076038542, - "grad_norm": 2.217398025384477, - "learning_rate": 4.564106204742298e-06, - "loss": 0.381, - "step": 7660 - }, - { - "epoch": 0.5444253189714833, - "grad_norm": 3.2864797627789764, - "learning_rate": 4.5570069572625305e-06, - "loss": 0.3766, - "step": 7670 - }, - { - "epoch": 0.5451351303391124, - "grad_norm": 2.5595280528292346, - "learning_rate": 4.549907709782763e-06, - "loss": 0.3753, - "step": 7680 - }, - { - "epoch": 0.5458449417067415, - "grad_norm": 3.5869951931087356, - "learning_rate": 4.542808462302996e-06, - "loss": 0.3649, - "step": 7690 - }, - { - "epoch": 0.5465547530743705, - "grad_norm": 2.878804286325741, - "learning_rate": 4.535709214823229e-06, - "loss": 0.365, - "step": 7700 - }, - { - "epoch": 0.5472645644419996, - "grad_norm": 3.835428702840037, - "learning_rate": 4.528609967343462e-06, - "loss": 0.388, - "step": 7710 - }, - { - "epoch": 0.5479743758096286, - "grad_norm": 3.3115804743584225, - "learning_rate": 4.521510719863695e-06, - "loss": 0.3498, - "step": 7720 - }, - { - "epoch": 0.5486841871772576, - "grad_norm": 2.155325207710473, - "learning_rate": 4.5144114723839275e-06, - "loss": 0.3638, - "step": 7730 - }, - { - "epoch": 0.5493939985448867, - "grad_norm": 2.9473064158817506, - "learning_rate": 4.5073122249041606e-06, - "loss": 0.3756, - "step": 7740 - }, - { - "epoch": 0.5501038099125157, - "grad_norm": 3.977038197892431, - "learning_rate": 4.500212977424394e-06, - "loss": 0.3674, - "step": 7750 - }, - { - "epoch": 0.5508136212801448, - "grad_norm": 5.638630944163406, - "learning_rate": 4.493113729944627e-06, - "loss": 0.3528, - "step": 7760 - }, - { - "epoch": 0.5515234326477738, - "grad_norm": 2.8534926361264286, - "learning_rate": 4.486014482464859e-06, - "loss": 0.3697, - "step": 7770 - }, - { - "epoch": 0.5522332440154029, - "grad_norm": 6.069502646886042, - "learning_rate": 4.478915234985092e-06, - "loss": 0.3843, - "step": 7780 - }, - { - "epoch": 0.552943055383032, - "grad_norm": 4.343605351910854, - "learning_rate": 4.471815987505324e-06, - "loss": 0.3783, - "step": 7790 - }, - { - "epoch": 0.553652866750661, - "grad_norm": 2.104465858436518, - "learning_rate": 4.4647167400255575e-06, - "loss": 0.3601, - "step": 7800 - }, - { - "epoch": 0.5543626781182901, - "grad_norm": 3.0902122663518448, - "learning_rate": 4.457617492545791e-06, - "loss": 0.3801, - "step": 7810 - }, - { - "epoch": 0.5550724894859191, - "grad_norm": 4.573352955842933, - "learning_rate": 4.450518245066024e-06, - "loss": 0.3835, - "step": 7820 - }, - { - "epoch": 0.5557823008535482, - "grad_norm": 2.9707860507790924, - "learning_rate": 4.443418997586256e-06, - "loss": 0.3709, - "step": 7830 - }, - { - "epoch": 0.5564921122211772, - "grad_norm": 2.5687241689417806, - "learning_rate": 4.436319750106489e-06, - "loss": 0.3835, - "step": 7840 - }, - { - "epoch": 0.5572019235888063, - "grad_norm": 3.347322471582433, - "learning_rate": 4.429220502626722e-06, - "loss": 0.3735, - "step": 7850 - }, - { - "epoch": 0.5579117349564353, - "grad_norm": 6.431823861299619, - "learning_rate": 4.422121255146955e-06, - "loss": 0.3704, - "step": 7860 - }, - { - "epoch": 0.5586215463240644, - "grad_norm": 3.050115422109329, - "learning_rate": 4.4150220076671875e-06, - "loss": 0.3822, - "step": 7870 - }, - { - "epoch": 0.5593313576916934, - "grad_norm": 1.7811591664189523, - "learning_rate": 4.407922760187421e-06, - "loss": 0.3658, - "step": 7880 - }, - { - "epoch": 0.5600411690593224, - "grad_norm": 3.442846796158278, - "learning_rate": 4.400823512707653e-06, - "loss": 0.3621, - "step": 7890 - }, - { - "epoch": 0.5607509804269515, - "grad_norm": 7.2461896738177, - "learning_rate": 4.393724265227886e-06, - "loss": 0.3526, - "step": 7900 - }, - { - "epoch": 0.5614607917945806, - "grad_norm": 2.0219408065827875, - "learning_rate": 4.386625017748119e-06, - "loss": 0.3659, - "step": 7910 - }, - { - "epoch": 0.5621706031622097, - "grad_norm": 4.896944413168855, - "learning_rate": 4.379525770268352e-06, - "loss": 0.3765, - "step": 7920 - }, - { - "epoch": 0.5628804145298387, - "grad_norm": 2.1094695887191848, - "learning_rate": 4.372426522788584e-06, - "loss": 0.3644, - "step": 7930 - }, - { - "epoch": 0.5635902258974678, - "grad_norm": 5.596991296221292, - "learning_rate": 4.3653272753088175e-06, - "loss": 0.3835, - "step": 7940 - }, - { - "epoch": 0.5643000372650968, - "grad_norm": 2.373450501523087, - "learning_rate": 4.358228027829051e-06, - "loss": 0.3756, - "step": 7950 - }, - { - "epoch": 0.5650098486327259, - "grad_norm": 4.1947432157390026, - "learning_rate": 4.351128780349284e-06, - "loss": 0.3787, - "step": 7960 - }, - { - "epoch": 0.5657196600003549, - "grad_norm": 2.921985411820113, - "learning_rate": 4.344029532869517e-06, - "loss": 0.3746, - "step": 7970 - }, - { - "epoch": 0.5664294713679839, - "grad_norm": 13.63904398617421, - "learning_rate": 4.336930285389749e-06, - "loss": 0.3535, - "step": 7980 - }, - { - "epoch": 0.567139282735613, - "grad_norm": 2.6665592498045037, - "learning_rate": 4.329831037909981e-06, - "loss": 0.3668, - "step": 7990 - }, - { - "epoch": 0.567849094103242, - "grad_norm": 2.7866449972058795, - "learning_rate": 4.3227317904302144e-06, - "loss": 0.3747, - "step": 8000 - }, - { - "epoch": 0.5685589054708711, - "grad_norm": 2.795372211208224, - "learning_rate": 4.3156325429504475e-06, - "loss": 0.3737, - "step": 8010 - }, - { - "epoch": 0.5692687168385002, - "grad_norm": 2.829992387736084, - "learning_rate": 4.308533295470681e-06, - "loss": 0.3813, - "step": 8020 - }, - { - "epoch": 0.5699785282061293, - "grad_norm": 3.8835793195310706, - "learning_rate": 4.301434047990914e-06, - "loss": 0.3934, - "step": 8030 - }, - { - "epoch": 0.5706883395737583, - "grad_norm": 2.157944880021205, - "learning_rate": 4.294334800511146e-06, - "loss": 0.3619, - "step": 8040 - }, - { - "epoch": 0.5713981509413874, - "grad_norm": 2.576031100575868, - "learning_rate": 4.287235553031379e-06, - "loss": 0.3654, - "step": 8050 - }, - { - "epoch": 0.5721079623090164, - "grad_norm": 2.1013120962560445, - "learning_rate": 4.280136305551612e-06, - "loss": 0.3808, - "step": 8060 - }, - { - "epoch": 0.5728177736766454, - "grad_norm": 8.72915943640877, - "learning_rate": 4.273037058071845e-06, - "loss": 0.3865, - "step": 8070 - }, - { - "epoch": 0.5735275850442745, - "grad_norm": 3.1373379205439123, - "learning_rate": 4.2659378105920776e-06, - "loss": 0.3631, - "step": 8080 - }, - { - "epoch": 0.5742373964119035, - "grad_norm": 10.697527972561883, - "learning_rate": 4.258838563112311e-06, - "loss": 0.3597, - "step": 8090 - }, - { - "epoch": 0.5749472077795326, - "grad_norm": 3.6970932139238095, - "learning_rate": 4.251739315632543e-06, - "loss": 0.3635, - "step": 8100 - }, - { - "epoch": 0.5756570191471616, - "grad_norm": 2.4203467674630206, - "learning_rate": 4.244640068152776e-06, - "loss": 0.359, - "step": 8110 - }, - { - "epoch": 0.5763668305147907, - "grad_norm": 2.9395692807103035, - "learning_rate": 4.237540820673009e-06, - "loss": 0.3603, - "step": 8120 - }, - { - "epoch": 0.5770766418824197, - "grad_norm": 3.012599979258794, - "learning_rate": 4.230441573193242e-06, - "loss": 0.3568, - "step": 8130 - }, - { - "epoch": 0.5777864532500488, - "grad_norm": 6.667370402568531, - "learning_rate": 4.2233423257134745e-06, - "loss": 0.3629, - "step": 8140 - }, - { - "epoch": 0.5784962646176779, - "grad_norm": 4.471487834006219, - "learning_rate": 4.2162430782337076e-06, - "loss": 0.3683, - "step": 8150 - }, - { - "epoch": 0.5792060759853069, - "grad_norm": 3.599804032694662, - "learning_rate": 4.209143830753941e-06, - "loss": 0.3554, - "step": 8160 - }, - { - "epoch": 0.579915887352936, - "grad_norm": 2.9142466980850985, - "learning_rate": 4.202044583274174e-06, - "loss": 0.3524, - "step": 8170 - }, - { - "epoch": 0.580625698720565, - "grad_norm": 3.8569199714753295, - "learning_rate": 4.194945335794406e-06, - "loss": 0.3663, - "step": 8180 - }, - { - "epoch": 0.5813355100881941, - "grad_norm": 2.4068975949006077, - "learning_rate": 4.187846088314639e-06, - "loss": 0.3747, - "step": 8190 - }, - { - "epoch": 0.5820453214558231, - "grad_norm": 6.174322801188514, - "learning_rate": 4.180746840834871e-06, - "loss": 0.372, - "step": 8200 - }, - { - "epoch": 0.5827551328234521, - "grad_norm": 2.888969982284499, - "learning_rate": 4.1736475933551045e-06, - "loss": 0.361, - "step": 8210 - }, - { - "epoch": 0.5834649441910812, - "grad_norm": 4.910093339119916, - "learning_rate": 4.166548345875338e-06, - "loss": 0.3574, - "step": 8220 - }, - { - "epoch": 0.5841747555587102, - "grad_norm": 5.1058356496999755, - "learning_rate": 4.159449098395571e-06, - "loss": 0.3786, - "step": 8230 - }, - { - "epoch": 0.5848845669263393, - "grad_norm": 14.081326767892058, - "learning_rate": 4.152349850915803e-06, - "loss": 0.3729, - "step": 8240 - }, - { - "epoch": 0.5855943782939684, - "grad_norm": 4.958684438886047, - "learning_rate": 4.145250603436036e-06, - "loss": 0.3566, - "step": 8250 - }, - { - "epoch": 0.5863041896615975, - "grad_norm": 3.9438637049329075, - "learning_rate": 4.138151355956269e-06, - "loss": 0.3861, - "step": 8260 - }, - { - "epoch": 0.5870140010292265, - "grad_norm": 2.9499712942928107, - "learning_rate": 4.131052108476502e-06, - "loss": 0.3439, - "step": 8270 - }, - { - "epoch": 0.5877238123968556, - "grad_norm": 3.332966504823502, - "learning_rate": 4.1239528609967345e-06, - "loss": 0.3788, - "step": 8280 - }, - { - "epoch": 0.5884336237644846, - "grad_norm": 27.970854056782667, - "learning_rate": 4.116853613516968e-06, - "loss": 0.3591, - "step": 8290 - }, - { - "epoch": 0.5891434351321136, - "grad_norm": 4.487327484061174, - "learning_rate": 4.1097543660372e-06, - "loss": 0.3625, - "step": 8300 - }, - { - "epoch": 0.5898532464997427, - "grad_norm": 3.8006981727665496, - "learning_rate": 4.102655118557433e-06, - "loss": 0.3709, - "step": 8310 - }, - { - "epoch": 0.5905630578673717, - "grad_norm": 3.463457513521014, - "learning_rate": 4.095555871077666e-06, - "loss": 0.3641, - "step": 8320 - }, - { - "epoch": 0.5912728692350008, - "grad_norm": 7.640707242523127, - "learning_rate": 4.088456623597899e-06, - "loss": 0.3648, - "step": 8330 - }, - { - "epoch": 0.5919826806026298, - "grad_norm": 2.8614936603096295, - "learning_rate": 4.081357376118132e-06, - "loss": 0.3616, - "step": 8340 - }, - { - "epoch": 0.5926924919702589, - "grad_norm": 3.296737746561609, - "learning_rate": 4.0742581286383645e-06, - "loss": 0.3808, - "step": 8350 - }, - { - "epoch": 0.5934023033378879, - "grad_norm": 3.2426352432246976, - "learning_rate": 4.067158881158598e-06, - "loss": 0.3583, - "step": 8360 - }, - { - "epoch": 0.5941121147055171, - "grad_norm": 3.4522007032736806, - "learning_rate": 4.060059633678831e-06, - "loss": 0.365, - "step": 8370 - }, - { - "epoch": 0.5948219260731461, - "grad_norm": 3.9166457660699145, - "learning_rate": 4.052960386199063e-06, - "loss": 0.3692, - "step": 8380 - }, - { - "epoch": 0.5955317374407751, - "grad_norm": 2.9039677495535874, - "learning_rate": 4.045861138719296e-06, - "loss": 0.3468, - "step": 8390 - }, - { - "epoch": 0.5962415488084042, - "grad_norm": 3.187977468656372, - "learning_rate": 4.038761891239529e-06, - "loss": 0.359, - "step": 8400 - }, - { - "epoch": 0.5969513601760332, - "grad_norm": 4.529576318117622, - "learning_rate": 4.0316626437597614e-06, - "loss": 0.3452, - "step": 8410 - }, - { - "epoch": 0.5976611715436623, - "grad_norm": 6.601726345536697, - "learning_rate": 4.0245633962799945e-06, - "loss": 0.3713, - "step": 8420 - }, - { - "epoch": 0.5983709829112913, - "grad_norm": 2.4278158486667576, - "learning_rate": 4.017464148800228e-06, - "loss": 0.3628, - "step": 8430 - }, - { - "epoch": 0.5990807942789204, - "grad_norm": 2.76630569189727, - "learning_rate": 4.010364901320461e-06, - "loss": 0.3704, - "step": 8440 - }, - { - "epoch": 0.5997906056465494, - "grad_norm": 6.7843620715556545, - "learning_rate": 4.003265653840693e-06, - "loss": 0.3682, - "step": 8450 - }, - { - "epoch": 0.6005004170141784, - "grad_norm": 2.9403338895288336, - "learning_rate": 3.996166406360926e-06, - "loss": 0.3608, - "step": 8460 - }, - { - "epoch": 0.6012102283818075, - "grad_norm": 4.301178222098619, - "learning_rate": 3.989067158881159e-06, - "loss": 0.3595, - "step": 8470 - }, - { - "epoch": 0.6019200397494366, - "grad_norm": 3.0914199152912696, - "learning_rate": 3.981967911401392e-06, - "loss": 0.3718, - "step": 8480 - }, - { - "epoch": 0.6026298511170657, - "grad_norm": 2.753384437967004, - "learning_rate": 3.9748686639216246e-06, - "loss": 0.3672, - "step": 8490 - }, - { - "epoch": 0.6033396624846947, - "grad_norm": 2.576321546323924, - "learning_rate": 3.967769416441858e-06, - "loss": 0.3706, - "step": 8500 - }, - { - "epoch": 0.6040494738523238, - "grad_norm": 2.617904283815147, - "learning_rate": 3.96067016896209e-06, - "loss": 0.3539, - "step": 8510 - }, - { - "epoch": 0.6047592852199528, - "grad_norm": 4.862875127190094, - "learning_rate": 3.953570921482323e-06, - "loss": 0.3763, - "step": 8520 - }, - { - "epoch": 0.6054690965875819, - "grad_norm": 4.741023889550647, - "learning_rate": 3.946471674002556e-06, - "loss": 0.3611, - "step": 8530 - }, - { - "epoch": 0.6061789079552109, - "grad_norm": 6.394478684199079, - "learning_rate": 3.939372426522789e-06, - "loss": 0.3615, - "step": 8540 - }, - { - "epoch": 0.6068887193228399, - "grad_norm": 4.045100357410319, - "learning_rate": 3.9322731790430215e-06, - "loss": 0.3648, - "step": 8550 - }, - { - "epoch": 0.607598530690469, - "grad_norm": 3.756852697194425, - "learning_rate": 3.925173931563255e-06, - "loss": 0.3689, - "step": 8560 - }, - { - "epoch": 0.608308342058098, - "grad_norm": 4.04897373953826, - "learning_rate": 3.918074684083488e-06, - "loss": 0.3644, - "step": 8570 - }, - { - "epoch": 0.6090181534257271, - "grad_norm": 4.036663207362448, - "learning_rate": 3.910975436603721e-06, - "loss": 0.366, - "step": 8580 - }, - { - "epoch": 0.6097279647933561, - "grad_norm": 4.156260594948616, - "learning_rate": 3.903876189123953e-06, - "loss": 0.3554, - "step": 8590 - }, - { - "epoch": 0.6104377761609853, - "grad_norm": 3.398605568980307, - "learning_rate": 3.896776941644186e-06, - "loss": 0.3717, - "step": 8600 - }, - { - "epoch": 0.6111475875286143, - "grad_norm": 3.5114677948249065, - "learning_rate": 3.889677694164418e-06, - "loss": 0.3677, - "step": 8610 - }, - { - "epoch": 0.6118573988962434, - "grad_norm": 4.753605099187553, - "learning_rate": 3.8825784466846515e-06, - "loss": 0.3547, - "step": 8620 - }, - { - "epoch": 0.6125672102638724, - "grad_norm": 3.4243729659259334, - "learning_rate": 3.875479199204885e-06, - "loss": 0.3762, - "step": 8630 - }, - { - "epoch": 0.6132770216315014, - "grad_norm": 5.94912381861312, - "learning_rate": 3.868379951725118e-06, - "loss": 0.359, - "step": 8640 - }, - { - "epoch": 0.6139868329991305, - "grad_norm": 6.590267176028699, - "learning_rate": 3.861280704245351e-06, - "loss": 0.3758, - "step": 8650 - }, - { - "epoch": 0.6146966443667595, - "grad_norm": 3.3256854782540497, - "learning_rate": 3.854181456765583e-06, - "loss": 0.3562, - "step": 8660 - }, - { - "epoch": 0.6154064557343886, - "grad_norm": 3.6453120360212816, - "learning_rate": 3.847082209285816e-06, - "loss": 0.3619, - "step": 8670 - }, - { - "epoch": 0.6161162671020176, - "grad_norm": 13.965716037023453, - "learning_rate": 3.839982961806049e-06, - "loss": 0.3646, - "step": 8680 - }, - { - "epoch": 0.6168260784696467, - "grad_norm": 7.837860273774759, - "learning_rate": 3.8328837143262815e-06, - "loss": 0.3457, - "step": 8690 - }, - { - "epoch": 0.6175358898372757, - "grad_norm": 4.729547574214101, - "learning_rate": 3.825784466846515e-06, - "loss": 0.3565, - "step": 8700 - }, - { - "epoch": 0.6182457012049049, - "grad_norm": 2.5619385732076987, - "learning_rate": 3.818685219366748e-06, - "loss": 0.3676, - "step": 8710 - }, - { - "epoch": 0.6189555125725339, - "grad_norm": 6.790019325573497, - "learning_rate": 3.8115859718869804e-06, - "loss": 0.3646, - "step": 8720 - }, - { - "epoch": 0.6196653239401629, - "grad_norm": 3.3195434105048665, - "learning_rate": 3.804486724407213e-06, - "loss": 0.3575, - "step": 8730 - }, - { - "epoch": 0.620375135307792, - "grad_norm": 3.805294873305076, - "learning_rate": 3.797387476927446e-06, - "loss": 0.3657, - "step": 8740 - }, - { - "epoch": 0.621084946675421, - "grad_norm": 5.59682650769057, - "learning_rate": 3.790288229447679e-06, - "loss": 0.3609, - "step": 8750 - }, - { - "epoch": 0.6217947580430501, - "grad_norm": 4.89958212672841, - "learning_rate": 3.783188981967912e-06, - "loss": 0.3669, - "step": 8760 - }, - { - "epoch": 0.6225045694106791, - "grad_norm": 8.274929479843232, - "learning_rate": 3.7760897344881446e-06, - "loss": 0.3581, - "step": 8770 - }, - { - "epoch": 0.6232143807783082, - "grad_norm": 3.2978821299433445, - "learning_rate": 3.7689904870083777e-06, - "loss": 0.3679, - "step": 8780 - }, - { - "epoch": 0.6239241921459372, - "grad_norm": 12.435473632592815, - "learning_rate": 3.76189123952861e-06, - "loss": 0.3677, - "step": 8790 - }, - { - "epoch": 0.6246340035135662, - "grad_norm": 4.195421567773733, - "learning_rate": 3.754791992048843e-06, - "loss": 0.3492, - "step": 8800 - }, - { - "epoch": 0.6253438148811953, - "grad_norm": 4.406904963403177, - "learning_rate": 3.7476927445690758e-06, - "loss": 0.3597, - "step": 8810 - }, - { - "epoch": 0.6260536262488243, - "grad_norm": 4.199730218503971, - "learning_rate": 3.740593497089309e-06, - "loss": 0.3797, - "step": 8820 - }, - { - "epoch": 0.6267634376164535, - "grad_norm": 3.3446382282646705, - "learning_rate": 3.7334942496095415e-06, - "loss": 0.3638, - "step": 8830 - }, - { - "epoch": 0.6274732489840825, - "grad_norm": 4.862585068251522, - "learning_rate": 3.7263950021297747e-06, - "loss": 0.3573, - "step": 8840 - }, - { - "epoch": 0.6281830603517116, - "grad_norm": 8.107090011887513, - "learning_rate": 3.7192957546500073e-06, - "loss": 0.3672, - "step": 8850 - }, - { - "epoch": 0.6288928717193406, - "grad_norm": 4.3962651782052005, - "learning_rate": 3.7121965071702404e-06, - "loss": 0.3412, - "step": 8860 - }, - { - "epoch": 0.6296026830869697, - "grad_norm": 4.6424143973536935, - "learning_rate": 3.705097259690473e-06, - "loss": 0.3667, - "step": 8870 - }, - { - "epoch": 0.6303124944545987, - "grad_norm": 3.840268427443435, - "learning_rate": 3.697998012210706e-06, - "loss": 0.3557, - "step": 8880 - }, - { - "epoch": 0.6310223058222277, - "grad_norm": 3.6388205049600018, - "learning_rate": 3.6908987647309385e-06, - "loss": 0.3631, - "step": 8890 - }, - { - "epoch": 0.6317321171898568, - "grad_norm": 5.233530712843461, - "learning_rate": 3.6837995172511716e-06, - "loss": 0.3648, - "step": 8900 - }, - { - "epoch": 0.6324419285574858, - "grad_norm": 3.781452701492992, - "learning_rate": 3.6767002697714042e-06, - "loss": 0.3788, - "step": 8910 - }, - { - "epoch": 0.6331517399251149, - "grad_norm": 6.068345043524154, - "learning_rate": 3.6696010222916373e-06, - "loss": 0.3566, - "step": 8920 - }, - { - "epoch": 0.6338615512927439, - "grad_norm": 5.599734595118006, - "learning_rate": 3.66250177481187e-06, - "loss": 0.349, - "step": 8930 - }, - { - "epoch": 0.634571362660373, - "grad_norm": 10.428150341049763, - "learning_rate": 3.655402527332103e-06, - "loss": 0.3584, - "step": 8940 - }, - { - "epoch": 0.6352811740280021, - "grad_norm": 17.681698800577582, - "learning_rate": 3.648303279852336e-06, - "loss": 0.3458, - "step": 8950 - }, - { - "epoch": 0.6359909853956311, - "grad_norm": 6.591627899287575, - "learning_rate": 3.641204032372569e-06, - "loss": 0.3643, - "step": 8960 - }, - { - "epoch": 0.6367007967632602, - "grad_norm": 31.04186356298661, - "learning_rate": 3.634104784892802e-06, - "loss": 0.3577, - "step": 8970 - }, - { - "epoch": 0.6374106081308892, - "grad_norm": 8.824274787999325, - "learning_rate": 3.6270055374130347e-06, - "loss": 0.3618, - "step": 8980 - }, - { - "epoch": 0.6381204194985183, - "grad_norm": 4.7185603252655826, - "learning_rate": 3.619906289933267e-06, - "loss": 0.3598, - "step": 8990 - }, - { - "epoch": 0.6388302308661473, - "grad_norm": 5.394376788444082, - "learning_rate": 3.6128070424535e-06, - "loss": 0.362, - "step": 9000 - }, - { - "epoch": 0.6395400422337764, - "grad_norm": 7.158347387403476, - "learning_rate": 3.6057077949737327e-06, - "loss": 0.3694, - "step": 9010 - }, - { - "epoch": 0.6402498536014054, - "grad_norm": 8.033101525768098, - "learning_rate": 3.598608547493966e-06, - "loss": 0.3626, - "step": 9020 - }, - { - "epoch": 0.6409596649690344, - "grad_norm": 2.7105647455701667, - "learning_rate": 3.591509300014199e-06, - "loss": 0.3462, - "step": 9030 - }, - { - "epoch": 0.6416694763366635, - "grad_norm": 6.3548259889750955, - "learning_rate": 3.5844100525344316e-06, - "loss": 0.3632, - "step": 9040 - }, - { - "epoch": 0.6423792877042925, - "grad_norm": 7.341190059846113, - "learning_rate": 3.5773108050546647e-06, - "loss": 0.3653, - "step": 9050 - }, - { - "epoch": 0.6430890990719217, - "grad_norm": 3.8869033025489723, - "learning_rate": 3.5702115575748974e-06, - "loss": 0.3412, - "step": 9060 - }, - { - "epoch": 0.6437989104395507, - "grad_norm": 4.918908181105817, - "learning_rate": 3.5631123100951305e-06, - "loss": 0.3616, - "step": 9070 - }, - { - "epoch": 0.6445087218071798, - "grad_norm": 6.124064792410853, - "learning_rate": 3.556013062615363e-06, - "loss": 0.3585, - "step": 9080 - }, - { - "epoch": 0.6452185331748088, - "grad_norm": 3.6806357015000764, - "learning_rate": 3.5489138151355963e-06, - "loss": 0.3668, - "step": 9090 - }, - { - "epoch": 0.6459283445424379, - "grad_norm": 5.193254667513745, - "learning_rate": 3.5418145676558285e-06, - "loss": 0.3669, - "step": 9100 - }, - { - "epoch": 0.6466381559100669, - "grad_norm": 10.978524486328482, - "learning_rate": 3.5347153201760616e-06, - "loss": 0.3597, - "step": 9110 - }, - { - "epoch": 0.647347967277696, - "grad_norm": 4.6611361687349175, - "learning_rate": 3.5276160726962943e-06, - "loss": 0.3695, - "step": 9120 - }, - { - "epoch": 0.648057778645325, - "grad_norm": 5.205492428214056, - "learning_rate": 3.5205168252165274e-06, - "loss": 0.3663, - "step": 9130 - }, - { - "epoch": 0.648767590012954, - "grad_norm": 5.139991204646184, - "learning_rate": 3.51341757773676e-06, - "loss": 0.3551, - "step": 9140 - }, - { - "epoch": 0.6494774013805831, - "grad_norm": 16.35255401640736, - "learning_rate": 3.506318330256993e-06, - "loss": 0.3553, - "step": 9150 - }, - { - "epoch": 0.6501872127482121, - "grad_norm": 10.145378264655722, - "learning_rate": 3.499219082777226e-06, - "loss": 0.3583, - "step": 9160 - }, - { - "epoch": 0.6508970241158412, - "grad_norm": 24.878144093372033, - "learning_rate": 3.492119835297459e-06, - "loss": 0.3555, - "step": 9170 - }, - { - "epoch": 0.6516068354834703, - "grad_norm": 3.902743241561423, - "learning_rate": 3.4850205878176916e-06, - "loss": 0.3723, - "step": 9180 - }, - { - "epoch": 0.6523166468510994, - "grad_norm": 4.458085439514939, - "learning_rate": 3.4779213403379247e-06, - "loss": 0.3701, - "step": 9190 - }, - { - "epoch": 0.6530264582187284, - "grad_norm": 4.717552266761064, - "learning_rate": 3.470822092858157e-06, - "loss": 0.3618, - "step": 9200 - }, - { - "epoch": 0.6537362695863574, - "grad_norm": 4.427364622798698, - "learning_rate": 3.46372284537839e-06, - "loss": 0.3614, - "step": 9210 - }, - { - "epoch": 0.6544460809539865, - "grad_norm": 8.323851654330221, - "learning_rate": 3.4566235978986228e-06, - "loss": 0.3678, - "step": 9220 - }, - { - "epoch": 0.6551558923216155, - "grad_norm": 4.966094347637934, - "learning_rate": 3.449524350418856e-06, - "loss": 0.3688, - "step": 9230 - }, - { - "epoch": 0.6558657036892446, - "grad_norm": 4.930577227679058, - "learning_rate": 3.4424251029390886e-06, - "loss": 0.3503, - "step": 9240 - }, - { - "epoch": 0.6565755150568736, - "grad_norm": 5.52399635730182, - "learning_rate": 3.4353258554593217e-06, - "loss": 0.3696, - "step": 9250 - }, - { - "epoch": 0.6572853264245027, - "grad_norm": 4.590670373221129, - "learning_rate": 3.4282266079795543e-06, - "loss": 0.3685, - "step": 9260 - }, - { - "epoch": 0.6579951377921317, - "grad_norm": 8.264828163926657, - "learning_rate": 3.4211273604997874e-06, - "loss": 0.3575, - "step": 9270 - }, - { - "epoch": 0.6587049491597607, - "grad_norm": 8.133262914973033, - "learning_rate": 3.4140281130200205e-06, - "loss": 0.3713, - "step": 9280 - }, - { - "epoch": 0.6594147605273899, - "grad_norm": 5.742760195932282, - "learning_rate": 3.4069288655402532e-06, - "loss": 0.3725, - "step": 9290 - }, - { - "epoch": 0.6601245718950189, - "grad_norm": 8.53035579823295, - "learning_rate": 3.3998296180604855e-06, - "loss": 0.3599, - "step": 9300 - }, - { - "epoch": 0.660834383262648, - "grad_norm": 4.142002947123207, - "learning_rate": 3.3927303705807186e-06, - "loss": 0.3661, - "step": 9310 - }, - { - "epoch": 0.661544194630277, - "grad_norm": 6.246166093324293, - "learning_rate": 3.3856311231009513e-06, - "loss": 0.351, - "step": 9320 - }, - { - "epoch": 0.6622540059979061, - "grad_norm": 16.243950855343193, - "learning_rate": 3.3785318756211844e-06, - "loss": 0.3479, - "step": 9330 - }, - { - "epoch": 0.6629638173655351, - "grad_norm": 6.147144910165458, - "learning_rate": 3.3714326281414175e-06, - "loss": 0.3543, - "step": 9340 - }, - { - "epoch": 0.6636736287331642, - "grad_norm": 4.099934401177817, - "learning_rate": 3.36433338066165e-06, - "loss": 0.3636, - "step": 9350 - }, - { - "epoch": 0.6643834401007932, - "grad_norm": 4.17019707869721, - "learning_rate": 3.3572341331818832e-06, - "loss": 0.351, - "step": 9360 - }, - { - "epoch": 0.6650932514684222, - "grad_norm": 4.102146778496878, - "learning_rate": 3.350134885702116e-06, - "loss": 0.3737, - "step": 9370 - }, - { - "epoch": 0.6658030628360513, - "grad_norm": 4.155164161456904, - "learning_rate": 3.343035638222349e-06, - "loss": 0.3505, - "step": 9380 - }, - { - "epoch": 0.6665128742036803, - "grad_norm": 4.042739251178277, - "learning_rate": 3.3359363907425817e-06, - "loss": 0.3578, - "step": 9390 - }, - { - "epoch": 0.6672226855713094, - "grad_norm": 3.4724621327513057, - "learning_rate": 3.328837143262814e-06, - "loss": 0.3733, - "step": 9400 - }, - { - "epoch": 0.6679324969389385, - "grad_norm": 3.284294254497063, - "learning_rate": 3.321737895783047e-06, - "loss": 0.361, - "step": 9410 - }, - { - "epoch": 0.6686423083065676, - "grad_norm": 5.224665667041366, - "learning_rate": 3.31463864830328e-06, - "loss": 0.3597, - "step": 9420 - }, - { - "epoch": 0.6693521196741966, - "grad_norm": 13.317891191179472, - "learning_rate": 3.307539400823513e-06, - "loss": 0.36, - "step": 9430 - }, - { - "epoch": 0.6700619310418257, - "grad_norm": 8.338179465785696, - "learning_rate": 3.300440153343746e-06, - "loss": 0.3708, - "step": 9440 - }, - { - "epoch": 0.6707717424094547, - "grad_norm": 4.022884248031831, - "learning_rate": 3.2933409058639786e-06, - "loss": 0.357, - "step": 9450 - }, - { - "epoch": 0.6714815537770837, - "grad_norm": 2.816929350582557, - "learning_rate": 3.2862416583842117e-06, - "loss": 0.3618, - "step": 9460 - }, - { - "epoch": 0.6721913651447128, - "grad_norm": 3.2609706893982278, - "learning_rate": 3.2791424109044444e-06, - "loss": 0.3566, - "step": 9470 - }, - { - "epoch": 0.6729011765123418, - "grad_norm": 2.0212043627509177, - "learning_rate": 3.2720431634246775e-06, - "loss": 0.3631, - "step": 9480 - }, - { - "epoch": 0.6736109878799709, - "grad_norm": 3.472359881135022, - "learning_rate": 3.26494391594491e-06, - "loss": 0.3465, - "step": 9490 - }, - { - "epoch": 0.6743207992475999, - "grad_norm": 2.365708920981696, - "learning_rate": 3.257844668465143e-06, - "loss": 0.36, - "step": 9500 - }, - { - "epoch": 0.675030610615229, - "grad_norm": 6.47059083775482, - "learning_rate": 3.2507454209853755e-06, - "loss": 0.3589, - "step": 9510 - }, - { - "epoch": 0.6757404219828581, - "grad_norm": 2.9761715896390872, - "learning_rate": 3.2436461735056086e-06, - "loss": 0.3737, - "step": 9520 - }, - { - "epoch": 0.6764502333504872, - "grad_norm": 3.2920710102385375, - "learning_rate": 3.2365469260258413e-06, - "loss": 0.3631, - "step": 9530 - }, - { - "epoch": 0.6771600447181162, - "grad_norm": 2.24517655258034, - "learning_rate": 3.2294476785460744e-06, - "loss": 0.3565, - "step": 9540 - }, - { - "epoch": 0.6778698560857452, - "grad_norm": 4.585199424065417, - "learning_rate": 3.222348431066307e-06, - "loss": 0.3587, - "step": 9550 - }, - { - "epoch": 0.6785796674533743, - "grad_norm": 2.616245813772314, - "learning_rate": 3.21524918358654e-06, - "loss": 0.3641, - "step": 9560 - }, - { - "epoch": 0.6792894788210033, - "grad_norm": 6.790868775160296, - "learning_rate": 3.208149936106773e-06, - "loss": 0.3542, - "step": 9570 - }, - { - "epoch": 0.6799992901886324, - "grad_norm": 4.6720875235574955, - "learning_rate": 3.201050688627006e-06, - "loss": 0.3724, - "step": 9580 - }, - { - "epoch": 0.6807091015562614, - "grad_norm": 2.929891653919803, - "learning_rate": 3.193951441147239e-06, - "loss": 0.355, - "step": 9590 - }, - { - "epoch": 0.6814189129238905, - "grad_norm": 2.5935885874594935, - "learning_rate": 3.1868521936674717e-06, - "loss": 0.3477, - "step": 9600 - }, - { - "epoch": 0.6821287242915195, - "grad_norm": 4.16743323358689, - "learning_rate": 3.179752946187704e-06, - "loss": 0.3732, - "step": 9610 - }, - { - "epoch": 0.6828385356591485, - "grad_norm": 3.119963047712144, - "learning_rate": 3.172653698707937e-06, - "loss": 0.3583, - "step": 9620 - }, - { - "epoch": 0.6835483470267776, - "grad_norm": 4.025619816942283, - "learning_rate": 3.1655544512281698e-06, - "loss": 0.3814, - "step": 9630 - }, - { - "epoch": 0.6842581583944067, - "grad_norm": 10.60216606667068, - "learning_rate": 3.158455203748403e-06, - "loss": 0.3599, - "step": 9640 - }, - { - "epoch": 0.6849679697620358, - "grad_norm": 4.461108822226996, - "learning_rate": 3.1513559562686356e-06, - "loss": 0.3619, - "step": 9650 - }, - { - "epoch": 0.6856777811296648, - "grad_norm": 2.7381838956818596, - "learning_rate": 3.1442567087888687e-06, - "loss": 0.361, - "step": 9660 - }, - { - "epoch": 0.6863875924972939, - "grad_norm": 3.3932603213636536, - "learning_rate": 3.1371574613091018e-06, - "loss": 0.3722, - "step": 9670 - }, - { - "epoch": 0.6870974038649229, - "grad_norm": 3.0238463961256556, - "learning_rate": 3.1300582138293344e-06, - "loss": 0.3677, - "step": 9680 - }, - { - "epoch": 0.687807215232552, - "grad_norm": 2.9020326019536236, - "learning_rate": 3.1229589663495675e-06, - "loss": 0.3587, - "step": 9690 - }, - { - "epoch": 0.688517026600181, - "grad_norm": 3.4182793620767313, - "learning_rate": 3.1158597188698002e-06, - "loss": 0.3958, - "step": 9700 - }, - { - "epoch": 0.68922683796781, - "grad_norm": 2.7346693208831123, - "learning_rate": 3.1087604713900325e-06, - "loss": 0.3746, - "step": 9710 - }, - { - "epoch": 0.6899366493354391, - "grad_norm": 2.7001110030197184, - "learning_rate": 3.1016612239102656e-06, - "loss": 0.3596, - "step": 9720 - }, - { - "epoch": 0.6906464607030681, - "grad_norm": 3.8786526590857706, - "learning_rate": 3.0945619764304987e-06, - "loss": 0.3677, - "step": 9730 - }, - { - "epoch": 0.6913562720706972, - "grad_norm": 3.601819125137747, - "learning_rate": 3.0874627289507314e-06, - "loss": 0.3599, - "step": 9740 - }, - { - "epoch": 0.6920660834383263, - "grad_norm": 4.257577712986774, - "learning_rate": 3.0803634814709645e-06, - "loss": 0.3653, - "step": 9750 - }, - { - "epoch": 0.6927758948059554, - "grad_norm": 16.2562479732823, - "learning_rate": 3.073264233991197e-06, - "loss": 0.3786, - "step": 9760 - }, - { - "epoch": 0.6934857061735844, - "grad_norm": 2.8308341290836037, - "learning_rate": 3.0661649865114302e-06, - "loss": 0.347, - "step": 9770 - }, - { - "epoch": 0.6941955175412134, - "grad_norm": 2.386467475595729, - "learning_rate": 3.059065739031663e-06, - "loss": 0.3785, - "step": 9780 - }, - { - "epoch": 0.6949053289088425, - "grad_norm": 3.11594441686047, - "learning_rate": 3.051966491551896e-06, - "loss": 0.3613, - "step": 9790 - }, - { - "epoch": 0.6956151402764715, - "grad_norm": 3.4457140851193677, - "learning_rate": 3.0448672440721287e-06, - "loss": 0.3592, - "step": 9800 - }, - { - "epoch": 0.6963249516441006, - "grad_norm": 6.7733834909511135, - "learning_rate": 3.0377679965923614e-06, - "loss": 0.3503, - "step": 9810 - }, - { - "epoch": 0.6970347630117296, - "grad_norm": 2.552293405448118, - "learning_rate": 3.030668749112594e-06, - "loss": 0.3565, - "step": 9820 - }, - { - "epoch": 0.6977445743793587, - "grad_norm": 7.3573968999972985, - "learning_rate": 3.023569501632827e-06, - "loss": 0.3534, - "step": 9830 - }, - { - "epoch": 0.6984543857469877, - "grad_norm": 2.2835556419626286, - "learning_rate": 3.01647025415306e-06, - "loss": 0.3627, - "step": 9840 - }, - { - "epoch": 0.6991641971146167, - "grad_norm": 4.158935806681915, - "learning_rate": 3.009371006673293e-06, - "loss": 0.3676, - "step": 9850 - }, - { - "epoch": 0.6998740084822458, - "grad_norm": 3.444386024390724, - "learning_rate": 3.0022717591935256e-06, - "loss": 0.3498, - "step": 9860 - }, - { - "epoch": 0.700583819849875, - "grad_norm": 76.68033690471103, - "learning_rate": 2.9951725117137587e-06, - "loss": 0.3465, - "step": 9870 - }, - { - "epoch": 0.701293631217504, - "grad_norm": 2.753848553217651, - "learning_rate": 2.9880732642339914e-06, - "loss": 0.3579, - "step": 9880 - }, - { - "epoch": 0.702003442585133, - "grad_norm": 6.8770901385155465, - "learning_rate": 2.9809740167542245e-06, - "loss": 0.3644, - "step": 9890 - }, - { - "epoch": 0.7027132539527621, - "grad_norm": 8.050770443325867, - "learning_rate": 2.9738747692744576e-06, - "loss": 0.3534, - "step": 9900 - }, - { - "epoch": 0.7034230653203911, - "grad_norm": 6.2381173840397794, - "learning_rate": 2.96677552179469e-06, - "loss": 0.3799, - "step": 9910 - }, - { - "epoch": 0.7041328766880202, - "grad_norm": 2.527197221067041, - "learning_rate": 2.9596762743149225e-06, - "loss": 0.3702, - "step": 9920 - }, - { - "epoch": 0.7048426880556492, - "grad_norm": 3.365675129758323, - "learning_rate": 2.9525770268351556e-06, - "loss": 0.3618, - "step": 9930 - }, - { - "epoch": 0.7055524994232782, - "grad_norm": 3.7307831294643323, - "learning_rate": 2.9454777793553883e-06, - "loss": 0.3552, - "step": 9940 - }, - { - "epoch": 0.7062623107909073, - "grad_norm": 10.13055799757591, - "learning_rate": 2.9383785318756214e-06, - "loss": 0.369, - "step": 9950 - }, - { - "epoch": 0.7069721221585363, - "grad_norm": 3.79159989826404, - "learning_rate": 2.931279284395854e-06, - "loss": 0.3393, - "step": 9960 - }, - { - "epoch": 0.7076819335261654, - "grad_norm": 11.361319554472407, - "learning_rate": 2.924180036916087e-06, - "loss": 0.3726, - "step": 9970 - }, - { - "epoch": 0.7083917448937945, - "grad_norm": 2.2727709813242, - "learning_rate": 2.9170807894363203e-06, - "loss": 0.3558, - "step": 9980 - }, - { - "epoch": 0.7091015562614236, - "grad_norm": 13.54783288221351, - "learning_rate": 2.909981541956553e-06, - "loss": 0.3522, - "step": 9990 - }, - { - "epoch": 0.7098113676290526, - "grad_norm": 3.4738198913190037, - "learning_rate": 2.902882294476786e-06, - "loss": 0.3636, - "step": 10000 - }, - { - "epoch": 0.7105211789966817, - "grad_norm": 2.599196507580769, - "learning_rate": 2.8957830469970183e-06, - "loss": 0.373, - "step": 10010 - }, - { - "epoch": 0.7112309903643107, - "grad_norm": 4.846340487255633, - "learning_rate": 2.888683799517251e-06, - "loss": 0.364, - "step": 10020 - }, - { - "epoch": 0.7119408017319397, - "grad_norm": 4.14481835106229, - "learning_rate": 2.881584552037484e-06, - "loss": 0.3565, - "step": 10030 - }, - { - "epoch": 0.7126506130995688, - "grad_norm": 3.12959687042078, - "learning_rate": 2.8744853045577172e-06, - "loss": 0.3597, - "step": 10040 - }, - { - "epoch": 0.7133604244671978, - "grad_norm": 2.0499607045489157, - "learning_rate": 2.86738605707795e-06, - "loss": 0.3665, - "step": 10050 - }, - { - "epoch": 0.7140702358348269, - "grad_norm": 3.4345739303394964, - "learning_rate": 2.860286809598183e-06, - "loss": 0.3406, - "step": 10060 - }, - { - "epoch": 0.7147800472024559, - "grad_norm": 3.2507549549593677, - "learning_rate": 2.8531875621184157e-06, - "loss": 0.3691, - "step": 10070 - }, - { - "epoch": 0.715489858570085, - "grad_norm": 3.088999571380729, - "learning_rate": 2.8460883146386488e-06, - "loss": 0.3512, - "step": 10080 - }, - { - "epoch": 0.716199669937714, - "grad_norm": 3.992697102415428, - "learning_rate": 2.8389890671588815e-06, - "loss": 0.3584, - "step": 10090 - }, - { - "epoch": 0.7169094813053432, - "grad_norm": 8.327520697203159, - "learning_rate": 2.8318898196791146e-06, - "loss": 0.3604, - "step": 10100 - }, - { - "epoch": 0.7176192926729722, - "grad_norm": 4.600972082353797, - "learning_rate": 2.824790572199347e-06, - "loss": 0.3641, - "step": 10110 - }, - { - "epoch": 0.7183291040406012, - "grad_norm": 3.6403983429872384, - "learning_rate": 2.81769132471958e-06, - "loss": 0.3496, - "step": 10120 - }, - { - "epoch": 0.7190389154082303, - "grad_norm": 2.831902492470625, - "learning_rate": 2.8105920772398126e-06, - "loss": 0.3611, - "step": 10130 - }, - { - "epoch": 0.7197487267758593, - "grad_norm": 4.428260390842955, - "learning_rate": 2.8034928297600457e-06, - "loss": 0.3572, - "step": 10140 - }, - { - "epoch": 0.7204585381434884, - "grad_norm": 5.5528766539260825, - "learning_rate": 2.7963935822802784e-06, - "loss": 0.3605, - "step": 10150 - }, - { - "epoch": 0.7211683495111174, - "grad_norm": 3.3271150324051124, - "learning_rate": 2.7892943348005115e-06, - "loss": 0.3646, - "step": 10160 - }, - { - "epoch": 0.7218781608787465, - "grad_norm": 4.353636452465487, - "learning_rate": 2.782195087320744e-06, - "loss": 0.3745, - "step": 10170 - }, - { - "epoch": 0.7225879722463755, - "grad_norm": 4.938483709090633, - "learning_rate": 2.7750958398409773e-06, - "loss": 0.3586, - "step": 10180 - }, - { - "epoch": 0.7232977836140045, - "grad_norm": 4.667393928494558, - "learning_rate": 2.76799659236121e-06, - "loss": 0.3526, - "step": 10190 - }, - { - "epoch": 0.7240075949816336, - "grad_norm": 5.312814121573459, - "learning_rate": 2.760897344881443e-06, - "loss": 0.3539, - "step": 10200 - }, - { - "epoch": 0.7247174063492627, - "grad_norm": 3.102848391211554, - "learning_rate": 2.7537980974016757e-06, - "loss": 0.3453, - "step": 10210 - }, - { - "epoch": 0.7254272177168918, - "grad_norm": 3.036840145081599, - "learning_rate": 2.7466988499219084e-06, - "loss": 0.3627, - "step": 10220 - }, - { - "epoch": 0.7261370290845208, - "grad_norm": 5.647990352632265, - "learning_rate": 2.739599602442141e-06, - "loss": 0.3555, - "step": 10230 - }, - { - "epoch": 0.7268468404521499, - "grad_norm": 4.66342024342857, - "learning_rate": 2.732500354962374e-06, - "loss": 0.3722, - "step": 10240 - }, - { - "epoch": 0.7275566518197789, - "grad_norm": 3.168307885423117, - "learning_rate": 2.725401107482607e-06, - "loss": 0.3673, - "step": 10250 - }, - { - "epoch": 0.728266463187408, - "grad_norm": 4.968172759395676, - "learning_rate": 2.71830186000284e-06, - "loss": 0.3556, - "step": 10260 - }, - { - "epoch": 0.728976274555037, - "grad_norm": 3.5154935991341123, - "learning_rate": 2.7112026125230726e-06, - "loss": 0.3593, - "step": 10270 - }, - { - "epoch": 0.729686085922666, - "grad_norm": 5.0083468168620655, - "learning_rate": 2.7041033650433057e-06, - "loss": 0.3592, - "step": 10280 - }, - { - "epoch": 0.7303958972902951, - "grad_norm": 3.379094612224907, - "learning_rate": 2.697004117563539e-06, - "loss": 0.3643, - "step": 10290 - }, - { - "epoch": 0.7311057086579241, - "grad_norm": 4.180270451928424, - "learning_rate": 2.6899048700837715e-06, - "loss": 0.3574, - "step": 10300 - }, - { - "epoch": 0.7318155200255532, - "grad_norm": 4.640198570927561, - "learning_rate": 2.6828056226040046e-06, - "loss": 0.3578, - "step": 10310 - }, - { - "epoch": 0.7325253313931822, - "grad_norm": 10.365125402351024, - "learning_rate": 2.675706375124237e-06, - "loss": 0.3614, - "step": 10320 - }, - { - "epoch": 0.7332351427608114, - "grad_norm": 15.355341780635097, - "learning_rate": 2.6686071276444695e-06, - "loss": 0.3631, - "step": 10330 - }, - { - "epoch": 0.7339449541284404, - "grad_norm": 6.738981517513828, - "learning_rate": 2.6615078801647026e-06, - "loss": 0.3493, - "step": 10340 - }, - { - "epoch": 0.7346547654960695, - "grad_norm": 7.55570609393924, - "learning_rate": 2.6544086326849357e-06, - "loss": 0.371, - "step": 10350 - }, - { - "epoch": 0.7353645768636985, - "grad_norm": 2.6482961979611526, - "learning_rate": 2.6473093852051684e-06, - "loss": 0.3591, - "step": 10360 - }, - { - "epoch": 0.7360743882313275, - "grad_norm": 8.054548870993123, - "learning_rate": 2.6402101377254015e-06, - "loss": 0.3577, - "step": 10370 - }, - { - "epoch": 0.7367841995989566, - "grad_norm": 7.370207938746124, - "learning_rate": 2.633110890245634e-06, - "loss": 0.3509, - "step": 10380 - }, - { - "epoch": 0.7374940109665856, - "grad_norm": 8.915363239178143, - "learning_rate": 2.6260116427658673e-06, - "loss": 0.3595, - "step": 10390 - }, - { - "epoch": 0.7382038223342147, - "grad_norm": 6.453539668987391, - "learning_rate": 2.6189123952861e-06, - "loss": 0.3735, - "step": 10400 - }, - { - "epoch": 0.7389136337018437, - "grad_norm": 13.429374820990935, - "learning_rate": 2.611813147806333e-06, - "loss": 0.343, - "step": 10410 - }, - { - "epoch": 0.7396234450694728, - "grad_norm": 4.019465503184252, - "learning_rate": 2.6047139003265653e-06, - "loss": 0.3619, - "step": 10420 - }, - { - "epoch": 0.7403332564371018, - "grad_norm": 4.77728942914678, - "learning_rate": 2.5976146528467984e-06, - "loss": 0.3602, - "step": 10430 - }, - { - "epoch": 0.7410430678047308, - "grad_norm": 16.82021280745509, - "learning_rate": 2.590515405367031e-06, - "loss": 0.3765, - "step": 10440 - }, - { - "epoch": 0.74175287917236, - "grad_norm": 4.7659520678895735, - "learning_rate": 2.5834161578872642e-06, - "loss": 0.3557, - "step": 10450 - }, - { - "epoch": 0.742462690539989, - "grad_norm": 5.846901706253607, - "learning_rate": 2.576316910407497e-06, - "loss": 0.3574, - "step": 10460 - }, - { - "epoch": 0.7431725019076181, - "grad_norm": 5.00717365628058, - "learning_rate": 2.56921766292773e-06, - "loss": 0.371, - "step": 10470 - }, - { - "epoch": 0.7438823132752471, - "grad_norm": 12.812616706907704, - "learning_rate": 2.5621184154479627e-06, - "loss": 0.3612, - "step": 10480 - }, - { - "epoch": 0.7445921246428762, - "grad_norm": 2.7312101929568375, - "learning_rate": 2.5550191679681958e-06, - "loss": 0.3551, - "step": 10490 - }, - { - "epoch": 0.7453019360105052, - "grad_norm": 3.0759041075210782, - "learning_rate": 2.5479199204884285e-06, - "loss": 0.3574, - "step": 10500 - }, - { - "epoch": 0.7460117473781342, - "grad_norm": 7.165278043719281, - "learning_rate": 2.5408206730086616e-06, - "loss": 0.3605, - "step": 10510 - }, - { - "epoch": 0.7467215587457633, - "grad_norm": 4.908665990783306, - "learning_rate": 2.533721425528894e-06, - "loss": 0.3479, - "step": 10520 - }, - { - "epoch": 0.7474313701133923, - "grad_norm": 3.4583261557450227, - "learning_rate": 2.526622178049127e-06, - "loss": 0.3542, - "step": 10530 - }, - { - "epoch": 0.7481411814810214, - "grad_norm": 11.387458565670322, - "learning_rate": 2.5195229305693596e-06, - "loss": 0.3619, - "step": 10540 - }, - { - "epoch": 0.7488509928486504, - "grad_norm": 10.198798372329442, - "learning_rate": 2.5124236830895927e-06, - "loss": 0.3434, - "step": 10550 - }, - { - "epoch": 0.7495608042162796, - "grad_norm": 3.893599380410888, - "learning_rate": 2.5053244356098254e-06, - "loss": 0.362, - "step": 10560 - }, - { - "epoch": 0.7502706155839086, - "grad_norm": 5.107597028464082, - "learning_rate": 2.4982251881300585e-06, - "loss": 0.3688, - "step": 10570 - }, - { - "epoch": 0.7509804269515377, - "grad_norm": 4.219068583835792, - "learning_rate": 2.491125940650291e-06, - "loss": 0.3649, - "step": 10580 - }, - { - "epoch": 0.7516902383191667, - "grad_norm": 4.535592066198855, - "learning_rate": 2.4840266931705243e-06, - "loss": 0.37, - "step": 10590 - }, - { - "epoch": 0.7524000496867957, - "grad_norm": 3.541264339618074, - "learning_rate": 2.476927445690757e-06, - "loss": 0.3679, - "step": 10600 - }, - { - "epoch": 0.7531098610544248, - "grad_norm": 4.7884449114332845, - "learning_rate": 2.4698281982109896e-06, - "loss": 0.3472, - "step": 10610 - }, - { - "epoch": 0.7538196724220538, - "grad_norm": 8.667808097909838, - "learning_rate": 2.4627289507312227e-06, - "loss": 0.3704, - "step": 10620 - }, - { - "epoch": 0.7545294837896829, - "grad_norm": 4.925434074834849, - "learning_rate": 2.455629703251456e-06, - "loss": 0.3701, - "step": 10630 - }, - { - "epoch": 0.7552392951573119, - "grad_norm": 3.8594886335750807, - "learning_rate": 2.4485304557716885e-06, - "loss": 0.3662, - "step": 10640 - }, - { - "epoch": 0.755949106524941, - "grad_norm": 4.971536391123703, - "learning_rate": 2.441431208291921e-06, - "loss": 0.35, - "step": 10650 - }, - { - "epoch": 0.75665891789257, - "grad_norm": 15.055144352578429, - "learning_rate": 2.434331960812154e-06, - "loss": 0.3584, - "step": 10660 - }, - { - "epoch": 0.757368729260199, - "grad_norm": 14.432076661811932, - "learning_rate": 2.427232713332387e-06, - "loss": 0.3621, - "step": 10670 - }, - { - "epoch": 0.7580785406278282, - "grad_norm": 9.810669772230819, - "learning_rate": 2.42013346585262e-06, - "loss": 0.3588, - "step": 10680 - }, - { - "epoch": 0.7587883519954572, - "grad_norm": 5.765479927608821, - "learning_rate": 2.4130342183728527e-06, - "loss": 0.3549, - "step": 10690 - }, - { - "epoch": 0.7594981633630863, - "grad_norm": 13.617197754978974, - "learning_rate": 2.4059349708930854e-06, - "loss": 0.3759, - "step": 10700 - }, - { - "epoch": 0.7602079747307153, - "grad_norm": 5.614482278416453, - "learning_rate": 2.3988357234133185e-06, - "loss": 0.3376, - "step": 10710 - }, - { - "epoch": 0.7609177860983444, - "grad_norm": 17.701642596831444, - "learning_rate": 2.391736475933551e-06, - "loss": 0.3647, - "step": 10720 - }, - { - "epoch": 0.7616275974659734, - "grad_norm": 4.910333781437824, - "learning_rate": 2.3846372284537843e-06, - "loss": 0.3643, - "step": 10730 - }, - { - "epoch": 0.7623374088336025, - "grad_norm": 3.415309685272355, - "learning_rate": 2.377537980974017e-06, - "loss": 0.3488, - "step": 10740 - }, - { - "epoch": 0.7630472202012315, - "grad_norm": 4.350903829153794, - "learning_rate": 2.3704387334942497e-06, - "loss": 0.3577, - "step": 10750 - }, - { - "epoch": 0.7637570315688605, - "grad_norm": 3.9361079752185435, - "learning_rate": 2.3633394860144828e-06, - "loss": 0.3591, - "step": 10760 - }, - { - "epoch": 0.7644668429364896, - "grad_norm": 5.913083445040196, - "learning_rate": 2.3562402385347154e-06, - "loss": 0.3486, - "step": 10770 - }, - { - "epoch": 0.7651766543041186, - "grad_norm": 5.982161931863015, - "learning_rate": 2.3491409910549485e-06, - "loss": 0.3714, - "step": 10780 - }, - { - "epoch": 0.7658864656717478, - "grad_norm": 4.5231254195655906, - "learning_rate": 2.3420417435751812e-06, - "loss": 0.3534, - "step": 10790 - }, - { - "epoch": 0.7665962770393768, - "grad_norm": 5.099871081954513, - "learning_rate": 2.334942496095414e-06, - "loss": 0.3509, - "step": 10800 - }, - { - "epoch": 0.7673060884070059, - "grad_norm": 3.361247181502804, - "learning_rate": 2.327843248615647e-06, - "loss": 0.3692, - "step": 10810 - }, - { - "epoch": 0.7680158997746349, - "grad_norm": 6.553423618292367, - "learning_rate": 2.3207440011358797e-06, - "loss": 0.353, - "step": 10820 - }, - { - "epoch": 0.768725711142264, - "grad_norm": 2.985537513367268, - "learning_rate": 2.3136447536561128e-06, - "loss": 0.3498, - "step": 10830 - }, - { - "epoch": 0.769435522509893, - "grad_norm": 3.0266471519507427, - "learning_rate": 2.3065455061763455e-06, - "loss": 0.3563, - "step": 10840 - }, - { - "epoch": 0.770145333877522, - "grad_norm": 17.644165005698888, - "learning_rate": 2.299446258696578e-06, - "loss": 0.3662, - "step": 10850 - }, - { - "epoch": 0.7708551452451511, - "grad_norm": 3.1894412768611016, - "learning_rate": 2.2923470112168112e-06, - "loss": 0.3503, - "step": 10860 - }, - { - "epoch": 0.7715649566127801, - "grad_norm": 4.492544324422795, - "learning_rate": 2.285247763737044e-06, - "loss": 0.3436, - "step": 10870 - }, - { - "epoch": 0.7722747679804092, - "grad_norm": 4.173829674998731, - "learning_rate": 2.278148516257277e-06, - "loss": 0.363, - "step": 10880 - }, - { - "epoch": 0.7729845793480382, - "grad_norm": 3.114718418646357, - "learning_rate": 2.2710492687775097e-06, - "loss": 0.3368, - "step": 10890 - }, - { - "epoch": 0.7736943907156673, - "grad_norm": 2.6323429503484443, - "learning_rate": 2.2639500212977424e-06, - "loss": 0.3489, - "step": 10900 - }, - { - "epoch": 0.7744042020832964, - "grad_norm": 2.8865277064459223, - "learning_rate": 2.2568507738179755e-06, - "loss": 0.3571, - "step": 10910 - }, - { - "epoch": 0.7751140134509255, - "grad_norm": 8.888602826244627, - "learning_rate": 2.249751526338208e-06, - "loss": 0.3399, - "step": 10920 - }, - { - "epoch": 0.7758238248185545, - "grad_norm": 3.532724353902858, - "learning_rate": 2.2426522788584412e-06, - "loss": 0.3493, - "step": 10930 - }, - { - "epoch": 0.7765336361861835, - "grad_norm": 3.6781547439101883, - "learning_rate": 2.235553031378674e-06, - "loss": 0.3462, - "step": 10940 - }, - { - "epoch": 0.7772434475538126, - "grad_norm": 13.16004359433701, - "learning_rate": 2.2284537838989066e-06, - "loss": 0.3649, - "step": 10950 - }, - { - "epoch": 0.7779532589214416, - "grad_norm": 9.642968589987298, - "learning_rate": 2.2213545364191397e-06, - "loss": 0.3582, - "step": 10960 - }, - { - "epoch": 0.7786630702890707, - "grad_norm": 6.16050392324128, - "learning_rate": 2.2142552889393724e-06, - "loss": 0.3624, - "step": 10970 - }, - { - "epoch": 0.7793728816566997, - "grad_norm": 4.012346442724565, - "learning_rate": 2.2071560414596055e-06, - "loss": 0.3448, - "step": 10980 - }, - { - "epoch": 0.7800826930243288, - "grad_norm": 2.6066193255622956, - "learning_rate": 2.2000567939798386e-06, - "loss": 0.3644, - "step": 10990 - }, - { - "epoch": 0.7807925043919578, - "grad_norm": 7.331639609512875, - "learning_rate": 2.1929575465000713e-06, - "loss": 0.3515, - "step": 11000 - }, - { - "epoch": 0.7815023157595868, - "grad_norm": 2.990816174000455, - "learning_rate": 2.185858299020304e-06, - "loss": 0.3505, - "step": 11010 - }, - { - "epoch": 0.782212127127216, - "grad_norm": 3.6112792490950554, - "learning_rate": 2.178759051540537e-06, - "loss": 0.3548, - "step": 11020 - }, - { - "epoch": 0.782921938494845, - "grad_norm": 3.8221043132066286, - "learning_rate": 2.1716598040607697e-06, - "loss": 0.3571, - "step": 11030 - }, - { - "epoch": 0.7836317498624741, - "grad_norm": 7.476265982563856, - "learning_rate": 2.164560556581003e-06, - "loss": 0.3428, - "step": 11040 - }, - { - "epoch": 0.7843415612301031, - "grad_norm": 5.554911455235443, - "learning_rate": 2.1574613091012355e-06, - "loss": 0.354, - "step": 11050 - }, - { - "epoch": 0.7850513725977322, - "grad_norm": 2.9298081851011117, - "learning_rate": 2.150362061621468e-06, - "loss": 0.3597, - "step": 11060 - }, - { - "epoch": 0.7857611839653612, - "grad_norm": 5.325097733237352, - "learning_rate": 2.1432628141417013e-06, - "loss": 0.3486, - "step": 11070 - }, - { - "epoch": 0.7864709953329903, - "grad_norm": 3.5814394523109114, - "learning_rate": 2.136163566661934e-06, - "loss": 0.3544, - "step": 11080 - }, - { - "epoch": 0.7871808067006193, - "grad_norm": 3.6972554376986, - "learning_rate": 2.129064319182167e-06, - "loss": 0.3546, - "step": 11090 - }, - { - "epoch": 0.7878906180682483, - "grad_norm": 6.754098899246775, - "learning_rate": 2.1219650717023997e-06, - "loss": 0.3537, - "step": 11100 - }, - { - "epoch": 0.7886004294358774, - "grad_norm": 3.3122898855719876, - "learning_rate": 2.1148658242226324e-06, - "loss": 0.3645, - "step": 11110 - }, - { - "epoch": 0.7893102408035064, - "grad_norm": 2.8223728276754128, - "learning_rate": 2.1077665767428655e-06, - "loss": 0.3599, - "step": 11120 - }, - { - "epoch": 0.7900200521711355, - "grad_norm": 2.5012481292133937, - "learning_rate": 2.100667329263098e-06, - "loss": 0.3486, - "step": 11130 - }, - { - "epoch": 0.7907298635387646, - "grad_norm": 11.033197138630223, - "learning_rate": 2.0935680817833313e-06, - "loss": 0.3467, - "step": 11140 - }, - { - "epoch": 0.7914396749063937, - "grad_norm": 3.730389968284293, - "learning_rate": 2.086468834303564e-06, - "loss": 0.3544, - "step": 11150 - }, - { - "epoch": 0.7921494862740227, - "grad_norm": 5.898064410181565, - "learning_rate": 2.0793695868237967e-06, - "loss": 0.3477, - "step": 11160 - }, - { - "epoch": 0.7928592976416518, - "grad_norm": 4.55198088261442, - "learning_rate": 2.0722703393440298e-06, - "loss": 0.3527, - "step": 11170 - }, - { - "epoch": 0.7935691090092808, - "grad_norm": 5.318762071563834, - "learning_rate": 2.0651710918642624e-06, - "loss": 0.3478, - "step": 11180 - }, - { - "epoch": 0.7942789203769098, - "grad_norm": 6.161214607463883, - "learning_rate": 2.0580718443844955e-06, - "loss": 0.3546, - "step": 11190 - }, - { - "epoch": 0.7949887317445389, - "grad_norm": 3.1236830623318537, - "learning_rate": 2.0509725969047282e-06, - "loss": 0.3565, - "step": 11200 - }, - { - "epoch": 0.7956985431121679, - "grad_norm": 4.197839999078878, - "learning_rate": 2.043873349424961e-06, - "loss": 0.3496, - "step": 11210 - }, - { - "epoch": 0.796408354479797, - "grad_norm": 3.2762330861667515, - "learning_rate": 2.036774101945194e-06, - "loss": 0.348, - "step": 11220 - }, - { - "epoch": 0.797118165847426, - "grad_norm": 5.961140258537488, - "learning_rate": 2.0296748544654267e-06, - "loss": 0.3637, - "step": 11230 - }, - { - "epoch": 0.797827977215055, - "grad_norm": 2.0964322412177263, - "learning_rate": 2.0225756069856598e-06, - "loss": 0.341, - "step": 11240 - }, - { - "epoch": 0.7985377885826842, - "grad_norm": 11.078753928620895, - "learning_rate": 2.0154763595058925e-06, - "loss": 0.3582, - "step": 11250 - }, - { - "epoch": 0.7992475999503132, - "grad_norm": 11.615859636107096, - "learning_rate": 2.008377112026125e-06, - "loss": 0.3504, - "step": 11260 - }, - { - "epoch": 0.7999574113179423, - "grad_norm": 9.267486623233392, - "learning_rate": 2.0012778645463582e-06, - "loss": 0.3585, - "step": 11270 - }, - { - "epoch": 0.8006672226855713, - "grad_norm": 3.7638868565818613, - "learning_rate": 1.994178617066591e-06, - "loss": 0.3572, - "step": 11280 - }, - { - "epoch": 0.8013770340532004, - "grad_norm": 4.274096264509613, - "learning_rate": 1.987079369586824e-06, - "loss": 0.352, - "step": 11290 - }, - { - "epoch": 0.8020868454208294, - "grad_norm": 3.0651382288741824, - "learning_rate": 1.979980122107057e-06, - "loss": 0.3487, - "step": 11300 - }, - { - "epoch": 0.8027966567884585, - "grad_norm": 2.585139354778811, - "learning_rate": 1.9728808746272894e-06, - "loss": 0.3509, - "step": 11310 - }, - { - "epoch": 0.8035064681560875, - "grad_norm": 3.4507245702670013, - "learning_rate": 1.9657816271475225e-06, - "loss": 0.3605, - "step": 11320 - }, - { - "epoch": 0.8042162795237165, - "grad_norm": 2.168473869134373, - "learning_rate": 1.9586823796677556e-06, - "loss": 0.3473, - "step": 11330 - }, - { - "epoch": 0.8049260908913456, - "grad_norm": 3.3138804394827126, - "learning_rate": 1.9515831321879883e-06, - "loss": 0.3451, - "step": 11340 - }, - { - "epoch": 0.8056359022589746, - "grad_norm": 2.9967871033094284, - "learning_rate": 1.9444838847082214e-06, - "loss": 0.3586, - "step": 11350 - }, - { - "epoch": 0.8063457136266037, - "grad_norm": 2.218098420224771, - "learning_rate": 1.9373846372284536e-06, - "loss": 0.3629, - "step": 11360 - }, - { - "epoch": 0.8070555249942328, - "grad_norm": 4.124703498173868, - "learning_rate": 1.9302853897486867e-06, - "loss": 0.349, - "step": 11370 - }, - { - "epoch": 0.8077653363618619, - "grad_norm": 4.336301638014139, - "learning_rate": 1.92318614226892e-06, - "loss": 0.3474, - "step": 11380 - }, - { - "epoch": 0.8084751477294909, - "grad_norm": 5.67446885361532, - "learning_rate": 1.9160868947891525e-06, - "loss": 0.3577, - "step": 11390 - }, - { - "epoch": 0.80918495909712, - "grad_norm": 5.496735292829206, - "learning_rate": 1.9089876473093856e-06, - "loss": 0.3606, - "step": 11400 - }, - { - "epoch": 0.809894770464749, - "grad_norm": 2.3181036706188505, - "learning_rate": 1.901888399829618e-06, - "loss": 0.3573, - "step": 11410 - }, - { - "epoch": 0.810604581832378, - "grad_norm": 4.2823563842257695, - "learning_rate": 1.894789152349851e-06, - "loss": 0.3456, - "step": 11420 - }, - { - "epoch": 0.8113143932000071, - "grad_norm": 9.041186743139388, - "learning_rate": 1.8876899048700838e-06, - "loss": 0.3493, - "step": 11430 - }, - { - "epoch": 0.8120242045676361, - "grad_norm": 2.135565041402105, - "learning_rate": 1.8805906573903167e-06, - "loss": 0.3573, - "step": 11440 - }, - { - "epoch": 0.8127340159352652, - "grad_norm": 4.2654812969837295, - "learning_rate": 1.8734914099105498e-06, - "loss": 0.3462, - "step": 11450 - }, - { - "epoch": 0.8134438273028942, - "grad_norm": 3.0226693302416465, - "learning_rate": 1.8663921624307823e-06, - "loss": 0.3399, - "step": 11460 - }, - { - "epoch": 0.8141536386705233, - "grad_norm": 5.674429424631266, - "learning_rate": 1.8592929149510152e-06, - "loss": 0.3445, - "step": 11470 - }, - { - "epoch": 0.8148634500381524, - "grad_norm": 5.107735874370569, - "learning_rate": 1.852193667471248e-06, - "loss": 0.3498, - "step": 11480 - }, - { - "epoch": 0.8155732614057815, - "grad_norm": 4.211595369240753, - "learning_rate": 1.8450944199914812e-06, - "loss": 0.3509, - "step": 11490 - }, - { - "epoch": 0.8162830727734105, - "grad_norm": 3.2874196387814485, - "learning_rate": 1.837995172511714e-06, - "loss": 0.352, - "step": 11500 - }, - { - "epoch": 0.8169928841410395, - "grad_norm": 2.51051446421893, - "learning_rate": 1.8308959250319465e-06, - "loss": 0.3445, - "step": 11510 - }, - { - "epoch": 0.8177026955086686, - "grad_norm": 13.267874952448258, - "learning_rate": 1.8237966775521796e-06, - "loss": 0.354, - "step": 11520 - }, - { - "epoch": 0.8184125068762976, - "grad_norm": 4.900767095828628, - "learning_rate": 1.8166974300724125e-06, - "loss": 0.3594, - "step": 11530 - }, - { - "epoch": 0.8191223182439267, - "grad_norm": 8.3230418317363, - "learning_rate": 1.8095981825926454e-06, - "loss": 0.3471, - "step": 11540 - }, - { - "epoch": 0.8198321296115557, - "grad_norm": 2.8346340256917815, - "learning_rate": 1.8024989351128783e-06, - "loss": 0.3695, - "step": 11550 - }, - { - "epoch": 0.8205419409791848, - "grad_norm": 5.533189262204602, - "learning_rate": 1.795399687633111e-06, - "loss": 0.3728, - "step": 11560 - }, - { - "epoch": 0.8212517523468138, - "grad_norm": 3.187071233846852, - "learning_rate": 1.7883004401533439e-06, - "loss": 0.3464, - "step": 11570 - }, - { - "epoch": 0.8219615637144428, - "grad_norm": 3.9314257894883937, - "learning_rate": 1.7812011926735768e-06, - "loss": 0.3532, - "step": 11580 - }, - { - "epoch": 0.8226713750820719, - "grad_norm": 3.6730541227348277, - "learning_rate": 1.7741019451938097e-06, - "loss": 0.3565, - "step": 11590 - }, - { - "epoch": 0.823381186449701, - "grad_norm": 2.9136274666194306, - "learning_rate": 1.7670026977140426e-06, - "loss": 0.3603, - "step": 11600 - }, - { - "epoch": 0.8240909978173301, - "grad_norm": 6.106992201577366, - "learning_rate": 1.7599034502342754e-06, - "loss": 0.3484, - "step": 11610 - }, - { - "epoch": 0.8248008091849591, - "grad_norm": 4.230462903274037, - "learning_rate": 1.7528042027545081e-06, - "loss": 0.35, - "step": 11620 - }, - { - "epoch": 0.8255106205525882, - "grad_norm": 3.376064932155992, - "learning_rate": 1.745704955274741e-06, - "loss": 0.35, - "step": 11630 - }, - { - "epoch": 0.8262204319202172, - "grad_norm": 2.8424779046250612, - "learning_rate": 1.738605707794974e-06, - "loss": 0.3552, - "step": 11640 - }, - { - "epoch": 0.8269302432878463, - "grad_norm": 3.6044824322491347, - "learning_rate": 1.7315064603152068e-06, - "loss": 0.3633, - "step": 11650 - }, - { - "epoch": 0.8276400546554753, - "grad_norm": 3.3041226058016324, - "learning_rate": 1.7244072128354397e-06, - "loss": 0.3453, - "step": 11660 - }, - { - "epoch": 0.8283498660231043, - "grad_norm": 3.461976575510189, - "learning_rate": 1.7173079653556724e-06, - "loss": 0.3607, - "step": 11670 - }, - { - "epoch": 0.8290596773907334, - "grad_norm": 3.96624408516477, - "learning_rate": 1.7102087178759052e-06, - "loss": 0.3431, - "step": 11680 - }, - { - "epoch": 0.8297694887583624, - "grad_norm": 10.446490548963004, - "learning_rate": 1.7031094703961381e-06, - "loss": 0.3518, - "step": 11690 - }, - { - "epoch": 0.8304793001259915, - "grad_norm": 2.4894424633296888, - "learning_rate": 1.696010222916371e-06, - "loss": 0.3618, - "step": 11700 - }, - { - "epoch": 0.8311891114936206, - "grad_norm": 3.7097939930537494, - "learning_rate": 1.688910975436604e-06, - "loss": 0.3577, - "step": 11710 - }, - { - "epoch": 0.8318989228612497, - "grad_norm": 2.591589818986439, - "learning_rate": 1.6818117279568366e-06, - "loss": 0.3454, - "step": 11720 - }, - { - "epoch": 0.8326087342288787, - "grad_norm": 3.0415000039562816, - "learning_rate": 1.6747124804770695e-06, - "loss": 0.3514, - "step": 11730 - }, - { - "epoch": 0.8333185455965078, - "grad_norm": 3.185465708245909, - "learning_rate": 1.6676132329973024e-06, - "loss": 0.3437, - "step": 11740 - }, - { - "epoch": 0.8340283569641368, - "grad_norm": 8.153250864972724, - "learning_rate": 1.6605139855175353e-06, - "loss": 0.3418, - "step": 11750 - }, - { - "epoch": 0.8347381683317658, - "grad_norm": 17.15311701699765, - "learning_rate": 1.6534147380377682e-06, - "loss": 0.3533, - "step": 11760 - }, - { - "epoch": 0.8354479796993949, - "grad_norm": 2.956498750624732, - "learning_rate": 1.6463154905580008e-06, - "loss": 0.3539, - "step": 11770 - }, - { - "epoch": 0.8361577910670239, - "grad_norm": 5.182422880739596, - "learning_rate": 1.6392162430782337e-06, - "loss": 0.3543, - "step": 11780 - }, - { - "epoch": 0.836867602434653, - "grad_norm": 5.245759433932608, - "learning_rate": 1.6321169955984666e-06, - "loss": 0.3506, - "step": 11790 - }, - { - "epoch": 0.837577413802282, - "grad_norm": 2.8777113855306, - "learning_rate": 1.6250177481186997e-06, - "loss": 0.351, - "step": 11800 - }, - { - "epoch": 0.838287225169911, - "grad_norm": 3.317900354948997, - "learning_rate": 1.6179185006389326e-06, - "loss": 0.3426, - "step": 11810 - }, - { - "epoch": 0.8389970365375401, - "grad_norm": 2.7259998460321295, - "learning_rate": 1.610819253159165e-06, - "loss": 0.3416, - "step": 11820 - }, - { - "epoch": 0.8397068479051693, - "grad_norm": 7.203501395811214, - "learning_rate": 1.603720005679398e-06, - "loss": 0.346, - "step": 11830 - }, - { - "epoch": 0.8404166592727983, - "grad_norm": 3.5281319520469343, - "learning_rate": 1.596620758199631e-06, - "loss": 0.3415, - "step": 11840 - }, - { - "epoch": 0.8411264706404273, - "grad_norm": 2.8068995456792085, - "learning_rate": 1.589521510719864e-06, - "loss": 0.3506, - "step": 11850 - }, - { - "epoch": 0.8418362820080564, - "grad_norm": 5.8571413992691, - "learning_rate": 1.5824222632400968e-06, - "loss": 0.3492, - "step": 11860 - }, - { - "epoch": 0.8425460933756854, - "grad_norm": 2.8473277239745625, - "learning_rate": 1.5753230157603295e-06, - "loss": 0.3464, - "step": 11870 - }, - { - "epoch": 0.8432559047433145, - "grad_norm": 2.743001963303042, - "learning_rate": 1.5682237682805624e-06, - "loss": 0.3457, - "step": 11880 - }, - { - "epoch": 0.8439657161109435, - "grad_norm": 10.213481491528695, - "learning_rate": 1.5611245208007953e-06, - "loss": 0.3578, - "step": 11890 - }, - { - "epoch": 0.8446755274785726, - "grad_norm": 3.735755256117381, - "learning_rate": 1.5540252733210282e-06, - "loss": 0.3503, - "step": 11900 - }, - { - "epoch": 0.8453853388462016, - "grad_norm": 4.459890794830131, - "learning_rate": 1.546926025841261e-06, - "loss": 0.3409, - "step": 11910 - }, - { - "epoch": 0.8460951502138306, - "grad_norm": 4.8029617986261295, - "learning_rate": 1.5398267783614938e-06, - "loss": 0.3538, - "step": 11920 - }, - { - "epoch": 0.8468049615814597, - "grad_norm": 7.056776646894436, - "learning_rate": 1.5327275308817267e-06, - "loss": 0.346, - "step": 11930 - }, - { - "epoch": 0.8475147729490888, - "grad_norm": 7.364554673266408, - "learning_rate": 1.5256282834019595e-06, - "loss": 0.3478, - "step": 11940 - }, - { - "epoch": 0.8482245843167179, - "grad_norm": 3.605377806044163, - "learning_rate": 1.5185290359221924e-06, - "loss": 0.3499, - "step": 11950 - }, - { - "epoch": 0.8489343956843469, - "grad_norm": 2.452400869581193, - "learning_rate": 1.5114297884424253e-06, - "loss": 0.339, - "step": 11960 - }, - { - "epoch": 0.849644207051976, - "grad_norm": 2.870621078183671, - "learning_rate": 1.504330540962658e-06, - "loss": 0.3441, - "step": 11970 - }, - { - "epoch": 0.850354018419605, - "grad_norm": 4.473314694561015, - "learning_rate": 1.4972312934828909e-06, - "loss": 0.3559, - "step": 11980 - }, - { - "epoch": 0.851063829787234, - "grad_norm": 5.114834992133615, - "learning_rate": 1.4901320460031238e-06, - "loss": 0.3541, - "step": 11990 - }, - { - "epoch": 0.8517736411548631, - "grad_norm": 12.083657543428806, - "learning_rate": 1.4830327985233567e-06, - "loss": 0.358, - "step": 12000 - }, - { - "epoch": 0.8524834525224921, - "grad_norm": 3.7361409384047923, - "learning_rate": 1.4759335510435896e-06, - "loss": 0.3395, - "step": 12010 - }, - { - "epoch": 0.8531932638901212, - "grad_norm": 3.4424635097779657, - "learning_rate": 1.4688343035638222e-06, - "loss": 0.3593, - "step": 12020 - }, - { - "epoch": 0.8539030752577502, - "grad_norm": 1.9645069008952134, - "learning_rate": 1.4617350560840551e-06, - "loss": 0.3508, - "step": 12030 - }, - { - "epoch": 0.8546128866253793, - "grad_norm": 4.627652849790996, - "learning_rate": 1.454635808604288e-06, - "loss": 0.3408, - "step": 12040 - }, - { - "epoch": 0.8553226979930083, - "grad_norm": 3.831924600437753, - "learning_rate": 1.447536561124521e-06, - "loss": 0.3487, - "step": 12050 - }, - { - "epoch": 0.8560325093606375, - "grad_norm": 4.570169273747359, - "learning_rate": 1.4404373136447538e-06, - "loss": 0.3415, - "step": 12060 - }, - { - "epoch": 0.8567423207282665, - "grad_norm": 4.6135182738223595, - "learning_rate": 1.4333380661649865e-06, - "loss": 0.3604, - "step": 12070 - }, - { - "epoch": 0.8574521320958955, - "grad_norm": 4.751574062951781, - "learning_rate": 1.4262388186852194e-06, - "loss": 0.3636, - "step": 12080 - }, - { - "epoch": 0.8581619434635246, - "grad_norm": 3.378379003665899, - "learning_rate": 1.4191395712054523e-06, - "loss": 0.3432, - "step": 12090 - }, - { - "epoch": 0.8588717548311536, - "grad_norm": 16.540688675093385, - "learning_rate": 1.4120403237256851e-06, - "loss": 0.3396, - "step": 12100 - }, - { - "epoch": 0.8595815661987827, - "grad_norm": 4.814104030359969, - "learning_rate": 1.404941076245918e-06, - "loss": 0.3461, - "step": 12110 - }, - { - "epoch": 0.8602913775664117, - "grad_norm": 10.051601410520883, - "learning_rate": 1.3978418287661507e-06, - "loss": 0.3447, - "step": 12120 - }, - { - "epoch": 0.8610011889340408, - "grad_norm": 2.642610961406552, - "learning_rate": 1.3907425812863836e-06, - "loss": 0.3361, - "step": 12130 - }, - { - "epoch": 0.8617110003016698, - "grad_norm": 4.614329866790318, - "learning_rate": 1.3836433338066165e-06, - "loss": 0.3528, - "step": 12140 - }, - { - "epoch": 0.8624208116692988, - "grad_norm": 5.744791519089807, - "learning_rate": 1.3765440863268496e-06, - "loss": 0.3607, - "step": 12150 - }, - { - "epoch": 0.8631306230369279, - "grad_norm": 3.9315757108747618, - "learning_rate": 1.3694448388470825e-06, - "loss": 0.3598, - "step": 12160 - }, - { - "epoch": 0.8638404344045569, - "grad_norm": 5.812032059514415, - "learning_rate": 1.3623455913673154e-06, - "loss": 0.3406, - "step": 12170 - }, - { - "epoch": 0.8645502457721861, - "grad_norm": 3.1863830261887784, - "learning_rate": 1.3552463438875478e-06, - "loss": 0.3435, - "step": 12180 - }, - { - "epoch": 0.8652600571398151, - "grad_norm": 3.164333810889643, - "learning_rate": 1.348147096407781e-06, - "loss": 0.3477, - "step": 12190 - }, - { - "epoch": 0.8659698685074442, - "grad_norm": 4.132090281780686, - "learning_rate": 1.3410478489280138e-06, - "loss": 0.3476, - "step": 12200 - }, - { - "epoch": 0.8666796798750732, - "grad_norm": 3.050674443165291, - "learning_rate": 1.3339486014482467e-06, - "loss": 0.3451, - "step": 12210 - }, - { - "epoch": 0.8673894912427023, - "grad_norm": 5.9765372634611476, - "learning_rate": 1.3268493539684796e-06, - "loss": 0.3516, - "step": 12220 - }, - { - "epoch": 0.8680993026103313, - "grad_norm": 10.801904177839997, - "learning_rate": 1.3197501064887123e-06, - "loss": 0.3525, - "step": 12230 - }, - { - "epoch": 0.8688091139779603, - "grad_norm": 10.795290079471496, - "learning_rate": 1.3126508590089452e-06, - "loss": 0.3458, - "step": 12240 - }, - { - "epoch": 0.8695189253455894, - "grad_norm": 5.185082480943749, - "learning_rate": 1.305551611529178e-06, - "loss": 0.3471, - "step": 12250 - }, - { - "epoch": 0.8702287367132184, - "grad_norm": 5.967453058115287, - "learning_rate": 1.298452364049411e-06, - "loss": 0.3593, - "step": 12260 - }, - { - "epoch": 0.8709385480808475, - "grad_norm": 2.9260514202439807, - "learning_rate": 1.2913531165696439e-06, - "loss": 0.3401, - "step": 12270 - }, - { - "epoch": 0.8716483594484765, - "grad_norm": 3.5904246593138924, - "learning_rate": 1.2842538690898765e-06, - "loss": 0.3407, - "step": 12280 - }, - { - "epoch": 0.8723581708161057, - "grad_norm": 5.983622275696177, - "learning_rate": 1.2771546216101094e-06, - "loss": 0.3453, - "step": 12290 - }, - { - "epoch": 0.8730679821837347, - "grad_norm": 4.330501853746522, - "learning_rate": 1.2700553741303423e-06, - "loss": 0.3494, - "step": 12300 - }, - { - "epoch": 0.8737777935513638, - "grad_norm": 3.642467957948953, - "learning_rate": 1.2629561266505752e-06, - "loss": 0.3458, - "step": 12310 - }, - { - "epoch": 0.8744876049189928, - "grad_norm": 5.610238111701037, - "learning_rate": 1.255856879170808e-06, - "loss": 0.3533, - "step": 12320 - }, - { - "epoch": 0.8751974162866218, - "grad_norm": 5.47126817738485, - "learning_rate": 1.248757631691041e-06, - "loss": 0.3685, - "step": 12330 - }, - { - "epoch": 0.8759072276542509, - "grad_norm": 2.9438005039273953, - "learning_rate": 1.2416583842112737e-06, - "loss": 0.3325, - "step": 12340 - }, - { - "epoch": 0.8766170390218799, - "grad_norm": 3.7896440417507415, - "learning_rate": 1.2345591367315065e-06, - "loss": 0.3445, - "step": 12350 - }, - { - "epoch": 0.877326850389509, - "grad_norm": 5.754468251004695, - "learning_rate": 1.2274598892517394e-06, - "loss": 0.3374, - "step": 12360 - }, - { - "epoch": 0.878036661757138, - "grad_norm": 4.267624406753751, - "learning_rate": 1.2203606417719723e-06, - "loss": 0.341, - "step": 12370 - }, - { - "epoch": 0.8787464731247671, - "grad_norm": 3.1963277785921993, - "learning_rate": 1.2132613942922052e-06, - "loss": 0.3381, - "step": 12380 - }, - { - "epoch": 0.8794562844923961, - "grad_norm": 6.653906616284059, - "learning_rate": 1.206162146812438e-06, - "loss": 0.3506, - "step": 12390 - }, - { - "epoch": 0.8801660958600251, - "grad_norm": 3.897977105597471, - "learning_rate": 1.1990628993326708e-06, - "loss": 0.3475, - "step": 12400 - }, - { - "epoch": 0.8808759072276543, - "grad_norm": 4.962651576299262, - "learning_rate": 1.1919636518529037e-06, - "loss": 0.349, - "step": 12410 - }, - { - "epoch": 0.8815857185952833, - "grad_norm": 5.136741390825168, - "learning_rate": 1.1848644043731366e-06, - "loss": 0.3465, - "step": 12420 - }, - { - "epoch": 0.8822955299629124, - "grad_norm": 4.445543310701251, - "learning_rate": 1.1777651568933695e-06, - "loss": 0.3548, - "step": 12430 - }, - { - "epoch": 0.8830053413305414, - "grad_norm": 20.40372637998409, - "learning_rate": 1.1706659094136021e-06, - "loss": 0.3583, - "step": 12440 - }, - { - "epoch": 0.8837151526981705, - "grad_norm": 3.982374880512643, - "learning_rate": 1.163566661933835e-06, - "loss": 0.3317, - "step": 12450 - }, - { - "epoch": 0.8844249640657995, - "grad_norm": 32.55413999411799, - "learning_rate": 1.156467414454068e-06, - "loss": 0.3514, - "step": 12460 - }, - { - "epoch": 0.8851347754334286, - "grad_norm": 5.420145750098025, - "learning_rate": 1.1493681669743008e-06, - "loss": 0.3318, - "step": 12470 - }, - { - "epoch": 0.8858445868010576, - "grad_norm": 3.685854173880656, - "learning_rate": 1.1422689194945337e-06, - "loss": 0.3429, - "step": 12480 - }, - { - "epoch": 0.8865543981686866, - "grad_norm": 4.6974765931702605, - "learning_rate": 1.1351696720147664e-06, - "loss": 0.357, - "step": 12490 - }, - { - "epoch": 0.8872642095363157, - "grad_norm": 6.795504660900696, - "learning_rate": 1.1280704245349995e-06, - "loss": 0.3531, - "step": 12500 - }, - { - "epoch": 0.8879740209039447, - "grad_norm": 4.927867549600845, - "learning_rate": 1.1209711770552324e-06, - "loss": 0.3647, - "step": 12510 - }, - { - "epoch": 0.8886838322715739, - "grad_norm": 70.3319920713418, - "learning_rate": 1.113871929575465e-06, - "loss": 0.3481, - "step": 12520 - }, - { - "epoch": 0.8893936436392029, - "grad_norm": 29.187269789239732, - "learning_rate": 1.106772682095698e-06, - "loss": 0.3487, - "step": 12530 - }, - { - "epoch": 0.890103455006832, - "grad_norm": 2.619165987059257, - "learning_rate": 1.0996734346159308e-06, - "loss": 0.3557, - "step": 12540 - }, - { - "epoch": 0.890813266374461, - "grad_norm": 5.724483375383932, - "learning_rate": 1.0925741871361637e-06, - "loss": 0.3587, - "step": 12550 - }, - { - "epoch": 0.89152307774209, - "grad_norm": 4.2668973076468, - "learning_rate": 1.0854749396563966e-06, - "loss": 0.3462, - "step": 12560 - }, - { - "epoch": 0.8922328891097191, - "grad_norm": 9.234745768295488, - "learning_rate": 1.0783756921766293e-06, - "loss": 0.3537, - "step": 12570 - }, - { - "epoch": 0.8929427004773481, - "grad_norm": 3.665665785771113, - "learning_rate": 1.0712764446968622e-06, - "loss": 0.3643, - "step": 12580 - }, - { - "epoch": 0.8936525118449772, - "grad_norm": 2.6258893539339656, - "learning_rate": 1.064177197217095e-06, - "loss": 0.3338, - "step": 12590 - }, - { - "epoch": 0.8943623232126062, - "grad_norm": 3.154491930622594, - "learning_rate": 1.057077949737328e-06, - "loss": 0.3444, - "step": 12600 - }, - { - "epoch": 0.8950721345802353, - "grad_norm": 7.836052713310002, - "learning_rate": 1.0499787022575608e-06, - "loss": 0.3628, - "step": 12610 - }, - { - "epoch": 0.8957819459478643, - "grad_norm": 3.8943175763479996, - "learning_rate": 1.0428794547777935e-06, - "loss": 0.3403, - "step": 12620 - }, - { - "epoch": 0.8964917573154934, - "grad_norm": 15.29553673398478, - "learning_rate": 1.0357802072980264e-06, - "loss": 0.3521, - "step": 12630 - }, - { - "epoch": 0.8972015686831225, - "grad_norm": 4.442650541355824, - "learning_rate": 1.0286809598182595e-06, - "loss": 0.3342, - "step": 12640 - }, - { - "epoch": 0.8979113800507516, - "grad_norm": 3.9047310665092247, - "learning_rate": 1.0215817123384922e-06, - "loss": 0.3427, - "step": 12650 - }, - { - "epoch": 0.8986211914183806, - "grad_norm": 2.1332446352398544, - "learning_rate": 1.014482464858725e-06, - "loss": 0.349, - "step": 12660 - }, - { - "epoch": 0.8993310027860096, - "grad_norm": 2.8714716164962923, - "learning_rate": 1.0073832173789578e-06, - "loss": 0.357, - "step": 12670 - }, - { - "epoch": 0.9000408141536387, - "grad_norm": 5.513019742153847, - "learning_rate": 1.0002839698991909e-06, - "loss": 0.3404, - "step": 12680 - }, - { - "epoch": 0.9007506255212677, - "grad_norm": 3.940129513886605, - "learning_rate": 9.931847224194237e-07, - "loss": 0.3637, - "step": 12690 - }, - { - "epoch": 0.9014604368888968, - "grad_norm": 3.9515535744587256, - "learning_rate": 9.860854749396564e-07, - "loss": 0.3498, - "step": 12700 - }, - { - "epoch": 0.9021702482565258, - "grad_norm": 3.0069372274862234, - "learning_rate": 9.789862274598893e-07, - "loss": 0.3398, - "step": 12710 - }, - { - "epoch": 0.9028800596241549, - "grad_norm": 3.5043049442535072, - "learning_rate": 9.718869799801222e-07, - "loss": 0.339, - "step": 12720 - }, - { - "epoch": 0.9035898709917839, - "grad_norm": 4.7818413498969825, - "learning_rate": 9.64787732500355e-07, - "loss": 0.3482, - "step": 12730 - }, - { - "epoch": 0.9042996823594129, - "grad_norm": 2.9143937043517485, - "learning_rate": 9.57688485020588e-07, - "loss": 0.3289, - "step": 12740 - }, - { - "epoch": 0.9050094937270421, - "grad_norm": 3.530470062388488, - "learning_rate": 9.505892375408208e-07, - "loss": 0.3406, - "step": 12750 - }, - { - "epoch": 0.9057193050946711, - "grad_norm": 3.6289940943514245, - "learning_rate": 9.434899900610537e-07, - "loss": 0.343, - "step": 12760 - }, - { - "epoch": 0.9064291164623002, - "grad_norm": 11.92232636233806, - "learning_rate": 9.363907425812864e-07, - "loss": 0.3538, - "step": 12770 - }, - { - "epoch": 0.9071389278299292, - "grad_norm": 3.3864038291963787, - "learning_rate": 9.292914951015193e-07, - "loss": 0.3361, - "step": 12780 - }, - { - "epoch": 0.9078487391975583, - "grad_norm": 4.345114007441839, - "learning_rate": 9.221922476217522e-07, - "loss": 0.3307, - "step": 12790 - }, - { - "epoch": 0.9085585505651873, - "grad_norm": 3.2046183568204687, - "learning_rate": 9.15093000141985e-07, - "loss": 0.3467, - "step": 12800 - }, - { - "epoch": 0.9092683619328163, - "grad_norm": 3.030859855481088, - "learning_rate": 9.079937526622179e-07, - "loss": 0.3467, - "step": 12810 - }, - { - "epoch": 0.9099781733004454, - "grad_norm": 4.579582289306875, - "learning_rate": 9.008945051824507e-07, - "loss": 0.3232, - "step": 12820 - }, - { - "epoch": 0.9106879846680744, - "grad_norm": 3.760749336756688, - "learning_rate": 8.937952577026836e-07, - "loss": 0.3467, - "step": 12830 - }, - { - "epoch": 0.9113977960357035, - "grad_norm": 3.179418594295822, - "learning_rate": 8.866960102229165e-07, - "loss": 0.3473, - "step": 12840 - }, - { - "epoch": 0.9121076074033325, - "grad_norm": 3.983021666456075, - "learning_rate": 8.795967627431492e-07, - "loss": 0.3587, - "step": 12850 - }, - { - "epoch": 0.9128174187709616, - "grad_norm": 2.6025747411648243, - "learning_rate": 8.724975152633821e-07, - "loss": 0.3462, - "step": 12860 - }, - { - "epoch": 0.9135272301385907, - "grad_norm": 4.3088037403974315, - "learning_rate": 8.65398267783615e-07, - "loss": 0.3428, - "step": 12870 - }, - { - "epoch": 0.9142370415062198, - "grad_norm": 3.7771085521562644, - "learning_rate": 8.582990203038478e-07, - "loss": 0.3398, - "step": 12880 - }, - { - "epoch": 0.9149468528738488, - "grad_norm": 2.5115102656996853, - "learning_rate": 8.511997728240808e-07, - "loss": 0.3419, - "step": 12890 - }, - { - "epoch": 0.9156566642414778, - "grad_norm": 2.646423568943871, - "learning_rate": 8.441005253443135e-07, - "loss": 0.3326, - "step": 12900 - }, - { - "epoch": 0.9163664756091069, - "grad_norm": 4.308215071259538, - "learning_rate": 8.370012778645465e-07, - "loss": 0.3383, - "step": 12910 - }, - { - "epoch": 0.9170762869767359, - "grad_norm": 7.273858221430791, - "learning_rate": 8.299020303847794e-07, - "loss": 0.3411, - "step": 12920 - }, - { - "epoch": 0.917786098344365, - "grad_norm": 3.1600055981634183, - "learning_rate": 8.228027829050122e-07, - "loss": 0.3577, - "step": 12930 - }, - { - "epoch": 0.918495909711994, - "grad_norm": 6.08255963796338, - "learning_rate": 8.15703535425245e-07, - "loss": 0.3589, - "step": 12940 - }, - { - "epoch": 0.9192057210796231, - "grad_norm": 4.397885394689723, - "learning_rate": 8.086042879454778e-07, - "loss": 0.3492, - "step": 12950 - }, - { - "epoch": 0.9199155324472521, - "grad_norm": 227.99760672787355, - "learning_rate": 8.015050404657107e-07, - "loss": 0.3346, - "step": 12960 - }, - { - "epoch": 0.9206253438148811, - "grad_norm": 2.2307237070418853, - "learning_rate": 7.944057929859436e-07, - "loss": 0.3441, - "step": 12970 - }, - { - "epoch": 0.9213351551825103, - "grad_norm": 5.180228064847272, - "learning_rate": 7.873065455061764e-07, - "loss": 0.3465, - "step": 12980 - }, - { - "epoch": 0.9220449665501393, - "grad_norm": 3.2003044967213836, - "learning_rate": 7.802072980264093e-07, - "loss": 0.3425, - "step": 12990 - }, - { - "epoch": 0.9227547779177684, - "grad_norm": 2.734492726273123, - "learning_rate": 7.731080505466421e-07, - "loss": 0.3403, - "step": 13000 - }, - { - "epoch": 0.9234645892853974, - "grad_norm": 2.825363146947483, - "learning_rate": 7.66008803066875e-07, - "loss": 0.3644, - "step": 13010 - }, - { - "epoch": 0.9241744006530265, - "grad_norm": 6.94935444401322, - "learning_rate": 7.589095555871078e-07, - "loss": 0.3498, - "step": 13020 - }, - { - "epoch": 0.9248842120206555, - "grad_norm": 2.8121909722558924, - "learning_rate": 7.518103081073406e-07, - "loss": 0.356, - "step": 13030 - }, - { - "epoch": 0.9255940233882846, - "grad_norm": 2.7024231170054946, - "learning_rate": 7.447110606275735e-07, - "loss": 0.3415, - "step": 13040 - }, - { - "epoch": 0.9263038347559136, - "grad_norm": 2.9617596087956195, - "learning_rate": 7.376118131478063e-07, - "loss": 0.3372, - "step": 13050 - }, - { - "epoch": 0.9270136461235426, - "grad_norm": 42.5976926609076, - "learning_rate": 7.305125656680392e-07, - "loss": 0.3541, - "step": 13060 - }, - { - "epoch": 0.9277234574911717, - "grad_norm": 3.769476187835692, - "learning_rate": 7.234133181882722e-07, - "loss": 0.3594, - "step": 13070 - }, - { - "epoch": 0.9284332688588007, - "grad_norm": 3.749361674379726, - "learning_rate": 7.163140707085049e-07, - "loss": 0.3348, - "step": 13080 - }, - { - "epoch": 0.9291430802264298, - "grad_norm": 2.5267280447133937, - "learning_rate": 7.092148232287379e-07, - "loss": 0.3579, - "step": 13090 - }, - { - "epoch": 0.9298528915940589, - "grad_norm": 3.0968195473762097, - "learning_rate": 7.021155757489707e-07, - "loss": 0.3392, - "step": 13100 - }, - { - "epoch": 0.930562702961688, - "grad_norm": 3.9129176862736674, - "learning_rate": 6.950163282692035e-07, - "loss": 0.3533, - "step": 13110 - }, - { - "epoch": 0.931272514329317, - "grad_norm": 2.7485456874581122, - "learning_rate": 6.879170807894364e-07, - "loss": 0.3399, - "step": 13120 - }, - { - "epoch": 0.9319823256969461, - "grad_norm": 4.769184944849367, - "learning_rate": 6.808178333096692e-07, - "loss": 0.3551, - "step": 13130 - }, - { - "epoch": 0.9326921370645751, - "grad_norm": 2.8275717207772098, - "learning_rate": 6.737185858299021e-07, - "loss": 0.348, - "step": 13140 - }, - { - "epoch": 0.9334019484322041, - "grad_norm": 2.1023857426151595, - "learning_rate": 6.66619338350135e-07, - "loss": 0.3381, - "step": 13150 - }, - { - "epoch": 0.9341117597998332, - "grad_norm": 2.8745163990655125, - "learning_rate": 6.595200908703678e-07, - "loss": 0.3488, - "step": 13160 - }, - { - "epoch": 0.9348215711674622, - "grad_norm": 3.97821451395574, - "learning_rate": 6.524208433906007e-07, - "loss": 0.349, - "step": 13170 - }, - { - "epoch": 0.9355313825350913, - "grad_norm": 7.304369226663597, - "learning_rate": 6.453215959108335e-07, - "loss": 0.352, - "step": 13180 - }, - { - "epoch": 0.9362411939027203, - "grad_norm": 4.654909122469299, - "learning_rate": 6.382223484310663e-07, - "loss": 0.3478, - "step": 13190 - }, - { - "epoch": 0.9369510052703494, - "grad_norm": 3.4074758383445296, - "learning_rate": 6.311231009512992e-07, - "loss": 0.3265, - "step": 13200 - }, - { - "epoch": 0.9376608166379785, - "grad_norm": 2.8891732151802687, - "learning_rate": 6.24023853471532e-07, - "loss": 0.342, - "step": 13210 - }, - { - "epoch": 0.9383706280056076, - "grad_norm": 4.315712149288758, - "learning_rate": 6.169246059917649e-07, - "loss": 0.3542, - "step": 13220 - }, - { - "epoch": 0.9390804393732366, - "grad_norm": 4.202849073092827, - "learning_rate": 6.098253585119978e-07, - "loss": 0.3464, - "step": 13230 - }, - { - "epoch": 0.9397902507408656, - "grad_norm": 4.402135376104271, - "learning_rate": 6.027261110322307e-07, - "loss": 0.3493, - "step": 13240 - }, - { - "epoch": 0.9405000621084947, - "grad_norm": 3.3375797449619804, - "learning_rate": 5.956268635524635e-07, - "loss": 0.3431, - "step": 13250 - }, - { - "epoch": 0.9412098734761237, - "grad_norm": 2.58448811647569, - "learning_rate": 5.885276160726964e-07, - "loss": 0.3516, - "step": 13260 - }, - { - "epoch": 0.9419196848437528, - "grad_norm": 3.1207357827554216, - "learning_rate": 5.814283685929293e-07, - "loss": 0.3469, - "step": 13270 - }, - { - "epoch": 0.9426294962113818, - "grad_norm": 5.535335579042853, - "learning_rate": 5.74329121113162e-07, - "loss": 0.3411, - "step": 13280 - }, - { - "epoch": 0.9433393075790109, - "grad_norm": 4.157192002051246, - "learning_rate": 5.672298736333949e-07, - "loss": 0.3357, - "step": 13290 - }, - { - "epoch": 0.9440491189466399, - "grad_norm": 4.609541473632524, - "learning_rate": 5.601306261536277e-07, - "loss": 0.3297, - "step": 13300 - }, - { - "epoch": 0.9447589303142689, - "grad_norm": 4.556290013887312, - "learning_rate": 5.530313786738606e-07, - "loss": 0.3268, - "step": 13310 - }, - { - "epoch": 0.945468741681898, - "grad_norm": 4.334131807132338, - "learning_rate": 5.459321311940935e-07, - "loss": 0.3582, - "step": 13320 - }, - { - "epoch": 0.9461785530495271, - "grad_norm": 4.733377355574472, - "learning_rate": 5.388328837143264e-07, - "loss": 0.3366, - "step": 13330 - }, - { - "epoch": 0.9468883644171562, - "grad_norm": 6.762724277887754, - "learning_rate": 5.317336362345592e-07, - "loss": 0.345, - "step": 13340 - }, - { - "epoch": 0.9475981757847852, - "grad_norm": 2.9705397730746634, - "learning_rate": 5.246343887547921e-07, - "loss": 0.3465, - "step": 13350 - }, - { - "epoch": 0.9483079871524143, - "grad_norm": 3.195893348669726, - "learning_rate": 5.175351412750249e-07, - "loss": 0.3348, - "step": 13360 - }, - { - "epoch": 0.9490177985200433, - "grad_norm": 7.323985518462735, - "learning_rate": 5.104358937952577e-07, - "loss": 0.3543, - "step": 13370 - }, - { - "epoch": 0.9497276098876724, - "grad_norm": 2.799618403745627, - "learning_rate": 5.033366463154906e-07, - "loss": 0.3431, - "step": 13380 - }, - { - "epoch": 0.9504374212553014, - "grad_norm": 2.7728876598155843, - "learning_rate": 4.962373988357234e-07, - "loss": 0.3249, - "step": 13390 - }, - { - "epoch": 0.9511472326229304, - "grad_norm": 5.195465798306655, - "learning_rate": 4.891381513559563e-07, - "loss": 0.3413, - "step": 13400 - }, - { - "epoch": 0.9518570439905595, - "grad_norm": 10.319650407110732, - "learning_rate": 4.820389038761892e-07, - "loss": 0.3289, - "step": 13410 - }, - { - "epoch": 0.9525668553581885, - "grad_norm": 3.639550539774894, - "learning_rate": 4.74939656396422e-07, - "loss": 0.358, - "step": 13420 - }, - { - "epoch": 0.9532766667258176, - "grad_norm": 3.005922518183922, - "learning_rate": 4.6784040891665486e-07, - "loss": 0.3483, - "step": 13430 - }, - { - "epoch": 0.9539864780934467, - "grad_norm": 3.658172908229024, - "learning_rate": 4.607411614368877e-07, - "loss": 0.3503, - "step": 13440 - }, - { - "epoch": 0.9546962894610758, - "grad_norm": 3.17836271977541, - "learning_rate": 4.5364191395712053e-07, - "loss": 0.32, - "step": 13450 - }, - { - "epoch": 0.9554061008287048, - "grad_norm": 2.6050315565816513, - "learning_rate": 4.465426664773535e-07, - "loss": 0.336, - "step": 13460 - }, - { - "epoch": 0.9561159121963339, - "grad_norm": 2.516963929561299, - "learning_rate": 4.394434189975863e-07, - "loss": 0.3461, - "step": 13470 - }, - { - "epoch": 0.9568257235639629, - "grad_norm": 5.182889994348168, - "learning_rate": 4.3234417151781915e-07, - "loss": 0.3453, - "step": 13480 - }, - { - "epoch": 0.9575355349315919, - "grad_norm": 2.2527308195923843, - "learning_rate": 4.25244924038052e-07, - "loss": 0.3394, - "step": 13490 - }, - { - "epoch": 0.958245346299221, - "grad_norm": 5.702042483324615, - "learning_rate": 4.181456765582848e-07, - "loss": 0.3464, - "step": 13500 - }, - { - "epoch": 0.95895515766685, - "grad_norm": 4.320082944510015, - "learning_rate": 4.110464290785177e-07, - "loss": 0.361, - "step": 13510 - }, - { - "epoch": 0.9596649690344791, - "grad_norm": 2.7057123674561683, - "learning_rate": 4.0394718159875055e-07, - "loss": 0.3451, - "step": 13520 - }, - { - "epoch": 0.9603747804021081, - "grad_norm": 6.179223629975322, - "learning_rate": 3.968479341189834e-07, - "loss": 0.3371, - "step": 13530 - }, - { - "epoch": 0.9610845917697372, - "grad_norm": 2.5395758819730267, - "learning_rate": 3.897486866392163e-07, - "loss": 0.3587, - "step": 13540 - }, - { - "epoch": 0.9617944031373662, - "grad_norm": 3.6526335466786835, - "learning_rate": 3.8264943915944917e-07, - "loss": 0.3439, - "step": 13550 - }, - { - "epoch": 0.9625042145049953, - "grad_norm": 6.134974420857256, - "learning_rate": 3.75550191679682e-07, - "loss": 0.3413, - "step": 13560 - }, - { - "epoch": 0.9632140258726244, - "grad_norm": 4.231152248304582, - "learning_rate": 3.6845094419991484e-07, - "loss": 0.3412, - "step": 13570 - }, - { - "epoch": 0.9639238372402534, - "grad_norm": 19.9166049671889, - "learning_rate": 3.613516967201477e-07, - "loss": 0.3457, - "step": 13580 - }, - { - "epoch": 0.9646336486078825, - "grad_norm": 3.0744926751867565, - "learning_rate": 3.542524492403805e-07, - "loss": 0.3501, - "step": 13590 - }, - { - "epoch": 0.9653434599755115, - "grad_norm": 4.316210901775538, - "learning_rate": 3.471532017606134e-07, - "loss": 0.3391, - "step": 13600 - }, - { - "epoch": 0.9660532713431406, - "grad_norm": 5.568442813862272, - "learning_rate": 3.400539542808463e-07, - "loss": 0.3571, - "step": 13610 - }, - { - "epoch": 0.9667630827107696, - "grad_norm": 2.464997647373043, - "learning_rate": 3.3295470680107913e-07, - "loss": 0.3403, - "step": 13620 - }, - { - "epoch": 0.9674728940783986, - "grad_norm": 9.203447351864554, - "learning_rate": 3.2585545932131197e-07, - "loss": 0.3372, - "step": 13630 - }, - { - "epoch": 0.9681827054460277, - "grad_norm": 4.083574237624433, - "learning_rate": 3.187562118415448e-07, - "loss": 0.3523, - "step": 13640 - }, - { - "epoch": 0.9688925168136567, - "grad_norm": 2.580899686505033, - "learning_rate": 3.1165696436177764e-07, - "loss": 0.3331, - "step": 13650 - }, - { - "epoch": 0.9696023281812858, - "grad_norm": 4.461792584369479, - "learning_rate": 3.0455771688201053e-07, - "loss": 0.3436, - "step": 13660 - }, - { - "epoch": 0.9703121395489148, - "grad_norm": 6.002729090963929, - "learning_rate": 2.9745846940224337e-07, - "loss": 0.3392, - "step": 13670 - }, - { - "epoch": 0.971021950916544, - "grad_norm": 15.908649085501459, - "learning_rate": 2.9035922192247626e-07, - "loss": 0.3401, - "step": 13680 - }, - { - "epoch": 0.971731762284173, - "grad_norm": 3.2548319133826875, - "learning_rate": 2.832599744427091e-07, - "loss": 0.3466, - "step": 13690 - }, - { - "epoch": 0.9724415736518021, - "grad_norm": 2.810860141109629, - "learning_rate": 2.76160726962942e-07, - "loss": 0.3445, - "step": 13700 - }, - { - "epoch": 0.9731513850194311, - "grad_norm": 5.404897398221347, - "learning_rate": 2.690614794831748e-07, - "loss": 0.3464, - "step": 13710 - }, - { - "epoch": 0.9738611963870601, - "grad_norm": 3.07947902781157, - "learning_rate": 2.6196223200340766e-07, - "loss": 0.3295, - "step": 13720 - }, - { - "epoch": 0.9745710077546892, - "grad_norm": 3.2905796500928814, - "learning_rate": 2.548629845236405e-07, - "loss": 0.3491, - "step": 13730 - }, - { - "epoch": 0.9752808191223182, - "grad_norm": 4.431073995020802, - "learning_rate": 2.4776373704387334e-07, - "loss": 0.3483, - "step": 13740 - }, - { - "epoch": 0.9759906304899473, - "grad_norm": 3.5179707782287166, - "learning_rate": 2.406644895641062e-07, - "loss": 0.3469, - "step": 13750 - }, - { - "epoch": 0.9767004418575763, - "grad_norm": 4.221356923748856, - "learning_rate": 2.3356524208433906e-07, - "loss": 0.3343, - "step": 13760 - }, - { - "epoch": 0.9774102532252054, - "grad_norm": 286.15418214313974, - "learning_rate": 2.2646599460457195e-07, - "loss": 0.3349, - "step": 13770 - }, - { - "epoch": 0.9781200645928344, - "grad_norm": 3.4922335144175576, - "learning_rate": 2.193667471248048e-07, - "loss": 0.3485, - "step": 13780 - }, - { - "epoch": 0.9788298759604636, - "grad_norm": 3.944308898398798, - "learning_rate": 2.1226749964503763e-07, - "loss": 0.3288, - "step": 13790 - }, - { - "epoch": 0.9795396873280926, - "grad_norm": 3.16447581060814, - "learning_rate": 2.0516825216527052e-07, - "loss": 0.3435, - "step": 13800 - }, - { - "epoch": 0.9802494986957216, - "grad_norm": 7.105988741131366, - "learning_rate": 1.9806900468550335e-07, - "loss": 0.342, - "step": 13810 - }, - { - "epoch": 0.9809593100633507, - "grad_norm": 3.311616450653751, - "learning_rate": 1.9096975720573622e-07, - "loss": 0.365, - "step": 13820 - }, - { - "epoch": 0.9816691214309797, - "grad_norm": 3.1283492138129128, - "learning_rate": 1.8387050972596905e-07, - "loss": 0.3497, - "step": 13830 - }, - { - "epoch": 0.9823789327986088, - "grad_norm": 4.720800332800002, - "learning_rate": 1.7677126224620194e-07, - "loss": 0.3356, - "step": 13840 - }, - { - "epoch": 0.9830887441662378, - "grad_norm": 5.755549723756511, - "learning_rate": 1.6967201476643478e-07, - "loss": 0.3534, - "step": 13850 - }, - { - "epoch": 0.9837985555338669, - "grad_norm": 12.413957162417217, - "learning_rate": 1.6257276728666762e-07, - "loss": 0.3514, - "step": 13860 - }, - { - "epoch": 0.9845083669014959, - "grad_norm": 3.7416649036415195, - "learning_rate": 1.5547351980690048e-07, - "loss": 0.3468, - "step": 13870 - }, - { - "epoch": 0.985218178269125, - "grad_norm": 5.096087166471907, - "learning_rate": 1.4837427232713335e-07, - "loss": 0.3478, - "step": 13880 - }, - { - "epoch": 0.985927989636754, - "grad_norm": 2.8643069595501847, - "learning_rate": 1.4127502484736618e-07, - "loss": 0.3307, - "step": 13890 - }, - { - "epoch": 0.986637801004383, - "grad_norm": 4.161106542911394, - "learning_rate": 1.3417577736759905e-07, - "loss": 0.3451, - "step": 13900 - }, - { - "epoch": 0.9873476123720122, - "grad_norm": 3.161705990477656, - "learning_rate": 1.270765298878319e-07, - "loss": 0.3389, - "step": 13910 - }, - { - "epoch": 0.9880574237396412, - "grad_norm": 3.2196566259908637, - "learning_rate": 1.1997728240806475e-07, - "loss": 0.3508, - "step": 13920 - }, - { - "epoch": 0.9887672351072703, - "grad_norm": 3.0061617959710403, - "learning_rate": 1.1287803492829761e-07, - "loss": 0.357, - "step": 13930 - }, - { - "epoch": 0.9894770464748993, - "grad_norm": 7.195163761877952, - "learning_rate": 1.0577878744853047e-07, - "loss": 0.3344, - "step": 13940 - }, - { - "epoch": 0.9901868578425284, - "grad_norm": 4.778295681909435, - "learning_rate": 9.867953996876332e-08, - "loss": 0.3404, - "step": 13950 - }, - { - "epoch": 0.9908966692101574, - "grad_norm": 3.6751893575330072, - "learning_rate": 9.158029248899617e-08, - "loss": 0.3222, - "step": 13960 - }, - { - "epoch": 0.9916064805777864, - "grad_norm": 6.066838850034421, - "learning_rate": 8.448104500922902e-08, - "loss": 0.3373, - "step": 13970 - }, - { - "epoch": 0.9923162919454155, - "grad_norm": 5.8640066255244525, - "learning_rate": 7.738179752946189e-08, - "loss": 0.35, - "step": 13980 - }, - { - "epoch": 0.9930261033130445, - "grad_norm": 4.063550481932921, - "learning_rate": 7.028255004969474e-08, - "loss": 0.3424, - "step": 13990 - }, - { - "epoch": 0.9937359146806736, - "grad_norm": 6.923421784576789, - "learning_rate": 6.31833025699276e-08, - "loss": 0.3584, - "step": 14000 - }, - { - "epoch": 0.9944457260483026, - "grad_norm": 4.621602275306591, - "learning_rate": 5.6084055090160446e-08, - "loss": 0.3381, - "step": 14010 - }, - { - "epoch": 0.9951555374159318, - "grad_norm": 5.495946912076004, - "learning_rate": 4.89848076103933e-08, - "loss": 0.3557, - "step": 14020 - }, - { - "epoch": 0.9958653487835608, - "grad_norm": 2.261874767912811, - "learning_rate": 4.188556013062616e-08, - "loss": 0.3346, - "step": 14030 - }, - { - "epoch": 0.9965751601511899, - "grad_norm": 3.528699506394003, - "learning_rate": 3.478631265085901e-08, - "loss": 0.3284, - "step": 14040 - }, - { - "epoch": 0.9972849715188189, - "grad_norm": 3.0483860239618314, - "learning_rate": 2.7687065171091867e-08, - "loss": 0.3341, - "step": 14050 - }, - { - "epoch": 0.9979947828864479, - "grad_norm": 4.681194219809911, - "learning_rate": 2.0587817691324724e-08, - "loss": 0.333, - "step": 14060 - }, - { - "epoch": 0.998704594254077, - "grad_norm": 5.802114485594721, - "learning_rate": 1.3488570211557575e-08, - "loss": 0.3457, - "step": 14070 - }, - { - "epoch": 0.999414405621706, - "grad_norm": 2.8616716300198775, - "learning_rate": 6.389322731790431e-09, - "loss": 0.3398, - "step": 14080 - } - ], - "logging_steps": 10, - "max_steps": 14088, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 5000, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 9975763395674112.0, - "train_batch_size": 1, - "trial_name": null, - "trial_params": null -}